;;; Copyright (c) 1998-2016 John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen ;;; see LICENSE for conditions (in-package :lkb) ;;; Creating and running the `quick check' ;;; *check-paths* is the parameter set in user file. This is used to construct ;;; value of *check-paths-optimised* which is what is used internally ;;; (defparameter *check-paths* nil) ;;; defined in globals (defvar *check-paths-optimised* (make-array 0)) ;;; Wrapping with-check-path-list-collection around some call to parse a sentence or ;;; a set of sentences collects stats on all feature paths that fail in unification, ;;; and computes the best set for checking values of before unifications. Writes ;;; out set to specified file ;;; ;;; NB existing set of quick check paths is left untouched #| (with-check-path-list-collection "/tmp/checkpaths-test.lsp" (parse '("Devito" "manages" "a" "programmer" "Abrams" "interviewed" "and" "Browne" "hired") nil)) (with-check-path-list-collection "/tmp/checkpaths-gen.lsp" (generate-from-mrs-file "~/Documents/grammars/erg-2018/tsdb/gold/mrs/result" t)) (with-check-path-list-collection "/tmp/checkpaths.lsp" (parse-sentences "~/Documents/grammars/erg-2018/lkb/checkpaths.items" t)) |# (defmacro with-check-path-list-collection (output-file &body forms) `(let ((.saved-names-and-fns. (install-unify-check-paths-functions)) (.completedp. nil)) (declare (special *fail-path-list*)) (unwind-protect (prog1 ;; disable any path checking currently in force ;; compute full forest and unpack only a single result (let ((*check-paths-optimised* (make-array 0)) (*first-only-p* 1) (*gen-first-only-p* 1) (*unpacking-scoring-hook* (constantly 0.0))) ,@forms) (setq .completedp. t)) ;; Restore original function definitions (dolist (name-and-fn .saved-names-and-fns.) (setf (symbol-function (car name-and-fn)) (cdr name-and-fn))) (when .completedp. (with-open-file (.str. ,output-file :direction :output :if-exists :supersede :if-does-not-exist :create) (let ((*print-pretty* nil)) (format .str. "#|~%Check paths created from execution of~% ~S~%|#~%" '(with-check-path-list-collection ,output-file ,@forms))) (format t "~%Extracting paths...") (force-output) (dolist (item *fail-path-list*) (setf (car item) (reverse (car item)))) (write `(defparameter *check-paths* ',(check-path-convert (extract-check-paths *fail-path-list*))) :stream .str. :escape t :pretty t :length nil :level nil) (terpri .str.) (format t "~&Wrote file ~A~%" (truename ,output-file))))))) (defun extract-check-paths (fail-path-list) (when fail-path-list (let ((max 0) (max-item nil)) ;; Identify the path that caused the greatest number of unification failures (dolist (item fail-path-list) (when (> (hash-table-count (cdr item)) max) (setq max (hash-table-count (cdr item))) (setq max-item item))) ;; For each path p with a lower number of fails, expunge the record of all ;; fails where the path just identified also failed, and drop p if no fails ;; remain (i.e. the path contributes no new failures of its own); then repeat ;; the process (cons (cons (car max-item) max) (extract-check-paths (mapcan #'(lambda (item) (let ((item-table (cdr item))) (maphash #'(lambda (key val) (declare (ignore val)) (remhash key item-table)) (cdr max-item)) (when (> (hash-table-count item-table) 0) (list (cons (car item) item-table))))) (remove max-item fail-path-list :test #'eq))))))) #| (defun extract-check-paths (fail-path-list) ;; simple-minded version which just returns paths in order of decreasing ;; number of fails (sort (mapcar #'(lambda (item) (cons (car item) (hash-table-count (cdr item)))) fail-path-list) #'> :key #'cdr)) |# ;;; Interactive interface (defun interactive-create-check-paths nil (let* ((test-file (ask-user-for-existing-pathname "Checkpaths sample file?")) (output-file (and test-file (ask-user-for-new-pathname "Checkpaths output file?")))) (when (and test-file output-file) (with-check-path-list-collection output-file (parse-sentences test-file t)) (format t "~%Grammar loading script should contain:~ ~%(lkb-load-lisp (this-directory) t)~%")))) ;;; Once one path has failed, continue to collect ALL failing paths - while recording ;;; the fact that the unification as a whole has failed. ;;; ;;; NB If unify-dags or unify2 change, then the code below had better be updated ;;; accordingly. We can remove any *unify-debug* conditionals for clarity. (defun install-unify-check-paths-functions nil (declare (special *collecting-check-paths-p* *unify-dags-fail-count* *fail-path-list*)) (prog1 (mapcar #'(lambda (name) (cons name (symbol-function name))) '(unify-dags unify2)) (setq *collecting-check-paths-p* nil) (setq *unify-dags-fail-count* 0) (setq *fail-path-list* nil) ;; ;; (setf (symbol-function 'unify-dags) #'(lambda (dag1 dag2) (if *within-unification-context-p* ;; --- modified code to collect paths (let ((*collecting-check-paths-p* t) (*unify-dags-failed-p* nil)) (declare (special *collecting-check-paths-p* *unify-dags-failed-p* *unify-dags-fail-count*)) (incf *unify-dags-fail-count*) (catch '*fail* (unify1 dag1 dag2 nil) (if *unify-dags-failed-p* nil dag1))) ;; --- end of modified code (with-unification-context (dag1) (when (unify-dags dag1 dag2) (copy-dag dag1)))))) ;; ;; (setf (symbol-function 'unify2) #'(lambda (dag1 dag2 path) (declare (special *collecting-check-paths-p* *unify-dags-failed-p* *unify-dags-fail-count* *fail-path-list*)) (let ((t1 (unify-get-type dag1)) (t2 (unify-get-type dag2))) (multiple-value-bind (new-type constraintp) (greatest-common-subtype t1 t2) ;; --- inserted code to collect paths (if (or new-type *collecting-check-paths-p*) (progn (unless new-type (setq *unify-dags-failed-p* t) (let ((item (assoc path *fail-path-list* :test #'equal))) (unless item ;; we must copy path since its conses are stack allocated - but ;; leave it in reverse order for now ;; an adjustable bit-vector might be better than a hash table ;; since the keys are integers and the values are irrelevant (setq item (cons (copy-list path) (make-hash-table))) (push item *fail-path-list*)) (setf (gethash *unify-dags-fail-count* (cdr item)) t))) ;; --- end of inserted code (unless (eq new-type t1) (setf (dag-new-type dag1) new-type)) (when (and constraintp *unify-wffs*) (let ((constraint (if *expanding-types* (possibly-new-constraint-of new-type) (may-copy-constraint-of new-type)))) (setq dag1 (unify2 dag1 constraint path)))) (cond ((and (null (dag-arcs dag1)) (null (dag-comp-arcs dag1))) (unless (eq new-type t2) (setf (dag-new-type dag2) new-type)) (setf (dag-forward dag1) dag2)) ((and (null (dag-arcs dag2)) (null (dag-comp-arcs dag2))) (setf (dag-forward dag2) dag1)) ((eq (dag-copy dag1) :inside) (throw '*fail* nil)) (t (setf (dag-forward dag2) dag1) (setf (dag-copy dag1) :inside) (unify-arcs dag1 dag2 path) (setf (dag-copy dag1) nil) dag1))) (progn (throw '*fail* nil)))))) ))) ;;; called from check-type-table, once constraints have been expanded. Needs ;;; to be kept in synch with type hierarchy and constraints ;;; ;;; daughters-restricted field of rules must be kept in synch with optimised ;;; paths; this is done when a rule is read in (defun optimise-check-unif-paths nil (when (or (null *check-paths*) (find :vanilla *features*)) (setq *check-paths-optimised* (make-array 0)) (return-from optimise-check-unif-paths nil)) (setq *check-paths-optimised* (make-array *check-path-count*)) (loop for path-and-freq in *check-paths* for n from 0 below *check-path-count* do (setf (aref *check-paths-optimised* n) (if (and (listp (car path-and-freq)) (integerp (cdr path-and-freq))) (optimise-check-unif-path (car path-and-freq) (cdr path-and-freq)) (error "Incorrect format for check path list")))) t) (defun optimise-check-unif-path (path freq) (cons path (if path (let* ((feat (car (last path))) (fs (constraint-of (or (maximal-type-of feat) (error "Inconsistency - *check-paths* uses feature ~A ~ which is not in grammar" feat)))) (type (type-of-fs (get-dag-value fs feat)))) (let* ((types (cons (get-type-entry type) (retrieve-descendants type))) (len (length types))) ;; (format t "~%Feature ~A, number of possible types ~A" feat len) (if (and (<= len (integer-length most-positive-fixnum)) ; restrict to fixnum (or (null *string-type*) (not (member *string-type* types :key #'ltype-name)))) (mapcar #'(lambda (d) (cons (ltype-name d) (let ((val 0) (pos 0)) (dolist (x (cons d (ltype-descendants d)) val) (setq pos (or (position x types) (error "Inconsistency in ~A" 'optimise-check-unif-path))) (setq val ;; set bit corresponding to pos of x in types list (dpb 1 (byte 1 pos) val)))))) types) freq))) freq))) (defmacro type-bit-representation-p (x) `(typep ,x 'fixnum)) ;;; Statically compute set of restrictor values for a tdfs or dag, and check ;;; two sets of values for compatibility (defun restrict-fs (fs) (declare (simple-vector *check-paths-optimised*)) (loop with vals = (make-array (length *check-paths-optimised*)) for path-spec across *check-paths-optimised* for n from 0 do (setf (svref vals n) (let ((v (existing-dag-at-end-of fs (car path-spec)))) (when v (let ((type (type-of-fs v))) (if (consp (cdr path-spec)) ;; there is a bit-vector encoding for the possible values ;; of this path, so use it instead of the type name (or (cdr (assoc type (cdr path-spec) :test #'eq)) (error "Inconsistency - ~A could not find restrictor ~ bit vector for type ~A at path ~A" 'restrict-fs type (car path-spec))) type))))) finally (return vals))) (defun restrictors-compatible-p (daughter-restricted child-restricted) (declare (simple-vector daughter-restricted child-restricted)) (loop for dt across daughter-restricted for ct across child-restricted always (cond ((or (eq dt ct) (null dt) (null ct))) ((and (type-bit-representation-p dt) (type-bit-representation-p ct)) ;; fixnum (bit) encodings (not (zerop (logand dt ct)))) (t ;; type name symbol encodings (greatest-common-subtype dt ct))))) (defun restrictors-subsuming-p (restricted1 restricted2) ;; succeed if there is subsumption consistently in one direction and/or the other (declare (simple-vector restricted1 restricted2)) (loop for dt across restricted1 for ct across restricted2 with forwardp = t ; dt subsumes or is equal to ct and backwardp = t ; the converse always (cond ((eq dt ct)) ((null dt) (setq backwardp nil) forwardp) ((null ct) (setq forwardp nil) backwardp) ((and (type-bit-representation-p dt) (type-bit-representation-p ct)) ;; fixnum (bit) encodings (or (= dt ct) ; in case = but not eq (let ((gcs (logand dt ct))) (cond ((= gcs ct) (setq backwardp nil) forwardp) ((= gcs dt) (setq forwardp nil) backwardp))))) (t ;; type name symbol encodings (let ((gcs (greatest-common-subtype dt ct))) (cond ((eq gcs ct) (setq backwardp nil) forwardp) ((eq gcs dt) (setq forwardp nil) backwardp))))))) ;;; Versions called dynamically inside the scope of a set of unifications (defun x-restrict-fs (fs) (declare (simple-vector *check-paths-optimised*)) (loop with fs = (deref-dag fs) with vals = (make-array (length *check-paths-optimised*)) for path-spec across *check-paths-optimised* for n from 0 do (setf (svref vals n) (let ((v (x-existing-dag-at-end-of fs (car path-spec)))) (when v (let ((type (unify-get-type v))) (if (consp (cdr path-spec)) (or (cdr (assoc type (cdr path-spec) :test #'eq)) (error "Inconsistency - ~A could not find restrictor ~ bit vector for type ~A at path ~A" 'x-restrict-fs type (car path-spec))) type))))) finally (return vals))) (defun x-restrict-and-compatible-p (fs child-restricted) (declare (simple-vector *check-paths-optimised* child-restricted)) (loop with fs = (deref-dag fs) for path-spec across *check-paths-optimised* for dt = (let ((v (x-existing-dag-at-end-of fs (car path-spec)))) (when v (let ((type (unify-get-type v))) (if (consp (cdr path-spec)) (or (cdr (assoc type (cdr path-spec) :test #'eq)) (error "Inconsistency - ~A could not find restrictor ~ bit vector for type ~A at path ~A" 'x-restrict-and-compatible-p type (car path-spec))) type)))) for ct across child-restricted always (cond ((or (eq dt ct) (null dt) (null ct))) ((and (type-bit-representation-p dt) (type-bit-representation-p ct)) ;; fixnum (bit) encodings (not (zerop (logand dt ct)))) (t ;; type name symbol encodings (greatest-common-subtype dt ct))))) (defun x-existing-dag-at-end-of (dag labels-chain) ;; assumes dag in top level call has already been dereferenced (cond ((null labels-chain) dag) (t (let ((one-step-down (x-get-dag-value dag (car labels-chain)))) (when one-step-down (x-existing-dag-at-end-of (deref-dag one-step-down) (cdr labels-chain))))))) (defun x-get-dag-value (dag attribute) (dolist (arc (dag-arcs dag)) (when (eq attribute (dag-arc-attribute arc)) (return-from x-get-dag-value (dag-arc-value arc)))) (dolist (arc (dag-comp-arcs dag)) (when (eq attribute (dag-arc-attribute arc)) (return-from x-get-dag-value (dag-arc-value arc)))) nil)