;;; Copyright (c) 1991--2018 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen; ;;; see `LICENSE' for conditions. ;;; modifications for YADU - April 1997 ;;; bug fixes etc 1995 ;;; July 1996 - cacheing glbs ;;; structure mod to allow glbs to be calculated (in-package :lkb) ;;; ;;; For each type we need: ;;; ;;; name ;;; parents - ie immediate supertypes - a set of types ;;; constraint - a feature structure stored either in a fully ;;; expanded form or just as the feature structure ;;; specific to the type ;;; tdfs - the full typed default feature structure constraint ;;; ;;; For implementation purposes we also have: ;;; ;;; constraint-mark - unification generation last time the constraint ;;; was returned not completely-copied ;;; daughters - immediate subtypes - a set of types. ;;; appfeats - appropriate features - a set of features which can be ;;; derived from the constraint (top-level-features-of constraint) ;;; but cached in order to type untyped feature structures efficiently. ;;; ancestors - all the supertypes of a type - immediate or otherwise ;;; marks - see marks.lsp ;;; constraint-spec - the user specified unifications ;;; default-spec - the user specified default unifications ;;; local constraint - the fs derived from the user specified ;;; unifications ;;; inherited constraint - the fs after inheritance but before ;;; type inference - for debugging - zeroed ;;; after expanding all constraints ;;; atomic-p - t if the type has no appropriate features and none of ;;; its subtypes have any appropriate features ;;; July 1996 ;;; glbp - t if type was automatically created ;;; May 1997 ;;; descendants - for glb stuff (defstruct ltype name parents constraint (constraint-mark nil) tdfs comment daughters appfeats enumerated-p ancestors marks constraint-spec default-spec local-constraint inherited-constraint atomic-p glbp descendants shrunk-p visible-p ; for display in type hierarchy bit-code ; for glb computation ) (defmethod common-lisp:print-object ((instance ltype) stream) (if *print-readably* ;; print so object can be read back into lisp (call-next-method) ;; usual case (progn (write-string "# stream)))) (defstruct (leaf-type (:include ltype)) (expanded-p nil)) (defvar *types* (make-hash-table :test #'eq)) #+:sbcl (declaim (sb-ext:always-bound *types*)) (defparameter *ordered-type-list* nil) (defparameter *ordered-glbtype-list* nil) (defparameter *default-abbreviations* nil) (defvar *types-changed* nil) (defvar *lexicon-changed* nil) (defvar *type-reload-p* nil) (defun clear-types () (clear-type-cache) ; must be done whenever types table is cleared (disable-type-interactions) (clrhash *types*) (setf *ordered-type-list* nil) (setf *ordered-glbtype-list* nil) ;; (clear-leaf-types *leaf-types*) ; no longer needed here (clear-feature-table) (clear-expanded-lex) (when (and *gc-before-reload* *type-reload-p*) #+:allegro (excl:gc t) #+:sbcl (sb-ext:gc :full t)) (setf *type-reload-p* t)) (defun clear-types-for-patching-constraints nil (clear-type-cache) (clear-feature-table) (clear-expanded-lex)) (defun clear-type-visibility () (maphash #'(lambda (name entry) (declare (ignore name)) (setf (ltype-visible-p entry) nil)) *types*)) (defun collect-type-names () (let ((type-names nil)) (maphash #'(lambda (name entry) (declare (ignore entry)) (push name type-names)) *types*) type-names)) (defmacro get-type-entry (name) `(gethash ,name *types*)) (defun set-type-entry (name new-entry) (setf (gethash name *types*) new-entry)) (defun remove-type-entry (name) ;; effectively invalidates type, but caller responsible for updating any relevant ;; type caches etc. (remhash name *types*)) (defun is-valid-type (x) (typecase x (null nil) (symbol (get-type-entry x)) (string t))) (defun string-type-p (type-name) ;; AAC 30/12/94 ;; allow for no string type (let ((st *string-type*)) (and st (or (eq type-name st) (find (get-type-entry type-name) (the list (retrieve-ancestors st)) :test #'eq))))) (defun constraint-spec-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-constraint-spec type-record) (error "~%~A is not a valid type" type-name)))) (defun constraint-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-constraint type-record) (error "~%~A is not a valid type" type-name)))) (defun tdfs-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-tdfs type-record) (error "~%~A is not a valid type" type-name)))) (defun default-spec-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-default-spec type-record) (error "~%~A is not a valid type" type-name)))) (defun appropriate-features-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-appfeats type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-ancestors (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-ancestors type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-descendants (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-descendants type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-parents (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-parents type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-daughters (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-daughters type-record) (error "~%~A is not a valid type" type-name)))) (defun subtype-p (type1 type2) ;; is type1 a strict subtype of type2? ;; robust on invalid type names: if either of the args is not a type, the function ;; returns nil and does not signal an error (cond ((not (symbolp type2)) nil) ((symbolp type1) ;; an alternative using the type unification machinery would be ;; (and (not (eq type1 type2)) (eq (greatest-common-subtype type1 type2) type1)) ;; but that assumes the args are actually types, and can end up polluting the cache ;; with lots of lexical types (let ((t2 (get-type-entry type2))) (and t2 (ltype-descendants t2) ; chance to return immediately (let ((t1 (get-type-entry type1))) (and t1 (member t2 (ltype-ancestors t1) :test #'eq)))))) ((stringp type1) (string-type-p type2)))) (defun subtype-or-equal (type1 type2) ;; is type1 equal to type2 or a subtype of it? (cond ((eq type1 type2)) ((stringp type2) (and (stringp type1) (string= type1 type2))) (t (subtype-p type1 type2)))) (defun atomic-type-p (type-name) (or (stringp type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-atomic-p type-record) (error "~%~A is not a valid type" type-name))))) ;;; Type unification, with the results memoized for pairs of types where neither is ;;; the top type or a string type. (It would be a really bad idea to precompute a full ;;; table of results since in practice only a very small proportion of the possible ;;; pairs of types are encountered). ;;; ;;; The memoization table is keyed by a numeric combination of sxhash values of the ;;; two type names (symbols): key(t1,t2) = (sxhash(t1) ^ sxhash(t2)) & 0xFFFF. ;;; XOR is ideal here since t1 /= t2 and we want key(t1,t2)=key(t2,t1). ;;; We use only the lower-order bits of the combined key - there's no point in ;;; incorporating higher-order bits since the sxhash values of symbols should be ;;; well distributed. ;;; ;;; The two types are (re-)ordered canonically on their sxhash values so that either ;;; order in the call retrieves the same table entry. Of course, the types themselves ;;; must also be tested in case another pair of types has the same key. (defstruct type-cache-entry t1 t2 sub con (next nil :type (or null type-cache-entry))) #+:sbcl (declaim (sb-ext:freeze-type type-cache-entry)) (eval-when (:compile-toplevel :load-toplevel :execute) (defconstant +type-cache-size+ (expt 2 16))) ; power of 2 for optimum efficiency (#+:sbcl sb-ext:defglobal #-:sbcl defvar *type-cache* (make-array +type-cache-size+ :initial-element nil)) (declaim (type (simple-vector #.+type-cache-size+) *type-cache*)) (defun clear-type-cache nil ;; For consistency this cache must be cleared before (re-)loading a grammar. It's ;; probably best also to clear it after loading a grammar and before batch parsing ;; since different pairs of types will be exercised (fill *type-cache* nil) nil) (deftype symbol-type-name () '(and symbol (not null))) (defun greatest-common-subtype (type1 type2 &aux (toptype *toptype*)) ;; type1 and type2 should be acceptable as LKB type names (i.e. non-NIL symbols / strings) (labels ((greatest-common-subtype-symbols (t1 t2) (declare (symbol-type-name t1 t2)) ; guaranteed by caller (let ((h1 (sxhash t1)) (h2 (sxhash t2))) (when (> h1 h2) (rotatef t1 t2)) (do* ((key (mod (logxor h1 h2) +type-cache-size+)) (e (svref *type-cache* key) (type-cache-entry-next e))) ((null e) (add-new-entry t1 t2 key)) (declare (type (or null type-cache-entry) e)) (when (and (eq (type-cache-entry-t1 e) t1) (eq (type-cache-entry-t2 e) t2)) (return (values (type-cache-entry-sub e) (type-cache-entry-con e))))))) ;; (add-new-entry (t1 t2 key) (multiple-value-bind (subtype constraintp) (full-greatest-common-subtype t1 t2) (let ((new (make-type-cache-entry :t1 t1 :t2 t2 :sub subtype :con constraintp))) (if (svref *type-cache* key) ;; insert at end, on the assumption that first seen later => less frequent (do ((e (svref *type-cache* key) (type-cache-entry-next e))) ((null (type-cache-entry-next e)) (setf (type-cache-entry-next e) new)) (declare (type (or null type-cache-entry) e))) (setf (svref *type-cache* key) new)) (values subtype constraintp)))) ;; (incorrect-argument-types () (error "Inconsistency - invalid arguments ~S and ~S to GREATEST-COMMON-SUBTYPE" type1 type2))) ;; (declare (notinline greatest-common-subtype-symbols add-new-entry)) (cond ((eq type1 type2) (or type1 (incorrect-argument-types))) ((eq type1 toptype) type2) ; all types are compatible with the top type (NB this ((eq type2 toptype) type1) ; includes string types even if *string-type* is unset) ((typep type1 'symbol-type-name) (typecase type2 (symbol-type-name (greatest-common-subtype-symbols type1 type2)) (string (if (string-type-p type1) type2 nil)) (t (incorrect-argument-types)))) ((typep type2 'symbol-type-name) (typecase type1 (string (if (string-type-p type2) type1 nil)) (t (incorrect-argument-types)))) ((and (stringp type1) (stringp type2)) (if (string= type1 type2) type1 nil)) (t (incorrect-argument-types))))) #| ;;; investigate effectiveness of greatest common subtype cache (loop for bucket across *type-cache* for len = (loop for e = bucket then (type-cache-entry-next e) while e sum 1) with stats = nil do (let ((x (assoc len stats))) (if x (incf (cdr x)) (push (cons len 1) stats))) finally (return (sort stats #'> :key #'car))) (loop for n from 0 below (length *type-cache*) for bucket = (svref *type-cache* n) for len = (loop for e = bucket then (type-cache-entry-next e) while e sum 1) when (= len 4) do (print n) (print (loop for e = bucket then (type-cache-entry-next e) while e collect (cons (type-cache-entry-t1 e) (type-cache-entry-t2 e))))) (clear-type-cache) |# (defun full-greatest-common-subtype (type1 type2) (flet ((intersection-eq (set1 set2) (and set1 set2 (let ((set1-len (length set1)) (set2-len (length set2))) (when (> set2-len set1-len) (rotatef set1 set2) ; make set1 be the larger one (rotatef set1-len set2-len)) (if (> set2-len 20) ; avoid poor performance if both sets contain >20 elements (let ((table (make-hash-table :test #'eq :size set2-len)) ; the smaller one (res nil)) (dolist (e2 set2) (setf (gethash e2 table) t)) (dolist (e1 set1 res) (when (gethash e1 table) (push e1 res)))) (loop for e2 in set2 ; the smaller one when (member e2 set1 :test #'eq) collect e2)))))) (let ((t1 (get-type-entry type1)) (t2 (get-type-entry type2))) (cond ((eq type1 type2) type1) ((member t2 (ltype-ancestors t1) :test #'eq) type1) ((member t1 (ltype-ancestors t2) :test #'eq) type2) (t (let ((common-subtypes (intersection-eq (ltype-descendants t1) (ltype-descendants t2)))) (when common-subtypes (let ((gcsubtype-entries ;; find subtype whose own descendant list is shorter by just 1 (itself) (loop for ty in common-subtypes with sub-len = (1- (length common-subtypes)) when (= (length (ltype-descendants ty)) sub-len) collect ty))) (cond ((null gcsubtype-entries) (error "Type hierarchy inconsistent: ~A and ~A have common subtypes but descendant lists are contradictory" type1 type2)) ((cdr gcsubtype-entries) (error "Type hierarchy inconsistent: ~A and ~A have common subtypes but no unique greatest common subtype" type1 type2)) (t ;; return true as the second value if there is a constraint that may ;; have to be unified in (values (ltype-name (car gcsubtype-entries)) (if (extra-constraint-p (car gcsubtype-entries) t1 t2) t)))))))))))) (defun extra-constraint-p (gcsubtype t1 t2) ;;; test is whether any ancestor of the gcsubtype which ;;; isn't also an ancestor of the types being unified ;;; or the gcsubtype itself introduce any extra information ;;; on the constraint. (or (ltype-local-constraint gcsubtype) (let ((t1ancs (ltype-ancestors t1)) (t2ancs (ltype-ancestors t2))) (dolist (type (ltype-ancestors gcsubtype)) (when (and (not (eq type t1)) (not (eq type t2)) (ltype-local-constraint type) (not (member type t1ancs :test #'eq)) (not (member type t2ancs :test #'eq))) (return t)))))) ;;; called from generalisation (defun least-common-supertype (x y) (cond ((equal x y) x) ((stringp x) (if (stringp y) *string-type* (least-common-supertype *string-type* y))) ((stringp y) (least-common-supertype *string-type* x)) ((subtype-p x y) y) ((subtype-p y x) x) (t (let ((z (intersection (cons x (mapcar #'ltype-name (retrieve-ancestors x))) (cons y (mapcar #'ltype-name (retrieve-ancestors y)))))) (cond ((null z) (error "~%Types ~A and ~A have no common ancestor" x y)) ((= (length z) 1) (car z)) ((member x z) x) ((member y z) y) (t (let ((lcs-list (remove-ancestors z))) (cond ((null lcs-list) (error "~%Types ~A and ~A have no common ancestor" x y)) ((= (length lcs-list) 1) (car lcs-list)) (t (error "~%Types ~A and ~A have multiple common ancestors ~A" x y lcs-list)))))))))) (defun remove-ancestors (int-list) (do* ((done nil (cons initial done)) (initial (car int-list) (car (set-difference new-int-list done))) (new-int-list (set-difference int-list (mapcar #'ltype-name (retrieve-ancestors initial))) (set-difference new-int-list (mapcar #'ltype-name (retrieve-ancestors initial))))) ((null (set-difference new-int-list (cons initial done))) new-int-list))) ;;; The following utility functions assume that no cycles are present (defun get-real-types (type) (let ((type-entry (get-type-entry type))) (if (ltype-glbp type-entry) (loop for parent in (ltype-parents type-entry) append (get-real-types parent)) (list type)))) ;;; We need a record of the maximal type at which a particular feature ;;; is introduced (defvar *feature-list* (make-hash-table :test #'eq)) (defvar *feature-minimal-type* (make-hash-table :test #'eq)) (defun clear-feature-table nil (clrhash *feature-minimal-type*) (clrhash *feature-list*)) (defun maximal-type-of (feature) (gethash feature *feature-list*)) (defun set-feature-entry (feature type) (setf (gethash feature *feature-list*) type)) (defun check-feature-table nil (let ((ok t)) (maphash #'(lambda (feature type-list) (cond ((> (length type-list) 1) (format t "~%Feature ~A is introduced at multiple types ~A" feature type-list) (setf ok nil)) (t (set-feature-entry feature (car type-list))))) *feature-list*) ok)) (defun maximal-type-of-list (features) (loop for f in features for mt = (maximal-type-of f) then (greatest-common-subtype (maximal-type-of f) mt) while mt finally (return mt))) (defun maximal-type-of-list* (&rest features) (declare (dynamic-extent features)) (maximal-type-of-list features)) ;; Remove obsolete pointers from type constraints so that the garbage ;; collector can purge the structures they point to. (defun gc-types nil (maphash #'(lambda (name type) (declare (ignore name)) (when (ltype-tdfs type) (compress-dag (tdfs-indef (ltype-tdfs type)))) (compress-dag (ltype-constraint type)) (when (ltype-constraint-mark type) (mapc #'compress-dag (cadr (ltype-constraint-mark type))) (mapc #'compress-dag (cddr (ltype-constraint-mark type)))) (compress-dag (ltype-local-constraint type))) *types*)) ;;; Try to reduce the amount of space used by the expanded type hierarchy (defun clear-glbs nil (gc-types) (maphash #'(lambda (name type) (when (eql (mismatch "GLBTYPE" (symbol-name name)) 7) (setf (ltype-constraint type) nil) (setf (ltype-tdfs type) nil))) *types*)) (defun used-types (type) (let ((used (mapcar #'(lambda (x) (u-value-type (unification-rhs x))) (ltype-constraint-spec type)))) (when used (remove-duplicates used)))) (defun purge-constraints nil (gc-types) (let* ((leaves (mapcar #'(lambda (x) (gethash x *types*)) (slot-value *leaf-types* 'leaf-types))) (parents (reduce #'union (mapcar #'ltype-parents leaves))) (referred (reduce #'union (mapcar #'used-types leaves))) (save (union parents referred))) (maphash #'(lambda (name type) (unless (member (symbol-name name) save) ;; (setf (ltype-constraint type) nil) (setf (ltype-tdfs type) nil))) *types*))) (defun types-to-xml (&key (stream t) file) (loop with stream = (if file (open file :direction :output :if-exists :supersede :if-does-not-exist :create) stream) for type being each hash-value in *types* for name = (ltype-name type) for parents = (ltype-parents type) for daughters = (ltype-daughters type) do (format stream "~% ~%" name) (loop for parent in parents do (format stream " ~%" parent)) (format stream " ~% ~%") (loop for daughter in daughters do (format stream " ~%" daughter)) (format stream " ~%~%") finally (when file (close stream))))