;;; Copyright (c) 2022 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen; ;;; see `LICENSE' for conditions. (in-package :lkb) ;;; Chart mapping: token mapping, lexical filtering, and post-generation mapping ;;; Published descriptions of chart mapping merely hint at an algorithm, and there ;;; are a number of grey areas where different behaviours seem equally valid, or ;;; where the result for a particular input is indeterminate. Pseudocode for the ;;; algorithm implemented here is below. ;;; ;;; Informally, each chart mapping rule has an 'anchor', which is the rightmost ;;; input/context argument; this anchor is moved across the chart from left to right, ;;; and if it matches an edge then the remaining input/context arguments are matched ;;; against other edges from right to left. ;;; ;;; If a rule adds any output edges to the chart, the rule is restarted with its anchor ;;; at the left vertex of the leftmost output. If a rule fires but there are no output ;;; edges, the rule is restarted at the current anchor position. Restarting the rule ;;; from scratch and doing so at these vertex positions ensures that the rule cannot ;;; spuriously match edges that it has just removed, and that it has the opportunity ;;; to match any new edges that it has just added. #| apply-cmrules(cmrules, chart) jumprule := catch jump for each rule in cmrules do start := 0 repeat while start \= false start := catch restart try-cmrule(rule, start, [], [], [], rule I/C args, corresponding rule paths, chart) if jumprule then start again from jumprule try-cmrule(rule, start, edges, ematch, rmatch, args, paths, chart) if args then arg := first args ; e.g. I2 path := first paths ; e.g. (+INPUT REST FIRST) for each edge in (if ematch then edges in chart satisfying positional constraints on arg, given ematch else edges in chart with left vertex>=start, for each resetting start to left vertex) do if edge is in edges then next if edge fs matches regexes at path in rule and unifies with fs at path in rule then try-cmrule(rule, start, edge + edges, arg/edge + ematch, regex matches + rmatch, rest args, rest paths, chart) else apply-cmrule(rule, start, ematch, rmatch, chart) return false apply-cmrule(rule, start, ematch, rmatch, chart) construct a chartfs with each edge fs in ematch at the corresponding rule path newfs := unify(rule fs, chartfs) if newfs then substitute all ${} in newfs as given by rmatch for each arg in rule output args and corresponding path in rule output paths do outputfs := fs at end of path in newfs find chart cell satisfying positional constraints on arg, given ematch create candidate output edge from outputfs if any of the candidate output edges duplicates an existing edge then return add output edges to chart remove from chart the edges corresponding to I1...n in ematch if rule contains jump then throw to jump with value being name of rule to jump to else throw to restart with value being left vertex of leftmost output edge, or start if no output edges |# (eval-when (:compile-toplevel :load-toplevel :execute) (pushnew :chart-mapping *features*)) ;;; Entry points for running token mapping, lexical filtering and post-generation chart ;;; mapping rule sets (defun perform-token-mapping () ;; for ease of processing, the token chart is converted to a list of edges sorted on ;; start vertices; this makes it trivial to insert new vertices and extract a list of ;; edges sorted l->r, while efficiency hardly suffers since the token chart is ;; relatively small (when *cm-debug* (print-tchart) (format t "~&Token mapping starting~%")) (create-token-chart-dags) (let ((lchart (token-chart-to-list))) (apply-cmrules (get-token-mapping-rules) lchart) (setq *tchart* (token-chart-from-list lchart))) (when *cm-debug* (format t "~&Token mapping finished~%") (print-tchart))) (defun create-token-chart-dags () ;; install an instantiated *token-type* feature structure in each token chart edge (loop for start from 0 below *tchart-max* do (loop for e in (aref *tchart* start 1) do (create-token-edge-dag e)))) (defun create-token-edge-dag (e &optional id pos) ;; id and pos not used by LKB as such, but optionally installed in token chart edge FS; ;; pos should be a (possibly empty) list of alternating tags and probabilities (let ((td (ltype-tdfs (get-type-entry *token-type*)))) (with-unification-context (td) (yadu! td (make-nondefault-tdfs (create-typed-dag (token-edge-string e))) *token-form-path*) (yadu! td (make-nondefault-tdfs (create-typed-dag (princ-to-string (token-edge-cfrom e)))) *token-from-path*) (yadu! td (make-nondefault-tdfs (create-typed-dag (princ-to-string (token-edge-cto e)))) *token-to-path*) (yadu! td (make-nondefault-tdfs ; external pre-processor ID, if any, otherwise edge-id (build-diff-list-dag (list (princ-to-string (or id (token-edge-id e)))))) *token-id-path*) (yadu! td (make-nondefault-tdfs (build-list-dag (loop for tag in pos by #'cddr collect tag))) *token-postags-path*) (yadu! td (make-nondefault-tdfs (build-list-dag (loop for prob in (cdr pos) by #'cddr collect (princ-to-string prob)))) *token-posprobs-path*) (setf (token-edge-dag e) (copy-tdfs-elements td))))) (defun build-diff-list-dag (items) (let ((end (create-typed-dag *list-type*))) (make-dag :type *diff-list-type* :arcs (list (cons *diff-list-list* (build-list-dag items end)) (cons *diff-list-last* end))))) (defun build-list-dag (items &optional (end (create-typed-dag *empty-list-type*))) (loop with dag = end and last-elt = nil with non-empty-list-type = ;; avoid using undocumented parameter *non-empty-list-type*; ACE has the ;; the equivalent (documented) parameter `cons-type' (maximal-type-of-list* (car *list-head*) (car *list-tail*)) for item in (reverse items) do (unless (dag-p item) (setq item (create-typed-dag item))) (unless last-elt (setq last-elt item)) (setq dag (make-dag :type non-empty-list-type :arcs (list (cons (car *list-head*) item) (cons (car *list-tail*) dag)))) finally (return (values dag last-elt)))) (defun token-chart-to-list () ;; the extra initial cons cell encapsulates the list of edges, allowing it to be ;; destructively modified and avoiding the need to return it up call chains (cons nil (loop for start from 0 below *tchart-max* append (aref *tchart* start 1)))) (defun token-chart-from-list (lchart) (dotimes (i (array-total-size *tchart*)) (setf (row-major-aref *tchart* i) nil)) (let ((vertices nil)) (loop for e in (cdr lchart) do (pushnew (token-edge-from e) vertices) (pushnew (token-edge-to e) vertices)) (setq vertices (sort vertices #'<)) (loop for e in (cdr lchart) for start = (position (token-edge-from e) vertices) for end = (position (token-edge-to e) vertices) do (setf (token-edge-from e) start) (setf (token-edge-to e) end) (push e (aref *tchart* start 1)) (push e (aref *tchart* end 0))) (setq *tchart-max* (1- (length vertices))) *tchart*)) (defun perform-lexical-filtering () ;; the parse chart is processed in-place; edges cannot be added so no new vertices ;; can be created, which means that the 2-d array representation indexed by start and ;; end vertices is perfectly adequate (when *cm-debug* (format t "~&Lexical filtering starting~%")) (apply-cmrules (get-lexical-filtering-rules) *chart*) (when *cm-debug* (format t "~&Lexical filtering finished~%"))) (defun perform-post-generation-mapping () ;; similarly to the token chart and for the same reasons, the post-generation mapping ;; chart is a list of edges (plus an initial empty cons cell) sorted on start vertices (declare (special *gen-record*)) ; due to order of compilation (when *cm-debug* (format t "~&Post-generation mapping starting~%")) (prog1 (loop for edge in *gen-record* collect (let ((lchart (edge-to-oedge-list edge))) (when *cm-debug* (format t "~&extracted lattice from edge ~D, comprising edges ~{~D~^, ~}~%" (g-edge-id edge) (mapcar #'g-edge-id (cdr lchart)))) (apply-cmrules (get-post-generation-mapping-rules) lchart) (when *cm-debug* (format t "~&recovering string from edges ~{~D~^, ~}~%" (mapcar #'g-edge-id (cdr lchart)))) (setf (g-edge-string edge) (string-from-oedge-list lchart)))) (when *cm-debug* (format t "~&Post-generation mapping finished~%")))) (defun edge-to-oedge-list (edge) ;; NB extracted edges are not necessarily leaves of the generator result tree, since when ;; orthographemic rules have applied, ORTH values are only correct in the edge at the top ;; of the respective rule chain; such edges have a non-null orth-tdfs (let ((n 0)) (labels ((collect-edges (e) (if (or (edge-orth-tdfs e) (null (edge-children e))) (let* ((e (copy-edge e)) (otdfs (edge-orth-tdfs e))) (when otdfs (let ((d (yadu (edge-dag e) otdfs))) ; unify in orth part of rule (when d (setf (edge-dag e) d)))) (setf (edge-from e) n) (setf (edge-to e) (incf n)) (list e)) (mapcan #'collect-edges (edge-children e))))) ;; note the extra initial cons cell to encapsulate the list (cons nil (collect-edges edge))))) (defun string-from-oedge-list (lchart) (format nil "~{~A~^ ~}" (loop for e in (cdr lchart) for orth = (existing-dag-at-end-of (tdfs-indef (edge-dag e)) *orth-path*) for prev-end = (edge-to e) then (if (>= (edge-from e) prev-end) (edge-to e) (error "Final post-generation mapping chart contains overlapping edges")) append (dag-to-list orth :key #'(lambda (dag) (if (stringp (dag-type dag)) (dag-type dag) (error "Ill-formed orth list in post-generation mapping edge ~A" (edge-id e)))))))) ;;; Edge manipulation... (defgeneric make-cm-edge (chart kind rule pdag start end)) (defgeneric get-cm-edges (chart &key start end)) (defgeneric delete-cm-edge (chart edge)) (defgeneric add-cm-edge (chart edge)) (defgeneric duplicate-cm-edge-p (chart cedge)) ;;; ...for token and post-generation charts - in their special CM-internal list representation (defmethod make-cm-edge ((chart cons) (kind (eql :tmr)) rule pdag start end) (declare (ignore rule)) (let ((base-word (dag-type (existing-dag-at-end-of pdag *token-form-path*)))) (make-token-edge :id 0 ; at this point the edge is only a candidate for output :dag (make-nondefault-tdfs pdag) :string base-word :word (string-upcase base-word) :leaves (list base-word) :from start :to end :cfrom (parse-integer (dag-type (existing-dag-at-end-of pdag *token-from-path*))) :cto (parse-integer (dag-type (existing-dag-at-end-of pdag *token-to-path*)))))) (defmethod make-cm-edge ((chart cons) (kind (eql :pgmr)) rule pdag start end) (make-g-edge :id 0 :rule (cmrule-id rule) :dag (make-nondefault-tdfs pdag) :from start :to end)) (defmethod get-cm-edges ((chart cons) &key start end) ;; NB in this and subsquent cons methods, accessors generalise across token and generator edges (if (or start end) (loop for e in (cdr chart) when (cond ((and start (null end)) (= (edge-from e) start)) ((and (null start) end) (= (edge-to e) end)) ((and start end) (and (= (edge-from e) start) (= (edge-to e) end)))) collect e) (cdr chart))) (defmethod delete-cm-edge ((chart cons) edge) (setf (cdr chart) (delete edge (cdr chart) :test #'eq))) ; remaining elements retain order (defmethod add-cm-edge ((chart cons) edge) (setf (cdr chart) (merge 'list (list edge) (cdr chart) #'< :key #'edge-from))) (defmethod duplicate-cm-edge-p ((chart cons) cedge) ;; is there an edge identical to cedge in the chart? (loop for e in (cdr chart) thereis (and (= (edge-from e) (edge-from cedge)) (= (edge-to e) (edge-to cedge)) (dag-equal-p (tdfs-indef (edge-dag e)) (tdfs-indef (edge-dag cedge)))))) ;;; ...and for parse chart - in its usual 2-d array representation (defmethod make-cm-edge ((chart array) kind rule pdag start end) (declare (ignore kind rule pdag start end)) (error "Inconsistency - attempt during lexical filtering to create a new edge")) (defmethod get-cm-edges ((chart array) &key start end) ;; *chart-max* is always valid since no new vertices can be added (flet ((filter-edges (edges) (loop for e in edges unless (or (edge-partial-tree e) (edge-frozen e)) collect e))) (cond ((and start end) (filter-edges (aref chart start end))) (start (loop for e from (1+ start) to *chart-max* nconc (filter-edges (aref chart start e)))) (end (loop for s from 0 to (1- end) nconc (filter-edges (aref chart s end)))) (t (loop for s from 0 to (1- *chart-max*) nconc (loop for e from (1+ s) to *chart-max* nconc (filter-edges (aref chart s e)))))))) (defmethod delete-cm-edge ((chart array) edge) (setf (aref chart (edge-from edge) (edge-to edge)) (delete edge (aref chart (edge-from edge) (edge-to edge)) :test #'eq))) (defmethod add-cm-edge ((chart array) edge) (declare (ignore chart edge)) (error "Inconsistency - attempt during lexical filtering to add an edge to the parse chart")) (defmethod duplicate-cm-edge-p ((chart array) cedge) (declare (ignore chart cedge)) (error "Inconsistency - attempt during lexical filtering to check if an edge can be added to the parse chart")) ;;; Extract properties from a chart mapping rule in a convenient form for processing: ;;; lists of match/output arguments, corresponding paths, argument ordering constraints, ;;; regular expressions, etc. (defmacro poscon-input-arg-p (x) `(eql (char (symbol-name ,x) 0) #\I)) (defmacro poscon-match-arg-p (x) `(member (char (symbol-name ,x) 0) '(#\I #\C))) (defmacro poscon-output-arg-p (x) `(eql (char (symbol-name ,x) 0) #\O)) (defun add-chart-mapping-rule (id unifs paths-regexes kind) (let ((entry (make-cmrule :id id :unifs unifs :kind kind))) (when (get-chart-mapping-rules id kind) (format t "~%WARNING: Chart mapping rule `~A' redefined." id)) (expand-cmrule id entry unifs paths-regexes kind))) (defun expand-cmrule (id rule unifs paths-regexes kind) (process-unif-list id unifs nil rule *description-persistence*) (let ((dag (tdfs-indef (cmrule-full-fs rule)))) ;; match-paths-regexes, output-paths-substs (loop for (p nil . nil) in paths-regexes ; check that regexes are only below match paths unless (or (eql (mismatch *chart-mapping-input-path* p) (length *chart-mapping-input-path*)) (eql (mismatch *chart-mapping-context-path* p) (length *chart-mapping-context-path*))) do (error "Misplaced regular expression in chart mapping rule ~A" id)) (setf (cmrule-match-paths-regexes rule) (reverse paths-regexes)) (setf (cmrule-output-paths-substs rule) (find-cmrule-output-substitutions (existing-dag-at-end-of dag *chart-mapping-output-path*) (reverse *chart-mapping-output-path*) nil)) ;; positional-constraints (let ((pos-val (get-value-at-end-of dag *chart-mapping-position-path*))) (when (stringp pos-val) (setf (cmrule-positional-constraints rule) (extract-cmrule-constraints id pos-val)))) ;; match-args (I/C), match-paths (let* ((match-graph (remove-if #'(lambda (con) (not (and (poscon-match-arg-p (first con)) (poscon-match-arg-p (third con))))) (cmrule-positional-constraints rule))) (match-args-paths (append (extract-cmrule-items-and-paths id dag *chart-mapping-input-path* "I") (extract-cmrule-items-and-paths id dag *chart-mapping-context-path* "C"))) (roots (find-cmrule-root-args match-graph (mapcar #'car match-args-paths)))) (when (cdr roots) (error "Missing positional constraint between rightmost items in chart mapping rule ~A" id)) (multiple-value-bind (match-args-sorted consistentp) (topo-sort-cmrule-args match-graph roots) (unless consistentp (error "Positional constraints between input / context items in chart mapping rule ~A are inconsistent" id)) (setf (cmrule-match-args rule) match-args-sorted) (setf (cmrule-match-paths rule) (loop for a in match-args-sorted collect (cdr (assoc a match-args-paths :test #'eq)))))) ;; reorder each positional constraint given the topo- ordering on match arguments and the ;; processing ordering of matching before output (setf (cmrule-positional-constraints rule) (reorder-cmrule-constraints (cmrule-positional-constraints rule) (cmrule-match-args rule))) ;; output-args (O), output-paths (let* ((output-graph (remove-if #'(lambda (con) (not (and (poscon-output-arg-p (first con)) (poscon-output-arg-p (third con))))) (cmrule-positional-constraints rule))) (output-args-paths (extract-cmrule-items-and-paths id dag *chart-mapping-output-path* "O")) (roots (find-cmrule-root-args output-graph (mapcar #'car output-args-paths)))) (multiple-value-bind (output-args-sorted consistentp) (topo-sort-cmrule-args output-graph roots) (unless consistentp (error "Positional constraints between output items in chart mapping rule ~A are inconsistent" id)) (setq output-args-sorted (reverse output-args-sorted)) ; change to l -> r (setf (cmrule-output-args rule) output-args-sorted) (setf (cmrule-output-paths rule) (loop for a in output-args-sorted collect (cdr (assoc a output-args-paths :test #'eq)))) (setf (cmrule-output-mappings rule) (compute-cmrule-output-mappings output-args-sorted (cmrule-positional-constraints rule) id)))) ;; consistency checking (when (and (cmrule-output-args rule) (null (cmrule-match-args rule))) (error "Chart mapping rule ~A specifies no input or context items - not supported" id)) (loop for ((a1 . nil) . rest) on (cmrule-positional-constraints rule) when (and (poscon-match-arg-p a1) (find a1 rest :key #'car)) do (error "Chart mapping rule ~A contains >1 constraint on input/context argument ~A - not supported" id a1)) ;; if rule with same name and kind already exists then replace it otherwise add to end (let* ((rules (getf *cmrules* kind)) (existing (find id rules :key #'cmrule-id))) (setf (getf *cmrules* kind) (if existing (nsubst rule existing rules :test #'eq) (nconc rules (list rule))))))) (defun find-cmrule-output-substitutions (dag rpath pvs) ;; traverse output items and return an alist of paths and values that contain regex ;; capturing groups ${}, segmenting each value into a list of substrings and 4-tuples ;; (casefn arg path index) representing captures, e.g. ;; "${uc(I1:ORTH.FIRST:1)}-${I1:ORTH.FIRST:2}" -> ;; ((string-upcase I1 (ORTH FIRST) 1) "-" (identity I1 (ORTH FIRST) 2)) (let ((dtype (dag-type dag))) (typecase dtype (string (when (search "${" dtype) ; capturing group somewhere in this string? (let ((segs (loop for seg in (cl-ppcre:split "(\\${[^}]*})" dtype :with-registers-p t) unless (string= seg "") ; split can give an initial empty segment collect (or (cl-ppcre:register-groups-bind (casefn arg path n) ("\\${([lu]c)?\\(?([IC][0-9]+):([^:]+):([0-9]+)\\)?}" seg) (list (cond ((equal casefn "lc") 'string-downcase) ((equal casefn "uc") 'string-upcase) (t 'identity)) (intern arg) (mapcar #'intern (cl-ppcre:split "\\." path)) (parse-integer n))) seg)))) (push (cons (reverse rpath) segs) pvs)))) (t (dolist (arc (dag-arcs dag)) (setq pvs (find-cmrule-output-substitutions (dag-arc-value arc) (cons (dag-arc-attribute arc) rpath) pvs))))) pvs)) (defun extract-cmrule-constraints (id str) ;; (extract-cmrule-constraints "R1" "I1 ((I1 < I2) (I2 < I3) (I3 < I4) (C1 << I1) (O1 @ I1) (^ < C1) (I4 < $)) (loop for con in (cl-ppcre:split "\\s*,\\s*" str) nconc (loop for (arg1 rel arg2) on (cl-ppcre:all-matches-as-strings "[ICO][0-9]+|\\^|\\$|<<|>>|<|>|@|[^ICO\\^$<>@ \\t]+" con) by #'cddr when rel do (unless (and (stringp arg1) (stringp rel) (stringp arg2) (not (string= arg1 arg2)) (cl-ppcre:scan "^[ICO][0-9]+|\\^|\\$$" arg1) (cl-ppcre:scan "^(<<|>>|<|>|@)$" rel) (cl-ppcre:scan "^[ICO][0-9]+|\\^|\\$$" arg2) ;; for ^ and $, allow only the appropriate one of < / >, and only wrt I/C (let ((a1ic-p (cl-ppcre:scan "^[IC]" arg1)) (a2ic-p (cl-ppcre:scan "^[IC]" arg2))) (cond ((string= arg1 "^") (and (string= rel "<") a2ic-p)) ((string= arg1 "$") (and (string= rel ">") a2ic-p)) ((string= arg2 "^") (and (string= rel ">") a1ic-p)) ((string= arg2 "$") (and (string= rel "<") a1ic-p)) (t t))) ;; for O, allow relations @ / < / > between Os, but only @ wrt I/C (let ((a1o-p (cl-ppcre:scan "^O" arg1)) (a2o-p (cl-ppcre:scan "^O" arg2))) (cond ((and a1o-p a2o-p) (member rel '("@" "<" ">") :test #'string=)) ((or a1o-p a2o-p) (string= rel "@")) (t t)))) (error "Ill-formed positional constraint ~A in chart mapping rule ~A" con id)) and collect (list (intern arg1) (intern rel) (intern arg2))))) (defun extract-cmrule-items-and-paths (id dag path prefix) ;; (extract-cmrule-items-and-paths "R1" (build-list-dag (list 'token 'token)) nil "I") ;; -> ((I1 . (FIRST)) (I2 . (REST FIRST))) (labels ((value-error (rpath reason) (error "Value of path < ~{~A ~^: ~}> in chart mapping rule ~A ~A" (reverse rpath) id reason)) (traverse (dag rpath prefix n) (let ((dtype (and (dag-p dag) (dag-type dag)))) (cond ((eq dtype *empty-list-type*) nil) ((and (get-dag-value dag (car *list-head*)) (get-dag-value dag (car *list-tail*))) (cons (cons (intern (format nil "~A~D" prefix n)) (cons (car *list-head*) rpath)) (traverse (get-dag-value dag (car *list-tail*)) (cons (car *list-tail*) rpath) prefix (1+ n)))) ((eq dtype *list-type*) (value-error rpath "is an underspecified list - should be a cons or empty")) (t (value-error rpath "has a non-list value")))))) (loop for (item . rpath) in (traverse (existing-dag-at-end-of dag path) (reverse path) prefix 1) collect (cons item (reverse rpath))))) (flet ((source (e) (if (member (second e) '(> >> @)) (first e) (third e))) ;; edges go from right to left, i.e. the source is greater than the destination; ;; force an order for @ - this is fine since we do not require the graph to be ;; unilateral (any topologically sorted order will do) (dest (e) (if (member (second e) '(> >> @)) (third e) (first e)))) (defun find-cmrule-root-args (graph all-nodes) ;; return nodes that have no incoming edge (loop with roots = (copy-list all-nodes) for e in graph do (setq roots (delete (dest e) roots :test #'eq)) finally (return (sort roots #'string>)))) (defun topo-sort-cmrule-args (graph roots) ;; Kahn's algorithm for topological sorting, with ties ordered lexicographically (loop with l = nil and s = roots while s do (let ((n (pop s))) (push n l) (loop with done = nil for e in graph for m = (dest e) when (eq (source e) n) do (push e done) (unless (loop for e1 in graph ; does m have another incoming edge? thereis (and (not (member e1 done :test #'eq)) (eq (dest e1) m))) (setq s (merge 'list (list m) s #'string>))) finally (setf graph (set-difference graph done :test #'eq)))) finally ;; if any edges remain in graph then there is no valid ordering (return (values (nreverse l) (null graph)))))) (defun reorder-cmrule-constraints (poscons match-args) ;; for convenience, order each constraint such that previously processed args are ;; in arg2 position (loop for (a1 rel a2) in poscons collect (if (or (member a1 '(^ $) :test #'eq) ; (^/$ rel a2)? (and (poscon-match-arg-p a1) (poscon-output-arg-p a2)) ; (I/C rel O)? ;; (I/Cn rel I/Cm) and I/Cm processed after I/Cn? (and (poscon-match-arg-p a1) (poscon-match-arg-p a2) (member a2 (cdr (member a1 match-args))))) (list a2 (ecase rel (< '>) (<< '>>) (> '<) (>> '<<) (@ '@)) ; invert direction a1) (list a1 rel a2)))) (defun compute-cmrule-output-mappings (args poscons id) ;; compute mappings between output args and match args, ;; e.g. O1 (((seq O1 O2) . (I1)) ((O3) . (I2 C1))) ;; - only need to consider positional constraints of form (On @ I/Cm) and (On @/ Om) ;; NB output args are ordered left-to-right (declare (ignore id)) (let ((output-match-parallels ; ((On . (I/Cn ...)) ...) (loop for arg in args collect (cons arg (loop for (o rel m) in poscons when (and (eq o arg) (eq rel '@) (poscon-match-arg-p m)) ; (oarg @ I/Cm) collect m)))) (mappings nil)) (loop for (o . ms) in output-match-parallels for e = (rassoc ms mappings :test #'equal) when ms ; !!! if no ms then perhaps there is an (On @ Om) constraint for this O do (if e (push o (car e)) (push (cons (list o) ms) mappings))) ;; where there is an (On Om) constraint, order outputs and mark with seq (loop for mp in mappings when (and (cdr (car mp)) (loop for (a rel b) in poscons thereis (and (member rel '(< >)) (member a (car mp)) (member b (car mp))))) do (setf (car mp) (cons 'seq (sort (car mp) #'(lambda (a1 a2) (member a2 (member a1 args))))))) ;; !!! _fix_me_ implement (On @ Om) (loop for (o1 rel o2) in poscons when (and (poscon-output-arg-p o1) (eq rel '@) (poscon-output-arg-p o2)) ; (On @ Om) do (error "Outputs ~A and ~A specified to be in parallel - not yet implemented" o1 o2)) mappings)) ;;; Entry point for executing a set of chart mapping rules (token mapping / lexical ;;; filtering / post-generation mapping) (defun apply-cmrules (cmrules chart) (let ((*recording-fail-paths-p* nil) ; quickcheck not used inside here (rule-tail cmrules)) (loop (let ((jump (catch 'jump-rule (loop for rule in rule-tail do (loop with start = 0 while start do (setq start (catch 'restart-rule (try-cmrule rule start nil nil nil (cmrule-match-args rule) (cmrule-match-paths rule) chart)))) finally (return nil))))) (if jump (setq rule-tail (or (member jump cmrules :key #'cmrule-id :test #'eq) (error "Chart mapping attempted to jump to a non-existent or invalid rule `~(~A~)'" jump))) (return nil)))))) ;;; Find possible matches for a single chart mapping rule (defun try-cmrule (rule start edges ematch rmatch args paths chart) ;; recurse across match (I/C) args and paths, attempting a match against each edge ;; that satisfies the current arg's positional constraints (or against all edges ;; in l->r order if no applicable constraints); next, match any regexes in this arg ;; and check dag unifiability, and finally on success accumulate matches between args ;; and edges, and between regex capturing groups and the substrings they matched (if args (loop with arg = (first args) and path = (first paths) for edge in (if (or ematch ; arg isn't the anchor, so it must be constrained by rule & ematch (find arg ; arg is the anchor - but does the rule constrain it (by ^/$)? (cmrule-positional-constraints rule) :key #'car :test #'eq)) (edges-satisfying-cmrule-match arg ematch (cmrule-positional-constraints rule) chart) (prog1 (loop for e in (get-cm-edges chart) ; NB edges must be in l->r order here when (>= (edge-from e) start) collect e) (setq start nil))) ; indicate that start is to be taken from each edge unless (member edge edges :test #'eq) ; can't use same edge >once in a single application do (when (and (numberp *cm-debug*) (>= *cm-debug* 3)) (format t "~&trying ~(~A~) anchored at ~A looking for args ~:A~%" (cmrule-id rule) (or start (edge-from edge)) args)) (multiple-value-bind (matchp regex-matches) (edge-regex-cmrule-match-p arg path edge (cmrule-match-paths-regexes rule)) (when (and matchp (yaduablep (edge-dag edge) (tdfs-at-end-of path (cmrule-full-fs rule)))) (try-cmrule rule (or start (edge-from edge)) (cons edge edges) (cons (cons arg edge) ematch) (nconc regex-matches rmatch) (rest args) (rest paths) chart)))) (apply-cmrule rule start ematch rmatch chart)) nil) (defun edges-satisfying-cmrule-match (arg ematch poscons chart) ;; all edges in chart satisfying positional-constraints on arg, given ematch association ;; between args and edges ;; for ^/$ what's the start/end of the chart? Below we take them to be the first/last ;; edges currently there, but originally there might have been edges further out (let ((con (find arg poscons :key #'car :test #'eq))) (unless con (error "Inconsistency - could not find positional constraint for ~A" arg)) (destructuring-bind (a1 rel a2) ; a2 is ^/$/In/Cn con (declare (ignore a1)) ; = arg (case a2 (^ (loop with min = most-positive-fixnum and edges = nil ; rel can only be > for e in (get-cm-edges chart) for start = (edge-from e) do (cond ((< start min) (setq min start edges (list e))) ((= start min) (push e edges))) finally (return edges))) ($ (loop with max = 0 and edges = nil ; rel can only be < for e in (get-cm-edges chart) for end = (edge-to e) do (cond ((> end max) (setq max end edges (list e))) ((= end max) (push e edges))) finally (return edges))) (t (let ((e2 (cdr (assoc a2 ematch :test #'eq)))) (unless (edge-p e2) (error "Inconsistency - could not find edge corresponding to argument ~A" a2)) (ecase rel (< (get-cm-edges chart :end (edge-from e2))) (<< (loop for e in (get-cm-edges chart) when (<= (edge-to e) (edge-from e2)) collect e)) (@ (get-cm-edges chart :start (edge-from e2) :end (edge-to e2)))))))))) (defun edge-regex-cmrule-match-p (arg path edge match-paths-regexes) ;; match-paths-regexes = alist of paths and corresponding REs in context/inputs ;; e.g. (((+INPUT FIRST +TNT +PRBS FIRST) . "^0?\\.0.*$")) ;; return values: matchp, ((arg path n . capture) ...) (let ((edge-dag (tdfs-indef (edge-dag edge)))) (loop with rmatch = nil for (mpath regex . scanner) in match-paths-regexes when (eql (mismatch path mpath) (length path)) ; are we at mpath? do (let* ((epath (nthcdr (length path) mpath)) (val (existing-dag-at-end-of edge-dag epath)) (dtype (and val (dag-type val)))) (when (and (numberp *cm-debug*) (>= *cm-debug* 2)) (format t "~&matching ~A ~A~%" regex (cond ((stringp dtype) (format nil "\"~A\"" dtype)) (dtype (format nil "~(~A~)" dtype)) (t "")))) ;; regex match succeeds trivially if path isn't in the edge; it always fails ;; if type at end of path isn't a string or an (unspecific) string type (cond ((null dtype)) ((stringp dtype) (multiple-value-bind (matchp captures) (cl-ppcre:scan-to-strings scanner dtype) (if matchp (loop for c across captures and n from 1 do (push (cons (list arg epath n) (or c "")) rmatch)) (return-from edge-regex-cmrule-match-p nil)))) ((not (string-type-p dtype)) (return-from edge-regex-cmrule-match-p nil)))) finally (return (values t rmatch))))) ;;; Apply a single chart mapping rule, given a set of input/context item matches and ;;; a set of regex captures. (defun apply-cmrule (rule start ematch rmatch chart) ;; instantiate rule dag with chart edges, substitute any regex capturing groups, create ;; candidate output edges, check they're not already there in chart, add them and ;; delete input edges, and finally throw control back to top-level driver (let* ((rdag (tdfs-indef (cmrule-full-fs rule))) (ndag (with-unification-context (rdag) (loop for arg in (cmrule-match-args rule) and path in (cmrule-match-paths rule) for edge = (cdr (assoc arg ematch :test #'eq)) do (unless (unify-wffs (unify-existing-dag-at-end-of rdag path) (tdfs-indef (edge-dag edge))) (return nil)) finally (return (copy-dag rdag)))))) (when ndag (when (cmrule-output-paths-substs rule) (setq ndag (apply-cmrule-substitute-captures rmatch ndag (cmrule-output-paths-substs rule)))) (let ((cedges (loop with s = nil and e = nil for arg in (cmrule-output-args rule) for path in (cmrule-output-paths rule) for pdag = (existing-dag-at-end-of ndag path) do (multiple-value-setq (s e) (apply-cmrule-output-span arg ematch (cmrule-output-mappings rule))) unless (and s e) do (error "Inconsistency - could not compute start and/or end of candidate output edge") collect (make-cm-edge chart (cmrule-kind rule) rule pdag s e)))) ;; !!! Descriptions of the chart mapping formalism are vague about the rule invocation ;; fixpoint; here we assume we have reached fixpoint if _any_ candidate output edge ;; duplicates an existing edge in the chart. An alternative would be to add _all_ ;; non-duplicating outputs (and in that case do we delete just the inputs associated ;; with those outputs and not any of the others?) Another way of thinking about this ;; is: can a rule be applied _partially_ when it's almost but not quite at the fixpoint? ;; I don't think that makes sense. (when (loop for ce in cedges thereis (duplicate-cm-edge-p chart ce)) (return-from apply-cmrule nil)) ;; finalise output candidates, update chart (loop for ce in cedges do (setf (edge-id ce) (next-edge)) (setf (edge-dag ce) (make-nondefault-tdfs (copy-dag-completely (tdfs-indef (edge-dag ce)))))) (when (if (numberp *cm-debug*) (> *cm-debug* 0) *cm-debug*) (format t "~&applying ~(~A~) ~:{~A:~A~:^ ~} ~:{~A:~A~:^ ~}~%" (cmrule-id rule) (loop for (a . e) in ematch collect (list a (edge-id e))) ; inputs (loop for a in (cmrule-output-args rule) and ce in cedges ; outputs collect (list a (edge-id ce))))) (loop for (a . e) in ematch when (poscon-input-arg-p a) do (delete-cm-edge chart e)) (loop for e in cedges do (add-cm-edge chart e)) (let* ((jdag (existing-dag-at-end-of rdag *chart-mapping-jump-path*)) (jrule-id (and jdag (stringp (dag-type jdag)) (intern (string-upcase (dag-type jdag)) :lkb)))) (if jrule-id (throw 'jump-rule jrule-id) (throw 'restart-rule (if cedges (loop for ce in cedges minimize (edge-from ce)) start)))))))) (defun apply-cmrule-substitute-captures (rmatch dag output-paths-substs) ;; arg output-paths-substs records output paths and (segmented) string values ;; containing references to regex capturing groups (e.g. ${I1:+FORM:1}) ;; - for each of these references look up the actual capture value in rmatch, ;; incorporate it into the string value, and substitute that into the dag (with-unification-context (dag) (loop for (path . val) in output-paths-substs for nval = (apply #'concatenate 'string (loop for seg in val collect (etypecase seg ; substring or 4-tuple (casefn arg path index) (string seg) (list (let* ((casefn (car seg)) (groupref (cdr seg)) (nseg (cdr (assoc groupref rmatch :test #'equal)))) (unless nseg ;; the capturing group has no value: either there was nothing ;; for the corresponding regex to match in the I/C edge, or ;; more seriously the regex doesn't contain the capture (we ;; could helpfully check for the latter case on load) (setq nseg "")) (funcall casefn nseg)))))) do (retype-dag (unify-existing-dag-at-end-of dag path) nval) finally (return (copy-dag dag))))) (defun apply-cmrule-output-span (arg ematch output-mappings) ;; compute start and end vertices for output arg - use output-mappings (recording the ;; arg's sequence/parallel relationship with other output args, and the input/context ;; args it is in parallel with), looking up I/C args in ematch to get their ;; corresponding edges (let ((map (loop for m in output-mappings when (member arg (car m)) return m))) (when map ; e.g. ((seq O1 O2) . (I1)) (loop with oargs = (car map) for marg in (cdr map) for medge = (cdr (assoc marg ematch :test #'eq)) minimize (edge-from medge) into s maximize (edge-to medge) into e finally (return (if (eq (car oargs) 'seq) (let ((p (position arg (cdr oargs))) (n (length (cdr oargs)))) ;; divide s-e range into n equal segments (values (+ s (* (- e s) (/ p n))) ; use rational numbers since exact (+ s (* (- e s) (/ (1+ p) n))))) (values s e)))))))