;;; -*- Mode: Lisp; Coding: utf-8 -*- ;;; HaG (Hausa Grammar) ;;; Based on Matrix user-fns.lsp ;;; Post-generation remapping of tones (BC) (in-package :lkb) ;;; ;;; identify characters that can form words; all other characters will create ;;; word boundaries and later be suppressed in tokenization. ;;; (defun alphanumeric-or-extended-p (c) (and (graphic-char-p c) (not (member c *punctuation-characters*)))) ;;; ;;; determine surface order of constituents in rule: returns list of paths into ;;; feature structure of rule, i.e. (nil (args first) (args rest first)) for a ;;; binary rule, where the first list element is the path to the mother node of ;;; the rule. ;;; (defun establish-linear-precedence (rule) (let ((daughters (loop for args = (existing-dag-at-end-of rule '(args)) then (existing-dag-at-end-of args *list-tail*) for daughter = (when args (get-value-at-end-of args *list-head*)) for path = (list 'args) then (append path *list-tail*) while (and daughter (not (eq daughter 'no-way-through))) collect (append path *list-head*)))) (if (null daughters) (cerror "Ignore it" "Rule without daughters") (cons nil daughters)))) ;;; ;;; detect rules that have orthographemic variation associated to them; those ;;; who do should only be applied within the morphology system; for the time ;;; being use value of NEEDS-AFFIX feature, though it would be nicer to rely ;;; on a type distinction of lexical rules or re-entrancy of ORTH. ;;; (defun spelling-change-rule-p (rule) (let ((affix (get-dag-value (tdfs-indef (rule-full-fs rule)) 'needs-affix))) (and affix (bool-value-true affix)))) ;;; ;;; create feature structure representation of orthography value for insertion ;;; into the output structure of inflectional rules; somewhat more complicated ;;; than one might expect because of treatment for multi-word elements. ;;; ; (defun make-orth-tdfs (orth) ; (let ((unifs nil) ; (tmp-orth-path *orth-path*)) ; (loop for orth-value in (split-into-words orth) ; do ; (let ((opath (create-path-from-feature-list ; (append tmp-orth-path *list-head*)))) ; (push (make-unification :lhs opath ; :rhs ; (make-u-value ; :type orth-value)) ; unifs) ; (setq tmp-orth-path (append tmp-orth-path *list-tail*)))) ; (let ((indef (process-unifications unifs))) ; (when indef ; (setf indef (create-wffs indef)) ; (make-tdfs :indef indef))))) ;;; ;;; assign priorities to parser tasks and lexical entries ;;; ;;; ERB 2008-03-12 rule-priory has to return a value for every ;;; rule or the mmt system fails. (defun rule-priority (rule) (case (rule-id rule) (subj 1000) (t 0))) (defun gen-rule-priority (rule) (rule-priority rule)) (defun lex-priority (mrec) (declare (ignore mrec)) 800) (defun gen-lex-priority (fs) (declare (ignore fs)) 800) ;;; ;;; determine path and file names for lexicon and leaf type cache files. ;;; (defun set-temporary-lexicon-filenames nil (let* ((version (or (find-symbol "*GRAMMAR-VERSION*" :common-lisp-user) (and (find-package :lkb) (find-symbol "*GRAMMAR-VERSION*" :lkb)))) (prefix (if (and version (boundp version)) (remove-if-not #'alphanumericp (symbol-value version)) "lexicon"))) (setf *psorts-temp-file* (make-pathname :name prefix :directory (pathname-directory (lkb-tmp-dir)))) (setf *psorts-temp-index-file* (make-pathname :name (concatenate 'string prefix ".idx") :directory (pathname-directory (lkb-tmp-dir)))) (setf *leaf-temp-file* (make-pathname :name (concatenate 'string prefix ".lfs") :directory (pathname-directory (lkb-tmp-dir)))))) (defun bool-value-true (fs) (and fs (let ((fs-type (type-of-fs fs))) (eql fs-type '+)))) (defun bool-value-false (fs) (and fs (let ((fs-type (type-of-fs fs))) (eql fs-type '-)))) ;;; Version from ERG ;; (defun gen-extract-surface (edge &optional (initialp t) &key cliticp stream) ;; (if stream ;; (let ((daughters (edge-children edge))) ;; (if daughters ;; (loop ;; for daughter in daughters ;; for foo = initialp then nil ;; do ;; (setf cliticp ;; (gen-extract-surface ;; daughter foo :cliticp cliticp :stream stream)) ;; #+:logon finally ;; #+:logon ;; (setf (edge-lnk edge) ;; (mrs::combine-lnks ;; (edge-lnk (first daughters)) ;; (edge-lnk (first (last daughters)))))) ;; (let* ((entry (get-lex-entry-from-id (first (edge-lex-ids edge)))) ;; (orth (format nil "~{~a~^ ~}" (lex-entry-orth entry))) ;; ;; ;; ;; need to fix-up irregular cases like `Englishmen' manually :-{ ;; ;; ;; (orth (if (ppcre::scan "man$" orth) ;; (subseq orth 0 (- (length orth) 3)) ;; orth)) ;; (tdfs (and entry (lex-entry-full-fs entry))) ;; (type (and tdfs (type-of-fs (tdfs-indef tdfs)))) ;; (string (string-downcase (copy-seq (first (edge-leaves edge))))) ;; ;; ;; ;; _fix_me_ ;; ;; maybe we could be more courageous and just search for .orth. ;; ;; as a sub-sequence of .string., starting at position .prefix. ;; ;; (22-dec-06; oe) ;; (prefix (loop ;; for c across string ;; while (member c '(#\( #\" #\') :test #'char=) ;; count 1)) ;; (suffix (min (length string) (+ prefix (length orth)))) ;; (suffix (when (string-equal ;; orth string :start2 prefix :end2 suffix) ;; suffix)) ;; (rawp (and suffix ;; (loop for c across orth thereis (upper-case-p c)))) ;; (capitalizep ;; (ignore-errors ;; (loop ;; for match in '(basic_n_proper_lexent ;; n_-_c-month_le ;; n_-_c-dow_le ;; n_-_pr-i_le) ;; thereis (or (eq type match) ;; (subtype-p type match))))) ;; (cliticp (or cliticp ;; (and (> (length string) 0) ;; (char= (char string 0) #\'))))) ;; (if rawp ;; (setf string ;; (concatenate 'string ;; (subseq string 0 prefix) orth (subseq string suffix))) ;; (when capitalizep ;; (loop ;; with spacep = t ;; for i from 0 to (- (length string) 1) ;; for c = (schar string i) ;; when (char= c #\Space) do (setf spacep t) ;; else when (char= c #\_) ;; do ;; (setf spacep t) ;; (setf (schar string i) #\Space) ;; else do ;; (when (and spacep (alphanumericp c)) ;; (setf (schar string i) (char-upcase c))) ;; (setf spacep nil)))) ;; (when (and (> (length string) 1) ;; (char= (char string 0) #\_) ;; (upper-case-p (char string 1))) ;; (setf string (subseq string 1))) ;; (when (and initialp (alphanumericp (schar string 0))) ;; (setf (schar string 0) (char-upcase (schar string 0)))) ;; (unless (or initialp cliticp) ;; (format stream " ")) ;; (let (#+:logon ;; (start (file-position stream))) ;; (loop ;; with hyphenp ;; for c across string ;; unless (and hyphenp (char= c #\space)) ;; do (write-char c stream) ;; when (char= c #\-) do (setf hyphenp t) ;; else do (setf hyphenp nil)) ;; #+:logon ;; (setf (edge-lnk edge) ;; (list :characters start (file-position stream)))) ;; ;; ;; ;; finally, inform the caller as to whether we output something that ;; ;; inhibits intervening space (e.g. `mid-July'). ;; ;; ;; (unless (string= orth "") ;; (member (schar orth (- (length orth) 1)) '(#\-) :test #'char=))))) ;; (let ((stream (make-string-output-stream))) ;; (gen-extract-surface edge initialp :stream stream) ;; (get-output-stream-string stream)))) ;;; HaG version ;; (defun gen-extract-surface (edge &optional (initialp t) &key cliticp stream) ;; (if stream ;; (let ((daughters (edge-children edge))) ;; ;(pprint (unify-get-type (tdfs-indef (edge-dag edge)))) ;; (if daughters ;; (loop ;; for daughter in daughters ;; for foo = initialp then nil ;; do ;; (and (not (get-dag-value ;; (get-dag-value ;; (get-dag-value ;; (get-dag-value ;; (tdfs-indef (edge-dag edge)) ;; 'SUPRA) ;; 'LEN) ;; 'LIST) ;; 'FIRST) ;; ) ;; (progn ;; (setf supra ;; (unify-dags ;; (tdfs-indef (ltype-tdfs (get-type-entry 'supra_reent))) ;; (get-dag-value (tdfs-indef (edge-dag daughter)) 'SUPRA) ;; ) ) ;; (setf tones (get-dag-value ;; (get-dag-value ;; supra ;; 'TONE) ;; 'LIST) ;; ) ;; (setf lengths (get-dag-value ;; (get-dag-value supra ;; 'LEN) ;; 'LIST) ;; ) ;; (setf rulename (unify-get-type (tdfs-indef (edge-dag daughter)))) ;; (setf head (unify-get-type (get-dag-value ;; (get-dag-value ;; (get-dag-value ;; (get-dag-value (tdfs-indef (edge-dag edge)) ;; 'SYNSEM) ;; 'LOCAL) ;; 'CAT) ;; 'HEAD) ;; ) ;; ) ;; (setf morph (get-dag-value (tdfs-indef (edge-dag edge)) ;; 'MORPH) ;; ) ;; ) ;; ) ;; (setf cliticp ;; (gen-extract-surface ;; daughter foo :cliticp cliticp :stream stream)) ;; #+:logon finally ;; #+:logon ;; (setf (edge-lnk edge) ;; (mrs::combine-lnks ;; (edge-lnk (first daughters)) ;; (edge-lnk (first (last daughters)))))) ;; (let* ((entry (get-lex-entry-from-id (first (edge-lex-ids edge)))) ;; (orth (format nil "~{~a~^ ~}" (lex-entry-orth entry))) ;; ;; ;; ;; ;; ;; (orth (if (ppcre::scan "man$" orth) ;; ;; (subseq orth 0 (- (length orth) 3)) ;; ;; orth)) ;; (tdfs (and entry (lex-entry-full-fs entry))) ;; (type (and tdfs (type-of-fs (tdfs-indef tdfs)))) ;; (string (string-downcase (copy-seq (first (edge-leaves edge))))) ;; ;;; Map Tonal annotation to diacritics ;; (string (loop ;; do ;; (if (not (eql head 'NOUN)) ;; (setf string (praf string rulename)) ;; ) ;; ; (and (ppcre::scan "^__redup__$" string) ;; ; (setf string ;; ; (get-dag-value ;; ; (get-dag-value ;; ; (get-dag-value ;; ; morph ;; ; '--REDUP) ;; ; '--STEM) ;; ; 'FIRST) ;; ; ) ;; ; ) ;; #+ppcre ;; (progn ;; ;(pprint rulename) ;; ;(pprint string) ;; (setf string (tonelen string lengths tones)) ;; (setf tones nil) ;; (setf lengths nil) ;; (if (and (boundp '*hag-demo*) *hag-demo*) ;; (return (composite-tone string)) ;; (return string) ;; ) ;; ) ;; )) ;; ;; ;; ;; _fix_me_ ;; ;; maybe we could be more courageous and just search for .orth. ;; ;; as a sub-sequence of .string., starting at position .prefix. ;; ;; (22-dec-06; oe) ;; (prefix (loop ;; for c across string ;; while (member c '(#\( #\" #\') :test #'char=) ;; count 1)) ;; (suffix (min (length string) (+ prefix (length orth)))) ;; (suffix (when (string-equal ;; orth string :start2 prefix :end2 suffix) ;; suffix)) ;; (rawp (and suffix ;; (loop for c across orth thereis (upper-case-p c)))) ;; (capitalizep ;; (ignore-errors ;; (loop ;; for match in '(proper-noun-lex ;; ) ;; thereis (or (eq type match) ;; (subtype-p type match))))) ;; ;;; English-specific: This interferes badly with "'yā" ;; ;;; (cliticp (or cliticp ;; ;;; (and (> (length string) 0) ;; ;;; (char= (char string 0) #\')))) ;; ) ;; (if rawp ;; (setf string ;; (concatenate 'string ;; (subseq string 0 prefix) orth (subseq string suffix))) ;; (when capitalizep ;; (loop ;; with spacep = t ;; for i from 0 to (- (length string) 1) ;; for c = (schar string i) ;; when (char= c #\Space) do (setf spacep t) ;; else when (char= c #\_) ;; do ;; (setf spacep t) ;; (setf (schar string i) #\Space) ;; else do ;; (when (and spacep (alphanumericp c)) ;; (setf (schar string i) (char-upcase c))) ;; (setf spacep nil)))) ;; (when (and (> (length string) 1) ;; (char= (char string 0) #\_) ;; (upper-case-p (char string 1))) ;; (setf string (subseq string 1))) ;; (when (and initialp (alphanumericp (schar string 0))) ;; (setf (schar string 0) (char-upcase (schar string 0)))) ;; (unless (or initialp cliticp) ;; (format stream " ")) ;; (let (#+:logon ;; (start (file-position stream))) ;; (loop ;; with hyphenp ;; for c across string ;; unless (and hyphenp (char= c #\space)) ;; do (write-char c stream) ;; when (char= c #\-) do (setf hyphenp t) ;; else do (setf hyphenp nil)) ;; #+:logon ;; (setf (edge-lnk edge) ;; (list :characters start (file-position stream)))) ;; ;; ;; ;; finally, inform the caller as to whether we output something that ;; ;; inhibits intervening space (e.g. `mid-July'). ;; ;; ;; (unless (string= orth "") ;; (member (schar orth (- (length orth) 1)) '(#\-) :test #'char=))))) ;; (let ((stream (make-string-output-stream))) ;; (gen-extract-surface edge initialp :stream stream) ;; (get-output-stream-string stream)))) ;; (eval-when #+:ansi-eval-when (:load-toplevel :compile-toplevel :execute) ;; #-:ansi-eval-when (load eval compile) ;; (setf *gen-extract-surface-hook* 'gen-extract-surface)) ;; (defun praf (string rulename) ;; (if (or (eql rulename 'DO-PRON-IRULE) (eql rulename 'HUMAN-DO-PRON-IRULE)) ;; (progn ;; ;;; Exclude datives and free possessives ;; (if (not (ppcre::scan "^(mi(ni|ki)|[mnt]a(ka|ta|na)|mu(ku|su)|[nt]a(mu|ku|su|ki))$" string)) ;; (setf string (ppcre::regex-replace "^(.*[aeiou])(ni|ka|ki|shi|ta|mu|ku|su)$" string "\\1 \\2")) ;; ) ;; ) ;; ) ;; string ;; ) ;; (defun tonelen (string lengths tones) ;; (if (setf len (get-dag-value lengths 'FIRST)) ;; (progn ;; (or (setf tone (get-dag-value tones 'FIRST)) ;; (setf tone (get-dag-value (possibly-new-constraint-of (unify-get-type (unify-dags (create-typed-dag 'CONS) tones ))) 'FIRST ) )) ;; ;(pprint (type-of-fs tone)) ;; ;(pprint (type-of-fs len)) ;; (setf string (map-tone-tfs string (type-of-fs tone) (type-of-fs len))) ;; (if (get-dag-value lengths 'REST) ;; (if (get-dag-value tones 'REST) ;; (setf string (tonelen string (get-dag-value lengths 'REST) (get-dag-value tones 'REST))) ;; (setf string (tonelen string (get-dag-value lengths 'REST) ;; (get-dag-value ;; (possibly-new-constraint-of ;; (unify-get-type (unify-dags (create-typed-dag 'CONS) tones ))) ;; 'REST ) ;; ) ;; ) ;; ) ;; ) ;; ) ;; ) ;; string ;; ) ;; (defun map-tone-tfs (string tone len) ;; ;;; FIXME: tone pretty printing currently presupposes chart packing ;; ;;; Does not really hurt, mais quand-même... ;; (setf vow (ppcre::regex-replace "^.*?(ai|au|[aeiou])[^aeiou]*$" string "\\1")) ;; (if (eql len lkb::'LONG) ;; (cond ;; ((or (eql tone lkb::'HIGH) (eql tone lkb::'PHIGH)) ;; (case (intern vow) ;; (|a| (setf string (ppcre::regex-replace "^(.*)([a])([^aeiou]*)$" string "\\1áá\\3"))) ;; (|e| (setf string (ppcre::regex-replace "^(.*)([e])([^aeiou]*)$" string "\\1éé\\3"))) ;; (|i| (setf string (ppcre::regex-replace "^(.*)([i])([^aeiou]*)$" string "\\1íí\\3"))) ;; (|o| (setf string (ppcre::regex-replace "^(.*)([o])([^aeiou]*)$" string "\\1óó\\3"))) ;; (|u| (setf string (ppcre::regex-replace "^(.*)([u])([^aeiou]*)$" string "\\1úú\\3"))) ;; ) ;; ) ;; ((or (eql tone lkb::'LOW) (eql tone lkb::'PLOW)) ;; (case (intern vow) ;; (|a| (setf string (ppcre::regex-replace "^(.*)([a])([^aeiou]*)$" string "\\1àà\\3"))) ;; (|e| (setf string (ppcre::regex-replace "^(.*)([e])([^aeiou]*)$" string "\\1èè\\3"))) ;; (|i| (setf string (ppcre::regex-replace "^(.*)([i])([^aeiou]*)$" string "\\1ìì\\3"))) ;; (|o| (setf string (ppcre::regex-replace "^(.*)([o])([^aeiou]*)$" string "\\1òò\\3"))) ;; (|u| (setf string (ppcre::regex-replace "^(.*)([u])([^aeiou]*)$" string "\\1ùù\\3"))) ;; ) ;; ) ;; ((or (eql tone lkb::'FALL) (eql tone lkb::'PFALL)) ;; (case (intern vow) ;; (|a| (setf string (ppcre::regex-replace "^(.*)([a])([^aeiou]*)$" string "\\1áà\\3"))) ;; (|e| (setf string (ppcre::regex-replace "^(.*)([e])([^aeiou]*)$" string "\\1éè\\3"))) ;; (|i| (setf string (ppcre::regex-replace "^(.*)([i])([^aeiou]*)$" string "\\1íì\\3"))) ;; (|o| (setf string (ppcre::regex-replace "^(.*)([o])([^aeiou]*)$" string "\\1óò\\3"))) ;; (|u| (setf string (ppcre::regex-replace "^(.*)([u])([^aeiou]*)$" string "\\1úù\\3"))) ;; ) ;; ) ;; ) ;; (cond ;; ((eql tone lkb::'HIGH) ;; (case (intern vow) ;; (|ai| (setf string (ppcre::regex-replace "^(.*)(ai)([^aeiou]*)$" string "\\1áí\\3"))) ;; (|au| (setf string (ppcre::regex-replace "^(.*)(au)([^aeiou]*)$" string "\\1áú\\3"))) ;; (|a| (setf string (ppcre::regex-replace "^(.*)([a])([^aeiou]*)$" string "\\1á\\3"))) ;; (|e| (setf string (ppcre::regex-replace "^(.*)([e])([^aeiou]*)$" string "\\1é\\3"))) ;; (|i| (setf string (ppcre::regex-replace "^(.*)([i])([^aeiou]*)$" string "\\1í\\3"))) ;; (|o| (setf string (ppcre::regex-replace "^(.*)([o])([^aeiou]*)$" string "\\1ó\\3"))) ;; (|u| (setf string (ppcre::regex-replace "^(.*)([u])([^aeiou]*)$" string "\\1ú\\3"))) ;; ) ;; ) ;; ((eql tone lkb::'LOW) ;; (case (intern vow) ;; (|ai| (setf string (ppcre::regex-replace "^(.*)(ai)([^aeiou]*)$" string "\\1àì\\3"))) ;; (|au| (setf string (ppcre::regex-replace "^(.*)(au)([^aeiou]*)$" string "\\1àù\\3"))) ;; (|a| (setf string (ppcre::regex-replace "^(.*)([a])([^aeiou]*)$" string "\\1à\\3"))) ;; (|e| (setf string (ppcre::regex-replace "^(.*)([e])([^aeiou]*)$" string "\\1è\\3"))) ;; (|i| (setf string (ppcre::regex-replace "^(.*)([i])([^aeiou]*)$" string "\\1ì\\3"))) ;; (|o| (setf string (ppcre::regex-replace "^(.*)([o])([^aeiou]*)$" string "\\1ò\\3"))) ;; (|u| (setf string (ppcre::regex-replace "^(.*)([u])([^aeiou]*)$" string "\\1ù\\3"))) ;; ) ;; ) ;; ((eql tone lkb::'FALL) ;; (case (intern vow) ;; (|ai| (setf string (ppcre::regex-replace "^(.*)(ai)([^aeiou]*)$" string "\\1áì\\3"))) ;; (|au| (setf string (ppcre::regex-replace "^(.*)(au)([^aeiou]*)$" string "\\1áù\\3"))) ;; (|a| (setf string (ppcre::regex-replace "^(.*)([a])([^aeiou]*)$" string "\\1â\\3"))) ;; (|e| (setf string (ppcre::regex-replace "^(.*)([e])([^aeiou]*)$" string "\\1ê\\3"))) ;; (|i| (setf string (ppcre::regex-replace "^(.*)([i])([^aeiou]*)$" string "\\1î\\3"))) ;; (|o| (setf string (ppcre::regex-replace "^(.*)([o])([^aeiou]*)$" string "\\1ô\\3"))) ;; (|u| (setf string (ppcre::regex-replace "^(.*)([u])([^aeiou]*)$" string "\\1û\\3"))) ;; ) ;; ) ;; ) ;; ) ;; string ;; ) ;; (defun map-tone (string) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(a)(.*)_h:$" string "\\1áá\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(e)(.*)_h:$" string "\\1éé\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(i)(.*)_h:$" string "\\1íí\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(o)(.*)_h:$" string "\\1óó\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(u)(.*)_h:$" string "\\1úú\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(a)(.*)_l:$" string "\\1àà\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(e)(.*)_l:$" string "\\1èè\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(i)(.*)_l:$" string "\\1ìì\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(o)(.*)_l:$" string "\\1òò\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(u)(.*)_l:$" string "\\1ùù\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(au)(.*)_h$" string "\\1áú\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(ai)(.*)_h$" string "\\1áí\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(a)(.*)_h$" string "\\1á\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(e)(.*)_h$" string "\\1é\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(i)(.*)_h$" string "\\1í\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(o)(.*)_h$" string "\\1ó\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(u)(.*)_h$" string "\\1ú\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(a)(.*)_l$" string "\\1à\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(e)(.*)_l$" string "\\1è\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(i)(.*)_l$" string "\\1ì\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(o)(.*)_l$" string "\\1ò\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(u)(.*)_l$" string "\\1ù\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(a)(.*)_hl:$" string "\\1áà\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(e)(.*)_hl:$" string "\\1éè\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(i)(.*)_hl:$" string "\\1íì\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(o)(.*)_hl:$" string "\\1óò\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(u)(.*)_hl:$" string "\\1úù\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(au)(.*)_hl$" string "\\1áù\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(ai)(.*)_hl$" string "\\1áì\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(a)(.*)_hl$" string "\\1â\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(e)(.*)_hl$" string "\\1ê\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(i)(.*)_hl$" string "\\1î\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(o)(.*)_hl$" string "\\1ô\\3")) ;; (setf string (ppcre::regex-replace "^([^aeiou]*)(u)(.*)_hl$" string "\\1û\\3")) ;; ) ;; (defun tidy-tone (string) ;; (setf string (ppcre::regex-replace-all "(áà)" string "âa")) ;; (setf string (ppcre::regex-replace-all "(áù)" string "âu")) ;; (setf string (ppcre::regex-replace-all "(áì)" string "âi")) ;; (setf string (ppcre::regex-replace-all "(éè)" string "êe")) ;; (setf string (ppcre::regex-replace-all "(íì)" string "îi")) ;; (setf string (ppcre::regex-replace-all "(óò)" string "ôo")) ;; (setf string (ppcre::regex-replace-all "(úù)" string "ûu")) ;; (setf string (ppcre::regex-replace-all "(áá)" string "aa")) ;; (setf string (ppcre::regex-replace-all "(éé)" string "ee")) ;; (setf string (ppcre::regex-replace-all "(íí)" string "ii")) ;; (setf string (ppcre::regex-replace-all "(óó)" string "oo")) ;; (setf string (ppcre::regex-replace-all "(úú)" string "uu")) ;; (setf string (ppcre::regex-replace-all "(àà)" string "àa")) ;; (setf string (ppcre::regex-replace-all "(àì)" string "ài")) ;; (setf string (ppcre::regex-replace-all "(àù)" string "àu")) ;; (setf string (ppcre::regex-replace-all "(èè)" string "èe")) ;; (setf string (ppcre::regex-replace-all "(ìì)" string "ìi")) ;; (setf string (ppcre::regex-replace-all "(òò)" string "òo")) ;; (setf string (ppcre::regex-replace-all "(ùù)" string "ùu")) ;; (setf string (ppcre::regex-replace-all "(á)" string "a")) ;; (setf string (ppcre::regex-replace-all "(é)" string "e")) ;; (setf string (ppcre::regex-replace-all "(í)" string "i")) ;; (setf string (ppcre::regex-replace-all "(ó)" string "o")) ;; (setf string (ppcre::regex-replace-all "(ú)" string "u")) ;; ) ;; (defun composite-tone (string) ;; (setf string (ppcre::regex-replace-all "(áà)" string "ā̂")) ;; (setf string (ppcre::regex-replace-all "(áù)" string "âu")) ;; (setf string (ppcre::regex-replace-all "(áì)" string "âi")) ;; (setf string (ppcre::regex-replace-all "(éè)" string "ē̂")) ;; (setf string (ppcre::regex-replace-all "(íì)" string "ī̂")) ;; (setf string (ppcre::regex-replace-all "(óò)" string "ō̂")) ;; (setf string (ppcre::regex-replace-all "(úù)" string "ū̂")) ;; (setf string (ppcre::regex-replace-all "(áá)" string "ā")) ;; (setf string (ppcre::regex-replace-all "(éé)" string "ē")) ;; (setf string (ppcre::regex-replace-all "(íí)" string "ī")) ;; (setf string (ppcre::regex-replace-all "(óó)" string "ō")) ;; (setf string (ppcre::regex-replace-all "(úú)" string "ū")) ;; (setf string (ppcre::regex-replace-all "(àà)" string "ā̀")) ;; (setf string (ppcre::regex-replace-all "(àì)" string "ài")) ;; (setf string (ppcre::regex-replace-all "(àù)" string "àu")) ;; (setf string (ppcre::regex-replace-all "(èè)" string "ḕ")) ;; (setf string (ppcre::regex-replace-all "(ìì)" string "ī̀")) ;; (setf string (ppcre::regex-replace-all "(òò)" string "ṑ")) ;; (setf string (ppcre::regex-replace-all "(ùù)" string "ū̀")) ;; (setf string (ppcre::regex-replace-all "(à)" string "à")) ;; (setf string (ppcre::regex-replace-all "(è)" string "è")) ;; (setf string (ppcre::regex-replace-all "(ì)" string "ì")) ;; (setf string (ppcre::regex-replace-all "(ò)" string "ò")) ;; (setf string (ppcre::regex-replace-all "(ù)" string "ù")) ;; (setf string (ppcre::regex-replace-all "(â)" string "â")) ;; (setf string (ppcre::regex-replace-all "(ê)" string "ê")) ;; (setf string (ppcre::regex-replace-all "(î)" string "î")) ;; (setf string (ppcre::regex-replace-all "(ô)" string "ô")) ;; (setf string (ppcre::regex-replace-all "(û)" string "û")) ;; (setf string (ppcre::regex-replace-all "(á)" string "a")) ;; (setf string (ppcre::regex-replace-all "(é)" string "e")) ;; (setf string (ppcre::regex-replace-all "(í)" string "i")) ;; (setf string (ppcre::regex-replace-all "(ó)" string "o")) ;; (setf string (ppcre::regex-replace-all "(ú)" string "u")) ;; ) (defun instantiate-generic-lexical-entry (gle surface pred &optional carg) (let ((tdfs (copy-tdfs-elements (lex-entry-full-fs (if (gle-p gle) (gle-le gle) gle)))) (spath (if carg '(SYNSEM LKEYS KEYREL CARG) '(SYNSEM LKEYS KEYREL PRED)))) (loop with dag = (tdfs-indef tdfs) for path in (list '(ORTH FIRST) spath) for foo = (existing-dag-at-end-of dag path) when foo do (setf (dag-type foo) *string-type*)) (let* ((surface (or #+:logon (case (gle-id gle) (guess_n_gle (format nil "/~a/" surface)) (decade_gle (format nil "~as" surface))) surface)) (unifications (list (make-unification :lhs (create-path-from-feature-list (append *orth-path* *list-head*)) :rhs (make-u-value :type surface)) (make-unification :lhs (create-path-from-feature-list (append *orth-path* *list-tail*)) :rhs (make-u-value :type *empty-list-type*)) (make-unification :lhs (create-path-from-feature-list spath) :rhs (make-u-value :type (or carg pred))))) (indef (process-unifications unifications)) (indef (and indef (create-wffs indef))) (overlay (and indef (make-tdfs :indef indef)))) (values (when overlay (with-unification-context (ignore) (let ((foo (yadu tdfs overlay))) (when foo (copy-tdfs-elements foo))))) surface)))) (defun extract-strings-from-parse-record nil (loop for edge in *parse-record* collect (extract-string-from-p-edge edge))) (defun extract-string-from-p-edge (edge) (or (edge-string edge) (let ((string (cond ((fboundp *gen-extract-surface-hook*) (funcall *gen-extract-surface-hook* edge)) (t (g-edge-leaves edge))))) (setf (edge-string edge) string)))) (defun find-infl-pos (unifs orths sense-id) (declare (ignore unifs orths sense-id)) nil) ;(in-package :lkb) ;;;(defun generate-from-mrs-internal (input-sem &key nanalyses) ;;; ;;; ;; (ERB 2003-10-08) For aligned generation -- if we're in first only ;;; ;; mode, break up the tree in *parse-record* for reference by ;;; ;; ag-gen-lex-priority and ag-gen-rule-priority. Store in *found-configs*. ;;; #+:arboretum ;;; (populate-found-configs) ;;; ;;; ;; ;;; ;; inside the generator, apply the VPM in reverse mode to map to grammar- ;;; ;; internal variable types, properties, and values. the internal MRS, beyond ;;; ;; doubt, is what we should use for lexical instantiations and Skolemization. ;;; ;; regarding trigger rules and the post-generation MRS compatibility test, on ;;; ;; the other hand, we have a choice. in principle, these should operate in ;;; ;; the external (SEM-I) MRS namespace (the real MRS layer); however, trigger ;;; ;; rules are created from FSs (using grammar-internal nomenclature) and, more ;;; ;; importantly, the post-generation test uses the grammar-internal hierarchy ;;; ;; to test for predicate, variable type, and property subsumption. hence, it ;;; ;; is currently convenient to apply these MRS-level operations with grammar- ;;; ;; internal names, i.e. at an ill-defined intermediate layer. ;;; ;; ;;; ;; _fix_me_ ;;; ;; the proper solution to all this mysery will be to create separate SEM-I ;;; ;; hierarchies, i.e. enrich the SEM-I files with whatever underspecifications ;;; ;; the grammar wants to provide at the MRS level, and then import that file ;;; ;; into its own, grammar-specific namespace. one day soon, i hope, i might ;;; ;; actually get to implementing this design ... (22-jan-09; oe) ;;; ;; ;;; (setf input-sem (mt:map-mrs input-sem :semi :backward)) ;;; ;;; ;; ;;; ;; per request by dan, manufacture a top handle, if missing and enable the ;;; ;; generator `input compliance' mechanism. (8-mar-10; oe) ;;; ;; ;;; (when (and mrs::*rel-handel-path* (null (mrs:psoa-top-h input-sem))) ;;; (setf (mrs:psoa-top-h input-sem) ;;; (mrs::make-var :id (funcall mrs::*variable-generator*) :type "h"))) ;;; (let ((fixup (mt::transfer-mrs input-sem :filter nil :task :fixup))) ;;; (when (rest fixup) ;;; (error 'generation/fixup-ambiguity :mrss fixup)) ;;; (when fixup ;;; (setf input-sem (mt::edge-mrs (first fixup))))) ;;; ;;; (setf *generator-internal-mrs* input-sem) ;;; (with-package (:lkb) ;;; (clear-gen-chart) ;;; (setf *cached-category-abbs* nil) ;;; ;;; ;; ;;; ;; no need to even try generating when there is no relation index ;;; ;; ;;; (unless (and (hash-table-p mrs::*relation-index*) ;;; (> (hash-table-count mrs::*relation-index*) 0)) ;;; (error 'generator-uninitialized)) ;;; ;;; (let ((*gen-packing-p* (if *gen-first-only-p* nil *gen-packing-p*)) ;;; lex-results lex-items grules lex-orderings ;;; tgc tcpu conses symbols others) ;;; (time-a-funcall ;;; #'(lambda () ;;; (multiple-value-setq (lex-results grules lex-orderings) ;;; (mrs::collect-lex-entries-from-mrs input-sem)) ;;; (multiple-value-setq (lex-items grules lex-orderings) ;;; (filter-generator-lexical-items ;;; (apply #'append lex-results) grules lex-orderings))) ;;; #'(lambda (tgcu tgcs tu ts tr scons ssym sother &rest ignore) ;;; (declare (ignore tr ignore)) ;;; (setf tgc (+ tgcu tgcs) tcpu (+ tu ts) ;;; conses (* scons 8) symbols (* ssym 24) others sother))) ;;; (setq %generator-statistics% ;;; (pairlis '(:ltgc :ltcpu :lconses :lsymbols :lothers) ;;; (list tgc tcpu conses symbols others))) ;;; ;;; (when *debugging* (print-generator-lookup-summary lex-items grules)) ;;; ;;; (let ((rel-indexes nil) (rel-indexes-n -1) (input-rels 0)) ;;; (dolist (lex lex-items) ;;; (loop ;;; with eps = (mrs::found-lex-main-rels lex) ;;; initially (setf (mrs::found-lex-main-rels lex) 0) ;;; for ep in eps ;;; for index = (ash 1 (or (getf rel-indexes ep) ;;; (setf (getf rel-indexes ep) ;;; (incf rel-indexes-n)))) ;;; do ;;; (setf (mrs::found-lex-main-rels lex) ;;; (logior (mrs::found-lex-main-rels lex) index)))) ;;; (dolist (grule grules) ;;; (when (mrs::found-rule-p grule) ;;; (loop ;;; with eps = (mrs::found-rule-main-rels grule) ;;; initially (setf (mrs::found-rule-main-rels grule) 0) ;;; for ep in eps ;;; for index = (ash 1 (or (getf rel-indexes ep) ;;; (setf (getf rel-indexes ep) ;;; (incf rel-indexes-n)))) ;;; do ;;; (setf (mrs::found-rule-main-rels grule) ;;; (logior (mrs::found-rule-main-rels grule) index))))) ;;; (setf %generator-unknown-eps% nil) ;;; (loop ;;; for ep in (mrs::psoa-liszt input-sem) ;;; do ;;; (if (getf rel-indexes ep) ;;; (setq input-rels ;;; (logior input-rels (ash 1 (getf rel-indexes ep)))) ;;; (push ep %generator-unknown-eps%))) ;;;; (when %generator-unknown-eps% ;;;; (error 'unknown-predicates :eps %generator-unknown-eps%) ;;;; ) ;;; ;;; #+:debug ;;; (setf %rel-indexes rel-indexes %input-rels input-rels) ;;; ;;; (chart-generate ;;; input-sem input-rels lex-items grules lex-orderings rel-indexes ;;; *gen-first-only-p* :nanalyses nanalyses)))))