;;; Copyright (c) 1991-2022 John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen ;;; see LICENSE for conditions (in-package :lkb) ;;; April 1997 - new file split off from old rules.lsp, in order to ;;; keep input functions separate ;;; ;;; July 2022 - added chart mapping rules (defvar *lexical-rule-file-list* nil) (defvar *morphology-rule-file-list* nil) (defvar *grammar-rule-file-list* nil) (defvar *chart-mapping-rule-file-list* nil) (defun clear-rule-load-files nil (setf *lexical-rule-file-list* nil) (setf *morphology-rule-file-list* nil) (setf *grammar-rule-file-list* nil) (setf *chart-mapping-rule-file-list* nil)) (defun read-grammar-file nil (let ((ovwr (and (not (zerop (hash-table-count *rules*))) (lkb-y-or-n-p "Overwrite existing grammar?")))) (let ((file-name (ask-user-for-existing-pathname "Grammar file?"))) (when file-name (if (eql *lkb-system-version* :page) (read-tdl-grammar-file-aux file-name ovwr) (read-grammar-file-aux file-name ovwr)))))) (defun read-grammar-file-aux (file-name &optional ovwr) (if ovwr (setf *grammar-rule-file-list* (list file-name)) (pushnew file-name *grammar-rule-file-list* :test #'equal)) (when ovwr (setf *ordered-rule-list* nil) (clear-grammar)) (read-lex-or-grammar-rule-file file-name nil) (format t "~%Grammar rule file read")) (defun reload-grammar-rules nil (setf *syntax-error* nil) (when (check-load-names *grammar-rule-file-list* "grammar rules") (let ((ovwr t)) (loop for grule-file in (reverse *grammar-rule-file-list*) do (if (eql *lkb-system-version* :page) (read-tdl-grammar-file-aux grule-file ovwr) (read-grammar-file-aux grule-file ovwr)) (setf ovwr nil))) (format t "~%Reload complete"))) (defun read-lex-rule-file nil (let ((ovwr (and (not (zerop (hash-table-count *lexical-rules*))) (lkb-y-or-n-p "Overwrite existing rules?")))) (let ((file-name (ask-user-for-existing-pathname "Lex rules file?"))) (when file-name (if (eql *lkb-system-version* :page) (read-tdl-lex-rule-file-aux file-name ovwr) (read-lex-rule-file-aux file-name ovwr)))))) (defun reload-lexical-rules nil ;;; reload both lexical and morphological files ;;; at once, since can't clear them independently (setf *syntax-error* nil) (if (or *lexical-rule-file-list* *morphology-rule-file-list*) (when (and (or (null *lexical-rule-file-list*) (check-load-names *lexical-rule-file-list* "lexical rules")) (or (null *morphology-rule-file-list*) (check-load-names *morphology-rule-file-list* "morphology rules"))) (let ((ovwr t) (movwr t)) (loop for lrule-file in *lexical-rule-file-list* do (if (eql *lkb-system-version* :page) (read-tdl-lex-rule-file-aux lrule-file ovwr) (read-lex-rule-file-aux lrule-file ovwr)) (setf ovwr nil)) (loop for mrule-file in *morphology-rule-file-list* do (read-morph-file-aux mrule-file ovwr movwr) (setf movwr nil))) (format t "~%Reload complete")) (format t "~%No lexical or morphological files loaded"))) (defun read-lex-rule-file-aux (file-name &optional ovwr) (unless (member file-name *morphology-rule-file-list* :test #'equal) (if ovwr (setf *lexical-rule-file-list* (list file-name)) (pushnew file-name *lexical-rule-file-list* :test #'equal))) (when ovwr (setf *ordered-lrule-list* nil) (setf *ordered-sprule-list* nil)) (when ovwr (clear-lex-rules) ) (read-lex-or-grammar-rule-file file-name t) (format t "~%Lexical rule file read")) (defun read-morph-file nil (let* ((ovwr (lkb-y-or-n-p "Overwrite any existing lexical rules?")) (filename (ask-user-for-existing-pathname "Morphological rules file?"))) (when filename (read-morph-file-aux filename ovwr)))) (defun read-morph-file-aux (filename &optional ovwr movwr) (if ovwr (setf *morphology-rule-file-list* (list filename)) (pushnew filename *morphology-rule-file-list* :test #'equal)) (when movwr (reset-morph-var)) (if (eql *lkb-system-version* :page) (read-tdl-lex-rule-file-aux filename ovwr) (read-lex-rule-file-aux filename ovwr))) (defun read-lex-or-grammar-rule-file (file-name lexical) (let ((*readtable* (define-break-characters '(#\% #\; #\< #\> #\= #\: #\.)))) (with-open-file (istream file-name :direction :input) (format t "~%Reading in ~Arules file ~A" (if lexical "lexical " "") (pathname-name file-name)) (loop (let ((next-char (peek-char t istream nil 'eof))) (when (eql next-char 'eof) (return)) (if (or (eql next-char #\;) (eql next-char #\%)) (read-line istream) (read-rule-entry istream lexical))))))) (defun read-rule-entry (istream lexical) (let ((id (lkb-read istream nil))) (multiple-value-bind (non-def def) (read-psort-unifications id istream) (add-grammar-rule id non-def def *description-persistence* lexical)))) (defun read-token-mapping-file-aux (file-name &optional ovwr) (read-chart-mapping-file-aux file-name :tmr ovwr)) (defun read-lexical-filtering-file-aux (file-name &optional ovwr) (read-chart-mapping-file-aux file-name :lfr ovwr)) ;;; LOGON assumptions are incompatible with post-generation chart mapping #-:logon (defun read-post-generation-mapping-file-aux (file-name &optional ovwr) (read-chart-mapping-file-aux file-name :pgmr ovwr)) (defun read-chart-mapping-file-aux (filename kind &optional ovwr) (cond (ovwr (setq *cmrules* nil) (setq *chart-mapping-rule-file-list* (list filename))) (t (pushnew filename *chart-mapping-rule-file-list* :test #'equal))) (read-chart-mapping-file filename kind)) (defun read-chart-mapping-file (filename kind) (with-open-file (istream filename :direction :input) (format t "~%Reading in ~A rules file ~A" (ecase kind (:tmr "token mapping") (:lfr "lexical filtering") (:pgmr "post-generation mapping")) (pathname-name filename)) (read-chart-mapping-stream istream kind))) (defun read-chart-mapping-stream (istream kind) (loop (let ((next-char (peek-with-comments istream))) (if (eql next-char 'eof) (return) (read-chart-mapping-rule-entry istream kind))))) (defun lkb-read-regex (istream) (loop with chars = nil and escapedp = nil for c = (read-char istream nil 'eof) do (case c ((eof #\newline) (lkb-read-cerror istream "Non-terminated regular expression") (return "^$")) (#\\ (setq escapedp (not escapedp))) (#\$ (unless escapedp (push c chars) (loop-finish))) (t (setq escapedp nil))) (push c chars) finally (return (coerce (nreverse chars) 'string)))) ;;; Support a limited range of POSIX character classes in chart mapping rules in ;;; order to allow DELPH-IN grammars to this date (June 2022) to run without any ;;; changes. For reasons of portability and consistency of interpretation, Unicode ;;; classes (using the \p{} syntax) are much to be preferred over POSIX classes ;;; (see discussion at https://github.com/delph-in/docs/wiki/ReppTop) (defparameter *posix-unicode-character-classes* (loop for (posix . unicode) in '(("\\[:alnum:\\]" . "\\p{Alphabetic}\\p{DecimalNumber}") ("\\[:alpha:\\]" . "\\p{Alphabetic}") ("\\[:digit:\\]" . "\\p{DecimalNumber}") ("\\[:lower:\\]" . "\\p{Lowercase}") ("\\[:punct:\\]" . "\\p{Punctuation}") ("\\[:upper:\\]" . "\\p{Uppercase}")) collect (cons (cl-ppcre:create-scanner posix) unicode))) (defun read-chart-mapping-rule-entry (istream kind) (let ((id (lkb-read istream nil))) #+(or :allegro :mcclim) (record-source id istream nil) (check-for-string ":=" istream id) (let* ((paths-regexes nil) (*tdl-expanded-syntax-function* #'(lambda (istream name path-so-far in-default-p) (declare (ignore name)) (let* ((regex (lkb-read-regex istream)) (uc-regex regex) (path (reverse path-so-far))) (loop for (posix . unicode) in *posix-unicode-character-classes* do (setq uc-regex (cl-ppcre:regex-replace-all posix uc-regex unicode))) (let ((other (cl-ppcre:scan-to-strings "\\[:[a-z]+:\\]" uc-regex))) (when other (error "Unsupported POSIX character class ~A in chart mapping rule ~A" other id))) (push (list* path regex (cl-ppcre:create-scanner uc-regex)) paths-regexes) (list ;; instead of the regex the rule fs gets the generic string type (make-tdl-path-value-unif path *string-type* in-default-p))))) (unifs (read-tdl-lex-avm-def istream id))) (declare (special *tdl-expanded-syntax-function*)) (check-for #\. istream id) (add-chart-mapping-rule id unifs paths-regexes kind))))