;;; Copyright (c) 1991-2021 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen ;;; see `LICENSE' for conditions. (in-package :lkb) (defun clear-temporary-data () ;; Clear global data structures, temporary dags and caches used in parsing and ;; generation, for both packing and non-packing modes (do them all in case we've ;; switched processing types/modes and the other's data is still around). ;; ;; _fix_me_ ;; change [incr tsdb()] to use this function rather than its own similar (although ;; less effective) function release-temporary-storage ;; (clear-chart) (clear-gen-chart) #+:pooling (reset-pools :compressp t) (with-unification-context () (compress-lexicon *lexicon* :recurse t) (maphash #'(lambda (name type) (declare (ignore name)) (let ((c (ltype-constraint type))) (cond ((null c)) ((ltype-constraint-mark type) (compress-dag c) (setf (ltype-constraint-mark type) nil)) ((dag-x-visit-slot c) ; left over from a copy operation (compress-dag c))))) *types*) (dolist (rule-set (list (get-token-mapping-rules) (get-lexical-filtering-rules) (get-post-generation-mapping-rules))) (dolist (rule rule-set) (compress-dag (tdfs-indef (cmrule-full-fs rule))))) (dolist (rule-set (list (get-indexed-lrules nil) (get-indexed-rules nil))) (dolist (rule rule-set) (compress-dag (tdfs-indef (rule-full-fs rule))) (compress-dag (tdfs-indef (rule-rtdfs rule))))) (dolist (root *root-entries*) (compress-dag (tdfs-indef (psort-full-fs (cdr root)))))) ;; _fix_me_ some of the MRS code treats mrs::*named-nodes* as global, whereas it ;; should properly be dynamically bound - or at least cleared after final use (ignore-errors (setf (symbol-value (find-symbol "*NAMED-NODES*" :mrs)) nil))) ;;; Parsing sentences from file (when the fine system is just too much ...) (defun parse-sentences (&optional input-file (output-file nil output-file-p) &rest rest) ;; if output-file is nil or t, then output is suppressed or goes to standard-output, ;; respectively (unless input-file (setq input-file (or (ask-user-for-existing-pathname "Sentence file?") (return-from parse-sentences)))) (cond ((not (probe-file input-file)) (show-message-window (format nil "Input file `~a' does not exist" input-file)) (return-from parse-sentences)) ((file-xml-p input-file) ; if xml input, assume SAF XML (return-from parse-sentences (apply #'process-saf-file-sentences input-file rest))) (t (setq input-file (truename input-file)))) ;; (unless output-file-p (setq output-file (or (ask-user-for-new-pathname "Output file?" input-file) (return-from parse-sentences)))) (setq output-file (cond ((member output-file '(nil t)) output-file) ((probe-file output-file)) (t (merge-pathnames output-file)))) (when (equal output-file input-file) (show-message-window (format nil "Not permitted to overwrite the input file `~a'" input-file)) (return-from parse-sentences)) ;; (with-open-file (istream input-file :direction :input) (if (member output-file '(nil t)) (batch-parse-sentences istream output-file #'extract-fine-system-sentence) (with-open-file (ostream output-file :direction :output :if-exists :supersede :if-does-not-exist :create) (batch-parse-sentences istream ostream #'extract-fine-system-sentence))))) (defparameter *do-something-with-parse* nil) (defparameter *lex-ids-used* nil) (defparameter *parse-input* nil) (defparameter *ostream* nil) #+(and :sbcl :64-bit) (defvar *batch-parse-gc-fn* #'(lambda () (sb-ext:gc))) ; !!! experimental interface (defun batch-parse-sentences (istream ostream &optional access-fn) ;; if *do-something-with-parse* is bound to a function, then this is called after ;; parsing each sentence; otherwise the value of ostream determines what is output: ;; either nil (only final summary, to standard output), t (progress and summary to ;; standard output), or a stream (ditto to the stream) ;; (setq *unanalysed-tokens* nil) (setq *lex-ids-used* nil) (clear-temporary-data) (unexpand-leaf-types) ; also does clear-expanded-lex, clear-type-cache (let (#+(and :sbcl :64-bit) (gc-bytes (sb-ext:bytes-consed-between-gcs)) (nsent 0) (edge-total 0) (parse-total 0) (*print-right-margin* 300) ; in case *print-pretty* is true raw-sentence start-time) (format t "~%;;; Parsing test file~%") (force-output) (unwind-protect (progn #+(and :sbcl :64-bit) (progn ;; allow a lot of allocation before gc triggered automatically, and tenure everything ;; so generation 0 starts empty (setf (sb-ext:bytes-consed-between-gcs) (floor (* (sb-ext:dynamic-space-size) 3/4))) (sb-ext:gc :full t)) ;; start timing here to omit initial GC, which we shouldn't be accountable for (setq start-time (get-internal-run-time)) (loop #+(and :sbcl :64-bit) ; >2GB allocated since last GC? (when (> (sb-ext:generation-bytes-allocated 0) (* 2 (expt 2 30))) (setq *lex-ids-used* (union *lex-ids-used* (may-clear-expanded-lex) :test #'eq)) (clear-temporary-data) ; clear parser data structures, temporary dags and caches (funcall *batch-parse-gc-fn*) (let ((alloc (sb-ext:generation-bytes-allocated 0))) (when (> alloc (* 500 (expt 2 20))) ; 500MB - in trouble? (sb-ext:gc :full t)))) (setq raw-sentence (read-line istream nil 'eof)) (when (eq raw-sentence 'eof) (return)) (let ((sentence (string-trim '(#\Space #\Tab #\Newline) (if access-fn (funcall access-fn raw-sentence) raw-sentence)))) (unless (or (zerop (length sentence)) (string= sentence ";" :end1 1)) (incf nsent) (unless (fboundp *do-something-with-parse*) ;; if we're doing something else, let that function control output (when ostream (format ostream "~&~A ~A " nsent sentence) (force-output (if (eq ostream t) nil ostream)))) (#-:gdebug handler-case #+:gdebug progn (let ((parse-input (split-into-words (if (fboundp 'preprocess-sentence-string) (funcall (symbol-function 'preprocess-sentence-string) sentence) sentence))) ;; ask for recycling of safe dags from the pool ;; NB lexical entries must not contain safe dags - so expand-psort-entry ;; and friends must rebind *safe-not-to-copy-p* to nil #+:pooling (*dag-recycling-p* t)) #+:pooling (reset-pools #+:gdebug :forcep #+:gdebug t) (parse parse-input nil) (setf *sentence-number* nsent) (setf *sentence* sentence) (setf *parse-input* parse-input) (setf *ostream* ostream) (if (fboundp *do-something-with-parse*) (funcall *do-something-with-parse*) (when ostream (format ostream "~A ~A~%" (length *parse-record*) *edge-id*)))) #-:gdebug (storage-condition (condition) (format t "~&Memory allocation problem: ~A caused by ~A~%" condition raw-sentence)) #+(and (not :gdebug) (or :allegro :sbcl :ccl)) (#+:allegro excl:interrupt-signal #+:sbcl sb-sys:interactive-interrupt #+:ccl ccl:interrupt-signal-condition () (error "Interrupt signalled")) #-:gdebug (error (condition) (format t "~&Error: ~A caused by ~A~%" condition raw-sentence))) (incf parse-total (length *parse-record*)) (incf edge-total *edge-id*))) (when (eql (peek-char t istream nil 'eof) 'eof) (return)))) (setq *lex-ids-used* (union *lex-ids-used* (collect-expanded-lex-ids *lexicon*) :test #'eq)) (clear-temporary-data) ; !!! c-t-d before u-l-t since must compress dags in leaf (unexpand-leaf-types) ; types before clearing them #+(and :sbcl :64-bit) (progn (setf (sb-ext:bytes-consed-between-gcs) gc-bytes) (unwind-protect ; release unused memory, shrinking process size (progn (setf (sb-alien:extern-alien "small_generation_limit" sb-alien:int) 0) (sb-ext:gc)) (setf (sb-alien:extern-alien "small_generation_limit" sb-alien:int) 1)))) (unless (or (fboundp *do-something-with-parse*) (zerop nsent)) (let ((out (or ostream t))) (format out "~&~%;;; Total CPU time: ~A msecs~%" (round (* (- (get-internal-run-time) start-time) 1000) internal-time-units-per-second)) (format out ";;; Mean edges: ~,2F~%" (/ edge-total nsent)) (format out ";;; Mean parses: ~,2F~%" (/ parse-total nsent)))) (format t "~%;;; Finished test file~%") (finish-output) (lkb-beep) nil)) (defun may-clear-expanded-lex (&optional (limit 2000)) (let ((exp (collect-expanded-lex-ids *lexicon*))) (if (> (length exp) limit) (progn (clear-expanded-lex) ; completely clear out lexicon cache exp) nil)))