;;; -*- Mode: LISP; Syntax: Common-Lisp; Package: TSDB -*- ;;; ;;; [incr tsdb()] --- Competence and Performance Profiling Environment ;;; Copyright (c) 1996 -- 2005 Stephan Oepen (oe@csli.stanford.edu) ;;; ;;; This program is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU Lesser General Public License as published by ;;; the Free Software Foundation; either version 2.1 of the License, or (at ;;; your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, but WITHOUT ;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ;;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public ;;; License for more details. ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; file: ;;; module: ;;; version: ;;; written by: ;;; last update: ;;; updated by: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; author | date | modification ;;; ------------------|-------------|------------------------------------------ ;;; | | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (in-package "TSDB") (defun retrieve (&optional condition (data *tsdb-data*) &key mrs (output (not *tsdb-ignore-output-p*)) (stream *tsdb-io*) (verbose t) meter) (initialize-tsdb) (when meter (status :text (format nil "retrieving `~a' data ..." data))) (let* ((imeter (if output (madjust * meter 0.5) meter)) (ometer (when output (madjust + (madjust * meter 0.5) (mduration imeter)))) (granularity (profile-granularity data)) (fields '("i-id" "i-wf" "i-length" "i-input" "i-comment")) (fields (if (< granularity 201011) fields (cons "i-tokens" fields))) (types '(:integer :integer :integer :string :string)) (types (if (< granularity 201011) types (cons :string types))) (items (select fields types "item" (unless mrs condition) data :unique nil :sort :i-id :meter imeter)) (outputs (when output (select '("i-id" "o-ignore" "o-surface" "o-wf" "o-gc" "o-edges") '(:integer :string :string :integer :integer :integer) "output" (unless mrs condition) data :unique nil :sort :i-id :meter ometer))) (condition (if mrs (if (or (null condition) (equal condition "")) "readings >= 1" (format nil "(~a) && (readings >= 1)" condition)) condition)) (results (when mrs (select '("i-id" "parse-id" "result-id" "derivation" "mrs") '(:integer :integer :integer :string :string) '("parse" "result") condition mrs :unique nil :sort :i-id))) (all (loop for item in items for iid = (get-field :i-id item) for length = (get-field+ :i-length item 0) for ogc = -2 for oedges = -2 for matches = (when output (loop for record in (member iid outputs :key #'(lambda (foo) (get-field :i-id foo))) while (= (get-field :i-id record) iid) do (setf ogc (max ogc (get-field+ :o-gc record -1))) (setf oedges (max oedges (get-field+ :o-edges record -1))) collect record)) do (let* ((tokens (get-field :i-tokens item)) (n (when (stringp tokens) (- (length tokens) 1)))) (when (and n (< 3 n) (char= (schar tokens 0) #\() (char= (schar tokens 1) #\() (char= (schar tokens (- n 1)) #\)) (char= (schar tokens n) #\))) (let ((tokens (ignore-errors (read-from-string tokens)))) (when tokens (set-field :i-tokens tokens item))))) (let* ((comment (get-field :i-comment item)) (n (when (stringp comment) (- (length comment) 1)))) (when (and n (< 3 n) (char= (schar comment 0) #\() (char= (schar comment 1) #\() (char= (schar comment (- n 1)) #\)) (char= (schar comment n) #\))) (let ((comment (ignore-errors (read-from-string comment)))) (nconc item comment)))) (nconc item (pairlis '(:o-gc :o-edges :outputs) (list (and (> ogc -2) ogc) (and (> oedges 2) oedges) matches))) when (> length 0) collect item))) (when results (setf all (loop for item in all for key = (get-field :i-id item) for matches = (when (eql key (get-field :i-id (first results))) (loop for result = (first results) while (and result (eql key (get-field :i-id result))) collect (pop results))) when matches collect (nconc item (pairlis '(:parse-id :results) (list (get-field :parse-id (first matches)) matches))))) (setf all (sort all #'< :key #'(lambda (foo) (get-field :parse-id foo)))) (rank-items items :gold mrs :sloppyp t)) (when verbose (format stream "~&retrieve(): found ~a item~:p (~a output specification~:p).~%" (length all) (length outputs))) (when meter (meter :value (get-field :end meter))) (when meter (status :text (format nil "retrieving `~a' data ... done" data))) all)) (defun tsdb-do-vocabulary (language &key condition (load :warn) (stream *tsdb-io*) meter interrupt) (initialize-tsdb) (let* ((*tsdb-gc-message-p* nil) (condition (if (equal condition "") nil condition)) (loadp (not (member load (list nil :off :no :none)))) (whitespace '(#\Space #\Newline #\Tab)) (garbage (append whitespace (loop for string in *tsdb-tokens-to-ignore* when (and (stringp string) (= (length string) 1)) collect (schar string 0) else when (characterp string) collect string))) (imeter (madjust * meter 0.1)) (wmeter (madjust + (madjust * meter 0.9) (mduration imeter))) (items (retrieve condition language)) (strings (loop for item in items for i-input = (get-field :i-input item) for p-input = (call-hook *tsdb-preprocessing-hook* i-input) collect (or p-input i-input))) (frequencies (make-hash-table :test #'equal)) (lexicon (make-hash-table :test #'equal )) (lstasks (make-hash-table :test #'equal )) (maximal-frequency 0) (message (format nil "retrieving `~a' vocabulary ..." language)) words unknown-words) (when meter (status :text message) (meter :value (get-field :start meter))) (when (eq load :collect) (setf %tsdb-lexical-preterminals% nil)) (when strings (format stream "~%") (dolist (string strings words) (do* ((i (position-if #'(lambda (c) (member c whitespace)) string) (position-if #'(lambda (c) (member c whitespace)) string)) (word (when i (subseq string 0 i)) (when i (subseq string 0 i))) (word (string-downcase (string-trim garbage word)) (string-downcase (string-trim garbage word))) (n (gethash word frequencies) (gethash word frequencies)) (string (if i (subseq string i) string) (if i (subseq string i) string)) (string (string-left-trim whitespace string) (string-left-trim whitespace string))) ((not i) (let* ((word (string-downcase (string-trim garbage string))) (n (gethash word frequencies))) (when (and word (> (length word) 0)) (setf (gethash word frequencies) (+ (or n 0) 1)) (setf (gethash word lexicon) 0) (setf (gethash word lstasks) 0) (setf maximal-frequency (max maximal-frequency (+ (or n 0) 1))) (pushnew word words :test #'equal)))) (when (and word (> (length word) 0)) (setf (gethash word frequencies) (+ (or n 0) 1)) (setf (gethash word lexicon) 0) (setf (gethash word lstasks) 0) (setf maximal-frequency (max maximal-frequency (+ (or n 0) 1))) (pushnew word words :test #'equal)))) (let* ((width (apply #'max (map 'list #'length words))) (tabulation (format nil "~~~d,0t| ~~~dd" (+ width 2 1) (length (format nil "~d" maximal-frequency)))) (increment (when wmeter (/ (mduration wmeter) (length words))))) (do* ((words (sort (copy-seq words) #'string-lessp) (rest words)) (word (first words) (first words))) ((null words) unknown-words) (when (and interrupt (probe-file interrupt)) (delete-file interrupt) (format stream "do-vocabulary(): received external interrupt signal.~%") (when meter (status :text (format nil "~a interrupt" message) :duration 5)) (return-from tsdb-do-vocabulary)) (when (and loadp (not (member word *tsdb-tokens-to-ignore* :test #'string-equal))) (let ((entries (parse-word word :load load))) (setf (gethash word lexicon) (get-field :words entries)) (setf (gethash word lstasks) (get-field :l-stasks entries)) (unless entries (push word unknown-words))) (format stream "~& ~a ~@? reference(s)~:[~; | [~d + ~d] lexical entrie(s)~];~%" word tabulation (gethash word frequencies) loadp (or (gethash word lexicon) -1) (or (gethash word lstasks) -1))) (when increment (meter-advance increment))))) (format stream "~&~%") (when meter (meter :value (get-field :end meter)) (status :text (format nil "~a done" message) :duration 5)) (length items))) (defun tsdb (&optional action argument &key condition run skeleton load gold host task wait quantum (file nil filep) (reset nil resetp) count target error) (unless (and action (keywordp action) (let ((action (string action))) (string-equal "ini" (subseq action 0 (min (length action) 3))))) (initialize-tsdb)) (if (stringp action) (let* ((result (call-tsdb action (or argument *tsdb-data*)))) (when (and result (not (zerop (length result)))) (format *tsdb-io* "~&~%~a~%" result))) (let ((action (string action))) (case (intern (subseq action 0 (min (length action) 3)) :keyword) ((:initialize :ini) (setf *tsdb-initialized-p* nil) (initialize-tsdb argument :pattern load :background run) (tsdb :info) (format *tsdb-io* "~&~%")) ((:podium :pod :po) (init-podium) (when argument (load-cache :pattern load :background t))) ((:cpus :cpu :cpu :cp) (format *tsdb-io* "~&~%") (cond ((null argument) (tsdb-do-cpus :action :active :host host :task task :stream *tsdb-io*)) ((member argument '(:active :list :kill :shutdown)) (tsdb-do-cpus :action argument :host host :task task :stream *tsdb-io*)) (t (let ((clients (cond ((and filep resetp) (initialize-cpus :classes argument :count count :wait wait :host host :task task :quantum quantum :file file :reset reset :stream *tsdb-io* :prefix " ")) (filep (initialize-cpus :classes argument :count count :wait wait :host host :task task :quantum quantum :file file :stream *tsdb-io* :prefix " ")) (resetp (initialize-cpus :classes argument :count count :wait wait :host host :task task :quantum quantum :reset reset :stream *tsdb-io* :prefix " ")) (t (initialize-cpus :classes argument :count count :wait wait :host host :task task :quantum quantum :stream *tsdb-io* :prefix " "))))) (when (and (eq error :exit) (< (length clients) (or count 1))) #+:allegro (excl:exit 0 :no-unwind t) #+:lispworks (lw:quit :ignore-errors-p t) #-(or :allegro :lispworks) (error "no known mechanism to shutdown Lisp (see `commands.lisp'"))))) (format *tsdb-io* "~&~%")) ((:info :inf) (tsdb-do-status :all :stream *tsdb-io*)) ((:home :hom :ho) (if (stringp argument) (tsdb-do-set (quote *tsdb-home*) argument) (tsdb-do-status :home :stream *tsdb-io*))) ((:default :def :de :d) (if (stringp argument) (tsdb-do-set (quote *tsdb-data*) argument) (tsdb-do-status :default :stream *tsdb-io*))) ((:list :lis :li :l) (tsdb-do-list (or argument *tsdb-home*))) ((:skeletons :ske :sk :s) (cond ((stringp argument) (tsdb-do-set (quote *tsdb-skeleton-directory*) argument) (tsdb-do-skeletons nil :format :short)) (t (format *tsdb-io* "~&~%") (tsdb-do-skeletons nil)))) ((:create :cre :cr :c) (format *tsdb-io* "~&~%") (tsdb-do-create argument (or skeleton *tsdb-default-skeleton*)) (format *tsdb-io* "~&~%")) ((:process :pro :pr) (format *tsdb-io* "~&~%") (tsdb-do-process (if (or (null argument) (member argument (list nil t ""))) *tsdb-data* argument) :condition condition :gold gold :run-id run :overwrite t)) ((:vocabulary :voc :vo :v) (format *tsdb-io* "~&~%") (tsdb-do-vocabulary (if (or (null argument) (member argument (list nil t ""))) *tsdb-data* argument) :condition condition :load (or load :quiet))) ((:compress :com :co) (format *tsdb-io* "~&~%") (tsdb-do-compress (if (or (null argument) (member argument (list nil t ""))) *tsdb-data* argument) target)) ((:help :hel :he) (format *tsdb-io* "~&~%") (tsdb-do-help (if argument (intern (subseq (string argument) 0 3) :keyword) :all))) (t (format *tsdb-io* "~&~% tsdb(): invalid or ambiguous command `~(~:a~)'; ~ try `(tsdb :help)'.~%~%" action)))))) (defun tsdb-do-set (variable value) (format *tsdb-io* "~%") (let ((value (case variable ((*tsdb-home* *tsdb-skeleton-directory*) (namestring (make-pathname :directory value))) (t value)))) (set variable value))) (defun tsdb-do-status (name &key (stream *tsdb-io*) (prefix " ")) (format stream "~&~%") (when (member name (list :all)) (format stream "~atsdb(1) application: `~a';~%" prefix *tsdb-application*)) (when (member name (list :all :home)) (format stream "~atsdb(1) database root: `~a';~%" prefix *tsdb-home*)) (when (member name (list :all :default)) (format stream "~adefault test suite database `~a'~%" prefix *tsdb-data*)) (when (member name (list :all)) (format stream "~askeletons directory: `~a';~%" prefix *tsdb-skeleton-directory*) (format stream "~adefault test suite skeleton `~a';~%" prefix *tsdb-default-skeleton*)) (when (member name (list :all)) (format stream "~awrite run: ~:[no~;yes~]; write parse: ~:[no~;yes~]; ~ write result: ~:[no~;yes~];~%" prefix *tsdb-write-run-p* *tsdb-write-parse-p* *tsdb-write-result-p*)) (when (member name (list :all)) (format stream "~awrite output: ~:[no~;yes~];~%" prefix *tsdb-write-output-p*)) (when (member name (list :all)) (format stream "~acache database writes ~:[no~;yes~]; flush cache threshold ~a~%" prefix *tsdb-cache-database-writes-p* *tsdb-flush-cache-threshold*)) (when (member name (list :all)) (format stream "~atrees hook: ~:[none~;`~(~a~)()'~];~%~ ~asemantix hook: ~:[none~;`~(~a~)()'~];~%" prefix *tsdb-trees-hook* *tsdb-trees-hook* prefix *tsdb-semantix-hook* *tsdb-semantix-hook*)) (when (member name (list :all)) (format stream "~aexhaustive search: ~:[no~;yes~]; ~ maximal number of edges ~a; item factor: ~a.~%" prefix *tsdb-exhaustive-p* *tsdb-maximal-number-of-edges* *tsdb-edge-factor*)) (format stream "~%")) (defun find-tsdb-directories (&optional (home *tsdb-home*) &key name pattern trace meter) (declare (ignore trace)) (when meter (meter :value (get-field :start meter))) (let* ((prefix (length *tsdb-home*)) (length (length home)) (absolute (not (equal home *tsdb-home*))) (directories (subdirectories home)) (directories (loop for directory in directories for suffix = (if (and (<= prefix length) (string= *tsdb-home* directory :end2 prefix)) (subseq directory prefix) directory) when (and suffix (not (search "/.svn" suffix)) (not (search "/.cvs" suffix)) (or (null name) (string= name suffix)) (or (null pattern) (ppcre::scan pattern suffix))) collect suffix)) (increment (when (and directories meter) (/ (mduration meter) (+ (length directories) 1)))) (databases (loop initially (when increment (meter-advance increment)) for directory in directories for status = (verify-tsdb-directory directory :absolute absolute) when status collect status when increment do (meter-advance increment)))) (when meter (meter :value (get-field :end meter))) databases)) (defun tsdb-do-list (home &key (stream *tsdb-io*) (prefix " ") (format :ascii) (indentation 0) name pattern meter index) (when stream (format stream "~%")) (loop with dbs = (sort (find-tsdb-directories home :name name :pattern pattern :meter (madjust * meter 0.95)) #'string< :key #'(lambda (foo) (get-field :database foo))) with result = nil initially (case format (:html (format stream "~v,0t
~ | Test Suite Instance | ~Items | Parses | ~Options |
---|---|---|---|---|
~%~ ~v,0t | ~a | ~a | ~~a | ~a~a~a~a~a~a | ~%~ ~v,0t