;;; -*- Mode: COMMON-LISP; Syntax: Common-Lisp; Package: TSDB -*-

;;;
;;; [incr tsdb()] --- Competence and Performance Profiling Environment
;;; Copyright (c) 1996 -- 2006 Stephan Oepen (oe@csli.stanford.edu)
;;; Copyright (c) 2005 -- 2006 Erik Velldal (erikve@ifi.uio.no)
;;;
;;; This program is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU Lesser General Public License as published by
;;; the Free Software Foundation; either version 2.1 of the License, or (at
;;; your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;;; License for more details.
;;;

;;;
;;; typical TADM output:
;;;
;;;   Classes    = 26944
;;;   Contexts   = 600
;;;   Features   = 149774 / 283752
;;;   Non-zeros  = 24071755
;;;
;;; we interpret that as 26944 events in 600 contexts; 283752 total features of
;;; which 149774 have some property; and 24071755 actual (non-zero) feature
;;; counts, i.e. actually observed feature occurrences.  a day later, we think
;;; the relevant property of the 149774 sub-set of features might be that they
;;; are attested in active events, but this remains guesswork.
;;;

(in-package :tsdb)

;;;
;;; settings for the external maximum entropy estimator; estimate-model()
;;; maps these onto `tadm' command-line options.
;;;
(defparameter *maxent-method* :tao_lmvm)

(defparameter *maxent-iterations* 5000)

(defparameter *maxent-relative-tolerance* 1e-10)

(defparameter *maxent-absolute-tolerance* 1e-20)

(defparameter *maxent-variance* 1e-2)

(defparameter *maxent-uniform* nil)

(defparameter *maxent-extra-options* nil)

;;;
;;; the option variables that mem-environment() and print-model() enumerate.
;;;
(defparameter *maxent-options*
  '(*maxent-method* *maxent-iterations* *maxent-relative-tolerance*
    *maxent-absolute-tolerance* *maxent-variance*))

;;;
;;; when non-nil, learner output is not redirected to `/dev/null' and the
;;; temporary files written by estimate-model() and learner-rank-items() are
;;; kept.
;;;
(defparameter *maxent-debug-p* t)

;;;
;;; settings for the external SVM learners; estimate-model() maps these onto
;;; `svm_learn' and `svm_perform_learn' command-line options.
;;;
(defparameter *svm-kernel* 0)

(defparameter *svm-error-to-margin* nil)

(defparameter *svm-cost-balance* 1.0)

(defparameter *svm-iterations* 1e+5)

(defparameter *svm-tolerance* 0.001)

(defparameter *svm-poly-d* nil)

(defparameter *svm-rbf-g* nil)

(defparameter *svm-sig-poly-s* nil)

(defparameter *svm-sig-poly-r* nil)

(defparameter *svm-cache-size* 5000.0)

(defparameter *svm-options*
  '(*svm-kernel* *svm-rbf-g* *svm-poly-d* *svm-sig-poly-s* *svm-sig-poly-r*
    *svm-iterations* *svm-cost-balance* *svm-error-to-margin*
    *svm-tolerance*))

(defun feature-environment (&key (format :string))
  "Describe the current feature extraction settings.
   FORMAT selects the result: :string yields a human-readable summary,
   :compact a lower-case token built from the same settings, and :list an
   association list pairing each symbol in *feature-options* with its value.
   Any other FORMAT yields NIL."
  (flet ((filter-positive (x)
           ;; only strictly positive numbers are reported; all else is NIL
           (if (and (numberp x) (> x 0)) x nil)))
    (case format
      (:string
       (let ((counts
              (when (counts-p *feature-frequency-threshold*)
                (list
                 (filter-positive
                  (counts-absolute *feature-frequency-threshold*))
                 (filter-positive
                  (counts-contexts *feature-frequency-threshold*))
                 (filter-positive
                  (counts-events *feature-frequency-threshold*))
                 (filter-positive
                  (counts-relevant *feature-frequency-threshold*)))))
             (ngramp (> *feature-ngram-size* 0))
             (weightp (and (numberp *feature-constituent-weight*)
                           (> *feature-constituent-weight* 0))))
         (if *feature-flags*
           (format
            nil
            "~{~a[~a] ~}~
             FT[~{~@[~a~]~^:~}] RS[~@[~a~]]"
            (loop
                for flag in *feature-flags*
                collect (second flag) collect (first flag))
            counts *feature-random-sample-size*)
           (format
            nil
            "GP[~a] ~:[-~;+~]PT ~:[-~;+~]LEX CW[~@[~a~]] ~
             ~:[-~;+~]AE NS[~a] ~
             NT[~@[~(~a~)~]] ~:[-~;+~]NB LM[~:[0~*~;~a~]] ~
             FT[~{~@[~a~]~^:~}] RS[~@[~a~]]"
            *feature-grandparenting*
            *feature-use-preterminal-types-p*
            *feature-lexicalization-p*
            (and weightp *feature-constituent-weight*)
            *feature-active-edges-p*
            *feature-ngram-size*
            (and ngramp *feature-ngram-tag*)
            (and ngramp *feature-ngram-back-off-p*)
            *feature-lm-p* *feature-lm-p*
            counts *feature-random-sample-size*))))
      (:compact
       (let ((ngramp (> *feature-ngram-size* 0)))
         (format
          nil
          "~(g~d_p~:[0~;1~]_l~:[0~;1~]_cw~d_a~:[0~;1~]_~
           n~d_nt~:[0~;1~]_nb~:[0~;1~]_lm~a_c~:[0~;~:*~a~]_~
           r~:[0~;~:*~a~]_rs~:[0~;~:*~d~]~)"
          *feature-grandparenting*
          *feature-use-preterminal-types-p*
          *feature-lexicalization-p*
          (or *feature-constituent-weight* 0)
          *feature-active-edges-p*
          *feature-ngram-size*
          (and ngramp (eq *feature-ngram-tag* :type))
          (and ngramp *feature-ngram-back-off-p*)
          *feature-lm-p*
          (and (counts-p *feature-frequency-threshold*)
               (counts-contexts *feature-frequency-threshold*))
          (and (counts-p *feature-frequency-threshold*)
               (counts-relevant *feature-frequency-threshold*))
          *feature-random-sample-size*)))
      (:list
       (loop
           for key in *feature-options*
           collect (cons key (symbol-value key)))))))

;;fix_me add experiment type to names
(defun mem-environment (&key (format :string) full prefix)
  "Describe the current maximum entropy estimation settings.
   FORMAT is :string, :compact, or :list, as in feature-environment().  When
   FULL is non-nil the feature-environment() description in the same format
   is included; PREFIX, when non-nil, is put in front of the :string and
   :compact results."
  (let ((features (and full (feature-environment :format format))))
    (case format
      (:string
       (concatenate 'string
         (format
          nil
          "~@[[~a] ~]~@[~a ~]MM[~(~a~)] MI[~@[~a~]] ~
           RT[~@[~e~]] AT[~@[~e~]] VA[~@[~e~]]~
           ~@[~* PC[~a]~]"
          prefix features
          *maxent-method* *maxent-iterations*
          *maxent-relative-tolerance* *maxent-absolute-tolerance*
          *maxent-variance*
          ;; FULL only gates the PC[] field; its own value is skipped (~*)
          full (or *redwoods-train-percentage* 100))
         #+:onet
         (format nil "~@[ C[~a]~]" *feature-custom*)))
      (:compact
       (format
        nil
        "~(~@[~a~]mem_~@[~a_~]rt~e_at~e_v~e~@[_pc~d~]~)"
        prefix features
        *maxent-relative-tolerance* *maxent-absolute-tolerance*
        *maxent-variance* *redwoods-train-percentage*))
      (:list
       (nconc
        features
        (loop
            for key in *maxent-options*
            collect (cons key (symbol-value key))))))))

(defun svm-environment (&key (format :string) full prefix)
  "Describe the current SVM settings; FORMAT is :string or :compact.
   FULL and PREFIX behave as in mem-environment().  Any other FORMAT yields
   NIL."
  (let ((features (and full (feature-environment :format format))))
    (case format
      (:string
       (format
        nil
        "~@[[~a] ~]~@[~a ~] K[~[lin~;pol~;rbf~;sig~;usr~]]~
         ~@[ G[~a]~]~@[ D[~a]~]~@[ S[~a]~]~@[ R[~a]~]~
         ~@[ IT[~a]~]~@[ B[~a]~]~@[ EM[~a]~]~@[ T[~a]~]"
        prefix features
        *svm-kernel* *svm-rbf-g* *svm-poly-d*
        *svm-sig-poly-s* *svm-sig-poly-r*
        *svm-iterations* *svm-cost-balance*
        *svm-error-to-margin* *svm-tolerance*))
      (:compact
       (format
        nil
        "~(~@[~a~]svm_~@[~a_~]~[lin~;pol~;rbf~;sig~;usr~]~
         ~@[_g~a~]~@[_d~a~]~@[_s~a~]~@[_r~a~]~@[_it~e~]~
         ~@[_b~a~]~@[_em~a~]~@[_t~a~]~@[_pc~d~]~)"
        prefix features
        *svm-kernel* *svm-rbf-g* *svm-poly-d*
        *svm-sig-poly-s* *svm-sig-poly-r*
        *svm-iterations* *svm-cost-balance*
        *svm-error-to-margin* *svm-tolerance*
        *redwoods-train-percentage*)))))

(defun print-model (model &key (file "/dev/null") stream (format :rpm))
  "Write MODEL to STREAM or, when STREAM is NIL, to FILE.
   For FORMAT :mem or :rpm every context of the model is handed to
   print-context().  For :freeze and :export a textual model file is
   written: header, option settings, and one line per feature; :export omits
   features without a numeric weight and renumbers the rest from zero."
  (case format
    ((:mem :rpm)
     (with-open-file (foo file :direction :output :if-exists :supersede)
       (loop
           with stream = (or stream foo)
           for context in (model-contexts model)
           do
             (print-context
              context :stream stream :model model :format format))))
    ((:freeze :export)
     (with-open-file (foo file :direction :output :if-exists :supersede)
       (let ((stream (or stream foo))
             (table (model-table model)))
         (format
          stream
          ";;;~%;;; ~a~%;;; (~a@~a; ~a)~%;;;~%"
          model (current-user) (current-host) (current-time :long :pretty))
         (format stream "~%:begin :model ~d.~%~%" (model-ncontexts model))
         (when (> (model-count model) 0)
           (if *feature-flags*
             (let ((*print-case* :downcase))
               (format
                stream
                "*feature-flags := [~{~a~^ ~}].~%~%"
                (loop for flag in *feature-flags* collect (first flag))))
             (loop
                 with *print-case* = :downcase
                 for key in *feature-options*
                 ;;
                 ;; look up the value only after the boundp() test; reading
                 ;; it in a `for' clause would signal an error for an
                 ;; unbound option before the guard could skip it.
                 ;;
                 when (boundp key) do
                   (let ((value (symbol-value key)))
                     (format
                      stream
                      "~a := ~a.~%~%"
                      key
                      (cond
                       ((null value) "no")
                       ((eq value t) "yes")
                       (t value))))))
           (loop
               with *print-case* = :downcase
               for key in *maxent-options*
               when (boundp key) do
                 (format stream "~a := ~a.~%~%" key (symbol-value key))))
         (format
          stream
          ":begin :features ~d.~%~%"
          (symbol-table-count table))
         (loop
             with *print-case* = :downcase
             with *print-right-margin* = 65536
             with *package* = (find-package :lkb)
             with map = (model-map model)
             with i = 0
             for code from 0 to (- (symbol-table-count table) 1)
             for symbol = (code-to-symbol code table)
             for weight = (aref (model-weights model) code)
             for counts = (or (aref (model-counts model) code) (make-counts))
             for mapped = (and map (symbol-to-code code map :rop t))
             for minmax = (aref (model-minmax model) code)
             when (or (eq format :freeze) (numberp weight)) do
               (case format
                 (:freeze
                  (format stream "(~d~@[ ~d~]) " code mapped))
                 (:export
                  (format stream "(~d) " i)
                  (incf i)))
               (format
                stream
                "[~{~s~^ ~}] ~:[null~*~;~f~] "
                symbol weight weight)
               (print-object counts stream)
               (when minmax
                 (format stream " [~d ~d]" (first minmax) (second minmax)))
               (format stream "~%"))
         (format stream "~%:end :features.~%~%:end :model.~%"))))))

(defun read-model (file &key (verbose t) id)
  "Read a model in the textual format that print-model() writes for :freeze
   and :export.  On success the model is stored in %model%, additionally
   pushed onto *models* under ID when ID is non-nil, and returned.  On a
   missing file or malformed input a diagnostic is printed and NIL is
   returned."
  (labels ((|[|-reader (stream char)
             (declare (ignore char))
             (read-delimited-list #\] stream nil))
           (|{|-reader (stream char)
             (declare (ignore char))
             (read-delimited-list #\} stream nil)))
    (let* ((*readtable* (copy-readtable nil))
           (*package* (find-package :lkb))
           (model (make-model))
           (table (model-table model))
           (name (file-namestring file)))
      ;;
      ;; outside of the feature block, `.' merely terminates a statement
      ;;
      (set-syntax-from-char #\. #\space *readtable*)
      (if (probe-file file)
        (with-open-file (stream file :direction :input)
          (unless (and (eq (read stream nil nil) :begin)
                       (eq (read stream nil nil) :model)
                       (integerp
                        (setf (model-ncontexts model)
                          (read stream nil nil))))
            (format t "~&read-model(): invalid header in `~a'.~%" name)
            (return-from read-model))
          (when verbose
            (format t "~&read-model(): reading file `~a'." name)
            (force-output))
          (loop
              with bodyp = nil
              for form = (read stream nil :eof)
              while (not (eq form :eof))
              when (and (eq form :begin)
                        (eq (read stream nil nil) :features))
              do
                (let ((n (read stream nil nil)))
                  (unless (and (integerp n) (>= n 0))
                    (format
                     t
                     "~&read-model(): invalid `:begin :feature' block in `~a'.~%"
                     name)
                    (return-from read-model))
                  (setf (model-size model) n)
                  ;;
                  ;; the contents of a fresh array are unspecified unless an
                  ;; initial element is given, but print-model() relies on
                  ;; unset slots being NIL.
                  ;;
                  (setf (model-minmax model)
                    (make-array n :initial-element nil))
                  (setf (model-counts model)
                    (make-array n :initial-element nil))
                  (setf (model-weights model)
                    (make-array n :initial-element nil)))
                ;;
                ;; inside the feature block `.' is part of numbers again, and
                ;; [...] and {...} read as lists.
                ;;
                (setf *readtable* (copy-readtable nil))
                (set-syntax-from-char #\[ #\( *readtable*)
                (set-syntax-from-char #\] #\) *readtable*)
                (set-macro-character #\[ #'|[|-reader nil *readtable*)
                (set-syntax-from-char #\{ #\( *readtable*)
                (set-syntax-from-char #\} #\) *readtable*)
                (set-macro-character #\{ #'|{|-reader nil *readtable*)
                (setf bodyp t)
              else when (eq form :end)
              do
                (set-syntax-from-char #\. #\space *readtable*)
                (unless (and (eq (read stream nil nil) :features)
                             (eq (read stream nil nil) :end)
                             (eq (read stream nil nil) :model))
                  (format t "~&read-model(): invalid model prologue.~%")
                  (return-from read-model))
                (when id (push (cons id model) *models*))
                (return (setf %model% model))
              else when bodyp
              do
                (unless (and (consp form) (numberp (first form)))
                  (format t "~&read-model(): invalid codes `~a'.~%" form)
                  (return-from read-model))
                (let* ((symbol
                        (let ((foo (read stream nil nil)))
                          (unless (consp foo)
                            (format
                             t
                             "~&read-model(): invalid symbol `~a'.~%"
                             foo)
                            (return-from read-model))
                          foo))
                       (code (first form))
                       (mapped (second form))
                       (weight (read stream nil nil))
                       (counts (read-preserving-whitespace stream nil nil))
                       ;; the [min max] pair is optional at the end of a line
                       (minmax
                        (unless (eq #\Newline (peek-char nil stream nil nil))
                          (read stream nil nil))))
                  (set-symbol-and-code symbol code table)
                  (when (numberp mapped)
                    (set-symbol-and-code code mapped (model-map model)))
                  (when (>= code (model-size model))
                    (format
                     t
                     "~&read-model(): mysterious feature overflow (~a vs. ~a).~%"
                     code (model-size model))
                    (return-from read-model))
                  (when (numberp weight)
                    (setf (aref (model-weights model) code) weight)
                    (incf (model-count model)))
                  (setf (aref (model-counts model) code)
                    (make-counts
                     :absolute (first counts) :contexts (second counts)
                     :events (third counts) :relevant (fourth counts)))
                  (when minmax
                    (setf (aref (model-minmax model) code) minmax)))
              finally (terpri)))
        (format t "~&read-model(): unable to open `~a'.~%" name)))))

(defun estimate-model (items
                       &key (identity (current-pid)) fold
                            (stream *tsdb-io*) model type)
  "Estimate model parameters for ITEMS with an external learner.
   The context cache files of all items with more than one reading are
   concatenated into an events file, on which `tadm' (TYPE :mem),
   `svm_perform_learn' (:perf), or `svm_learn' (:svm) is run.  When the
   learner exits with status zero and the parameter file exists, statistics
   from its trace are appended destructively to FOLD and the model is
   returned; otherwise NIL is returned.  STREAM is ignored."
  (declare (ignore stream))
  (let* ((model (or model (make-model)))
         (events (format
                  nil
                  "~a/.model.~a.~a.events"
                  (tmp :redwoods) (current-user) (current-pid)))
         (trace (format
                 nil
                 "~a/.model.~a.~a.trace"
                 (tmp :redwoods) (current-user) (current-pid)))
         (source (get-field :source (first items)))
         (cache (profile-find-context-cache source identity)))
    (unless (model-parameters model)
      (setf (model-parameters model)
        (format
         nil
         "~a/.model.~a.~a.weights"
         (tmp :redwoods) (current-user) (current-pid))))
    (with-open-file (out events
                     :direction :output
                     :if-does-not-exist :create :if-exists :supersede
                     :element-type '(unsigned-byte 8))
      (loop
          for item in items
          ;; look up the context cache again only when the source changes
          for cc = (let ((foo (get-field :source item)))
                     (cond
                      ((string= source foo) cache)
                      (t
                       (setf source foo)
                       (setf cache
                         (profile-find-context-cache source identity)))))
          for iid = (get-field :i-id item)
          for file = (merge-pathnames
                      cc
                      (make-pathname
                       :name (format
                              nil
                              "~a~@[.~a~]"
                              iid *feature-random-sample-size*)))
          for readings = (get-field :readings item)
          when (> readings 1) do
            ;;
            ;; for items that were not annotated or for some other reason have
            ;; no information in the feature and, thus, context cache, cp()
            ;; will just do nothing, when .file. does not exist.
            ;;
            (when (or (null *feature-random-sample-size*)
                      (<= readings *feature-random-sample-size*)
                      (null (cp file out)))
              ;;
              ;; even when random sampling is enabled, for contexts with fewer
              ;; events than the maximum sample size, the context cache will
              ;; not contain two files, but rather just the base file name; so
              ;; try to fall back on the non-sampled file then.
              ;;
              (let ((file (merge-pathnames
                           cc
                           (make-pathname :name (format nil "~a" iid)))))
                (cp file out)))))
    (let* ((parameters (model-parameters model))
           (variances
            (when (numberp *maxent-variance*)
              (let ((name (format
                           nil
                           "~a/.model.~a.~a.variances"
                           (tmp :redwoods) (current-user) (current-pid))))
                (with-open-file (stream name
                                 :direction :output :if-exists :supersede)
                  (format stream "~f" *maxent-variance*))
                name)))
           (command
            (case type
              (:mem
               (format
                nil
                "tadm -monitor -events_in ~a -params_out ~a~
                 ~@[ -method ~(~a~)~]~
                 ~@[ -max_it ~a~]~@[ -frtol ~a~]~@[ -fatol ~a~]~
                 ~@[ -variances ~a~]~
                 ~:[~; -uniform~]~
                 ~@[ ~a~]"
                events parameters
                *maxent-method*
                *maxent-iterations*
                *maxent-relative-tolerance* *maxent-absolute-tolerance*
                variances
                *maxent-uniform*
                *maxent-extra-options*))
              (:perf
               (format
                nil
                "svm_perform_learn -v 2 -y 2 -n 10 -q 40 -t 0~
                 ~@[ -# ~a~]~@[ -e ~a~]~@[ -c ~a~] ~a ~a"
                *svm-iterations* *svm-tolerance* *svm-error-to-margin*
                events parameters))
              (:svm
               (format
                nil
                "svm_learn -v 2 -n 10 -q 40 -m ~a -z p -t ~a~
                 ~@[ -g ~a~]~
                 ~@[ -d ~a~]~@[ -s ~a~]~@[ -r ~a~]~@[ -# ~a~]~
                 ~@[ -j ~a~]~@[ -e ~a~]~@[ -c ~a~] ~a ~a"
                *svm-cache-size* *svm-kernel*
                *svm-rbf-g*
                *svm-poly-d* *svm-sig-poly-s* *svm-sig-poly-r*
                *svm-iterations*
                *svm-cost-balance* *svm-tolerance* *svm-error-to-margin*
                events parameters))))
           (output (if *maxent-debug-p* nil "/dev/null")))
      (when (and (zerop
                  (run-process
                   (format nil "~a | tee '~a'" command trace)
                   :wait t :output output :if-output-exists :supersede))
                 (probe-file parameters))
        (when (probe-file trace)
          (let ((trace (read-file trace))
                (iterations 0)
                events)
            ;;
            ;; extract number of event and features and count up the number
            ;; of iterations
            ;;
            (multiple-value-bind (foo matches)
                (ppcre::scan-to-strings "\\nClasses = ([0-9]+)" trace)
              (declare (ignore foo))
              (when matches
                (setf events
                  (parse-integer (aref matches 0) :junk-allowed t))))
            (ppcre:do-matches
                (start end
                 "\\n *[0-9]+ [0-9.e+-]+ [0-9.e+-]+ [0-9.e+-]+"
                 trace nil)
              (incf iterations))
            (nconc
             fold
             (pairlis '(:f-events :f-iterations :f-estimation)
                      (list events iterations trace)))))
        (when (and (eq type :mem) (probe-file parameters))
          (nconc
           fold
           (acons :f-features (get-field :lines (wc parameters)) nil)))
        (unless *maxent-debug-p*
          (ignore-errors (delete-file trace))
          (ignore-errors (delete-file events))
          (ignore-errors (delete-file parameters)))
        model))))

(defun read-weights (model &optional (file (model-parameters model)))
  "Read one weight per datum from FILE into the weight array of MODEL.
   The I-th datum read is stored under code I or, when the model has a
   non-nil map, under the result of code-to-symbol() on I.  A missing file
   is silently ignored.  Always returns NIL."
  (with-open-file (stream file :direction :input :if-does-not-exist nil)
    (when stream
      (loop
          for i from 0
          for code = (if (model-map model)
                       (code-to-symbol i (model-map model))
                       i)
          for weight = (read stream nil nil)
          while weight
          do
            (when (>= code (model-size model))
              ;;
              ;; keep doubling until .code. actually fits: a single doubling
              ;; is not enough when the model size is zero or when a mapped
              ;; code is at least twice the current size.
              ;;
              (let ((n (loop
                           for n = (max 1 (* (model-size model) 2))
                           then (* n 2)
                           when (> n code) return n)))
                (setf (model-size model) n)
                (setf (model-counts model)
                  (adjust-array (model-counts model) n))
                (setf (model-weights model)
                  (adjust-array (model-weights model) n))))
            (setf (aref (model-weights model) code) weight)
            (incf (model-count model))))))

(defun learner-rank-items (items model
                           &key (identity (current-pid)) fold type
                                (stream *tsdb-io*))
  "Score and rank the results of ITEMS with the parameters of MODEL.
   Items with more than one reading and an existing context cache file are
   written to an events file and scored by `evaluate' (TYPE :mem),
   `svm_perform_classify' (:perf), or `svm_classify' (:svm); each such item
   destructively receives a :ranks field.  For TYPE :mem and a non-nil FOLD,
   an :eaccuracy entry parsed from the first line of the learner output is
   added to the :f-extras of FOLD.  Returns the list of items that were
   scored, or NIL when the model has no parameters."
  (let* ((parameters (model-parameters model))
         (events (format
                  nil
                  "~a/.model.~a.~a.events"
                  (tmp :redwoods) (current-user) (current-pid)))
         (source (get-field :source (first items)))
         (cache (profile-find-context-cache source identity))
         active)
    (when (null parameters)
      (format t "learner-rank-items(): invalid model: no parameters.~%")
      (return-from learner-rank-items))
    (format
     stream
     "~&[~a] learner-rank-items(): evaluating ~d item~p ~%"
     (current-time :long :short) (length items) (length items))
    ;;
    ;; in order to keep using this stream across multiple calls to cp(), its
    ;; :element-type needs to match the expectations of cp(), i.e. be byte-wise
    ;; binary data.
    ;;
    (with-open-file (out events
                     :direction :output
                     :if-does-not-exist :create :if-exists :supersede
                     :element-type '(unsigned-byte 8))
      (loop
          for item in items
          for cc = (let ((foo (get-field :source item)))
                     (cond
                      ((string= source foo) cache)
                      (t
                       (setf source foo)
                       (setf cache
                         (profile-find-context-cache source identity)))))
          for iid = (get-field :i-id item)
          for readings = (get-field :readings item)
          for file = (merge-pathnames
                      cc
                      (make-pathname :name (format nil "~a" iid)))
          when (and (> readings 1) (probe-file file))
          do
            (cp file out)
            (push item active)
          else
          do
            (format
             t
             "~&[~a] learner-rank-items(): mysteriously skipping item # ~d.~%"
             (current-time :long :short) (get-field :i-id item))))
    (setf active (nreverse active))
    (let* ((scores (format
                    nil
                    "~a/.model.~a.~a.scores"
                    (tmp :redwoods) (current-user) (current-pid)))
           (output (format
                    nil
                    "~a/.model.~a.~a.output"
                    (tmp :redwoods) (current-user) (current-pid)))
           (command
            (case type
              (:mem
               (format
                nil
                "evaluate -s '~a' '~a' '~a'"
                scores parameters events))
              (:perf
               (format
                nil
                "svm_perform_classify ~a ~a ~a"
                events parameters scores))
              (:svm
               (format
                nil
                "svm_classify '~a' '~a' '~a'"
                events parameters scores)))))
      (when (and (zerop
                  (run-process
                   command
                   :wait t :output output :if-output-exists :supersede))
                 (probe-file scores) (probe-file output))
        (format
         t
         "~&[~a] learner-rank-items(): ranking ~d item~p ~%"
         (current-time :long :short) (length active) (length active))
        (with-open-file (stream scores :direction :input)
          (loop
              for item in active
              for results = (get-field :results item)
              ;; the scores file holds one score per result, in item order
              for ranks
              = (loop
                    for result in results
                    for rid = (get-field :result-id result)
                    for score = (read stream nil nil)
                    unless score do
                      (error
                       "learner-rank-items(): mysterious score deficit")
                    collect (pairlis '(:result-id :score)
                                     (list rid score)))
              do
                (let* ((ranks (sort
                               ranks #'>
                               :key #'(lambda (foo)
                                        (get-field :score foo))))
                       ;; results with equal scores share one rank
                       (ranks (loop
                                  with last = (get-field
                                               :score (first ranks))
                                  with i = 1
                                  with j = 2
                                  for rank in ranks
                                  for score = (get-field :score rank)
                                  unless (= score last) do
                                    (setf i j)
                                    (setf last score)
                                    (incf j)
                                  collect (acons :rank i rank))))
                  (if (get-field :ranks item)
                    (setf (get-field :ranks item) ranks)
                    (nconc item (acons :ranks ranks nil))))))
        (when fold
          (when (eq type :mem)
            (with-open-file (stream output :direction :input)
              (let ((line (read-line stream nil nil)))
                (when line
                  (multiple-value-bind (foo matches)
                      (ppcre:scan-to-strings "([0-9.]+) [0-9]*$" line)
                    (declare (ignore foo))
                    (when matches
                      (let ((accuracy
                             (acons :eaccuracy (aref matches 0) nil)))
                        (if (get-field :f-extras fold)
                          (nconc (get-field :f-extras fold) accuracy)
                          (nconc
                           fold
                           (acons :f-extras accuracy nil))))))))))))
      (unless *maxent-debug-p*
        (ignore-errors (delete-file events))
        (ignore-errors (delete-file scores))
        (ignore-errors (delete-file output))))
    active))

;;;
;;; no longer used by scores-to-probabilities() below; kept because code
;;; outside this file may refer to it (not visible from here).
;;;
(defconstant e (exp 1d0))

(defun scores-to-probabilities (scores)
  "Turn SCORES into a probability distribution by exponentiating each score
   and normalizing by the sum.  Scores may be numbers or strings holding the
   printed representation of a number.  Returns a fresh list of
   probabilities in the order of SCORES, or NIL for empty input."
  (let* ((values (loop
                     for score in scores
                     for foo = (if (stringp score)
                                 (read-from-string score)
                                 score)
                     collect (coerce foo 'long-float)))
         ;;
         ;; shift all scores by the largest one before exponentiating: the
         ;; result is mathematically unchanged, but large scores no longer
         ;; overflow and very small ones no longer give a zero sum.
         ;;
         (shift (if values (reduce #'max values) 0))
         (exponentials (loop
                           for value in values
                           collect (exp (- value shift))))
         (sum (reduce #'+ exponentials)))
    (loop for p in exponentials collect (/ p sum))))

(defun entropy (probabilities)
  "Return the entropy, in bits, of the distribution PROBABILITIES."
  (loop
      with h = 0d0
      for p in probabilities
      ;; a zero probability contributes nothing, and its log is undefined
      unless (zerop p) do (incf h (* p (log p 2d0)))
      finally (return (- h))))

(defun baseline (profile &key condition (n 1) (resolvedp t) enhancers)
  "Compute baseline statistics over the items of PROFILE that have more than
   one reading (and, when RESOLVEDP, at least one active tree), optionally
   restricted further by CONDITION.  Each function in ENHANCERS is applied
   to every item via call-raw-hook().  Items that have as many ranks as
   results are skipped.  Returns a list of four values, as annotated on the
   return form below."
  (loop
      with nitems = 0
      for item
      in (loop
             for item
             in (let* ((condition
                        (if resolvedp
                          (if condition
                            (format
                             nil
                             "readings > 1 && t-active >= 1 && (~a)"
                             condition)
                            "readings > 1 && t-active >= 1")
                          (if condition
                            (format nil "readings > 1 && (~a)" condition)
                            "readings > 1")))
                       (items (analyze
                               profile
                               :thorough '(:flags)
                               :condition condition :gold profile)))
                  (loop
                      for enhancer in enhancers
                      do
                        (loop
                            for item in items
                            do (call-raw-hook enhancer item)))
                  items)
             for readings = (length (get-field :results item))
             for ranks = (length (get-field :ranks item))
             unless (= readings ranks)
             collect item and do (incf nitems))
      for readings = (length (get-field :results item))
      for gold = (max n (length (get-field :ranks item)))
      sum gold into gsum
      sum readings into rsum
      sum (/ gold readings) into sum
      finally
        (return
          (list
           (float (/ sum nitems))       ; average random chance
           (float (/ gsum nitems))      ; average # gold
           (float (/ rsum nitems))      ; average # results
           nitems))))                   ; total # items

(defun print-score-file (&key (output (format nil "~a/scores" (tmp :redwoods)))
                              gold name pattern condition
                              (similarities '(:bleu)))
  "For every profile found by find-tsdb-directories() for PATTERN and NAME,
   other than GOLD, score it against GOLD with summarize-scores() and write
   one summary line to the file OUTPUT."
  (with-open-file (stream output :direction :output :if-exists :supersede)
    (loop
        with *redwoods-score-similarities* = similarities
        for db in (find-tsdb-directories
                   *tsdb-home* :pattern pattern :name name)
        for name = (let ((name (get-field :database db)))
                     (unless (string= name gold) name))
        when name do
          (let* ((scores (summarize-scores
                          name gold
                          :condition condition :n 1
                          :test :id :spartanp t :loosep t))
                 (total (rest (rest (find :total scores :key #'first))))
                 (nscores (get-field :scores total))
                 (exact (get-field :exact total))
                 (tsims (get-field :tsimilarities total))
                 (nsims (get-field :nsimilarities total)))
            ;;;; (bleu (get-field :bleu (get-field :similarities total))))
            (purge-profile-cache name)
            (format
             stream
             "~,6f ~a ~a `~a'~%"
             ;;;; "~,6f ~,6f `~a'~%"
             (* 100 (divide exact nscores))
             tsims nsims
             ;;;; (if bleu (divide bleu nscores) 0.0)
             name)
            (force-output stream))))
  (purge-profile-cache gold))

(defun summarize-folds (&key (output (format nil "~a/folds" (tmp :redwoods)))
                             name pattern
                             (score :accuracy) (type :total))
  "For every selected profile, read the per-fold values selected by SCORE
   and TYPE from its `fold' relation and write mean, standard deviation,
   and range to the file OUTPUT.  Profiles are selected by PATTERN or, when
   PATTERN is NIL, taken from NAME (a string or a list); an error is
   signalled when neither is given.  Profiles with missing values are
   skipped."
  (with-open-file (stream output
                   :direction :output
                   :if-exists :supersede
                   :if-does-not-exist :create) ; *** typo
    (let* ((key (if (and (eq type :total) (eq score :accuracy))
                  :f-accuracy
                  :f-extras))
           (foo (case type
                  (:total :tsimilarities)
                  (:nbest :nsimilarities)))
           (selector
            (cond
             ((eq :f-accuracy key)
              #'(lambda (scores)
                  (read-from-string (get-field key scores) nil)))
             ((and (eq type :nbest) (eq score :accuracy))
              #'(lambda (scores)
                  (get-field
                   :naccuracy
                   (read-from-string (get-field key scores) nil))))
             (t
              #'(lambda (scores)
                  (get-field
                   score
                   (get-field
                    foo
                    (read-from-string (get-field key scores) nil))))))))
      (loop
          for profile
          in (cond
              ((not (null pattern))
               (mapcar
                #'(lambda (db) (get-field :database db))
                (find-tsdb-directories
                 *tsdb-home* :pattern pattern :name name)))
              ((and name (listp name)) name)
              ((stringp name) (list name))
              (t
               (error
                "summarize-folds():~
                 name or pattern argument missing.")))
          for values = (select
                        (list (format nil "~(~a~)" key))
                        '(:string) "fold" nil profile)
          for scores = (when values (map 'list selector values))
          when (and scores (notany 'null scores))
          do
            (let* ((n (length scores))
                   (sum (sum scores))
                   (mean (/ sum n))
                   (min (apply #'min scores))
                   (max (apply #'max scores))
                   (range (- max min))
                   ;; sample variance; defined as zero for a single fold
                   (var (if (= n 1)
                          0
                          (/ (sum (mapcar
                                   #'(lambda (x) (expt (- x mean) 2))
                                   scores))
                             (- n 1))))
                   (std-dev (sqrt var)))
              (purge-profile-cache profile)
              (format
               stream
               "~,6f ~,6f ~,6f `~a'~%"
               mean std-dev range profile)
              (force-output stream))))))

(defun wilcoxon (list1 list2)
  "Compute the Wilcoxon signed-rank statistic for the paired samples LIST1
   and LIST2, which must be of equal length.  Zero differences are dropped
   and tied absolute differences share the average of their ranks.  Returns
   two values: the smaller of the positive and negative rank sums, and the
   number of non-zero differences."
  (unless (= (length list1) (length list2))
    (error
     "wilcoxon(): given lists of different lengths: ~a and ~a~%"
     list1 list2))
  (let* ((signed-diffs
          (remove-if
           #'zerop
           (mapcar #'(lambda (x y) (- x y)) list1 list2)))
         (ranked-diffs (sort (copy-list signed-diffs) #'< :key #'abs))
         (ranks
          (loop
              with ranks = nil
              with result = nil
              with push-p = nil
              with push-n = 1
              with avg
              for (d1 d2) on ranked-diffs
              for rank from 1
              do
                ;; collect ranks for as long as the next difference is tied
                (cond
                 ((and d2 (= (abs d1) (abs d2)))
                  (incf push-n)
                  (setq push-p nil))
                 (t
                  (setq push-p t)))
                (push rank ranks)
              when push-p
              do
                (setq avg (float (/ (sum ranks) push-n)))
                (dotimes (n push-n) (push avg result))
                (setq push-n 1)
                (setq ranks nil)
              finally (return (nreverse result))))
         (signed-ranks
          (loop
              for d in ranked-diffs
              for r in ranks
              collect (if (> d 0) r (- r))))
         (w-pos (loop for r in signed-ranks when (> r 0) sum r))
         (w-neg (abs (loop for r in signed-ranks when (< r 0) sum r))))
    (values (min w-pos (abs w-neg)) (length signed-ranks))))

(defun t-test (list1 list2)
  "Paired t-test on LIST1 and LIST2, which must be of equal length.
   Returns two values: the absolute t ratio and the degrees of freedom."
  (let* ((n1 (length list1))
         (n2 (length list2))
         (n (if (not (= n1 n2))
              (error
               "t-test(): given lists of different lengths: ~a and ~a~%"
               list1 list2)
              n1))
         (diffs (mapcar #'(lambda (x y) (- x y)) list1 list2))
         (sum (sum diffs))
         (mean (/ sum n))
         (std-dev
          (sqrt (/ (sum (mapcar
                         #'(lambda (x) (expt (- x mean) 2))
                         diffs))
                   (- n 1))))
         (result (/ (* mean (sqrt n)) std-dev)))
    (values
     ;;; t ratio:
     ;;; (Use this when consulting a table of Student's t-distribution
     ;;; confidence intervals to determine the significance level at which two
     ;;; distributions differ)
     (abs result)
     ;;; degrees of freedom:
     (- n 1))))

(defun mw-t-test (list1 list2)
  "Variant of the paired t-test that works from the deviations of LIST1 and
   LIST2 from their respective means; both lists must be of equal length.
   Returns two values: the absolute test statistic and the degrees of
   freedom."
  (let* ((n1 (length list1))
         (n2 (length list2))
         (n (if (not (= n1 n2))
              (error
               "mw-t-test(): given lists of different lengths:~
                ~a and ~a~%"
               list1 list2)
              n1))
         (sum1 (sum list1))
         (sum2 (sum list2))
         (mean1 (/ sum1 n))
         (mean2 (/ sum2 n))
         (diffs1 (mapcar #'(lambda (x) (- x mean1)) list1))
         (diffs2 (mapcar #'(lambda (x) (- x mean2)) list2))
         (sum-squared-diffs
          (sum (mapcar
                #'(lambda (x y) (expt (- x y) 2))
                diffs1 diffs2)))
         (result (* (- mean1 mean2)
                    (sqrt (/ (* n (- n 1)) sum-squared-diffs)))))
    (values (abs result) (- n 1))))

;;;
;;; each entry is (level values), where the i-th value is for i degrees of
;;; freedom (see get-critical-value()); the 0.001 entry used to be split
;;; into two top-level elements by a misplaced parenthesis.
;;;
(defparameter *t-dist*
  '((0.10
     (3.078 1.886 1.638 1.533 1.476 1.44 1.415 1.397 1.383 1.372
      1.363 1.356 1.35 1.345 1.341 1.337 1.333 1.33 1.328 1.325))
    (0.05
     (6.314 2.92 2.353 2.132 2.015 1.943 1.895 1.86 1.833 1.812
      1.796 1.782 1.771 1.761 1.753 1.746 1.74 1.734 1.729 1.725))
    (0.025
     (12.706 4.303 3.182 2.776 2.571 2.447 2.365 2.306 2.262 2.228
      2.201 2.179 2.16 2.145 2.131 2.12 2.11 2.101 2.093 2.086))
    (0.01
     (31.821 6.965 4.541 3.747 3.365 3.143 2.998 2.896 2.821 2.764
      2.718 2.681 2.65 2.624 2.602 2.583 2.567 2.552 2.539 2.528))
    (0.005
     (63.657 9.925 5.841 4.604 4.032 3.707 3.499 3.355 3.25 3.169
      3.106 3.055 3.012 2.977 2.947 2.921 2.898 2.878 2.861 2.845))
    (0.001
     (318.313 22.327 10.215 7.173 5.893 5.208 4.782 4.499 4.296 4.143
      4.024 3.929 3.852 3.787 3.733 3.686 3.646 3.61 3.579 3.552)))
  "t-test upper critical values")

(defparameter *wilcoxon-dist*
  '((0.05
     (NIL NIL NIL NIL 1 2 4 6 8 11 14 17 21 26 30 36 41 47 54 60))
    (0.025
     (NIL NIL NIL NIL NIL 1 2 4 6 8 11 14 17 21 25 30 35 40 46 52))
    (0.01
     (NIL NIL NIL NIL NIL NIL 0 2 3 5 7 10 13 16 20 24 28 33 38 43))
    (0.005
     (NIL NIL NIL NIL NIL NIL NIL 0 2 3 5 7 10 13 16 19 23 28 32 37)))
  "wilcoxon upper critical values")

(defun get-critical-value (&key (level 0.05) (df 9) (sides 2) (test :ttest))
  "Look up the critical value for significance LEVEL, divided by SIDES
   (which must be 1 or 2), at position DF in *t-dist* or *wilcoxon-dist*,
   according to TEST.  Returns NIL when the table has no entry for that
   level or position."
  (unless (or (= sides 1) (= sides 2))
    (error "critical-value(): sides should be 1 or 2. given ~a." sides))
  ;;
  ;; a non-positive .df. (e.g. from wilcoxon() when all differences are zero)
  ;; has no table entry; asking nth() for a negative index would be an error.
  ;;
  (when (and (integerp df) (> df 0))
    (nth (- df 1)
         (cadr (assoc
                (/ level sides)
                (case test
                  ((:t :tee :ttest) *t-dist*)
                  ((:w :wilcoxon) *wilcoxon-dist*))
                ;;
                ;; compare with a small tolerance rather than eql(), so that
                ;; levels in another float format still find their row.
                ;;
                :test #'(lambda (a b) (< (abs (- a b)) 1e-6)))))))

(defun compare-folds (name1 name2
                      &key (test :t) (stream *tsdb-io*)
                           (level 0.05) (sides 2))
  "Compare the per-fold `f-accuracy' values of profiles NAME1 and NAME2 with
   t-test() or wilcoxon(), according to TEST, and print the statistic, the
   critical value for LEVEL and SIDES, the second value returned by the
   test, and both profile names to STREAM."
  (let* ((acc1 (select '("f-accuracy") '(:string) "fold" nil name1))
         (acc2 (select '("f-accuracy") '(:string) "fold" nil name2))
         (acc1 (map 'list
                 #'(lambda (x) (read-from-string (cdar x) nil nil))
                 acc1))
         (acc2 (map 'list
                 #'(lambda (x) (read-from-string (cdar x) nil nil))
                 acc2))
         (testfun (case test
                    ((:t :ttest) #'t-test)
                    ((:w :wilcoxon) #'wilcoxon))))
    (multiple-value-bind (stat df) (funcall testfun acc1 acc2)
      (let ((cv (get-critical-value
                 :level level :sides sides :test test :df df)))
        (purge-profile-cache name1)
        (purge-profile-cache name2)
        (format
         stream
         "~&~,6f ~a ~a~%`~a'~%`~a'~%"
         stat cv df name1 name2)))))

(defun create-evaluation-file (data
                               &optional (gold data)
                               &key (condition "readings > 1 && t-active > 0")
                                    (n 5) (test :id) supersede (loosep t)
                                    (stream *tsdb-io*)
                                    (similarities '(:neva :wa)))
  "Score the profile DATA against GOLD and write the per-item scores to a
   gzip-compressed file `eval.gz' in the directory of DATA, one list per
   score type with the score key as its first element.  When `eval' or
   `eval.gz' already exists, it is replaced only if SUPERSEDE is non-nil;
   otherwise nothing is done and NIL is returned."
  (let* ((compress-command "gzip -c -9")
         (tsdb-dir (find-tsdb-directory data))
         (eval (make-pathname :directory tsdb-dir :name "eval"))
         (eval-gz (make-pathname
                   :directory tsdb-dir :name "eval" :type "gz")))
    ;;; clean up:
    (when (or (probe-file eval-gz) (probe-file eval))
      (if (not supersede)
        (return-from create-evaluation-file)
        (progn
          (when (probe-file eval-gz) (delete-file eval-gz))
          (when (probe-file eval) (delete-file eval)))))
    (format
     stream
     "~&[~a] create-evaluation-file(): creating `eval.gz' for ~a~%"
     (current-time :long :short) data)
    (multiple-value-bind (eval-stream foo pid)
        (run-process
         compress-command
         :wait nil :input :stream
         :output eval-gz :if-output-exists :supersede
         :error-output nil)
      (declare (ignore foo))
      (let* ((*redwoods-score-similarities* similarities)
             (thorough (when (eq test :derivation) '(:derivation)))
             (thorough (if *redwoods-score-similarities*
                         (cons :surface thorough)
                         thorough))
             (gitems-unsifted
              (analyze
               gold
               :thorough thorough :condition condition
               :gold gold :readerp (eq test :derivation)))
             (items
              (loop
                  for item in (analyze
                               gold
                               :thorough thorough :condition condition
                               :score data :scorep t
                               :readerp (eq test :derivation))
                  for gitem in gitems-unsifted
                  for readings = (length (get-field :results gitem))
                  for ranks = (length (get-field :ranks gitem))
                  unless (or (= readings ranks)
                             (not (= (get-field :i-id gitem)
                                     (get-field :i-id item))))
                  collect (copy-tree item)))
             (gitems
              (loop
                  for item in gitems-unsifted
                  for readings = (length (get-field :results item))
                  for ranks = (length (get-field :ranks item))
                  unless (= readings ranks)
                  collect (copy-tree item)))
             (nkeys
              (loop
                  for sim in similarities
                  collect
                    ;;; eg., (:nwa :nbleu :nneva)
                    (read-from-string (format nil ":n~a" sim))))
             (nkey-alist (pairlis similarities nkeys))
             (keys (append
                    (list :i-id :accuracy :naccuracy) ;; :r-id
                    similarities nkeys))
             (data (make-list (length keys)))
             (scores (pairlis keys data)))
        (loop
            for item in items
            for gitem in gitems
            for i-id = (get-field :i-id item)
            do
              (multiple-value-bind (i score loosep similarities)
                  (score-item item gitem :test test :n n :loosep loosep)
                ;; (= i 0) means no match
                ;; (<= i n) means we have a hit
                ;; (= i 1) means exact match
                (declare (ignore loosep))
                (push (if (= i 1) (float score) 0)
                      (get-field :accuracy scores))
                (push (if (<= i n) (float score) 0)
                      (get-field :naccuracy scores))
                (push i-id (get-field :i-id scores))
                (loop
                    for (key score nscore) in similarities
                    do
                      (push (float score) (get-field key scores))
                      (push (float nscore)
                            (get-field
                             (get-field key nkey-alist) scores)))))
        ;; values were pushed, so put each list back into item order
        (loop
            for (key . list) in scores
            do (setf (get-field key scores) (nreverse list)))
        (loop
            for list in scores
            do
              (prin1 list eval-stream)
              (terpri eval-stream)))
      (force-output eval-stream)
      (close eval-stream)
      (sys:os-wait nil pid))))

(defun batch-create-evaluation-files
    (&key pattern supersede gold n
          (condition "readings > 1 && t-active > 0")
          (similarities *redwoods-score-similarities*))
  "Call create-evaluation-file() against GOLD for every profile matching
   PATTERN, except GOLD itself.  N is passed on only when given, so that
   create-evaluation-file() otherwise uses its own default."
  (loop
      for db in (find-tsdb-directories *tsdb-home* :pattern pattern)
      for name = (let ((name (get-field :database db)))
                   (unless (string= name gold) name))
      when name
      do
        ;;
        ;; passing `:n nil' would override the default in
        ;; create-evaluation-file(), which then compares ranks against NIL.
        ;;
        (apply
         #'create-evaluation-file
         name gold
         :condition condition
         :supersede supersede
         :similarities similarities
         (when n (list :n n)))
        (purge-profile-cache name))
  (purge-profile-cache gold))

(defun read-evaluation-file (profile &key (score :accuracy))
  "Return the list of values stored under the key SCORE in the `eval' file
   of PROFILE, or NIL when there is no such entry.  A compressed `eval.gz'
   is unpacked for reading and compressed again afterwards.  Signals an
   error when neither `eval' nor `eval.gz' exists."
  (let* ((tsdb-dir (find-tsdb-directory profile))
         (eval (make-pathname :directory tsdb-dir :name "eval"))
         (eval-gz (make-pathname
                   :directory tsdb-dir :name "eval" :type "gz"))
         (did-unzip-p nil))
    (when (and (not (probe-file eval-gz)) (not (probe-file eval)))
      (error
       "read-evaluation-file(): `eval(.gz)' does not exists for `~a'."
       profile))
    (when (probe-file eval-gz)
      (run-process (format nil "gunzip ~a" eval-gz) :wait t)
      (setq did-unzip-p t))
    (if (not (probe-file eval))
      (error
       "read-evaluation-file(): cannot find `eval' file for `~a'."
       profile)
      (with-open-file (in eval :direction :input)
        (loop
            for list = (read in nil nil)
            until (or (null list) (eq (first list) score))
            finally
              (progn
                (when did-unzip-p
                  (run-process (format nil "gzip -9 ~a" eval) :wait t))
                (return (rest list))))))))

(defun summarize-evaluation-file (profile &optional (stream *tsdb-io*))
  "Print a summary of the `eval' file of PROFILE to STREAM: the mean of each
   score list (as a percentage for :accuracy and :naccuracy) and the number
   of items.  A compressed `eval.gz' is unpacked for reading and compressed
   again afterwards.  Returns NIL."
  (let* ((tsdb-dir (find-tsdb-directory profile))
         (eval (make-pathname :directory tsdb-dir :name "eval"))
         (eval-gz (make-pathname
                   :directory tsdb-dir :name "eval" :type "gz"))
         (did-unzip-p nil))
    (when (and (not (probe-file eval-gz)) (not (probe-file eval)))
      (error
       "summarize-evaluation-file(): `eval(.gz)' does not exists for `~a'."
       profile))
    (when (probe-file eval-gz)
      ;; unpack the compressed file itself, as read-evaluation-file() does
      (run-process (format nil "gunzip ~a" eval-gz) :wait t)
      (setq did-unzip-p t))
    (if (not (probe-file eval))
      (error
       "summarize-evaluation-file(): cannot find `eval' file for `~a'."
       profile)
      (with-open-file (in eval :direction :input)
        (loop
            with total-items
            for list = (read in nil nil)
            until (null list)
            do
              (case (car list)
                ((:accuracy :naccuracy)
                 (format
                  stream
                  "~&~a: ~,2f~%"
                  (car list)
                  (* (/ (sum (cdr list)) (length (cdr list))) 100)))
                (:i-id
                 (setq total-items (length (cdr list))))
                (t
                 (format
                  stream
                  "~&~a: ~,3f~%"
                  (car list)
                  (/ (sum (cdr list)) (length (cdr list))))))
            finally (format stream "~&# Items: ~a~%" total-items))))
    (when did-unzip-p
      (run-process (format nil "gzip -9 ~a" eval) :wait t)))
  nil)

(defun test-evaluation-scores (profile1 profile2
                               &key (score :accuracy)
                                    (test :signtest)
                                    (tails :both))
  "Read the SCORE values of PROFILE1 and PROFILE2 with
   read-evaluation-file() and hand both sequences, together with TAILS, to
   the function from the `stats' package that TEST selects.  Signals an
   error for an unknown TEST."
  (let ((scores1 (read-evaluation-file profile1 :score score))
        (scores2 (read-evaluation-file profile2 :score score)))
    (case test
      ((:signtest :binomial)
       (stats:sign-test-on-sequences scores1 scores2 :tails tails))
      ((:wilcoxon :signed-rank :wilcoxon-signed-rank)
       (stats:wilcoxon-signed-rank-test-on-sequences
        scores1 scores2 :tails tails))
      ((:ttest :paired-ttest)
       (stats:t-test-paired-on-sequences scores1 scores2 :tails tails))
      (t
       (error "test-evaluation-scores(): unknown test `~a'." test)))))