;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; Readtable: augmented-readtable-*- ;; ;; Copyright (C) Paul Meurer 2000 - 2007. All rights reserved. ;; paul.meurer@aksis.uib.no ;; Aksis, UNIFOB, University of Bergen ;; (in-package :cgp) (defclass feature-filter () ((filter-bv :initform nil :initarg :filter-bv :accessor filter-bitvector) (pos-list :initform nil :initarg :pos-list) (substitutions :initform nil :initarg :substitutions :accessor string-substitutions))) (defmethod map-filter-features ((filter feature-filter)) nil) (defmethod map-syntactic-functions ((filter feature-filter)) nil) (defclass feature-map-filter (feature-filter) ((tagger :initform nil :initarg :tagger :reader multi-tagger) (map-filter-features :initform nil :initarg :map-filter-features :reader map-filter-features) ;; rename! (map-syntactic-functions :initform () :initarg :syntactic-functions :reader map-syntactic-functions) (feature-mapping-fn :initform nil :initarg :feature-mapping-fn :reader feature-mapping-fn))) (defmethod initialize-instance :after ((filter feature-map-filter) &key syntactic-functions &allow-other-keys) (with-slots (map-syntactic-functions map-filter-features) filter (setf map-syntactic-functions (loop for sf in syntactic-functions collect (position sf map-filter-features))))) (defun %filter-features (feature-bv filter-bv &optional union-bv) (let ((filtered-feature-bv (or union-bv (make-array (length feature-bv) :element-type 'bit :initial-element 0)))) (loop for i from 0 to (1- (length feature-bv)) when (and (= 1 (bit filter-bv i)) (= 0 (bit filtered-feature-bv i))) do (setf (bit filtered-feature-bv i) (bit feature-bv i))) filtered-feature-bv)) (defmethod %filter-map-features ((filter feature-map-filter) feature-bv &optional union-bv) (with-slots (map-filter-features feature-mapping-fn) filter (let ((filtered-feature-bv (or union-bv (make-array (length map-filter-features) :element-type 'bit :initial-element 0))) (features (code-features feature-bv))) (when feature-mapping-fn (setf features (funcall #'feature-mapping-fn features))) (loop for i from 0 to (1- (length map-filter-features)) for f across map-filter-features when (find f features :test #'string-equal) do (setf (bit filtered-feature-bv i) 1)) filtered-feature-bv))) (defun part-of-speech (feature-bv pos-list) (dolist (pos pos-list) (when (bit feature-bv pos) (return pos)))) (defmethod filter-features ((filter t) readings) readings) (defmethod filter-features ((filter feature-map-filter) readings) (with-slots (pos-list tagger) filter (let ((*tagger* tagger) (filtered ())) (dolist (reading readings) (when (car reading) (destructuring-bind (lemma . bv) reading (let ((found-p nil)) (loop for filtered-reading in filtered until found-p ;; do (print (list lemma (car filtered-reading))) do (cond ((string/= lemma (car filtered-reading)) nil) #+test ((= (part-of-speech bv pos-list) (part-of-speech (cdr filtered-reading) pos-list)) (%filter-map-features filter bv (cdr filtered-reading)) (setf found-p t)) (t nil))) (unless found-p (push (cons lemma (%filter-map-features filter bv)) filtered)))))) filtered))) (defmethod filter-features ((filter feature-filter) readings) (with-slots (filter-bv pos-list) filter (let ((filtered ())) (dolist (reading readings) (when (car reading) (destructuring-bind (lemma . bv) reading (let ((found-p nil)) (loop for filtered-reading in filtered until found-p do (cond ((string/= lemma (car filtered-reading)) nil) ((= (part-of-speech bv pos-list) (part-of-speech (cdr filtered-reading) pos-list)) (%filter-features bv filter-bv (cdr filtered-reading)) (setf found-p t)) (t nil))) (unless found-p (push (cons lemma (%filter-features bv filter-bv)) filtered)))))) filtered))) (defmethod stringify-readings ((filter t) readings &key feature-vector (downcase-p t)) (collecting (dolist (reading readings) (when reading (collect (cons (car reading) (when (cdr reading) (if downcase-p (mapcar (lambda (f) (string-downcase f)) (code-features (cdr reading) feature-vector)) (code-features (cdr reading) feature-vector))))))))) (defmethod stringify-readings ((filter feature-filter) readings &key feature-vector) (with-slots (substitutions) filter (mapcar (lambda (reading) (cons (car reading) (mapcar (lambda (f) (subst-substrings (string-downcase f) substitutions)) (code-features (cdr reading) feature-vector)))) readings))) #+test (let ((*tagger* *nbo-tagger*)) (print (stringify-readings *nbo-feature-filter* (print (filter-features *nbo-feature-filter* (list (cons "fifi" (encode-features 'subst 'noeyt)) (cons "fifi" (encode-features 'subst 'mask)))))))) ;; This gives the filtered features in correct order, to be put into .csp file. #+test (let ((*tagger* *nbo-tagger*)) (princ (cdar (stringify-readings *nbo-feature-filter* (filter-features *nbo-feature-filter* (list (cons "" (filter-bitvector *nbo-feature-filter*)))))))) (defparameter *nbo-feature-filter* (make-instance 'feature-filter :filter-bv (let ((*tagger* *nbo-tagger*)) (apply #'encode-features '(subst appell ent ub be fl mask noeyt adj verb fem pos m/f pres inf sup perf-part pret imp pass komp prop n unorm adv fork uboey prep @adv pref interj det prep+subst kvant prob @tittel symb sbu dem pron poss @s-pred @interj pers @ konj+adj \3 forst prep+adj @loes-np @ prep+subst+subst subst+subst det+adj prep+det+subst res clb prep+prep @adv> interj+adv verb+det konj+adv+adj \2 subst+prep+subst pron+verb+verb \1 adj+verb konj+adv+prep @i-obj gen @det> prep+konj+prep + subst+prep adj+det prep+adv nynorsk adv+prep verb+verb sbu+adj adv+adj prep+adj+adj interj+adj subst+konj+subst konj+det+adj adv+subst verb+det+subst prep+perf-part+subst prep+adv+subst v+v @kon @adj> prep+det+subst+kon+det+subst adj+prep+subst verb+subst subst+kvant prep+subst+prep+sbu adv+adv+prep mask/fem/noeyt prep+det+sbu ub/be inf-merke det+adj+det subst+prep+adj+subst pron+prep+adj det+subst+prep+subst mask/fem adj+kon+adj part+prep adv+prep+subst refl adj+adj inf/pres prep+subst+konj+subst adv+adj+prep fl/be subst+adj subst+v+subst subst+perf-part ent/fl ;; refl4 a1 t ;; those are fishy ;; added after calculation below ukjent samset inter bu @sbu @infmerke ;; fra multi-tagger.lisp @ @s-gr @ inf m/f pres komp imp perf-part pret st-form prop unorm adv fork prep @adv pref det interj prep+subst tr kvant sideform dem symb poss sbu i pron adj+subst uttr pers @interj prob prep+adj @ prep+subst+prep nom subst+kon+subst @subj forst prep+adj+subst prep+subst+subst @fv prep+subst+kon+subst be konj+adv+prep prep+prop \1 @iv sp subst+verb interj+adv suff prep+konj+prep res subst+subst clb hoeflig bokmaal \2 adj+verb prep+adv adv+adj subst+prep+subst det+adj prep+det+subst verb+det subst+prep @adv> konj+adv+adj prep+adv+subst pron+verb+verb @det> verb+verb adj+det adv+adv gen @i-obj adv+verb @adj> prep+perf-part+subst adv+adv+prep adv+subst prep+prep prep+adj+adj konj+det+adj adj+kon+adj @kon verb+det+subst prep+det+sbu subst+perf-part subst+kvant subst+verb+subst ikke-hum prep+kon+subst @sbu interj+adj subst+prep+adj+subst adj+adj sbu+prep inf-merke prep+det+subst+kon+subst part+prep prep+subst+konj+adv verb+subst refl1 det+subst+prep+subst prep+subst+prep+sbu prep+det+subst+kon+det+subst prep+subst+konj+subst ;; added after calculation below ukjent samset @infmerke test uboey >>> <<< ;; fra multi-tagger.lisp ;; fra norsk-map.lisp; evaluate form below @ @app @