;;; -*- Mode: tdl; Coding: utf-8; -*-


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; at this point, we multiply out PoS values on all tokens: for each original
;;; token, as many additional tokens are created (in the same chart cell) as
;;; there are PoS readings.  from here on, we distinguish between tokens that
;;; activate native lexical entries (LEs) and those that activate generic LEs.
;;; in the token universe, this distinction is made by virtue of +TRAIT, with
;;; generic_trait reserved for generic LEs.  the two sets of tokens (in each
;;; cell) do not overlap, i.e. for a single original token with two PoS
;;; readings, we end up with a total of three new tokens: one generic token
;;; per PoS reading, plus a single native token.
;;; the pair of rules below resembles a recursive function, terminating once
;;; the PoS list has been reduced to a singleton element.  form-based named
;;; entities identified earlier avoid this kind of PoS multiplication because
;;; they have already emptied out their PoS list.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; even though we originally made sure all tokens had a fully specified +TNT
;; value, intervening rules could have `leaked' PoS information.  if so, once
;; again, fully annul the +TNT value.
;;
tnt_default_tmr := one_one_tmt &
[
  ;; input: a single token whose first +TAGS element is constrained to
  ;; anti_string; the remainder of the +TAGS list is left open.
  +INPUT < [ +TNT [ +TAGS < anti_string, ... > ],
             +FORM #orth, +CLASS #cls, +TRAIT #tr,
             +CARG #arg, +PRED #rel ] >,
  ;; output: +FORM, +TRAIT, +CLASS, +PRED, and +CARG are re-entrant with the
  ;; input, while +TNT is replaced wholesale by null_tnt.
  ;; NOTE(review): +IDS, +FROM, and +TO are not mentioned here; presumably
  ;; one_one_tmt takes care of them -- confirm against its definition.
  +OUTPUT < [ +TNT null_tnt,
              +FORM #orth, +CLASS #cls, +TRAIT #tr,
              +CARG #arg, +PRED #rel ] > ].

tnt_recurse_tmr := token_mapping_rule &
[ +POSITION "O1@I1, O2@I1",
  ;; input: a token with +TRAIT anti_trait whose +TAGS and +PRBS lists each
  ;; have a head plus an ne-list tail, i.e. more than one element remains.
  +INPUT < [ +TRAIT anti_trait,
             +TNT [ +PRBS < #p . #ps & ne-list >,
                    +TAGS < #t . #ts & ne-list > ],
             +FORM #orth, +CARG #arg, +PRED #rel, +CLASS #cls,
             +FROM #start, +TO #end, +IDS #tokids ] >,
  +OUTPUT <
            ;; O1: generic_trait token carrying only the head tag and its
            ;; probability, as singleton lists.
            [ +TRAIT generic_trait,
              +TNT [ +PRBS < #p >, +TAGS < #t > ],
              +FORM #orth, +CARG #arg, +PRED #rel, +CLASS #cls,
              +FROM #start, +TO #end, +IDS #tokids ],
            ;; O2: token carrying the list tails.  +TRAIT is left unspecified
            ;; on this token.
            ;; NOTE(review): presumably this keeps it eligible for the
            ;; anti_trait input constraint on the next pass -- confirm.
            [ +TNT [ +PRBS #ps, +TAGS #ts ],
              +FORM #orth, +CARG #arg, +PRED #rel, +CLASS #cls,
              +FROM #start, +TO #end, +IDS #tokids ] > ].

tnt_terminate_tmr := token_mapping_rule &
[ +POSITION "O1@I1, O2@I1",
  ;; input: a token with +TRAIT anti_trait whose +TAGS and +PRBS lists are
  ;; both singletons.
  +INPUT < [ +TRAIT anti_trait,
             +TNT [ +PRBS < #p >, +TAGS < #t > ],
             +FORM #orth, +CARG #arg, +PRED #rel, +CLASS #cls,
             +FROM #start, +TO #end, +IDS #tokids ] >,
  +OUTPUT <
            ;; O1: generic_trait token that keeps the one remaining tag and
            ;; its probability.
            [ +TRAIT generic_trait,
              +TNT [ +PRBS < #p >, +TAGS < #t > ],
              +FORM #orth, +CARG #arg, +PRED #rel, +CLASS #cls,
              +FROM #start, +TO #end, +IDS #tokids ],
            ;; O2: native_trait token whose +TNT is null_tnt, i.e. it carries
            ;; no tag from the input.
            [ +TRAIT native_trait,
              +TNT null_tnt,
              +FORM #orth, +CARG #arg, +PRED #rel, +CLASS #cls,
              +FROM #start, +TO #end, +IDS #tokids ] > ].

;;;
;;; with singleton PoS readings multiplied out in each chart cell, we can prune
;;; undesirable alternatives, e.g. a foreign word reading when there also is a
;;; common noun.  also, ditch PoS readings with very low probability, and ones
;;; for which no PoS-activated generic entries exist anyway (function words).
;;;

tnt_ditch_unlikely_tmr := token_mapping_rule &
[ ;; the empty output list means a matching token is dropped.
  +OUTPUT < >,
  ;; match a singleton +PRBS list whose one string consists of an optional
  ;; leading `0', a decimal point, and then a `0' digit (e.g. `0.05', `.012').
  ;; NOTE(review): assumes probabilities arrive in plain decimal notation, in
  ;; which case this is everything below 0.1 -- confirm against the tagger.
  +INPUT < [ +TNT [ +PRBS < ^0?\.0.*$ > ] ] > ].

;;
;; _fix_me_
;; experimentally, also ditch PoS information on punctuation-only tokens.  we
;; appear to get noun and adjective readings for en- and em-dashes, which can
;; hardly do us any good.                                      (24-sep-08; oe)
;;
tnt_ditch_punctuation_tmr := token_mapping_rule &
[ ;; the empty output list means a matching token is dropped.
  +OUTPUT < >,
  ;; match a token whose +FORM consists of one or more [[:punct:]] characters
  ;; and nothing else, and whose +TAGS list is an ne-list.
  +INPUT < [ +FORM ^[[:punct:]]+$,
             +TNT [ +TAGS ne-list ] ] > ].


;;
;; on all tokens that we expect to activate generic entries, make the +PRED
;; value reflect the orthography and PoS tag.
;;
generic_pred_tmr := token_mapping_rule &
[ +POSITION "O1@I1",
  ;; input: a generic_trait, non_ne token whose +PRED is still anti_string.
  ;; capture group 1 on +CARG spans the whole (non-empty) value; capture
  ;; group 1 on the single tag is its first character, which must be
  ;; alphabetic.
  +INPUT < [ +PRED anti_string,
             +TRAIT #tr & generic_trait,
             +CLASS #cls & non_ne,
             +CARG #arg & ^(.+)$,
             +TNT #pos & [ +TAGS < ^([[:alpha:]])(.*)$ > ],
             +FORM #orth,
             +FROM #start, +TO #end, +IDS #tokids ] >,
  ;; output: everything is carried over unchanged except +PRED, which is
  ;; assembled from the two capture groups above, each wrapped in lc().
  ;; NOTE(review): lc() presumably lower-cases its argument -- confirm
  ;; against the chart mapping documentation.
  +OUTPUT < [ +PRED "_${lc(I1:+CARG:1)}_${lc(I1:+TNT.+TAGS.FIRST:1)}_unk_rel",
              +TRAIT #tr,
              +CLASS #cls,
              +CARG #arg,
              +TNT #pos,
              +FORM #orth,
              +FROM #start, +TO #end, +IDS #tokids ] > ].