;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; at this point, we multiply out PoS values on all tokens, where for each
;;; original token as many additional tokens are created (in the same chart
;;; cell) as there are PoS readings.  at this point, we start distinguishing
;;; between tokens that activate native lexical entries (LEs), vs. those that
;;; activate generic LEs.  in the token universe, this distinction is made by
;;; virtue of +TRAIT, with generic_trait reserved for generic LEs.  the two
;;; sets of tokens (in each cell) do not overlap, i.e. for a single original
;;; token with two PoS readings, we end up with a total of three new tokens.
;;; the pair of rules below resembles a recursive function, terminating once
;;; the PoS list has been reduced to a singleton element.  form-based named
;;; entities identified earlier avoid this kind of PoS multiplication because
;;; they have already emptied out their PoS list.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; even though we originally made sure all tokens had a fully specified +TNT
;; value, intervening rules could have `leaked' PoS information.  if so, once
;; again, fully annul the +TNT value.
;;
;; the input test is an anti_string as the first +TAGS element; the output
;; copies +FORM, +TRAIT, +CLASS, +PRED, and +CARG and sets +TNT to null_tnt.
;;
tnt_default_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form,
             +TRAIT #trait,
             +CLASS #class,
             +PRED #pred,
             +CARG #carg,
             +TNT.+TAGS < anti_string, ... > ] >,
  +OUTPUT < [ +FORM #form,
              +TRAIT #trait,
              +CLASS #class,
              +PRED #pred,
              +CARG #carg,
              +TNT null_tnt ] > ].

;;
;; recursive step: the input carries at least two tags and probabilities (the
;; rest of either list is an ne-list).  the first output token takes the head
;; tag and probability and is marked generic_trait; the second output token
;; keeps the remaining tags and probabilities, with no +TRAIT constraint.
;;
tnt_recurse_tmr := token_mapping_rule &
[ +INPUT < [ +FORM #form,
             +TRAIT anti_trait,
             +CLASS #class,
             +PRED #pred,
             +CARG #carg,
             +IDS #ids,
             +FROM #from,
             +TO #to,
             +TNT.+TAGS < #tag . #tags & ne-list >,
             +TNT.+PRBS < #prb . #prbs & ne-list > ] >,
  +OUTPUT < [ +FORM #form,
              +TRAIT generic_trait,
              +CLASS #class,
              +PRED #pred,
              +CARG #carg,
              +IDS #ids,
              +FROM #from,
              +TO #to,
              +TNT.+TAGS < #tag >,
              +TNT.+PRBS < #prb > ],
            [ +FORM #form,
              +CLASS #class,
              +PRED #pred,
              +CARG #carg,
              +IDS #ids,
              +FROM #from,
              +TO #to,
              +TNT.+TAGS #tags,
              +TNT.+PRBS #prbs ] >,
  +POSITION "O1@I1, O2@I1" ].

;;
;; terminating step: the input carries exactly one tag and one probability.
;; the first output token keeps that pair and is marked generic_trait; the
;; second output token is marked native_trait and has its +TNT set to null_tnt.
;;
tnt_terminate_tmr := token_mapping_rule &
[ +INPUT < [ +FORM #form,
             +TRAIT anti_trait,
             +CLASS #class,
             +PRED #pred,
             +CARG #carg,
             +IDS #ids,
             +FROM #from,
             +TO #to,
             +TNT.+TAGS < #tag >,
             +TNT.+PRBS < #prb > ] >,
  +OUTPUT < [ +FORM #form,
              +TRAIT generic_trait,
              +CLASS #class,
              +PRED #pred,
              +CARG #carg,
              +IDS #ids,
              +FROM #from,
              +TO #to,
              +TNT.+TAGS < #tag >,
              +TNT.+PRBS < #prb > ],
            [ +FORM #form,
              +TRAIT native_trait,
              +CLASS #class,
              +PRED #pred,
              +CARG #carg,
              +IDS #ids,
              +FROM #from,
              +TO #to,
              +TNT null_tnt ] >,
  +POSITION "O1@I1, O2@I1" ].

;;;
;;; with singleton PoS readings multiplied out in each chart cell, we can prune
;;; undesirable alternatives, e.g. a foreign word reading when there also is a
;;; common noun.  also, ditch PoS readings with very low probability, and ones
;;; for which no PoS-activated generic entries exist anyway (function words).
;;;

;;
;; drop any token whose sole probability is written with a zero directly after
;; the decimal point (`0.0...' or `.0...'); the output list is empty.
;;
tnt_ditch_unlikely_tmr := token_mapping_rule &
[ +INPUT < [ +TNT [ +PRBS < ^0?\.0.*$ > ] ] >,
  +OUTPUT < > ].

;;
;; _fix_me_
;; experimentally, also ditch PoS information on punctuation-only tokens.  we
;; appear to get noun and adjective readings for n- and m-dashes, which hardly
;; can do us any good.                                         (24-sep-08; oe)
;;
tnt_ditch_punctuation_tmr := token_mapping_rule &
[ +INPUT < [ +FORM ^[[:punct:]]+$,
             +TNT [ +TAGS ne-list ] ] >,
  +OUTPUT < > ].

;;
;; on all tokens that we expect to activate generic entries, make the +PRED
;; value reflect the orthography and PoS tag.
;;
;; applies to generic_trait tokens of class non_ne whose +PRED matches
;; anti_string, whose +CARG is a non-empty string, and which carry exactly one
;; PoS tag starting with an alphabetic character.  the new +PRED is built from
;; lc() of the full +CARG (capture group 1) and lc() of the first character of
;; the tag (capture group 1), wrapped as `_<carg>_<letter>_unk_rel'.  all other
;; properties are copied over unchanged.
;;
generic_pred_tmr := token_mapping_rule &
[ +INPUT < [ +FORM #form,
             +TRAIT #trait & generic_trait,
             +CLASS #class & non_ne,
             +PRED anti_string,
             +CARG #carg & ^(.+)$,
             +IDS #ids,
             +FROM #from,
             +TO #to,
             +TNT #tnt & [ +TAGS < ^([[:alpha:]])(.*)$ > ] ] >,
  +OUTPUT < [ +FORM #form,
              +TRAIT #trait,
              +CLASS #class,
              +PRED "_${lc(I1:+CARG:1)}_${lc(I1:+TNT.+TAGS.FIRST:1)}_unk_rel",
              +CARG #carg,
              +IDS #ids,
              +FROM #from,
              +TO #to,
              +TNT #tnt ] >,
  +POSITION "O1@I1" ].