;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; a second pass at lightweight NEs, now that we have further split up tokens
;;; at hyphens and dashes.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;
;;; decades (German): |1950er|, |50er|, |1950ern|, and |50ern|.
;;;
;;; NOTE(review): the optional century prefix is `1[0-9]', so |2010er| is not
;;; matched by this pattern --- confirm whether that is intended.
;;;
decade_ne_1_tmr := ne_tmt &
[ +INPUT < [ +FORM ^(1[0-9])?[0-9]0ern?$ ] >,
  +OUTPUT < [ +CLASS plur_ne ] > ].

;;
;; _fix_me_
;; this latter type (plural of decades and, in principle, other names) actually
;; is ambiguous with the possessive (i suspect), e.g. `the 1950's style'.  so,
;; maybe, optional rules for those?                             (24-sep-08; oe)
;;
;; Not for German ...
#|
decade_ne_2_tmr := two_one_tmt &
[ +INPUT < [ +FORM ^((1[0-9])?[0-9]0)$,
             +TRAIT #trait, +CLASS non_ne,
             +PRED #pred, +CARG #carg ],
           [ +FORM "('[sS])", +CLASS non_ne ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}${I2:+FORM:1}",
              +TRAIT #trait, +CLASS plur_ne,
              +PRED #pred, +CARG #carg, +TNT null_tnt ] > ].
|#

;;;
;;; numerals, including some sub-sets (days of the month or years).
;;;

;;
;; (candidate) days of the month: |1| to |9|, |10| to |29|, |30|, and |31|
;;
card_or_dom_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^([1-9]|[1-2][0-9]|3[01])$ ] >,
  +OUTPUT < [ +CLASS card_or_dom_ne ] > ].

;;
;; (candidate) years: |950|, |1805|, |1957|, |2005|, et al.  the pattern accepts
;; any two- to four-digit string whose optional leading (fourth) digit is 1 or 2.
;;
card_or_year_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[12]?[0-9]{2}[0-9]?$ ] >,
  +OUTPUT < [ +CLASS card_or_year_ne ] > ].

;;
;; any sequence of digits not starting with |0|, with an optional sign.  unlike
;; what an earlier version of this comment suggested, no trailing decimal comma
;; is accepted here; numbers with a decimal comma are handled by the next rule.
;;
;; the hyphen comes first in the character class so that it is a literal minus
;; sign; written as `[+-±~]' it would denote the range from |+| to |±|, which
;; includes digits and letters.
;;
card_ne_1_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±~]?[1-9][0-9]*$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; floating point numbers (German decimal comma), with optional sign and at
;; least one digit after the comma; digits before the comma are optional.
;; as above, the hyphen leads the character class to keep it literal.
;;
card_ne_2_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±~]?[0-9]*,[0-9]+$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; US-style or German separators, optional sign and decimals: e.g.
;; |23,000.-|
;;
;; one to three leading digits (not starting with |0|), one or more groups of a
;; separator (|,| or |.|) plus exactly three digits, and an optional final
;; separator followed by either digits or a single |-|.  the hyphen leads the
;; sign character class so that it is a literal minus sign rather than the
;; start of a range from |+| to |±|.
;;
card_ne_3_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±]?[1-9][0-9]{0,2}([,.][0-9]{3})+([,.]([0-9]*|-))?$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; merge a card_ne token (digits with optional sign) with a following |000|
;; token of the same class and trait into a single token whose form is the
;; concatenation of the two input forms.
;;
combine_card_ne_tmr := two_one_tmt &
[ +INPUT < [ +CLASS #class & card_ne, +TRAIT #trait,
             +FORM ^([-+±]?[0-9]+)$ ],
           [ +CLASS #class, +TRAIT #trait,
             +FORM ^(000)$ ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait,
              +FORM "${I1:+FORM:1}${I2:+FORM:1}" ] > ].

;;
;; for two card_ne tokens of the same class and trait (the second one digits
;; only), output a token with the concatenated form that starts at the +FROM
;; of the first and ends at the +TO of the second.  the two tokens are matched
;; as +CONTEXT rather than +INPUT.
;;
;; NOTE(review): in the damaged copy of this file the +POSITION string broke
;; off after `C1'; the closing `<C2" ].' has been restored --- confirm against
;; the upstream file.
;;
opt_combine_card_ne_tmr := token_mapping_rule &
[ +CONTEXT < [ +CLASS #class & card_ne, +TRAIT #trait,
               +FORM ^([-+±]?[0-9]+)$, +FROM #from ],
             [ +CLASS #class, +TRAIT #trait, +TO #to,
               +FORM ^([0-9]+)$ ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait,
              +FROM #from, +TO #to,
              +FORM "${C1:+FORM:1}${C2:+FORM:1}" ] >,
  +POSITION "O1@C1, O1@C2, C1<C2" ].

;;;
;;; ordinals: in German, digits followed by a period.
;;;

;;
;; (candidate) days of the month as ordinals: |1.| to |31.|
;;
;; NOTE(review): only the +OUTPUT part of this rule survived in the damaged
;; copy of this file.  the rule name and the +INPUT pattern are reconstructed
;; by analogy with card_or_dom_ne_tmr (same day-of-month range, plus the final
;; period) --- confirm against the upstream file.
;;
ord_or_dom_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^([1-9]|[1-2][0-9]|3[01])\.$ ] >,
  +OUTPUT < [ +CLASS ord_or_dom_ne ] > ].

;;
;; any sequence of digits followed by a period.  the previous pattern was
;;   ^[0-9]*((^|[^1])(1|2|3)\.|(11|12|13|[04-9]))\.$
;; which demanded two periods after a final 1, 2, or 3, so that |1.|, |22.|,
;; or |103.| were never classified; it also let an arbitrary non-|1| character
;; precede that digit.  German ordinals have no digit-specific suffix, so no
;; case distinction on the final digit is needed.
;;
ord_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[0-9]+\.$ ] >,
  +OUTPUT < [ +CLASS ord_ne ] > ].