;;; -*- Mode: tdl; Coding: utf-8; -*-
;;;
;;; Copyright (c) 2000 -- 2018 Dan Flickinger (danf@stanford.edu);
;;; copyright (c) 2009 -- 2018 Stephan Oepen (oe@ifi.uio.no);
;;; see `LICENSE' for conditions.
;;;

;;;
;;; generic lexical entries, i.e. entries that are not activated directly by
;;; orthography.  all generic entries are distinguished from native lexical
;;; entries by virtue of their [ ONSET unk_onset ].  generics are sub-divided
;;; into two sub-classes, named (somewhat unfortunately) `unknown' generics
;;; and `generic' generics (that is `unk' and `gen', respectively).
;;;
;;; the former are designed for unknown words and activated on the basis of
;;; PoS tags, the latter correspond to named entities that are recognized by
;;; string-level properties during chart mapping (the equivalent of what in
;;; the YY tradition used to be `ersatz' lexical entries).  see the comments
;;; in `tmr.tdl' (token mapping) and `lfr.tdl' (lexical filtering) for further
;;; background.
;;;
;;; _fix_me_
;;; i believe nothing stands in our way of giving these entries a `meaningful'
;;; orthography now, e.g. something like "_generic_trans_verb_bse_", or so, for
;;; interactive testing (including the LKB).
;;;
;;; furthermore, i suspect we can now ditch the older `gle' entries, used for
;;; generation only, and instead use some of the entries below (those with a
;;; singleton RELS list, fully instantiated PRED, and underspecified CARG) in
;;; *generic-lexical-entries*.                                 (22-jan-09; oe)
;;;

;;;
;;; the first group of `unknown' generic entries, each for a specific PoS tag
;;;

;; Each entry constrains the first PoS tag on its single token
;; (+TNT.+TAGS.FIRST); the ORTH value is a placeholder string.

generic_trans_verb_bse := v_np*_bse-unk_le &
  [ ORTH < "_generic_vb_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VB" ] > ].

generic_trans_verb_pres3sg := v_np*_pr-3s-unk_le &
  [ ORTH < "_generic_vbz_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VBZ" ] > ].

generic_trans_verb_presn3sg := v_np*_pr-n3s-unk_le &
  [ ORTH < "_generic_vbp_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VBP" ] > ].
;; Remaining `unknown' generic entries: verbs, nouns, names, and adjectives.
;; Each one is keyed on the first PoS tag of its token (+TNT.+TAGS.FIRST).

generic_trans_verb_past := v_np*_pa-unk_le &
  [ ORTH < "_generic_vbd_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VBD" ] > ].

generic_trans_verb_prp := v_np*_prp-unk_le &
  [ ORTH < "_generic_vbg_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VBG" ] > ].

;; The VBN tag licenses two entries: past participle and passive.

generic_trans_verb_psp := v_np*_psp-unk_le &
  [ ORTH < "_generic_vbn_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VBN" ] > ].

generic_trans_verb_pas := v_-_pas-unk_le &
  [ ORTH < "_generic_vbn_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "VBN" ] > ].

generic_mass_noun := n_-_mc-unk_le &
  [ ORTH < "_generic_fw_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "FW" ] > ].

generic_mass_count_noun := n_-_mc-unk_le &
  [ ORTH < "_generic_nn_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "NN" ] > ].

;; This is a robustness measure to accommodate a TnT quirk (see tmr/pos.tdl)
;;
generic_mass_count_noun_cctag := n_-_mc-unk_le &
  [ ORTH < "_generic_nn_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "CC" ] > ].

generic_pl_noun := n_-_c-pl-unk_le &
  [ ORTH < "_generic_nns_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "NNS" ] > ].

genericname := n_-_pn-unk_le &
  [ ORTH < "_generic_nnp_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "NNP" ] > ].

generic_quoted := n_-_gq_le &
  [ ORTH < "_generic_quoted_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "GQ" ] > ].

;; DPF 2017-02-01 - The tagger is not always right with the NNPS tag, as with
;; |[[Franglais]], for example, ...|  So ignore that plural, and let the parser
;; decide on inflection.
#|
genericname_pl := n_-_pn-pl-unk_le &
  [ ORTH < "_generic_nnps_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "NNPS" ] > ].
|#
genericname_pl := n_-_pn-unk_le &
  [ ORTH < "_generic_nnps_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "NNPS" ] > ].

generic_adj := aj_-_i-unk_le &
  [ ORTH < "_generic_jj_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "JJ" ] > ].

generic_adj_compar := aj_-_i-cmp-unk_le &
  [ ORTH < "_generic_jjr_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "JJR" ] > ].

generic_adj_superl := aj_-_i-sup-unk_le &
  [ ORTH < "_generic_jjs_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "JJS" ] > ].
;; Last of the `unknown' generic entries (keyed on +TNT.+TAGS.FIRST).

generic_number := aj_-_i-crd-unk_le &
  [ ORTH < "_generic_cd_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "CD" ] > ].

generic_adverb := av_-_i-unk_le &
  [ ORTH < "_generic_rb_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "RB" ] > ].

;; DPF 2012-12-21 - For e.g. |zounds|
;;
generic_uh_disc_adv := av_-_dc-like-unk_le &
  [ ORTH < "_generic_uh_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "UH" ] > ].

;; DPF 2016-11-23 - For stranded punctuation mark tokens
;;
generic_punct := av_-_dc-like-unk_le &
  [ ORTH < "_generic_punct_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "." ] > ].

;;;
;;; and the second group of `generic' named entities, activated by their +CLASS
;;; values, which get set in token mapping.  to avoid overlap with the unknown
;;; lexical entries, the token mapping rules make sure to `empty out' the PoS
;;; information whenever a named entity token is created.
;;;

generic_proper_ne := n_-_pn-gen_le &
  [ ORTH < "_generic_proper_ne_" >,
    TOKENS.+LIST < [ +CLASS proper_ne ] > ].

generic_pl_noun_ne := n_-_c-pl-gen_le &
  [ ORTH < "_generic_plur_ne_" >,
    TOKENS.+LIST < [ +CLASS plur_ne ] > ].

generic_pl_apos_noun_ne := n_-_c-pl-gen_le &
  [ ORTH < "_generic_plur_apos_ne_" >,
    TOKENS.+LIST < [ +CLASS plur_apos_ne ] > ].

generic_date_ne := n_-_day-crd-gen_le &
  [ ORTH < "_generic_date_ne_" >,
    TOKENS.+LIST < [ +CLASS date_ne ] > ].

;; Day-of-month entries come in pairs split on the token's +ONSET value
;; (c-onset vs. v-onset); both members of a pair share one ORTH string.

generic_dom_card_ne := n_-_pn-dom-gen_le &
  [ ORTH < "_generic_dom_card_ne_" >,
    TOKENS.+LIST < [ +CLASS dom_card_ne,
                     +ONSET c-onset ] > ].

generic_dom_card_voc_ne := n_-_pn-dom-gen_le &
  [ ORTH < "_generic_dom_card_ne_" >,
    TOKENS.+LIST < [ +CLASS dom_card_ne,
                     +ONSET v-onset ] > ].

generic_dom_ord_ne := n_-_pn-dom-o-gen_le &
  [ ORTH < "_generic_dom_ord_ne_" >,
    TOKENS.+LIST < [ +CLASS dom_ord_ne ] > ].

;; NOTE(review): the two `euro' entries select the same +CLASS (dom_card_ne)
;; and +ONSET values as generic_dom_card_ne / generic_dom_card_voc_ne, so both
;; pairs apply to the same tokens and differ only in lexical type --
;; presumably intentional; confirm.

generic_dom_euro_ne := n_-_pn-dom-e-gen_le &
  [ ORTH < "_generic_dom_euro_ne_" >,
    TOKENS.+LIST < [ +CLASS dom_card_ne,
                     +ONSET c-onset ] > ].

generic_dom_euro_voc_ne := n_-_pn-dom-e-gen_le &
  [ ORTH < "_generic_dom_euro_ne_" >,
    TOKENS.+LIST < [ +CLASS dom_card_ne,
                     +ONSET v-onset ] > ].
;; Year entries: a consonant-onset / vowel-onset pair.  Each member ties the
;; token's +ONSET value to the matching SYNSEM.PHON.ONSET value (con / voc).

generic_year_ne := n_-_pn-yoc-gen_le &
  [ ORTH < "_generic_year_ne_" >,
    SYNSEM.PHON.ONSET con,
    TOKENS.+LIST < [ +CLASS year_ne,
                     +ONSET c-onset ] > ].

generic_year_voc_ne := n_-_pn-yoc-gen_le &
  [ ORTH < "_generic_year_ne_" >,
    SYNSEM.PHON.ONSET voc,
    TOKENS.+LIST < [ +CLASS year_ne,
                     +ONSET v-onset ] > ].

generic_meas_np_ne := n_-_meas-gen_le &
  [ ORTH < "_generic_meas_np_ne_" >,
    TOKENS.+LIST < [ +CLASS meas_ne ] > ].

;; DPF 2018-03-30 - This type's complement is not constrained enough, and the
;; entry may not be needed anyway, so let's seek to get rid of it.  FIX.
generic_meas_np_np_ne := n_np_meas-gen_le &
  [ ORTH < "_generic_meas_np_np_ne_" >,
    TOKENS.+LIST < [ +CLASS meas_ne ] > ].

generic_meas_n_ne := n_-_meas-n-gen_le &
  [ ORTH < "_generic_meas_noun_ne_" >,
    TOKENS.+LIST < [ +CLASS meas_noun_ne ] > ].

generic_time_noun_ne := n_-_pn-hour-gen_le &
  [ ORTH < "_generic_time_ne_" >,
    TOKENS.+LIST < [ +CLASS time_ne ] > ].

;; DPF 2018-04-12 - For robustness, let's omit [PHON.ONSET con] here for now,
;; since several of the token-mapping rules want to remain underspecified about
;; onset (related to the underspecified +CLASS value that they assign, one of
;; whose subtypes is card_ne), and we don't want those rules to each give rise
;; to two generic lexical card entries (one con and one voc), so we stamp their
;; outputs as +ONSET c-onset.  FIX someday.
;; DPF 2020-05-11 - Re 2018-04-12: We also don't want to leave ONSET unspecified
;; since we don't want robust "a" for |a apple| to combine for |a 10% drop|.  So
;; let's add the ONSET constraint, and see how it goes.
;;
generic_card_ne := aj_-_i-crd-gen_le &
  [ ORTH < "_generic_card_ne_" >,
    SYNSEM.PHON.ONSET con,
    TOKENS.+LIST < [ +CLASS card_ne,
                     +ONSET c-onset ] > ].

generic_card_voc_ne := aj_-_i-crd-gen_le &
  [ ORTH < "_generic_card_ne_" >,
    SYNSEM.PHON.ONSET voc,
    TOKENS.+LIST < [ +CLASS card_ne,
                     +ONSET v-onset ] > ].

generic_ord_ne := aj_-_i-ord-gen_le &
  [ ORTH < "_generic_ord_ne_" >,
    TOKENS.+LIST < [ +CLASS ord_ne ] > ].
;; Fraction named entity, selected by the token's +CLASS value (frct_ne).

generic_fract_ne := aj_-_i-frct-gen_le &
  [ ORTH < "_generic_fract_ne_" >,
    TOKENS.+LIST < [ +CLASS frct_ne ] > ].

;; DPF 2012-09-19 - Added entry for generic sub-one decimals in measure
;; phrases, as in |the price rose 0.3 point| and |the 2.0 release| (while
;; still blocking |the two release|).
;;
generic_decimal_ne := aj_-_i-one-gen_le &
  [ ORTH < "_generic_decimal_ne_" >,
    TOKENS.+LIST < [ +CLASS decimal_ne ] > ].