;;; Settings are written as `name := value.` and `;` starts a comment that
;;; runs to the end of the line.
;;; NOTE(review): comments below mention ACE, PET and the ERG; this looks like
;;; the ACE configuration for the English grammar -- confirm.

;;; files to load
grammar-top := "../english.tdl".
variable-property-mapping := "../semi.vpm".
maxent-model := "../redwoods.mem".
preprocessor := "../rpp/tokenizer.rpp".
preprocessor-modules := ../rpp/xml.rpp ../rpp/ascii.rpp ../rpp/lgt.rpp ../rpp/quotes.rpp ../rpp/wiki.rpp ../rpp/gml.rpp ../rpp/html.rpp.
generation-ignore-lexemes := "../lkb/nogen-lex.set".
generation-ignore-rules := "../lkb/nogen-rules.set".
parse-node-labels := "../parse-nodes.tdl".
generation-trigger-rules := "../trigger.mtr".
version := "../Version.lsp".
semantic-interface-2016 := "../etc/erg.smi".
semantic-interface-top-type := top.
idiom-rules := "../idioms.mtr".
non-idiom-root := root_non_idiom.
irregular-forms := ../irregs.tab.
quickcheck-code := "../ace/ace-erg-qc.txt".
post-model-path := "english-postagger.hmm".

;;; grammar internal properties
orth-path := ORTH.
semantics-path := SYNSEM LOCAL CONT.
lex-rels-path := SYNSEM LOCAL CONT RELS.
lex-carg-path := SYNSEM LKEYS KEYREL CARG.
lex-pred-path := SYNSEM LKEYS KEYREL PRED.
rule-rels-path := C-CONT RELS.

; parsing-roots := root_informal root_frag.
; good:
parsing-roots := root_strict root_informal root_frag root_inffrag root_robust root_robust_frag root_exclam_ques.
;parsing-roots := root_strict root_informal root_frag root_inffrag root_bridge.
;parsing-roots := root_strict root_informal root_frag root_inffrag root_robust.
; demo:
;parsing-roots := root_strict root_informal.

;generation-roots := root_strict root_frag.
;generation-roots := root_strict.
;; Use this one to exclude focus movement (non-WH topicalization)
;generation-roots := root_gen.
generation-roots := root_gen_nofocus.

semarg-type := semarg.
list-type := *list*.
cons-type := *cons*.
null-type := *null*.
diff-list-type := *diff-list*.

chart-dependencies :=
  "SYNSEM LKEYS --+COMPKEY" "SYNSEM LOCAL CAT HEAD MINORS MIN"
  "SYNSEM LKEYS --+OCOMPKEY" "SYNSEM LOCAL CAT HEAD MINORS MIN"
  "SYNSEM LKEYS --+ARGIND" "SYNSEM LOCAL CONT HOOK INDEX" .

;; Consider restricting this in ERG...
;; (e.g. the completive 'up' as in |I should remind myself up of that idea| :)
process-chart-dependencies-before-lexical-parsing := no.

deleted-daughters := ARGS HD-DTR NH-DTR LCONJ-DTR RCONJ-DTR DTR DTR1 DTR2 DTRA DTRB.
parsing-packing-restrictor := RELS HCONS ICONS RNAME +TI +LL +TG.
; parsing-packing-restrictor := HCONS RELS RNAME ORTH RCLSTR ONSET LTOP --SLTOP GENRE NEGPOL ARG-S TO KEY-ARG --TL FROM IDIOM FORM LKEYS ARG0 +STAG L-HNDL LBL DATIVE +ID IND KEYREL E DIALECT ARG3 HS-LEX GEN LARG CPNG +PRB +TAG +CASE PROG C-CONT TENSE CPUNCT PSF ND-AFF ASPECT INSTLOC CTX CFROM +INIIAL DTR2 HD-DTR CTO RCONJ-DTR DTR1 IFORM ALT2KEYREL SQPNCT --TPC ADVNGADD DTR CLEX ARG1 BODY RSTR PERF KCMP LNAME LCONJ-DTR PRF POSS L-INDEX RPAREN RFP ADVADD TOKENS +LAST META-PREFIX PASSIVE DTRA R-HNDL.
generation-packing-restrictor := ONSET RELS HCONS ICONS RNAME.

generic-les-for-semantic-index :=
  generic_proper_ne generic_card_ne generic_ord_ne generic_dom_card_ne
  generic_dom_ord_ne generic_year_ne generic_date_ne generic_pl_noun_ne.
generic-les-by-part-of-speech := "generic_adj a" "generic_adverb a" "gen_generic_noun n" "gen_generic_verb v".

mrs-deleted-roles := IDIOMP LNK CFROM CTO --PSV
;; starting here, mrs deleted roles left over from old ACE config file
WLINK PARAMS.

;; DPF 2022-05-16 - Remove cl-cl_runon-cma_c from this list, to allow
;; "We arrive, we arise, we leave."
;; DPF 2022-12-13 - Tried removing cl_cp-frg_c to allow "the charge -- that
;; we lost --", but it's too annoying, especially with generalization packing,
;; so added hyphen-marked variant rule that is not spanning only.
;; DPF 2022-12-15 - Can't include r_cl-frg_c, since sentence-segmentation can
;; be wrong: "we arise. because they arrive."
;; DPF 2022-12-17 - Removed np-aj_rorp-frg_c so we can get non-vocative
;; "Easterners, obviously" in "the visitors - '' Easterners, obviously - arrived"
;;
spanning-only-rules :=
  aj-hd_int-inv_c hd_yesno-period_c aj-r_frg_c np-aj_frg_c
  cl_rel-frg_c np_nb-frg_c aj-np_int-frg_c pp-aj_frg_c
  j-aj_frg_c np-cl_numitem_c np-cl_lettitem_c cl_cp-frg_c
  cl-np_runon-prn_c conj-frg_c hd-aj_scp-noclpnct_c.
; also vp_sl-frg_c if used

;fragment-only-rules :=
;  frag_np frag_nbar frag_pp_i
;  frag_adj frag_int_adv frag_cl_adv frag_vp
;  frag_l_mod_np frag_l_mod_pp frag_l_mod_adv frag_r_mod_np
;  frag_r_mod_s_pp frag_r_mod_ap.

; heuristic:
; a rule should be hyperactive if the expected cost of processing
; with it as ordinary exceeds the expected cost of processing with it as
; hyperactive. Alternately, for each rule there is a maximum number of times
; we should be willing to build it before copying
;
; cost(reconstructing N times when our max unifies is K)
;   = K * unify + copy   if N >= K
;   = N * unify          if N < K
;
; say we know P(need dag n times)
; then E(cost) = sum_n P(n) cost(n)
;
; we don't know P(n), but we do know E(n) per rule
; maxent principle: assume poisson distribution of recombinations
; based on that assumption, can compute "correct" K given E(n)
; never copy: E(cost) = E(unify * n) = unify * E(n)
; always copy: E(cost) = unify + copy
; when is unify * E(n) > unify + copy?
; answer: when unify (E(n)-1) > copy
;   E(n) > 1 + copy/unify
;   i.e. when E(recombinations) > copy / unify
; answer, based on unify = 4.4us and copy = 9.8us:
;   if E(recomb) < 2.227, then K = \infty
;   else K = 1 (i.e. disable hyperactivity for this rule)
; calculations show that (at least in this range), K=\infty is always better
; than K>1

; NOTE(review): the next line names a single rule that does not appear in the
; list below; presumably it was deliberately taken out -- confirm.
; cl-np_runon_c
hyper-active-rules :=
  hdn-np_app-idf-p_c hdn-n_prnth_c n-n_num-seq_c vppr-vppr_crd-m_c
  vp-vp_crd-nfin-m_c j-n_crd-t_c np-hdn_cty-cpd_c hdn-aj_rc-pr_c
  jpr-jpr_crd-m_c n-j_crd-t_c v-v_crd-fin-ncj_c np-np_crd-i-t_c
  np-np_crd-nc-t_c cl-cl_runon_c pp-pp_crd-m_c np-hdn_nme-cpd_c
  np-aj_j-frg_c ppr-ppr_crd-m_c n-n_crd-nc-m_c hdn-aj_redrel-pr_c
  hdn-np_app-nbr_c np-aj_frg_c hd-aj_vmod-s_c sp-hd_hc_c
  jpr-vpr_crd-m_c np-hdn_ttl-cpd_c cl-cl_runon-cma_c hdn-np_app-idf_c
  n-hdn_cpd_c aj-np_frg_c n-j_j-t-cpd_c n-n_crd-m_c
  np-np_crd-nc-m_c n-hdn_j-n-cpd_c n-j_j-cpd_c aj-pp_frg_c
  aj-r_frg_c j-j_crd-prd-m_c flr-hd_nwh_c aj-hd_int-inv_c
  hd-hd_rnr-nv_c ppr-ppr_crd-im_c np-hdn_cpd_c ppr-ppr_crd-t_c
  aj-np_int-frg_c jpr-vpr_crd-im_c pp-aj_frg_c hd-aj_vmod_c
  np-hdn_num-cpd_c j-j_crd-prd-im_c n-n_crd-3-t_c jpr-jpr_crd-im_c
  nb-aj_frg_c jpr-jpr_crd-t_c vp-vp_crd-nfin-im_c pp-pp_crd-t_c
  jpr-vpr_crd-t_c pp-pp_crd-im_c j-j_crd-prd-t_c np-np_crd-m_c
  vp-vp_crd-nfin-t_c np-np_crd-i3-t_c flr-hd_wh-nmc-inf_c n-n_crd-t_c
  np-aj_rorp-frg_c j-aj_frg_c sp-hd_n_c n-v_j-cpd_c
  aj-hdn_adjn_c hd-aj_scp-pr_c n-n_crd-im_c j-j_crd-att-t_c
  hdn-aj_rc_c flr-hd_nwh-nc_c aj-hdn_norm_c flr-hd_wh-nmc-fin_c
  num-n_mnp_c hdn-aj_redrel_c cl-cl_crd-m_c np-np_crd-i2-t_c
  hd-hd_rnr_c vppr-vppr_crd-im_c vppr-vppr_crd-t_c mrk-nh_n_c
  flr-hd_wh-mc_c hdn-np_app-pr_c hdn-np_app_c sb-hd_q_c
  vp-vp_crd-fin-im_c hdn-cl_dsh_c vp-vp_crd-fin-m_c cl-cl_crd-im_c
  sb-hd_mc_c np-prdp_vpmod_c cl-cl_crd-rc-t_c sb-hd_nmc_c
  n-n_crd-asym-t_c flr-hd_wh-mc-sb_c hd-cmp_u_c flr-hd_rel-fin_c
  cl-cl_crd-int-t_c vp-vp_crd-fin-t_c cl-np_runon-prn_c hd-cl_fr-rel_c
  np-np_crd-im_c cl-cl_crd-t_c.

; NOTE(review): the names in this list follow a different naming scheme from
; hyper-active-rules above; presumably a legacy list kept for reference --
; confirm whether anything still reads it.
old-hyper-active-rules :=
  adv_coord_mid num_seq np_city_state adv_coord_top
  measure_np hspechc fillhead_wh_nr_i adj_pred_coord_top
  adj_pred_coord_mid fillhead_wh_r np_n_cmpnd frag_r_mod_np
  adj_attr_coord_top frag_l_mod_adv frag_l_mod_pp frag_r_mod_s_pp
  frag_l_mod_np np_name_cmpnd hspec hmarkatom
  noun_n_cmpnd meas_np_symb fillhead_rel n_ttl_cmpnd
  nadj_rc_pr nadj_rc fillhead_wh_nr_f v_coord_nonfin_top
  p_coord_top appos_npr frag_r_mod_ap v_coord_nonfin_mid
  adj_adjn p_coord_mid fillhead_non_wh.

;; reduce storage requirements for the compiled lexicon when possible
simplify-lexicon := enabled.

;; faster generation
index-accessibility-filtering := enabled.

;; keep an extra copy of *ocons* and a couple other types around, so we don't
;; have to copy it when we use it as a glb constraint (1st time anyway).
extra-erg-dag-stash := enabled.

;; part of speech tagging
english-pos-tagger := enabled.

;; ERG peculiarity: after generating a passive edge, set its top-level type
;; to 'sign' during forest creation. this results in improved packing at
;; comparatively little cost in unpacking failures.
generalize-edge-top-types := enabled.

;; token settings
token-mapping := enabled.
lexicon-tokens-path := TOKENS +LIST.
lexicon-last-token-path := TOKENS +LAST.
token-type := token.
token-form-path := +FORM. ; [required] string for lexical lookup
token-id-path := +ID. ; [optional] list of external ids
token-from-path := +FROM. ; [optional] surface start position
token-to-path := +TO. ; [optional] surface end position
token-postags-path := +TNT +TAGS. ; [optional] list of POS tags
token-posprobs-path := +TNT +PRBS. ; [optional] list of POS probabilities

;; lattice mapping settings
lattice-mapping-input-path := +INPUT.
lattice-mapping-output-path := +OUTPUT.
lattice-mapping-context-path := +CONTEXT.
lattice-mapping-position-path := +POSITION.

; below here are not technically settings, but types that ACE needs to import that PET (owner of english.tdl) doesn't need.
:begin :type.
:include "../mtr".
:end :type.

; icons
enable-icons := yes.
mrs-icons-list := ICONS LIST.
icons-left := IARG1.
icons-right := IARG2.

;; Standard model
;; DPF 2019-11-20 - These next four should be uncommented for release
;übertag-emission-path := "../ut/nanc_wsj_redwoods_noaffix.ex.gz".
;übertag-transition-path := "../ut/nanc_wsj_redwoods_noaffix.tx.gz".
;übertag-generic-map-path := "../ut/generics.cfg".
;übertag-whitelist-path := "../ut/whitelist.cfg".

;;; Model trained on just trunk Redwoods
;;übertag-emission-path := "../ut/redwoods-train.ex.gz".
;;übertag-transition-path := "../ut/redwoods-train.tx.gz".

;;; Model trained on trunk Redwoods (gold) and NANC (not gold)
;;übertag-emission-path := "../ut/nanc-redwoods-train.ex.gz".
;;übertag-transition-path := "../ut/nanc-redwoods-train.tx.gz".

freezer-megabytes := 512.

;; For PCFG parsing, to stamp [GENRE robust] on edges that fail unification
robustness-marker-path := GENRE.
robustness-marker-type := robust.

;; For improved unknown word handling in generation
generics-overwrite-orth := true.