;;;
;;; grammar-specific settings for PET; run-time part for cheap parser.
;;;

include "global".

;;
;; encoding used in grammar source files (should be `euc-jp' for Japanese).
;;
encoding := utf-8.

;;
;; input preprocessing: disable downcasing and (English-specific) explosion of
;; apostrophes.
;;
trivial-tokenizer.

;;
;; type of a valid parse, start symbol
;;
start-symbols := $root_vp_inf $root_s $root_vpnom $root_np $root_nbar
                 $root_pp $root_ap $root_rp $root_c $root_i.

;;
;; word-breaking characters in tokenization --- treated as whitespace
;;
punctuation-characters := "<>{}[]+*-_\"".

;;
;; suppress results of regular morphology rules if an irregular form exists
;;
irregular-forms-only.

lex-entries-can-fail.

;;
;; names of attributes not to pass from daughter to mother in parsing
;;
deleted-daughters := ARGS HEAD-DTR NON-HEAD-DTR.

;;
;; names of attributes to filter for the packing parser
;;
packing-restrictor := RELS HCONS.

;;
;; path into the MRS semantics, and (temporarily) features to `characterize'
;;
mrs-path := "SYNSEM.LOCAL.CONT".
mrs-cfrom-path := "SYNSEM.LOCAL.CONT.RELS.LIST.CFROM".
mrs-cto-path := "SYNSEM.LOCAL.CONT.RELS.LIST.CTO".

;;
;; name of the type containing the quick check structure
;;
qc-structure := $qc_unif_set.

;;
;; the scoring model, either for on-line use in best-first parsing or off-line
;; ranking of complete parse results after parsing.
;;
sm := "tibidabo.mem".
;sm := "bosque.mem".

;;
;; a `generic model' (i.e. PCFG), used to discard low-probability edges in
;; chart pruning (when requested by the `-cp' command-line option).
;;
;gm := "tibidabo.pcfg".

;;
;; as an alternative to the `KEY-ARG' mechanism in the actual rules, one can
;; stipulate the key daughter for each rule, e.g. (using names that will most
;; likely not be used in a grammar to avoid interference):
;;
#|
rule-keyargs := $subj-head-example 2 $head-complement-example 1.
|#

;;
;; some rules should be constrained to apply only over the entire string, i.e.
;; to edges that span the full input; this should improve parsing efficiency
;; only, rather than be considered part of the linguistic analyses.  e.g.
;;
#|
spanning-only-rules := $runon-s-example.
|#

;;;
;;; generic lexical entries for unknown words: basically, for each unknown
;;; token in the input all generic entries are postulated.  optionally, there
;;; are two devices to filter out generic entries: suffix-based and by virtue
;;; of POS tag information.  generic entries that require a certain suffix
;;; (`generic-le-suffixes') only fire if the input form has that suffix.  if
;;; the input word has one or more POS tags associated with it, these are
;;; looked up in the `posmapping' table: this table is a list of pairs (tag,
;;; gle), where `gle' is the name of one of the generic items in `generic-les'.
;;; a non-empty `posmapping' table will filter out all generic entries that
;;; are not explicitly licensed by a POS tag.
;;;
generic-lexentry-status-values := generic-lex-entry.
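;;
;; for illustration only: the status value above is how PET (at grammar
;; compilation time, in `flop') recognises generic entries among the grammar's
;; instances.  the following is a hypothetical sketch of how such a status is
;; commonly assigned in a grammar's flop control file; the included file name
;; `generics' is made up and not part of this grammar.
;;
#|
:begin :instance :status generic-lex-entry.
:include "generics".
:end :instance.
|#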
;;
;; some generic lexical entries require inflectional marking.  this mechanism
;; is a filter on which generic entries proposed by other means can survive:
;; generic entries listed here will only be postulated if the required suffix
;; can be matched against the input token.
;;
;; when using only generic entries licensed by a POS tag, the suffix filter
;; really does not make a lot of sense anymore.                (6-jun-03; oe)
;;
#|
generic-le-suffixes :=
  $generic_trans_verb_pres3sg "S"
  $generic_trans_verb_past "ED"
  $generic_trans_verb_psp "ED"
  $generic_trans_verb_prp "ING"
  $generic_pl_noun "S" .
|#

;;
;; a sample mapping of POS tags to generic lexical entries, borrowed from the
;; ERG and using the Penn Treebank tagset.
;;
#|
posmapping :=
  JJ   $generic_adj
  JJR  $generic_adj_compar
  JJS  $generic_adj_superl
  NN   $generic_sg_noun
  NN   $generic_mass_noun
  NNS  $generic_pl_noun
  NNPS $generic_pl_noun
  NNP  $genericname
  FW   $generic_mass_noun
  RB   $generic_adverb
  VB   $generic_trans_verb_bse
  VBD  $generic_trans_verb_past
  VBG  $generic_trans_verb_prp
  VBN  $generic_trans_verb_psp
  VBP  $generic_trans_verb_presn3sg
  VBZ  $generic_trans_verb_pres3sg
|#

;;
;; as of august 2003, PET includes ECL and the generic (R)MRS code; hence, we
;; need to include the MRS settings from the grammar.
;;
postload-lisp-files := "lkb/mrsglobals.lisp".

;;;
;;; following are a number of settings for the new (as of late 2008) token
;;; mapping and lexical filtering support in PET.
;;;

;;
;; first, the general format of chart mapping rules, much like MRS transfer.
;;
chart-mapping-context-path  := "+CONTEXT".
chart-mapping-input-path    := "+INPUT".
chart-mapping-output-path   := "+OUTPUT".
chart-mapping-position-path := "+POSITION".

;;
;; in lexical instantiation, the list of tokens activating a lexical entry (be
;; it native or generic) is unified into the lexical entry under this path.
;;
lexicon-tokens-path     := "TOKENS.+LIST".
lexicon-last-token-path := "TOKENS.+LAST".

;;
;; furthermore, for the various input formats, we need to declare how parts of
;; input descriptions correspond to the grammar-internal feature geometry; in
;; the YY input format, for example, token feature structures (aka input items
;; PET-internally) are created from various parts of the token description.
;; (an illustrative sketch of such a token feature structure is given at the
;; end of this file.)
;;
token-form-path     := "+STEM".   ; [required] string for lexical lookup
token-id-path       := "+ID".     ; [optional] list of external ids
token-from-path     := "+FROM".   ; [optional] surface start position
token-to-path       := "+TO".     ; [optional] surface end position
token-postags-path  := "+TAG".    ; [optional] list of POS tags
token-posprobs-path := "+TAG".    ; [optional] list of POS probs

;;
;; finally, declare TDL status values for the various new entity types
;;
;;token-mapping-rule-status-values := token-mapping-rule.
;;lexical-filtering-rule-status-values := lexical-filtering-rule.
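;;
;; for illustration only: given the token paths declared above, an input token
;; with surface form `dog', external id 42, character span 0 to 3, and POS tag
;; `NN' would correspond, roughly, to a token feature structure along the
;; lines of the sketch below.  this is a hypothetical example; the `token'
;; type name and the exact value types are made up, not taken from this
;; grammar.
;;
#|
token & [ +STEM "dog",
          +ID   < "42" >,
          +FROM "0",
          +TO   "3",
          +TAG  < "NN" > ]
|#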