;;; -*- Mode: tdl; Coding: utf-8; -*-
;
; w_period_plr -- period affixed to end of word
;   Example: cat.
;   %suffix sub-rule (!. !..): the surface form is the stem followed by a
;   literal |.|; the stem-final character is matched by letter set |!.|
;   (letter sets are declared outside this section).
;   Inherits from punctuation_period_rule; RNAME is lpp.
;
w_period_plr :=
%suffix (!. !..)
punctuation_period_rule &
[ RNAME lpp ].
;
; w_qmark_plr -- question mark affixed to end of word
;   Example: cat?
;   %suffix sub-rule (!p !p\?): the surface form is the stem followed by a
;   single question mark, written backslash-escaped as |\?|; the stem-final
;   character is matched by letter set |!p| (declared outside this section).
;   Inherits from punctuation_qmark_rule; RNAME is lpq.
;
w_qmark_plr :=
%suffix (!p !p\?)
punctuation_qmark_rule &
[ RNAME lpq ].
;
; w_qqmark_plr -- double question mark affixed to end of word
;   Example: cat??
;   %suffix sub-rule (!p !p\?\?): the surface form is the stem followed by
;   two question marks, each written backslash-escaped as |\?|.  A pattern
;   with only one |\?| would be identical to that of w_qmark_plr and would
;   not cover |cat??| as a single affix.
;   The stem-final character is matched by letter set |!p| (declared outside
;   this section).
;   Inherits from w_qqmark_rule; RNAME is lpqq.
;
w_qqmark_plr :=
%suffix (!p !p\?\?)
w_qqmark_rule &
[ RNAME lpqq ].
;
; w_qmark-bang_plr -- question mark plus exclamation point affixed
;   Example: cat?!
;   %suffix sub-rule (!p !p\?\!): the surface form is the stem followed by
;   |?!|, with both characters written backslash-escaped.  A pattern with only
;   |\!| would be identical to that of w_bang_plr and would not cover |cat?!|
;   as a single affix.
;   The stem-final character is matched by letter set |!p| (declared outside
;   this section).
;   Inherits from w_qmark-bang_rule; RNAME is lpcq.
;
w_qmark-bang_plr :=
%suffix (!p !p\?\!)
w_qmark-bang_rule &
[ RNAME lpcq ].
;
; w_comma_plr -- comma affixed to end of word
;   Example: cat,
;   %suffix sub-rule (!q !q,): the surface form is the stem followed by |,|;
;   the stem-final character is matched by letter set |!q| (declared outside
;   this section).
;   Inherits from punctuation_comma_rule; RNAME is lpcm.
;   Compare w_comma-nf_plr, which adds the same character via a different
;   rule type and letter set.
;
w_comma_plr :=
%suffix (!q !q,)
punctuation_comma_rule &
[ RNAME lpcm ].
;
; w_bang_plr -- exclamation point affixed to end of word
;   Example: cat!
;   %suffix sub-rule (!p !p\!): the surface form is the stem followed by an
;   exclamation point, written backslash-escaped as |\!|; the stem-final
;   character is matched by letter set |!p| (declared outside this section).
;   Inherits from punctuation_bang_rule; RNAME is lpx.
;
w_bang_plr :=
%suffix (!p !p\!)
punctuation_bang_rule &
[ RNAME lpx ].
;
; w_semicol_plr -- semicolon affixed to end of word
;   Example: cat;
;   %suffix sub-rule (!. !.;): the surface form is the stem followed by |;|;
;   the stem-final character is matched by letter set |!.| (declared outside
;   this section).
;   Inherits from w_semicol_rule; RNAME is lpsc.
;
w_semicol_plr :=
%suffix (!. !.;)
w_semicol_rule &
[ RNAME lpsc ].
; For robustness, where a typo leaves two semicolons
;
; w_double_semicol_plr -- robust double semicolon affixed to end of word
;   Example: cat;;
;   %suffix sub-rule (!. !.;;): the surface form is the stem followed by
;   |;;|; the stem-final character is matched by letter set |!.| (declared
;   outside this section).
;   Inherits from w_double_semicol_rule; RNAME is lpds.
;
w_double_semicol_plr :=
%suffix (!. !.;;)
w_double_semicol_rule &
[ RNAME lpds ].
;
; w_rparen_plr -- right parenthesis affixed to end of word
;   Example: cat)
;   %suffix sub-rule (!p !p\)): the surface form is the stem followed by a
;   right parenthesis, written backslash-escaped as |\)| so that it is not
;   read as the end of the sub-rule; the stem-final character is matched by
;   letter set |!p| (declared outside this section).
;   Inherits from w_rparen_rule; RNAME is lprp.
;
w_rparen_plr :=
%suffix (!p !p\))
w_rparen_rule &
[ RNAME lprp ].
; For robustness, where mis-editing left a comma preceding the right paren
;
; w_comma-rp_plr -- comma wrongly placed before right parenthesis
;   Example: cat,)
;   %suffix sub-rule (!p !p,\)): the surface form is the stem followed by
;   |,)|, with the parenthesis written backslash-escaped; the stem-final
;   character is matched by letter set |!p| (declared outside this section).
;   Inherits from w_comma-rp_rule; RNAME is lpcp.
;
w_comma-rp_plr :=
%suffix (!p !p,\))
w_comma-rp_rule &
[ RNAME lpcp ].
;
; w_lparen_plr -- left parenthesis prefixed to word
;   Example: (cat
;   %prefix sub-rule (!f \(!f): the surface form is a left parenthesis,
;   written backslash-escaped as |\(|, followed by the stem; the stem-initial
;   character is matched by letter set |!f| (declared outside this section).
;   Inherits from w_lparen_rule; RNAME is lplp.
;
w_lparen_plr :=
%prefix (!f \(!f)
w_lparen_rule &
[ RNAME lplp ].
;
; w_rbrack_plr -- right square bracket (or similar closer) affixed
;   Example: cat]
;   %suffix with three sub-rules, so the same rule also accepts a right
;   curly brace and a vertical bar as the closing character:
;     (!p !p])   cat]
;     (!p !p})   cat}
;     (!p !p|)   cat|
;   The stem-final character is matched by letter set |!p| (declared outside
;   this section).
;   Inherits from w_rbrack_rule; RNAME is lprb.
;
w_rbrack_plr :=
%suffix (!p !p]) (!p !p}) (!p !p|)
w_rbrack_rule &
[ RNAME lprb ].
;
; w_lbrack_plr -- left square bracket (or similar opener) prefixed
;   Example: [cat
;   %prefix with three sub-rules, so the same rule also accepts a left
;   curly brace and a vertical bar as the opening character:
;     (!f [!f)   [cat
;     (!f {!f)   {cat
;     (!f |!f)   |cat
;   The stem-initial character is matched by letter set |!f| (declared
;   outside this section).
;   Inherits from w_lbrack_rule; RNAME is lplb.
;
w_lbrack_plr :=
%prefix (!f [!f) (!f {!f) (!f |!f)
w_lbrack_rule &
[ RNAME lplb ].
;;
;; _fix_me_
;; how many variants of quote marks to include depends on what decisions we
;; make in pre-processing. as of September 2008, we have opted for a total of
;; six, viz. UniCode directional quotes (|“|, |”|, |‘|, and |’|), as well as
;; straight ASCII quotes (|"| and |'|). when an input text makes use of good
;; directional quotes we preserve those (to not introduce ambiguity). also, by
;; default we disambiguate quotes in pre-processing, based on proximity to
;; token boundaries, i.e. in processing inputs following standard orthography
;; we only see proper opening and closing quotes (of which the latter can also
;; be apostrophes or units of measure). however, there also is a configuration
;; for `messed up' inputs, e.g. ones that were pre-tokenized, so we no longer
;; stand a chance of disambiguating quotes. in this mode, straight quotes are
;; ambiguous between opening or closing ones. finally, if only for robustness
;; (or use without the full pre-processors), allow some variants that we do not
;; expect to see in the ideal world: |``|, |''|, and |`|.
;;
;
; w_dqright_plr -- double quote affixed to end of word
;   Example: cat"
;   %suffix with three sub-rules, one per accepted closing-quote spelling:
;     (!q !q”)    Unicode right double quote
;     (!q !q")    straight ASCII double quote
;     (!q !q'')   two straight single quotes
;   The stem-final character is matched by letter set |!q| (declared outside
;   this section).
;   Inherits from punctuation_dqright_rule; RNAME is lpdr.
;
w_dqright_plr :=
%suffix (!q !q”) (!q !q") (!q !q'')
punctuation_dqright_rule &
[ RNAME lpdr ].
;
; w_dqleft_plr -- double quote prefixed to word
;   Example: "cat
;   %prefix with four sub-rules, one per accepted opening-quote spelling:
;     (!f ”!f)    Unicode right double quote
;     (!f “!f)    Unicode left double quote
;     (!f "!f)    straight ASCII double quote
;     (!f ``!f)   two backticks
;   NOTE(review): the first sub-rule accepts the *closing* directional quote
;   as an opener -- presumably for robustness against mis-directed quotes;
;   confirm this is intended.
;   The stem-initial character is matched by letter set |!f| (declared
;   outside this section).
;   Inherits from punctuation_dqleft_rule; RNAME is lpdl.
;
w_dqleft_plr :=
%prefix (!f ”!f) (!f “!f) (!f "!f) (!f ``!f)
punctuation_dqleft_rule &
[ RNAME lpdl ].
;
; w_sqright_plr -- single quote affixed to end of word
;   Example: cat'
;   %suffix with two sub-rules, one per accepted closing-quote spelling:
;     (!q !q’)   Unicode right single quote
;     (!q !q')   straight ASCII single quote
;   The stem-final character is matched by letter set |!q| (declared outside
;   this section).
;   Inherits from punctuation_sqright_rule; RNAME is lpsr.
;
w_sqright_plr :=
%suffix (!q !q’) (!q !q')
punctuation_sqright_rule &
[ RNAME lpsr ].
;; DPF 20-feb-10 - Let's try doing without the straight quote as left
;; punctuation, since it creates spurious ambiguity for |They'd arrived.|
;; DPF 06-mar-10 - But now we need it for e.g. |The 'cat' arrived.|
;; _fix_me_
;; in fact, this ambiguity will disappear once we fully move to supporting the
;; UniCode apostrophe, i.e. convert the lexicon to include |don’t|, |’d|, and
;; |o’clock|. at present, we hack around these by `normalizing' (in the wrong
;; direction) apostrophes to straight typewriter quotes. (23-mar-10; oe)
;;
;
; w_sqleft_plr -- single quote prefixed to word
;   Example: 'cat
;   %prefix with three sub-rules, one per accepted opening-quote spelling:
;     (!f ‘!f)   Unicode left single quote
;     (!f '!f)   straight ASCII single quote
;     (!f `!f)   backtick
;   The stem-initial character is matched by letter set |!f| (declared
;   outside this section).
;   Inherits from punctuation_sqleft_rule; RNAME is lpsl.
;
w_sqleft_plr :=
%prefix (!f ‘!f) (!f '!f) (!f `!f)
punctuation_sqleft_rule &
[ RNAME lpsl ].
;
; w_hyphen_plr -- hyphen affixed to end of word
;   Example: cat-
;   %suffix sub-rule (!p !p-): the surface form is the stem followed by |-|;
;   the stem-final character is matched by letter set |!p| (declared outside
;   this section).
;   Inherits from w_hyphen_rule; RNAME is lphr.
;
w_hyphen_plr :=
%suffix (!p !p-)
w_hyphen_rule &
[ RNAME lphr ].
;; DPF 2012-08-29 - Recent REPP changes now attach three dots to preceding
;; token without a space, when the dots are between two tokens,
;; so add variant of hyphen suffix rule to accommodate.
;;
;
; w_threedot_plr -- three dots affixed to end of word
;   Example: cat...
;   %suffix sub-rule (!p !p...): the surface form is the stem followed by
;   |...|; the stem-final character is matched by letter set |!p| (declared
;   outside this section).
;   Inherits from w_threedot_rule; RNAME is lptd.
;
w_threedot_plr :=
%suffix (!p !p...)
w_threedot_rule &
[ RNAME lptd ].
;
; w_asterisk_plr -- asterisk affixed to end of word
;   Example: cat*
;   %suffix sub-rule (!p !p\*): the surface form is the stem followed by an
;   asterisk, written backslash-escaped as |\*|; the stem-final character is
;   matched by letter set |!p| (declared outside this section).
;   Inherits from w_asterisk_rule; RNAME is lpax.
;
w_asterisk_plr :=
%suffix (!p !p\*)
w_asterisk_rule &
[ RNAME lpax ].
;
; w_asterisk-pre_plr -- asterisk prefixed to word
;   Example: *cat
;   %prefix sub-rule (!p \*!p): the surface form is an asterisk, written
;   backslash-escaped as |\*|, followed by the stem.
;   NOTE(review): this rule matches the stem-initial character with letter
;   set |!p|, whereas the other %prefix rules in this section use |!f| --
;   confirm that is intended.
;   Inherits from w_asterisk_pre_rule (note the underscore in the type name,
;   versus the hyphen in this instance name); RNAME is lpaxp.
;
w_asterisk-pre_plr :=
%prefix (!p \*!p)
w_asterisk_pre_rule &
[ RNAME lpaxp ].
; Add punct_comma_informal_rule for e.g. comma-marked subjects and
; hd-cmp_u_c heads, but don't use it for generation.
;
; w_comma-nf_plr -- nonformal comma affixed to end of word
;   Example: cat,
;   %suffix sub-rule (!. !.,): the surface form is the stem followed by |,|;
;   the stem-final character is matched by letter set |!.| (declared outside
;   this section).  w_comma_plr adds the same character but uses letter set
;   |!q| and a different rule type.
;   Inherits from punctuation_comma_informal_rule; RNAME is lpcr.
;
w_comma-nf_plr :=
%suffix (!. !.,)
punctuation_comma_informal_rule &
[ RNAME lpcr ].
;; DPF 2012-02-18 - Tried adding double quotes to enable foreign-word analysis
;; but this leads to a lot of ambiguity whenever double quotes appear. More
;; research is needed.
;; %prefix (!f ¦i!f) (!f ”!f) (!f “!f) (!f "!f) (!f ``!f)
;; %suffix (!q !qi¦) (!q !q”) (!q !q") (!q !q'')
;
; w_italleft_plr -- italics mark |¦i| prefixed to word
;   Example: ¦icat
;   %prefix sub-rule (!f ¦i!f): the surface form is the two-character mark
;   |¦i| followed by the stem; the stem-initial character is matched by
;   letter set |!f| (declared outside this section).
;   Inherits from punctuation_italleft_rule; RNAME is lpil.
;   w_drop-ileft_plr uses the same pattern with a different rule type.
;
w_italleft_plr :=
%prefix (!f ¦i!f)
punctuation_italleft_rule &
[ RNAME lpil ].
;
; w_italright_plr -- italics mark |i¦| affixed to end of word
;   Example: cati¦
;   %suffix sub-rule (!q !qi¦): the surface form is the stem followed by the
;   two-character mark |i¦|; the stem-final character is matched by letter
;   set |!q| (declared outside this section).
;   Inherits from punctuation_italright_rule; RNAME is lpir.
;   w_drop-iright_plr uses the same pattern with a different rule type.
;
w_italright_plr :=
%suffix (!q !qi¦)
punctuation_italright_rule &
[ RNAME lpir ].
;
; w_drop-ileft_plr -- ignored italics mark |¦i| prefixed to word
;   Example: ¦icat
;   %prefix sub-rule (!f ¦i!f): the surface form is the two-character mark
;   |¦i| followed by the stem; the stem-initial character is matched by
;   letter set |!f| (declared outside this section).
;   Same orthographic pattern as w_italleft_plr, but inherits from
;   punct_drop_italleft_rule; RNAME is lpxdl.
;
w_drop-ileft_plr :=
%prefix (!f ¦i!f)
punct_drop_italleft_rule &
[ RNAME lpxdl ].
;
; w_drop-iright_plr -- ignored italics mark |i¦| affixed to end of word
;   Example: cati¦
;   %suffix sub-rule (!q !qi¦): the surface form is the stem followed by the
;   two-character mark |i¦|; the stem-final character is matched by letter
;   set |!q| (declared outside this section).
;   Same orthographic pattern as w_italright_plr, but inherits from
;   punct_drop_italright_rule; RNAME is lpxdr.
;
w_drop-iright_plr :=
%suffix (!q !qi¦)
punct_drop_italright_rule &
[ RNAME lpxdr ].