;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2009 -- 2018 Stephan Oepen (oe@ifi.uio.no); 
;;; see `LICENSE' for conditions.
;;;


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; application- or task-specific `accommodation' rules (aka hacks :-).  patch
;;; up the input token lattice as needed.  in principle, such rules should go
;;; into separate modules, once we provide a mechanism to selectively activate
;;; rules or sets of rules.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; the data for the CoNLL 2009 shared task has invented a few new tokenization
;; conventions, apparently splitting at all occurrences of hyphens and slashes
;; (which, more or less, is what we would do below ourselves), but making the
;; hyphen a token of its own.  furthermore, CoNLL 2009 hallucinates a few PoS
;; tags that are not in the original PTB inventory.  for the most part, try to
;; map these idiosyncrasies back to standard PTB conventions.
;;
conll_hyphen_tmr := two_one_tmt &
[ +INPUT < [ +FORM ^(.+)$, 
             +TRAIT #trait, +CLASS #class, 
             +PRED #pred, +CARG #carg, +TNT #tnt ],
           [ +TNT.+TAGS.FIRST "HYPH" ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}-", 
              +TRAIT #trait, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] > ].

;;
;; one-to-one rewrite: a token whose form is "{" is output with the form "[".
;; what happens to the remaining token properties is up to `one_one_form_tmt'
;; (defined elsewhere; presumably they are passed through unchanged).
;;
conll_left_brace_tmr := one_one_form_tmt &
[ +INPUT <
    [ +FORM "{" ]
  >,
  +OUTPUT <
    [ +FORM "[" ]
  > ].

;;
;; one-to-one rewrite: a token whose form is "}" is output with the form "]".
;; what happens to the remaining token properties is up to `one_one_form_tmt'
;; (defined elsewhere; presumably they are passed through unchanged).
;;
conll_right_brace_tmr := one_one_form_tmt &
[ +INPUT <
    [ +FORM "}" ]
  >,
  +OUTPUT <
    [ +FORM "]" ]
  > ].