;;; -*- Mode: TDL; Coding: utf-8 -*-
;;; HAG (Hausa Grammar)
;;; Author: Berthold Crysmann
;;; 2009

;;; Post-generation token mapping rules.

;;; Split rules should come first (sensitive to SYNSEM).
;;; Don't forget to copy suprasegmental information!

;;; praf_split_preempt
;;; Matches a single pron-or-resump-do-irule token whose STEM is one of
;;; ta/na/mu/mi/ma immediately followed by ni, ka, ki, shi, ta, mu, ku or su.
;;; The output STEM is the concatenation of both capture groups, i.e. the
;;; surface string is left unsplit.  TONE, LEN and PRFX of the input SUPRA
;;; are re-entrant with the output's SUPRA.
;;; +JUMP names the rule "specialise_supra" (not defined in this part of the
;;; file); presumably this skips praf_split below for these forms -- confirm
;;; against the rule order in the full file.
praf_split_preempt := no_context_cm_rule &
  [+INPUT < pron-or-resump-do-irule &
    [STEM < ^([tn]a|m[uia])(ni|ka|ki|shi|ta|[mks]u)$ >,
     SUPRA [TONE #tone, LEN #len, PRFX #prfx ] ] >,
   +OUTPUT < output-token-type &
    [STEM < "${I1:STEM.FIRST:1}${I1:STEM.FIRST:2}" >,
     SUPRA supra_reent &
      [PRFX #prfx & [TONE #t-prfx],
       TONE #tone & [LIST #tl, LAST null],
       LEN #len & [LIST.--TL #tl, LAST null & [--TL.--LEFT #t-prfx]] ] ] >,
   +POSITION "I1@O1" ,
   +JUMP "specialise_supra" ].

;;; praf_split
;;; Splits a verbal (HEAD verb) pron-or-resump-do-irule token into two output
;;; tokens: O1 carries capture group 1 (everything before the final
;;; ni/ka/ki/shi/ta/mu/ku/su), O2 carries capture group 2.  O1 receives the
;;; remainders (#thost, #lhost) of the input's TONE and LEN lists; the first
;;; LEN element of the input is required to be `short'.
;;;
;;; NOTE(review): this definition looks damaged in the present copy of the
;;; file.  The SUPRA values of the second output token are empty
;;; ("TONE , LEN , ..."), and the +POSITION string is never closed before the
;;; text continues with what appears to be the tail of a different rule
;;; ("+OUTPUT < [STEM #1, ...").  It looks as if text between a `<' and a
;;; following `>' was lost.  Restore this rule (and the rule whose tail
;;; follows it) from version control; the damaged text is kept verbatim below.
praf_split := chart_mapping_rule &
  [+INPUT < pron-or-resump-do-irule &
    [STEM < ^(..*)(ni|ka|ki|shi|ta|[mks]u)$ >,
     SYNSEM.LOCAL.CAT.HEAD verb,
     SUPRA [TONE [LIST <#tpraf . #thost>, LAST #tlast],
            LEN [LIST <#lpraf & short . #lhost>, LAST #llast],
            PRFX #prfx ] ]>,
   +CONTEXT <>,
   +OUTPUT <[STEM < "${I1:STEM.FIRST:1}" >,
     SUPRA supra_reent &
      [TONE [LIST #thost, LAST #tlast],
       LEN [LIST #lhost & [--TL #thost], LAST #llast & null & [--TL.--LEFT #t-prfx ]],
       PRFX #prfx & [TONE #t-prfx]] ],
    [STEM < "${I1:STEM.FIRST:2}" >,
     SUPRA [TONE , LEN , PRFX [TONE , LEN ]]] >,
   +POSITION "I1@O1,I1@O2,O1, +OUTPUT < [STEM #1, SUPRA supra_reent & [PRFX #prfx & [TONE #t-prfx], TONE #tone & [LIST #tl, LAST null], LEN #len & [LIST.--TL #tl, LAST null & [--TL.--LEFT #t-prfx]] ] ] >, +POSITION "I1@O1"].
;;; Disabled rule: reduplicant_copy.  As written it would replace a
;;; "__REDUP__" placeholder stem with the string found under
;;; MORPH.--REDUP.--STEM followed by a hyphen.
;;;
;;; NOTE(review): the text from the +POSITION line onwards looks damaged in
;;; this copy: the +POSITION string runs straight into what appears to be the
;;; body of a second (live) variant of the rule, whose head is missing.  The
;;; damaged text is kept verbatim inside comment lines; restore from version
;;; control before re-enabling anything here.
; reduplicant_copy := chart_mapping_rule &
; [+INPUT < reduplicant-lex &
; [STEM < "__REDUP__" >,
; SYNSEM #ss,
; SUPRA #supra,
; INFLECTED #infl,
; MORPH #morph & [--REDUP [--STEM < ^(.*)$ > ]] ] >,
; +CONTEXT < [MORPH.--REDUP [--STEM < ^(.*)$ > ] ] >,
; +OUTPUT <[STEM < "${I1:MORPH.--REDUP.--STEM.FIRST:1}-" >,
; SYNSEM #ss,
; SUPRA #supra,
; INFLECTED #infl,
; MORPH #morph] >,
; +POSITION "I1@O1,I1, SYNSEM #ss, SUPRA #supra, INFLECTED #infl, MORPH #morph & [--REDUP [--STEM < ^(.*)$ > ]] ] >, +OUTPUT <[STEM < "${I1:MORPH.--REDUP.--STEM.FIRST:1}-" >, SYNSEM #ss, SUPRA #supra, INFLECTED #infl, MORPH #morph] >, +POSITION "I1@O1"].
; +CONTEXT < [INFLECTED infl-word,
; MORPH.--REDUP [--STEM < ^(.*)$ > ] ] >,
;;; NOTE(review): the next line is the tail of a rule that writes "áí";
;;; by analogy with au_h_short_tmr below this was presumably
;;; ai_h_short_tmr, whose head is missing from this copy -- confirm.
;;; ,I1 ] >, +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}áí${I1:STEM.FIRST:2}" >] > ].

;;; Tone/length marking of diphthongs and long vowels.
;;; Each rule below matches a STEM in which the target vowel (or diphthong)
;;; is followed only by characters other than plain a/e/i/o/u, and rewrites
;;; it with diacritics.  The supertypes (output_h_short_tmt, ...) are defined
;;; elsewhere; presumably they select tokens by the tone (h / l / hl) and
;;; length (short / long) recorded under SUPRA -- confirm there.

;;; Diphthongs: both vowel letters receive the diacritic.
au_h_short_tmr := output_h_short_tmt &
  [ +INPUT < [ STEM < ^(.*)au([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}áú${I1:STEM.FIRST:2}" > ] > ].

ai_l_short_tmr := output_l_short_tmt &
  [ +INPUT < [ STEM < ^(.*)ai([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}àì${I1:STEM.FIRST:2}" > ] >].

au_l_short_tmr := output_l_short_tmt &
  [ +INPUT < [ STEM < ^(.*)au([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}àù${I1:STEM.FIRST:2}" > ] >].

ai_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT < [ STEM < ^(.*)ai([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}âî${I1:STEM.FIRST:2}" > ] >].

au_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT < [ STEM < ^(.*)au([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}âû${I1:STEM.FIRST:2}" > ] >].

;;; Long vowels: written as a doubled, accented vowel letter.
a_h_long_tmr := output_h_long_tmt &
  [ +INPUT < [ STEM < ^(.*)a([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}áá${I1:STEM.FIRST:2}" > ] >].

e_h_long_tmr := output_h_long_tmt &
  [ +INPUT < [ STEM < ^(.*)e([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}éé${I1:STEM.FIRST:2}" > ] >].
;;; Long vowels (continued).
;;; Every rule in this group finds a plain vowel letter that is followed only
;;; by characters other than a/e/i/o/u up to the end of the STEM, and replaces
;;; it with the doubled, accented letter.  Acute is used by the *_h_* rules,
;;; grave by the *_l_* rules and circumflex by the *_hl_* rules.  The
;;; supertypes output_{h,l,hl}_long_tmt are defined elsewhere.

;;; --- acute (h) -------------------------------------------------------
i_h_long_tmr := output_h_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)i([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}íí${I1:STEM.FIRST:2}" > ] > ].

o_h_long_tmr := output_h_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)o([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}óó${I1:STEM.FIRST:2}" > ] > ].

u_h_long_tmr := output_h_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)u([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}úú${I1:STEM.FIRST:2}" > ] > ].

;;; --- grave (l) -------------------------------------------------------
a_l_long_tmr := output_l_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)a([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}àà${I1:STEM.FIRST:2}" > ] > ].

e_l_long_tmr := output_l_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)e([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}èè${I1:STEM.FIRST:2}" > ] > ].

i_l_long_tmr := output_l_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)i([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ìì${I1:STEM.FIRST:2}" > ] > ].

o_l_long_tmr := output_l_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)o([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}òò${I1:STEM.FIRST:2}" > ] > ].

u_l_long_tmr := output_l_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)u([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ùù${I1:STEM.FIRST:2}" > ] > ].

;;; --- circumflex (hl) -------------------------------------------------
a_hl_long_tmr := output_hl_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)a([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ââ${I1:STEM.FIRST:2}" > ] > ].

e_hl_long_tmr := output_hl_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)e([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}êê${I1:STEM.FIRST:2}" > ] > ].

i_hl_long_tmr := output_hl_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)i([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}îî${I1:STEM.FIRST:2}" > ] > ].

o_hl_long_tmr := output_hl_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)o([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ôô${I1:STEM.FIRST:2}" > ] > ].

u_hl_long_tmr := output_hl_long_tmt &
  [ +INPUT  < [ STEM < ^(.*)u([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ûû${I1:STEM.FIRST:2}" > ] > ].
;;; Short vowels.
;;; Same matching scheme as for the long vowels: the target is a plain vowel
;;; letter followed only by characters other than a/e/i/o/u up to the end of
;;; the STEM.  Here the replacement is a single accented letter: acute in the
;;; *_h_* rules, grave in the *_l_* rules, circumflex in the *_hl_* rules.
;;; The supertypes output_{h,l,hl}_short_tmt are defined elsewhere.

;;; --- acute (h) -------------------------------------------------------
a_h_short_tmr := output_h_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)a([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}á${I1:STEM.FIRST:2}" > ] > ].

e_h_short_tmr := output_h_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)e([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}é${I1:STEM.FIRST:2}" > ] > ].

i_h_short_tmr := output_h_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)i([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}í${I1:STEM.FIRST:2}" > ] > ].

o_h_short_tmr := output_h_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)o([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ó${I1:STEM.FIRST:2}" > ] > ].

u_h_short_tmr := output_h_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)u([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ú${I1:STEM.FIRST:2}" > ] > ].

;;; --- grave (l) -------------------------------------------------------
a_l_short_tmr := output_l_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)a([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}à${I1:STEM.FIRST:2}" > ] > ].

e_l_short_tmr := output_l_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)e([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}è${I1:STEM.FIRST:2}" > ] > ].

i_l_short_tmr := output_l_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)i([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ì${I1:STEM.FIRST:2}" > ] > ].

o_l_short_tmr := output_l_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)o([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ò${I1:STEM.FIRST:2}" > ] > ].

u_l_short_tmr := output_l_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)u([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ù${I1:STEM.FIRST:2}" > ] > ].

;;; --- circumflex (hl) -------------------------------------------------
a_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)a([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}â${I1:STEM.FIRST:2}" > ] > ].

e_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)e([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ê${I1:STEM.FIRST:2}" > ] > ].

i_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)i([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}î${I1:STEM.FIRST:2}" > ] > ].
;;; Short vowels, circumflex (hl), continued: plain o/u followed only by
;;; characters other than a/e/i/o/u is replaced by the accented letter.
o_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)o([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}ô${I1:STEM.FIRST:2}" > ] > ].

u_hl_short_tmr := output_hl_short_tmt &
  [ +INPUT  < [ STEM < ^(.*)u([^aeiou]*)$ > ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}û${I1:STEM.FIRST:2}" > ] > ].

;;; Compositing unicode diacritics not well supported downstream.
;;;
;;; The *_composite_tmr rules rewrite the two-letter accented spellings
;;; into a more compact form.  Each rule replaces one occurrence of its
;;; pattern in place (+POSITION "O1@I1") and passes SUPRA through unchanged.

;;; --- diphthongs: keep a diacritic on the first letter at most ----------
ai_h_short_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)áí(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ai${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

ai_l_short_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)àì(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ài${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; NOTE(review): this rule matches acute+grave ("áì"), whereas
;;; ai_hl_short_tmr writes circumflex+circumflex ("âî").  Unless another
;;; rule produces the acute+grave spelling, this pattern never fires and the
;;; "âî" case is handled by i_long_simplify_tmr instead -- confirm intent.
ai_hl_short_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)áì(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}âi${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

au_h_short_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)áú(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}au${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

au_l_short_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)àù(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}àu${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; NOTE(review): same mismatch as for ai_hl_short_composite_tmr: the
;;; pattern is "áù" while au_hl_short_tmr writes "âû" -- confirm intent.
au_hl_short_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)áù(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}âu${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; --- long vowels, grave: doubled letter -> one letter, macron + grave --
a_l_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)àà(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ā̀${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

e_l_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)èè(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ḕ${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].
;;; Long-vowel composite rules (continued).
;;; Each rule replaces one doubled, accented vowel in the STEM by a single
;;; letter carrying a macron plus the tone diacritic, in place
;;; (+POSITION "O1@I1"), passing SUPRA through unchanged.

;;; --- grave: doubled letter -> macron + grave --------------------------
i_l_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)ìì(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ī̀${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

o_l_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)òò(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ṑ${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

u_l_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)ùù(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ū̀${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; --- circumflex: doubled letter -> macron + circumflex ----------------
a_hl_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)ââ(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ā̂${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; The pattern is the doubled circumflex "êê", which is what e_hl_long_tmr
;;; writes and what the a/i/o/u siblings in this group match.  (It used to be
;;; acute+grave "éè", which no visible rule produces, so falling long e was
;;; left to e_long_simplify_tmr and came out unlike the other four vowels.)
e_hl_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)êê(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ē̂${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

i_hl_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)îî(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ī̂${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

o_hl_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)ôô(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ō̂${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

u_hl_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)ûû(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ū̂${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; --- acute: doubled letter -> macron only (no tone diacritic) ----------
a_h_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)áá(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ā${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].
;;; Long-vowel composite rules, acute (continued): a doubled acute vowel is
;;; replaced in place by a single letter with a macron and no tone
;;; diacritic.  SUPRA is passed through unchanged.
e_h_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)éé(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ē${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

i_h_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)íí(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ī${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

o_h_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)óó(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ō${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

u_h_long_composite_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*)úú(.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}ū${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; Remove second diacritic on long vowels.
;;;
;;; Capture group 1 ends in an accented vowel, which is kept; the accented
;;; vowel right after it is replaced by the plain letter.  For i and u the
;;; preceding vowel may also be an accented a, so the second half of an
;;; accented ai / au sequence is stripped as well.
a_long_simplify_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*[áàâ])[àâá](.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}a${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

e_long_simplify_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*[éèê])[éèê](.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}e${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

i_long_simplify_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*[íìîáàâ])[íìî](.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}i${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

o_long_simplify_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*[óòô])[óòô](.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}o${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].

u_long_simplify_tmr := no_context_cm_rule &
  [ +INPUT    < [ STEM < ^(.*[úùûáàâ])[úùû](.*)$ >,
                  SUPRA #supra ] >,
    +OUTPUT   < [ STEM < "${I1:STEM.FIRST:1}u${I1:STEM.FIRST:2}" >,
                  SUPRA #supra ] >,
    +POSITION "O1@I1" ].
;;; Do not mark high:
;;; each rule replaces an acute-accented vowel in the STEM by the plain
;;; letter, in place (+POSITION "O1@I1"), passing SUPRA through unchanged.
;;; Because the first capture group is greedy, one application rewrites the
;;; last acute vowel of the stem; presumably the rule is re-applied until no
;;; acute vowel is left -- confirm against the chart-mapping engine in use.
a_h_short_simplify_tmr:= no_context_cm_rule &
  [ +INPUT < [ STEM < ^(.*)á(.*)$ >,
    SUPRA #supra ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}a${I1:STEM.FIRST:2}" >,
    SUPRA #supra ] >,
    +POSITION "O1@I1" ].

e_h_short_simplify_tmr:= no_context_cm_rule &
  [ +INPUT < [ STEM < ^(.*)é(.*)$ >,
    SUPRA #supra ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}e${I1:STEM.FIRST:2}" >,
    SUPRA #supra ] >,
    +POSITION "O1@I1" ].

i_h_short_simplify_tmr:= no_context_cm_rule &
  [ +INPUT < [ STEM < ^(.*)í(.*)$ >,
    SUPRA #supra ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}i${I1:STEM.FIRST:2}" >,
    SUPRA #supra ] >,
    +POSITION "O1@I1" ].

o_h_short_simplify_tmr:= no_context_cm_rule &
  [ +INPUT < [ STEM < ^(.*)ó(.*)$ >,
    SUPRA #supra ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}o${I1:STEM.FIRST:2}" >,
    SUPRA #supra ] >,
    +POSITION "O1@I1" ].

u_h_short_simplify_tmr:= no_context_cm_rule &
  [ +INPUT < [ STEM < ^(.*)ú(.*)$ >,
    SUPRA #supra ] >,
    +OUTPUT < [ STEM < "${I1:STEM.FIRST:1}u${I1:STEM.FIRST:2}" >,
    SUPRA #supra ] >,
    +POSITION "O1@I1" ].

;;; NOTE(review): the block comment opened below disables initial_caps (a
;;; rule that upper-cases the first letter of a stem).  In this copy the text
;;; looks damaged from its +POSITION string onwards: the string is never
;;; closed, no closing block-comment marker is visible, and the text runs
;;; into what appears to be the body of a different, two-input rule that
;;; concatenates the stems of I1 and I2.  The text is kept verbatim on one
;;; line; restore it from version control.
#| initial_caps := no_context_cm_rule & [ +INPUT < [ STEM < ^([a-zàâèêìîòôùûɓɗƙ])(.*)$ >, ] >, +OUTPUT < [ STEM < "${uc(I1:STEM.FIRST:1)}${I1:STEM.FIRST:2}" >, ] >, +POSITION "^ ], [STEM < ^(.*)$>, SYNSEM #ss] >, +CONTEXT <>, +OUTPUT <[SYNSEM #ss, STEM < "${I1:STEM.FIRST:1}${I2:STEM.FIRST:1}" > ]>, +POSITION "O1@I1,O1@I2,I1