this directory contains files specific to running DELPH-IN tools on the UiO
TITAN cluster.  the scheduler used on TITAN is SLURM (plus Maui), so some of
these files might be easy to adapt to another cluster running SLURM.  using
SGE instead should not be hard either, but in any case some of the defaults
need to be tuned to the specific hardware of each cluster.

the following are a few usage examples.  note that, before submitting batch
jobs, it absolutely must be the case that all source files are compiled, cache
files built, and such, as jobs are likely to start up in parallel.  to
accomplish this for the relevant (T)ERG configurations, for example, it works
to complete two small interactive jobs, e.g.

  ./parse --terg mrs
  ./parse --terg+tnt/speech mrs

-------------------------------------------------------------------------------
update all the profiles of a forthcoming new ERG release
-------------------------------------------------------------------------------

for i in csli fracas mrs trec \
         hike jh0 jh1 jh2 jh3 jh4 jh5 tg1 tg2 ps jhk tgk psk rondane; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg --best 500 --update --compress --thin $i;
  sleep 0.5;
done

for i in jhu psu tgu cb sc01 sc02 sc03; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg+tnt --best 500 --update --compress --thin $i;
  sleep 0.5;
done

for i in ws01 ws02 ws03 ws04 ws05 ws06 ws07 ws08 ws09 ws10 ws11 ws12 ws13; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg+tnt/wiki --best 500 --update --compress --thin $i;
  sleep 0.5;
done

for i in ecoc ecos ecpa ecpr vm6 vm13 vm31 vm32; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg+tnt/speech --best 500 --update --compress --thin $i;
  sleep 0.5;
done

-------------------------------------------------------------------------------
parse the Penn Treebank (assuming the `ptb' add-on SVN component)
-------------------------------------------------------------------------------

for i in wsj00 wsj01 wsj02 wsj03 wsj04 wsj05 wsj06 wsj07 wsj08 wsj09 \
         wsj10 wsj11 wsj12 wsj13 wsj14 wsj15 wsj16 wsj17 wsj18 wsj19 \
         wsj20 wsj21 wsj22 wsj23 wsj24; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg/ptb --best 1 --compress $i;
  sleep 0.5;
done

-------------------------------------------------------------------------------
parse the GENIA Treebank
-------------------------------------------------------------------------------

for i in gtb00 gtb01 gtb02 gtb03 gtb04 gtb05 gtb06 gtb07 gtb08 gtb09 \
         gtb10 gtb11 gtb12 gtb13 gtb14 gtb15 gtb16 gtb17 gtb18 \
         pgtb00 pgtb01 pgtb02 pgtb03 pgtb04 pgtb05 pgtb06 pgtb07 pgtb08 pgtb09; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg/genia --best 1 --compress $i;
  sleep 0.5;
done

-------------------------------------------------------------------------------
for a complete run on English Wikipedia (assuming some local files)
-------------------------------------------------------------------------------

{
  for file in $LOGONROOT/uio/wikiwoods/txt/?????.txt.gz; do
    name=$(basename $file .gz); name=$(basename $name .txt);
    echo \
      /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time --terg+tnt/wiki \
      --best 1 --target "wikiwoods/1010/$name" --text --compress $file; \
  done
} > ~/wikiwoods.job

$LOGONROOT/uio/titan/trickle --start --limit 400 ~/wikiwoods.job

while true; do
  $LOGONROOT/uio/titan/trickle --limit 400 ~/wikiwoods.job;
  sleep 60;
done

-------------------------------------------------------------------------------
to confirm basic consistency of parsing profiles
-------------------------------------------------------------------------------

{
  ti=0; tr=0;
  for d in *; do
    if [ -f $d/parse.gz ]; then
      i=$(zcat $d/item.gz | wc -l); ti=$[$ti + $i];
      o=$(zcat $d/parse.gz | wc -l);
      r=$(zcat $d/result.gz | wc -l); tr=$[$tr + $r];
      if [ $i != $o ]; then
        echo "incomplete: $d ($i vs. $o)";
      else
        echo "$d $i $r" >> .counts
      fi;
    else
      echo "missing: $d";
    fi;
  done
  echo "$tr results of $ti items: $[$tr * 100 / $ti]% coverage"
}

-------------------------------------------------------------------------------
while playing with CoNLL 2007 data (for parser stacking)
-------------------------------------------------------------------------------

for i in 02 03 04 05 06 07 08 09 \
         10 11 12 13 14 15 16 17 18 19 \
         20 21 22 23 24; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/parse --time \
    --terg/conll --best 1 conll${i};
  sleep 0.5;
done;

-------------------------------------------------------------------------------
blazing (via a pseudo-update) from the GTB annotation
-------------------------------------------------------------------------------

for i in 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18; do
  /usr/bin/sbatch $LOGONROOT/uio/titan/redwoods \
    --terg --default --epilogue $LOGONROOT/mu/epilogue.lisp \
    --gold erg/1010/gtb${i}/10-12-19/pet.500 --update \
    erg/1010/gtb${i}/10-12-19/pet.500;
  sleep 0.5;
done