#ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include "unicode.h" #include "grammar.h" #include "profile.h" #include "derivReader.h" #include "derivReader.cpp" using namespace std; enum TagType {LETYPE, LETYPEMORPH}; TagType tagtype; bool affix; void collectleaves(Grammar &g, tToken s, vector &ancestors, vector &leaves); void parse_options(int argc, char **argv, string *gfname, string *pname, bool *goldonly, TagType *tagtype, int *parsenum, bool *affix); int main (int argc, char **argv) { // setting option variables string gfname, pname; int parsenum; bool goldonly; parse_options(argc, argv, &gfname, &pname, &goldonly, &tagtype, &parsenum, &affix); // UTF-8 encoder initialize_encoding_converter("utf-8"); Grammar g(gfname); Profile p(pname); vector leaves; //used in collectleaves() DerivReader > reader(g, leaves, NULL, NULL, &collectleaves); pair,string> result = p.getResult(); tIid context = -1; // context is item id int event = -1; // event is parse id while (result.first.first >= 0) {//new item/context if (result.first.first != context) {//new item/context if (context != -1) { //finish last context }//finished last context } context = result.first.first; event = result.first.second; if (parsenum >= 0 && event != parsenum) { result = p.getResult(); continue; } if (goldonly && (p.numGold(context) > 1 || !p.isGold(context, event))) { result = p.getResult(); continue; } //do stuff with result leaves.clear(); reader.readDeriv(result.second); result = p.getResult(); for(vector::iterator it = leaves.begin(); it != leaves.end(); ++it) { cout << it->surface() << "\t" << it->tag() << endl; } cout << endl; } //finish last context return 0; } void parse_options(int argc, char **argv, string *gfname, string *pname, bool *goldonly, TagType *tagtype, int *parsenum, bool *affix) { namespace po = boost::program_options; po::options_description visible("Options"); visible.add_options() ("help,h", "This usage information.") ("infl,i", "Tags include morphological inflection rules.") ("affix,a", "Tags include affix (punctuation) rules.") ("goldonly,g", "Only extract tags from 'gold' trees") ("result,r", po::value(parsenum)->default_value(-1), "Select a specific result number, default (-1): all.") ; po::options_description hidden("Hidden options"); hidden.add_options() ("grammar-file", po::value(gfname), "grammar .tdl file") ("profile", po::value(pname), "profile") ; po::options_description cmd_line ("Command line options"); cmd_line.add(visible).add(hidden); po::positional_options_description p; p.add("grammar-file",1).add("profile",1); po::variables_map vm; po::store(po::command_line_parser(argc, argv). options(cmd_line).positional(p).run(), vm); notify(vm); if (vm.count("help")) { cout << "Usage: " << argv[0] << " [options] " << "grammar-file profile" << endl; cout << visible << endl; exit(0); } if (!vm.count("grammar-file") || !vm.count("profile")) { cerr << "Insufficient arguments given." << endl; cerr << "Usage: " << argv[0] << " [options] " << "grammar-file profile" << endl; cerr << visible << endl; exit(1); } if (vm.count("infl")) *tagtype = LETYPEMORPH; else *tagtype = LETYPE; if (vm.count("goldonly")) *goldonly = true; else *goldonly = false; if (vm.count("affix")) { if (vm.count("infl")) *affix = true; else { cerr << "Warning: affix rules only possible with -infl " << "option, ignoring -affix." << endl; *affix = false; } } else *affix = false; } void collectleaves(Grammar &g, tToken s, vector &ancestors, vector &leaves) { leaves.push_back(s); string tag(ancestors.back().surface()); if (tagtype == LETYPEMORPH) { for (int x=ancestors.size()-2; x >= 0 && g.is_lexrule(ancestors[x].surface()); x--) { string infl = ancestors[x].surface(); if (affix || (infl.compare(infl.length()-3, 3,"plr") != 0 && (infl.length() < 5 || (infl.length() >=5 && infl.compare(0,5,"punct") != 0)))) { tag+=":"; tag+=infl; } } } leaves.back().tag(tag); }