#include #include #include #include #include #include #include "unicode.h" #include "grammar.h" #include "profile.h" #include "derivReader.h" #include "derivReader.cpp" #include "tdl_options.h" using namespace std; void collectleaves(delphin::Grammar &g, delphin::Node s, vector &ancestors, vector &leaves); void parse_options(int argc, char **argv, string *gfname, string *pname, bool *goldonly, int *parsenum, int *itemnum, tdlOptions *tdl_opts); int main (int argc, char **argv) { // setting option variables string gfname, pname; tdlOptions *tdl_opts = new tdlOptions(); int parsenum, itemnum; bool goldonly; try { parse_options(argc, argv, &gfname, &pname, &goldonly, &parsenum, &itemnum, tdl_opts); } catch ( const boost::program_options::error& e ) { cerr << "Invalid command: " << e.what() << "\nExiting." << endl; exit(1); } //caseclass separator set while parsing options string caseclass_sep = tdl_opts->get("ut-caseclass_separator"); // UTF-8 encoder initialize_encoding_converter("utf-8"); delphin::Grammar g(gfname); delphin::Profile p(pname); vector leaves; //used in collectleaves() delphin::DerivReader > reader(g, leaves, NULL, NULL, &collectleaves); pair,string> result = p.getResult(); tIid context = -1; // context is item id int event = -1; // event is parse id while (result.first.first >= 0) {//new item/context if (result.first.first != context) {//new item/context if (context != -1) { //finish last context }//finished last context } context = result.first.first; event = result.first.second; if (parsenum >= 0 && event != parsenum) { result = p.getResult(); continue; } if (goldonly && (p.numGold(context) > 1 || !p.isGold(context, event))) { result = p.getResult(); continue; } if (itemnum > -1 && context != itemnum) { result = p.getResult(); continue; } //do stuff with result leaves.clear(); reader.readDeriv(result.second); result = p.getResult(); for(vector::iterator it = leaves.begin(); it != leaves.end(); ++it) { cout << it->surface << (it->caseclass.empty()?"":string(caseclass_sep+it->caseclass)) << "\t" << it->tags[0] << endl; } cout << endl; } //finish last context return 0; } void parse_options(int argc, char **argv, string *gfname, string *pname, bool *goldonly, int *parsenum, int *itemnum, tdlOptions *tdl_opts) { namespace po = boost::program_options; po::options_description visible("Options"); visible.add_options() ("help,h", "This usage information.") ("config,c", po::value(), "Configuration file that sets caseclass separator") ("goldonly,g", "Only extract tags from 'gold' trees") ("single,s", po::value(itemnum)->default_value(-1), "Select a specific item, default (-1): all") ("result,r", po::value(parsenum)->default_value(-1), "Select a specific result number, default (-1): all.") ; po::options_description hidden("Hidden options"); hidden.add_options() ("grammar-file", po::value(gfname), "grammar .tdl file") ("profile", po::value(pname), "profile") ; po::options_description cmd_line ("Command line options"); cmd_line.add(visible).add(hidden); po::positional_options_description p; p.add("grammar-file",1).add("profile",1); po::variables_map vm; po::store(po::command_line_parser(argc, argv). options(cmd_line).positional(p).run(), vm); notify(vm); if (vm.count("help")) { cout << "Usage: " << argv[0] << " [options] " << "grammar-file profile" << endl; cout << visible << endl; exit(0); } if (!vm.count("grammar-file") || !vm.count("profile")) { cerr << "Insufficient arguments given." << endl; cerr << "Usage: " << argv[0] << " [options] " << "grammar-file profile" << endl; cerr << visible << endl; exit(1); } if (vm.count("config")) { try { string cfname = vm["config"].as(); tdl_opts->read(cfname); if (tdl_opts->lookup("ut-caseclass_separator") == NULL) { cerr << "Warning: no ut-caseclass_separator set in config file. " << "Using default separator ▲." << endl; tdl_opts->set("ut-caseclass_separator", "▲"); } } catch (exception &e) { cerr << "Error reading config file: " << e.what() << endl; exit(1); } } else { cerr << "Warning: no config file given to set ut-caseclass_separator. " << "Using default separator ▲." << endl; tdl_opts->set("ut-caseclass_separator", "▲"); } if (vm.count("goldonly")) *goldonly = true; else *goldonly = false; } void collectleaves(delphin::Grammar &g, delphin::Node s, vector &ancestors, vector &leaves) { leaves.push_back(s); if (leaves.back().surface.at(0) == '"' && leaves.back().surface.at(leaves.back().surface.length()-1) == '"') { leaves.back().surface.erase(0,1); leaves.back().surface.erase(leaves.back().surface.length()-1,1); } string tag(ancestors.back().surface); for (int x=ancestors.size()-2; x >= 0 && g.is_lexrule(ancestors[x].surface); x--) { tag+= ":"; tag+= ancestors[x].surface; } leaves.back().tags.push_back(tag); }