#include "grammar.h"

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

using namespace std;

namespace {

/**
 * Collects the double-quoted tokens found on `line` into `orth`.
 *
 * The first quoted token becomes the start of `orth`; every further quoted
 * token on the same line is appended with a '_' separator (multi-word entry).
 *
 * @param line  the line of a lexicon entry that mentions ORTH
 * @param orth  receives the joined tokens; only meaningful when true is returned
 * @return true when at least one complete "..." token was found. An opening
 *         quote without a matching closing quote is reported on stderr; if
 *         that happens after the first token, the tokens gathered so far are
 *         still returned.
 */
bool extractOrth(const string &line, string &orth)
{
  orth.clear();
  bool have_token = false;

  string::size_type open = line.find('"');
  while (open != string::npos) {
    const string::size_type close = line.find('"', open + 1);
    if (close == string::npos) {
      cerr << "Unmatched quotes at " << line << endl;
      return have_token;
    }
    if (have_token)
      orth += "_";
    orth += line.substr(open + 1, close - (open + 1));
    have_token = true;
    open = line.find('"', close + 1);
  }
  return have_token;
}

} // namespace

/**
 * Reads the grammar file `fname` and loads every file it includes from a
 * lex-entry / generic-lex-entry section (via readLexicon) or from a lex-rule
 * section (via readLexRules). Included names are resolved relative to the
 * directory of `fname`, and ".tdl" is appended when the name lacks it.
 *
 * Terminates the process with exit(1) if the grammar file cannot be opened.
 */
Grammar::Grammar(const string &fname)
{
  gfilename = fname;
  gfile = boost::filesystem::path(fname);
  gpath = boost::filesystem::path(gfile.parent_path());

  //read the lexicon files to fill in the lex entry to letype map
  ifstream gf(gfilename.c_str());
  if (!gf.is_open()) {
    cerr << "Couldn't open grammar file " << gfilename << endl;
    exit(1);
  }

  enum FileState {OUT, LEXENT, LEXRULE};
  FileState gfstate = OUT;
  const string include_tag(":include \"");
  const string tdl_ext(".tdl");

  string gfline;
  // Looping on the result of getline (rather than on eof()) also processes a
  // final line without a trailing newline and stops on any read failure.
  while (getline(gf, gfline)) {
    if (gfline.compare(0, 1, ";") == 0)
      continue; //comment

    if (gfstate == OUT) {
      if (gfline.find(":begin :instance :status lex-entry.") != string::npos
          || gfline.find(":begin :instance :status generic-lex-entry.")
             != string::npos) {
        gfstate = LEXENT;
      }
      if (gfline.find(":begin :instance :status lex-rule.") != string::npos) {
        gfstate = LEXRULE;
      }
      continue;
    }

    if (gfline.find(":end :instance.") != string::npos) {
      // An include on the line that closes the section is not loaded.
      gfstate = OUT;
      continue;
    }

    const string::size_type tag_pos = gfline.find(include_tag);
    if (tag_pos == string::npos)
      continue;

    // The file name runs from just after the opening quote to the closing
    // quote, or to the end of the line when the closing quote is missing.
    const string::size_type name_begin = tag_pos + include_tag.length();
    const string::size_type name_end = gfline.find('"', name_begin);
    string lfilename = (name_end == string::npos)
        ? gfline.substr(name_begin)
        : gfline.substr(name_begin, name_end - name_begin);

    if (lfilename.length() < tdl_ext.length()
        || lfilename.compare(lfilename.length() - tdl_ext.length(),
                             tdl_ext.length(), tdl_ext) != 0) {
      lfilename += tdl_ext;
    }

    const string lfilepath = gpath.string() + "/" + lfilename;
    if (gfstate == LEXENT)
      readLexicon(lfilepath);
    else if (gfstate == LEXRULE)
      readLexRules(lfilepath);
  }
}

Grammar::~Grammar()
{
}

/**
 * Returns the type recorded for lexical entry `le`, or an empty string when
 * the entry is unknown.
 */
string Grammar::letype(string &le)
{
  if (!letypes.count(le))
    return string();
  return letypes[le];
}

/**
 * Returns the orthography recorded for lexical entry `le`, or an empty
 * string when the entry is unknown.
 */
string Grammar::lexeme(string &le)
{
  if (!lexemes.count(le))
    return string();
  return lexemes[le];
}

/**
 * Tells whether `rule` was defined in one of the lex-rule files that were read.
 */
bool Grammar::is_lexrule(string rule)
{
  return lex_rules.count(rule) != 0;
}

/**
 * Reads the rule file `fn` and records the name of every rule it defines.
 *
 * A rule definition is a line whose second whitespace-separated token is
 * ":="; its first token is the rule name. Lines starting with ';' are
 * skipped, as is everything between a line that is exactly "#|" and a line
 * that is exactly "|#".
 *
 * Terminates the process with exit(1) if the file cannot be opened.
 */
void Grammar::readLexRules(const string &fn)
{
  ifstream lexrf(fn.c_str());
  if (!lexrf.is_open()) {
    cerr << "Couldn't open lex file " << fn << endl;
    exit(1);
  }

  bool in_comment = false;
  string line;
  while (getline(lexrf, line)) {
    if (in_comment) {
      if (line == "|#")
        in_comment = false;
      continue;
    }
    if (line == "#|") {
      in_comment = true;
      continue;
    }
    if (line.compare(0, 1, ";") == 0)
      continue; //comment

    string rulename, delim;
    istringstream tmpline(line);
    tmpline >> rulename >> delim;
    if (delim == ":=")
      lex_rules.insert(rulename);
  }
}

/**
 * Reads the lexicon file `fn` and fills the entry->type and
 * entry->orthography maps.
 *
 * An entry starts on a line of the form "<name> := <type> & ...". The
 * following lines are scanned up to the first line containing "ORTH" or the
 * first empty line; the quoted tokens on the ORTH line, joined with '_',
 * become the entry's orthography. Everything between a line that is exactly
 * "#|" and a line that is exactly "|#" is skipped.
 *
 * Terminates the process with exit(1) if the file cannot be opened.
 */
void Grammar::readLexicon(const string &fn)
{
  ifstream lexiconf(fn.c_str());
  if (!lexiconf.is_open()) {
    cerr << "Couldn't open lexicon file " << fn << endl;
    exit(1);
  }

  bool in_comment = false;
  string line;
  while (getline(lexiconf, line)) {
    if (in_comment) {
      if (line == "|#")
        in_comment = false;
      continue;
    }
    if (line == "#|") {
      in_comment = true;
      continue;
    }

    string lename, delim, let, amp;
    istringstream tmpline(line);
    tmpline >> lename >> delim >> let >> amp;
    if (delim != ":=" || amp != "&")
      continue;

    letypes.insert(tSSValue(lename, let));

    // Advance to the ORTH line of this entry. A failed getline leaves `line`
    // untouched, so the stream state has to be checked here; otherwise a file
    // that ends inside an entry would keep this loop spinning forever.
    bool got_line = !getline(lexiconf, line).fail();
    while (got_line && !line.empty() && line.find("ORTH") == string::npos)
      got_line = !getline(lexiconf, line).fail();

    if (!got_line || line.empty())
      continue; //entry without ORTH

    string orth;
    if (extractOrth(line, orth))
      lexemes.insert(tSSValue(lename, orth));
  }
}