#include "profile.h"
#include <iostream>
#include <sstream>
#include <boost/foreach.hpp>

#define foreach BOOST_FOREACH

using namespace std;
namespace fs = boost::filesystem;

// Loads the profile rooted at directory `pn` (a trailing '/' is ignored).
//
// If the directory contains a file named "virtual", every non-empty line of
// that file names, in double quotes, a sibling directory holding an actual
// profile, optionally followed by a repetition count; otherwise `pn` itself
// is the single actual profile. For each actual profile the "relations",
// "item", "parse" and "preference" files are read (the last three through
// openf(), so a ".gz" variant is accepted).
//
// Every per-profile container (relations, items, readings, inputs,
// preferences) receives exactly one entry per actual profile, even when a
// data file could not be opened, so the profile index stored in
// itemToProfile is valid for all of them.
//
// Terminates the process with exit(1) when the directory does not exist,
// when a profile has no "relations" file, or when no profile was loaded.
Profile::Profile(const string &pn)
{
	pname = pn;
	int profilecounter=0;
	// An empty path must not reach at(): it would throw std::out_of_range.
	if (!pname.empty() && pname.at(pname.length()-1) == '/')
		pname.erase(pname.length()-1, 1); //remove trailing slash
	fs::path ip(pname);
	if (!fs::is_directory(ip)) {
		cerr << "Profile path '" << ip << "' not found. Cannot continue"
			 << endl;
		exit(1);
	}

	ifstream virtualf(string(ip.string()+"/virtual").c_str());
	if (virtualf.is_open()) { //virtual profile, so add actual profiles
		string prof_count;
		// Testing getline() itself (rather than eof()) also processes a
		// final line that is not terminated by a newline.
		while (getline(virtualf, prof_count)) {
			if (prof_count.empty())
				continue; // a blank line names no profile
			// erase quotes
			int prof_start = prof_count.find_first_of('"',0)+1;
			int prof_end = prof_count.find_last_of('"');
			string prof = prof_count.substr(prof_start, prof_end-prof_start);
			profiles.push_back(fs::path(ip.parent_path()/prof));

			// we extend the TSDB virtual profile syntax by allowing an
			// optional number of repetitions of the actual profile
			unsigned int reps = 1;
			istringstream repnum(prof_count.substr(prof_end+1));
			repnum >> reps;
			if (repnum.fail()) { reps = 1; } // no count given: use once
			profileRepCount.push_back(reps);
		}
	} else {
		profiles.push_back(ip); //not virtual
		profileRepCount.push_back(1);
	}

	for (vector<fs::path>::iterator piter = profiles.begin();
			piter != profiles.end(); ++piter) {
		// these need to be declared in scope, although they are
		// assigned in the openf function
		boost::iostreams::filtering_stream<boost::iostreams::input> in;
		ifstream fstream;

		// --- relations: file name -> (field name -> field position) ---
		map<File, map<Relation, Field> > *rptr
			= new map<File, map<Relation, Field> >;
		ifstream rfile(string((*piter).string()+"/relations").c_str());
		if (rfile.is_open()) {
			string line,fname, relation;
			int fieldcount = 0;
			while (getline(rfile, line)) {
				// length of the leading whitespace run
				string::size_type indent = 0;
				while (indent < line.length()
						&& isspace(static_cast<unsigned char>(line[indent])))
					++indent;
				if (indent == line.length())
					continue; // empty or whitespace-only line
				if (indent == 0) { //file name
					fname = line.substr(0, line.find(':'));
					(*rptr)[fname] = map<Relation,Field>();
					fieldcount = 0;
				} else { //indented line: next field of the current file
					line.erase(0, indent);
					relation = line.substr(0,line.find_first_of(" \t"));
					(*rptr)[fname][relation] = fieldcount++;
				}
			}
			relations.push_back(rptr);
			rfile.close();
		} else {
			cerr << "No relation file in profile. Exiting." << endl;
			exit(1);
		}

		// --- item: item id -> item input, plus item id -> profile index ---
		map<tIid, string> *iptr = new map<tIid, string>;
		if (openf(in, fstream, string((*piter).string()+"/item"))) {
			string line;
			while (getline(in, line)) {
				int item_id;
				istringstream(getField(line, (*rptr)["item"]["i-id"])) >> item_id;
				iptr->insert(tISValue(item_id,
					getField(line, (*rptr)["item"]["i-input"])));
				itemToProfile.insert(tIidIValue(item_id, profilecounter));
			}
			fstream.close();
			in.reset();
		} else {
			cerr << "Problem opening item file in " << (*piter).string()
				 << endl;
		}
		// pushed unconditionally to keep items index-aligned with profiles
		items.push_back(iptr);

		// --- parse: item id -> number of readings / parser input ---
		map<tIid, int> *readingsptr = new map<tIid, int>;
		map<tIid, string> *pinputptr = new map<tIid, string>;
		map<tIid, map<int, int> > *prefptr = new map<tIid, map<int, int> >;
		if (openf(in, fstream, string((*piter).string()+"/parse"))) {
			string line;
			while (getline(in, line)) {
				int item_id, num_readings;
				istringstream(getField(line, (*rptr)["parse"]["i-id"])) >> item_id;
				istringstream(getField(line, (*rptr)["parse"]["readings"]))
					>> num_readings;
				readingsptr->insert(tIidIValue(item_id, num_readings));
				pinputptr->insert(tISValue(item_id,
					getField(line, (*rptr)["parse"]["p-input"])));
				// every parsed item starts with an empty preference set
				prefptr->insert(map<tIid,map<int,int> >::value_type(item_id,
						map<int,int>()));
			}
			fstream.close();
			in.reset();
		} else {
			cerr << "Problem opening parse file in " << (*piter).string()
				 << endl;
		}
		// pushed unconditionally, for the same alignment reason as items
		readings.push_back(readingsptr);
		inputs.push_back(pinputptr);

		// --- preference: mark the recorded results of each item ---
		if (openf(in, fstream, string((*piter).string()+"/preference"))) {
			string line;
			while (getline(in, line)) {
				tIid item_id;
				int parse_id;
				istringstream(getField(line, (*rptr)["preference"]["parse-id"]))
					>> item_id;
				istringstream(getField(line, (*rptr)["preference"]["result-id"]))
					>> parse_id;
				((*prefptr)[item_id]).insert(tIidIValue(parse_id, 1));
			}
			fstream.close();
			in.reset();
		} else {
			cerr << "Problem opening preference file in " << (*piter).string() <<
				 "; no gold preferences recorded" << endl;
		}
		preferences.push_back(prefptr);

		profilecounter++;
	}
	if (!profilecounter) {
		cerr << "No profiles were loaded from " << pname << ". Cannot continue." << endl;
		exit(1);
	}
	resultprofilecount = 0;
	autoPrefsTopN = -1;
}

// Releases every heap-allocated per-profile map created by the constructor.
// Besides the item and preference maps, the relation, reading and input maps
// are allocated with new and stored by raw pointer, so they are freed here
// as well.
Profile::~Profile()
{
	for (size_t i = 0; i < relations.size(); ++i)
		delete relations[i];
	for (size_t i = 0; i < items.size(); ++i)
		delete items[i];
	for (size_t i = 0; i < readings.size(); ++i)
		delete readings[i];
	for (size_t i = 0; i < inputs.size(); ++i)
		delete inputs[i];
	for (size_t i = 0; i < preferences.size(); ++i)
		delete preferences[i];
}

// Opens `fname` for reading through the filter chain `in`.
//
// First tries "<fname>.gz" in binary mode and, if that exists, pushes a gzip
// decompressor in front of the file; otherwise falls back to the plain file
// `fname`. `filestream` is the underlying file and must stay alive for as
// long as `in` is read from.
//
// Returns true when one of the two files was opened and pushed onto `in`,
// false when neither could be opened (nothing is pushed in that case).
bool Profile::openf(
	boost::iostreams::filtering_stream<boost::iostreams::input> &in,
	ifstream &filestream, const string &fname)
{
	// Drop error flags left over from earlier use of this stream object.
	filestream.clear();
	filestream.open(string(fname+".gz").c_str(),
			ios_base::in | ios_base::binary);
	if (filestream.is_open()) {
		in.push(boost::iostreams::gzip_decompressor());
		in.push(filestream);
		return true;
	}

	// The failed attempt above set failbit. Older standard libraries do not
	// reset it on a later successful open(), so clear it explicitly.
	filestream.clear();
	filestream.open(fname.c_str());
	if (!filestream.is_open())
		return false;
	in.push(filestream);
	return true;
}

// Returns the text stored for `item` in the item table of the profile that
// holds it, or an empty string when the item is unknown.
string Profile::getItem(tIid item)
{
	if (itemToProfile.count(item) == 0)
		return string();

	const map<tIid,string> &texts = *items[itemToProfile[item]];
	map<tIid,string>::const_iterator hit = texts.find(item);
	if (hit == texts.end())
		return string();
	return hit->second;
}

// Returns the parser input recorded for `item` in the parse table of the
// profile that holds it, or an empty string when there is no such record.
string Profile::getInput(tIid item)
{
	if (itemToProfile.count(item) == 0)
		return string();

	const map<tIid,string> &parsed = *inputs[itemToProfile[item]];
	map<tIid,string>::const_iterator hit = parsed.find(item);
	if (hit == parsed.end())
		return string();
	return hit->second;
}

// Returns the smallest result id marked as preferred for `item`, or -1 when
// the item is unknown or has no preferred result.
int Profile::getPreference(tIid item)
{
	if (itemToProfile.count(item) == 0)
		return -1;

	map<tIid,map<int,int> > &prefs = *preferences[itemToProfile[item]];
	map<tIid,map<int,int> >::iterator entry = prefs.find(item);
	if (entry == prefs.end() || entry->second.empty())
		return -1;
	// the inner map is ordered by result id, so begin() is the lowest one
	return entry->second.begin()->first;
}

// Marks result `parse` as preferred for `item`. With append == false the
// item's existing preferences are discarded first. Unknown items are
// silently ignored.
void Profile::setPreference(tIid &item, int &parse, bool append)
{
	if (itemToProfile.count(item) == 0)
		return;

	// operator[] creates the item's preference set if it does not exist yet
	map<int,int> &marked = (*preferences[itemToProfile[item]])[item];
	if (!append)
		marked.clear();
	marked.insert(tIIValue(parse,1));
}

// Returns true when result `parse` of `item` is recorded as preferred (gold)
// with value 1. Unknown items and items without preferences yield false.
//
// Only find()/count() are used for the lookups, so querying an unknown item
// does not add it to itemToProfile or to the preference tables.
bool Profile::isGold(tIid item, int parse)
{
	if (itemToProfile.count(item) == 0)
		return false;

	const map<tIid,map<int,int> > &prefs = *preferences[itemToProfile[item]];
	map<tIid,map<int,int> >::const_iterator entry = prefs.find(item);
	if (entry == prefs.end())
		return false;

	map<int,int>::const_iterator mark = entry->second.find(parse);
	return mark != entry->second.end() && mark->second == 1;
}

// Returns how many results are recorded as preferred for `item`; 0 when the
// item is unknown or has no preference entry.
//
// Only find()/count() are used for the lookups, so querying an unknown item
// does not add it to itemToProfile or to the preference tables.
int Profile::numGold(tIid item)
{
	if (itemToProfile.count(item) == 0)
		return 0;

	const map<tIid,map<int,int> > &prefs = *preferences[itemToProfile[item]];
	map<tIid,map<int,int> >::const_iterator entry = prefs.find(item);
	if (entry == prefs.end())
		return 0;
	return static_cast<int>(entry->second.size());
}

// Returns the number of readings recorded for `item` in the parse table of
// the profile that holds it, or -1 when there is no such record.
int Profile::getReadings(tIid item)
{
	if (itemToProfile.count(item) == 0)
		return -1;

	const map<tIid,int> &counts = *readings[itemToProfile[item]];
	map<tIid,int>::const_iterator hit = counts.find(item);
	if (hit == counts.end())
		return -1;
	return hit->second;
}

// Sequential reader over the "result" files of all actual profiles.
//
// Each call returns the next result line as ((item id, result id), field),
// where field is the "mrs" column when type == MRS and the "derivation"
// column otherwise. The read position is kept in the members resultfilter,
// resultfile and resultprofilecount between calls.
//
// A negative first id signals that no result was returned:
//   (-1,-1)  the result file of the first profile could not be opened
//   (-2,-1)  all profiles have been read (also returned by any later call)
//   (-3,-1)  the result file of a subsequent profile could not be opened
// The former (-4,-1) return was unreachable and has been removed.
pair<pair<tIid,int>,string> Profile::getResult(ResultType type)
{
	// Once the cursor has moved past the last profile there is nothing left
	// to read; indexing profiles/relations with it would be out of range.
	if (static_cast<size_t>(resultprofilecount) >= profiles.size()) {
		return pair<pair<tIid,int>,string>(pair<tIid,int>(-2,-1), string());
	}
	if (resultfilter.empty()) {
		//first result
		// NOTE(review): this always opens profiles[0], also when an earlier
		// call stopped with -3 and left resultprofilecount > 0 -- confirm
		// whether profiles[resultprofilecount] is intended here.
		if (!openf(resultfilter, resultfile,
				string(profiles[0].string()+"/result"))) {
			cerr << "Couldn't open a result file in "
				 << profiles[0].string() << endl;
			return pair<pair<tIid,int>,string>(pair<tIid,int>(-1,-1), string());
		}
	}
	string line;
	getline(resultfilter,line);
	// fail() means "no line could be read". Testing eof() instead would
	// also discard a final line that lacks a terminating newline.
	while (resultfilter.fail()) {
		// current file is used up: release it and move to the next profile
		resultfile.close();
		resultfilter.reset();
		resultprofilecount++;
		if (static_cast<size_t>(resultprofilecount) >= profiles.size()) {
			//read all profiles
			return pair<pair<tIid,int>,string>(pair<tIid,int>(-2,-1), string());
		}
		if (!openf(resultfilter, resultfile,
				string(profiles[resultprofilecount].string()+"/result"))) {
			cerr << "Couldn't open a result file in "
				 << profiles[resultprofilecount].string() << endl;
			return pair<pair<tIid,int>,string>(pair<tIid,int>(-3,-1), string());
		}
		getline(resultfilter,line);
	}
	tIid item_id;
	int parse_id;
	istringstream(getField(line,
		(*(relations[resultprofilecount]))["result"]["parse-id"])) >> item_id;
	istringstream(getField(line,
		(*(relations[resultprofilecount]))["result"]["result-id"])) >> parse_id;
	string rtype("derivation");
	if (type == MRS)
		rtype = string("mrs");
	return pair<pair<tIid,int>,string>(pair<tIid,int>(item_id, parse_id),
			getField(line,
			(*(relations[resultprofilecount]))["result"][rtype]));
}

// Random-access lookup of a single result.
//
// Scans the "result" file of the profile that holds `item` for the line
// whose parse-id equals `item` and whose result-id equals `parse`, and
// returns its "mrs" column when type == MRS, its "derivation" column
// otherwise. Returns an empty string (after a message on cerr) when the item
// is unknown, the file cannot be opened, or no matching line exists.
//
// The field positions are taken from the relations of the profile that
// holds the item, not from the cursor of the sequential getResult().
string Profile::getResult(tIid &item, int &parse, ResultType type)
{
	if (itemToProfile.count(item) == 0) {
		cerr << "No item " << item << " in profile " << pname << endl;
		return string();
	}
	const size_t owner = itemToProfile[item];

	boost::iostreams::filtering_stream<boost::iostreams::input> in;
	ifstream fstream;
	if (!openf(in, fstream, string(profiles[owner].string()+"/result"))) {
		cerr << "Problem opening result file in "
			 << profiles[owner].string() << endl;
		return string();
	}

	string rtype("derivation");
	if (type == MRS)
		rtype = string("mrs");

	string line;
	// getline() as the loop condition also handles a last line that is not
	// terminated by a newline.
	while (getline(in, line)) {
		tIid item_id;
		int parse_id;
		istringstream(getField(line,
			(*(relations[owner]))["result"]["parse-id"])) >> item_id;
		istringstream(getField(line,
			(*(relations[owner]))["result"]["result-id"])) >> parse_id;
		if (item_id == item && parse_id == parse) {
			fstream.close();
			in.reset();
			return getField(line, (*(relations[owner]))["result"][rtype]);
		}
	}
	fstream.close();
	in.reset();
	cerr << "result " << parse << " of item " << item
		 << " not found in result file in "
		 << profiles[owner].string() << endl;
	return string();
}

// Collects the ids of all items of all loaded profiles into a newly
// allocated set. The set is created with new and returned by raw pointer;
// this function does not free it.
set<tIid>* Profile::getItemIDs()
{
	set<tIid>* collected = new set<tIid>();
	vector<map<tIid, string>*>::const_iterator table = items.begin();
	while (table != items.end()) {
		const map<tIid, string> &entries = **table;
		for (map<tIid, string>::const_iterator entry = entries.begin();
				entry != entries.end(); ++entry) {
			collected->insert(entry->first);
		}
		++table;
	}
	return collected;
}

// Returns how many times the item should be duplicated, i.e. the repetition
// count recorded for the profile that `item_id` is mapped to.
unsigned int Profile::getRepsForItem(tIid item_id)
{
	// look up the profile index first, then that profile's repetition count
	const size_t owner = itemToProfile[item_id];
	return profileRepCount[owner];
}

// Gives access to the profile path as stored by the constructor (without a
// trailing slash). The returned reference aliases the member itself.
string &Profile::getPath()
{
	return this->pname;
}

bool Profile::isVirtual()
{
	return fs::exists(fs::path(pname) / "virtual");
	// we could have a profile count of 1 and still have a virtual profile 
	// (technically) so need to check for the filename
}

void Profile::scoreProfile(const string &scorefile, bool all, int n)
{
	int total_exact = 0;
	int total_seen = 0;
	set<tIid>* test_item_ids = getItemIDs();
	for (set<tIid>::iterator iiter=test_item_ids->begin();
		iiter != test_item_ids->end(); ++iiter) {
			if (!all && getReadings(*iiter) == 0) 
				continue; // skip items without results if all==false
			++total_seen;
			for (int r = 0; r < n; ++r) {
				if (isGold(*iiter, r)) {
					++total_exact;
					break;
				}
			}
	}
	//TODO change filename to reflect N
	string outfilename = scorefile + ".cvexact";
	ofstream outf(outfilename.c_str());
	if (outf.is_open()) {
		outf << (float) total_exact / total_seen << "\n"
			<< total_exact << "/" << total_seen << endl;
	}
	outf.close();
}

// Returns column number `field` (0-based) of `line`, where columns are
// separated by '@'. The last column extends to the end of the line.
//
// If the line has fewer than `field` separators (or `field` is negative and
// therefore cannot be reached), an error is printed to cerr and the process
// terminates with exit(1).
string Profile::getField(string line, int field)
{
	// positions are kept as size_type so that npos is compared without a
	// lossy round trip through int
	string::size_type start = 0;
	string::size_type end = line.find('@');
	int atcount;
	for (atcount = 0; atcount < field && end != string::npos; ++atcount) {
		start = end + 1;
		end = line.find('@', start);
	}
	if (atcount != field) {
		cerr << "Mal-formed line: " << line << endl;
		cerr << "Insufficent delimiters." << endl;
		exit(1);
	}
	if (end == string::npos)
		return line.substr(start); // last column: no closing separator
	return line.substr(start, end - start);
}

void Profile::setPrefsFromTopNResults(int top_n)
{
	set<tIid> *item_ids = getItemIDs();
	for (set<tIid>::const_iterator iid_iter = item_ids->begin();
			iid_iter != item_ids->end(); ++iid_iter) {
		tIid item = *iid_iter;
		int readings = getReadings(item);
		for (int i = 0; i < readings && i < top_n; ++i) {
			setPreference(item, i, false); 
			// overwrites all gold prefs, plus more (those w/o gold marked as well)
		}
	}
	delete item_ids;
	autoPrefsTopN = top_n;
}

int Profile::topNForAutoPrefs()
{
	return autoPrefsTopN;
}