"""Rank hyper-parameter settings of a two-stage (scope / event) system.

Reads score files in which every line names a system through a path fragment
of the form ``x<SL1>-<SL2>-<SEP>x<EL1>-<EL2>-<EEP>`` (or ``default/score``)
and ends with ``|<F1>``.  It prints the best systems, counts which
hyper-parameter values occur most often among the top scoring systems and
optionally stores the winning values as JSON.
"""
import re
import operator
import sys
from argparse import ArgumentParser
import json

# Hyper-parameter names; the first three belong to the scope stage and the
# last three to the event stage (see save_top_params for how they are used).
HYPERPARAMS = ["SL1", "SL2", "SEP", "EL1", "EL2", "EEP"]
_SCOPE_KEYS = HYPERPARAMS[:3]
_EVENT_KEYS = HYPERPARAMS[3:]

# Six numeric capture groups for a tuned system, or a seventh group holding
# the literal 'default'.  Compiled once instead of on every parsed line.
_FILE_NAME_RE = re.compile(
    r'x(\d+\.?\d*)-(\d+\.?\d*)-(\d+\.?\d*)x(\d+\.?\d*)-(\d+\.?\d*)-(\d+\.?\d*)|(default)/score')

# Option string written to the JSON output for each stage.
_CRF_TEMPLATE = "-T crf -a l-bfgs --histsz 5 -1 %s -2 %s -e %s"


class Classifier(object):
    """One evaluated system: its name, its scores and its hyper-parameters."""

    def __init__(self, name):
        self.name = name
        self.full_negation = None
        self.scope = None
        self.event = None
        # Stays all-None for the 'default' system.
        self.params = {key: None for key in HYPERPARAMS}

    def set_full_negation(self, f1_score):
        """Store the full negation F1 score."""
        self.full_negation = f1_score

    def set_scope_score(self, scope_score):
        """Store the scope score."""
        self.scope = scope_score

    def set_event_score(self, event_score):
        """Store the event score."""
        self.event = event_score

    def set_params(self, params):
        """Copy the first six entries of *params* into ``self.params``.

        *params* is a list as returned by :func:`get_params`; nothing is
        stored when its last entry is ``'default'``.
        """
        if params[-1] == 'default':
            return
        for index, key in enumerate(HYPERPARAMS):
            self.params[key] = params[index]


def get_f1(line):
    """Return the float that follows the last ``|`` on *line*."""
    return float(line.split('|')[-1].strip('\n'))


def get_params(line):
    """Split *line* on the system-name pattern and return the captured groups.

    For a single match the result has seven entries: the six hyper-parameter
    strings followed by ``None``, or six ``None`` followed by ``'default'``.
    A line without a match yields an empty list.
    """
    return re.split(_FILE_NAME_RE, line)[1:-1]


def _system_name(params):
    """Return the dictionary key for a system described by *params*."""
    if params[-1] == 'default':
        return params[-1]
    return '-'.join(params[:-1])


def read_full_negation(file_name):
    """Read full negation scores and return ``{system name: Classifier}``.

    Blank lines are skipped.  A later line for the same system replaces the
    earlier one.
    """
    systems = {}
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            params = get_params(line)
            system = Classifier(_system_name(params))
            system.set_params(params)
            system.set_full_negation(get_f1(line))
            systems[system.name] = system
    return systems


def _read_scores(systems, file_name, setter):
    """Apply *setter* with the score of every line whose system is known.

    Lines naming a system that is not in *systems* are ignored, as are blank
    lines.  Returns *systems* (modified in place).
    """
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            system = systems.get(_system_name(get_params(line)))
            if system is not None:
                setter(system, get_f1(line))
    return systems


def read_scope(systems, file_name):
    '''
    Attach scope scores from *file_name* to the systems in *systems*.

    Note that this function assumes that the full negation scores
    have been read already; scores for unknown systems are ignored.
    '''
    return _read_scores(systems, file_name, Classifier.set_scope_score)


def read_event(systems, file_name):
    '''
    Attach event scores from *file_name* to the systems in *systems*.

    Note that this function assumes that the full negation scores
    have been read already; scores for unknown systems are ignored,
    consistent with read_scope.
    '''
    return _read_scores(systems, file_name, Classifier.set_event_score)


def params_str(system, prev_sys=None):
    """Return the six hyper-parameters of *system* as a tab separated string.

    When *prev_sys* is given, every value equal to the corresponding value of
    *prev_sys* is replaced by ``_`` so that only the differences stand out.
    """
    cells = []
    for key in HYPERPARAMS:
        value = system.params[key]
        if prev_sys and value == prev_sys.params[key]:
            value = "_"
        cells.append("%s" % (value,))
    return "\t".join(cells)


def top_n(sorted_systems, n):
    """Print the systems holding the best *n* distinct full negation scores.

    *sorted_systems* must be sorted best first.  Systems with equal full
    negation score share a rank.  Nothing is printed when *n* is below 1 or
    the list is empty.
    """
    rank = 1
    previous = None
    for system in sorted_systems:
        if rank > n:
            break
        if previous is None:
            print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
                  % ('#', 'F1', 'Scope', 'SL1', 'SL2', 'SEP', 'EL1', 'EL2', 'EEP'))
            print("%d\t%s\t%s\t%s" % (rank, system.full_negation, system.scope,
                                      params_str(system)))
        else:
            if system.full_negation != previous.full_negation:
                rank += 1
            if rank <= n:
                print("%d\t%s\t%s\t%s" % (rank, system.full_negation, system.scope,
                                          params_str(system, previous)))
        previous = system


def top_params(sorted_systems, top_percent, scope_params=None):
    """Return the most frequent hyper-parameter values among the top systems.

    A system counts as "top" while its full negation score is above
    ``best - best * top_percent``; *sorted_systems* must be sorted best first.

    Without *scope_params* all six hyper-parameters are counted (single stage
    ranking).  With *scope_params* (as returned by :func:`top_params_scope`)
    only systems whose SL1/SL2/SEP values appear in it are considered, and
    only EL1/EL2/EEP are counted.  *scope_params* is not modified.

    Returns a dict mapping each hyper-parameter name to its most frequent
    value, or to ``None`` when no value was counted for it.

    Raises:
        ValueError: if *sorted_systems* is empty.
    """
    if not sorted_systems:
        raise ValueError("no systems to rank")

    single_stage = not scope_params
    if single_stage:
        best_params = {key: {} for key in HYPERPARAMS}
        counted_keys = HYPERPARAMS
    else:
        # Work on a copy so the caller's scope ranking stays untouched.
        best_params = {key: dict(scope_params.get(key, {})) for key in HYPERPARAMS}
        counted_keys = _EVENT_KEYS

    best_score = sorted_systems[0].full_negation
    threshold = best_score * top_percent
    cutoff = best_score - threshold
    print("Threshold: %s" % (threshold))

    total_top = 0
    for system in sorted_systems:
        if system.full_negation <= cutoff:
            break
        if not single_stage and any(system.params[key] not in best_params[key]
                                    for key in _SCOPE_KEYS):
            continue
        for key in counted_keys:
            value = system.params[key]
            best_params[key][value] = best_params[key].get(value, 0) + 1
        total_top += 1

    chosen = {key: None for key in HYPERPARAMS}
    for key in HYPERPARAMS:
        sorted_params = sorted(best_params[key].items(),
                               key=operator.itemgetter(1), reverse=True)
        print("%s: %s" % (key, sorted_params))
        if sorted_params:
            chosen[key] = sorted_params[0][0]
    print(chosen)
    print("Total number of systems: %d" % total_top)
    return chosen


def top_params_scope(sorted_systems, top_percent):
    """Return the most frequent SL1/SL2/SEP values among the top scope systems.

    A system counts as "top" while its scope score is above
    ``best - best * top_percent``; *sorted_systems* must be sorted by scope
    score, best first.  Systems without a scope score end the scan.

    Returns a dict with all six hyper-parameter names as keys.  The SL1, SL2
    and SEP entries hold ``{value: count}`` for the single most frequent
    value (empty when nothing qualified); the event entries are empty dicts.

    Raises:
        ValueError: if there is no system with a scope score to rank.
    """
    if not sorted_systems or sorted_systems[0].scope is None:
        raise ValueError("no systems with a scope score to rank")

    best_scope = sorted_systems[0].scope
    threshold = best_scope * top_percent
    cutoff = best_scope - threshold

    best_params = {key: {} for key in HYPERPARAMS}
    for system in sorted_systems:
        if system.scope is None or system.scope <= cutoff:
            break
        for key in _SCOPE_KEYS:
            value = system.params[key]
            best_params[key][value] = best_params[key].get(value, 0) + 1

    for key in _SCOPE_KEYS:
        if best_params[key]:
            value, count = max(best_params[key].items(), key=operator.itemgetter(1))
            best_params[key] = {value: count}
    return best_params


def save_best_system_params(system, out_path):
    """Write the option strings of a single *system* to *out_path* as JSON.

    The 'default' system is written as the literal string "default" for both
    stages.
    """
    if system.name != 'default':
        params = {
            'scope_parameters': _CRF_TEMPLATE % tuple(system.params[key] for key in _SCOPE_KEYS),
            'event_parameters': _CRF_TEMPLATE % tuple(system.params[key] for key in _EVENT_KEYS),
        }
    else:
        params = {'scope_parameters': "default", 'event_parameters': "default"}
    with open(out_path, 'w') as f:
        json.dump(params, f)


def save_top_params(top, out_path):
    """Write the option strings built from *top* to *out_path* as JSON.

    *top* maps each hyper-parameter name to the chosen value, as returned by
    :func:`top_params`.
    """
    params = {
        'scope_parameters': _CRF_TEMPLATE % tuple(top[key] for key in _SCOPE_KEYS),
        'event_parameters': _CRF_TEMPLATE % tuple(top[key] for key in _EVENT_KEYS),
    }
    with open(out_path, 'w') as f:
        json.dump(params, f, indent=2)


def _score_key(*attributes):
    """Build a sort key over score attributes that sorts ``None`` lowest.

    Comparing ``None`` with a float raises on Python 3, so missing scores are
    mapped to negative infinity.
    """
    def key(system):
        scores = []
        for attribute in attributes:
            score = getattr(system, attribute)
            scores.append(float('-inf') if score is None else score)
        return tuple(scores)
    return key


def main():
    """Command line entry point: rank systems and report the best parameters."""
    argparser = ArgumentParser(description=__doc__)
    argparser.add_argument('-full', required=True,
                           help='''File containing full negation results.''')
    argparser.add_argument('--scope', help='''File containing scope scroes results.''')
    argparser.add_argument('--out', default=None,
                           help='''File to save the best params in JSON format''')
    argparser.add_argument('--n', default=0, type=int,
                           help='''N in top n performing systems''')
    args = argparser.parse_args(sys.argv[1:])

    systems = read_full_negation(args.full)
    if args.scope:
        systems = read_scope(systems, args.scope)

    # sort by full_negation score first and then scope score
    sorted_systems = sorted(systems.values(),
                            key=_score_key('full_negation', 'scope'), reverse=True)
    top_n(sorted_systems, args.n)
    best_params = top_params(sorted_systems, 0.05)

    # The two-stage ranking needs scope scores; without them the single
    # stage result above is the final answer.
    if args.scope:
        print("\n")
        scope_sorted_systems = sorted(systems.values(),
                                      key=_score_key('scope', 'full_negation'),
                                      reverse=True)
        top_scope_params = top_params_scope(scope_sorted_systems, 0.01)
        best_params = top_params(sorted_systems, 0.05, top_scope_params)

    # write out best parameters
    if args.out:
        save_top_params(best_params, args.out)


if __name__ == '__main__':
    main()