import os, sys, time, itertools, operator, numpy as np
from collections import Counter, OrderedDict
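# Add the project root to sys.path so the src.grammar_learner imports below resolve.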
mod_pth = os.path.abspath(os.path.join('..'))
if mod_pth not in sys.path: sys.path.append(mod_pth)
from src.grammar_learner.utl import UTC, kwa
from src.grammar_learner.read_files import check_dir, check_mst_files
from src.grammar_learner.widgets import html_table
from src.grammar_learner.write_files import list2file
from src.grammar_learner.grammar_checker import _compare_lg_dicts_
from IPython.display import display  # explicit import so the HTML table below also works outside a live notebook
out_dir = mod_pth + '/output/Alternative_F1_ALE_ILE_' + str(UTC())[:10] + '_'
if check_dir(out_dir, True): print(out_dir)
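# Reference parses: GCB corpus, LG-E-noQuotes dataset; check_mst_files presumably
# verifies and lists the reference parse files under this path.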
corpus = 'GCB'; dataset = 'LG-E-noQuotes'
kwargs = {'mod_pth': mod_pth,
          'reference_path': mod_pth + '/data/' + corpus + '/' + dataset}
files, re = check_mst_files(kwargs['reference_path']); files
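# Single ILE grammar test: point test_path at a Grammar Tester output directory;
# an '_mwc=' suffix is added only for min-word-count thresholds above 1.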
mwc = 1
test_path = 'ILE-GCB-LG-E-noQuotes-LG-551-S94-2019-04-02'
test = '/GCB_LG-E-noQuotes_dILEd_no-gen'
kwargs['test_path'] = mod_pth + '/output/' + test_path + test
if mwc > 1: kwargs['test_path'] += '_mwc=' + str(mwc)
kwargs['test_path'] += '/GC_LGEnglish_noQuotes_fullyParsed.ull.ull'
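# _compare_lg_dicts_ compares the test results with the reference parses and returns
# precision, recall, F1, and a response dict with the sentence counts printed below.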
precision, recall, f1, re = _compare_lg_dicts_(**kwargs)
print('Recall:\t\t', str(round(recall*100, 2)) + '%',
      '\nPrecision:\t', str(round(precision*100, 2)) + '%',
      '\nF1:\t\t ', round(f1, 2),
      '\nReference:\t', re['reference_sentences'], 'sentences',
      '\nTest set:\t', re['test_ull_sentences'], 'sentences')
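# Read the Grammar Tester's .stat report alongside the .ull file ([:-3] swaps the
# trailing 'ull' for 'stat') and print selected summary lines; the row indices
# depend on that file's layout.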
with open(kwargs['test_path'][:-3] + 'stat', 'r') as f: stat = f.read() #; print(stat)
for i in [2,7,14,15,16,18]: print(stat.splitlines()[i])
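# Cross-check against the Grammar Tester's own report: PA and recall (presumably taken
# from the .stat lines printed above) are hard-coded below; with these values
# precision ≈ 0.5961 / 0.6169 ≈ 0.9663 and F1 = 2*P*R/(P+R) ≈ 0.74.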
# Estimate precision as recall / PA (approximation):
pa = 0.6169
recall = 0.5961
alt_precision = recall / pa
alt_f1 = 2 * alt_precision * recall / (alt_precision + recall)
print('Alternative precision estimation ~', str(round(alt_precision*100, 2))
      + '%, F1 ~', round(alt_f1, 2))
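# Sweep the ILE grammar over min-word-count (mwc) thresholds: rebuild the test path
# for each threshold, recompute F1 and collect (clustering, mwc, F1) rows.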
min_word_count = [31,21,11,6,1]
ile = []
for mwc in min_word_count:
    kwargs['test_path'] = mod_pth + '/output/' + test_path + test
    if mwc > 1: kwargs['test_path'] += '_mwc=' + str(mwc)
    kwargs['test_path'] += '/GC_LGEnglish_noQuotes_fullyParsed.ull.ull'
    precision, recall, f1, r_ = _compare_lg_dicts_(**kwargs)
    if 'error' in r_: print(r_)
    if mwc == 1: ile.append(('ILE', 2, 'err'))  # no mwc=2 run: placeholder keeps the table rows aligned
    ile.append(('ILE', mwc, round(f1, 2)))
display(html_table([['Clustering', 'MWC', 'F1']] + ile))
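# f1_set repeats the same mwc sweep for a clustered grammar: kwargs['test_path'] holds
# the Grammar Tester output directory name, and 'clusters' just labels the result rows.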
def f1_set(clusters, **kwargs):
    tp = kwargs['mod_pth'] + '/output/' + kwargs['test_path'] \
         + '/GCB_LG-E-noQuotes_cALWEd_no-gen'
    results = []
    for mwc in [31,21,11,6,2,1]:
        kwargs['test_path'] = tp
        if mwc > 1: kwargs['test_path'] += '_mwc=' + str(mwc)
        kwargs['test_path'] += '/GC_LGEnglish_noQuotes_fullyParsed.ull.ull'
        precision, recall, f1, r_ = _compare_lg_dicts_(**kwargs)
        if 'error' in r_: print(r_)
        results.append((clusters, mwc, round(f1, 2)))
    return results
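# Run the sweep for ALE grammars with 50, 500, 1000 and 2000 clusters,
# then append the ILE rows collected above for side-by-side comparison.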
tests = []
kwargs['test_path'] = 'cALEd-50-GCB-LG-E-noQuotes-2019-04-04'
tests.extend(f1_set(50, **kwargs))
kwargs['test_path'] = 'cALEd-500-GCB-LG-E-noQuotes-S94-2019-04-02'
tests.extend(f1_set(500, **kwargs))
kwargs['test_path'] = 'cALEd-1000-GCB-LG-E-noQuotes-2019-04-03'
tests.extend(f1_set(1000, **kwargs))
kwargs['test_path'] = 'cALEd-2000-GCB-LG-E-noQuotes-2019-04-03'
tests.extend(f1_set(2000, **kwargs))
tests.extend(ile)
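# Pivot the (clustering, mwc, F1) tuples into a summary table: one row per mwc value,
# one column per grammar; list2file writes it to the output directory.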
table = [['MWC', 'ALE50', 'ALE500', 'ALE1000', 'ALE2000', ' ILE ']]
for mwc in [31,21,11,6,2,1]:
    table.append([str(mwc)] + [str(t[2]) for t in tests if t[1] == mwc])
print(list2file(table, out_dir + '/Alternative_F1_lines_47-52.txt'))