Alternative F1 estimations for ALE and ILE clustering

ULL Project Plan ⇒ Parses ⇒ lines 47-52

Basic settings

In [1]:
import os, sys, time, itertools, operator, numpy as np
from collections import Counter, OrderedDict
# Absolute path of the directory above the current working directory; it is
# appended to sys.path (once) so the `src.grammar_learner` imports resolve.
mod_pth = os.path.abspath(os.pardir)
if mod_pth not in sys.path:
    sys.path.append(mod_pth)
from src.grammar_learner.utl import UTC, kwa
from src.grammar_learner.read_files import check_dir, check_mst_files
from src.grammar_learner.widgets import html_table
from src.grammar_learner.write_files import list2file
from src.grammar_learner.grammar_checker import _compare_lg_dicts_
# Output folder under <mod_pth>/output, stamped with the first 10 characters
# of str(UTC()) (a YYYY-MM-DD date in the recorded run).
# NOTE(review): check_dir(out_dir, True) presumably creates the folder when missing -- confirm.
out_dir = '{}/output/Alternative_F1_ALE_ILE_{}_'.format(mod_pth, str(UTC())[:10])
if check_dir(out_dir, True):
    print(out_dir)
/home/obaskov/94/ULL/output/Alternative_F1_ALE_ILE_2019-04-12_

Reference dataset

In [2]:
corpus = 'GCB'
dataset = 'LG-E-noQuotes'
# Keyword arguments shared by every _compare_lg_dicts_ call in this notebook;
# 'test_path' is filled in separately for each experiment.
kwargs = dict(
    mod_pth=mod_pth,
    reference_path='/'.join([mod_pth, 'data', corpus, dataset]),
)
# NOTE(review): the second return value is bound to `re`, which shadows the
# name of the stdlib `re` module (not imported in this notebook).
files, re = check_mst_files(kwargs['reference_path'])
files
Out[2]:
['/home/obaskov/94/ULL/data/GCB/LG-E-noQuotes/GC_LGEnglish_noQuotes_fullyParsed.ull']

ILE clustering

Min_word_count = 1 -- cell U52

In [3]:
mwc = 1
test_path = 'ILE-GCB-LG-E-noQuotes-LG-551-S94-2019-04-02'
test = '/GCB_LG-E-noQuotes_dILEd_no-gen'
# The results folder name carries a '_mwc=<n>' suffix only when mwc > 1.
mwc_suffix = ('_mwc=' + str(mwc)) if mwc > 1 else ''
kwargs['test_path'] = ''.join([
    mod_pth, '/output/', test_path, test, mwc_suffix,
    '/GC_LGEnglish_noQuotes_fullyParsed.ull.ull',
])
precision, recall, f1, re = _compare_lg_dicts_(**kwargs)
# Pieces are printed space-separated, exactly as one multi-argument print().
summary = [
    'Recall:\t\t', str(round(recall*100, 2)) + '%',
    '\nPrecision:\t', str(round(precision*100, 2)) + '%',
    '\nF1:\t\t ', round(f1, 2),
    '\nReference:\t', re['reference_sentences'], 'sentences',
    '\nTest set:\t', re['test_ull_sentences'], 'sentences',
]
print(*summary)
Recall:		 56.14% 
Precision:	 97.76% 
F1:		  0.71 
Reference:	 68826 sentences 
Test set:	 68826 sentences

Grammar Tester results 2019-04-02

In [4]:
# The statistics file shares the parse file's path, with the last three
# characters ('ull') replaced by 'stat'.
with open(kwargs['test_path'][:-3] + 'stat', 'r') as f:
    stat = f.read()
# Split once, then show only the selected (0-based) lines of the report.
stat_lines = stat.splitlines()
for i in [2, 7, 14, 15, 16, 18]:
    print(stat_lines[i])
Average sentence parse:			 61.69%
Parse quality:	 59.61%
Recall:		 59.61%
Precision:	 72.32%
F1:		  0.65
Total sentences: 68826.00
In [5]:
# Cross-check of the Grammar Tester results, approximating precision as recall / PA.
pa = 0.6169      # "Average sentence parse" from the Grammar Tester output (61.69%)
recall = 0.5961  # "Recall" from the Grammar Tester output (59.61%)
alt_precision = recall / pa
# F1 as the harmonic mean of the estimated precision and the recall
alt_f1 = 2 * alt_precision * recall / (alt_precision + recall)
print('Alternative precision estimation ~ {}%, F1 ~ {}'.format(
    round(alt_precision*100, 2), round(alt_f1, 2)))
Alternative precision estimation ~ 96.63%, F1 ~ 0.74

ILE clustering, min_word_count = [31,21,11,6,1] -- lines 47-52, column U

In [15]:
ile = []
for mwc in (31, 21, 11, 6, 1):
    # The results folder name carries a '_mwc=<n>' suffix only when mwc > 1.
    mwc_suffix = ('_mwc=' + str(mwc)) if mwc > 1 else ''
    kwargs['test_path'] = ''.join([
        mod_pth, '/output/', test_path, test, mwc_suffix,
        '/GC_LGEnglish_noQuotes_fullyParsed.ull.ull',
    ])
    precision, recall, f1, r_ = _compare_lg_dicts_(**kwargs)
    if 'error' in r_:
        print(r_)
    if mwc == 1:
        # mwc=2 is not evaluated in this loop: a fixed 'err' placeholder row
        # is inserted just before the mwc=1 row.
        ile.append(('ILE', 2, 'err'))
    ile.append(('ILE', mwc, round(f1, 2)))
header = [['Clustering', 'MWC', 'F1']]
display(html_table(header + ile))
ClusteringMWCF1
ILE310.59
ILE210.62
ILE110.66
ILE60.69
ILE2err
ILE10.71

ALE clustering -- lines 47-52, columns Q:T

In [7]:
def f1_set(clusters, **kwargs):
    """Collect F1 scores of one ALE run for every min_word_count setting.

    Args:
        clusters: label stored as the first item of each result tuple
            (the callers pass the number of clusters).
        **kwargs: forwarded to ``_compare_lg_dicts_``. Must contain
            'mod_pth' and 'test_path'; 'test_path' is read as the run folder
            name under ``<mod_pth>/output`` and is replaced by the full path
            of the parse file before each comparison. Because the arguments
            arrive via ``**``, the caller's own dict is not modified.

    Returns:
        A list of ``(clusters, mwc, f1)`` tuples, one per mwc in
        31, 21, 11, 6, 2, 1 (in that order), with f1 rounded to 2 decimals.
    """
    base = ''.join([kwargs['mod_pth'], '/output/', kwargs['test_path'],
                    '/GCB_LG-E-noQuotes_cALWEd_no-gen'])
    parse_file = '/GC_LGEnglish_noQuotes_fullyParsed.ull.ull'
    scores = []
    for mwc in (31, 21, 11, 6, 2, 1):
        # Folder names carry a '_mwc=<n>' suffix only when mwc > 1.
        suffix = ('_mwc=' + str(mwc)) if mwc > 1 else ''
        kwargs['test_path'] = base + suffix + parse_file
        _, _, f1, details = _compare_lg_dicts_(**kwargs)
        if 'error' in details:
            print(details)
        scores.append((clusters, mwc, round(f1, 2)))
    return scores
In [8]:
tests = []
# (number of clusters, results folder name) for each ALE run, in table column order
ale_runs = [
    (50, 'cALEd-50-GCB-LG-E-noQuotes-2019-04-04'),
    (500, 'cALEd-500-GCB-LG-E-noQuotes-S94-2019-04-02'),
    (1000, 'cALEd-1000-GCB-LG-E-noQuotes-2019-04-03'),
    (2000, 'cALEd-2000-GCB-LG-E-noQuotes-2019-04-03'),
]
for n_clusters, run_dir in ale_runs:
    kwargs['test_path'] = run_dir
    tests.extend(f1_set(n_clusters, **kwargs))
# ILE rows are appended last, after all ALE runs.
tests.extend(ile)

Save results

In [9]:
header = ['MWC', 'ALE50', 'ALE500', 'ALE1000', 'ALE2000', ' ILE ']
table = [header]
for mwc in (31, 21, 11, 6, 2, 1):
    # One row per MWC value; scores appear in the order the runs were added to `tests`.
    row = [str(mwc)]
    row.extend(str(entry[2]) for entry in tests if entry[1] == mwc)
    table.append(row)
print(list2file(table, out_dir + '/Alternative_F1_lines_47-52.txt'))
MWC	ALE50	ALE500	ALE1000	ALE2000	 ILE 
31	0.57	0.66	0.65	0.65	0.59
21	0.59	0.68	0.68	0.67	0.62
11	0.6	0.71	0.71	0.7	0.66
6	0.6	0.73	0.72	0.72	0.69
2	0.61	0.74	0.73	0.72	err
1	0.62	0.74	0.73	0.72	0.71