[tool] Modified to simultaneously report on Plain and Reranked results
JosephGeoBenjamin committed Oct 26, 2020
1 parent 0508596 commit c26a847
Showing 3 changed files with 29 additions and 18 deletions.
tasks/infer_engine.py: 4 changes (2 additions, 2 deletions)
@@ -35,9 +35,9 @@ def inferencer(word, topk = 5):
     p_out_list = model.active_beam_inference(in_vec, beam_width = topk)
     p_result = [ tgt_glyph.xlitvec2word(out.cpu().numpy()) for out in p_out_list]
 
-    result = voc_sanitize.reposition(p_result)
+    r_result = voc_sanitize.reposition(p_result)
 
-    return result
+    return p_result, r_result
 
 
 ##=============== Corr/ Emb Stacked
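With this change, inferencer returns both candidate lists instead of only the repositioned one, so every caller now has to unpack a (plain, reranked) pair. A minimal usage sketch; the input word and topk value are illustrative, and the repository root is assumed to be on PYTHONPATH:

    from tasks.infer_engine import inferencer

    # p_result: raw beam-search candidates; r_result: vocabulary-repositioned (reranked) candidates
    p_result, r_result = inferencer("namaste", topk=5)
    print("plain    :", p_result)
    print("reranked :", r_result)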
tools/accuracy_reporter/orchestrator.py: 38 changes (23 additions, 15 deletions)
@@ -66,10 +66,11 @@ def merge_pred_truth_json(pred_path, truth_path ):
 
 def inference_looper(in_words, topk = 3):
     from tasks.infer_engine import inferencer
-    out_dict = {}
+    p_dict = {}
+    r_dict = {}
     for i in tqdm(in_words):
-        out_dict[i] = inferencer(i, topk=topk)
-    return out_dict
+        p_dict[i], r_dict[i] = inferencer(i, topk=topk)
+    return p_dict, r_dict
 
 def vocab_sanity_runner(pred_json, voc_json):
     '''
@@ -111,28 +112,35 @@ def vocab_sanity_runner(pred_json, voc_json):
 if not os.path.exists(SAVE_DIR): os.makedirs(SAVE_DIR)
 
 if __name__ == "__main__":
-
+    '''
+    Modified to simultaneously report on Plain and Reranked results
+    '''
     for fi in files:
         tfi = toggle_json(fi, save_prefix=SAVE_DIR)
         words = get_from_json(tfi, "key")
-        out_dict = inference_looper(words, topk = 10)
+        p_dict, r_dict = inference_looper(words, topk = 10)
 
         ## Testing with LM adjustments
         # out_dict = vocab_sanity_runner( "hypotheses/training_knk_103/acc_log/pred_EnKnk_ann1_test.json",
         #                                 "data/konkani/gom_word_list.json")
 
         pred_path = os.path.join(SAVE_DIR, "pred_"+os.path.basename(fi) )
-        save_to_json(pred_path, out_dict)
+        save_to_json(pred_path, p_dict)
+        repos_path = os.path.join(SAVE_DIR, "repos_"+os.path.basename(fi) )
+        save_to_json(repos_path, r_dict)
 
         gt_json = tfi
-        pred_json = pred_path
-        save_prefix = os.path.join(SAVE_DIR, os.path.basename(fi).replace(".json", ""))
-
-        for topk in [10, 5, 3, 2, 1]:
-            ## GT json file passed to below script must be in { En(input): [NativeLang (predict)] } format
-            run_accuracy_news = "( echo {} && python tools/accuracy_reporter/accuracy_news.py --gt-json {} --pred-json {} --topk {} --save-output-csv {}_top{}-scores.csv ) | tee -a {}/Summary.txt".format(
-                os.path.basename(fi),
-                gt_json, pred_json, topk,
-                save_prefix, topk, SAVE_DIR )
-
-            os.system(run_accuracy_news)
+        for path in (pred_path, repos_path):
+            pred_json = path
+            save_prefix = os.path.join(SAVE_DIR, os.path.basename(path).replace(".json", ""))
+
+            for topk in [10, 5, 3, 2, 1]:
+                ## GT json file passed to below script must be in { En(input): [NativeLang (predict)] } format
+                run_accuracy_news = "( echo {} && python tools/accuracy_reporter/accuracy_news.py --gt-json {} --pred-json {} --topk {} --save-output-csv {}_top{}-scores.csv ) | tee -a {}/Summary.txt".format(
+                    os.path.basename(fi),
+                    gt_json, pred_json, topk,
+                    save_prefix, topk, SAVE_DIR )
+
+                os.system(run_accuracy_news)
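With inference_looper now returning two dicts keyed by input word (plain and reranked candidates), the __main__ block writes two prediction files per input, pred_<name>.json and repos_<name>.json, and scores both with accuracy_news.py at every top-k cut-off. A condensed sketch of the doubled reporting loop; SAVE_DIR, the input file name, and the ground-truth path are illustrative placeholders, not the repository's actual values:

    import os

    SAVE_DIR = "hypotheses/example_run/acc_log"          # illustrative output directory
    fi = "EnKnk_ann1_test.json"                          # illustrative input file name
    gt_json = os.path.join(SAVE_DIR, fi)                 # illustrative ground-truth JSON path

    pred_path = os.path.join(SAVE_DIR, "pred_" + fi)     # plain beam-search predictions
    repos_path = os.path.join(SAVE_DIR, "repos_" + fi)   # vocabulary-reranked predictions

    for path in (pred_path, repos_path):                 # score plain and reranked runs alike
        save_prefix = os.path.join(SAVE_DIR, os.path.basename(path).replace(".json", ""))
        for topk in (10, 5, 3, 2, 1):
            cmd = ("( echo {} && python tools/accuracy_reporter/accuracy_news.py "
                   "--gt-json {} --pred-json {} --topk {} "
                   "--save-output-csv {}_top{}-scores.csv ) | tee -a {}/Summary.txt"
                   ).format(fi, gt_json, path, topk, save_prefix, topk, SAVE_DIR)
            os.system(cmd)   # both runs append to the same Summary.txt for side-by-side comparison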
tools/vocab_n_embeds/Monolingual_Vocab_creator.ipynb: 5 changes (4 additions, 1 deletion)
@@ -300,8 +300,11 @@
        "id": "_kSgIKYiMuTT"
       },
       "source": [
+        "word_set = set(word_list)\n",
         "for k in removal_word_set:\n",
-        "    word_list.remove(k)"
+        "    word_set.remove(k)\n",
+        "\n",
+        "word_list = list(word_set)"
       ],
       "execution_count": null,
       "outputs": []
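The notebook cell now deletes the removal words from a set built out of word_list instead of calling list.remove inside the loop: set deletion is O(1) per word, while list.remove rescans the whole vocabulary on every call. Note that the round-trip through set also de-duplicates the vocabulary and does not preserve the original order. A self-contained toy sketch of the same pattern, with illustrative words:

    word_list = ["ghar", "ghar", "paani", "vaat", "mhaka"]   # illustrative vocabulary
    removal_word_set = {"vaat"}                              # illustrative words to drop

    word_set = set(word_list)        # also de-duplicates the list
    for k in removal_word_set:
        word_set.remove(k)           # O(1) per delete; raises KeyError if k is absent

    word_list = list(word_set)
    print(sorted(word_list))         # ['ghar', 'mhaka', 'paani']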
