utils.py
"""
MedProcNER evaluation library evaluation and util functions.
Partially based on the DisTEMIST and MEDDOPLACE evaluation scripts.
@author: salva
"""
# METRICS
def calculate_fscore(gold_standard, predictions, task):
"""
Calculate micro-averaged precision, recall and f-score from two pandas dataframe
Depending on the task, do some different pre-processing to the data
"""
# Cumulative true positives, false positives, false negatives
total_tp, total_fp, total_fn = 0, 0, 0
# Dictionary to store files in gold and prediction data.
gs_files = {}
pred_files = {}
for document in gold_standard:
document_id = document[0][0]
gs_files[document_id] = document
for document in predictions:
document_id = document[0][0]
pred_files[document_id] = document
# Dictionary to store scores
scores = {}
# Iterate through documents in the Gold Standard
for document_id in gs_files.keys():
doc_tp, doc_fp, doc_fn = 0, 0, 0
gold_doc = gs_files[document_id]
# Check if there are predictions for the current document, default to empty document if false
if document_id not in pred_files.keys():
predicted_doc = []
else:
predicted_doc = pred_files[document_id]
if task == 'index': # Separate codes
gold_doc = list(set(gold_doc[0][1].split('+')))
predicted_doc = list(set(predicted_doc[0][1].split('+'))) if predicted_doc else []
# Iterate through a copy of our gold mentions
for gold_annotation in gold_doc[:]:
# Iterate through predictions looking for a match
for prediction in predicted_doc[:]:
# Separate possible composite normalizations
if task == 'norm':
separate_prediction = prediction[:-1] + [code.rstrip() for code in sorted(str(prediction[-1]).split('+'))] # Need to sort
separate_gold_annotation = gold_annotation[:-1] + [code.rstrip() for code in str(gold_annotation[-1]).split('+')]
if set(separate_gold_annotation) == set(separate_prediction):
# Add a true positive
doc_tp += 1
# Remove elements from list to calculate later false positives and false negatives
predicted_doc.remove(prediction)
gold_doc.remove(gold_annotation)
break
if set(gold_annotation) == set(prediction):
# Add a true positive
doc_tp += 1
# Remove elements from list to calculate later false positives and false negatives
predicted_doc.remove(prediction)
gold_doc.remove(gold_annotation)
break
# Get the number of false positives and false negatives from the items remaining in our lists
doc_fp += len(predicted_doc)
doc_fn += len(gold_doc)
# Calculate document score
try:
precision = doc_tp / (doc_tp + doc_fp)
except ZeroDivisionError:
precision = 0
try:
recall = doc_tp / (doc_tp + doc_fn)
except ZeroDivisionError:
recall = 0
if precision == 0 or recall == 0:
f_score = 0
else:
f_score = 2 * precision * recall / (precision + recall)
# Add to dictionary
scores[document_id] = {"recall": round(recall, 4), "precision": round(precision, 4), "f_score": round(f_score, 4)}
# Update totals
total_tp += doc_tp
total_fn += doc_fn
total_fp += doc_fp
# Now let's calculate the micro-averaged score using the cumulative TP, FP, FN
try:
precision = total_tp / (total_tp + total_fp)
except ZeroDivisionError:
precision = 0
try:
recall = total_tp / (total_tp + total_fn)
except ZeroDivisionError:
recall = 0
if precision == 0 or recall == 0:
f_score = 0
else:
f_score = 2 * precision * recall / (precision + recall)
scores['total'] = {"recall": round(recall, 4), "precision": round(precision, 4), "f_score": round(f_score, 4)}
return scores
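
# A worked example of the scoring above (illustrative only, not real MedProcNER
# data). The input format is assumed from the code in this file: each document
# is a list of annotation rows grouped by filename, and the filename is the
# first element of every row (document_id is read from row[0]).
#
#   gold = [[['doc1', '10', '18', 'biopsia'],
#            ['doc1', '25', '45', 'radiografia de torax']]]
#   pred = [[['doc1', '10', '18', 'biopsia']]]
#   calculate_fscore(gold, pred, task='ner')
#
# One of the two gold mentions is matched exactly, so for doc1 (and for the
# micro-average, since there is only one document):
#   precision = 1 / (1 + 0) = 1.0
#   recall    = 1 / (1 + 1) = 0.5
#   f_score   = 2 * 1.0 * 0.5 / (1.0 + 0.5) = 0.6667
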
# HELPER
def write_results(task, scores, output_path, verbose):
    """
    Helper function to write the results for each of the tasks
    """
    headers_dict = {'ner': 'MedProcNER Shared Task: Subtask 1 (Named Entity Recognition) Results',
                    'norm': 'MedProcNER Shared Task: Subtask 2 (Entity Linking) Results',
                    'index': 'MedProcNER Shared Task: Subtask 3 (Document Indexing) Results'}

    with open(output_path, 'w') as f_out:
        # This looks super ugly, but if we keep the indentation it will also appear in the output file
        f_out.write("""-------------------------------------------------------------------
{}
-------------------------------------------------------------------
""".format(headers_dict[task]))
        if verbose:
            for k in scores.keys():
                if k != 'total':
                    f_out.write("""-------------------------------------------------------------------
Results for document: {}
-------------------------------------------------------------------
Precision: {}
Recall: {}
F-score: {}
""".format(k, scores[k]["precision"], scores[k]["recall"], scores[k]["f_score"]))
        f_out.write("""-------------------------------------------------------------------
Overall results:
-------------------------------------------------------------------
Micro-average precision: {}
Micro-average recall: {}
Micro-average F-score: {}
""".format(scores['total']["precision"], scores['total']["recall"], scores['total']["f_score"]))

    print("Written MedProcNER {} scores to {}".format(task, output_path))