evaluation_metrics.py
# -*- coding: utf-8 -*-
import numpy as np


def evaluate_summary(predicted_summary, user_summary, eval_method):
""" Compare the predicted summary with the user defined one(s).
:param ndarray predicted_summary: The generated summary from our model.
:param ndarray user_summary: The user defined ground truth summaries (or summary).
:param str eval_method: The proposed evaluation method; either 'max' (SumMe) or 'avg' (TVSum).
:return: The reduced fscore based on the eval_method
"""
max_len = max(len(predicted_summary), user_summary.shape[1])
S = np.zeros(max_len, dtype=int)
G = np.zeros(max_len, dtype=int)
S[:len(predicted_summary)] = predicted_summary
f_scores = []
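    # Compare the prediction against each user annotation separately.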
for user in range(user_summary.shape[0]):
G[:user_summary.shape[1]] = user_summary[user]
overlapped = S & G
        # Compute precision, recall, and F-score; guard the divisions so an
        # all-zero prediction or ground truth does not raise ZeroDivisionError.
        precision = sum(overlapped) / sum(S) if sum(S) else 0
        recall = sum(overlapped) / sum(G) if sum(G) else 0
        if precision + recall == 0:
            f_scores.append(0)
        else:
            f_scores.append(2 * precision * recall * 100 / (precision + recall))
    # Reduce across users: 'max' keeps the best-matching user (SumMe protocol);
    # anything else averages over all users (TVSum protocol).
    if eval_method == 'max':
        return max(f_scores)
    else:
        return sum(f_scores) / len(f_scores)
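

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original file: the vectors below
    # are illustrative assumptions — a binary keyframe indicator of length 10
    # for the predicted summary, and three user annotations of the same length.
    predicted = np.array([1, 0, 1, 1, 0, 0, 0, 1, 0, 0])
    users = np.array([[1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 1, 1, 0, 0, 0, 1, 0, 1],
                      [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]])
    print(evaluate_summary(predicted, users, 'max'))  # SumMe-style reduction
    print(evaluate_summary(predicted, users, 'avg'))  # TVSum-style reduction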