Spaces:
Runtime error
Runtime error
File size: 2,249 Bytes
e41b03f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
def _precision_recall_at(ranked, gold_positions, gold_count, cutoff):
    """Return (precision, recall) for the first ``cutoff`` items of ``ranked``."""
    top = ranked[:cutoff]  # slicing already copes with len(ranked) < cutoff
    num_correct = sum(1 for item in top if item[0] in gold_positions)
    # An empty recommendation list or an empty gold standard yields 0.0
    # instead of raising ZeroDivisionError.
    precision = num_correct / len(top) if top else 0.0
    recall = num_correct / gold_count if gold_count else 0.0
    return precision, recall


def evaluate_pr(system, gold, system_score_cutoff=0, k=5, verbosity=0):
    """
    Return the precision, recall and F1 score of a ranked system output.

    Precision and recall are computed for every cutoff from 1 to ``k`` (and
    printed when ``verbosity > 0``). If the gold standard contains more than
    ``k`` items, the scores are computed once more with the cutoff set to the
    number of gold items, so that the system is judged on as many
    recommendations as the gold standard has. The scores of the last cutoff
    evaluated are the ones returned.

    Parameters
    ----------
    system : list of tuples
        System output for a sentence in the form (position, score).
        The input list is not modified.
    gold : list of tuples
        Gold standard for a sentence in the form (position, score).
        Only entries with a score greater than 0 are used.
    system_score_cutoff : float
        Threshold of the importance score for system output, defaults to 0.
        Only system entries with a strictly greater score are kept.
    k : int
        Top k recommendations to be evaluated on.
    verbosity : int
        When greater than 0, the intermediate and final scores are printed.

    Returns
    -------
    tuple
        ``(precision, recall, f1)`` for the system. Any score whose
        denominator would be zero (no system output, no gold items, or
        ``precision + recall == 0``) is reported as 0.0.
    """
    # Keep only entries above the thresholds, then rank the system output by
    # descending score (stable, so ties keep their input order).
    ranked = sorted(
        (item for item in system if item[1] > system_score_cutoff),
        key=lambda item: -item[1],
    )
    gold = [item for item in gold if item[1] > 0]
    gold_positions = {item[0] for item in gold}
    gold_count = len(gold)  # recall denominator counts every gold entry

    # Defaults make the return value well-defined even when no cutoff is
    # evaluated at all (k < 1 and no gold items).
    precision = recall = 0.0

    # Show how precision and recall change at different cutoffs.
    for cutoff in range(1, k + 1):
        precision, recall = _precision_recall_at(
            ranked, gold_positions, gold_count, cutoff
        )
        if verbosity > 0:
            print("k=", cutoff, "\nprecision=", precision, "\nrecall=", recall)

    # When the gold standard is larger than k, evaluate at k = len(gold) so
    # the system recommends the same number of sentences as the gold standard.
    if gold_count > max(k, 0):
        precision, recall = _precision_recall_at(
            ranked, gold_positions, gold_count, gold_count
        )
        if verbosity > 0:
            # Label the output with the cutoff actually used here.
            print("k=", gold_count, "\nprecision=", precision, "\nrecall=", recall)

    denominator = precision + recall
    f_score = 2 * precision * recall / denominator if denominator else 0.0
    if verbosity > 0:
        print("f1 score=", f_score)
    return (precision, recall, f_score)