-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathautomatic_metrics.py
executable file
·76 lines (60 loc) · 2.3 KB
/
automatic_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import single_meteor_score
from rouge import Rouge
from bert_score import score
import numpy as np
def get_bleu_score(question, reference, hypothesis, task):
    """Return two sentence-level BLEU scores for ``hypothesis``.

    Both texts are whitespace-tokenised (newlines count as separators) and
    scored with NLTK's ``sentence_bleu`` against the single reference.

    Args:
        question: Not used by this metric.
        reference: Reference text.
        hypothesis: Candidate text to score.
        task: Not used by this metric.

    Returns:
        ``(bleu1, bleu4)``: the score with all weight on unigrams, and the
        score with all weight on 4-grams (not the cumulative 1-4 gram mix).
    """
    ref_tokens, hyp_tokens = (
        text.replace("\n", " ").split() for text in (reference, hypothesis)
    )

    unigram_only = (1, 0, 0, 0)
    fourgram_only = (0, 0, 0, 1)

    bleu1 = sentence_bleu([ref_tokens], hyp_tokens, weights=unigram_only)
    bleu4 = sentence_bleu([ref_tokens], hyp_tokens, weights=fourgram_only)
    return bleu1, bleu4
def get_rouge_score(question, reference, hypothesis, task, metric="r"):
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L values for ``hypothesis``.

    Args:
        question: Not used by this metric.
        reference: Reference text.
        hypothesis: Candidate text to score.
        task: Not used by this metric.
        metric: Key selected from each ROUGE variant's result dict.
            Defaults to ``"r"`` (presumably recall, with ``"p"``/``"f"``
            for precision/F-score -- confirm against the ``rouge`` package).

    Returns:
        A 3-tuple ``(rouge_1, rouge_2, rouge_l)`` of the chosen metric.
    """
    scorer = Rouge()
    # One hypothesis/reference pair in, so the result list has one entry.
    per_pair_scores = scorer.get_scores(hyps=[hypothesis], refs=[reference])
    scores = per_pair_scores[0]
    return tuple(
        scores[variant][metric] for variant in ("rouge-1", "rouge-2", "rouge-l")
    )
def get_meteor_score(question, reference, hypothesis, task):
    """Return the METEOR score of ``hypothesis`` against ``reference``.

    Both texts are whitespace-tokenised (newlines count as separators) and
    passed to NLTK's ``single_meteor_score`` as ordered token lists.

    Args:
        question: Not used by this metric.
        reference: Reference text.
        hypothesis: Candidate text to score.
        task: Not used by this metric.

    Returns:
        The METEOR score as a plain ``float``.
    """
    reference_tokens = reference.replace("\n", " ").split()
    hypothesis_tokens = hypothesis.replace("\n", " ").split()
    # Pass the ordered token lists, not sets: a set drops repeated words and
    # word order (which METEOR's alignment/fragmentation penalty relies on),
    # and string-set iteration order changes with hash randomisation, making
    # the score differ from one interpreter run to the next.
    meteor = single_meteor_score(reference_tokens, hypothesis_tokens)
    return float(meteor)
def get_bertscore(question, reference, hypothesis, task):
    """Return the BERTScore recall of ``hypothesis`` against ``reference``.

    Args:
        question: Not used by this metric.
        reference: Reference text.
        hypothesis: Candidate text to score.
        task: Not used by this metric.

    Returns:
        The recall component (second element of the ``(P, R, F)`` result
        of ``bert_score.score``) as a plain ``float``.
    """
    # bert_score.score's positional order is (cands, refs). Pass by keyword
    # so the hypothesis is the candidate and the reference is the reference;
    # swapping them silently exchanges precision and recall.
    _precision, recall, _f1 = score(
        cands=[hypothesis], refs=[reference], lang="EN"
    )
    return float(recall)
def get_exact_match(question, reference, hypothesis, task):
    """Count the reference fields that ``hypothesis`` reproduces exactly.

    Args:
        question: Not used by this metric.
        reference: Mapping of field name to expected value.
        hypothesis: Predicted mapping, or a string containing a Python
            expression that evaluates to one.
        task: Not used by this metric.

    Returns:
        ``(matched, total)`` where ``total`` is ``len(reference)`` and
        ``matched`` is the number of reference keys present in
        ``hypothesis`` with an equal value. A hypothesis that cannot be
        turned into a dict scores ``(0, total)``.
    """
    total = len(reference)

    if isinstance(hypothesis, str):
        try:
            # SECURITY(review): eval() executes arbitrary code. If the
            # hypothesis comes from an untrusted source (e.g. raw model
            # output), consider ast.literal_eval or json.loads instead.
            hypothesis = eval(hypothesis)
        except Exception:
            # Any failure to evaluate the text means "no usable prediction".
            return 0, total

    # Explicit check instead of `assert`, which is stripped under `python -O`;
    # also covers non-string, non-dict inputs such as None.
    if not isinstance(hypothesis, dict):
        return 0, total

    matched = sum(
        1
        for key in reference
        if key in hypothesis and hypothesis[key] == reference[key]
    )
    return matched, total
def get_partial_match(question, reference, hypothesis, task):
    """Count the reference fields that ``hypothesis`` partially matches.

    A field matches partially when the whitespace-separated tokens of the
    reference value and of the predicted value share at least one token.

    Args:
        question: Not used by this metric.
        reference: Mapping of field name to expected string value.
        hypothesis: Predicted mapping, or a string containing a Python
            expression that evaluates to one.
        task: Not used by this metric.

    Returns:
        ``(matched, total)`` where ``total`` is ``len(reference)``. A
        hypothesis that cannot be turned into a dict scores ``(0, total)``;
        a field whose expected or predicted value is not a string counts
        as no match.
    """
    total = len(reference)

    if isinstance(hypothesis, str):
        try:
            # SECURITY(review): eval() executes arbitrary code. If the
            # hypothesis comes from an untrusted source (e.g. raw model
            # output), consider ast.literal_eval or json.loads instead.
            hypothesis = eval(hypothesis)
        except Exception:
            # Any failure to evaluate the text means "no usable prediction".
            return 0, total

    # Explicit check instead of `assert`, which is stripped under `python -O`;
    # also covers non-string, non-dict inputs such as None.
    if not isinstance(hypothesis, dict):
        return 0, total

    matched = 0
    for key, expected in reference.items():
        predicted = hypothesis.get(key)
        # Evaluated predictions may hold numbers, lists or None; those cannot
        # be tokenised, so treat them as a miss instead of crashing.
        if not (isinstance(expected, str) and isinstance(predicted, str)):
            continue
        if not set(expected.split()).isdisjoint(predicted.split()):
            matched += 1
    return matched, total