Spaces:
Running
Running
Commit
·
c5c293d
1
Parent(s):
febbf71
ignore old votes
Browse files
- get_results.py +15 -0
- results.csv +2 -2
get_results.py
CHANGED
@@ -15,6 +15,9 @@ load_dotenv()
|
|
15 |
|
16 |
video_pairs = pd.read_csv('file_pairs.csv')[['file_name', 'vista_id', 'gem_id']].values.tolist()
|
17 |
|
|
|
|
|
|
|
18 |
my_credentials = {
|
19 |
"type": "service_account",
|
20 |
"project_id": "human-eval-c4f83",
|
@@ -45,9 +48,15 @@ qids = [
|
|
45 |
'q2',
|
46 |
]
|
47 |
|
|
|
|
|
48 |
for vote in tqdm(votes):
|
49 |
vote_dict = vote.to_dict()
|
50 |
for qid in qids:
|
|
|
|
|
|
|
|
|
51 |
category = qid
|
52 |
if category not in gem_vote_count:
|
53 |
gem_vote_count[category] = 0
|
@@ -55,18 +64,24 @@ for vote in tqdm(votes):
|
|
55 |
no_preference_count[category] = 0
|
56 |
|
57 |
if vote_dict[qid] == 'Video 1' and vote_dict['video1_id'] == vote_dict['gem_id']:
|
|
|
58 |
gem_vote_count[category] += 1
|
59 |
elif vote_dict[qid] == 'Video 1' and vote_dict['video1_id'] == vote_dict['vista_id']:
|
|
|
60 |
vista_vote_count[category] += 1
|
61 |
elif vote_dict[qid] == 'Video 2' and vote_dict['video2_id'] == vote_dict['gem_id']:
|
|
|
62 |
gem_vote_count[category] += 1
|
63 |
elif vote_dict[qid] == 'Video 2' and vote_dict['video2_id'] == vote_dict['vista_id']:
|
|
|
64 |
vista_vote_count[category] += 1
|
65 |
elif vote_dict[qid] == 'No preference':
|
66 |
no_preference_count[category] += 1
|
67 |
else:
|
68 |
raise ValueError('Invalid vote: ' + str(vote_dict))
|
69 |
|
|
|
|
|
70 |
# Create a table with the results. One row for GEM, one row for Vista, one row for no preference
|
71 |
|
72 |
data = []
|
|
|
15 |
|
16 |
video_pairs = pd.read_csv('file_pairs.csv')[['file_name', 'vista_id', 'gem_id']].values.tolist()
|
17 |
|
18 |
+
vista_ids = [x[1] for x in video_pairs]
|
19 |
+
gem_ids = [x[2] for x in video_pairs]
|
20 |
+
|
21 |
my_credentials = {
|
22 |
"type": "service_account",
|
23 |
"project_id": "human-eval-c4f83",
|
|
|
48 |
'q2',
|
49 |
]
|
50 |
|
51 |
+
skips = 0
|
52 |
+
|
53 |
for vote in tqdm(votes):
|
54 |
vote_dict = vote.to_dict()
|
55 |
for qid in qids:
|
56 |
+
if vote_dict['gem_id'] not in gem_ids or vote_dict['vista_id'] not in vista_ids:
|
57 |
+
skips += 1
|
58 |
+
continue
|
59 |
+
|
60 |
category = qid
|
61 |
if category not in gem_vote_count:
|
62 |
gem_vote_count[category] = 0
|
|
|
64 |
no_preference_count[category] = 0
|
65 |
|
66 |
if vote_dict[qid] == 'Video 1' and vote_dict['video1_id'] == vote_dict['gem_id']:
|
67 |
+
assert vote_dict['gem_id'] in gem_ids
|
68 |
gem_vote_count[category] += 1
|
69 |
elif vote_dict[qid] == 'Video 1' and vote_dict['video1_id'] == vote_dict['vista_id']:
|
70 |
+
assert vote_dict['vista_id'] in vista_ids
|
71 |
vista_vote_count[category] += 1
|
72 |
elif vote_dict[qid] == 'Video 2' and vote_dict['video2_id'] == vote_dict['gem_id']:
|
73 |
+
assert vote_dict['gem_id'] in gem_ids
|
74 |
gem_vote_count[category] += 1
|
75 |
elif vote_dict[qid] == 'Video 2' and vote_dict['video2_id'] == vote_dict['vista_id']:
|
76 |
+
assert vote_dict['vista_id'] in vista_ids
|
77 |
vista_vote_count[category] += 1
|
78 |
elif vote_dict[qid] == 'No preference':
|
79 |
no_preference_count[category] += 1
|
80 |
else:
|
81 |
raise ValueError('Invalid vote: ' + str(vote_dict))
|
82 |
|
83 |
+
print("Skipped", skips, "votes")
|
84 |
+
|
85 |
# Create a table with the results. One row for GEM, one row for Vista, one row for no preference
|
86 |
|
87 |
data = []
|
results.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
category,gem,vista,no_preference
|
2 |
-
q1,
|
3 |
-
q2,
|
|
|
1 |
category,gem,vista,no_preference
|
2 |
+
q1,12,24,54
|
3 |
+
q2,43,27,20
|