human-eval / get_results.py
PedroMartelleto's picture
controlability
b5dbf16
import gradio as gr
import random
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import uuid
import json
import os
from dotenv import load_dotenv
import re
import pandas as pd
from tqdm import tqdm
# Load variables from a local .env file (if present) into the process
# environment before any os.environ lookups are made.
load_dotenv()

# Each entry is a [file_name, vista_id, gem_id] list taken from one row of
# file_pairs.csv.
# NOTE(review): video_pairs is not referenced again in the visible part of
# this script -- confirm whether it is still needed.
_pair_columns = ['file_name', 'vista_id', 'gem_id']
_pairs_frame = pd.read_csv('file_pairs.csv')
video_pairs = _pairs_frame[_pair_columns].values.tolist()
# Service-account credentials for the Firebase project, assembled from
# environment variables so that no secret is committed with the script.
_private_key = os.environ.get("PRIVATE_KEY")
if _private_key is None:
    # Fail with an actionable message instead of the opaque
    # "'NoneType' object has no attribute 'replace'" raised otherwise.
    raise RuntimeError(
        "PRIVATE_KEY environment variable is not set; "
        "cannot build the Firebase service-account credentials."
    )

my_credentials = {
    "type": "service_account",
    "project_id": "human-eval-c4f83",
    "private_key_id": os.environ.get("PRIVATE_KEY_ID"),
    # Literal backslash-n pairs in the stored key are turned into real
    # newlines (presumably the key is kept on a single line in the
    # environment -- confirm).
    "private_key": _private_key.replace(r'\n', '\n'),
    "client_email": os.environ.get("CLIENT_EMAIL"),
    "client_id": os.environ.get("CLIENT_ID"),
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": os.environ.get("AUTH_PROVIDER_X509_CERT_URL"),
    "client_x509_cert_url": os.environ.get("CLIENT_X509_CERT_URL"),
}

# Initialise the Firebase app only when none has been registered yet.
# NOTE(review): `_apps` is a private attribute of firebase_admin; it is kept
# here because the original script relies on it.
if not firebase_admin._apps:
    cred = credentials.Certificate(my_credentials)
    firebase_admin.initialize_app(cred)

# Firestore client used by the queries further down.
db = firestore.client()
# Fetch every document of the 'votes' collection.
votes = db.collection('votes').get()

# Question ids; each vote document is expected to hold one answer per id.
qids = [
    'q1',
    'q2',
]

# Per-question tallies keyed by question id. They are zero-initialised up
# front so that every question has an entry even when there are no votes at
# all (otherwise building the results table fails with a KeyError).
gem_vote_count = {qid: 0 for qid in qids}
vista_vote_count = {qid: 0 for qid in qids}
no_preference_count = {qid: 0 for qid in qids}

for vote in tqdm(votes):
    vote_dict = vote.to_dict()
    for qid in qids:
        # An answer is 'Video 1', 'Video 2' or 'No preference'.
        choice = vote_dict.get(qid)
        if choice == 'No preference':
            no_preference_count[qid] += 1
            continue

        # Map the answer to the id of the video that was picked. A missing
        # or unknown answer leaves chosen_id as None and is rejected below.
        if choice == 'Video 1':
            chosen_id = vote_dict.get('video1_id')
        elif choice == 'Video 2':
            chosen_id = vote_dict.get('video2_id')
        else:
            chosen_id = None

        # Credit whichever model produced the picked video. The explicit
        # None check keeps two absent ids from comparing equal and being
        # counted as a real vote.
        if chosen_id is not None and chosen_id == vote_dict.get('gem_id'):
            gem_vote_count[qid] += 1
        elif chosen_id is not None and chosen_id == vote_dict.get('vista_id'):
            vista_vote_count[qid] += 1
        else:
            # Covers unknown answers, missing fields, and a picked video
            # that matches neither model id.
            raise ValueError('Invalid vote: ' + str(vote_dict))
# Build the results table: one row per question, with columns holding the
# number of votes for GEM, for Vista, and for "no preference".
data = [
    {
        'category': qid,
        'gem': gem_vote_count[qid],
        'vista': vista_vote_count[qid],
        'no_preference': no_preference_count[qid],
    }
    for qid in qids
]

# Write the table to results.csv without the DataFrame index column.
df = pd.DataFrame(data)
df.to_csv('results.csv', index=False)