import pandas as pd
import json
import time
import os
from openai import OpenAI
from tqdm import tqdm  # for progress bar
import dotenv

dotenv.load_dotenv()

# Initialize OpenAI client
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    api_key = input("Enter your OpenAI API key: ")

client = OpenAI(api_key=api_key)
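# Note on credentials: the key is resolved from the environment (optionally
# populated from a .env file by python-dotenv); if it is missing, the script
# falls back to the interactive prompt above.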
def generate_evaluation_data(description):
    """
    Use GPT-4o mini to generate evaluation questions, choices, and answers
    for an SVG image description.
    """
    prompt = f"""
    Based on the following description of an SVG image:

    "{description}"

    Generate 3-5 questions about visual elements that would be in this image, along with multiple-choice options and the correct answers.

    For each question:
    1. The question should be answerable by looking at the image that matches the description
    2. Provide 2-4 possible answer choices for each question
    3. Indicate the correct answer that matches the description

    Format your response as a JSON object with exactly these three keys:
    - "question": a list of question strings
    - "choices": a list of lists, where each inner list contains the possible choices for the corresponding question
    - "answer": a list of strings, where each string is the correct answer for the corresponding question

    Example format:
    {{
        "question": ["Is there a red circle?", "What shape is present?"],
        "choices": [["yes", "no"], ["square", "circle", "triangle", "hexagon"]],
        "answer": ["yes", "circle"]
    }}

    Make sure your response is strictly in this JSON format with no additional text.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1000,
            response_format={"type": "json_object"},
        )

        # Parse the JSON response
        result = json.loads(response.choices[0].message.content)

        # Validate the response structure
        if not all(key in result for key in ["question", "choices", "answer"]):
            print(f"Warning: Response missing required keys for '{description}'")
            return None

        # Check that all lists are the same length
        if not (len(result["question"]) == len(result["choices"]) == len(result["answer"])):
            print(f"Warning: Lists in response have inconsistent lengths for '{description}'")
            return None

        return result
    except Exception as e:
        print(f"Error generating evaluation data for '{description}': {e}")
        return None
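# Illustrative usage (not part of the pipeline): a quick way to sanity-check the
# generator on a single description before running the full CSV. The sample
# description string below is hypothetical.
#
# sample = generate_evaluation_data("a red circle above a blue square")
# if sample:
#     for q, a in zip(sample["question"], sample["answer"]):
#         print(q, "->", a)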
def create_evaluation_dataset(csv_path, output_path):
    """
    Process a CSV file with descriptions and create an evaluation dataset.
    """
    # Read the CSV file
    df = pd.read_csv(csv_path)
    print(f"Loaded {len(df)} descriptions from {csv_path}")

    # Initialize lists to store the evaluation data
    ids = []
    questions = []
    choices = []
    answers = []

    # Process each row in the CSV
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing descriptions"):
        item_id = row["id"]
        description = row["description"]

        # Generate evaluation data
        eval_data = generate_evaluation_data(description)
        if eval_data:
            ids.append(item_id)
            questions.append(json.dumps(eval_data["question"]))
            choices.append(json.dumps(eval_data["choices"]))
            answers.append(json.dumps(eval_data["answer"]))

        # Sleep briefly to avoid hitting API rate limits
        time.sleep(0.5)

    # Create a DataFrame with the evaluation data
    eval_df = pd.DataFrame({
        "id": ids,
        "question": questions,
        "choices": choices,
        "answer": answers,
    })

    # Save as CSV
    eval_df.to_csv(output_path, index=False)
    print(f"CSV version saved to {output_path}")

    return eval_df
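# Each row of the output CSV stores "question", "choices", and "answer" as
# JSON-encoded strings (see the json.dumps calls above), so downstream readers
# should decode them with json.loads, e.g.:
#
# first_questions = json.loads(eval_df.iloc[0]["question"])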
def main():
    # Get input/output paths
    input_path = "data/descriptions.csv"
    output_path = "data/eval.csv"

    # Create the evaluation dataset
    eval_df = create_evaluation_dataset(input_path, output_path)

    # Display a sample of the generated dataset
    print("\nSample of generated evaluation data:")
    print(eval_df.head())

    # Show stats
    print(f"\nGenerated evaluation data for {len(eval_df)} out of {pd.read_csv(input_path).shape[0]} descriptions")

if __name__ == "__main__":
    main()
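# Expected input format: data/descriptions.csv must provide "id" and
# "description" columns (those are the only fields the loop reads). The header
# below follows from the column accesses above; the example row is illustrative
# only.
#
# id,description
# 1,"a red circle above a blue square"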