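"""
Build a multiple-choice evaluation set from SVG image descriptions.

Summary (inferred from the code below): reads a CSV of descriptions
(expected columns: "id" and "description"), asks GPT-4o mini to generate
questions, answer choices, and correct answers for each description, and
writes the results to an evaluation CSV whose "question", "choices", and
"answer" columns hold JSON-encoded lists.
"""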
import pandas as pd
import json
import time
import os
from openai import OpenAI
from tqdm import tqdm # for progress bar
import dotenv
dotenv.load_dotenv()

# Initialize OpenAI client
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    api_key = input("Enter your OpenAI API key: ")
client = OpenAI(api_key=api_key)
def generate_evaluation_data(description):
"""
Use GPT-4o mini to generate evaluation questions, choices, and answers for an SVG image description
"""
prompt = f"""
Based on the following description of an SVG image:
"{description}"
Generate 3-5 questions about visual elements that would be in this image, along with multiple-choice options and the correct answers.
For each question:
1. The question should be answerable by looking at the image that matches the description
2. Provide 2-4 possible answer choices for each question
3. Indicate the correct answer that matches the description
Format your response as a JSON object with exactly these three keys:
- "question": a list of question strings
- "choices": a list of lists, where each inner list contains the possible choices for the corresponding question
- "answer": a list of strings, where each string is the correct answer for the corresponding question
Example format:
{{
"question": ["Is there a red circle?", "What shape is present?"],
"choices": [["yes", "no"], ["square", "circle", "triangle", "hexagon"]],
"answer": ["yes", "circle"]
}}
Make sure your response is strictly in this JSON format with no additional text.
"""
try:
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": prompt}],
temperature=0.7,
max_tokens=1000,
response_format={"type": "json_object"}
)
# Parse the JSON response
result = json.loads(response.choices[0].message.content)
# Validate the response structure
if not all(key in result for key in ["question", "choices", "answer"]):
print(f"Warning: Response missing required keys for '{description}'")
return None
# Check that all lists are the same length
if not (len(result["question"]) == len(result["choices"]) == len(result["answer"])):
print(f"Warning: Lists in response have inconsistent lengths for '{description}'")
return None
return result
except Exception as e:
print(f"Error generating evaluation data for '{description}': {e}")
return None
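# Illustrative only: for a hypothetical description such as "a red circle above
# a blue square", generate_evaluation_data() returns a dict of parallel lists,
# e.g. {"question": [...], "choices": [[...], ...], "answer": [...]},
# or None when the model's reply is missing keys or has mismatched lengths.
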
def create_evaluation_dataset(csv_path, output_path):
"""
Process a CSV file with descriptions and create an evaluation dataset
"""
# Read the CSV file
df = pd.read_csv(csv_path)
print(f"Loaded {len(df)} descriptions from {csv_path}")
# Initialize lists to store the evaluation data
ids = []
questions = []
choices = []
answers = []
# Process each row in the CSV
for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing descriptions"):
item_id = row["id"]
description = row["description"]
# Generate evaluation data
eval_data = generate_evaluation_data(description)
if eval_data:
ids.append(item_id)
questions.append(json.dumps(eval_data["question"]))
choices.append(json.dumps(eval_data["choices"]))
answers.append(json.dumps(eval_data["answer"]))
# Sleep briefly to avoid hitting API rate limits
time.sleep(0.5)
# Create a DataFrame with the evaluation data
eval_df = pd.DataFrame({
"id": ids,
"question": questions,
"choices": choices,
"answer": answers
})
# Save as CSV
eval_df.to_csv(output_path, index=False)
print(f"CSV version saved to {output_path}")
return eval_df
def main():
    # Get input/output paths
    input_path = "data/descriptions.csv"
    output_path = "data/eval.csv"

    # Create the evaluation dataset
    eval_df = create_evaluation_dataset(input_path, output_path)

    # Display a sample of the generated dataset
    print("\nSample of generated evaluation data:")
    print(eval_df.head())

    # Show stats
    print(f"\nGenerated evaluation data for {len(eval_df)} out of {pd.read_csv(input_path).shape[0]} descriptions")

if __name__ == "__main__":
    main()
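
# Example run (assumptions: the script is saved as, e.g., generate_eval.py and
# data/descriptions.csv exists with "id" and "description" columns):
#
#   $ python generate_eval.py
#
# A resulting data/eval.csv row might look like this (values are illustrative;
# the JSON lists are stored as CSV-escaped strings):
#   id,question,choices,answer
#   42,"[""Is there a red circle?""]","[[""yes"", ""no""]]","[""yes""]"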