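"""Build a multiple-choice evaluation dataset from SVG image descriptions.

For each row of data/descriptions.csv (columns: id, description), this script
asks GPT-4o mini to generate 3-5 questions with answer choices and correct
answers, then writes the results to data/eval.csv as JSON-encoded list columns.
"""
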
import pandas as pd
import json
import time
import os
from openai import OpenAI
from tqdm import tqdm  # for progress bar
from dotenv import load_dotenv

# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Initialize OpenAI client
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    api_key = input("Enter your OpenAI API key: ")
client = OpenAI(api_key=api_key)

def generate_evaluation_data(description):
    """
    Use GPT-4o mini to generate evaluation questions, choices, and answers for an SVG image description
    """
    prompt = f"""
    Based on the following description of an SVG image:
    "{description}"
    
    Generate 3-5 questions about visual elements that would be in this image, along with multiple-choice options and the correct answers.
    
    For each question:
    1. The question should be answerable by looking at the image that matches the description
    2. Provide 2-4 possible answer choices for each question
    3. Indicate the correct answer that matches the description
    
    Format your response as a JSON object with exactly these three keys:
    - "question": a list of question strings
    - "choices": a list of lists, where each inner list contains the possible choices for the corresponding question
    - "answer": a list of strings, where each string is the correct answer for the corresponding question
    
    Example format:
    {{
        "question": ["Is there a red circle?", "What shape is present?"],
        "choices": [["yes", "no"], ["square", "circle", "triangle", "hexagon"]],
        "answer": ["yes", "circle"]
    }}
    
    Make sure your response is strictly in this JSON format with no additional text.
    """
    
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1000,
            response_format={"type": "json_object"}
        )
        
        # Parse the JSON response
        result = json.loads(response.choices[0].message.content)
        
        # Validate the response structure
        if not all(key in result for key in ["question", "choices", "answer"]):
            print(f"Warning: Response missing required keys for '{description}'")
            return None
        
        # Check that all lists are the same length
        if not (len(result["question"]) == len(result["choices"]) == len(result["answer"])):
            print(f"Warning: Lists in response have inconsistent lengths for '{description}'")
            return None
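
        # Extra guard (an addition, not in the original script): each correct
        # answer should appear among its question's choices; drop responses
        # that violate this.
        for ans, opts in zip(result["answer"], result["choices"]):
            if ans not in opts:
                print(f"Warning: answer '{ans}' not in choices for '{description}'")
                return None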
            
        return result
    
    except Exception as e:
        print(f"Error generating evaluation data for '{description}': {e}")
        return None
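
# Optional helper (an addition, not part of the original flow): retry
# transient failures, such as rate-limit errors, with exponential backoff
# before giving up on a description.
def generate_with_retries(description, max_retries=3, base_delay=1.0):
    """Call generate_evaluation_data up to max_retries times with backoff."""
    for attempt in range(max_retries):
        result = generate_evaluation_data(description)
        if result is not None:
            return result
        time.sleep(base_delay * (2 ** attempt))  # 1s, 2s, 4s, ...
    return None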

def create_evaluation_dataset(csv_path, output_path):
    """
    Process a CSV file with descriptions and create an evaluation dataset
    """
    # Read the CSV file
    df = pd.read_csv(csv_path)
    print(f"Loaded {len(df)} descriptions from {csv_path}")
    
    # Initialize lists to store the evaluation data
    ids = []
    questions = []
    choices = []
    answers = []
    
    # Process each row in the CSV
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing descriptions"):
        item_id = row["id"]
        description = row["description"]
        
        # Generate evaluation data
        eval_data = generate_evaluation_data(description)
        
        if eval_data:
            ids.append(item_id)
            questions.append(json.dumps(eval_data["question"]))
            choices.append(json.dumps(eval_data["choices"]))
            answers.append(json.dumps(eval_data["answer"]))
        
        # Sleep briefly to avoid hitting API rate limits
        time.sleep(0.5)
    
    # Create a DataFrame with the evaluation data
    eval_df = pd.DataFrame({
        "id": ids,
        "question": questions,
        "choices": choices,
        "answer": answers
    })
    
    # Save as CSV
    eval_df.to_csv(output_path, index=False)
    print(f"CSV version saved to {output_path}")
    
    return eval_df

def main():
    # Get input/output paths
    input_path = "data/descriptions.csv"
    output_path = "data/eval.csv"
    
    # Create the evaluation dataset
    eval_df = create_evaluation_dataset(input_path, output_path)
    
    # Display sample of the generated dataset
    print("\nSample of generated evaluation data:")
    print(eval_df.head())
    
    # Show stats
    print(f"\nGenerated evaluation data for {len(eval_df)} out of {pd.read_csv(input_path).shape[0]} descriptions")

if __name__ == "__main__":
    main()
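
# Example of consuming the output (illustrative, not part of the pipeline):
# each column holds a JSON-encoded list, so decode before use, e.g.
#
#   df = pd.read_csv("data/eval.csv")
#   questions = json.loads(df.loc[0, "question"])  # e.g. ["Is there a red circle?"]
#   choices = json.loads(df.loc[0, "choices"])     # e.g. [["yes", "no"]]
#   answers = json.loads(df.loc[0, "answer"])      # e.g. ["yes"]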