File size: 3,999 Bytes
f29b99e
417a068
f29b99e
 
651ca1a
 
 
 
 
f29b99e
417a068
 
651ca1a
 
 
 
 
 
 
 
417a068
 
f29b99e
 
 
 
 
 
 
417a068
 
f29b99e
417a068
 
 
 
 
 
 
f29b99e
 
651ca1a
417a068
 
651ca1a
f29b99e
417a068
 
 
 
 
651ca1a
 
417a068
f29b99e
 
 
 
 
 
 
 
 
417a068
f29b99e
 
 
 
417a068
 
651ca1a
 
 
 
 
 
 
 
 
 
 
 
 
f29b99e
651ca1a
 
f29b99e
417a068
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f29b99e
417a068
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
from openai import OpenAI
import base64
import io
import logging

# Configure the root logger to emit INFO-and-above records, then create the
# module-level logger that solve_stem_problem uses to report its progress
# and failures.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def _encode_image_as_data_url(image):
    """Serialize *image* to PNG and return it as a base64 ``data:`` URL."""
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{encoded}"


def solve_stem_problem(
    api_key,
    image,
    subject="math",
    model="google/gemini-flash-1.5",
):
    """Ask a vision model on OpenRouter to solve the problem shown in *image*.

    The image is sent inline as a base64 PNG data URL together with a prompt
    that asks the model to answer in the voice of a subject-specific
    "detective" persona.

    Args:
        api_key: OpenRouter API key typed in by the user. Surrounding
            whitespace is stripped; an empty key is rejected.
        image: The uploaded problem image. It must expose
            ``save(fp, format=...)`` (the UI passes a PIL image). ``None``
            means nothing was uploaded and is rejected.
        subject: One of "math", "physics", "chemistry" or "coding". Other
            values fall back to the "Algebra Ace" persona; an empty value is
            treated as "math".
        model: OpenRouter model identifier to query. Defaults to the model
            the app has always used.

    Returns:
        The model's solution text, or a human-readable error message. Failures
        during client setup, image encoding and the API call are caught and
        reported through the return value so the UI can display them.
    """
    # Validate user input before doing any work, so a missing key or image
    # yields an actionable message instead of a low-level exception text.
    api_key = (api_key or "").strip()
    if not api_key:
        return "Please enter your OpenRouter API key."
    if image is None:
        return "Please upload an image of the problem before solving."
    if not subject:
        # A cleared dropdown yields an empty value; keep the prompt sensible.
        subject = "math"

    # Initialize OpenAI client (pointed at OpenRouter) with the user's key
    try:
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key,
        )
    except Exception as e:
        logger.error("Failed to initialize OpenAI client: %s", e)
        return f"Error initializing API client: {e}"

    # Pick the persona that matches the subject
    detectives = {
        "math": "Algebra Ace",
        "physics": "Physics Phantom",
        "chemistry": "Chemistry Clue-finder",
        "coding": "Code Cracker",
    }
    detective = detectives.get(subject, "Algebra Ace")

    # Encode the uploaded image so it can travel inside the JSON request
    try:
        image_url_data = _encode_image_as_data_url(image)
    except Exception as e:
        logger.error("Image encoding error: %s", e)
        return f"Error encoding image: {e}"

    # Call the API with error handling
    try:
        completion = client.chat.completions.create(
            # NOTE(review): presumably OpenRouter's optional app-attribution
            # headers - confirm against the OpenRouter docs.
            extra_headers={
                "HTTP-Referer": "https://stem-sleuth.example.com",
                "X-Title": "STEM Sleuth",
            },
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Act as {detective} and solve this {subject} problem step-by-step with a detective narrative."
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url_data}
                        }
                    ]
                }
            ]
        )

        if not completion.choices:
            logger.warning("API returned no choices")
            return "API returned no choices. Please check model availability or API key permissions."

        # The message object can be present while its content is None or
        # empty, so check the content itself rather than only the message.
        message = completion.choices[0].message
        solution = message.content if message else None
        if not solution:
            logger.warning("API returned no message content")
            return "API returned no message content. Please try again or check the model."

        logger.info("Successfully retrieved solution")
        return solution

    except Exception as e:
        logger.error("API call failed: %s", e)
        return f"Error calling API: {e}. Please verify model availability or try again later."

# Assemble the Gradio UI: the API key and subject selector share one row, with
# the image upload, the trigger button and the solution box stacked below.
with gr.Blocks() as app:
    gr.Markdown(value="# STEM Sleuth Problem Solver")
    gr.Markdown(
        value=(
            "Upload an image of a STEM problem, select the subject, "
            "and provide your API key to get a step-by-step solution."
        )
    )

    with gr.Row():
        key_box = gr.Textbox(
            type="password",
            label="OpenRouter API Key",
            placeholder="Enter your API key",
        )
        subject_picker = gr.Dropdown(
            label="Subject",
            value="math",
            choices=["math", "physics", "chemistry", "coding"],
        )

    problem_image = gr.Image(label="Upload Problem Image", type="pil")
    run_button = gr.Button(value="Solve Problem")
    solution_box = gr.Textbox(lines=10, label="Solution")

    # Wire the button to the solver; the input order matches the solver's
    # positional parameters (api_key, image, subject).
    run_button.click(
        solve_stem_problem,
        [key_box, problem_image, subject_picker],
        solution_box,
    )

# Start the web server for the interface
app.launch()