"""STEM Sleuth: a Gradio app that asks an OpenRouter-hosted model to solve a
photographed STEM problem, narrated by a subject-specific "detective" persona.
"""

import base64
import io
import logging

import gradio as gr
from openai import OpenAI

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
# Adjust based on the models OpenRouter currently makes available.
MODEL_NAME = "google/gemini-flash-1.5"
DEFAULT_SUBJECT = "math"
# Persona the model is asked to adopt for each supported subject. The key
# order is also the order of the choices shown in the subject dropdown.
DETECTIVES = {
    "math": "Algebra Ace",
    "physics": "Physics Phantom",
    "chemistry": "Chemistry Clue-finder",
    "coding": "Code Cracker",
}


def _encode_image_as_data_url(image):
    """Return *image* (an object with a PIL-style ``save``) as a PNG data URL."""
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"


def solve_stem_problem(api_key, image, subject="math"):
    """Ask the model for a step-by-step solution to the problem in *image*.

    This is a UI callback, so it never raises: every failure is logged and
    reported back as a human-readable string.

    Args:
        api_key: OpenRouter API key supplied by the user.
        image: The uploaded problem image (the UI passes a PIL image, or
            ``None`` when nothing was uploaded).
        subject: Subject name used in the prompt. Subjects missing from
            ``DETECTIVES`` keep their name in the prompt but use the
            "Algebra Ace" persona; an empty/``None`` subject is treated
            as "math".

    Returns:
        The model's solution text, or an error message describing what
        went wrong.
    """
    # Fail fast on missing inputs instead of surfacing an opaque
    # AttributeError / authentication failure further down.
    if not api_key or not api_key.strip():
        logger.warning("No API key provided")
        return "Please enter your OpenRouter API key."
    if image is None:
        logger.warning("No image provided")
        return "Please upload an image of the problem."

    # Initialize OpenAI client with user-provided API key
    try:
        client = OpenAI(
            base_url=OPENROUTER_BASE_URL,
            # Pasted keys often carry stray whitespace or a trailing newline.
            api_key=api_key.strip(),
        )
    except Exception as e:
        logger.error("Failed to initialize OpenAI client: %s", e)
        return f"Error initializing API client: {str(e)}"

    # Define detective based on subject
    subject = subject or DEFAULT_SUBJECT
    detective = DETECTIVES.get(subject, DETECTIVES[DEFAULT_SUBJECT])

    # Encode the uploaded image to base64
    try:
        image_url_data = _encode_image_as_data_url(image)
    except Exception as e:
        logger.error("Image encoding error: %s", e)
        return f"Error encoding image: {str(e)}"

    prompt = (
        f"Act as {detective} and solve this {subject} problem "
        "step-by-step with a detective narrative."
    )

    # Call the API with error handling
    try:
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://stem-sleuth.example.com",
                "X-Title": "STEM Sleuth",
            },
            model=MODEL_NAME,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url_data},
                        },
                    ],
                }
            ],
        )

        # Detailed response checking
        if not completion.choices:
            logger.warning("API returned no choices")
            return (
                "API returned no choices. Please check model availability "
                "or API key permissions."
            )

        message = completion.choices[0].message
        # The message object can exist while its content is None or empty;
        # treat that the same as a missing message so the UI never shows
        # a blank result.
        if not message or not message.content:
            logger.warning("API returned no message content")
            return (
                "API returned no message content. "
                "Please try again or check the model."
            )

        logger.info("Successfully retrieved solution")
        return message.content
    except Exception as e:
        logger.error("API call failed: %s", e)
        return (
            f"Error calling API: {str(e)}. "
            "Please verify model availability or try again later."
        )


# Create Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# STEM Sleuth Problem Solver")
    gr.Markdown(
        "Upload an image of a STEM problem, select the subject, and provide "
        "your API key to get a step-by-step solution."
    )

    with gr.Row():
        api_key_input = gr.Textbox(
            label="OpenRouter API Key",
            type="password",
            placeholder="Enter your API key",
        )
        subject_input = gr.Dropdown(
            choices=list(DETECTIVES),
            label="Subject",
            value=DEFAULT_SUBJECT,
        )

    image_input = gr.Image(type="pil", label="Upload Problem Image")
    solve_button = gr.Button("Solve Problem")
    output = gr.Textbox(label="Solution", lines=10)

    solve_button.click(
        fn=solve_stem_problem,
        inputs=[api_key_input, image_input, subject_input],
        outputs=output,
    )

# Launch the app
app.launch()