# import torch
# from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
# import gradio as gr
# from PIL import Image
#
# # Use a publicly available high-capacity model.
# # For instance, we use "google/pix2struct-docvqa-large".
# # (If you need a different model or a private one, adjust accordingly and add authentication if necessary.)
# model_name = "google/pix2struct-docvqa-large"
# model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
# processor = Pix2StructProcessor.from_pretrained(model_name)
#
# def solve_problem(image):
#     try:
#         # Ensure the image is in RGB.
#         image = image.convert("RGB")
#
#         # Preprocess image and text prompt.
#         inputs = processor(
#             images=[image],
#             text="Solve the following problem:",
#             return_tensors="pt",
#             max_patches=2048
#         )
#
#         # Generate prediction.
#         predictions = model.generate(
#             **inputs,
#             max_new_tokens=200,
#             early_stopping=True,
#             num_beams=4,
#             temperature=0.2
#         )
#
#         # Decode the prompt (input IDs) and the generated output.
#         problem_text = processor.decode(
#             inputs["input_ids"][0],
#             skip_special_tokens=True,
#             clean_up_tokenization_spaces=True
#         )
#         solution = processor.decode(
#             predictions[0],
#             skip_special_tokens=True,
#             clean_up_tokenization_spaces=True
#         )
#         return f"Problem: {problem_text}\nSolution: {solution}"
#     except Exception as e:
#         return f"Error processing image: {str(e)}"
#
# # Set up the Gradio interface.
# iface = gr.Interface(
#     fn=solve_problem,
#     inputs=gr.Image(type="pil", label="Upload Your Problem Image", image_mode="RGB"),
#     outputs=gr.Textbox(label="Solution", show_copy_button=True),
#     title="Problem Solver with Pix2Struct",
#     description=(
#         "Upload an image (for example, a handwritten math or logic problem) "
#         "and get a solution generated by a high-capacity Pix2Struct model.\n\n"
#         "Note: For best results on domain-specific tasks, consider fine-tuning on your own dataset."
#     ),
#     examples=[
#         ["example_problem1.png"],
#         ["example_problem2.jpg"]
#     ],
#     theme="soft",
#     allow_flagging="never"
# )
#
# if __name__ == "__main__":
#     iface.launch()