Update app.py
app.py CHANGED
```diff
@@ -12,7 +12,7 @@ vl_chat_processor = VLChatProcessor.from_pretrained(model_path)
 tokenizer = vl_chat_processor.tokenizer
 
 
-def describe_image(image, user_question="Solve this AP Problem step by step and
+def describe_image(image, user_question="You are the best AP teacher in the world. Analyze the AP problem in the image, and solve it step by step to let a student who don't know how to solve it understand"):
     try:
         # Convert the PIL Image to a BytesIO object for compatibility
         image_byte_arr = BytesIO()
```
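The context lines cut off right after `image_byte_arr = BytesIO()`. For readers reconstructing the elided step, a PIL-to-bytes round-trip of this shape is the usual pattern; the helper name and the PNG format are my assumptions, not from app.py:

```python
from io import BytesIO
from PIL import Image

# Hypothetical helper (not in app.py) illustrating the elided conversion:
# serialize a PIL image into raw bytes that an image processor can consume.
def pil_to_bytes(image: Image.Image) -> bytes:
    image_byte_arr = BytesIO()
    image.save(image_byte_arr, format="PNG")  # assumed format
    return image_byte_arr.getvalue()
```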
```diff
@@ -43,21 +43,31 @@ def describe_image(image, user_question="Solve this AP Problem step by step and
             force_batchify=True
         )
 
+        # Explicitly cast all tensors in prepare_inputs to torch.float16
+        prepare_inputs = {
+            k: v.to(torch.float16) if isinstance(v, torch.Tensor) else v
+            for k, v in prepare_inputs.items()
+        }
+
         # Load and prepare the model
         vl_gpt = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).to(torch.float16).eval()
-        vl_gpt = vl_gpt.to(torch.float16)
+        vl_gpt = vl_gpt.to(torch.float16)  # Explicitly ensure all components are in float16
 
         # Generate embeddings from the image input
         inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs).to(dtype=torch.float16)
 
+        # Ensure attention mask is also in torch.float16
+        attention_mask = prepare_inputs["attention_mask"].to(vl_gpt.device).to(dtype=torch.float16)
+
+        # Debugging: Print tensor dtypes
         print(f"Inputs Embeds dtype: {inputs_embeds.dtype}")
         print(f"Attention Mask dtype: {attention_mask.dtype}")
         print(f"Model dtype: {next(vl_gpt.parameters()).dtype}")
 
         # Generate the model's response
         outputs = vl_gpt.language_model.generate(
-            inputs_embeds=inputs_embeds,
-            attention_mask
+            inputs_embeds=inputs_embeds.to(torch.float16),
+            attention_mask=attention_mask.to(torch.float16),
             pad_token_id=tokenizer.eos_token_id,
             bos_token_id=tokenizer.bos_token_id,
             eos_token_id=tokenizer.eos_token_id,
```
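One caveat worth flagging about this hunk: the blanket comprehension also casts integer tensors such as `input_ids` and `attention_mask` to float16, and `generate` is then handed a float16 mask. Hugging Face models generally expect those to stay integer or bool (embedding lookups index with longs, and masks are compared rather than multiplied), so this is a likely source of dtype surprises rather than a cure for them. A more conservative variant, a sketch and not what the commit ships, casts only the floating-point tensors (e.g. pixel values):

```python
import torch

# Sketch only (not in the commit): cast floating-point tensors to float16,
# leaving integer/bool tensors such as input_ids and attention_mask untouched.
def cast_floats_to_fp16(inputs: dict) -> dict:
    return {
        k: v.to(torch.float16)
        if isinstance(v, torch.Tensor) and torch.is_floating_point(v)
        else v
        for k, v in inputs.items()
    }

# Usage: prepare_inputs = cast_floats_to_fp16(prepare_inputs)
```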
```diff
@@ -65,16 +75,13 @@ def describe_image(image, user_question="Solve this AP Problem step by step and
             do_sample=False,
             use_cache=True
         )
-        outputs = outputs.to(torch.float16)
 
         # Decode the generated tokens into text
         answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
         return answer
-
     except Exception as e:
         # Provide detailed error information
         return f"Error: {str(e)}"
-
 # Gradio interface
 def gradio_app():
     with gr.Blocks() as demo:
```
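The removal of `outputs = outputs.to(torch.float16)` is the clearly correct part of this hunk: `generate` returns integer token ids, so casting them to float16 would corrupt them before decoding. A self-contained sketch of the decode step, with a helper name that is mine rather than app.py's:

```python
import torch
from transformers import PreTrainedTokenizerBase

# Hypothetical helper (not in app.py): decode the first generated sequence.
# No dtype cast is needed or safe here, since generate() yields integer ids.
def decode_first_sequence(outputs: torch.Tensor,
                          tokenizer: PreTrainedTokenizerBase) -> str:
    return tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
```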
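The diff's context stops at `with gr.Blocks() as demo:`. For completeness, a minimal wiring of `describe_image` into a Blocks UI might look like the sketch below; the component names and layout are assumptions, since the commit does not show them:

```python
import gradio as gr

# Sketch only: assumes describe_image from app.py is in scope.
def gradio_app():
    with gr.Blocks() as demo:
        image_input = gr.Image(type="pil", label="AP problem image")
        question = gr.Textbox(label="Question (optional)")
        answer = gr.Textbox(label="Model answer")
        solve = gr.Button("Solve")
        solve.click(describe_image, inputs=[image_input, question], outputs=answer)
    return demo

# Typical entry point: gradio_app().launch()
```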