xzerus commited on
Commit
9c6ea3f
·
verified ·
1 Parent(s): b67129f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -1
app.py CHANGED
@@ -1,3 +1,32 @@
1
  import gradio as gr
 
 
 
2
 
3
- gr.load("models/meta-llama/Llama-3.2-11B-Vision-Instruct").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForCausalLM
3
+ import torch
4
+ from PIL import Image
5
 
6
+ # Load the model and processor
7
+ model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
8
+ device = "cuda" if torch.cuda.is_available() else "cpu"
9
+
10
+ processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
11
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, trust_remote_code=True).to(device)
12
+
13
+ # Function to process image and text prompt
14
+ def process_image(image, prompt="<ocr>"):
15
+ inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
16
+ outputs = model.generate(**inputs, max_new_tokens=1024)
17
+ generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
18
+ return generated_text
19
+
20
+ # Gradio Interface
21
+ iface = gr.Interface(
22
+ fn=process_image,
23
+ inputs=[
24
+ gr.Image(type="pil", label="Upload Image"),
25
+ gr.Textbox(value="<ocr>", label="Prompt"),
26
+ ],
27
+ outputs="text",
28
+ title="OCR with Llama-3.2-11B-Vision-Instruct",
29
+ description="Upload an image and input a prompt (e.g., '<ocr>') to extract text.",
30
+ )
31
+
32
+ iface.launch()