prithivMLmods committed
Commit df7c39c · verified
1 Parent(s): 94ce5a9

Update app.py

Files changed (1)
  1. app.py +10 -20
app.py CHANGED
@@ -14,9 +14,6 @@ from PIL import Image
 import requests
 from io import BytesIO
 
-# -------------------------
-# Qwen2-VL Model for OCR-based tasks
-# -------------------------
 QV_MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -25,9 +22,6 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
-# -------------------------
-# Aya-Vision Model for image-text tasks (@aya-vision)
-# -------------------------
 AYA_MODEL_ID = "CohereForAI/aya-vision-8b"
 aya_processor = AutoProcessor.from_pretrained(AYA_MODEL_ID)
 aya_model = AutoModelForImageTextToText.from_pretrained(
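
Aside, not part of the commit: both checkpoints above expose a standard Transformers processor/model pair, so a query runs through the usual chat-template pipeline. A minimal sketch of that pattern, assuming the default templates; the function name and generation settings are illustrative assumptions, not taken from app.py:

# Hedged sketch of driving either processor/model pair above via the
# standard Transformers chat-template API; not the app's actual code.
from PIL import Image

def run_vlm(processor, model, image: Image.Image, prompt: str) -> str:
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt},
        ],
    }]
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=256)
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(new_tokens, skip_special_tokens=True)
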
@@ -137,26 +131,22 @@ def model_inference(input_dict, history):
         time.sleep(0.01)
         yield buffer
 
-# -------------------------
-# Example inputs for the combined interface
-# -------------------------
 examples = [
+    [{"text": "@aya-vision Extract as JSON table from the table", "files": ["examples/4.jpg"]}],
     [{"text": "@aya-vision Extract JSON from the image", "files": ["example_images/document.jpg"]}],
-    [{"text": "Summarize the letter", "files": ["examples/1.png"]}],
-    [{"text": "Describe the photo", "files": ["examples/3.png"]}],
-    [{"text": "Extract as JSON table from the table", "files": ["examples/4.jpg"]}],
-    [{"text": "Summarize the full image in detail", "files": ["examples/2.jpg"]}],
-    [{"text": "Describe this image.", "files": ["example_images/campeones.jpg"]}],
-    [{"text": "What is this UI about?", "files": ["example_images/s2w_example.png"]}],
-    [{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
-    [{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
-    [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
+    [{"text": "@aya-vision Summarize the letter", "files": ["examples/1.png"]}],
+    [{"text": "@aya-vision Describe the photo", "files": ["examples/3.png"]}],
+    [{"text": "@aya-vision Summarize the full image in detail", "files": ["examples/2.jpg"]}],
+    [{"text": "@aya-vision Describe this image.", "files": ["example_images/campeones.jpg"]}],
+    [{"text": "@aya-vision What is this UI about?", "files": ["example_images/s2w_example.png"]}],
+    [{"text": "@aya-vision Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
+    [{"text": "@aya-vision Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
+    [{"text": "@aya-vision Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
 ]
 
-# Build the Gradio ChatInterface.
 demo = gr.ChatInterface(
     fn=model_inference,
-    description="# **Multimodal OCR with @aya-vision Feature**",
+    description="# **Multimodal OCR with `@aya-vision` Feature**",
     examples=examples,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
     stop_btn="Stop Generation",
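
Since every example prompt now carries the @aya-vision tag, the dispatch this commit leans on is worth spelling out. A minimal sketch of prefix routing, assuming model_inference branches on a leading tag; its body is outside the hunk context, so this is a hypothetical reconstruction, not the author's code:

# Hypothetical reconstruction of the tag dispatch suggested by the examples;
# model_inference's real body is not shown in this diff.
def route(text: str) -> tuple[str, str]:
    """Pick a backend and strip the tag, assuming '@aya-vision' selects
    the Aya-Vision model and everything else falls through to Qwen2-VL."""
    tag = "@aya-vision"
    stripped = text.strip()
    if stripped.lower().startswith(tag):
        return "aya", stripped[len(tag):].strip()
    return "qwen", stripped

For instance, route("@aya-vision Summarize the letter") returns ("aya", "Summarize the letter"), while an untagged query falls through to the Qwen2-VL OCR path.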
 