prithivMLmods committed · verified
Commit 9522057 · Parent: 91cda81

Update app.py

Files changed (1):
  app.py  +7 -34
app.py CHANGED

@@ -6,37 +6,16 @@ import time
 import torch
 import spaces
 
-# Define model options
-MODEL_OPTIONS = {
-    "Qwen2VL Base": "Qwen/Qwen2-VL-2B-Instruct",
-    "Latex OCR": "prithivMLmods/Qwen2-VL-OCR-2B-Instruct",
-    "Math Prase": "prithivMLmods/Qwen2-VL-Math-Prase-2B-Instruct",
-    "Text Analogy Ocrtest": "prithivMLmods/Qwen2-VL-Ocrtest-2B-Instruct"
-}
-
-# Default model setup
-current_model_id = MODEL_OPTIONS["Latex OCR"]
-processor = AutoProcessor.from_pretrained(current_model_id, trust_remote_code=True)
+MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    current_model_id,
+    MODEL_ID,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
 @spaces.GPU
-def model_inference(input_dict, history, model_id):
-    global model, processor
-
-    # Reload the model and processor if the model selection changes
-    if model_id != current_model_id:
-        current_model_id = model_id
-        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
-            model_id,
-            trust_remote_code=True,
-            torch_dtype=torch.float16
-        ).to("cuda").eval()
-
+def model_inference(input_dict, history):
     text = input_dict["text"]
     files = input_dict["files"]
 
@@ -102,18 +81,12 @@ examples = [
     [{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
     [{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
     [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
-]
 
-# Gradio components
-model_choice = gr.Dropdown(
-    label="Model Selection",
-    choices=list(MODEL_OPTIONS.keys()),
-    value="Latex OCR"
-)
+]
 
 demo = gr.ChatInterface(
-    fn=lambda inputs, history: model_inference(inputs, history, MODEL_OPTIONS[model_choice.value]),
-    description="# **Qwen2.5-VL-3B-Instruct**",
+    fn=model_inference,
+    description="# **Multimodal OCR**",
     examples=examples,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
     stop_btn="Stop Generation",
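
For context, a minimal sketch of how app.py reads after this commit. Only the model-loading block and the gr.ChatInterface keyword arguments are taken verbatim from the diff; the imports and the body of model_inference beyond the two input reads fall outside the hunks above, so the generation code below is the standard Qwen2-VL inference pattern and an assumption, not the file's actual implementation. The @spaces.GPU decorator and the examples list (which reference local files) are omitted so the sketch runs outside a Space.

# Sketch of the post-commit single-model app (assumed reconstruction;
# the generation body is NOT taken from the diff).
import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to("cuda").eval()

def model_inference(input_dict, history):
    text = input_dict["text"]
    files = input_dict["files"]
    # MultimodalTextbox supplies uploaded files as paths.
    images = [Image.open(f) for f in files]

    # Assumed generation body: one image placeholder per file, then the text.
    content = [{"type": "image"} for _ in images] + [{"type": "text", "text": text}]
    messages = [{"role": "user", "content": content}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=[prompt], images=images or None, return_tensors="pt"
    ).to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=1024)
    # Drop the prompt tokens so only the model's reply is decoded.
    reply_ids = output_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(reply_ids, skip_special_tokens=True)[0]

demo = gr.ChatInterface(
    fn=model_inference,
    description="# **Multimodal OCR**",
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
    stop_btn="Stop Generation",
    multimodal=True,  # assumed: needed for the dict-style {"text", "files"} input
)

if __name__ == "__main__":
    demo.launch()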