prithivMLmods committed
Commit 3d010db · verified · 1 Parent(s): 03e534e

Update app.py

Files changed (1)
  1. app.py +9 -40
app.py CHANGED
@@ -15,7 +15,6 @@ import cv2
 
 from transformers import (
     Qwen2_5_VLForConditionalGeneration,
-    Qwen2VLForConditionalGeneration,
     AutoProcessor,
     AutoTokenizer,
     TextIteratorStreamer,
@@ -34,7 +33,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 # Load typhoon
-MODEL_ID_M = "scb10x/typhoon-ocr-7b"
+MODEL_ID_M = "Qwen/Qwen2.5-VL-3B-Instruct"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
@@ -42,17 +41,8 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load DocScope
-MODEL_ID_X = "prithivMLmods/coreOCR-7B-050325-preview"
-processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_x = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_X,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
-
 # Load Space Thinker
-MODEL_ID_Z = "remyxai/SpaceThinker-Qwen2.5VL-3B"
+MODEL_ID_Z = "One-RL-to-See-Them-All/Orsta-32B-0326"
 processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
 model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Z,
@@ -60,15 +50,6 @@ model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load Qwen2-VL-7B-Instruct
-MODEL_ID_T = "Qwen/Qwen2-VL-7B-Instruct"
-processor_t = AutoTokenizer.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_t = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_T,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
-
 
 
 def downsample_video(video_path):
@@ -102,18 +83,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     """
     Generates responses using the selected model for image input.
     """
-    if model_name == "typhoon-ocr-7b":
+    if model_name == "Qwen2.5-VL-3B":
         processor = processor_m
         model = model_m
-    elif model_name == "coreOCR-7B-050325-preview":
-        processor = processor_x
-        model = model_x
-    elif model_name == "SpaceThinker-Qwen2.5VL-3B":
+    elif model_name == "Orsta-32B-0326":
         processor = processor_z
         model = model_z
-    elif model_name == "Qwen2-VL-7B-Instruct":
-        processor = processor_t
-        model = model_t
     else:
         yield "Invalid model selected."
         return
@@ -158,18 +133,12 @@ def generate_video(model_name: str, text: str, video_path: str,
     """
     Generates responses using the selected model for video input.
     """
-    if model_name == "typhoon-ocr-7b":
+    if model_name == "Qwen2.5-VL-3B":
         processor = processor_m
         model = model_m
-    elif model_name == "coreOCR-7B-050325-preview":
-        processor = processor_x
-        model = model_x
-    elif model_name == "SpaceThinker-Qwen2.5VL-3B":
+    elif model_name == "Orsta-32B-0326":
         processor = processor_z
         model = model_z
-    elif model_name == "Qwen2-VL-7B-Instruct":
-        processor = processor_t
-        model = model_t
     else:
         yield "Invalid model selected."
         return
@@ -270,9 +239,9 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         with gr.Column():
             output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
             model_choice = gr.Radio(
-                choices=["coreOCR-7B-050325-preview", "Qwen2-VL-7B-Instruct", "SpaceThinker-Qwen2.5VL-3B", "typhoon-ocr-7b"],
+                choices=["Qwen2.5-VL-3B", "Orsta-32B-0326"],
                 label="Select Model",
-                value="Qwen2-VL-7B-Instruct"
+                value="Orsta-32B-0326"
            )
 
    image_submit.click(
@@ -287,4 +256,4 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     )
 
 if __name__ == "__main__":
-    demo.queue(max_size=40).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
+    demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
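
For orientation, below is a condensed, non-streaming sketch of the loading and dispatch path once this commit is applied. The model IDs, the shared Qwen2_5_VLForConditionalGeneration class, the float16 dtype, and the two Radio choices are taken from the hunks above; the chat-template prompt construction and the plain generate() call are assumptions standing in for unchanged parts of app.py (which appear to stream output through TextIteratorStreamer), and the helper names MODEL_IDS, load, and run_image are illustrative and do not exist in the file.

# Sketch only -- not part of this commit. Model IDs and dtype come from the
# diff above; the prompt/generate code is an assumed simplification of the
# unchanged, non-shown parts of app.py.
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The two Radio choices kept by this commit, mapped to their checkpoints.
MODEL_IDS = {
    "Qwen2.5-VL-3B": "Qwen/Qwen2.5-VL-3B-Instruct",
    "Orsta-32B-0326": "One-RL-to-See-Them-All/Orsta-32B-0326",
}

def load(model_id: str):
    # Both checkpoints are Qwen2.5-VL based, so one processor/model class covers them.
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id, trust_remote_code=True, torch_dtype=torch.float16
    ).to(device).eval()
    return processor, model

def run_image(model_name: str, text: str, image: Image.Image, max_new_tokens: int = 512) -> str:
    if model_name not in MODEL_IDS:
        return "Invalid model selected."
    # app.py loads both models once at import time; loading on demand here keeps the sketch short.
    processor, model = load(MODEL_IDS[model_name])
    messages = [{"role": "user",
                 "content": [{"type": "image"}, {"type": "text", "text": text}]}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(device)
    with torch.inference_mode():
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    new_tokens = output_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0]

The net effect lines up with the file stats (+9/-40): with coreOCR-7B-050325-preview and Qwen2-VL-7B-Instruct dropped, both remaining checkpoints share the Qwen2_5_VLForConditionalGeneration path, so the separate Qwen2VLForConditionalGeneration import and the extra elif branches disappear, and the queue size is reduced from 40 to 30.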