prithivMLmods committed on
Commit
73e9439
·
verified ·
1 Parent(s): f33a64a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -16,6 +16,7 @@ import cv2
16
 
17
  from transformers import (
18
  Qwen2_5_VLForConditionalGeneration,
 
19
  AutoProcessor,
20
  AutoTokenizer,
21
  TextIteratorStreamer,
@@ -51,12 +52,22 @@ model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
51
  # Load coreOCR-7B-050325-preview
52
  MODEL_ID_K = "prithivMLmods/coreOCR-7B-050325-preview"
53
  processor_k = AutoProcessor.from_pretrained(MODEL_ID_K, trust_remote_code=True)
54
- model_k = Qwen2_5_VLForConditionalGeneration.from_pretrained(
55
  MODEL_ID_K,
56
  trust_remote_code=True,
57
  torch_dtype=torch.float16
58
  ).to(device).eval()
59
 
 
 
 
 
 
 
 
 
 
 
60
  def downsample_video(video_path):
61
  """
62
  Downsamples the video to evenly spaced frames.
@@ -97,6 +108,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
97
  elif model_name == "coreOCR-7B-050325-preview":
98
  processor = processor_k
99
  model = model_k
 
 
 
100
  else:
101
  yield "Invalid model selected."
102
  return
@@ -150,6 +164,9 @@ def generate_video(model_name: str, text: str, video_path: str,
150
  elif model_name == "coreOCR-7B-050325-preview":
151
  processor = processor_k
152
  model = model_k
 
 
 
153
  else:
154
  yield "Invalid model selected."
155
  return
@@ -250,7 +267,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
250
  with gr.Column():
251
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
252
  model_choice = gr.Radio(
253
- choices=["SkyCaptioner-V1", "SpaceThinker-3B", "coreOCR-7B-050325-preview"],
254
  label="Select Model",
255
  value="SkyCaptioner-V1"
256
  )
 
16
 
17
  from transformers import (
18
  Qwen2_5_VLForConditionalGeneration,
19
+ Qwen2VLForConditionalGeneration,
20
  AutoProcessor,
21
  AutoTokenizer,
22
  TextIteratorStreamer,
 
52
  # Load coreOCR-7B-050325-preview
53
  MODEL_ID_K = "prithivMLmods/coreOCR-7B-050325-preview"
54
  processor_k = AutoProcessor.from_pretrained(MODEL_ID_K, trust_remote_code=True)
55
+ model_k = Qwen2VLForConditionalGeneration.from_pretrained(
56
  MODEL_ID_K,
57
  trust_remote_code=True,
58
  torch_dtype=torch.float16
59
  ).to(device).eval()
60
 
61
+ # Load Imgscope-OCR-2B-0527
62
+ MODEL_ID_Y = "prithivMLmods/Imgscope-OCR-2B-0527"
63
+ processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
64
+ model_y = Qwen2VLForConditionalGeneration.from_pretrained(
65
+ MODEL_ID_Y,
66
+ trust_remote_code=True,
67
+ torch_dtype=torch.float16
68
+ ).to(device).eval()
69
+
70
+
71
  def downsample_video(video_path):
72
  """
73
  Downsamples the video to evenly spaced frames.
 
108
  elif model_name == "coreOCR-7B-050325-preview":
109
  processor = processor_k
110
  model = model_k
111
+ elif model_name == "Imgscope-OCR-2B-0527":
112
+ processor = processor_y
113
+ model = model_y
114
  else:
115
  yield "Invalid model selected."
116
  return
 
164
  elif model_name == "coreOCR-7B-050325-preview":
165
  processor = processor_k
166
  model = model_k
167
+ elif model_name == "Imgscope-OCR-2B-0527":
168
+ processor = processor_y
169
+ model = model_y
170
  else:
171
  yield "Invalid model selected."
172
  return
 
267
  with gr.Column():
268
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
269
  model_choice = gr.Radio(
270
+ choices=["SkyCaptioner-V1", "SpaceThinker-3B", "coreOCR-7B-050325-preview", "Imgscope-OCR-2B-0527"],
271
  label="Select Model",
272
  value="SkyCaptioner-V1"
273
  )