prithivMLmods committed on
Commit
eb4ae3c
·
verified ·
1 Parent(s): 271c4da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -29,6 +29,15 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
29
 
30
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
31
 
 
 
 
 
 
 
 
 
 
32
  # Load SkyCaptioner-V1
33
  MODEL_ID_M = "Skywork/SkyCaptioner-V1"
34
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
@@ -65,7 +74,7 @@ model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
65
  torch_dtype=torch.float16
66
  ).to(device).eval()
67
 
68
-
69
  def downsample_video(video_path):
70
  """
71
  Downsamples the video to evenly spaced frames.
@@ -100,6 +109,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
100
  if model_name == "SkyCaptioner-V1":
101
  processor = processor_m
102
  model = model_m
 
 
 
103
  elif model_name == "SpaceThinker-3B":
104
  processor = processor_z
105
  model = model_z
@@ -157,6 +169,9 @@ def generate_video(model_name: str, text: str, video_path: str,
157
  if model_name == "SkyCaptioner-V1":
158
  processor = processor_m
159
  model = model_m
 
 
 
160
  elif model_name == "SpaceThinker-3B":
161
  processor = processor_z
162
  model = model_z
@@ -267,7 +282,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
267
  with gr.Column():
268
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
269
  model_choice = gr.Radio(
270
- choices=["SkyCaptioner-V1", "SpaceThinker-3B", "coreOCR-7B-050325-preview", "SpaceOm-3B"],
271
  label="Select Model",
272
  value="SkyCaptioner-V1"
273
  )
 
29
 
30
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
31
 
32
+ # Load Behemoth-3B-070225-post0.1
33
+ MODEL_ID_N = "prithivMLmods/Behemoth-3B-070225-post0.1"
34
+ processor_n = AutoProcessor.from_pretrained(MODEL_ID_N, trust_remote_code=True)
35
+ model_n = Qwen2_5_VLForConditionalGeneration.from_pretrained(
36
+ MODEL_ID_N,
37
+ trust_remote_code=True,
38
+ torch_dtype=torch.float16
39
+ ).to(device).eval()
40
+
41
  # Load SkyCaptioner-V1
42
  MODEL_ID_M = "Skywork/SkyCaptioner-V1"
43
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 
74
  torch_dtype=torch.float16
75
  ).to(device).eval()
76
 
77
+ # Video sampling
78
  def downsample_video(video_path):
79
  """
80
  Downsamples the video to evenly spaced frames.
 
109
  if model_name == "SkyCaptioner-V1":
110
  processor = processor_m
111
  model = model_m
112
+ elif model_name == "Behemoth-3B-070225-post0.1":
113
+ processor = processor_n
114
+ model = model_n
115
  elif model_name == "SpaceThinker-3B":
116
  processor = processor_z
117
  model = model_z
 
169
  if model_name == "SkyCaptioner-V1":
170
  processor = processor_m
171
  model = model_m
172
+ elif model_name == "Behemoth-3B-070225-post0.1":
173
+ processor = processor_n
174
+ model = model_n
175
  elif model_name == "SpaceThinker-3B":
176
  processor = processor_z
177
  model = model_z
 
282
  with gr.Column():
283
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
284
  model_choice = gr.Radio(
285
+ choices=["SkyCaptioner-V1", "Behemoth-3B-070225-post0.1", "SpaceThinker-3B", "coreOCR-7B-050325-preview", "SpaceOm-3B"],
286
  label="Select Model",
287
  value="SkyCaptioner-V1"
288
  )