prithivMLmods committed on
Commit
147a2d3
·
verified ·
1 Parent(s): c74513c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -17,6 +17,7 @@ from transformers import (
17
  Qwen2_5_VLForConditionalGeneration,
18
  Qwen2VLForConditionalGeneration,
19
  Gemma3ForConditionalGeneration,
 
20
  AutoProcessor,
21
  TextIteratorStreamer,
22
  )
@@ -48,9 +49,9 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
48
  ).to(device).eval()
49
 
50
  # Load Relaxed
51
- MODEL_ID_Z = "Ertugrul/Qwen2.5-VL-7B-Captioner-Relaxed"
52
  processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
53
- model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
54
  MODEL_ID_Z,
55
  trust_remote_code=True,
56
  torch_dtype=torch.float16
@@ -93,7 +94,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
93
  elif model_name == "coreOCR-7B-050325-preview":
94
  processor = processor_x
95
  model = model_x
96
- elif model_name == "Captioner-7B":
97
  processor = processor_z
98
  model = model_z
99
  else:
@@ -146,7 +147,7 @@ def generate_video(model_name: str, text: str, video_path: str,
146
  elif model_name == "coreOCR-7B-050325-preview":
147
  processor = processor_x
148
  model = model_x
149
- elif model_name == "Captioner-7B":
150
  processor = processor_z
151
  model = model_z
152
  else:
@@ -246,7 +247,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
246
  with gr.Column():
247
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
248
  model_choice = gr.Radio(
249
- choices=["gemma-3-4b-it", "coreOCR-7B-050325-preview", "Captioner-7B"],
250
  label="Select Model",
251
  value="gemma-3-4b-it"
252
  )
 
17
  Qwen2_5_VLForConditionalGeneration,
18
  Qwen2VLForConditionalGeneration,
19
  Gemma3ForConditionalGeneration,
20
+ LlavaForConditionalGeneration,
21
  AutoProcessor,
22
  TextIteratorStreamer,
23
  )
 
49
  ).to(device).eval()
50
 
51
  # Load Relaxed
52
+ MODEL_ID_Z = "fancyfeast/llama-joycaption-beta-one-hf-llava"
53
  processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
54
+ model_z = LlavaForConditionalGeneration.from_pretrained(
55
  MODEL_ID_Z,
56
  trust_remote_code=True,
57
  torch_dtype=torch.float16
 
94
  elif model_name == "coreOCR-7B-050325-preview":
95
  processor = processor_x
96
  model = model_x
97
+ elif model_name == "llama-joycaption":
98
  processor = processor_z
99
  model = model_z
100
  else:
 
147
  elif model_name == "coreOCR-7B-050325-preview":
148
  processor = processor_x
149
  model = model_x
150
+ elif model_name == "llama-joycaption":
151
  processor = processor_z
152
  model = model_z
153
  else:
 
247
  with gr.Column():
248
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
249
  model_choice = gr.Radio(
250
+ choices=["gemma-3-4b-it", "coreOCR-7B-050325-preview", "llama-joycaption"],
251
  label="Select Model",
252
  value="gemma-3-4b-it"
253
  )