Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -31,7 +31,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
31 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
32 |
|
33 |
# Load text-only model and tokenizer
|
34 |
-
model_id = "prithivMLmods/
|
35 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
36 |
model = AutoModelForCausalLM.from_pretrained(
|
37 |
model_id,
|
@@ -40,8 +40,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
40 |
)
|
41 |
model.eval()
|
42 |
|
43 |
-
# Load multimodal processor and model
|
44 |
-
MODEL_ID = "prithivMLmods/
|
45 |
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
|
46 |
model_m = Qwen2VLForConditionalGeneration.from_pretrained(
|
47 |
MODEL_ID,
|
@@ -130,7 +130,7 @@ def generate(input_dict: dict, chat_history: list[dict],
|
|
130 |
Generates chatbot responses with support for multimodal input, video processing,
|
131 |
and Edge TTS when using the new tags @JennyNeural or @GuyNeural.
|
132 |
Special command:
|
133 |
-
- "@video-infer": triggers video processing using
|
134 |
"""
|
135 |
text = input_dict["text"]
|
136 |
files = input_dict.get("files", [])
|
@@ -191,7 +191,7 @@ def generate(input_dict: dict, chat_history: list[dict],
|
|
191 |
thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
|
192 |
thread.start()
|
193 |
buffer = ""
|
194 |
-
yield progress_bar_html("Processing video with
|
195 |
for new_text in streamer:
|
196 |
buffer += new_text
|
197 |
buffer = buffer.replace("<|im_end|>", "")
|
@@ -229,7 +229,7 @@ def generate(input_dict: dict, chat_history: list[dict],
|
|
229 |
thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
|
230 |
thread.start()
|
231 |
buffer = ""
|
232 |
-
yield progress_bar_html("Processing image with
|
233 |
for new_text in streamer:
|
234 |
buffer += new_text
|
235 |
buffer = buffer.replace("<|im_end|>", "")
|
@@ -259,7 +259,7 @@ def generate(input_dict: dict, chat_history: list[dict],
|
|
259 |
t = Thread(target=model.generate, kwargs=generation_kwargs)
|
260 |
t.start()
|
261 |
outputs = []
|
262 |
-
yield progress_bar_html("Processing With
|
263 |
for new_text in streamer:
|
264 |
outputs.append(new_text)
|
265 |
yield "".join(outputs)
|
@@ -288,7 +288,7 @@ demo = gr.ChatInterface(
|
|
288 |
[{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}]
|
289 |
],
|
290 |
cache_examples=False,
|
291 |
-
description="# **
|
292 |
type="messages",
|
293 |
fill_height=True,
|
294 |
textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),
|
|
|
31 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
32 |
|
33 |
# Load text-only model and tokenizer
|
34 |
+
model_id = "prithivMLmods/Galactic-Qwen-14B-Exp2"
|
35 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
36 |
model = AutoModelForCausalLM.from_pretrained(
|
37 |
model_id,
|
|
|
40 |
)
|
41 |
model.eval()
|
42 |
|
43 |
+
# Load multimodal processor and model
|
44 |
+
MODEL_ID = "prithivMLmods/Imgscope-OCR-2B-0527"
|
45 |
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
|
46 |
model_m = Qwen2VLForConditionalGeneration.from_pretrained(
|
47 |
MODEL_ID,
|
|
|
130 |
Generates chatbot responses with support for multimodal input, video processing,
|
131 |
and Edge TTS when using the new tags @JennyNeural or @GuyNeural.
|
132 |
Special command:
|
133 |
+
- "@video-infer": triggers video processing using Imgscope-OCR
|
134 |
"""
|
135 |
text = input_dict["text"]
|
136 |
files = input_dict.get("files", [])
|
|
|
191 |
thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
|
192 |
thread.start()
|
193 |
buffer = ""
|
194 |
+
yield progress_bar_html("Processing video with Imgscope-OCR")
|
195 |
for new_text in streamer:
|
196 |
buffer += new_text
|
197 |
buffer = buffer.replace("<|im_end|>", "")
|
|
|
229 |
thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
|
230 |
thread.start()
|
231 |
buffer = ""
|
232 |
+
yield progress_bar_html("Processing image with Imgscope-OCR")
|
233 |
for new_text in streamer:
|
234 |
buffer += new_text
|
235 |
buffer = buffer.replace("<|im_end|>", "")
|
|
|
259 |
t = Thread(target=model.generate, kwargs=generation_kwargs)
|
260 |
t.start()
|
261 |
outputs = []
|
262 |
+
yield progress_bar_html("Processing With Galactic Qwen")
|
263 |
for new_text in streamer:
|
264 |
outputs.append(new_text)
|
265 |
yield "".join(outputs)
|
|
|
288 |
[{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}]
|
289 |
],
|
290 |
cache_examples=False,
|
291 |
+
description="# **Imgscope-OCR**",
|
292 |
type="messages",
|
293 |
fill_height=True,
|
294 |
textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),
|