Update app.py
app.py CHANGED
```diff
@@ -59,7 +59,6 @@ def clean_chat_history(chat_history):
     return cleaned
 
 # Environment variables and parameters for Stable Diffusion XL
-# Use : SG161222/RealVisXL_V4.0_Lightning or SG161222/RealVisXL_V5.0_Lightning
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
```
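These env-driven settings feed the SDXL pipeline set up later in app.py. As a point of reference, a minimal sketch of how the configured repo id is typically consumed with the standard diffusers API; the fallback repo id is illustrative only, borrowed from the comment this commit removes:

```python
import os

import torch
from diffusers import StableDiffusionXLPipeline

# Fallback repo id is illustrative, taken from the removed comment.
MODEL_ID_SD = os.getenv("MODEL_VAL_PATH", "SG161222/RealVisXL_V5.0_Lightning")

# Standard diffusers loading pattern for an SDXL checkpoint.
pipe = StableDiffusionXLPipeline.from_pretrained(MODEL_ID_SD, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
```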
```diff
@@ -205,7 +204,7 @@ def generate(
     Generates chatbot responses with support for multimodal input and image generation.
     Special commands:
     - "@image": triggers image generation using the SDXL pipeline.
-    - "@video
+    - "@qwen2vl-video": triggers video processing using Qwen2VL.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
```
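The docstring now documents two prefix commands. A toy dispatcher showing the convention; `route_command` is a hypothetical helper, not a function in app.py, and note that the UI strings further down use `@video-infer` as the video trigger rather than `@qwen2vl-video`:

```python
def route_command(text: str) -> str:
    # Hypothetical helper illustrating the prefix convention documented above.
    if text.startswith("@image"):
        return "sdxl-image"      # image generation branch
    if text.startswith("@qwen2vl-video"):
        return "qwen2vl-video"   # video understanding branch
    return "chat"                # default text/vision chat

assert route_command("@image a watercolor fox") == "sdxl-image"
```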
```diff
@@ -280,7 +279,11 @@ def generate(
         yield buffer
         return
 
-    #
+    # For regular chat (text and multimodal input), process the conversation.
+    text = text.strip()
+    conversation = clean_chat_history(chat_history)
+    conversation.append({"role": "user", "content": text})
+
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
```
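The net effect of this hunk: the conversation list is built once, before the `if files:` branch, so the multimodal and text-only paths share it (the next hunk deletes the old copies from the `else:` branch). A rough sketch of the shared setup, under the assumption that `clean_chat_history` simply drops non-text turns; its body is not shown in this diff:

```python
def build_conversation(chat_history, text):
    # Assumption: clean_chat_history keeps only plain-text turns so the
    # chat template never sees file attachments from earlier messages.
    cleaned = [
        m for m in chat_history
        if isinstance(m.get("content"), str) and m["content"].strip()
    ]
    cleaned.append({"role": "user", "content": text.strip()})
    return cleaned
```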
```diff
@@ -309,8 +312,6 @@ def generate(
             time.sleep(0.01)
             yield buffer
     else:
-        conversation = clean_chat_history(chat_history)
-        conversation.append({"role": "user", "content": text})
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
```
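The `else:` branch now reuses the conversation built earlier instead of constructing its own. The kept context lines show the usual left-truncation guard; a self-contained sketch of the same pattern, where the model id and the 4096 limit are assumptions for illustration:

```python
from transformers import AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096  # assumed value; app.py defines its own

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # illustrative model
conversation = [{"role": "user", "content": "hello"}]

input_ids = tokenizer.apply_chat_template(
    conversation, add_generation_prompt=True, return_tensors="pt"
)
# Keep only the most recent tokens when the prompt exceeds the limit.
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
```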
```diff
@@ -358,7 +359,7 @@ demo = gr.ChatInterface(
     ],
     cache_examples=False,
     type="messages",
-    description="# **
+    description="# **QwQ Edge @video-infer 'prompt..', @image**",
     fill_height=True,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder=" @image for image gen, @video-infer for video, default [text, vision]"),
     stop_btn="Stop Generation",
```
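For reference, a minimal runnable reconstruction of the interface wiring this hunk touches; the echo handler and `multimodal=True` are assumptions (the real app streams responses from `generate`):

```python
import gradio as gr

def fn(message, history):
    # Placeholder handler; the real app streams from generate().
    return message["text"]

demo = gr.ChatInterface(
    fn=fn,
    multimodal=True,  # assumed: a MultimodalTextbox input needs multimodal mode
    type="messages",
    description="# **QwQ Edge @video-infer 'prompt..', @image**",
    fill_height=True,
    textbox=gr.MultimodalTextbox(
        label="Query Input",
        file_types=["image", "video"],
        file_count="multiple",
        placeholder=" @image for image gen, @video-infer for video, default [text, vision]",
    ),
    stop_btn="Stop Generation",
)

if __name__ == "__main__":
    demo.launch()
```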