prithivMLmods committed (verified)
Commit 26bd89a
Parent: b98a70c

Upload 13 files

Files changed (14)
  1. .gitattributes +3 -0
  2. app.py +368 -0
  3. assets/1.png +3 -0
  4. assets/2.png +3 -0
  5. assets/3.png +3 -0
  6. assets/4.png +3 -0
  7. assets/5.png +3 -0
  8. assets/6.png +3 -0
  9. assets/7.png +3 -0
  10. assets/8.png +3 -0
  11. assets/9.png +3 -0
  12. assets/GenVis.gif +3 -0
  13. assets/genv.png +3 -0
  14. requirements.txt +24 -24
.gitattributes CHANGED
@@ -45,3 +45,6 @@ assets/8.png filter=lfs diff=lfs merge=lfs -text
  assets/9.png filter=lfs diff=lfs merge=lfs -text
  cc.gif filter=lfs diff=lfs merge=lfs -text
  examples/1.png filter=lfs diff=lfs merge=lfs -text
+ assets/6.png filter=lfs diff=lfs merge=lfs -text
+ assets/genv.png filter=lfs diff=lfs merge=lfs -text
+ assets/GenVis.gif filter=lfs diff=lfs merge=lfs -text
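
The three added patterns route the new binary assets through Git LFS. For context, large files like these are typically pushed with huggingface_hub, which stores them via LFS automatically; below is a minimal sketch of reproducing an upload commit like this one (the repo id is a hypothetical placeholder, not taken from this commit):

# Hedged sketch: push the assets folder to a Space via huggingface_hub,
# which routes large binaries through Git LFS automatically.
from huggingface_hub import HfApi

api = HfApi()  # assumes a valid HF token is configured locally
api.upload_folder(
    folder_path="assets",          # local folder with the PNGs/GIF
    path_in_repo="assets",         # destination path in the repo
    repo_id="user/space-name",     # hypothetical placeholder id
    repo_type="space",
    commit_message="Upload 13 files",
)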
app.py ADDED
@@ -0,0 +1,368 @@
+ import os
+ import random
+ import uuid
+ import json
+ import time
+ import asyncio
+ import re
+ from threading import Thread
+
+ import gradio as gr
+ import spaces
+ import torch
+ import numpy as np
+ from PIL import Image
+ import edge_tts
+
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     TextIteratorStreamer,
+     Qwen2VLForConditionalGeneration,
+     AutoProcessor,
+ )
+ from transformers.image_utils import load_image
+ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+
+ DESCRIPTION = """
+ # Gen Vision 🎃
+ """
+
+ css = '''
+ h1 {
+   text-align: center;
+   display: block;
+ }
+
+ #duplicate-button {
+   margin: auto;
+   color: #fff;
+   background: #1565c0;
+   border-radius: 100vh;
+ }
+ '''
+
+ MAX_MAX_NEW_TOKENS = 2048
+ DEFAULT_MAX_NEW_TOKENS = 1024
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ # -----------------------
+ # Progress Bar Helper
+ # -----------------------
+ def progress_bar_html(label: str) -> str:
+     """
+     Return an HTML snippet for a thin progress bar with a label.
+     The bar is styled as an animated magenta strip on a plum track.
+     """
+     return f'''
+ <div style="display: flex; align-items: center;">
+     <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+     <div style="width: 110px; height: 5px; background-color: #DDA0DD; border-radius: 2px; overflow: hidden;">
+         <div style="width: 100%; height: 100%; background-color: #FF00FF; animation: loading 1.5s linear infinite;"></div>
+     </div>
+ </div>
+ <style>
+ @keyframes loading {{
+     0% {{ transform: translateX(-100%); }}
+     100% {{ transform: translateX(100%); }}
+ }}
+ </style>
+ '''
+
+ # -----------------------
+ # Text Generation Setup
+ # -----------------------
+ model_id = "prithivMLmods/FastThink-0.5B-Tiny"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     torch_dtype=torch.bfloat16,
+ )
+ model.eval()
+
+ TTS_VOICES = [
+     "en-US-JennyNeural",  # @tts1
+     "en-US-GuyNeural",    # @tts2
+ ]
+
+ # -----------------------
+ # Multimodal OCR Setup
+ # -----------------------
+ MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+ processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
+     MODEL_ID,
+     trust_remote_code=True,
+     torch_dtype=torch.float16
+ ).to("cuda").eval()
+
+ async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
+     """Convert text to speech using Edge TTS and save it as an MP3 file."""
+     communicate = edge_tts.Communicate(text, voice)
+     await communicate.save(output_file)
+     return output_file
+
+ def clean_chat_history(chat_history):
+     """
+     Filter out any chat entries whose "content" is not a string.
+     """
+     cleaned = []
+     for msg in chat_history:
+         if isinstance(msg, dict) and isinstance(msg.get("content"), str):
+             cleaned.append(msg)
+     return cleaned
+
+ # -----------------------
+ # Stable Diffusion Image Generation Setup
+ # -----------------------
+
+ MAX_SEED = np.iinfo(np.int32).max
+ USE_TORCH_COMPILE = False
+ ENABLE_CPU_OFFLOAD = False
+
+ # LoRA options: (repo id, weight filename, adapter name) for each style.
+ # Defined at module level so generate() can reference it even without CUDA.
+ LORA_OPTIONS = {
+     "Realism": ("prithivMLmods/Canopus-Realism-LoRA", "Canopus-Realism-LoRA.safetensors", "rlms"),
+     "Pixar": ("prithivMLmods/Canopus-Pixar-Art", "Canopus-Pixar-Art.safetensors", "pixar"),
+     "Photoshoot": ("prithivMLmods/Canopus-Photo-Shoot-Mini-LoRA", "Canopus-Photo-Shoot-Mini-LoRA.safetensors", "photo"),
+     "Clothing": ("prithivMLmods/Canopus-Clothing-Adp-LoRA", "Canopus-Dress-Clothing-LoRA.safetensors", "clth"),
+     "Interior": ("prithivMLmods/Canopus-Interior-Architecture-0.1", "Canopus-Interior-Architecture-0.1δ.safetensors", "arch"),
+     "Fashion": ("prithivMLmods/Canopus-Fashion-Product-Dilation", "Canopus-Fashion-Product-Dilation.safetensors", "fashion"),
+     "Minimalistic": ("prithivMLmods/Pegasi-Minimalist-Image-Style", "Pegasi-Minimalist-Image-Style.safetensors", "minimalist"),
+     "Modern": ("prithivMLmods/Canopus-Modern-Clothing-Design", "Canopus-Modern-Clothing-Design.safetensors", "mdrnclth"),
+     "Animaliea": ("prithivMLmods/Canopus-Animaliea-Artism", "Canopus-Animaliea-Artism.safetensors", "Animaliea"),
+     "Wallpaper": ("prithivMLmods/Canopus-Liquid-Wallpaper-Art", "Canopus-Liquid-Wallpaper-Minimalize-LoRA.safetensors", "liquid"),
+     "Cars": ("prithivMLmods/Canes-Cars-Model-LoRA", "Canes-Cars-Model-LoRA.safetensors", "car"),
+     "PencilArt": ("prithivMLmods/Canopus-Pencil-Art-LoRA", "Canopus-Pencil-Art-LoRA.safetensors", "Pencil Art"),
+     "ArtMinimalistic": ("prithivMLmods/Canopus-Art-Medium-LoRA", "Canopus-Art-Medium-LoRA.safetensors", "mdm"),
+ }
+
+ if torch.cuda.is_available():
+     pipe = StableDiffusionXLPipeline.from_pretrained(
+         "SG161222/RealVisXL_V4.0_Lightning",
+         torch_dtype=torch.float16,
+         use_safetensors=True,
+     )
+     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
+     # Load all LoRA weights.
+     for model_name, weight_name, adapter_name in LORA_OPTIONS.values():
+         pipe.load_lora_weights(model_name, weight_name=weight_name, adapter_name=adapter_name)
+     pipe.to("cuda")
+ else:
+     pipe = StableDiffusionXLPipeline.from_pretrained(
+         "SG161222/RealVisXL_V4.0_Lightning",
+         torch_dtype=torch.float32,
+         use_safetensors=True,
+     ).to(device)
+
+ def save_image(img: Image.Image) -> str:
+     """Save a PIL image with a unique filename and return the path."""
+     unique_name = str(uuid.uuid4()) + ".png"
+     img.save(unique_name)
+     return unique_name
+
+ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     return seed
+
+ @spaces.GPU(duration=180, enable_queue=True)
+ def generate_image(
+     prompt: str,
+     negative_prompt: str = "",
+     seed: int = 0,
+     width: int = 1024,
+     height: int = 1024,
+     guidance_scale: float = 3.0,
+     randomize_seed: bool = True,
+     lora_model: str = "Realism",
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     seed = int(randomize_seed_fn(seed, randomize_seed))
+     effective_negative_prompt = negative_prompt  # Use the provided negative prompt, if any.
+     model_name, weight_name, adapter_name = LORA_OPTIONS[lora_model]
+     pipe.set_adapters(adapter_name)
+     outputs = pipe(
+         prompt=prompt,
+         negative_prompt=effective_negative_prompt,
+         width=width,
+         height=height,
+         guidance_scale=guidance_scale,
+         num_inference_steps=28,
+         num_images_per_prompt=1,
+         cross_attention_kwargs={"scale": 0.65},
+         output_type="pil",
+     )
+     images = outputs.images
+     image_paths = [save_image(img) for img in images]
+     return image_paths, seed
+
+ # -----------------------
+ # Main Chat/Generation Function
+ # -----------------------
+ @spaces.GPU
+ def generate(
+     input_dict: dict,
+     chat_history: list[dict],
+     max_new_tokens: int = 1024,
+     temperature: float = 0.6,
+     top_p: float = 0.9,
+     top_k: int = 50,
+     repetition_penalty: float = 1.2,
+ ):
+     """
+     Generate chatbot responses with support for multimodal input, TTS, and image generation.
+     Special commands:
+     - "@tts1" or "@tts2": triggers text-to-speech.
+     - "@<lora_command>": triggers image generation using the LoRA pipeline.
+       Available commands (case-insensitive): @realism, @pixar, @photoshoot, @clothing, @interior, @fashion,
+       @minimalistic, @modern, @animaliea, @wallpaper, @cars, @pencilart, @artminimalistic.
+     """
+     text = input_dict["text"]
+     files = input_dict.get("files", [])
+
+     # Check for an image generation command based on the LoRA tags.
+     lora_mapping = {key.lower(): key for key in LORA_OPTIONS}
+     for key_lower, key in lora_mapping.items():
+         command_tag = "@" + key_lower
+         if text.strip().lower().startswith(command_tag):
+             prompt_text = text.strip()[len(command_tag):].strip()
+             yield progress_bar_html(f"Processing Image Generation ({key} style)")
+             image_paths, used_seed = generate_image(
+                 prompt=prompt_text,
+                 negative_prompt="",
+                 seed=1,
+                 width=1024,
+                 height=1024,
+                 guidance_scale=3,
+                 randomize_seed=True,
+                 lora_model=key,
+             )
+             yield progress_bar_html("Finalizing Image Generation")
+             yield gr.Image(image_paths[0])
+             return
+
+     # Check for a TTS command (@tts1 or @tts2).
+     tts_prefix = "@tts"
+     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
+     voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
+
+     if is_tts and voice_index:
+         voice = TTS_VOICES[voice_index - 1]
+         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
+         conversation = [{"role": "user", "content": text}]
+     else:
+         voice = None
+         text = text.replace(tts_prefix, "").strip()
+         conversation = clean_chat_history(chat_history)
+         conversation.append({"role": "user", "content": text})
+
+     if files:
+         images = [load_image(image) for image in files]
+         messages = [{
+             "role": "user",
+             "content": [
+                 *[{"type": "image", "image": image} for image in images],
+                 {"type": "text", "text": text},
+             ]
+         }]
+         prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+         inputs = processor(text=[prompt], images=images, return_tensors="pt", padding=True).to("cuda")
+         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+         generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
+         thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
+         thread.start()
+
+         buffer = ""
+         yield progress_bar_html("Processing with Qwen2VL OCR")
+         for new_text in streamer:
+             buffer += new_text
+             buffer = buffer.replace("<|im_end|>", "")
+             time.sleep(0.01)
+             yield buffer
+     else:
+         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+             gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+         input_ids = input_ids.to(model.device)
+         streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+         generation_kwargs = {
+             "input_ids": input_ids,
+             "streamer": streamer,
+             "max_new_tokens": max_new_tokens,
+             "do_sample": True,
+             "top_p": top_p,
+             "top_k": top_k,
+             "temperature": temperature,
+             "num_beams": 1,
+             "repetition_penalty": repetition_penalty,
+         }
+         t = Thread(target=model.generate, kwargs=generation_kwargs)
+         t.start()
+
+         outputs = []
+         for new_text in streamer:
+             outputs.append(new_text)
+             yield "".join(outputs)
+
+         final_response = "".join(outputs)
+         yield final_response
+
+         if is_tts and voice:
+             output_file = asyncio.run(text_to_speech(final_response, voice))
+             yield gr.Audio(output_file, autoplay=True)
+
+ # -----------------------
+ # Gradio Chat Interface
+ # -----------------------
+ demo = gr.ChatInterface(
+     fn=generate,
+     additional_inputs=[
+         gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
+         gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
+         gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
+         gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
+         gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
+     ],
+     examples=[
+         ["@realism Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"],
+         ["@pixar A young man with light brown wavy hair and light brown eyes sitting in an armchair and looking directly at the camera, pixar style, disney pixar, office background, ultra detailed, 1 man"],
+         ["@realism A futuristic cityscape with neon lights"],
+         ["@photoshoot A portrait of a person with dramatic lighting"],
+         [{"text": "Summarize the letter", "files": ["examples/1.png"]}],
+         ["Python Program for Array Rotation"],
+         ["@tts1 Who is Nikola Tesla, and why did he die?"],
+         ["@clothing Fashionable streetwear in an urban environment"],
+         ["@interior A modern living room interior with minimalist design"],
+         ["@fashion A runway model in haute couture"],
+         ["@minimalistic A simple and elegant design of a serene landscape"],
+         ["@modern A contemporary art piece with abstract geometric shapes"],
+         ["@animaliea A cute animal portrait with vibrant colors"],
+         ["@wallpaper A scenic mountain range perfect for a desktop wallpaper"],
+         ["@cars A sleek sports car cruising on a city street"],
+         ["@pencilart A detailed pencil sketch of a historic building"],
+         ["@artminimalistic An artistic minimalist composition with subtle tones"],
+         ["@tts2 What causes rainbows to form?"],
+     ],
+     cache_examples=False,
+     type="messages",
+     description=DESCRIPTION,
+     css=css,
+     fill_height=True,
+     textbox=gr.MultimodalTextbox(
+         label="Query Input",
+         file_types=["image"],
+         file_count="multiple",
+         placeholder="default [text, vision]; scroll down to the examples to explore more art styles",
+     ),
+     stop_btn="Stop Generation",
+     multimodal=True,
+ )
+
+ if __name__ == "__main__":
+     demo.queue(max_size=20).launch(share=True)
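
Everything in app.py is exposed through a single chat endpoint, so the command prefixes (@realism, @tts1, and so on) are the whole client-facing protocol. Below is a minimal sketch of driving it remotely with gradio_client; the Space id and the default /chat endpoint name are assumptions, and the slider inputs fall back to their default values:

# Hedged sketch: drive the command protocol from a remote client.
from gradio_client import Client

client = Client("user/space-name")  # hypothetical Space id

# Plain text goes to the FastThink-0.5B chat path.
result = client.predict(
    {"text": "Python Program for Array Rotation", "files": []},
    api_name="/chat",
)
print(result)

# An "@realism ..." prefix routes to the SDXL + Realism-LoRA image path.
client.predict(
    {"text": "@realism A futuristic cityscape with neon lights", "files": []},
    api_name="/chat",
)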
assets/1.png ADDED

Git LFS Details

  • SHA256: 3a270dd91945bc510cba4070ced43f281e49f769df57f8af095ccfa78b825f67
  • Pointer size: 131 Bytes
  • Size of remote file: 706 kB
assets/2.png ADDED

Git LFS Details

  • SHA256: 6d11b034814e7f4046b0d4564ca985d84b01d17228cacf76c81c69118d64e1cf
  • Pointer size: 131 Bytes
  • Size of remote file: 816 kB
assets/3.png ADDED

Git LFS Details

  • SHA256: a996e388227cd408d9cc1a062cb4ebf3f63e832341da747ef3cc202119216057
  • Pointer size: 131 Bytes
  • Size of remote file: 751 kB
assets/4.png ADDED

Git LFS Details

  • SHA256: 3f82c80fbb4a4246536477b144252ef9e4c789627e300fe36546e76878fe6851
  • Pointer size: 131 Bytes
  • Size of remote file: 706 kB
assets/5.png ADDED

Git LFS Details

  • SHA256: 817d3ef879fd7c715f504f9a288c784918a6623830b6fb898d64439c23cc251d
  • Pointer size: 131 Bytes
  • Size of remote file: 763 kB
assets/6.png ADDED

Git LFS Details

  • SHA256: 5cf54fa33f979e539a1f9f3ada76c1610bb488d1520576057ee906f75ca045d7
  • Pointer size: 131 Bytes
  • Size of remote file: 556 kB
assets/7.png ADDED

Git LFS Details

  • SHA256: 31f61f40ced81994aa35d87565d0ee1147ff5cf6fbd352b6b7b79efee4d76ee6
  • Pointer size: 131 Bytes
  • Size of remote file: 635 kB
assets/8.png ADDED

Git LFS Details

  • SHA256: cc12eda4d532e2ffda38c0da18a9326b0090d465525834416d53bf96cb43b7e5
  • Pointer size: 131 Bytes
  • Size of remote file: 749 kB
assets/9.png ADDED

Git LFS Details

  • SHA256: 28775fdf8badf2545020e23e36c2aa051a9371534efda6cdd494533898307f0a
  • Pointer size: 131 Bytes
  • Size of remote file: 872 kB
assets/GenVis.gif ADDED

Git LFS Details

  • SHA256: 45857b740442a2856cbddbd1256c2f4ad93941f1467fa32c6326f7ad5d6e0d1c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
assets/genv.png ADDED

Git LFS Details

  • SHA256: 7f962e058c5d54b31814536bbb38e4aca672bc2bf15c299086ab3f8a455e7ed7
  • Pointer size: 131 Bytes
  • Size of remote file: 675 kB
requirements.txt CHANGED
@@ -1,24 +1,24 @@
- torch==2.4.0
- torchvision==0.19.0
- transformers-stream-generator==0.0.4
- gradio_client==1.3.0
- diffusers
- accelerate
- ultralytics
- peft
- huggingface_hub
- git+https://github.com/huggingface/transformers.git
- sentencepiece
- pandas
- requests
- scipy
- asyncio
- spaces
- safetensors
- librosa
- pydub
- ffmpeg-python
- av
- audiosegment
- edge-tts
- qwen-vl-utils==0.0.2
+ torch==2.4.0
+ torchvision==0.19.0
+ transformers-stream-generator==0.0.4
+ gradio_client==1.3.0
+ diffusers
+ accelerate
+ ultralytics
+ peft
+ huggingface_hub
+ git+https://github.com/huggingface/transformers.git
+ sentencepiece
+ pandas
+ requests
+ scipy
+ asyncio
+ spaces
+ safetensors
+ librosa
+ pydub
+ ffmpeg-python
+ av
+ audiosegment
+ edge-tts
+ qwen-vl-utils==0.0.2
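
Because torch, torchvision, gradio_client, and qwen-vl-utils are pinned while transformers tracks its git main branch, it can be worth verifying what actually resolved after pip install -r requirements.txt. A small sanity-check sketch (distribution names taken from the list above; importlib.metadata is standard library):

# Hedged sketch: confirm the pinned distributions resolved as expected.
from importlib.metadata import version

PINS = [
    ("torch", "2.4.0"),
    ("torchvision", "0.19.0"),
    ("transformers-stream-generator", "0.0.4"),
    ("gradio_client", "1.3.0"),
    ("qwen-vl-utils", "0.0.2"),
]

for name, pinned in PINS:
    installed = version(name)
    status = "OK" if installed == pinned else f"MISMATCH (pinned {pinned})"
    print(f"{name}: {installed} -> {status}")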