prithivMLmods committed on
Commit
3af868a
·
verified ·
1 Parent(s): 7d47057

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -11
app.py CHANGED
@@ -49,14 +49,10 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
49
  torch_dtype=torch.float16
50
  ).to("cuda").eval()
51
 
52
- # Extended Edge TTS voices mapping for new tags.
53
- # Use any of these tags at the start of your prompt to trigger TTS.
54
  TTS_VOICE_MAP = {
55
  "@jennyneural": "en-US-JennyNeural",
56
  "@guyneural": "en-US-GuyNeural",
57
- "@arianeural": "en-US-AriaNeural",
58
- "@michaelneural": "en-US-MichaelNeural",
59
- "@olivianeural": "en-US-OliviaNeural",
60
  }
61
 
62
  async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
@@ -129,7 +125,7 @@ def generate(input_dict: dict, chat_history: list[dict],
129
  repetition_penalty: float = 1.2):
130
  """
131
  Generates chatbot responses with support for multimodal input, video processing,
132
- and Edge TTS when using the new tags for TTS.
133
  Special command:
134
  - "@video-infer": triggers video processing using Callisto OCR3.
135
  """
@@ -285,13 +281,10 @@ demo = gr.ChatInterface(
285
  examples=[
286
  ["Write the code that converts temperatures between Celsius and Fahrenheit in short"],
287
  [{"text": "Create a short story based on the image.", "files": ["examples/1.jpg"]}],
288
- ["@GuyNeural Explain how rainbows are formed."],
289
  [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
290
  [{"text": "@video-infer Describe the Ad", "files": ["examples/coca.mp4"]}],
291
- ["@JennyNeural Who was Nikola Tesla and what were his contributions?"],
292
- ["@AriaNeural Provide an overview of the solar system."],
293
- ["@MichaelNeural Summarize the benefits of a healthy lifestyle."],
294
- ["@OliviaNeural Tell me a joke."]
295
  ],
296
  cache_examples=False,
297
  description="# **Pocket Llama**",
 
49
  torch_dtype=torch.float16
50
  ).to("cuda").eval()
51
 
52
+ # Edge TTS voices mapping for new tags.
 
53
  TTS_VOICE_MAP = {
54
  "@jennyneural": "en-US-JennyNeural",
55
  "@guyneural": "en-US-GuyNeural",
 
 
 
56
  }
57
 
58
  async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
 
125
  repetition_penalty: float = 1.2):
126
  """
127
  Generates chatbot responses with support for multimodal input, video processing,
128
+ and Edge TTS when using the new tags @JennyNeural or @GuyNeural.
129
  Special command:
130
  - "@video-infer": triggers video processing using Callisto OCR3.
131
  """
 
281
  examples=[
282
  ["Write the code that converts temperatures between Celsius and Fahrenheit in short"],
283
  [{"text": "Create a short story based on the image.", "files": ["examples/1.jpg"]}],
284
+ ["@JennyNeural Who was Nikola Tesla and what were his contributions?"],
285
  [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
286
  [{"text": "@video-infer Describe the Ad", "files": ["examples/coca.mp4"]}],
287
+ ["@GuyNeural Explain how rainbows are formed."]
 
 
 
288
  ],
289
  cache_examples=False,
290
  description="# **Pocket Llama**",