Spaces:

prithivMLmods
/

Pocket-Callisto

Running on Zero

App Files Community

prithivMLmods committed on 9 days ago

Commit

7d47057

verified ·

1 Parent(s): 0aa3c52

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -26

app.py CHANGED Viewed

@@ -49,21 +49,14 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
-# Expanded Edge TTS voices mapping for new tags.
 TTS_VOICE_MAP = {
     "@jennyneural": "en-US-JennyNeural",
     "@guyneural": "en-US-GuyNeural",
-    "@adrineural": "af-ZA-AdriNeural",
-    "@willemneural": "af-ZA-WillemNeural",
-    "@amehaneural": "am-ET-AmehaNeural",
-    "@mekdesneural": "am-ET-MekdesNeural",
-    "@fatimaneural": "ar-AE-FatimaNeural",
-    "@hamdanneural": "ar-AE-HamdanNeural",
-    "@alineural": "ar-BH-AliNeural",
-    "@lailaneural": "ar-BH-LailaNeural",
-    "@aminaneural": "ar-DZ-AminaNeural",
-    "@ismaelneural": "ar-DZ-IsmaelNeural",
-    "@salmaneural": "ar-EG-SalmaNeural",
 }
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
@@ -136,7 +129,7 @@ def generate(input_dict: dict, chat_history: list[dict],
              repetition_penalty: float = 1.2):
     """
     Generates chatbot responses with support for multimodal input, video processing,
-    and Edge TTS when using the new tags for voices.
     Special command:
       - "@video-infer": triggers video processing using Callisto OCR3.
     """
@@ -292,24 +285,16 @@ demo = gr.ChatInterface(
     examples=[
         ["Write the code that converts temperatures between Celsius and Fahrenheit in short"],
         [{"text": "Create a short story based on the image.", "files": ["examples/1.jpg"]}],
         [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
         [{"text": "@video-infer Describe the Ad", "files": ["examples/coca.mp4"]}],
         ["@JennyNeural Who was Nikola Tesla and what were his contributions?"],
-        ["@GuyNeural Explain how rainbows are formed."],
-        ["@AdriNeural Provide a brief overview of South African wildlife."],
-        ["@WillemNeural Tell me a fun fact about astronomy."],
-        ["@AmehaNeural What are the main features of Ethiopian culture?"],
-        ["@MekdesNeural Share a short story about innovation."],
-        ["@FatimaNeural Explain the importance of renewable energy."],
-        ["@HamdanNeural Describe the evolution of modern technology."],
-        ["@AliNeural What causes thunderstorms?"],
-        ["@LailaNeural Describe the process of photosynthesis."],
-        ["@AminaNeural Summarize the history of North Africa."],
-        ["@IsmaelNeural What are the benefits of meditation?"],
-        ["@SalmaNeural Tell me about the influence of ancient Egyptian culture."]
     ],
     cache_examples=False,
-    description="# **Pocket Llama with Expanded Edge TTS**\n\nUse one of the TTS tags at the beginning of your query (e.g., **@JennyNeural**, **@GuyNeural**, **@AdriNeural**, etc.) to trigger text-to-speech output.",
     type="messages",
     fill_height=True,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),

     torch_dtype=torch.float16
 ).to("cuda").eval()
+# Extended Edge TTS voices mapping for new tags.
+# Use any of these tags at the start of your prompt to trigger TTS.
 TTS_VOICE_MAP = {
     "@jennyneural": "en-US-JennyNeural",
     "@guyneural": "en-US-GuyNeural",
+    "@arianeural": "en-US-AriaNeural",
+    "@michaelneural": "en-US-MichaelNeural",
+    "@olivianeural": "en-US-OliviaNeural",
 }
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
              repetition_penalty: float = 1.2):
     """
     Generates chatbot responses with support for multimodal input, video processing,
+    and Edge TTS when using the new tags for TTS.
     Special command:
       - "@video-infer": triggers video processing using Callisto OCR3.
     """
     examples=[
         ["Write the code that converts temperatures between Celsius and Fahrenheit in short"],
         [{"text": "Create a short story based on the image.", "files": ["examples/1.jpg"]}],
+        ["@GuyNeural Explain how rainbows are formed."],
         [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
         [{"text": "@video-infer Describe the Ad", "files": ["examples/coca.mp4"]}],
         ["@JennyNeural Who was Nikola Tesla and what were his contributions?"],
+        ["@AriaNeural Provide an overview of the solar system."],
+        ["@MichaelNeural Summarize the benefits of a healthy lifestyle."],
+        ["@OliviaNeural Tell me a joke."]
     ],
     cache_examples=False,
+    description="# **Pocket Llama**",
     type="messages",
     fill_height=True,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),