prithivMLmods committed on
Commit 0aa3c52 · verified · 1 Parent(s): ce5f63a

Update app.py

Files changed (1):
  1. app.py +26 -4

app.py CHANGED
@@ -49,10 +49,21 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
-# Edge TTS voices mapping for new tags.
+# Expanded Edge TTS voices mapping for new tags.
 TTS_VOICE_MAP = {
     "@jennyneural": "en-US-JennyNeural",
     "@guyneural": "en-US-GuyNeural",
+    "@adrineural": "af-ZA-AdriNeural",
+    "@willemneural": "af-ZA-WillemNeural",
+    "@amehaneural": "am-ET-AmehaNeural",
+    "@mekdesneural": "am-ET-MekdesNeural",
+    "@fatimaneural": "ar-AE-FatimaNeural",
+    "@hamdanneural": "ar-AE-HamdanNeural",
+    "@alineural": "ar-BH-AliNeural",
+    "@lailaneural": "ar-BH-LailaNeural",
+    "@aminaneural": "ar-DZ-AminaNeural",
+    "@ismaelneural": "ar-DZ-IsmaelNeural",
+    "@salmaneural": "ar-EG-SalmaNeural",
 }
 
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
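For context on where these voice names end up: the text_to_speech coroutine whose signature closes the hunk above is unchanged by this commit and presumably forwards the selected voice name to edge-tts. A minimal sketch, assuming the standard edge_tts.Communicate API; the body is illustrative, not the repo's actual implementation:

import edge_tts

async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
    # Voice names come straight from TTS_VOICE_MAP, e.g. "en-US-JennyNeural".
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file)
    return output_file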
@@ -125,7 +136,7 @@ def generate(input_dict: dict, chat_history: list[dict],
     repetition_penalty: float = 1.2):
     """
     Generates chatbot responses with support for multimodal input, video processing,
-    and Edge TTS when using the new tags @JennyNeural or @GuyNeural.
+    and Edge TTS when using the new tags for voices.
     Special command:
     - "@video-infer": triggers video processing using Callisto OCR3.
     """
@@ -284,10 +295,21 @@ demo = gr.ChatInterface(
         [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
         [{"text": "@video-infer Describe the Ad", "files": ["examples/coca.mp4"]}],
         ["@JennyNeural Who was Nikola Tesla and what were his contributions?"],
-        ["@GuyNeural Explain how rainbows are formed."]
+        ["@GuyNeural Explain how rainbows are formed."],
+        ["@AdriNeural Provide a brief overview of South African wildlife."],
+        ["@WillemNeural Tell me a fun fact about astronomy."],
+        ["@AmehaNeural What are the main features of Ethiopian culture?"],
+        ["@MekdesNeural Share a short story about innovation."],
+        ["@FatimaNeural Explain the importance of renewable energy."],
+        ["@HamdanNeural Describe the evolution of modern technology."],
+        ["@AliNeural What causes thunderstorms?"],
+        ["@LailaNeural Describe the process of photosynthesis."],
+        ["@AminaNeural Summarize the history of North Africa."],
+        ["@IsmaelNeural What are the benefits of meditation?"],
+        ["@SalmaNeural Tell me about the influence of ancient Egyptian culture."]
     ],
     cache_examples=False,
-    description="# **Pocket Llama**",
+    description="# **Pocket Llama with Expanded Edge TTS**\n\nUse one of the TTS tags at the beginning of your query (e.g., **@JennyNeural**, **@GuyNeural**, **@AdriNeural**, etc.) to trigger text-to-speech output.",
     type="messages",
     fill_height=True,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple"),
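As a quick sanity check of the new tags (not part of the commit), the two sketches above compose as follows; the reply string stands in for whatever the model actually generates:

import asyncio

async def demo_roundtrip():
    # Hypothetical wiring, mirroring what the new examples exercise:
    # strip the tag, produce a reply, then synthesize it with the mapped voice.
    voice, prompt = resolve_tts_voice("@SalmaNeural Tell me about the influence of ancient Egyptian culture.")
    if voice:  # -> "ar-EG-SalmaNeural"
        reply = "Ancient Egypt shaped writing, architecture, and art for millennia."
        await text_to_speech(reply, voice, "output.mp3")

asyncio.run(demo_roundtrip())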
 