Spaces:

ganga4364
/

mms-tts-bod

Sleeping

ganga4364 committed on Oct 8, 2024

Commit

f1e65b3

verified ·

1 Parent(s): d8933db

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,11 +6,6 @@ import datetime
 import shutil
 from ttsmms import download
 from ttsmms import TTS
-import nltk
-from nltk import sent_tokenize
-from nltk import sent_tokenize as nltk_sent_tokenize
-nltk.download("punkt")
 # Description for the Gradio interface
 this_description = """Text To Speech for Tibetan - using MMS TTS."""
@@ -19,22 +14,13 @@ this_description = """Text To Speech for Tibetan - using MMS TTS."""
 tts_model_path = download("bod", "./data")
 tts = TTS(tts_model_path)
-# Function to prepare sentences
 def prepare_sentences(text, lang="bod"):
-    sentences = []
-    # Pre-process the text (lowercase as per your original code)
-    text = text.lower()
-    # Split paragraphs and sentences using NLTK
-    paragraphs = [paragraph for paragraph in text.split("\n") if paragraph.strip()]
-    sentences = [
-        sentence
-        for paragraph in paragraphs
-        for sentence in sent_tokenize(paragraph)
-        if sentence.strip()
-    ]
     return sentences
@@ -67,7 +53,7 @@ def combine_wav(source_dir, stamp):
 # Main function to process Tibetan text and generate audio
 def tts_tibetan(input_text):
-    # Prepare sentences from the input text
     sentences = prepare_sentences(input_text)
     # Create a unique directory for storing audio chunks

 import shutil
 from ttsmms import download
 from ttsmms import TTS
 # Description for the Gradio interface
 this_description = """Text To Speech for Tibetan - using MMS TTS."""
 tts_model_path = download("bod", "./data")
 tts = TTS(tts_model_path)
# Characters treated as sentence boundaries: the Tibetan shad family
# (U+0F0D SHAD through U+0F12 RGYA GRAM SHAD), the ASCII period, and line
# breaks. All of them are mapped to "." so a single split handles every case.
_SENTENCE_BREAKS = "\u0f0d\u0f0e\u0f0f\u0f10\u0f11\u0f12.\r\n"
_BREAKS_TO_PERIOD = str.maketrans(dict.fromkeys(_SENTENCE_BREAKS, "."))


# Custom function to split Tibetan text into sentences
def prepare_sentences(text, lang="bod"):
    """Split *text* into a list of non-empty, whitespace-trimmed sentences.

    Sentence boundaries are any Tibetan shad mark (། ༎ ༏ ༐ ༑ ༒), an ASCII
    period, or a line break. The boundary characters themselves are not
    included in the returned sentences.

    Args:
        text: Input text to split. ``None`` or an empty string yields ``[]``.
        lang: Language code. Currently unused; kept so existing callers that
            pass it keep working.

    Returns:
        A list of sentence strings in their original order, with empty and
        whitespace-only pieces dropped.
    """
    if not text:
        return []
    # One C-level pass turns every boundary character into ".", so line
    # breaks and the less common shad variants split just like "།" does.
    normalized = text.translate(_BREAKS_TO_PERIOD)
    stripped_pieces = (piece.strip() for piece in normalized.split("."))
    return [piece for piece in stripped_pieces if piece]
 # Main function to process Tibetan text and generate audio
 def tts_tibetan(input_text):
+    # Prepare sentences from the input text using the custom function
     sentences = prepare_sentences(input_text)
     # Create a unique directory for storing audio chunks