Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,11 +6,6 @@ import datetime
|
|
6 |
import shutil
|
7 |
from ttsmms import download
|
8 |
from ttsmms import TTS
|
9 |
-
import nltk
|
10 |
-
from nltk import sent_tokenize
|
11 |
-
from nltk import sent_tokenize as nltk_sent_tokenize
|
12 |
-
|
13 |
-
nltk.download("punkt")
|
14 |
|
15 |
# Description for the Gradio interface
|
16 |
this_description = """Text To Speech for Tibetan - using MMS TTS."""
|
@@ -19,22 +14,13 @@ this_description = """Text To Speech for Tibetan - using MMS TTS."""
|
|
19 |
tts_model_path = download("bod", "./data")
|
20 |
tts = TTS(tts_model_path)
|
21 |
|
22 |
-
#
|
23 |
def prepare_sentences(text, lang="bod"):
|
24 |
-
|
25 |
-
|
26 |
-
# Pre-process the text (lowercase as per your original code)
|
27 |
-
text = text.lower()
|
28 |
-
|
29 |
-
# Split paragraphs and sentences using NLTK
|
30 |
-
paragraphs = [paragraph for paragraph in text.split("\n") if paragraph.strip()]
|
31 |
|
32 |
-
sentences
|
33 |
-
|
34 |
-
for paragraph in paragraphs
|
35 |
-
for sentence in sent_tokenize(paragraph)
|
36 |
-
if sentence.strip()
|
37 |
-
]
|
38 |
|
39 |
return sentences
|
40 |
|
@@ -67,7 +53,7 @@ def combine_wav(source_dir, stamp):
|
|
67 |
|
68 |
# Main function to process Tibetan text and generate audio
|
69 |
def tts_tibetan(input_text):
|
70 |
-
# Prepare sentences from the input text
|
71 |
sentences = prepare_sentences(input_text)
|
72 |
|
73 |
# Create a unique directory for storing audio chunks
|
|
|
6 |
import shutil
|
7 |
from ttsmms import download
|
8 |
from ttsmms import TTS
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# Description for the Gradio interface
|
11 |
this_description = """Text To Speech for Tibetan - using MMS TTS."""
|
|
|
14 |
tts_model_path = download("bod", "./data")
|
15 |
tts = TTS(tts_model_path)
|
16 |
|
17 |
+
# Custom function to split Tibetan text into sentences
|
18 |
def prepare_sentences(text, lang="bod"):
|
19 |
+
# Convert Tibetan punctuation "།" into a period to help split sentences
|
20 |
+
text = text.replace("།", ".")
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
# Split the text into sentences based on the periods
|
23 |
+
sentences = [sentence.strip() for sentence in text.split('.') if sentence.strip()]
|
|
|
|
|
|
|
|
|
24 |
|
25 |
return sentences
|
26 |
|
|
|
53 |
|
54 |
# Main function to process Tibetan text and generate audio
|
55 |
def tts_tibetan(input_text):
|
56 |
+
# Prepare sentences from the input text using the custom function
|
57 |
sentences = prepare_sentences(input_text)
|
58 |
|
59 |
# Create a unique directory for storing audio chunks
|