ganga4364 committed on
Commit
f1e65b3
·
verified ·
1 Parent(s): d8933db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -20
app.py CHANGED
@@ -6,11 +6,6 @@ import datetime
6
  import shutil
7
  from ttsmms import download
8
  from ttsmms import TTS
9
- import nltk
10
- from nltk import sent_tokenize
11
- from nltk import sent_tokenize as nltk_sent_tokenize
12
-
13
- nltk.download("punkt")
14
 
15
  # Description for the Gradio interface
16
  this_description = """Text To Speech for Tibetan - using MMS TTS."""
@@ -19,22 +14,13 @@ this_description = """Text To Speech for Tibetan - using MMS TTS."""
19
  tts_model_path = download("bod", "./data")
20
  tts = TTS(tts_model_path)
21
 
22
- # Function to prepare sentences
23
  def prepare_sentences(text, lang="bod"):
24
- sentences = []
25
-
26
- # Pre-process the text (lowercase as per your original code)
27
- text = text.lower()
28
-
29
- # Split paragraphs and sentences using NLTK
30
- paragraphs = [paragraph for paragraph in text.split("\n") if paragraph.strip()]
31
 
32
- sentences = [
33
- sentence
34
- for paragraph in paragraphs
35
- for sentence in sent_tokenize(paragraph)
36
- if sentence.strip()
37
- ]
38
 
39
  return sentences
40
 
@@ -67,7 +53,7 @@ def combine_wav(source_dir, stamp):
67
 
68
  # Main function to process Tibetan text and generate audio
69
  def tts_tibetan(input_text):
70
- # Prepare sentences from the input text
71
  sentences = prepare_sentences(input_text)
72
 
73
  # Create a unique directory for storing audio chunks
 
6
  import shutil
7
  from ttsmms import download
8
  from ttsmms import TTS
 
 
 
 
 
9
 
10
  # Description for the Gradio interface
11
  this_description = """Text To Speech for Tibetan - using MMS TTS."""
 
14
  tts_model_path = download("bod", "./data")
15
  tts = TTS(tts_model_path)
16
 
17
+ # Custom function to split Tibetan text into sentences
18
  def prepare_sentences(text, lang="bod"):
19
+ # Convert Tibetan punctuation "།" into a period to help split sentences
20
+ text = text.replace("།", ".")
 
 
 
 
 
21
 
22
+ # Split the text into sentences based on the periods
23
+ sentences = [sentence.strip() for sentence in text.split('.') if sentence.strip()]
 
 
 
 
24
 
25
  return sentences
26
 
 
53
 
54
  # Main function to process Tibetan text and generate audio
55
  def tts_tibetan(input_text):
56
+ # Prepare sentences from the input text using the custom function
57
  sentences = prepare_sentences(input_text)
58
 
59
  # Create a unique directory for storing audio chunks