Futuresony committed on
Commit
3bb7afd
·
verified ·
1 Parent(s): 5019f14

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ttsmms import download, TTS
3
+ from langdetect import detect
4
+ import os
5
+ from pydub import AudioSegment
6
+ from pydub.playback import play
7
+
8
# Tell pydub which ffmpeg binary to use (the path used on Hugging Face Spaces).
AudioSegment.converter = "/usr/bin/ffmpeg"

# Fetch the Swahili and English voice models, in that order, then load them.
swahili_dir, english_dir = (
    download(language_code, target_dir)
    for language_code, target_dir in (
        ("swh", "./data/swahili"),
        ("eng", "./data/english"),
    )
)
swahili_tts, english_tts = TTS(swahili_dir), TTS(english_dir)
17
+
18
+ # Function to process mixed-language text
19
def text_to_speech(text):
    """Synthesize mixed Swahili/English text into a single WAV file.

    The text is split on whitespace and each word is language-detected on
    its own. Words detected as ``"sw"`` are spoken with the Swahili model,
    everything else with the English model, and the per-word clips are
    concatenated in input order.

    Args:
        text: The text to speak. May be empty or ``None``.

    Returns:
        The path of the exported WAV file (``"./output.wav"``), or ``None``
        when the input contains no words.
    """
    words = text.split() if text else []
    if not words:
        # With no clips, sum() would yield the int 0, which cannot be
        # exported; report "no audio" instead of crashing.
        return None

    # Local imports: names this function needs beyond the file-level imports.
    import tempfile
    from langdetect import LangDetectException

    audio_clips = []
    # A private scratch directory keeps user-supplied words out of file names
    # (a word containing "/" would otherwise break the path), avoids clashes
    # between repeated words or concurrent requests, and is removed even if
    # synthesis fails part-way through.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for index, word in enumerate(words):
            try:
                lang = detect(word)
            except LangDetectException:
                # Raised for tokens with no detectable features (digits,
                # punctuation); treat them like any non-Swahili word.
                lang = None

            tts = swahili_tts if lang == "sw" else english_tts
            wav_path = os.path.join(scratch_dir, f"{index}.wav")
            tts.synthesis(word, wav_path=wav_path)
            # from_wav loads the audio into memory, so the file can go away
            # with the scratch directory.
            audio_clips.append(AudioSegment.from_wav(wav_path))

    # Concatenate the clips in order.
    final_audio = sum(audio_clips)
    output_path = "./output.wav"
    final_audio.export(output_path, format="wav")

    return output_path
41
+
42
# Gradio UI: a text box feeding text_to_speech, with the result shown in an
# audio component.
text_input = gr.Textbox(label="Enter Text")
speech_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=text_to_speech,
    inputs=text_input,
    outputs=speech_output,
    title="Swahili & English Text-to-Speech",
    description="Type text in Swahili and English, and listen to the mixed-language speech.",
)
demo.launch()