subprocess1

Running

App Files Files Community

sreepathi-ravikumar committed on May 9

Commit

e80998a

verified ·

1 Parent(s): f467a84

Rename tts_engine.py to audio.py

Browse files

Files changed (2) hide show

audio.py +12 -0
tts_engine.py +0 -59

audio.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# audio.py
+import torchaudio
+from bark import SAMPLE_RATE, generate_audio
+from bark.generation import preload_models
+import os
+preload_models()  # Download models at startup (runs once, when this module is imported)
+def generate_speech(text, output_path="output.wav"):
+    """Synthesize ``text`` with Bark and save the audio to ``output_path``.
+
+    Args:
+        text: Prompt passed to ``generate_audio``.
+        output_path: Destination file for the saved audio.
+
+    Returns:
+        ``output_path``, unchanged, so callers can chain on the file name.
+    """
+    import torch  # local import: torchaudio.save operates on torch tensors
+
+    audio_array = generate_audio(text)
+    # torchaudio.save needs a 2-D (channels, frames) tensor, while Bark's
+    # generate_audio is documented to return a 1-D NumPy array; convert it
+    # and add the channel axis before saving.
+    waveform = torch.as_tensor(audio_array)
+    if waveform.dim() == 1:
+        waveform = waveform.unsqueeze(0)
+    torchaudio.save(output_path, waveform, SAMPLE_RATE)
+    return output_path

tts_engine.py DELETED Viewed

@@ -1,59 +0,0 @@
-import edge_tts
-import asyncio
-import os
-from pydub import AudioSegment
-from langdetect import detect, LangDetectException
-import logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-class BilingualTTS:
-    """Sentence-by-sentence TTS that picks a Tamil or English edge-tts voice."""
-
-    def __init__(self):
-        # This was spelled `_init_`, which Python never calls, so instances
-        # had neither `voices` nor `temp_dir` and every method failed.
-        self.voices = {
-            'ta': 'ta-IN-PallaviNeural',  # Tamil voice
-            'en': 'en-US-AriaNeural'     # English voice
-        }
-        self.temp_dir = "/app/temp"
-        os.makedirs(self.temp_dir, exist_ok=True)
-
-    async def _generate_segment(self, text, voice, idx):
-        """Synthesize one sentence to ``segment_<idx>.mp3`` and return its path."""
-        output_file = os.path.join(self.temp_dir, f"segment_{idx}.mp3")
-        communicate = edge_tts.Communicate(text, voice)
-        await communicate.save(output_file)
-        return output_file
-
-    async def generate_audio(self, text):
-        """Convert ``text`` to a single MP3 and return the output path.
-
-        The text is split on '.', each sentence is synthesized with the voice
-        matching its detected language (English when detection fails or the
-        language has no configured voice), and the segments are concatenated
-        in order.
-
-        Raises:
-            RuntimeError: if any step fails; the original error is chained.
-        """
-        try:
-            # Split text into sentences
-            sentences = [s.strip() for s in text.split('.') if s.strip()]
-            # Generate audio segments
-            tasks = []
-            for idx, sentence in enumerate(sentences):
-                try:
-                    lang = detect(sentence)
-                    # Unknown languages fall back to the English voice.
-                    voice = self.voices.get(lang, self.voices['en'])
-                except LangDetectException:
-                    voice = self.voices['en']
-                tasks.append(self._generate_segment(sentence, voice, idx))
-            # Process all segments concurrently
-            segment_files = await asyncio.gather(*tasks)
-            # Combine audio segments
-            combined = AudioSegment.empty()
-            for sf in segment_files:
-                combined += AudioSegment.from_mp3(sf)
-                os.remove(sf)  # Cleanup
-            # Save final output
-            output_path = os.path.join(self.temp_dir, "final_output.mp3")
-            combined.export(output_path, format="mp3")
-            return output_path
-        except Exception as e:
-            logger.error("TTS Generation Error: %s", e)
-            # Chain the cause so the original traceback is preserved.
-            raise RuntimeError(f"TTS Failed: {e}") from e