Update app.py
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import moviepy.editor as mp
 import librosa
 from transformers import pipeline
+from concurrent.futures import ThreadPoolExecutor
 
 # Load Whisper model for speech-to-text
 asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
@@ -21,6 +22,10 @@ languages = {
     "Russian (ru)": "ru"
 }
 
+def transcribe_audio(chunk):
+    """Transcribe a single audio chunk."""
+    return asr(chunk)["text"]
+
 def generate_subtitles(video_file, language_name):
     try:
         # Extract the target language code from the selected language name
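The new transcribe_audio helper feeds each raw waveform chunk straight to the shared asr pipeline. As a side note, the transformers ASR pipeline also accepts a dict that carries the sampling rate explicitly, which removes any ambiguity about the rate assumed for a bare numpy array. The sketch below shows that variant; it is an alternative form, not what this commit uses.

import numpy as np
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")

def transcribe_audio(chunk: np.ndarray) -> str:
    """Transcribe one 16 kHz mono chunk, stating the sampling rate explicitly."""
    return asr({"raw": chunk, "sampling_rate": 16000})["text"]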
@@ -46,18 +51,13 @@ def generate_subtitles(video_file, language_name):
         waveform, sr = librosa.load(audio_path, sr=16000)  # sr=16000 for Whisper
 
         # Process audio in chunks
-        chunk_duration =
+        chunk_duration = 15  # seconds
         chunk_size = sr * chunk_duration  # number of samples per chunk
-
+        chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
 
-
-
-
-            break  # Avoid processing empty chunks
-
-            # Pass the chunk to Whisper's ASR model
-            transcription = asr(chunk)["text"]
-            transcriptions.append(transcription)
+        # Use ThreadPoolExecutor for parallel processing
+        with ThreadPoolExecutor() as executor:
+            transcriptions = list(executor.map(transcribe_audio, chunks))
 
         # Combine all transcriptions into a single string
         full_transcription = " ".join(transcriptions)
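This hunk replaces the old sequential per-chunk loop with fixed 15-second chunks that are transcribed in parallel through ThreadPoolExecutor; executor.map returns results in input order, so the joined transcript stays aligned with the audio. A minimal standalone sketch of the pattern follows; the "audio.wav" path and the max_workers value are illustrative placeholders, not values from the commit.

from concurrent.futures import ThreadPoolExecutor

import librosa
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Placeholder path; app.py extracts this audio from the uploaded video.
waveform, sr = librosa.load("audio.wav", sr=16000)  # 16 kHz, as Whisper expects

chunk_duration = 15               # seconds per chunk, as in the commit
chunk_size = sr * chunk_duration  # samples per chunk

# Fixed-size chunks; the final slice is kept only if it is non-empty.
chunks = [waveform[i:i + chunk_size]
          for i in range(0, len(waveform), chunk_size)
          if len(waveform[i:i + chunk_size]) > 0]

# executor.map preserves input order, so the transcript stays aligned with the audio.
# max_workers=2 is an illustrative value, not something the commit sets.
with ThreadPoolExecutor(max_workers=2) as executor:
    transcriptions = list(executor.map(lambda chunk: asr(chunk)["text"], chunks))

full_transcription = " ".join(transcriptions)
print(full_transcription)

Whether threading actually speeds up the model forward passes depends on the backend, so it is worth benchmarking this against a plain loop on the target hardware.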
@@ -65,8 +65,7 @@ def generate_subtitles(video_file, language_name):
         print("Starting translation")
 
         # Translate transcription to the target language using M2M100
-
-        translated_subtitles = translation_pipeline(
+        translated_subtitles = translator(
            full_transcription,
            src_lang="en",  # Source language is English
            tgt_lang=target_language  # Target language from user selection
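The call site now uses a pipeline named translator with per-call src_lang/tgt_lang arguments, but this hunk does not show how that object is constructed. Below is a sketch of one plausible setup, assuming the facebook/m2m100_418M checkpoint (the comment in app.py only says M2M100) and placeholder input values.

from transformers import pipeline

# Assumed construction; the diff does not show how `translator` is defined.
translator = pipeline("translation", model="facebook/m2m100_418M")

full_transcription = "Hello and welcome to the demo."  # placeholder text
target_language = "fr"                                 # placeholder code from the languages dict

translated_subtitles = translator(
    full_transcription,
    src_lang="en",
    tgt_lang=target_language,
)[0]["translation_text"]
print(translated_subtitles)

Very long transcriptions can exceed the model's maximum input length, so splitting the text into sentences before translating may be necessary.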