Update app.py
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import moviepy.editor as mp
 import librosa
 from transformers import pipeline
+from concurrent.futures import ThreadPoolExecutor
 
 # Load Whisper model for speech-to-text
 asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
@@ -21,6 +22,10 @@ languages = {
     "Russian (ru)": "ru"
 }
 
+def transcribe_audio(chunk):
+    """Transcribe a single audio chunk."""
+    return asr(chunk)["text"]
+
 def generate_subtitles(video_file, language_name):
     try:
         # Extract the target language code from the selected language name
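The new transcribe_audio helper feeds each raw waveform chunk straight to the shared asr pipeline. As a side note, the transformers ASR pipeline also accepts a dict that carries the sampling rate explicitly, which removes any ambiguity about the rate assumed for a bare numpy array. The sketch below shows that variant; it is an alternative form, not what this commit uses.

import numpy as np
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")

def transcribe_audio(chunk: np.ndarray) -> str:
    """Transcribe one 16 kHz mono chunk, stating the sampling rate explicitly."""
    return asr({"raw": chunk, "sampling_rate": 16000})["text"]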
@@ -46,18 +51,13 @@ def generate_subtitles(video_file, language_name):
         waveform, sr = librosa.load(audio_path, sr=16000)  # sr=16000 for Whisper
 
         # Process audio in chunks
-        chunk_duration =
+        chunk_duration = 15  # seconds
         chunk_size = sr * chunk_duration  # number of samples per chunk
-
+        chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
 
-
-
-
-            break  # Avoid processing empty chunks
-
-            # Pass the chunk to Whisper's ASR model
-            transcription = asr(chunk)["text"]
-            transcriptions.append(transcription)
+        # Use ThreadPoolExecutor for parallel processing
+        with ThreadPoolExecutor() as executor:
+            transcriptions = list(executor.map(transcribe_audio, chunks))
 
         # Combine all transcriptions into a single string
         full_transcription = " ".join(transcriptions)
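This hunk replaces the old sequential per-chunk loop with fixed 15-second chunks that are transcribed in parallel through ThreadPoolExecutor; executor.map returns results in input order, so the joined transcript stays aligned with the audio. A minimal standalone sketch of the pattern follows; the "audio.wav" path and the max_workers value are illustrative placeholders, not values from the commit.

from concurrent.futures import ThreadPoolExecutor

import librosa
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Placeholder path; app.py extracts this audio from the uploaded video.
waveform, sr = librosa.load("audio.wav", sr=16000)  # 16 kHz, as Whisper expects

chunk_duration = 15               # seconds per chunk, as in the commit
chunk_size = sr * chunk_duration  # samples per chunk

# Fixed-size chunks; the final slice is kept only if it is non-empty.
chunks = [waveform[i:i + chunk_size]
          for i in range(0, len(waveform), chunk_size)
          if len(waveform[i:i + chunk_size]) > 0]

# executor.map preserves input order, so the transcript stays aligned with the audio.
# max_workers=2 is an illustrative value, not something the commit sets.
with ThreadPoolExecutor(max_workers=2) as executor:
    transcriptions = list(executor.map(lambda chunk: asr(chunk)["text"], chunks))

full_transcription = " ".join(transcriptions)
print(full_transcription)

Whether threading actually speeds up the model forward passes depends on the backend, so it is worth benchmarking this against a plain loop on the target hardware.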
@@ -65,8 +65,7 @@ def generate_subtitles(video_file, language_name):
         print("Starting translation")
 
         # Translate transcription to the target language using M2M100
-
-        translated_subtitles = translation_pipeline(
+        translated_subtitles = translator(
            full_transcription,
            src_lang="en",  # Source language is English
            tgt_lang=target_language  # Target language from user selection
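The call site now uses a pipeline named translator with per-call src_lang/tgt_lang arguments, but this hunk does not show how that object is constructed. Below is a sketch of one plausible setup, assuming the facebook/m2m100_418M checkpoint (the comment in app.py only says M2M100) and placeholder input values.

from transformers import pipeline

# Assumed construction; the diff does not show how `translator` is defined.
translator = pipeline("translation", model="facebook/m2m100_418M")

full_transcription = "Hello and welcome to the demo."  # placeholder text
target_language = "fr"                                 # placeholder code from the languages dict

translated_subtitles = translator(
    full_transcription,
    src_lang="en",
    tgt_lang=target_language,
)[0]["translation_text"]
print(translated_subtitles)

Very long transcriptions can exceed the model's maximum input length, so splitting the text into sentences before translating may be necessary.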