younes21000 committed
Commit e70c3e8 · verified · 1 Parent(s): bd05d7b

Update app.py

Files changed (1)
  1. app.py +11 -12
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import moviepy.editor as mp
 import librosa
 from transformers import pipeline
+from concurrent.futures import ThreadPoolExecutor
 
 # Load Whisper model for speech-to-text
 asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
@@ -21,6 +22,10 @@ languages = {
     "Russian (ru)": "ru"
 }
 
+def transcribe_audio(chunk):
+    """Transcribe a single audio chunk."""
+    return asr(chunk)["text"]
+
 def generate_subtitles(video_file, language_name):
     try:
         # Extract the target language code from the selected language name
@@ -46,18 +51,13 @@ def generate_subtitles(video_file, language_name):
         waveform, sr = librosa.load(audio_path, sr=16000)  # sr=16000 for Whisper
 
         # Process audio in chunks
-        chunk_duration = 30  # seconds
+        chunk_duration = 15  # seconds
         chunk_size = sr * chunk_duration  # number of samples per chunk
-        transcriptions = []
+        chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
 
-        for i in range(0, len(waveform), chunk_size):
-            chunk = waveform[i:i + chunk_size]
-            if len(chunk) == 0:
-                break  # Avoid processing empty chunks
-
-            # Pass the chunk to Whisper's ASR model
-            transcription = asr(chunk)["text"]
-            transcriptions.append(transcription)
+        # Use ThreadPoolExecutor for parallel processing
+        with ThreadPoolExecutor() as executor:
+            transcriptions = list(executor.map(transcribe_audio, chunks))
 
         # Combine all transcriptions into a single string
         full_transcription = " ".join(transcriptions)
@@ -65,8 +65,7 @@ def generate_subtitles(video_file, language_name):
         print("Starting translation")
 
         # Translate transcription to the target language using M2M100
-        translation_pipeline = pipeline('translation', model='facebook/m2m100_418M')
-        translated_subtitles = translation_pipeline(
+        translated_subtitles = translator(
             full_transcription,
             src_lang="en",  # Source language is English
             tgt_lang=target_language  # Target language from user selection
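
For reference, a minimal self-contained sketch of the transcription path after this commit. It assumes `translator` is an M2M100 translation pipeline created elsewhere in app.py (the removed translation_pipeline line suggests facebook/m2m100_418M); the helper name `transcribe_and_translate` and the example file path are illustrative, not part of this commit.

import librosa
from concurrent.futures import ThreadPoolExecutor
from transformers import pipeline

# Models as used in app.py; `translator` is an assumption based on the
# removed translation_pipeline line and the "using M2M100" comment
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
translator = pipeline("translation", model="facebook/m2m100_418M")

def transcribe_audio(chunk):
    """Transcribe a single audio chunk."""
    return asr(chunk)["text"]

def transcribe_and_translate(audio_path, target_language):
    # Whisper expects 16 kHz mono audio
    waveform, sr = librosa.load(audio_path, sr=16000)

    # Split the waveform into 15-second chunks, skipping an empty tail
    chunk_size = sr * 15
    chunks = [waveform[i:i + chunk_size]
              for i in range(0, len(waveform), chunk_size)
              if len(waveform[i:i + chunk_size]) > 0]

    # Transcribe chunks concurrently; executor.map returns results in
    # input order, so the joined text stays chronological
    with ThreadPoolExecutor() as executor:
        transcriptions = list(executor.map(transcribe_audio, chunks))

    full_transcription = " ".join(transcriptions)

    # Translate the English transcription to the selected language
    result = translator(full_transcription,
                        src_lang="en", tgt_lang=target_language)
    return result[0]["translation_text"]

# Example with a hypothetical file and the Russian code from the
# languages dict:
# print(transcribe_and_translate("audio.wav", "ru"))

How much the thread pool actually helps depends on the backend: on a single GPU the Whisper forward passes largely serialize, so the practical guarantees here are bounded chunk sizes and order-preserving results rather than a linear speedup.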