younes21000 committed on
Commit
73c9093
·
verified ·
1 Parent(s): 9c779fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -4,6 +4,7 @@ import librosa
4
  import numpy as np
5
  from transformers import pipeline
6
  from concurrent.futures import ThreadPoolExecutor
 
7
 
8
  # Load Whisper model for speech-to-text
9
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
@@ -43,18 +44,24 @@ def generate_subtitles(video_file, language_name):
43
  # Load the video and extract audio directly
44
  video = mp.VideoFileClip(video_path)
45
  audio = video.audio
46
- waveform, sr = librosa.load(audio.reader, sr=16000) # Load directly from audio reader
47
 
48
- print("Starting speech-to-text transcription")
 
 
49
 
50
- # Process audio in chunks
51
- chunk_duration = 15 # seconds
52
- chunk_size = sr * chunk_duration # number of samples per chunk
53
- chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
54
 
55
- # Use ThreadPoolExecutor for parallel processing
56
- with ThreadPoolExecutor() as executor:
57
- transcriptions = list(executor.map(transcribe_audio, chunks))
 
 
 
 
 
 
 
 
58
 
59
  # Combine all transcriptions into a single string
60
  full_transcription = " ".join(transcriptions)
 
4
  import numpy as np
5
  from transformers import pipeline
6
  from concurrent.futures import ThreadPoolExecutor
7
+ import tempfile
8
 
9
  # Load Whisper model for speech-to-text
10
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
 
44
  # Load the video and extract audio directly
45
  video = mp.VideoFileClip(video_path)
46
  audio = video.audio
 
47
 
48
+ # Use a temporary file to hold the audio data
49
+ with tempfile.NamedTemporaryFile(delete=True) as tmp_audio_file:
50
+ audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
51
 
52
+ print("Starting speech-to-text transcription")
 
 
 
53
 
54
+ # Load the audio file as a waveform using librosa
55
+ waveform, sr = librosa.load(tmp_audio_file.name, sr=16000) # sr=16000 for Whisper
56
+
57
+ # Process audio in chunks
58
+ chunk_duration = 15 # seconds
59
+ chunk_size = sr * chunk_duration # number of samples per chunk
60
+ chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
61
+
62
+ # Use ThreadPoolExecutor for parallel processing
63
+ with ThreadPoolExecutor() as executor:
64
+ transcriptions = list(executor.map(transcribe_audio, chunks))
65
 
66
  # Combine all transcriptions into a single string
67
  full_transcription = " ".join(transcriptions)