Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import librosa
|
|
4 |
import numpy as np
|
5 |
from transformers import pipeline
|
6 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
7 |
|
8 |
# Load Whisper model for speech-to-text
|
9 |
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
|
@@ -43,18 +44,24 @@ def generate_subtitles(video_file, language_name):
|
|
43 |
# Load the video and extract audio directly
|
44 |
video = mp.VideoFileClip(video_path)
|
45 |
audio = video.audio
|
46 |
-
waveform, sr = librosa.load(audio.reader, sr=16000) # Load directly from audio reader
|
47 |
|
48 |
-
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
chunk_duration = 15 # seconds
|
52 |
-
chunk_size = sr * chunk_duration # number of samples per chunk
|
53 |
-
chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# Combine all transcriptions into a single string
|
60 |
full_transcription = " ".join(transcriptions)
|
|
|
4 |
import numpy as np
|
5 |
from transformers import pipeline
|
6 |
from concurrent.futures import ThreadPoolExecutor
|
7 |
+
import tempfile
|
8 |
|
9 |
# Load Whisper model for speech-to-text
|
10 |
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
|
|
|
44 |
# Load the video and extract audio directly
|
45 |
video = mp.VideoFileClip(video_path)
|
46 |
audio = video.audio
|
|
|
47 |
|
48 |
+
# Use a temporary file to hold the audio data
|
49 |
+
with tempfile.NamedTemporaryFile(delete=True) as tmp_audio_file:
|
50 |
+
audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
|
51 |
|
52 |
+
print("Starting speech-to-text transcription")
|
|
|
|
|
|
|
53 |
|
54 |
+
# Load the audio file as a waveform using librosa
|
55 |
+
waveform, sr = librosa.load(tmp_audio_file.name, sr=16000) # sr=16000 for Whisper
|
56 |
+
|
57 |
+
# Process audio in chunks
|
58 |
+
chunk_duration = 15 # seconds
|
59 |
+
chunk_size = sr * chunk_duration # number of samples per chunk
|
60 |
+
chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
|
61 |
+
|
62 |
+
# Use ThreadPoolExecutor for parallel processing
|
63 |
+
with ThreadPoolExecutor() as executor:
|
64 |
+
transcriptions = list(executor.map(transcribe_audio, chunks))
|
65 |
|
66 |
# Combine all transcriptions into a single string
|
67 |
full_transcription = " ".join(transcriptions)
|