import os
import gradio as gr
import time
from moviepy.editor import VideoFileClip
from faster_whisper import WhisperModel
# Function to convert a video file to MP3
def convert_mp4_to_mp3(video_file_path, output_dir):
    video = VideoFileClip(video_file_path)
    audio = video.audio
    output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(video_file_path))[0] + ".mp3")
    audio.write_audiofile(output_path)
    audio.close()
    video.close()
    return output_path
# Function to transcribe an MP3 file to text using the Whisper model
def transcribe_audio(model_size, audio_file):
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    start_time = time.time()
    try:
        # segments is a generator; the actual decoding happens lazily while iterating below
        segments, info = model.transcribe(audio_file, beam_size=5)
        detected_language = "Detected language '%s' with probability %f" % (info.language, info.language_probability)
        result = []
        for segment in segments:
            result.append("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        result_text = "\n".join(result)
    except PermissionError as e:
        return f"PermissionError: {e}"
    except ValueError as e:
        return f"ValueError: {e}"
    end_time = time.time()
    elapsed_time = end_time - start_time
    return f"{detected_language}\n\nTranscription:\n{result_text}\n\nElapsed time: {elapsed_time:.2f} seconds"
# Main function used by the Gradio interface
def process_video(model_size, video_file=None):
    if not video_file:
        return "Please upload a video file."
    video_file_path = video_file.name
    print(f"Using uploaded video file: {video_file_path}")
    save_path = "/tmp"
    mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
    print(f"Converted video to MP3: {mp3_file_path}")
    transcription = transcribe_audio(model_size, mp3_file_path)
    print("Transcription complete")
    return transcription
# Gradio interface definition
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Dropdown(["tiny", "base", "small", "medium", "large"], label="Model Size"),
        gr.File(label="Upload Video File")
    ],
    outputs="text",
    title="Video to Text Converter using Whisper",
    description="Upload a video file, select the Whisper model size, and get the transcribed text.",
    live=True
)
if __name__ == "__main__":
    iface.launch()
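# Dependencies implied by the imports above (a sketch; the Space's actual
# requirements.txt and any version pins are not shown here). The
# `from moviepy.editor import ...` form is the moviepy 1.x API, hence the pin:
#
#     pip install gradio "moviepy<2" faster-whisper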