import os
import tempfile
import time

import gradio as gr
from faster_whisper import WhisperModel
from moviepy.editor import VideoFileClip
from pytube import YouTube
from pytube.exceptions import PytubeError, VideoUnavailable


# Convert a video file to MP3.
def convert_mp4_to_mp3(video_file_path, output_dir):
    """Write the audio track of a video file to an ``.mp3`` file.

    The output file is placed in ``output_dir`` and reuses the video's base
    name with the extension replaced by ``.mp3``.

    Args:
        video_file_path: Path of the video file to read.
        output_dir: Directory in which the MP3 file is created.

    Returns:
        The path of the written MP3 file.

    Raises:
        ValueError: If the video has no audio track.
    """
    base_name = os.path.splitext(os.path.basename(video_file_path))[0]
    output_path = os.path.join(output_dir, base_name + ".mp3")

    video = VideoFileClip(video_file_path)
    try:
        audio = video.audio
        if audio is None:
            # Without this guard the failure is an opaque AttributeError.
            raise ValueError("The video has no audio track to convert.")
        try:
            audio.write_audiofile(output_path)
        finally:
            audio.close()
    finally:
        # Always release the clip, even when the export fails.
        video.close()
    return output_path


# Transcribe an audio file to text with a Whisper model.
def transcribe_audio(model_size, audio_file):
    """Transcribe an audio file and return a human-readable report.

    Args:
        model_size: Model identifier passed to ``WhisperModel``.
        audio_file: Path of the audio file to transcribe.

    Returns:
        A string containing the detected language, one timestamped line per
        segment, and the elapsed time. If a ``PermissionError`` or
        ``ValueError`` occurs while transcribing, a string describing that
        error is returned instead.
    """
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    start_time = time.perf_counter()

    try:
        segments, info = model.transcribe(audio_file, beam_size=5)
        # Consume the segments inside the try block so that errors raised
        # while iterating them are reported the same way as call errors.
        lines = [
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
            for segment in segments
        ]
    except PermissionError as e:
        return f"PermissionError: {e}"
    except ValueError as e:
        return f"ValueError: {e}"

    detected_language = (
        f"Detected language '{info.language}' "
        f"with probability {info.language_probability:f}"
    )
    result_text = "\n".join(lines)
    elapsed_time = time.perf_counter() - start_time

    return (
        f"{detected_language}\n\n"
        f"Transcription:\n{result_text}\n\n"
        f"Elapsed time: {elapsed_time:.2f} seconds"
    )


# Download a video from a YouTube URL.
def download_youtube_video(url, output_dir):
    """Download the first available mp4 stream of a YouTube video.

    Args:
        url: URL of the YouTube video.
        output_dir: Directory in which the downloaded file is stored.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the downloaded
        file's path and ``error`` is ``None``; on failure ``path`` is ``None``
        and ``error`` is a message describing the problem.
    """
    try:
        yt = YouTube(url)
        stream = yt.streams.filter(file_extension="mp4").first()
        if stream is None:
            # The query matched nothing; report it instead of crashing on
            # ``None.download``.
            return None, "No downloadable mp4 stream was found for this video."
        output_path = stream.download(output_dir)
        return output_path, None
    except VideoUnavailable:
        return None, "Video unavailable. Please check the URL."
    except PytubeError as e:
        return None, f"An error occurred: {e}"


# Main function used by the Gradio interface.
def process_video(model_size, video_file=None, video_url=None):
    """Transcribe an uploaded video file or a video downloaded from a URL.

    Exactly one of ``video_file`` and ``video_url`` must be provided.

    Args:
        model_size: Whisper model size selected in the UI.
        video_file: Uploaded file, either an object exposing the path as
            ``.name`` or a plain path string.
        video_url: URL of a video to download.

    Returns:
        The transcription report, or a message explaining why the request
        could not be processed.
    """
    video_url = (video_url or "").strip()
    save_path = tempfile.gettempdir()

    if video_url and not video_file:
        source = "url"
    elif video_file and not video_url:
        source = "file"
    else:
        return "Please upload a video file or provide a video URL, but not both."

    if not model_size:
        # The dropdown has no default, so the handler can run before a model
        # has been chosen.
        return "Please select a model size."

    if source == "url":
        print(f"Downloading video from URL: {video_url}")
        video_file_path, error = download_youtube_video(video_url, save_path)
        if error:
            print(f"Error downloading video: {error}")
            return error
        print(f"Downloaded video to: {video_file_path}")
    else:
        # Accept both a file wrapper with ``.name`` and a bare path string.
        video_file_path = getattr(video_file, "name", video_file)
        print(f"Using uploaded video file: {video_file_path}")

    try:
        mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
    except (OSError, ValueError) as e:
        print(f"Error converting video: {e}")
        return f"Error converting video to audio: {e}"
    print(f"Converted video to MP3: {mp3_file_path}")

    transcription = transcribe_audio(model_size, mp3_file_path)
    print("Transcription complete")

    return transcription


# Gradio interface definition.
# NOTE(review): live=True reruns process_video whenever an input changes,
# which includes edits to the URL box -- confirm this is intended.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Dropdown(["tiny", "base", "small", "medium", "large"], label="Model Size"),
        gr.File(label="Upload Video File"),
        gr.Textbox(label="Video URL"),
    ],
    outputs="text",
    title="Video to Text Converter using Whisper",
    description="Upload a video file or provide a video URL, select the Whisper model size, and get the transcribed text.",
    live=True,
)

if __name__ == "__main__":
    iface.launch()