# Gradio app: video transcription (insanely-fast-whisper) + AI summary (Qwen2.5-7B-Instruct).
import gradio as gr
import torch
import yt_dlp
import os
import subprocess
import json
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
import time
import langdetect
import uuid
# Optional Hugging Face token, read from the environment (e.g. Space secrets).
HF_TOKEN = os.environ.get("HF_TOKEN")
print("Starting the program...")
model_path = "Qwen/Qwen2.5-7B-Instruct"
print(f"Loading model {model_path}...")
# Load tokenizer and model once at import time; fp16 weights moved to the GPU.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
# Inference-only: disable dropout/batch-norm training behavior.
model = model.eval()
print("Model successfully loaded.")
def generate_unique_filename(extension):
    """Return a collision-free file name ending in *extension* (e.g. ".wav")."""
    return str(uuid.uuid4()) + extension
def cleanup_files(*files):
    """Delete every path in *files* that exists; falsy entries are skipped."""
    for path in files:
        if not path:
            continue
        if os.path.exists(path):
            os.remove(path)
            print(f"Removed file: {path}")
def download_youtube_audio(url):
    """Download the audio track of a YouTube video and convert it to wav.

    Returns the path to the resulting .wav file.

    Fixes over the original: 'keepvideo': True permanently leaked the
    pre-conversion download on disk, and forcing ".wav" into outtmpl required
    a fragile rename of "<name>.wav.wav" afterwards.
    """
    print(f"Downloading audio from YouTube: {url}")
    base_name = str(uuid.uuid4())
    output_path = base_name + ".wav"
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        # Let yt-dlp choose the real download extension; the post-processor
        # then writes <base_name>.wav and removes the intermediate file
        # (keepvideo defaults to False), so nothing is left behind.
        'outtmpl': base_name + '.%(ext)s',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path
@spaces.GPU(duration=90)
def transcribe_audio(file_path):
    """Transcribe an audio or video file with insanely-fast-whisper.

    Video containers are first demuxed to a temporary wav via ffmpeg.
    Returns the transcription text (full "text" field when present, else the
    concatenated chunk texts).

    Fix: the temporary wav and JSON transcript were leaked when either
    subprocess raised; cleanup now runs in a finally block.
    """
    print(f"Starting transcription of file: {file_path}")
    temp_audio = None
    output_file = generate_unique_filename(".json")
    try:
        # Case-insensitive check, extended to more common video containers.
        if file_path.lower().endswith(('.mp4', '.avi', '.mov', '.flv', '.mkv', '.webm')):
            print("Video file detected. Extracting audio using ffmpeg...")
            temp_audio = generate_unique_filename(".wav")
            subprocess.run(
                ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio],
                check=True,
            )
            file_path = temp_audio
        subprocess.run(
            [
                "insanely-fast-whisper",
                "--file-name", file_path,
                "--device-id", "0",
                "--model-name", "openai/whisper-large-v3",
                "--task", "transcribe",
                "--timestamp", "chunk",
                "--transcript-path", output_file,
            ],
            check=True,
        )
        with open(output_file, "r") as f:
            transcription = json.load(f)
        return transcription.get(
            "text",
            " ".join(chunk["text"] for chunk in transcription.get("chunks", [])),
        )
    finally:
        # Remove the transcript JSON and any extracted wav, success or failure.
        cleanup_files(output_file, temp_audio)
def generate_summary_stream(transcription):
    """Summarize *transcription* in 150-300 words in its detected language.

    Bug fix: Qwen2.5 checkpoints loaded through AutoModelForCausalLM do not
    expose a .chat() helper (that API belongs to ChatGLM-style remote code),
    so the original call raised AttributeError. Use the standard
    chat-template + generate flow instead.
    """
    detected_language = langdetect.detect(transcription)
    prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:
{transcription[:300000]}..."""
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(input_ids, max_new_tokens=1024)
    # Decode only the newly generated tokens, skipping the prompt.
    response = tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return response
def process_youtube(url):
    """Download and transcribe the audio of a YouTube video.

    Returns (transcription, None); the trailing None clears the summary box.
    """
    if not url:
        return "Please enter a YouTube URL.", None
    audio_path = download_youtube_audio(url)
    text = transcribe_audio(audio_path)
    cleanup_files(audio_path)
    return text, None
def process_uploaded_video(video_path):
    """Transcribe an uploaded video file.

    Returns (transcription, None); the trailing None clears the summary box.
    """
    # Guard against an empty upload, mirroring process_youtube's URL check
    # (previously transcribe_audio would crash on a None path).
    if not video_path:
        return "Please upload a video file.", None
    transcription = transcribe_audio(video_path)
    return transcription, None
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎥 Video Transcription and Smart Summary
Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
""")
    with gr.Tabs():
        with gr.TabItem("📤 Video Upload"):
            video_input = gr.Video()
            video_button = gr.Button("🚀 Process Video")
        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
            url_button = gr.Button("🚀 Process URL")
    transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
    summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
    summary_button = gr.Button("📝 Generate Summary")
    # Wiring: either input path fills the transcription box and clears the
    # summary; the summary button runs the LLM over the transcription text.
    video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
    summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])

# Fix: the original line ended with a stray "|" scrape artifact ("demo.launch() |")
# that made the file a syntax error.
demo.launch()