Spaces:
Running
Running
File size: 6,363 Bytes
083a014 58ddc5a 083a014 e6813af 083a014 e6813af 083a014 e6813af 58ddc5a e6813af 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a e6813af 58ddc5a e6813af 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a 083a014 a4ecef9 58ddc5a 083a014 58ddc5a e6813af 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a 083a014 58ddc5a a4ecef9 58ddc5a e6813af 58ddc5a a4ecef9 58ddc5a a4ecef9 083a014 58ddc5a a4ecef9 083a014 a4ecef9 58ddc5a 083a014 a4ecef9 083a014 a4ecef9 58ddc5a a4ecef9 58ddc5a a4ecef9 083a014 e6813af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import gradio as gr
import torch
import yt_dlp
import os
import subprocess
import json
import time
import langdetect
import uuid
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# --- Module-level model setup (runs at import time) ---
# Hugging Face auth token from the environment; read here but not passed
# explicitly below — presumably picked up by the Space runtime. TODO confirm.
HF_TOKEN = os.getenv("HF_TOKEN")
print("Starting the program...")
model_path = "Qwen/Qwen2.5-7B-Instruct"
# **Efficient Model Loading**
bnb_config = BitsAndBytesConfig(load_in_8bit=True)  # Use 8-bit precision to reduce memory usage
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
# NOTE(review): recent transformers/accelerate versions reject .to(device) on a
# bitsandbytes-quantized model (it is placed on the GPU automatically) —
# confirm against the pinned library versions for this Space.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    quantization_config=bnb_config,  # Load in 8-bit to save memory
    trust_remote_code=True
).to(device).eval()
print("Model successfully loaded.")
def generate_unique_filename(extension):
    """Build a collision-free file name: a random UUID plus *extension*."""
    unique_stem = uuid.uuid4()
    return "{}{}".format(unique_stem, extension)
def cleanup_files(*files):
    """Delete every existing path in *files*, skipping falsy/missing entries."""
    existing = [path for path in files if path and os.path.exists(path)]
    for path in existing:
        os.remove(path)
        print(f"Removed file: {path}")
def download_youtube_audio(url):
    """Downloads audio from a YouTube video and converts it to WAV format.

    Returns the WAV file path on success, or an error string
    ("Error downloading audio: ..." / "Download Failed") on failure —
    callers detect failure by inspecting the returned string, not by
    catching an exception.
    """
    print(f"Downloading audio from YouTube: {url}")
    output_path = generate_unique_filename(".wav")
    ydl_opts = {
        'format': 'bestaudio/best',  # best available audio-only stream
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',  # re-encode the download via ffmpeg
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
        'outtmpl': output_path,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        # The audio postprocessor can append a second ".wav" to the output
        # template (yielding "<uuid>.wav.wav"); rename to the expected path.
        if os.path.exists(output_path + ".wav"):
            os.rename(output_path + ".wav", output_path)  # Ensure correct naming
    except Exception as e:
        return f"Error downloading audio: {str(e)}"
    return output_path if os.path.exists(output_path) else "Download Failed"
def transcribe_audio(file_path):
    """Transcribes audio using `insanely-fast-whisper` and handles large files efficiently.

    Video containers are first converted to WAV via ffmpeg. Returns the
    transcription text (capped at 500,000 chars) on success, or an
    "Error ..." string on failure. Temporary files are always removed,
    including on error paths.
    """
    print(f"Starting transcription of file: {file_path}")
    temp_audio = None
    output_file = generate_unique_filename(".json")
    try:
        if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
            print("Video file detected. Extracting audio using ffmpeg...")
            temp_audio = generate_unique_filename(".wav")
            command = ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio]
            subprocess.run(command, check=True)
            file_path = temp_audio  # Use extracted audio file

        command = [
            "insanely-fast-whisper",
            "--file-name", file_path,
            "--device-id", "0",
            "--model-name", "openai/whisper-large-v3",
            "--task", "transcribe",
            "--timestamp", "chunk",
            "--transcript-path", output_file,
        ]
        try:
            subprocess.run(command, check=True)
        except Exception as e:
            return f"Error in transcription: {str(e)}"

        try:
            # The transcript is one JSON document (often pretty-printed over
            # many lines), so parse it whole — the previous per-line
            # json.loads would fail on any multi-line output.
            with open(output_file, "r") as f:
                data = json.load(f)
            if isinstance(data, dict):
                text = data.get("text", "")
            else:
                # Defensive: tolerate a bare list of chunk dicts.
                text = " ".join(chunk.get("text", "") for chunk in data)
        except Exception as e:
            return f"Error reading transcription file: {str(e)}"

        return text[:500000]  # Limit transcription size for downstream steps
    finally:
        # Always clean up, even when returning an error string above.
        cleanup_files(output_file)
        if temp_audio:
            cleanup_files(temp_audio)
def generate_summary_stream(transcription):
    """Summarizes the transcription efficiently to avoid memory overflow.

    Detects the language from the first 1,000 chars, then summarizes up to
    the first three 2,000-char chunks (a memory guard) and joins the
    per-chunk summaries with blank lines.
    """
    if not transcription or not transcription.strip():
        return ""  # langdetect raises on empty/whitespace-only input
    detected_language = langdetect.detect(transcription[:1000])  # Detect using a smaller portion
    # Use smaller chunks for processing
    chunk_size = 2000
    transcript_chunks = [transcription[i:i + chunk_size] for i in range(0, len(transcription), chunk_size)]
    summary_result = []
    for chunk in transcript_chunks[:3]:  # Process only the first 3 chunks to avoid OOM
        prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:\n{chunk}"""
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            # max_new_tokens, not max_length: max_length counts the prompt
            # tokens too, so a ~2000-char prompt would exhaust a 300-token
            # budget and truncate the generated summary to nothing.
            output_ids = model.generate(**inputs, max_new_tokens=300)
        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        summary_result.append(response)
    return "\n\n".join(summary_result)
def process_youtube(url):
    """Handle a YouTube URL end-to-end: download audio, transcribe, clean up.

    Returns a (transcription_or_error, None) pair; the trailing None fills
    the summary output slot in the Gradio UI.
    """
    if not url:
        return "Please enter a YouTube URL.", None
    audio_file = download_youtube_audio(url)
    download_failed = audio_file == "Download Failed" or "Error" in audio_file
    if download_failed:
        return audio_file, None
    transcription = transcribe_audio(audio_file)
    cleanup_files(audio_file)  # the downloaded WAV is no longer needed
    return transcription, None
def process_uploaded_video(video_path):
    """Transcribe an uploaded video; returns (transcription, None) for the UI."""
    return transcribe_audio(video_path), None
# --- Gradio UI: two input tabs (upload / YouTube URL) feeding shared
# transcription and summary text boxes. Launches the app at import time. ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎥 Video Transcription and Smart Summary
    Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
    """)
    with gr.Tabs():
        with gr.TabItem("📤 Video Upload"):
            video_input = gr.Video()
            video_button = gr.Button("🚀 Process Video")
        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
            url_button = gr.Button("🚀 Process URL")
    transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
    summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
    summary_button = gr.Button("📝 Generate Summary")
    # Both input paths write into transcription_output; the summary is
    # generated on demand from whatever text is currently in that box.
    video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
    summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
demo.launch()
|