JaganathC committed
Commit 9431860 · verified · 1 Parent(s): 11bb14f

Update app.py

Files changed (1)
  1. app.py +137 -116
app.py CHANGED
@@ -1,132 +1,153 @@
- import gradio as gr
- import torch
- import yt_dlp
- import os
- import subprocess
- import json
- import moviepy.editor as mp
  import time
- import langdetect
- import uuid
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- # Load Hugging Face Model
- HF_TOKEN = os.environ.get("HF_TOKEN")
- model_path = "Qwen/Qwen2.5-7B-Instruct"
- print(f"Loading model {model_path}...")
- tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
- model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
- model = model.eval()
- print("Model successfully loaded.")
-
- # Generate unique filenames
- def generate_unique_filename(extension):
-     return f"{uuid.uuid4()}{extension}"
-
- # Cleanup temporary files
- def cleanup_files(*files):
-     for file in files:
-         if file and os.path.exists(file):
-             os.remove(file)
-             print(f"Removed file: {file}")
-
- # Extract audio from video
- def extract_audio(video_path):
-     audio_path = generate_unique_filename(".wav")
-     try:
-         video = mp.VideoFileClip(video_path)
-         video.audio.write_audiofile(audio_path)
-         return audio_path
-     except Exception as e:
-         print(f"Error extracting audio: {e}")
-         return None
-
- # Download YouTube audio
- def download_youtube_audio(url):
-     output_path = generate_unique_filename(".wav")
-     ydl_opts = {
-         'format': 'bestaudio/best',
-         'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],
-         'outtmpl': output_path,
-         'keepvideo': True,
-     }
-     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-         ydl.download([url])
-     return output_path if os.path.exists(output_path) else None
-
- # Transcribe audio using Whisper
- def transcribe_audio(file_path):
-     if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
-         file_path = extract_audio(file_path)
-         if not file_path:
-             return "Audio extraction failed.", None
-
-     output_file = generate_unique_filename(".json")
-     command = [
-         "insanely-fast-whisper", "--file-name", file_path,
-         "--device-id", "cpu", "--model-name", "openai/whisper-large-v3",
-         "--task", "transcribe", "--timestamp", "chunk",
-         "--transcript-path", output_file
-     ]
-
-     result = subprocess.run(command, capture_output=True, text=True)
-     if result.returncode != 0:
-         return f"Transcription failed: {result.stderr}", None
-
-     if not os.path.exists(output_file):
-         return "Transcription file missing.", None
-
-     with open(output_file, "r") as f:
-         transcription = json.load(f)
-
-     text = transcription.get("text", " ".join([chunk["text"] for chunk in transcription.get("chunks", [])]))
-     cleanup_files(output_file, file_path)
-     return text, None
-
- # Generate summary using Qwen Model
- def generate_summary(transcription):
-     detected_language = langdetect.detect(transcription)
-     prompt = f"""Summarize the following transcription in 150-300 words:
- Language: {detected_language}
- {transcription[:100000]}"""
-
-     response, _ = model.chat(tokenizer, prompt, history=[])
-     return response
-
- # Process YouTube video
- def process_youtube(url):
-     if not url:
-         return "Please enter a valid YouTube URL.", None
-     audio_file = download_youtube_audio(url)
-     return transcribe_audio(audio_file) if audio_file else ("Download failed.", None)
-
- # Process uploaded video
- def process_uploaded_video(video_path):
-     return transcribe_audio(video_path)
-
- # Gradio Interface
- demo = gr.Blocks()
- with demo:
-     gr.Markdown("""
- # 🎥 AI Video Transcription & Summary
- Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
- """)
-
-     with gr.Tabs():
-         with gr.TabItem("📤 Video Upload"):
-             video_input = gr.File(label="Upload a video file")
-             video_button = gr.Button("🚀 Process Video")
-
-         with gr.TabItem("🔗 YouTube Link"):
-             url_input = gr.Textbox(label="Paste YouTube URL")
-             url_button = gr.Button("🚀 Process URL")
-
-     transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
-     summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
-     summary_button = gr.Button("📝 Generate Summary")
-
-     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
-     url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
-     summary_button.click(generate_summary, inputs=[transcription_output], outputs=[summary_output])
-
- demo.launch()
+ import streamlit as st
+ from phi.agent import Agent
+ from phi.model.google import Gemini
+ from phi.tools.duckduckgo import DuckDuckGo
+ from google.generativeai import upload_file, get_file
+ import google.generativeai as genai
  import time
+ from pathlib import Path
+ import tempfile
+ from dotenv import load_dotenv
+ import os
+ from phi.model.groq import Groq
+ from phi.tools.youtube_tools import YouTubeTools
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure API keys
+ API_KEY = os.getenv("GOOGLE_API_KEY")
+ groq_api_key = os.getenv("GROQ_API_KEY")
+ if API_KEY:
+     genai.configure(api_key=API_KEY)
+
+ # Page configuration
+ st.set_page_config(
+     page_title="Multimodal AI Applications",
+     page_icon="🌐",
+     layout="wide"
+ )
+
+ # Custom CSS for UI Styling
+ def load_custom_css():
+     st.markdown(
+         """
+         <style>
+         .stButton>button {
+             width: 100%;
+             height: 50px;
+             font-size: 18px;
+             font-weight: bold;
+             background: rgba(255, 255, 255, 0.2);
+             border-radius: 12px;
+             border: 2px solid rgba(255, 255, 255, 0.5);
+             box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.2);
+         }
+         .stTextInput>div>div>input, .stTextArea>div>textarea {
+             background: rgba(255, 255, 255, 0.1);
+             border-radius: 8px;
+             border: 1px solid rgba(255, 255, 255, 0.3);
+             color: white;
+             padding: 10px;
+         }
+         </style>
+         """,
+         unsafe_allow_html=True
+     )
+
+ load_custom_css()
+
+ st.markdown("# 🎥 Video Transcription and AI Summary")
+ st.markdown("Upload a video or provide a YouTube link to get a transcription and AI-generated summary.")
+
+ # Tabs for the two applications
+ tab1, tab2 = st.tabs(["📤 Video Upload", "🔗 YouTube Video Analysis"])
+
+ # Tab 1: Video Summarizer with Gemini
+ with tab1:
+     st.subheader("Phidata Video AI Summarizer Agent 🎥")
+
+     @st.cache_resource
+     def initialize_agent():
+         return Agent(
+             name="Video AI Summarizer",
+             model=Gemini(id="gemini-2.0-flash-exp"),
+             tools=[DuckDuckGo()],
+             markdown=True,
+         )
+
+     multimodal_Agent = initialize_agent()
+
+     video_file = st.file_uploader("Upload a video file", type=['mp4'])
+
+     if video_file:
+         with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
+             temp_video.write(video_file.read())
+             video_path = temp_video.name
+
+         st.video(video_path, format="video/mp4", start_time=0)
+
+         user_query = st.text_area("What insights are you seeking from the video?", "")
+
+         if st.button("🚀 Analyze Video", key="analyze_video_button"):
+             if not user_query:
+                 st.warning("Please enter a question or insight to analyze the video.")
+             else:
+                 try:
+                     with st.spinner("Processing video..."):
+                         processed_video = upload_file(video_path)
+                         while processed_video.state.name == "PROCESSING":
+                             time.sleep(1)
+                             processed_video = get_file(processed_video.name)
+
+                         prompt = f"""
+                         Analyze the uploaded video and provide a summary.
+                         Respond to: {user_query}
+                         """
+                         response = multimodal_Agent.run(prompt, videos=[processed_video])
+                     st.subheader("Analysis Result")
+                     st.markdown(response.content)
+                 except Exception as error:
+                     st.error(f"Error: {error}")
+                 finally:
+                     Path(video_path).unlink(missing_ok=True)
+     else:
+         st.info("Upload a video file to begin analysis.")
+
+ # Tab 2: YouTube Video Analyzer with Groq
+ with tab2:
+     st.subheader("YouTube Video Analyzer 🎬")
+
+     try:
+         youtube_agent = Agent(
+             model=Groq(id="llama3-8b-8192", api_key=groq_api_key),
+             tools=[YouTubeTools(), DuckDuckGo()],
+             show_tool_calls=True,
+             get_video_captions=True,
+             get_video_data=True,
+             description="Analyze YouTube videos for content, key points, and web research.",
+         )
+     except Exception as e:
+         st.error(f"Error initializing the agent: {e}")
+         st.stop()
+
+     video_url = st.text_input("Enter YouTube Video URL:", "")
+     user_query = st.text_area("Enter your question about the video (optional):", "")
+
+     if st.button("✨ Analyze Video", key="analyze_youtube_button"):
+         if video_url:
+             with st.spinner("Analyzing..."):
+                 try:
+                     prompt = f"""
+                     Analyze the YouTube video.
+                     Provide a detailed summary with key points.
+                     {f'Respond to: {user_query}' if user_query else ''}
+                     Video URL: {video_url}
+                     """
+                     output = youtube_agent.run(prompt)
+                     st.subheader("Analysis Result")
+                     st.markdown(output.content)
+                 except Exception as e:
+                     st.error(f"Error: {e}")
+         else:
+             st.warning("Please enter a YouTube video URL.")
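
Note: the rewritten app.py loads its credentials with python-dotenv, so GOOGLE_API_KEY and GROQ_API_KEY need to be set in the environment (for example via a .env file). Below is a minimal, hypothetical pre-flight check, not part of this commit, that mirrors the loading step in the diff above:

import os
from dotenv import load_dotenv

load_dotenv()  # same loading step the app performs at startup
for var in ("GOOGLE_API_KEY", "GROQ_API_KEY"):  # variable names taken from the diff above
    if not os.getenv(var):
        print(f"Warning: {var} is not set; the corresponding tab will fail at runtime.")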