"""Streamlit app: analyze an uploaded video with Gemini or a YouTube video with Groq.

Tab 1 uploads an MP4 through the Google Generative AI file API and asks a
phidata agent (Gemini + DuckDuckGo) about it. Tab 2 passes a YouTube URL to a
phidata agent (Groq + YouTubeTools + DuckDuckGo).
"""
import os
import tempfile
import time
from pathlib import Path

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from google.generativeai import get_file, upload_file
from phi.agent import Agent
from phi.model.google import Gemini
from phi.model.groq import Groq
from phi.tools.duckduckgo import DuckDuckGo
from phi.tools.youtube_tools import YouTubeTools

# Load environment variables
load_dotenv()

# Configure API keys
API_KEY = os.getenv("GOOGLE_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

if API_KEY:
    genai.configure(api_key=API_KEY)

# Page configuration
st.set_page_config(
    page_title="Multimodal AI Applications",
    page_icon="🌐",
    layout="wide"
)


# Custom CSS for UI Styling
def load_custom_css():
    """Inject the page's custom markup via ``st.markdown`` (HTML allowed)."""
    # NOTE(review): the markup string is effectively empty in this file;
    # presumably a <style> block was meant to live here — confirm.
    st.markdown(
        """
        """,
        unsafe_allow_html=True
    )


@st.cache_resource
def initialize_agent():
    """Build the Gemini-backed video summarizer agent (cached by Streamlit)."""
    return Agent(
        name="Video AI Summarizer",
        model=Gemini(id="gemini-2.0-flash-exp"),
        tools=[DuckDuckGo()],
        markdown=True,
    )


load_custom_css()

st.markdown("# 🎥 Video Transcription and AI Summary")
st.markdown("Upload a video or provide a YouTube link to get a transcription and AI-generated summary.")

# Tabs for the two applications
tab1, tab2 = st.tabs(["📤 Video Upload", "🔗 YouTube Video Analysis"])

# Tab 1: Video Summarizer with Gemini
with tab1:
    st.subheader("Phidata Video AI Summarizer Agent 🎥")

    multimodal_Agent = initialize_agent()

    video_file = st.file_uploader("Upload a video file", type=['mp4'])

    if video_file:
        # Preview straight from the uploaded buffer. Writing a temp file here
        # would create a new orphaned file on every script rerun.
        st.video(video_file, format="video/mp4", start_time=0)

        user_query = st.text_area("What insights are you seeking from the video?", "")

        if st.button("🚀 Analyze Video", key="analyze_video_button"):
            if not user_query:
                st.warning("Please enter a question or insight to analyze the video.")
            else:
                video_path = None
                try:
                    with st.spinner("Processing video..."):
                        # upload_file is given a filesystem path, so persist the
                        # upload to disk only for the duration of the analysis.
                        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
                            temp_video.write(video_file.getvalue())
                            video_path = temp_video.name

                        processed_video = upload_file(video_path)
                        # Poll until the service finishes processing the upload.
                        while processed_video.state.name == "PROCESSING":
                            time.sleep(1)
                            processed_video = get_file(processed_video.name)

                        # The loop also exits on failure; do not query the
                        # agent with a file that never became usable.
                        if processed_video.state.name == "FAILED":
                            raise RuntimeError("Video processing failed.")

                        prompt = f"""
                        Analyze the uploaded video and provide a summary.
                        Respond to: {user_query}
                        """
                        response = multimodal_Agent.run(prompt, videos=[processed_video])

                    st.subheader("Analysis Result")
                    st.markdown(response.content)
                except Exception as error:
                    st.error(f"Error: {error}")
                finally:
                    # Always remove the local copy, whether or not analysis succeeded.
                    if video_path is not None:
                        Path(video_path).unlink(missing_ok=True)
    else:
        st.info("Upload a video file to begin analysis.")

# Tab 2: YouTube Video Analyzer with Groq
with tab2:
    st.subheader("YouTube Video Analyzer 🎬")

    try:
        # NOTE(review): get_video_captions / get_video_data look like
        # YouTubeTools options rather than Agent options — confirm against
        # the phidata API before relying on them here.
        youtube_agent = Agent(
            model=Groq(id="llama3-8b-8192", api_key=groq_api_key),
            tools=[YouTubeTools(), DuckDuckGo()],
            show_tool_calls=True,
            get_video_captions=True,
            get_video_data=True,
            description="Analyze YouTube videos for content, key points, and web research.",
        )
    except Exception as e:
        st.error(f"Error initializing the agent: {e}")
        st.stop()

    video_url = st.text_input("Enter YouTube Video URL:", "")
    user_query = st.text_area("Enter your question about the video (optional):", "")

    # The key must differ from the Tab 1 button: both buttons render in the
    # same script run once a video is uploaded, and Streamlit rejects
    # duplicate widget keys.
    if st.button("✨ Analyze Video", key="analyze_youtube_button"):
        if video_url:
            with st.spinner("Analyzing..."):
                try:
                    prompt = f"""
                    Analyze the YouTube video. Provide a detailed summary with key points.
                    {f'Respond to: {user_query}' if user_query else ''}
                    Video URL: {video_url}
                    """
                    output = youtube_agent.run(prompt)
                    st.subheader("Analysis Result")
                    st.markdown(output.content)
                except Exception as e:
                    st.error(f"Error: {e}")
        else:
            st.warning("Please enter a YouTube video URL.")