"""Streamlit app that transcribes an uploaded audio/video file and summarizes it.

Pipeline: save the upload to a private scratch directory, extract the audio
track with FFmpeg when the upload is a video, transcribe it with Whisper, and
(optionally) summarize the transcription through the Groq chat API.
"""

import os
import tempfile

import ffmpeg
import streamlit as st
import whisper
from groq import Groq

# Uploads with one of these extensions are run through FFmpeg first.
VIDEO_EXTENSIONS = (".mp4", ".mov", ".avi", ".mkv")

# Set the app title and description with styling
st.set_page_config(page_title="Audio/Video Transcription & Summarization", page_icon="🎙️")
st.title("🎙️ Audio/Video Transcription & Summarization")
st.write("Easily upload an audio or video file to get a transcription and a quick summary.")

# Add a sidebar for settings and instructions
with st.sidebar:
    st.header("Settings")
    st.write("Configure app preferences here.")
    enable_summary = st.checkbox("Enable Summarization", value=True)
    st.info("Note: Summarization uses the Groq API.")


def _load_groq_api_key():
    """Return the Groq API key from the environment or Streamlit secrets.

    Returns:
        The key string, or ``None`` when it is configured in neither place.
    """
    env_key = os.getenv("GROQ_API_KEY")
    if env_key:
        return env_key
    try:
        return st.secrets["GROQ_API_KEY"]
    except (KeyError, FileNotFoundError):
        # A missing key raises KeyError; a missing secrets file raises a
        # FileNotFoundError-derived error. Neither should crash the app at
        # import time, because transcription alone does not need the key.
        return None


# Retrieve the API key from environment variables or Streamlit secrets
GROQ_API_KEY = _load_groq_api_key()
if GROQ_API_KEY:
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Create the parent directory for per-request scratch directories
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Display file uploader with improved layout and style
st.subheader("Upload Audio/Video File")
uploaded_file = st.file_uploader(
    "Choose an audio or video file...",
    type=["mp4", "mov", "avi", "mkv", "wav", "mp3"],
)


def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
    """Extract the audio track of a video file with FFmpeg.

    Args:
        video_path: Path of the input video file.
        audio_path: Path the extracted audio is written to (overwritten if it
            already exists).

    Returns:
        ``audio_path`` on success, or ``None`` when FFmpeg fails. The failure
        is reported to the user through ``st.error``.
    """
    try:
        # Capture stderr so FFmpeg's own diagnostics can be shown on failure.
        (
            ffmpeg.input(video_path)
            .output(audio_path)
            .run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        details = e.stderr.decode(errors="replace") if e.stderr else str(e)
        st.error("Error processing file with FFmpeg: " + details)
        # Signal failure explicitly instead of returning a path to a file
        # that was never written.
        return None
    return audio_path


def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the Whisper "base" model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text.
    """
    # NOTE: the model is loaded on every call, as in the original code.
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    return result["text"]


def summarize_text(text):
    """Summarize text with the Groq chat-completions API.

    Args:
        text: The text to summarize.

    Returns:
        The summary returned by the model, or an empty string when ``text``
        is empty or whitespace-only (no API call is made in that case).
    """
    if not text or not text.strip():
        return ""
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
        # NOTE(review): confirm this model ID is still served by Groq.
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content


def process_media(media_file):
    """Transcribe an uploaded audio/video file and optionally summarize it.

    The upload is written to a scratch directory created under ``temp_dir``
    for this call only, so simultaneous requests cannot overwrite each
    other's files, and the directory is removed even when a step raises.

    Args:
        media_file: The uploaded file object; its ``name`` and
            ``getbuffer()`` are used.
    """
    # The file name is client-supplied: keep only its final component so it
    # cannot point outside the scratch directory.
    safe_name = os.path.basename(media_file.name) or "upload"
    is_video = safe_name.lower().endswith(VIDEO_EXTENSIONS)

    with tempfile.TemporaryDirectory(dir=temp_dir) as work_dir:
        # Save the uploaded file to a temporary path
        temp_file_path = os.path.join(work_dir, safe_name)
        with open(temp_file_path, "wb") as f:
            f.write(media_file.getbuffer())

        if is_video:
            st.info("Extracting audio from video...")
            audio_path = extract_audio(
                temp_file_path, os.path.join(work_dir, "temp_audio.wav")
            )
            if audio_path is None:
                # extract_audio has already shown the FFmpeg error.
                return
        else:
            audio_path = temp_file_path  # Already audio, use it as is

        # Transcribe while the scratch files still exist.
        with st.spinner("Transcribing audio..."):
            transcription = transcribe_audio(audio_path)

    st.success("Transcription completed!")
    st.write("### Transcription:")
    st.write(transcription)

    # Summarize transcription if enabled
    if not enable_summary:
        return
    if not transcription.strip():
        st.warning("The transcription is empty, so there is nothing to summarize.")
        return
    if not os.environ.get("GROQ_API_KEY"):
        st.error(
            "Summarization is enabled but no Groq API key is configured. "
            "Set GROQ_API_KEY in the environment or in Streamlit secrets."
        )
        return

    with st.spinner("Generating summary..."):
        summary = summarize_text(transcription)
    st.success("Summary generated!")
    st.write("### Summary:")
    st.write(summary)


# Run the app and handle file upload state
if uploaded_file is not None:
    st.info("Processing your file...")
    process_media(uploaded_file)
else:
    st.warning("Please upload an audio or video file to begin.")