"""Streamlit app: transcribe an uploaded audio/video file and summarize it.

Pipeline: save the upload to ``temp/`` -> (video only) extract the audio track
with ffmpeg -> transcribe with Whisper -> summarize with the Groq chat API.
The Groq API key is read from the ``GROQ_API_KEY`` environment variable.
"""

import contextlib
import os

import ffmpeg
import streamlit as st
import whisper
from groq import Groq

# Extensions that are treated as video and therefore need audio extraction.
VIDEO_EXTENSIONS = (".mp4", ".mov", ".avi", ".mkv")

# Set the title and description of the app
st.title("Audio/Video Transcription and Summarization")
st.write("Upload your audio or video file, and this app will transcribe the audio and provide a summary of the transcription.")

# The Groq API key comes from the environment; the app refuses to run without it.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Create a temporary directory if it does not exist
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Upload the audio or video file
uploaded_file = st.file_uploader(
    "Choose an audio or video file...",
    type=["mp4", "mov", "avi", "mkv", "wav", "mp3"],
)


def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
    """Extract the audio track of ``video_path`` into ``audio_path`` via ffmpeg.

    Args:
        video_path: Path of the input video file.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path`` on success, or ``None`` if ffmpeg failed (the error is
        reported to the user with ``st.error``).
    """
    try:
        # Capture stdout/stderr so ffmpeg's own message can be shown on failure.
        ffmpeg.input(video_path).output(audio_path).run(
            overwrite_output=True, capture_stdout=True, capture_stderr=True
        )
    except ffmpeg.Error as e:
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        st.error("FFmpeg error encountered: " + detail)
        # Signal failure so the caller does not try to transcribe a missing file.
        return None
    return audio_path


def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with the Whisper "base" model.

    Returns:
        The transcribed text (the ``"text"`` entry of Whisper's result).
    """
    model = whisper.load_model("base")  # Load the Whisper model
    result = model.transcribe(audio_path)
    return result["text"]


def summarize_text(text):
    """Summarize ``text`` with a single-turn Groq chat completion.

    Returns:
        The content of the first choice's message in the API response.
    """
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content


def _remove_if_exists(path):
    """Delete ``path``, ignoring the case where it was never created."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def process_media(media_file):
    """Process an uploaded file: extract audio, transcribe, and summarize.

    The upload is written to ``temp_dir``; video files additionally get their
    audio extracted to a temporary WAV file. The transcription and summary are
    rendered with ``st.write``. Temporary files are removed afterwards, even
    when a step fails.

    Args:
        media_file: The object returned by ``st.file_uploader`` (must provide
            ``name`` and ``getbuffer()``).
    """
    # Use only the final path component so the name cannot escape temp_dir.
    safe_name = os.path.basename(media_file.name)
    temp_file_path = os.path.join(temp_dir, safe_name)
    # Compare case-insensitively so e.g. "CLIP.MP4" is recognised as video.
    is_video = safe_name.lower().endswith(VIDEO_EXTENSIONS)
    extracted_audio_path = os.path.join(temp_dir, "temp_audio.wav")

    try:
        # Save the uploaded file to a temporary path
        with open(temp_file_path, "wb") as f:
            f.write(media_file.getbuffer())

        if is_video:
            # Step 1: Extract audio from video
            audio_path = extract_audio(temp_file_path, extracted_audio_path)
            if audio_path is None:
                return  # extract_audio already reported the error
        else:
            audio_path = temp_file_path  # Already audio; use it as is

        # Step 2: Transcribe audio to text
        transcription = transcribe_audio(audio_path)
        st.write("### Transcription:")
        st.write(transcription)

        # Step 3: Summarize transcription
        summary = summarize_text(transcription)
        st.write("### Summary:")
        st.write(summary)
    finally:
        # Clean up temporary files regardless of success or failure.
        _remove_if_exists(temp_file_path)
        if is_video:
            _remove_if_exists(extracted_audio_path)


# Run the app
if uploaded_file is not None and GROQ_API_KEY:
    process_media(uploaded_file)
else:
    st.warning("Please upload a file and set the GROQ_API_KEY environment variable.")