Spaces:

abdullahzunorain
/

Audio-or-Video-Transcription-and-Summarization-Application

Running

App Files Files Community

Audio-or-Video-Transcription-and-Summarization-Application / app.py

abdullahzunorain

Update app.py

88eaaac verified 4 months ago

raw

history blame

11 kB

	import os
	import ffmpeg
	import whisper
	import streamlit as st
	from groq import Groq

	# Page setup: configure the browser tab (title and icon), then render the
	# page heading and a one-line description of the app. set_page_config is kept
	# ahead of every other st.* call in this script.
	st.set_page_config(page_title="Audio/Video Transcription & Summarization", page_icon="🎙️")
	st.title("🎙️ Audio/Video Transcription & Summarization")
	st.write("Easily upload an audio or video file to get a transcription and a quick summary.")

	# Sidebar: a settings heading, a short hint, the summarization toggle
	# (checked by default) and a note about the API behind it.
	st.sidebar.header("Settings")
	st.sidebar.write("Configure app preferences here.")
	enable_summary = st.sidebar.checkbox("Enable Summarization", value=True)
	st.sidebar.info("Note: Summarization uses the Groq API.")

	# Resolve the Groq API key: the environment variable wins, Streamlit secrets
	# are the fallback.
	GROQ_API_KEY = os.getenv("GROQ_API_KEY")
	if not GROQ_API_KEY:
	    try:
	        GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
	    except (KeyError, FileNotFoundError):
	        # Neither source provides a key (missing entry or no secrets file):
	        # show a readable message and halt this run instead of surfacing a
	        # raw traceback to the user.
	        st.error(
	            "GROQ_API_KEY is not configured. Set it as an environment "
	            "variable or add it to the Streamlit secrets."
	        )
	        st.stop()
	# Publish the key through the environment, where summarize_text reads it.
	os.environ["GROQ_API_KEY"] = GROQ_API_KEY

	# Working directory (relative path) that holds the saved upload and the
	# audio extracted from videos. exist_ok=True means this does not fail when
	# the directory already exists.
	temp_dir = "temp"
	os.makedirs(temp_dir, exist_ok=True)

	# Upload widget: a section heading followed by a file picker restricted to
	# the listed video (mp4/mov/avi/mkv) and audio (wav/mp3) extensions.
	# uploaded_file is compared against None further down to decide whether
	# processing should start.
	st.subheader("Upload Audio/Video File")
	uploaded_file = st.file_uploader("Choose an audio or video file...", type=["mp4", "mov", "avi", "mkv", "wav", "mp3"])

	# Function to extract audio from video
	def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
	    """Extract the audio of a media file into ``audio_path`` using FFmpeg.

	    Args:
	        video_path: Path of the input file passed to ``ffmpeg.input``.
	        audio_path: Destination file; an existing file is overwritten.

	    Returns:
	        ``audio_path``. The path is returned even when FFmpeg fails (the
	        failure is reported through ``st.error``), so callers must not
	        assume the file exists.
	    """
	    try:
	        # Capture stdout/stderr so FFmpeg's diagnostics can be shown in the UI.
	        (
	            ffmpeg.input(video_path)
	            .output(audio_path)
	            .run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
	        )
	    except ffmpeg.Error as e:
	        # FFmpeg's stderr is raw bytes and not guaranteed to be valid UTF-8;
	        # decode leniently so reporting the failure cannot itself raise, and
	        # fall back to the exception text when no stderr was captured.
	        details = e.stderr.decode("utf-8", errors="replace") if e.stderr else str(e)
	        st.error("Error processing file with FFmpeg: " + details)
	    return audio_path

	# Function to transcribe audio using Whisper model
	def transcribe_audio(audio_path):
	    """Return the text that Whisper's "base" model transcribes from ``audio_path``.

	    The model is loaded anew on every call.
	    """
	    return whisper.load_model("base").transcribe(audio_path)["text"]

	# Function to summarize text using Groq API
	def summarize_text(text):
	    """Request a summary of ``text`` from the Groq chat-completions API.

	    The API key is read from the ``GROQ_API_KEY`` environment variable; the
	    message content of the first returned choice is the summary.
	    """
	    prompt = {"role": "user", "content": f"Summarize the following text: {text}"}
	    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
	    completion = groq_client.chat.completions.create(
	        messages=[prompt],
	        model="llama3-8b-8192",
	    )
	    return completion.choices[0].message.content

	# Main processing function with progress indicators
	def process_media(media_file):
	    """Transcribe an uploaded audio/video file and optionally summarize it.

	    The upload is written into ``temp_dir``; for video files the audio is
	    extracted first. The transcription and, when ``enable_summary`` is set,
	    a summary are rendered on the page. Temporary files are removed on the
	    way out, including when one of the steps raises.

	    Args:
	        media_file: Uploaded file object exposing ``name`` and ``getbuffer()``.
	    """
	    video_extensions = ('.mp4', '.mov', '.avi', '.mkv')

	    # The file name is supplied by the client: keep only its final component
	    # so a name containing directory parts is not written outside temp_dir.
	    file_name = os.path.basename(media_file.name)
	    temp_file_path = os.path.join(temp_dir, file_name)
	    # Compare case-insensitively so e.g. "CLIP.MP4" is still treated as video.
	    is_video = file_name.lower().endswith(video_extensions)
	    audio_path = temp_file_path  # If already audio, use it as is

	    try:
	        # Save the uploaded file to a temporary path
	        with open(temp_file_path, "wb") as f:
	            f.write(media_file.getbuffer())

	        if is_video:
	            st.info("Extracting audio from video...")
	            audio_path = extract_audio(temp_file_path)
	            if not os.path.exists(audio_path):
	                # extract_audio reports FFmpeg failures itself and still
	                # returns the path; without an output file there is
	                # nothing to transcribe.
	                return

	        # Transcribe audio to text with progress spinner
	        with st.spinner("Transcribing audio..."):
	            transcription = transcribe_audio(audio_path)
	        st.success("Transcription completed!")
	        st.write("### Transcription:")
	        st.write(transcription)

	        # Summarize transcription if enabled
	        if enable_summary:
	            with st.spinner("Generating summary..."):
	                summary = summarize_text(transcription)
	            st.success("Summary generated!")
	            st.write("### Summary:")
	            st.write(summary)
	    finally:
	        # Cleanup runs on success, early return and failure alike. The set
	        # collapses the two paths into one when the upload was already audio.
	        for path in {temp_file_path, audio_path}:
	            if os.path.exists(path):
	                os.remove(path)

	# Entry point: prompt for a file until one is uploaded, then process it.
	if uploaded_file is None:
	    st.warning("Please upload an audio or video file to begin.")
	else:
	    st.info("Processing your file...")
	    process_media(uploaded_file)













	# # # Set your Groq API key here or use environment variable
	# # GROQ_API_TOKEN = os.getenv("groq_api")
	# # client = Groq(api_key=GROQ_API_TOKEN)

	# import os
	# import ffmpeg
	# import whisper
	# import streamlit as st
	# from groq import Groq

	# # Set the title and description of the app
	# st.title("Audio/Video Transcription and Summarization")
	# st.write("Upload your audio or video file, and this app will transcribe the audio and provide a summary of the transcription.")

	# # Retrieve the API key from environment variables or Streamlit secrets
	# GROQ_API_KEY = os.getenv("GROQ_API_KEY") or st.secrets["GROQ_API_KEY"]
	# os.environ["GROQ_API_KEY"] = GROQ_API_KEY

	# # Create a temporary directory if it does not exist
	# temp_dir = "temp"
	# os.makedirs(temp_dir, exist_ok=True)

	# # Upload the audio or video file
	# uploaded_file = st.file_uploader("Choose an audio or video file...", type=["mp4", "mov", "avi", "mkv", "wav", "mp3"])

	# # Function to extract audio from video
	# def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
	# """Extracts audio from video."""
	# try:
	# # Run ffmpeg command with stderr capture for better error handling
	# ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
	# except ffmpeg.Error as e:
	# st.error("FFmpeg error encountered: " + e.stderr.decode())
	# return audio_path

	# # Function to transcribe audio to text using Whisper model
	# def transcribe_audio(audio_path):
	# """Transcribes audio to text using Whisper model."""
	# model = whisper.load_model("base") # Load the Whisper model
	# result = model.transcribe(audio_path)
	# return result["text"]

	# # Function to summarize text using Groq API
	# def summarize_text(text):
	# """Summarizes text using Groq API."""
	# client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
	# response = client.chat.completions.create(
	# messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
	# model="llama3-8b-8192"
	# )
	# summary = response.choices[0].message.content
	# return summary

	# # Complete function to process audio or video
	# def process_media(media_file):
	# """Processes audio or video: extracts audio, transcribes it, and summarizes the transcription."""
	# # Save the uploaded file to a temporary path
	# temp_file_path = os.path.join(temp_dir, media_file.name)
	# with open(temp_file_path, "wb") as f:
	# f.write(media_file.getbuffer())

	# # Determine if the file is a video or audio based on the file extension
	# if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
	# # Step 1: Extract audio from video
	# audio_path = extract_audio(temp_file_path)
	# else:
	# audio_path = temp_file_path # If it's already audio, use it as is

	# # Step 2: Transcribe audio to text
	# transcription = transcribe_audio(audio_path)
	# st.write("### Transcription:")
	# st.write(transcription)

	# # Step 3: Summarize transcription
	# summary = summarize_text(transcription)
	# st.write("### Summary:")
	# st.write(summary)

	# # Clean up temporary files if needed
	# os.remove(temp_file_path)
	# if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
	# os.remove(audio_path)

	# # Run the app
	# if uploaded_file is not None:
	# process_media(uploaded_file)
	# else:
	# st.warning("Please upload a file.")




	# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




	# import os
	# import ffmpeg
	# import whisper
	# import streamlit as st
	# from groq import Groq

	# # Set the title and description of the app
	# st.title("Audio/Video Transcription and Summarization")
	# st.write("Upload your audio or video file, and this app will transcribe the audio and provide a summary of the transcription.")

	# # Get the API key from user input (You may want to use Streamlit secrets management)
	# GROQ_API_KEY = st.text_input("Enter your Groq API Key:")
	# os.environ["GROQ_API_KEY"] = GROQ_API_KEY

	# # Create a temporary directory if it does not exist
	# temp_dir = "temp"
	# os.makedirs(temp_dir, exist_ok=True)

	# # Upload the audio or video file
	# uploaded_file = st.file_uploader("Choose an audio or video file...", type=["mp4", "mov", "avi", "mkv", "wav", "mp3"])

	# # Function to extract audio from video
	# def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
	# """Extracts audio from video."""
	# try:
	# # Run ffmpeg command with stderr capture for better error handling
	# ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
	# except ffmpeg.Error as e:
	# st.error("FFmpeg error encountered: " + e.stderr.decode())
	# return audio_path

	# # Function to transcribe audio to text using Whisper model
	# def transcribe_audio(audio_path):
	# """Transcribes audio to text using Whisper model."""
	# model = whisper.load_model("base") # Load the Whisper model
	# result = model.transcribe(audio_path)
	# return result["text"]

	# # Function to summarize text using Groq API
	# def summarize_text(text):
	# """Summarizes text using Groq API."""
	# client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
	# response = client.chat.completions.create(
	# messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
	# model="llama3-8b-8192"
	# )
	# summary = response.choices[0].message.content
	# return summary

	# # Complete function to process audio or video
	# def process_media(media_file):
	# """Processes audio or video: extracts audio, transcribes it, and summarizes the transcription."""
	# # Save the uploaded file to a temporary path
	# temp_file_path = os.path.join(temp_dir, media_file.name)
	# with open(temp_file_path, "wb") as f:
	# f.write(media_file.getbuffer())

	# # Determine if the file is a video or audio based on the file extension
	# if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
	# # Step 1: Extract audio from video
	# audio_path = extract_audio(temp_file_path)
	# else:
	# audio_path = temp_file_path # If it's already audio, use it as is

	# # Step 2: Transcribe audio to text
	# transcription = transcribe_audio(audio_path)
	# st.write("### Transcription:")
	# st.write(transcription)

	# # Step 3: Summarize transcription
	# summary = summarize_text(transcription)
	# st.write("### Summary:")
	# st.write(summary)

	# # Clean up temporary files if needed
	# os.remove(temp_file_path)
	# if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
	# os.remove(audio_path)

	# # Run the app
	# if uploaded_file is not None and GROQ_API_KEY:
	# process_media(uploaded_file)
	# else:
	# st.warning("Please upload a file and enter your Groq API key.")