Spaces:

abdullahzunorain
/

Audio-or-Video-Transcription-and-Summarization-Application

Running

Audio-or-Video-Transcription-and-Summarization-Application

File size: 10,984 Bytes

import os
import ffmpeg
import whisper
import streamlit as st
from groq import Groq

# Browser tab title/icon, followed by the on-page heading and intro text.
st.set_page_config(
    page_title="Audio/Video Transcription & Summarization",
    page_icon="🎙️",
)
st.title("🎙️ Audio/Video Transcription & Summarization")
st.write("Easily upload an audio or video file to get a transcription and a quick summary.")

# Sidebar settings. `enable_summary` is the checkbox state (checked by
# default) and is read by process_media() to decide whether to summarize.
st.sidebar.header("Settings")
st.sidebar.write("Configure app preferences here.")
enable_summary = st.sidebar.checkbox("Enable Summarization", value=True)
st.sidebar.info("Note: Summarization uses the Groq API.")

# Resolve the Groq API key: prefer the GROQ_API_KEY environment variable and
# fall back to the Streamlit secrets entry of the same name.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    try:
        GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
    except (KeyError, FileNotFoundError):
        # KeyError: no such secrets entry. FileNotFoundError: presumably what
        # Streamlit raises when there is no secrets file at all -- confirm
        # against the installed Streamlit version. Either way, show a readable
        # message and halt this run instead of surfacing a raw traceback.
        st.error(
            "Groq API key not found. Set the GROQ_API_KEY environment variable "
            "or add GROQ_API_KEY to the Streamlit secrets."
        )
        st.stop()
# Export the key so summarize_text() can read it back from the environment.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Scratch directory (relative path) where process_media() writes the upload;
# created if missing, reused otherwise.
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Upload widget restricted to the listed video/audio extensions. The result is
# compared against None at the bottom of the script before processing.
st.subheader("Upload Audio/Video File")
uploaded_file = st.file_uploader(
    label="Choose an audio or video file...",
    type=["mp4", "mov", "avi", "mkv", "wav", "mp3"],
)

# Function to extract audio from video
def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
    """Extract the audio of a media file into ``audio_path`` using FFmpeg.

    Args:
        video_path: Path of the input file handed to FFmpeg.
        audio_path: Destination path for the extracted audio. An existing
            file at this path is overwritten.

    Returns:
        ``audio_path``, whether or not the extraction succeeded. When FFmpeg
        fails, the error is shown in the app and any file at ``audio_path``
        is removed, so callers can detect the failure by checking that the
        path exists and can never pick up audio left over from an earlier run.
    """
    try:
        # Capture stdout/stderr so FFmpeg's diagnostics can be shown on failure.
        ffmpeg.input(video_path).output(audio_path).run(
            overwrite_output=True, capture_stdout=True, capture_stderr=True
        )
    except ffmpeg.Error as e:
        # FFmpeg's stderr is raw bytes and not guaranteed to be valid UTF-8;
        # a strict decode could raise inside this handler and hide the error.
        details = e.stderr.decode(errors="replace") if e.stderr else ""
        st.error("Error processing file with FFmpeg: " + details)
        # Drop partial or stale output so it is not mistaken for a fresh result.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass  # FFmpeg never wrote anything; nothing to clean up.
    return audio_path

# Function to transcribe audio using Whisper model
def transcribe_audio(audio_path):
    """Return the text Whisper transcribes from the file at ``audio_path``.

    NOTE: the "base" Whisper model is loaded anew on every call.
    """
    return whisper.load_model("base").transcribe(audio_path)["text"]

# Function to summarize text using Groq API
def summarize_text(text):
    """Summarize ``text`` with the Groq chat-completions API.

    The API key is read from the ``GROQ_API_KEY`` environment variable.

    Args:
        text: The text to summarize (in this app, a transcription).

    Returns:
        The summary returned by the model as a string. An empty string is
        returned, without calling the API, when ``text`` is empty or only
        whitespace, and also when the response carries no content.
    """
    # Nothing to summarize: skip the network call rather than asking the
    # model to summarize an empty prompt.
    if not text or not text.strip():
        return ""

    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
        model="llama3-8b-8192",
    )
    # Guard against a missing (None) content field so callers always get a str.
    return response.choices[0].message.content or ""

# Helper for process_media: best-effort removal of a temporary file
def _remove_if_exists(path):
    """Delete ``path``; do nothing if it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


# Main processing function with progress indicators
def process_media(media_file):
    """Transcribe an uploaded audio/video file and optionally summarize it.

    The upload is written into the module-level ``temp_dir``. Files with a
    video extension (.mp4, .mov, .avi, .mkv, any letter case) have their
    audio extracted via ``extract_audio`` first; other files are transcribed
    as they are. The transcription is rendered in the app, followed by a
    summary when the module-level ``enable_summary`` flag is set.

    Temporary files are removed when the function exits, including when
    transcription or summarization raises.

    Args:
        media_file: The uploaded file object; must provide ``name`` and
            ``getbuffer()``.
    """
    # The file name is supplied by the client: keep only its final path
    # component so the upload cannot be written outside temp_dir.
    safe_name = os.path.basename(media_file.name) or "upload"
    temp_file_path = os.path.join(temp_dir, safe_name)
    is_video = safe_name.lower().endswith(('.mp4', '.mov', '.avi', '.mkv'))
    audio_path = temp_file_path  # If already audio, use it as is

    try:
        # Save the uploaded file to a temporary path
        with open(temp_file_path, "wb") as f:
            f.write(media_file.getbuffer())

        if is_video:
            st.info("Extracting audio from video...")
            audio_path = extract_audio(temp_file_path)
            if not os.path.exists(audio_path):
                # extract_audio reports FFmpeg failures itself; with no audio
                # file there is nothing to transcribe, so stop here.
                return

        # Transcribe audio to text with progress spinner
        with st.spinner("Transcribing audio..."):
            transcription = transcribe_audio(audio_path)
        st.success("Transcription completed!")
        st.write("### Transcription:")
        st.write(transcription)

        # Summarize transcription if enabled
        if enable_summary:
            with st.spinner("Generating summary..."):
                summary = summarize_text(transcription)
            st.success("Summary generated!")
            st.write("### Summary:")
            st.write(summary)
    finally:
        # Always clean up, even when a step above raised.
        _remove_if_exists(temp_file_path)
        if audio_path != temp_file_path:
            _remove_if_exists(audio_path)

# Script entry: prompt for a file until one has been uploaded, then process it.
if uploaded_file is None:
    st.warning("Please upload an audio or video file to begin.")
else:
    st.info("Processing your file...")
    process_media(uploaded_file)













# # # Set your Groq API key here or use environment variable
# # GROQ_API_TOKEN = os.getenv("groq_api")
# # client = Groq(api_key=GROQ_API_TOKEN)

# import os
# import ffmpeg
# import whisper
# import streamlit as st
# from groq import Groq

# # Set the title and description of the app
# st.title("Audio/Video Transcription and Summarization")
# st.write("Upload your audio or video file, and this app will transcribe the audio and provide a summary of the transcription.")

# # Retrieve the API key from environment variables or Streamlit secrets
# GROQ_API_KEY = os.getenv("GROQ_API_KEY") or st.secrets["GROQ_API_KEY"]
# os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# # Create a temporary directory if it does not exist
# temp_dir = "temp"
# os.makedirs(temp_dir, exist_ok=True)

# # Upload the audio or video file
# uploaded_file = st.file_uploader("Choose an audio or video file...", type=["mp4", "mov", "avi", "mkv", "wav", "mp3"])

# # Function to extract audio from video
# def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
#     """Extracts audio from video."""
#     try:
#         # Run ffmpeg command with stderr capture for better error handling
#         ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
#     except ffmpeg.Error as e:
#         st.error("FFmpeg error encountered: " + e.stderr.decode())
#     return audio_path

# # Function to transcribe audio to text using Whisper model
# def transcribe_audio(audio_path):
#     """Transcribes audio to text using Whisper model."""
#     model = whisper.load_model("base")  # Load the Whisper model
#     result = model.transcribe(audio_path)
#     return result["text"]

# # Function to summarize text using Groq API
# def summarize_text(text):
#     """Summarizes text using Groq API."""
#     client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
#     response = client.chat.completions.create(
#         messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
#         model="llama3-8b-8192"
#     )
#     summary = response.choices[0].message.content
#     return summary

# # Complete function to process audio or video
# def process_media(media_file):
#     """Processes audio or video: extracts audio, transcribes it, and summarizes the transcription."""
#     # Save the uploaded file to a temporary path
#     temp_file_path = os.path.join(temp_dir, media_file.name)
#     with open(temp_file_path, "wb") as f:
#         f.write(media_file.getbuffer())

#     # Determine if the file is a video or audio based on the file extension
#     if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
#         # Step 1: Extract audio from video
#         audio_path = extract_audio(temp_file_path)
#     else:
#         audio_path = temp_file_path  # If it's already audio, use it as is

#     # Step 2: Transcribe audio to text
#     transcription = transcribe_audio(audio_path)
#     st.write("### Transcription:")
#     st.write(transcription)
    
#     # Step 3: Summarize transcription
#     summary = summarize_text(transcription)
#     st.write("### Summary:")
#     st.write(summary)
    
#     # Clean up temporary files if needed
#     os.remove(temp_file_path)
#     if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
#         os.remove(audio_path)

# # Run the app
# if uploaded_file is not None:
#     process_media(uploaded_file)
# else:
#     st.warning("Please upload a file.")




# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




# import os
# import ffmpeg
# import whisper
# import streamlit as st
# from groq import Groq

# # Set the title and description of the app
# st.title("Audio/Video Transcription and Summarization")
# st.write("Upload your audio or video file, and this app will transcribe the audio and provide a summary of the transcription.")

# # Get the API key from user input (You may want to use Streamlit secrets management)
# GROQ_API_KEY = st.text_input("Enter your Groq API Key:")
# os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# # Create a temporary directory if it does not exist
# temp_dir = "temp"
# os.makedirs(temp_dir, exist_ok=True)

# # Upload the audio or video file
# uploaded_file = st.file_uploader("Choose an audio or video file...", type=["mp4", "mov", "avi", "mkv", "wav", "mp3"])

# # Function to extract audio from video
# def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
#     """Extracts audio from video."""
#     try:
#         # Run ffmpeg command with stderr capture for better error handling
#         ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
#     except ffmpeg.Error as e:
#         st.error("FFmpeg error encountered: " + e.stderr.decode())
#     return audio_path

# # Function to transcribe audio to text using Whisper model
# def transcribe_audio(audio_path):
#     """Transcribes audio to text using Whisper model."""
#     model = whisper.load_model("base")  # Load the Whisper model
#     result = model.transcribe(audio_path)
#     return result["text"]

# # Function to summarize text using Groq API
# def summarize_text(text):
#     """Summarizes text using Groq API."""
#     client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
#     response = client.chat.completions.create(
#         messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
#         model="llama3-8b-8192"
#     )
#     summary = response.choices[0].message.content
#     return summary

# # Complete function to process audio or video
# def process_media(media_file):
#     """Processes audio or video: extracts audio, transcribes it, and summarizes the transcription."""
#     # Save the uploaded file to a temporary path
#     temp_file_path = os.path.join(temp_dir, media_file.name)
#     with open(temp_file_path, "wb") as f:
#         f.write(media_file.getbuffer())

#     # Determine if the file is a video or audio based on the file extension
#     if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
#         # Step 1: Extract audio from video
#         audio_path = extract_audio(temp_file_path)
#     else:
#         audio_path = temp_file_path  # If it's already audio, use it as is

#     # Step 2: Transcribe audio to text
#     transcription = transcribe_audio(audio_path)
#     st.write("### Transcription:")
#     st.write(transcription)
    
#     # Step 3: Summarize transcription
#     summary = summarize_text(transcription)
#     st.write("### Summary:")
#     st.write(summary)
    
#     # Clean up temporary files if needed
#     os.remove(temp_file_path)
#     if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
#         os.remove(audio_path)

# # Run the app
# if uploaded_file is not None and GROQ_API_KEY:
#     process_media(uploaded_file)
# else:
#     st.warning("Please upload a file and enter your Groq API key.")