Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
import openai
|
3 |
import os
|
4 |
from pydub import AudioSegment
|
5 |
from pydub.silence import split_on_silence
|
@@ -7,12 +6,22 @@ from dotenv import load_dotenv
|
|
7 |
from tempfile import NamedTemporaryFile
|
8 |
import math
|
9 |
from docx import Document
|
|
|
10 |
|
11 |
-
# Load environment variables from .env file
|
12 |
load_dotenv()
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
|
18 |
"""
|
@@ -20,7 +29,7 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
|
|
20 |
|
21 |
Args:
|
22 |
audio_file_path (str): Path to the audio file.
|
23 |
-
min_silence_len (int): Minimum length of silence (in ms) required
|
24 |
silence_thresh (int): The volume (in dBFS) below which is considered silence.
|
25 |
keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
|
26 |
|
@@ -38,41 +47,35 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
|
|
38 |
|
39 |
def transcribe(audio_file):
|
40 |
"""
|
41 |
-
Transcribe an audio file using the
|
42 |
-
|
43 |
Args:
|
44 |
audio_file (str): Path to the audio file.
|
45 |
-
|
46 |
Returns:
|
47 |
str: Transcribed text.
|
48 |
"""
|
49 |
-
|
50 |
-
|
51 |
-
model="whisper-1",
|
52 |
-
file=audio,
|
53 |
-
response_format="text",
|
54 |
-
language="en" # Ensures transcription is in English
|
55 |
-
)
|
56 |
-
return response
|
57 |
|
58 |
def process_audio_chunks(audio_chunks):
|
59 |
"""
|
60 |
-
Process and transcribe each audio chunk.
|
61 |
-
|
62 |
Args:
|
63 |
audio_chunks (list): List of AudioSegment chunks.
|
64 |
-
|
65 |
Returns:
|
66 |
str: Combined transcription from all chunks.
|
67 |
"""
|
68 |
transcriptions = []
|
69 |
-
min_length_ms = 100 # Minimum length required
|
70 |
-
|
71 |
for i, chunk in enumerate(audio_chunks):
|
72 |
if len(chunk) < min_length_ms:
|
73 |
st.warning(f"Chunk {i} is too short to be processed.")
|
74 |
continue
|
75 |
|
|
|
76 |
with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
|
77 |
chunk.export(temp_audio_file.name, format="wav")
|
78 |
temp_audio_file_path = temp_audio_file.name
|
@@ -88,32 +91,22 @@ def process_audio_chunks(audio_chunks):
|
|
88 |
def save_transcription_to_docx(transcription, audio_file_path):
|
89 |
"""
|
90 |
Save the transcription as a .docx file.
|
91 |
-
|
92 |
Args:
|
93 |
transcription (str): Transcribed text.
|
94 |
audio_file_path (str): Path to the original audio file for naming purposes.
|
95 |
-
|
96 |
Returns:
|
97 |
str: Path to the saved .docx file.
|
98 |
"""
|
99 |
-
# Extract the base name of the audio file (without extension)
|
100 |
base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
|
101 |
-
|
102 |
-
# Create a new file name by appending "_full_transcription" with .docx extension
|
103 |
output_file_name = f"{base_name}_full_transcription.docx"
|
104 |
-
|
105 |
-
# Create a new Document object
|
106 |
doc = Document()
|
107 |
-
|
108 |
-
# Add the transcription text to the document
|
109 |
doc.add_paragraph(transcription)
|
110 |
-
|
111 |
-
# Save the document in .docx format
|
112 |
doc.save(output_file_name)
|
113 |
-
|
114 |
return output_file_name
|
115 |
|
116 |
-
st.title("Audio Transcription with
|
117 |
|
118 |
# Allow uploading of audio or video files
|
119 |
uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
|
@@ -126,7 +119,6 @@ if uploaded_file is not None and st.session_state.transcription is None:
|
|
126 |
|
127 |
# Save uploaded file temporarily
|
128 |
file_extension = uploaded_file.name.split(".")[-1]
|
129 |
-
original_file_name = uploaded_file.name.rsplit('.', 1)[0] # Get original file name without extension
|
130 |
temp_audio_file = f"temp_audio_file.{file_extension}"
|
131 |
with open(temp_audio_file, "wb") as f:
|
132 |
f.write(uploaded_file.getbuffer())
|
@@ -135,27 +127,22 @@ if uploaded_file is not None and st.session_state.transcription is None:
|
|
135 |
with st.spinner('Transcribing...'):
|
136 |
audio_chunks = split_audio_on_silence(
|
137 |
temp_audio_file,
|
138 |
-
min_silence_len=500,
|
139 |
-
silence_thresh=-40,
|
140 |
-
keep_silence=250
|
141 |
)
|
142 |
transcription = process_audio_chunks(audio_chunks)
|
143 |
if transcription:
|
144 |
st.session_state.transcription = transcription
|
145 |
st.success('Transcription complete!')
|
146 |
-
|
147 |
-
# Save transcription to a Word (.docx) file
|
148 |
output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
|
149 |
st.session_state.output_docx_file = output_docx_file
|
150 |
|
151 |
-
# Clean up temporary file
|
152 |
if os.path.exists(temp_audio_file):
|
153 |
os.remove(temp_audio_file)
|
154 |
|
155 |
if st.session_state.transcription:
|
156 |
st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
|
157 |
-
|
158 |
-
# Download the transcription as a .docx file
|
159 |
with open(st.session_state.output_docx_file, "rb") as docx_file:
|
160 |
st.download_button(
|
161 |
label="Download Transcription (.docx)",
|
|
|
1 |
import streamlit as st
|
|
|
2 |
import os
|
3 |
from pydub import AudioSegment
|
4 |
from pydub.silence import split_on_silence
|
|
|
6 |
from tempfile import NamedTemporaryFile
|
7 |
import math
|
8 |
from docx import Document
|
9 |
+
import whisper
|
10 |
|
11 |
+
# Load environment variables from .env file (if needed for other configurations)
|
12 |
load_dotenv()
|
13 |
|
14 |
+
@st.cache_resource
def load_whisper_model(model_name: str = "base"):
    """
    Load a Whisper model once and cache it for future reruns.

    Generalized from the hard-coded "base" size: callers may now pick any
    supported size while the default preserves the original behavior.

    Args:
        model_name (str): Whisper model size to load — one of "tiny",
            "base", "small", "medium", or "large". Defaults to "base".

    Returns:
        The loaded Whisper model instance. Cached by st.cache_resource,
        keyed on model_name, so each size is loaded at most once per server.
    """
    return whisper.load_model(model_name)

# Load the Whisper model globally so it is only loaded once.
model = load_whisper_model()
|
25 |
|
26 |
def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
|
27 |
"""
|
|
|
29 |
|
30 |
Args:
|
31 |
audio_file_path (str): Path to the audio file.
|
32 |
+
min_silence_len (int): Minimum length of silence (in ms) required for a split.
|
33 |
silence_thresh (int): The volume (in dBFS) below which is considered silence.
|
34 |
keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
|
35 |
|
|
|
47 |
|
48 |
def transcribe(audio_file):
    """
    Transcribe one audio file with the locally loaded Whisper model.

    Args:
        audio_file (str): Path to the audio file on disk.

    Returns:
        str: The transcribed English text for the whole file.
    """
    # The module-level `model` is loaded once at import time; transcription
    # is forced to English to match the app's original behavior.
    return model.transcribe(audio_file, language="en")["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def process_audio_chunks(audio_chunks):
|
62 |
"""
|
63 |
+
Process and transcribe each audio chunk in sequence.
|
64 |
+
|
65 |
Args:
|
66 |
audio_chunks (list): List of AudioSegment chunks.
|
67 |
+
|
68 |
Returns:
|
69 |
str: Combined transcription from all chunks.
|
70 |
"""
|
71 |
transcriptions = []
|
72 |
+
min_length_ms = 100 # Minimum length required for processing
|
|
|
73 |
for i, chunk in enumerate(audio_chunks):
|
74 |
if len(chunk) < min_length_ms:
|
75 |
st.warning(f"Chunk {i} is too short to be processed.")
|
76 |
continue
|
77 |
|
78 |
+
# Save the chunk temporarily as a WAV file
|
79 |
with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
|
80 |
chunk.export(temp_audio_file.name, format="wav")
|
81 |
temp_audio_file_path = temp_audio_file.name
|
|
|
91 |
def save_transcription_to_docx(transcription, audio_file_path):
    """
    Write the transcription into a .docx file named after the audio file.

    Args:
        transcription (str): Transcribed text to save.
        audio_file_path (str): Path to the original audio file; only its
            base name (without extension) is used to build the output name.

    Returns:
        str: Path of the saved .docx file (written to the current
        working directory).
    """
    stem = os.path.splitext(os.path.basename(audio_file_path))[0]
    output_path = f"{stem}_full_transcription.docx"

    # Single-paragraph document holding the full transcription text.
    document = Document()
    document.add_paragraph(transcription)
    document.save(output_path)

    return output_path
|
108 |
|
109 |
+
st.title("Audio Transcription with Whisper (Local)")
|
110 |
|
111 |
# Allow uploading of audio or video files
|
112 |
uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
|
|
|
119 |
|
120 |
# Save uploaded file temporarily
|
121 |
file_extension = uploaded_file.name.split(".")[-1]
|
|
|
122 |
temp_audio_file = f"temp_audio_file.{file_extension}"
|
123 |
with open(temp_audio_file, "wb") as f:
|
124 |
f.write(uploaded_file.getbuffer())
|
|
|
127 |
with st.spinner('Transcribing...'):
|
128 |
audio_chunks = split_audio_on_silence(
|
129 |
temp_audio_file,
|
130 |
+
min_silence_len=500, # adjust based on your audio
|
131 |
+
silence_thresh=-40, # adjust based on ambient noise level
|
132 |
+
keep_silence=250 # retains a bit of silence at the edges
|
133 |
)
|
134 |
transcription = process_audio_chunks(audio_chunks)
|
135 |
if transcription:
|
136 |
st.session_state.transcription = transcription
|
137 |
st.success('Transcription complete!')
|
|
|
|
|
138 |
output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
|
139 |
st.session_state.output_docx_file = output_docx_file
|
140 |
|
|
|
141 |
if os.path.exists(temp_audio_file):
|
142 |
os.remove(temp_audio_file)
|
143 |
|
144 |
if st.session_state.transcription:
|
145 |
st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
|
|
|
|
|
146 |
with open(st.session_state.output_docx_file, "rb") as docx_file:
|
147 |
st.download_button(
|
148 |
label="Download Transcription (.docx)",
|