Spaces:

HARISH20205
/

Speech-Summarize

Sleeping

App Files Files Community

HARISH20205 committed on Mar 17

Commit

fe98768

1 Parent(s): e0e1c9c

deploy

Browse files

Files changed (6) hide show

Dockerfile +26 -0
app.py +206 -0
requirements.txt +9 -0
static/css/styles.css +151 -0
static/js/scripts.js +67 -0
templates/index.html +42 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+# Image for the Flask speech-summarization app, served by gunicorn on port 7860.
+FROM python:3.10-slim
+WORKDIR /app
+# System packages: ffmpeg for audio decoding/extraction (app.py uses the
+# FFmpegExtractAudio post-processor), plus build tools and libsndfile.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg \
+    build-essential \
+    libsndfile1 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+# Copy and install the requirements before the sources so this layer stays
+# cached when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+RUN mkdir -p /app/static/audio
+EXPOSE 7860
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    MODEL_NAME="google/pegasus-xsum"
+# gunicorn kills a worker that is silent for longer than its timeout (30 s by
+# default). Downloading/loading the models and transcribing audio takes far
+# longer than that, so raise the limit explicitly. A single worker keeps only
+# one copy of the models in memory.
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--timeout", "600", "app:app"]

app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+from flask import Flask, request, jsonify, render_template
+import whisper
+from pydub import AudioSegment
+import os
+import io
+import numpy as np
+from transformers import PegasusForConditionalGeneration, PegasusTokenizer
+import math
+from yt_dlp import YoutubeDL
+import logging
+from functools import lru_cache
+from dotenv import load_dotenv
+import time
+import re
+import tempfile
+load_dotenv()
+app = Flask(__name__)
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+# Model setup
+MODEL_NAME = "google/pegasus-xsum"
+# Function to convert audio bytes to MP3 format in memory
+def convert_audio_to_mp3(audio_bytes, original_format=None):
+    try:
+        logging.info(f"Converting audio from {original_format} to MP3 in memory...")
+        audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format=original_format)
+        buffer = io.BytesIO()
+        audio.export(buffer, format="mp3")
+        buffer.seek(0)
+        logging.info("Conversion successful")
+        return buffer
+    except Exception as e:
+        logging.error(f"Error converting audio to MP3: {e}")
+        raise ValueError(f"Error converting audio to MP3: {e}")
+# Function to load Whisper model
+@lru_cache(maxsize=1)
+def load_whisper_model():
+    return whisper.load_model("base")
+# Function to load Pegasus model
+@lru_cache(maxsize=1)
+def load_pegasus_model():
+    tokenizer = PegasusTokenizer.from_pretrained(MODEL_NAME)
+    model = PegasusForConditionalGeneration.from_pretrained(MODEL_NAME)
+    return tokenizer, model
+# Function to transcribe audio using Whisper
+def transcribe_audio_with_whisper(audio_data):
+    try:
+        logging.info("Transcribing audio data")
+        model = load_whisper_model()
+        # Create a temporary file for Whisper (which requires a file path)
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=True) as temp_file:
+            if isinstance(audio_data, io.BytesIO):
+                temp_file.write(audio_data.getvalue())
+            else:
+                temp_file.write(audio_data)
+            temp_file.flush()
+            # Transcribe using the temporary file
+            result = model.transcribe(temp_file.name)
+        return result["text"]
+    except Exception as e:
+        logging.error(f"Error in audio transcription: {e}")
+        raise ValueError(f"Error in audio transcription: {e}")
+# Function to summarize text using Pegasus
+def summarize_text_with_pegasus(text, tokenizer, model):
+    try:
+        inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
+        total_tokens = len(inputs["input_ids"][0])
+        min_summary_length = max(math.ceil(total_tokens / 4), 75)
+        max_summary_length = max(math.ceil(total_tokens / 3), 200)
+        if min_summary_length >= max_summary_length:
+            min_summary_length = max_summary_length - 1
+        summary_ids = model.generate(
+            inputs.input_ids,
+            num_beams=5,
+            min_length=min_summary_length,
+            max_length=max_summary_length,
+            early_stopping=True
+        )
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+        summary = remove_repeated_sentences(summary)  # Remove repeated sentences from summary
+        return summary
+    except Exception as e:
+        logging.error(f"Error in text summarization: {e}")
+        raise ValueError(f"Error in text summarization: {e}")
+# Function to download audio from YouTube using yt_dlp (in memory)
+def download_audio_from_youtube(url):
+    # Create a buffer to store the downloaded audio
+    buffer = io.BytesIO()
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+        # Use temp directory for intermediate files
+        'outtmpl': '-',
+        'logtostderr': True,
+        'quiet': True,
+        'no_warnings': True,
+        # Stream to stdout and capture
+        'extract_audio': True,
+    }
+    try:
+        logging.info(f"Downloading audio from YouTube: {url}")
+        # Create temp file for YouTube-DL (it needs a file path)
+        with tempfile.NamedTemporaryFile(suffix=".%(ext)s") as temp_file:
+            ydl_opts['outtmpl'] = temp_file.name
+            with YoutubeDL(ydl_opts) as ydl:
+                # Extract info and download
+                info = ydl.extract_info(url, download=True)
+                # Get the filename of the downloaded audio
+                audio_file_path = ydl.prepare_filename(info).replace('.webm', '.mp3').replace('.m4a', '.mp3')
+                # Read the file into memory
+                with open(audio_file_path, 'rb') as audio_file:
+                    buffer = io.BytesIO(audio_file.read())
+                    buffer.seek(0)
+        return buffer
+    except Exception as e:
+        logging.error(f"Unexpected error downloading audio: {e}")
+        raise ValueError(f"Error downloading audio from YouTube: {e}")
+# Function to check allowed file extensions
+def allowed_file(filename):
+    ALLOWED_EXTENSIONS = {'mp3', 'aac', 'flac', 'm4a'}
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+# Function to remove repeated sentences
+def remove_repeated_sentences(text):
+    sentences = re.split(r'(?<=[.!?]) +', text)  # Split by sentence-ending punctuation
+    unique_sentences = []
+    seen_sentences = set()
+    for sentence in sentences:
+        normalized_sentence = sentence.lower().strip()
+        if normalized_sentence not in seen_sentences:
+            unique_sentences.append(sentence)
+            seen_sentences.add(normalized_sentence)
+    return ' '.join(unique_sentences)
+# Route to render index.html template
+@app.route('/')
+def index():
+    return render_template('index.html')
+# Route to handle transcription and summarization
+@app.route('/transcribe', methods=['POST'])
+def transcribe():
+    try:
+        audio_data = None
+        if 'url' in request.form and request.form['url']:
+            youtube_url = request.form['url']
+            audio_data = download_audio_from_youtube(youtube_url)
+        elif 'file' in request.files:
+            audio_file = request.files['file']
+            if not audio_file.filename:
+                return jsonify({"error": "No file selected."}), 400
+            if not allowed_file(audio_file.filename):
+                return jsonify({"error": "Invalid file type. Please upload an audio file."}), 400
+            # Read file data into memory
+            audio_bytes = audio_file.read()
+            file_format = audio_file.filename.rsplit('.', 1)[1].lower()
+            audio_data = convert_audio_to_mp3(audio_bytes, original_format=file_format)
+        else:
+            return jsonify({"error": "No audio file or URL provided."}), 400
+        transcription = transcribe_audio_with_whisper(audio_data)
+        if transcription:
+            tokenizer, model = load_pegasus_model()
+            summary = summarize_text_with_pegasus(transcription, tokenizer, model)
+            return jsonify({"transcription": transcription, "summary": summary})
+        else:
+            return jsonify({"error": "Transcription failed."}), 500
+    except ValueError as e:
+        return jsonify({"error": str(e)}), 400
+    except Exception as e:
+        logging.error(f"An unexpected error occurred: {e}")
+        return jsonify({"error": "An unexpected error occurred."}), 500
+if __name__ == "__main__":
+    app.run(debug=True, port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# Pinned runtime dependencies for app.py
+# Web framework
+flask==2.3.3
+# Speech-to-text model (imported as `whisper`)
+openai-whisper==20231117
+# Audio decoding / MP3 export
+pydub==0.25.1
+# Pegasus tokenizer and summarization model
+transformers==4.36.2
+# YouTube audio download
+yt-dlp==2023.11.16
+# .env loading (load_dotenv)
+python-dotenv==1.0.0
+torch==2.1.1
+# WSGI server used by the Dockerfile CMD
+gunicorn==21.2.0
+numpy==1.24.2

static/css/styles.css ADDED Viewed

	@@ -0,0 +1,151 @@

+/* General styling */
+/* Dark page background with white text */
+body {
+    font-family: 'Arial', sans-serif;
+    margin: 0;
+    padding: 0;
+    background-color: #121212;
+    color: #ffffff;
+}
+h1 {
+    color: #ffffff;
+    text-align: center;
+    margin-top: 20px;
+    font-size: 2.5em;
+    padding: 10px;
+}
+/* Centered card that lays the two columns out side by side */
+.container {
+    display: flex;
+    justify-content: space-between;
+    padding: 20px;
+    background-color: #1e1e1e;
+    margin: 20px auto;
+    max-width: 1200px;
+    box-shadow: 0 0 10px rgba(0, 0, 0, 0.5);
+    border-radius: 8px;
+}
+/* Equal-width columns whose children stack vertically */
+.left-container, .right-container {
+    flex: 1;
+    margin: 10px;
+    display: flex;
+    flex-direction: column;
+    justify-content: flex-start;
+}
+form {
+    display: flex;
+    flex-direction: column;
+}
+.input-container {
+    display: flex;
+    flex-direction: column;
+    margin-bottom: 20px;
+}
+/* Text and file inputs */
+.input-container input[type="text"],
+.input-container input[type="file"] {
+    padding: 10px;
+    margin: 5px 0;
+    border: 1px solid #ccc;
+    border-radius: 5px;
+    box-sizing: border-box;
+    background-color: #2e2e2e;
+    color: #ffffff;
+}
+/* Submit and plain buttons share the blue button look */
+.input-container input[type="submit"],
+.input-container button {
+    background-color: #007bff;
+    color: #fff;
+    border: none;
+    padding: 10px 20px;
+    cursor: pointer;
+    border-radius: 5px;
+    transition: background-color 0.3s ease;
+    margin-top: 10px;
+}
+.input-container input[type="submit"]:hover,
+.input-container button:hover {
+    background-color: #0056b3;
+}
+/* Status line; hidden here and shown by scripts.js through style.display */
+#processing {
+    display: none;
+    color: #007bff;
+    font-size: 1.2em;
+    margin-top: 20px;
+}
+/* Summary and Transcription */
+/* Panel holding one result; long words wrap instead of overflowing */
+.response-box {
+    padding: 20px;
+    background-color: #2e2e2e;
+    border-radius: 8px;
+    margin-top: 20px;
+    box-shadow: 0 0 10px rgba(0, 0, 0, 0.5);
+    overflow-wrap: break-word;
+}
+/* Heading row: title on the left, copy button pushed to the right */
+.response-box h2 {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    margin-top: 0;
+    font-size: 1.5em;
+    color: #ffffff;
+}
+.response-box p {
+    margin: 10px 0 0;
+    font-size: 1em;
+    color: #b0b0b0;
+}
+#summary, #transcription {
+    margin-top: 10px;
+}
+/* Small copy button shown inside each panel heading */
+.copy-btn {
+    background-color: #007bff;
+    color: #fff;
+    border: none;
+    padding: 5px 10px;
+    cursor: pointer;
+    border-radius: 5px;
+    font-size: 0.7em;
+    transition: background-color 0.3s ease;
+}
+.copy-btn:hover {
+    background-color: #0056b3;
+}
+/* Responsive styling */
+/* Up to 768px wide: stack the two columns vertically */
+@media (max-width: 768px) {
+    .container {
+        flex-direction: column;
+        align-items: center;
+    }
+    .left-container, .right-container {
+        width: 100%;
+    }
+}
+/* Up to 480px wide: smaller title and full-width form controls */
+@media (max-width: 480px) {
+    h1 {
+        font-size: 2em;
+    }
+    .input-container input[type="text"],
+    .input-container input[type="file"],
+    .input-container input[type="submit"],
+    .input-container button {
+        width: 100%;
+        margin: 5px 0;
+    }
+}

static/js/scripts.js ADDED Viewed

	@@ -0,0 +1,67 @@

+document.getElementById("transcribeForm").addEventListener("submit", function(event) {
+    event.preventDefault();
+    var form = event.target;
+    var formData = new FormData(form);
+    var xhr = new XMLHttpRequest();
+    // Display the processing message before sending the request
+    document.getElementById("processing").style.display = "block";
+    document.getElementById("processing").textContent = "Processing...";
+    xhr.open("POST", form.action, true);
+    xhr.onreadystatechange = function() {
+        if (xhr.readyState === XMLHttpRequest.DONE) {
+            document.getElementById("processing").style.display = "none";
+            var response = JSON.parse(xhr.responseText);
+            if (xhr.status === 200) {
+                document.getElementById("summary-content").textContent = response.summary;
+                document.getElementById("transcription-content").textContent = response.transcription;
+            } else {
+                document.getElementById("processing").style.display = "block";
+                document.getElementById("processing").textContent = response.error;
+            }
+            document.getElementById("youtube-url").disabled = false;
+            document.getElementById("file-input").disabled = false;
+        }
+    };
+    xhr.send(formData);
+    document.getElementById("youtube-url").disabled = true;
+    document.getElementById("file-input").disabled = true;
+    document.getElementById("summary-content").textContent = "Summary content will appear here...";
+    document.getElementById("transcription-content").textContent = "Transcription content will appear here...";
+});
+document.getElementById('clear-btn').addEventListener('click', function() {
+    document.getElementById('youtube-url').value = '';
+    document.getElementById('file-input').value = '';
+    document.getElementById("summary-content").textContent = "Summary content will appear here...";
+    document.getElementById("transcription-content").textContent = "Transcription content will appear here...";
+    document.getElementById("processing").style.display = "none";
+    document.getElementById("processing").textContent = "Processing...";
+});
+document.getElementById('copy-summary-btn').addEventListener('click', function() {
+    copyToClipboard('summary-content');
+});
+document.getElementById('copy-transcription-btn').addEventListener('click', function() {
+    copyToClipboard('transcription-content');
+});
+function copyToClipboard(elementId) {
+    var text = document.getElementById(elementId).textContent;
+    var textarea = document.createElement("textarea");
+    textarea.value = text;
+    document.body.appendChild(textarea);
+    textarea.select();
+    document.execCommand("copy");
+    document.body.removeChild(textarea);
+    alert("Copied to clipboard");
+}

templates/index.html ADDED Viewed

	@@ -0,0 +1,42 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Speech-to-Text Summarization</title>
+    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/styles.css') }}">
+</head>
+<body>
+    <h1>Speech to Text Summarization</h1>
+    <div class="container">
+        <!-- Left column: input form and status line -->
+        <div class="left-container">
+            <form id="transcribeForm" action="/transcribe" method="post" enctype="multipart/form-data">
+                <div class="input-container">
+                    <input type="text" id="youtube-url" name="url" placeholder="Enter YouTube URL" aria-label="YouTube URL">
+                </div>
+                <div class="input-container">
+                    <!-- Offer only the extensions the /transcribe endpoint accepts -->
+                    <input type="file" id="file-input" name="file" accept=".mp3,.aac,.flac,.m4a" aria-label="Audio file">
+                </div>
+                <div class="input-container">
+                    <input type="submit" value="Transcribe and Summarize">
+                </div>
+                <div class="input-container">
+                    <button type="button" id="clear-btn">Clear</button>
+                </div>
+            </form>
+            <!-- Status and error messages; announced to assistive technology when updated -->
+            <div id="processing" role="status" aria-live="polite">Processing...</div>
+        </div>
+        <!-- Right column: results -->
+        <div class="right-container">
+            <div id="summary" class="response-box">
+                <h2>Summary <button type="button" id="copy-summary-btn" class="copy-btn">Copy</button></h2>
+                <p id="summary-content">Summary content will appear here...</p>
+            </div>
+            <div id="transcription" class="response-box">
+                <h2>Transcription <button type="button" id="copy-transcription-btn" class="copy-btn">Copy</button></h2>
+                <p id="transcription-content">Transcription content will appear here...</p>
+            </div>
+        </div>
+    </div>
+    <script src="{{ url_for('static', filename='js/scripts.js') }}"></script>
+</body>
+</html>