Update app.py
app.py CHANGED
@@ -1,56 +1,12 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Dec 9 16:43:31 2024
-
-@author: Pradeep Kumar
-"""
-import whisper
 import torch
 import os
-
+import gradio as gr
 from deep_translator import GoogleTranslator
-
-
-#%%
-
-import subprocess
-
-# List of packages to check versions for
-packages = ["whisper", "torch", "os", "flask", "deep-translator"]
-
-# Dictionary to store versions
-package_versions = {}
-
-for package in packages:
-    try:
-        # Run pip show to get version info
-        result = subprocess.run(
-            ["pip", "show", package],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True
-        )
-        if result.returncode == 0:
-            # Parse the version from the output
-            for line in result.stdout.splitlines():
-                if line.startswith("Version:"):
-                    package_versions[package] = line.split(":", 1)[1].strip()
-        else:
-            package_versions[package] = "Not Installed"
-    except Exception as e:
-        package_versions[package] = f"Error: {str(e)}"
-
-package_versions
-
-
-#%%
+import whisper
 
 # Check if NVIDIA GPU is available
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Initialize Flask app
-app = Flask(__name__)
-
 # Directories for transcripts
 BASE_DIR = os.getcwd()
 TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
@@ -62,74 +18,61 @@ def check_directory(path):
 
 check_directory(TRANSCRIPTS_FOLDER)
 
-
-def upload_page():
-    """
-    Render the upload page for audio file submission.
-    """
-    return render_template('upload.html')
-
-@app.route('/process_audio', methods=['POST'])
-def process_audio():
+def transcribe_and_translate(audio_file, selected_language, model_type="base"):
     """
-
+    Transcribe audio using Whisper and translate it into English if required.
+
+    :param audio_file: Path to the uploaded audio file
+    :param selected_language: Language code for transcription
+    :param model_type: Whisper model type (default is 'base')
+    :return: Transcription and translation
     """
-    if
-        return
-
-    audio_file = request.files['audio_file']
-    selected_language = request.form.get('language', None)
-    model_type = request.form.get('model_type', "base")
-
-    if not audio_file or audio_file.filename == '':
-        return abort(400, "No file selected for upload.")
-
-    # Save the uploaded file to a temporary location
-    temp_audio_path = os.path.join(BASE_DIR, audio_file.filename)
-    audio_file.save(temp_audio_path)
+    if not audio_file:
+        return "No audio file uploaded."
 
     try:
         # Load the Whisper model based on user selection
         model = whisper.load_model(model_type, device=DEVICE)
     except Exception as e:
-        return
+        return f"Failed to load Whisper model ({model_type}): {e}"
 
     try:
         # Transcribe with the user-selected language
-        if selected_language:
-            result = model.transcribe(temp_audio_path, fp16=False, language=selected_language, verbose=False)
-        else:
-            return abort(400, "Language selection is required.")
+        result = model.transcribe(audio_file, language=selected_language, verbose=False)
 
         # Save the transcription with timestamps
-        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{audio_file.filename}_transcript.txt")
-
+        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{os.path.basename(audio_file)}_transcript.txt")
+
+        translated_text = []
         with open(transcript_file, 'w', encoding='utf-8') as text_file:
            for segment in result['segments']:
                 start_time = segment['start']
                 end_time = segment['end']
                 text = segment['text']
                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
-                if selected_language
+                if selected_language in ['mai', 'bn', 'mag', 'bho']:
                     text_en = GoogleTranslator(source='auto', target='en').translate(text)
+                    translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                     text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
 
-        # Return the transcription
-        return jsonify({
-            "message": "Transcription successful!",
-            "transcript_path": transcript_file,
-            "transcription_preview": result['text']
-        })
+        # Return the transcription and translation
+        return "\n".join(translated_text) if translated_text else result['text']
 
     except Exception as e:
-        return
-
-
-
-
-
-
+        return f"Failed to process the audio file: {e}"
+
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=transcribe_and_translate,
+    inputs=[
+        gr.Audio(type="filepath", label="Upload Audio"),
+        gr.Dropdown(label="Select Language", choices=["mai", "bn", "mag", "bho", "en"], value="mai"),
+        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
+    ],
+    outputs="text",
+    title="Maithili, Maghi, and Bhojpuri Transcription and Translation"
+)
 
 if __name__ == '__main__':
-    #
-
+    # Launch the Gradio interface
+    interface.launch()
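
Since the updated app.py now imports gradio, whisper, torch, and deep_translator, the Space's requirements.txt would need the matching PyPI packages. A minimal unpinned sketch (note the PyPI names differ from the import names: the module imported as whisper is published as openai-whisper, and deep_translator as deep-translator):

gradio
openai-whisper
deep-translator
torch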
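
A quick way to sanity-check the new entry point is to call transcribe_and_translate directly, without launching the Gradio UI. The sketch below is illustrative only: it assumes the file above is saved as app.py in the working directory alongside a local recording named sample.wav, and it uses "tiny" to keep the model download small.

# Hypothetical smoke test (not part of the commit); assumes app.py is
# importable and that sample.wav exists in the working directory.
from app import transcribe_and_translate

# "bn" (Bengali) is one of the dropdown choices and a language code the
# installed Whisper build is known to accept.
preview = transcribe_and_translate("sample.wav", selected_language="bn", model_type="tiny")
print(preview)

Because "bn" is in the translation list, the call writes sample.wav_transcript.txt (timestamped original plus English lines) under the transcripts folder and returns the timestamped English text, the same output the Gradio interface displays.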