# Spaces: Running / Running  (page-status residue from the capture; not program code)
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 9 16:43:31 2024
@author: Pradeep Kumar
"""
import importlib.metadata
import os
import subprocess
import tempfile

import torch
import whisper
from deep_translator import GoogleTranslator
from flask import Flask, request, abort, jsonify, render_template
#%%
# Distribution names whose installed versions are recorded below.
# NOTE(review): "os" is a standard-library module with no distribution of its
# own, so it is expected to be reported as "Not Installed". Whisper is usually
# distributed as "openai-whisper" — confirm "whisper" is the intended name.
packages = ["whisper", "torch", "os", "flask", "deep-translator"]

# Maps every name in `packages` to its version string, "Not Installed",
# or "Error: <message>" when the lookup itself failed.
package_versions = {}
for package in packages:
    try:
        # Query the metadata of the interpreter that is actually running this
        # file; a `pip` executable found on PATH may belong to another
        # environment, and spawning one process per package is slow.
        package_versions[package] = importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError:
        package_versions[package] = "Not Installed"
    except Exception as e:
        package_versions[package] = f"Error: {str(e)}"

# Bare expression: shows the dict when this cell is run interactively;
# it has no effect when the file is executed as a script.
package_versions
#%% | |
# Run Whisper on the GPU when torch reports CUDA support, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Flask application object for this module.
app = Flask(__name__)

# Transcripts are stored in a 'transcripts' folder under the directory the
# process was started from (the current working directory at import time).
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
# Ensure a directory exists
def check_directory(path):
    """
    Create the directory *path*, including missing parents, if it is absent.

    Calling this for a directory that already exists is a no-op.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first: another process creating the directory in
    # between would otherwise make makedirs() fail.
    os.makedirs(path, exist_ok=True)
# Create the transcripts folder at import time, before any transcript is written.
check_directory(TRANSCRIPTS_FOLDER)
# NOTE(review): no @app.route decorator is visible for this view in the code
# shown here — confirm it is registered with the Flask app.
def upload_page():
    """
    Return the rendered 'upload.html' template, the page used to submit audio files.
    """
    upload_form = render_template('upload.html')
    return upload_form
def _client_basename(filename):
    """Return the last path component of a client-supplied *filename*."""
    # Clients may send POSIX- or Windows-style paths; normalise the separators
    # so basename() strips directory parts from either form.
    return os.path.basename(filename.replace("\\", "/")) or "upload"


# NOTE(review): no @app.route decorator is visible for this view in the code
# shown here — confirm it is registered with the Flask app.
def process_audio():
    """
    Transcribe an uploaded audio file with Whisper and save a timestamped transcript.

    Reads from the current Flask request:
        files['audio_file']: the audio upload (required).
        form['language']: language code passed to Whisper (required).
        form['model_type']: Whisper model name, defaults to "base".

    Returns:
        On success, a JSON response with "message", "transcript_path" and
        "transcription_preview". When the model cannot be loaded or the
        transcription fails, a JSON response with an "error" key and
        status 500.

    Aborts with 400 when the file or language is missing, or when the model
    name is not one reported by whisper.available_models().
    """
    if 'audio_file' not in request.files:
        return abort(400, "No file part in the request.")

    audio_file = request.files['audio_file']
    selected_language = request.form.get('language', None)
    model_type = request.form.get('model_type', "base")

    if not audio_file or audio_file.filename == '':
        return abort(400, "No file selected for upload.")

    # Validate up front, before any expensive work and outside the
    # try/except below: abort() raises an HTTPException, which an
    # `except Exception` handler would otherwise turn into a 500 response.
    if not selected_language:
        return abort(400, "Language selection is required.")

    # whisper.load_model() also accepts a checkpoint path, so restrict the
    # client-controlled value to the official model names.
    if model_type not in whisper.available_models():
        return abort(400, f"Unsupported Whisper model: {model_type}")

    # The filename is client-controlled: never join it into a path as-is, or
    # an upload could write (and the cleanup below delete) arbitrary files.
    upload_name = _client_basename(audio_file.filename)

    # A unique temporary file also keeps concurrent uploads that share a
    # name from overwriting each other. The extension is kept for the decoder.
    fd, temp_audio_path = tempfile.mkstemp(suffix=os.path.splitext(upload_name)[1])
    os.close(fd)

    try:
        audio_file.save(temp_audio_path)

        try:
            # Load the Whisper model based on user selection
            model = whisper.load_model(model_type, device=DEVICE)
        except Exception as e:
            return jsonify({"error": f"Failed to load Whisper model ({model_type}): {e}"}), 500

        try:
            # Transcribe with the user-selected language
            result = model.transcribe(temp_audio_path, fp16=False, language=selected_language, verbose=False)

            # Dutch transcripts additionally get an English line per segment;
            # build the translator once instead of once per segment.
            translator = None
            if selected_language == 'nl':
                translator = GoogleTranslator(source='auto', target='en')

            # Save the transcription with timestamps
            transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{upload_name}_transcript.txt")
            with open(transcript_file, 'w', encoding='utf-8') as text_file:
                for segment in result['segments']:
                    start_time = segment['start']
                    end_time = segment['end']
                    text = segment['text']
                    text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
                    if translator is not None:
                        text_en = translator.translate(text)
                        text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

            # Return the transcription metadata
            return jsonify({
                "message": "Transcription successful!",
                "transcript_path": transcript_file,
                "transcription_preview": result['text']
            })
        except Exception as e:
            return jsonify({"error": f"Failed to process the audio file: {e}"}), 500
    finally:
        # Clean up the temporary audio file on every exit path, including a
        # failed save or a failed model load.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
if __name__ == '__main__':
    # Run the Flask development server.
    # The interactive debugger lets anyone who can reach it execute arbitrary
    # code, so it is off by default and enabled only with FLASK_DEBUG=1
    # (or true/yes/on) in the environment.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)