# -*- coding: utf-8 -*-
"""
Created on Mon Dec 9 16:43:31 2024

@author: Pradeep Kumar

Flask front end that transcribes an uploaded audio file with Whisper and
writes a timestamped transcript into the ``transcripts`` folder. When the
selected language is ``nl`` each segment is additionally translated to
English with GoogleTranslator.
"""

import os
import re
import subprocess  # noqa: F401 - no longer used below; kept so the module namespace is unchanged
import tempfile
from importlib import metadata

import torch
import whisper
from deep_translator import GoogleTranslator
from flask import Flask, request, abort, jsonify, render_template

#%%
# List of packages to check versions for.
# NOTE(review): "os" is a standard-library module and has no distribution
# metadata, so it is always reported as "Not Installed". The OpenAI Whisper
# distribution is presumably published as "openai-whisper", not "whisper" -
# confirm before relying on that entry.
packages = ["whisper", "torch", "os", "flask", "deep-translator"]

# Dictionary to store versions
package_versions = {}

for package in packages:
    try:
        # Query the metadata of the interpreter that is actually running,
        # rather than shelling out to whichever `pip` happens to be on PATH.
        package_versions[package] = metadata.version(package)
    except metadata.PackageNotFoundError:
        package_versions[package] = "Not Installed"
    except Exception as e:
        # Best-effort diagnostic only: never let it break module import.
        package_versions[package] = f"Error: {str(e)}"

# Presumably here so that running this cell interactively echoes the dict;
# it has no effect when the file runs as a script.
package_versions

#%%
# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize Flask app
app = Flask(__name__)

# Directories for transcripts
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')


# Ensure transcripts directory exists
def check_directory(path):
    """Create the directory *path* (including parents) if it is missing."""
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)


check_directory(TRANSCRIPTS_FOLDER)


def _safe_filename(filename):
    """Reduce a client-supplied file name to a single safe path component.

    Directory parts (both ``/`` and ``\\`` separated) are dropped, every
    character other than word characters, ``.``, ``-`` and space is replaced
    by ``_``, and leading dots/spaces are stripped.

    Returns:
        The sanitized name, or ``""`` when nothing usable is left
        (for example for ``".."`` or an empty name).
    """
    name = re.split(r"[\\/]", filename or "")[-1]
    name = re.sub(r"[^\w.\- ]", "_", name)
    return name.lstrip(". ")


def _write_transcript(segments, transcript_file, translator=None):
    """Write one ``[start - end] text`` line per segment to *transcript_file*.

    Args:
        segments: Iterable of mappings with ``start``, ``end`` and ``text``
            keys, as read from the ``segments`` entry of the transcription
            result.
        transcript_file: Destination path; it is overwritten.
        translator: Optional object with a ``translate(text)`` method. When
            given, each non-blank segment is followed by a second line with
            the same timestamps and the translated text.
    """
    with open(transcript_file, 'w', encoding='utf-8') as text_file:
        for segment in segments:
            start_time = segment['start']
            end_time = segment['end']
            text = segment['text']
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
            # There is nothing to translate in a blank segment.
            if translator is not None and text.strip():
                text_en = translator.translate(text)
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")


@app.route('/')
def upload_page():
    """
    Render the upload page for audio file submission.
    """
    return render_template('upload.html')


@app.route('/process_audio', methods=['POST'])
def process_audio():
    """
    Transcribe an uploaded audio file with Whisper.

    Form fields:
        audio_file: The uploaded file (required).
        language: Language code passed to Whisper (required).
        model_type: Whisper model name; defaults to ``"base"`` and must be
            one of ``whisper.available_models()``.

    Returns:
        On success, JSON with ``message``, ``transcript_path`` and
        ``transcription_preview``. On a model-loading or processing failure,
        JSON with an ``error`` key and status 500.

    Raises:
        HTTP 400 (via ``abort``) when the file, its name, the language or
        the model name is missing or invalid.
    """
    # All request validation happens before any work is done and outside
    # the try block below: abort() raises an HTTPException, which the broad
    # `except Exception` would otherwise turn into a 500 response.
    if 'audio_file' not in request.files:
        abort(400, "No file part in the request.")

    audio_file = request.files['audio_file']
    selected_language = request.form.get('language', None)
    model_type = request.form.get('model_type', "base")

    if not audio_file or audio_file.filename == '':
        abort(400, "No file selected for upload.")

    # The file name comes from the client; never join it into a path as-is.
    safe_name = _safe_filename(audio_file.filename)
    if not safe_name:
        abort(400, "Invalid file name.")

    if not selected_language:
        abort(400, "Language selection is required.")

    # Only accept known model names so that a request cannot hand an
    # arbitrary string (such as a file path) to whisper.load_model().
    if model_type not in whisper.available_models():
        abort(400, f"Unsupported Whisper model: {model_type!r}")

    # Save the upload under a unique temporary name so that concurrent
    # requests using the same file name cannot clobber each other.
    fd, temp_audio_path = tempfile.mkstemp(suffix=os.path.splitext(safe_name)[1])
    os.close(fd)

    try:
        audio_file.save(temp_audio_path)

        try:
            # Load the Whisper model based on user selection
            model = whisper.load_model(model_type, device=DEVICE)
        except Exception as e:
            return jsonify({"error": f"Failed to load Whisper model ({model_type}): {e}"}), 500

        # Transcribe with the user-selected language
        result = model.transcribe(
            temp_audio_path, fp16=False, language=selected_language, verbose=False
        )

        # Save the transcription with timestamps
        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{safe_name}_transcript.txt")
        translator = None
        if selected_language == 'nl':
            # Built once per request instead of once per segment.
            translator = GoogleTranslator(source='auto', target='en')
        _write_transcript(result['segments'], transcript_file, translator)

        # Return the transcription metadata
        return jsonify({
            "message": "Transcription successful!",
            "transcript_path": transcript_file,
            "transcription_preview": result['text']
        })
    except Exception as e:
        return jsonify({"error": f"Failed to process the audio file: {e}"}), 500
    finally:
        # Clean up the temporary audio file on every exit path, including
        # the model-loading failure above.
        try:
            os.remove(temp_audio_path)
        except FileNotFoundError:
            pass


if __name__ == '__main__':
    # Run the Flask application.
    # NOTE: debug=True turns on Flask's debug mode; do not use it for a
    # deployment that is reachable by untrusted clients.
    app.run(debug=True)