from flask import Flask, request, jsonify
import whisper
import os
import tempfile
import io
import torchaudio

app = Flask(__name__)

# Sample rate (Hz) that Whisper assumes when it is handed a raw waveform
# instead of a file path.
WHISPER_SAMPLE_RATE = 16000

# Load the Whisper model once at import time so every request reuses it.
whisper_model = whisper.load_model("small")


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe the audio sent as the raw body of a POST request.

    The request body must contain the bytes of an audio file in a format
    that ``torchaudio.load`` can decode from a file-like object.

    Returns:
        A ``(response, status)`` pair or a bare response:
        * 200 with ``{"text": <transcription>}`` on success,
        * 400 with ``{"error": ...}`` when the body is empty,
        * 500 with ``{"error": ..., "details": ...}`` on any failure while
          decoding or transcribing.
    """
    try:
        # Read raw bytes from the request.
        audio_bytes = request.data
        if not audio_bytes:
            return jsonify({"error": "No audio data provided"}), 400

        # Decode the bytes into a (channels, frames) float tensor.
        audio_file = io.BytesIO(audio_bytes)
        waveform, sample_rate = torchaudio.load(audio_file)

        # Whisper expects a 1-D mono signal; averaging the channels is a
        # no-op for mono input and a down-mix for stereo/multi-channel.
        mono = waveform.mean(dim=0)

        # Whisper interprets raw arrays as 16 kHz audio, so resample anything
        # recorded at a different rate before transcribing.
        if sample_rate != WHISPER_SAMPLE_RATE:
            mono = torchaudio.functional.resample(
                mono, orig_freq=sample_rate, new_freq=WHISPER_SAMPLE_RATE
            )

        # Whisper accepts a NumPy array of float32 samples.
        audio_numpy = mono.numpy()

        # The model is bound to ``whisper_model`` (not ``model``).
        result = whisper_model.transcribe(audio_numpy)

        return jsonify({"text": result["text"]})

    except Exception as e:
        # Top-level request boundary: log with traceback and report a 500.
        app.logger.exception("Error: %s", e)
        # NOTE(review): "details" exposes the raw exception text to the
        # client; kept for backward compatibility with existing callers.
        return jsonify({"error": "Internal Server Error", "details": str(e)}), 500