# speech-to-text / app.py — Nechba, commit b10bae9 ("Update app.py"), 1.09 kB
from flask import Flask, request, jsonify
import whisper
import os
import tempfile
import io
import torchaudio
# Flask application object; the route decorator below registers against it.
app = Flask(__name__)
# Load the Whisper "small" model once, at import time, rather than per request.
whisper_model = whisper.load_model("small") # module-level name is whisper_model (not model)
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe the audio sent as the raw body of a POST request.

    The body is decoded with ``torchaudio.load``, down-mixed to a single
    channel, resampled to 16 kHz when necessary, and passed to the
    module-level ``whisper_model``.

    Returns:
        ``{"text": ...}`` on success; ``{"error": ...}`` with status 400
        when the body is empty; ``{"error": ..., "details": ...}`` with
        status 500 when decoding or transcription raises.
    """
    # Whisper does not resample array input; per its documentation the
    # waveform is expected to be 16 kHz mono.
    target_sample_rate = 16000

    try:
        # Read raw bytes from the request
        audio_bytes = request.data
        if not audio_bytes:
            return jsonify({"error": "No audio data provided"}), 400

        # Decode from an in-memory file-like object; torchaudio returns a
        # (channels, frames) tensor plus the file's native sample rate.
        waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))

        # Average the channels so stereo/multi-channel uploads become mono
        # (a single-channel input is left numerically unchanged).
        mono = waveform.mean(dim=0, keepdim=True)

        if sample_rate != target_sample_rate:
            mono = torchaudio.functional.resample(
                mono, sample_rate, target_sample_rate
            )

        # Whisper accepts a 1-D NumPy array of samples.
        audio_numpy = mono.squeeze(0).numpy()

        # Use the module-level model; the bare name `model` is not defined
        # in this file and raised NameError on every request.
        result = whisper_model.transcribe(audio_numpy)
        return jsonify({"text": result["text"]})
    except Exception as e:
        print("Error:", str(e))  # Log error for debugging
        return jsonify({"error": "Internal Server Error", "details": str(e)}), 500