speech-to-text

Sleeping

speech-to-text / app.py

Update app.py

b10bae9 verified 3 months ago

1.09 kB

	from flask import Flask, request, jsonify

	import whisper
	import os
	import tempfile
	import io
	import torchaudio

	# Flask application object; the route handlers in this module register on it.
	app = Flask(__name__)

	# Load the Whisper "small" model once, at module import time.
	whisper_model = whisper.load_model("small")  # module-level model instance




	@app.route('/transcribe', methods=['POST'])
	def transcribe():
	    """Transcribe the audio sent as the raw body of a POST request.

	    The request body is decoded with torchaudio, downmixed to mono,
	    resampled to 16 kHz if necessary, and passed to the module-level
	    Whisper model.

	    Returns:
	        A JSON response ``{"text": ...}`` on success; ``{"error": ...}``
	        with status 400 when the body is empty; ``{"error": ...,
	        "details": ...}`` with status 500 when decoding or transcription
	        fails.
	    """
	    # Whisper operates on 16 kHz mono audio; anything else must be converted.
	    target_sample_rate = 16000

	    try:
	        # Read raw bytes from the request
	        audio_bytes = request.data
	        if not audio_bytes:
	            return jsonify({"error": "No audio data provided"}), 400

	        # Decode from an in-memory buffer; torchaudio returns a
	        # (channels, frames) tensor together with the source sample rate.
	        waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))

	        # Downmix to a single channel. A plain squeeze() would leave
	        # stereo input two-dimensional, which is not a valid waveform
	        # for Whisper.
	        waveform = waveform.mean(dim=0)

	        # Resample when the source rate differs from what Whisper expects;
	        # otherwise the audio is interpreted at the wrong speed.
	        if sample_rate != target_sample_rate:
	            waveform = torchaudio.functional.resample(
	                waveform, sample_rate, target_sample_rate
	            )

	        # Whisper accepts a NumPy array of samples
	        audio_numpy = waveform.numpy()

	        # Use the module-level model (bound to `whisper_model`, not `model`)
	        result = whisper_model.transcribe(audio_numpy)

	        return jsonify({"text": result["text"]})

	    except Exception as e:  # top-level request boundary: log and report
	        app.logger.exception("Transcription failed")
	        return jsonify({"error": "Internal Server Error", "details": str(e)}), 500