speech-to-text

Sleeping

File size: 1,093 Bytes

c2864d3
b10bae9
2ea3a36
 
03f0774
419ab6f
9bb1bc6
fcf5834
c2864d3
2ea3a36
fbe647d
 
2ea3a36
 
 
 
 
5020140
68d753f
9bb1bc6
 
68d753f
 
 
 
 
fa58d25
9bb1bc6
 
 
 
 
 
68d753f
9bb1bc6
fa58d25
68d753f
2ea3a36
68d753f
9bb1bc6
68d753f
fa58d25
b10bae9

from flask import Flask, request, jsonify

import whisper
import os
import tempfile
import io
import torchaudio

# Flask application object; route handlers are registered on it via @app.route.
app = Flask(__name__)

# Load the Whisper "small" model once, at import time, instead of per request.
whisper_model = whisper.load_model("small")  # NOTE: the global is `whisper_model`, not `model`




@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe the audio sent as the raw POST body.

    The request body must contain the bytes of an audio file in a format
    that ``torchaudio.load`` can decode. The audio is mixed down to mono
    and resampled to 16 kHz before being handed to the Whisper model.

    Returns:
        A JSON response:
          * 200 ``{"text": ...}`` on success,
          * 400 ``{"error": ...}`` when the body is empty,
          * 500 ``{"error": ..., "details": ...}`` on any processing failure.
    """
    # Whisper operates on 16 kHz mono float32 audio.
    target_sample_rate = 16000

    try:
        # Read raw bytes from the request
        audio_bytes = request.data
        if not audio_bytes:
            return jsonify({"error": "No audio data provided"}), 400

        # Decode from an in-memory file-like object; torchaudio returns a
        # (channels, frames) tensor plus the file's native sample rate.
        waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))

        # Average the channels so stereo/multi-channel input becomes the
        # 1-D mono signal Whisper requires (squeeze() only handled mono).
        waveform = waveform.float().mean(dim=0)

        # Resample when the file is not already at Whisper's expected rate;
        # otherwise the audio would be interpreted at the wrong speed.
        if sample_rate != target_sample_rate:
            waveform = torchaudio.functional.resample(
                waveform, sample_rate, target_sample_rate
            )

        # Use the module-level model (`whisper_model`); the previous code
        # referenced an undefined name `model` and always failed.
        result = whisper_model.transcribe(waveform.numpy())

        return jsonify({"text": result["text"]})

    except Exception as e:
        # Top-level boundary for the endpoint: log with traceback and
        # report the failure to the client.
        app.logger.exception("Error: %s", e)
        return jsonify({"error": "Internal Server Error", "details": str(e)}), 500