File size: 1,436 Bytes
b7fca68
 
 
bb780d8
b7fca68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb780d8
b7fca68
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
from io import BytesIO

import speech_recognition as sr
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pydub import AudioSegment

# FastAPI application instance; the /transcribe route is registered on it.
app = FastAPI()

# Response body of the /transcribe endpoint (used as its response_model).
class TranscriptionResponse(BaseModel):
    # Text recognized from the uploaded audio.
    text: str

# Accepted upload MIME types mapped to the format name passed to pydub when
# decoding the payload. Keys are exactly the content types the endpoint allows.
_FORMAT_BY_CONTENT_TYPE = {
    "audio/wav": "wav",
    "audio/x-wav": "wav",
    "audio/mpeg": "mp3",
    "audio/mp3": "mp3",
    "audio/flac": "flac",
}


def _recognize_speech(audio_bytes: bytes, audio_format: str) -> str:
    """Decode *audio_bytes* and return the text recognized in it.

    This function is synchronous and blocking (audio decoding plus a network
    request), so async callers should run it in a worker thread.

    Args:
        audio_bytes: Raw contents of the uploaded audio file.
        audio_format: Format name understood by pydub (e.g. ``"wav"``).

    Returns:
        The transcript reported by the Google Web Speech API for ``en-US``.

    Raises:
        sr.UnknownValueError: The speech could not be understood.
        sr.RequestError: The recognition service could not be reached or
            rejected the request.
    """
    # Re-encode everything as WAV in memory, which sr.AudioFile can read.
    segment = AudioSegment.from_file(BytesIO(audio_bytes), format=audio_format)
    wav_buffer = BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_buffer) as source:
        recording = recognizer.record(source)

    return recognizer.recognize_google(recording, language="en-US")


@app.post("/transcribe", response_model=TranscriptionResponse)
async def transcribe_audio(file: UploadFile = File(...)) -> TranscriptionResponse:
    """Transcribe an uploaded audio file to text.

    Args:
        file: Uploaded audio; its content type must be one of the keys of
            ``_FORMAT_BY_CONTENT_TYPE``.

    Returns:
        A ``TranscriptionResponse`` holding the recognized text.

    Raises:
        HTTPException: 400 for an unsupported content type or an empty upload,
            422 when the speech cannot be understood, 502 when the recognition
            service fails, and 500 for any other processing error.
    """
    # Pick the decoder format from the validated content type rather than the
    # filename: the filename is client-controlled, may be missing or lack an
    # extension, and must not be forwarded verbatim as a decoder format.
    audio_format = _FORMAT_BY_CONTENT_TYPE.get(file.content_type)
    if audio_format is None:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    try:
        audio_data = await file.read()
        if not audio_data:
            raise HTTPException(status_code=400, detail="Empty file")

        # Decoding and the recognition request are blocking; run them in the
        # threadpool so the event loop keeps serving other requests.
        text = await run_in_threadpool(_recognize_speech, audio_data, audio_format)
    except HTTPException:
        # Let deliberate HTTP errors through instead of rewrapping them as 500.
        raise
    except sr.UnknownValueError as exc:
        # This exception usually carries no message, so supply one explicitly.
        raise HTTPException(
            status_code=422, detail="Speech could not be understood"
        ) from exc
    except sr.RequestError as exc:
        raise HTTPException(
            status_code=502, detail=f"Speech recognition service error: {exc}"
        ) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return TranscriptionResponse(text=text)