"""FastAPI service exposing one endpoint that transcribes uploaded audio."""

import os
from io import BytesIO

import speech_recognition as sr
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError

app = FastAPI()

# Accepted upload MIME types mapped to the decoder format handed to pydub.
# The format is taken from this allow-list rather than from the client-supplied
# filename, which may be missing, lack an extension, or carry arbitrary text.
_FORMAT_BY_CONTENT_TYPE = {
    "audio/wav": "wav",
    "audio/x-wav": "wav",
    "audio/mpeg": "mp3",
    "audio/mp3": "mp3",
    "audio/flac": "flac",
}


class TranscriptionResponse(BaseModel):
    """Response body of ``POST /transcribe``."""

    text: str  # transcript returned by the speech recognizer


def _transcribe_bytes(audio_bytes: bytes, audio_format: str) -> str:
    """Decode *audio_bytes*, convert to WAV in memory, and return the transcript.

    This function is synchronous and blocking (audio decoding plus an HTTP
    call to the Google Web Speech API); call it from a worker thread.

    Args:
        audio_bytes: Raw contents of the uploaded file.
        audio_format: Decoder format name passed to pydub ("wav", "mp3", "flac").

    Returns:
        The recognized text (recognition language is fixed to ``en-US``).

    Raises:
        pydub.exceptions.CouldntDecodeError: The bytes could not be decoded.
        sr.UnknownValueError: The recognizer could not understand the audio.
        sr.RequestError: The recognition service request failed.
    """
    segment = AudioSegment.from_file(BytesIO(audio_bytes), format=audio_format)

    # speech_recognition's AudioFile reads WAV, so re-encode whatever came in.
    wav_buffer = BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_buffer) as source:
        recording = recognizer.record(source)
    return recognizer.recognize_google(recording, language="en-US")


@app.post("/transcribe", response_model=TranscriptionResponse)
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded WAV, MP3, or FLAC file to English text.

    Args:
        file: The uploaded audio file; its content type selects the decoder.

    Returns:
        A ``TranscriptionResponse`` holding the recognized text.

    Raises:
        HTTPException: 400 for an unsupported content type, an empty upload,
            or undecodable audio; 422 when no speech could be recognized;
            502 when the recognition service request fails; 500 for any
            other unexpected error.
    """
    # Drop MIME parameters (e.g. "; codecs=...") and case before the lookup.
    content_type = (file.content_type or "").split(";")[0].strip().lower()
    audio_format = _FORMAT_BY_CONTENT_TYPE.get(content_type)
    if audio_format is None:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    audio_data = await file.read()
    if not audio_data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        # Decoding and the recognition request are blocking; keep them off
        # the event loop so other requests are not stalled.
        text = await run_in_threadpool(_transcribe_bytes, audio_data, audio_format)
    except CouldntDecodeError as e:
        raise HTTPException(
            status_code=400, detail=f"Could not decode audio: {e}"
        ) from e
    except sr.UnknownValueError as e:
        # str(e) is empty for this exception, so supply a meaningful message.
        raise HTTPException(
            status_code=422, detail="Could not understand the audio"
        ) from e
    except sr.RequestError as e:
        raise HTTPException(
            status_code=502, detail=f"Speech recognition service error: {e}"
        ) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return TranscriptionResponse(text=text)