"""Speech-to-text HTTP endpoint backed by the Google Web Speech API."""

import os
from io import BytesIO

import speech_recognition as sr
from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pydub import AudioSegment

# Spelling kept as-is: this is the module's public router name.
SpeachRouter = APIRouter()

# MIME types that must be decoded to WAV first, because sr.AudioFile only
# reads WAV/AIFF/FLAC containers.
_MP3_CONTENT_TYPES = frozenset({"audio/mpeg", "audio/mp3"})
# MIME types that sr.AudioFile can open directly.
_NATIVE_CONTENT_TYPES = frozenset({"audio/wav", "audio/x-wav", "audio/flac"})
_SUPPORTED_CONTENT_TYPES = _MP3_CONTENT_TYPES | _NATIVE_CONTENT_TYPES


class TranscriptionResponse(BaseModel):
    """Response body of the transcription endpoint."""

    # Text recognized in the uploaded audio.
    text: str


def _to_recognizer_stream(audio_bytes: bytes, content_type: str) -> BytesIO:
    """Return an in-memory stream that ``sr.AudioFile`` is able to open.

    WAV and FLAC uploads are wrapped unchanged; MP3 uploads are decoded with
    pydub and re-encoded as WAV.
    """
    raw_stream = BytesIO(audio_bytes)
    if content_type not in _MP3_CONTENT_TYPES:
        return raw_stream

    wav_stream = BytesIO()
    AudioSegment.from_file(raw_stream, format="mp3").export(wav_stream, format="wav")
    wav_stream.seek(0)  # rewind so the reader starts at the WAV header
    return wav_stream


def _transcribe(audio_bytes: bytes, content_type: str) -> str:
    """Transcribe *audio_bytes* synchronously and return the recognized text.

    This performs audio decoding and a blocking network request, so it is
    meant to be run in a worker thread rather than on the event loop.

    Raises:
        sr.UnknownValueError: the speech could not be understood.
        sr.RequestError: the recognition service could not be reached or
            rejected the request.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(_to_recognizer_stream(audio_bytes, content_type)) as source:
        audio = recognizer.record(source)
    return recognizer.recognize_google(audio, language="en-US")


@SpeachRouter.post("/SpeechToText", response_model=TranscriptionResponse)
async def transcribe_audio(file: UploadFile = File(...)) -> TranscriptionResponse:
    """Transcribe an uploaded audio file to English (en-US) text.

    Args:
        file: Uploaded audio; its content type must be WAV, FLAC or MP3.

    Returns:
        A ``TranscriptionResponse`` holding the recognized text.

    Raises:
        HTTPException: 400 for an unsupported content type or an empty upload,
            422 when the speech cannot be understood, 502 when the recognition
            service fails, and 500 for any other processing error.
    """
    if file.content_type not in _SUPPORTED_CONTENT_TYPES:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    audio_data = await file.read()
    if not audio_data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        # Decoding and the recognition request are blocking calls; run them
        # off the event loop so other requests keep being served.
        text = await run_in_threadpool(_transcribe, audio_data, file.content_type)
    except sr.UnknownValueError as e:
        # str(e) is empty for this exception, so supply a readable message.
        raise HTTPException(
            status_code=422, detail="Speech could not be understood"
        ) from e
    except sr.RequestError as e:
        raise HTTPException(
            status_code=502, detail=f"Speech recognition service error: {e}"
        ) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return TranscriptionResponse(text=text)