Spaces:
Sleeping
Sleeping
File size: 1,280 Bytes
a0c7be6 b7fca68 bb780d8 b7fca68 a0c7be6 b7fca68 a0c7be6 b7fca68 2a6e246 b7fca68 2a6e246 b7fca68 bb780d8 b7fca68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import speech_recognition as sr
from io import BytesIO
from pydub import AudioSegment
import os
# Router that the speech-to-text endpoint in this module is registered on.
# NOTE(review): "Speach" is a misspelling of "Speech"; the name is left as-is
# because code outside this file presumably imports it under this spelling.
SpeachRouter = APIRouter()
# Response schema for the speech-to-text endpoint. Documented with comments
# rather than a docstring so the generated schema description is unchanged.
class TranscriptionResponse(BaseModel):
    # Transcript produced for the uploaded audio.
    text: str
# Content types that sr.AudioFile cannot open directly, mapped to the format
# name pydub should decode them as before they are re-encoded to WAV.
_TRANSCODE_FORMATS = {"audio/mpeg": "mp3", "audio/mp3": "mp3"}

# Content types that sr.AudioFile reads without conversion.
_NATIVE_CONTENT_TYPES = frozenset({"audio/wav", "audio/x-wav", "audio/flac"})


def _recognize_speech(audio_data: bytes, content_type: str) -> str:
    """Return the en-US transcript of *audio_data*.

    This function is blocking (audio decoding plus an HTTP request to the
    Google Web Speech API) and is meant to be run off the event loop.

    Raises:
        sr.UnknownValueError: the recognizer could not make out any speech.
        sr.RequestError: the recognition service could not be reached or
            rejected the request.
    """
    source_format = _TRANSCODE_FORMATS.get(content_type)
    if source_format is None:
        audio_file = BytesIO(audio_data)
    else:
        # sr.AudioFile only understands WAV/AIFF/FLAC, so MP3 uploads are
        # transcoded to WAV in memory first.
        audio_file = BytesIO()
        segment = AudioSegment.from_file(BytesIO(audio_data), format=source_format)
        segment.export(audio_file, format="wav")
        audio_file.seek(0)

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    return recognizer.recognize_google(audio, language="en-US")


@SpeachRouter.post("/SpeechToText", response_model=TranscriptionResponse)
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English text.

    Args:
        file: Uploaded audio; its content type must be one of audio/wav,
            audio/x-wav, audio/flac, audio/mpeg or audio/mp3.

    Returns:
        TranscriptionResponse carrying the recognized text.

    Raises:
        HTTPException: 400 for an unsupported content type or an empty
            upload, 422 when no speech could be recognized, 502 when the
            recognition service fails, 500 for any other processing error.
    """
    # Imported here so this block does not depend on a change to the
    # module's import list.
    from fastapi.concurrency import run_in_threadpool

    content_type = file.content_type
    if content_type not in _NATIVE_CONTENT_TYPES and content_type not in _TRANSCODE_FORMATS:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    audio_data = await file.read()
    if not audio_data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        # Decoding and the recognition request are blocking; run them in a
        # worker thread so other requests keep being served meanwhile.
        text = await run_in_threadpool(_recognize_speech, audio_data, content_type)
    except sr.UnknownValueError as e:
        # str(e) is empty for this exception, so supply a real message.
        raise HTTPException(status_code=422, detail="Could not understand the audio") from e
    except sr.RequestError as e:
        raise HTTPException(status_code=502, detail=f"Speech recognition service error: {e}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return TranscriptionResponse(text=text)