# Spaces: Sleeping  (appears to be a hosting-page status banner captured with the source; not part of the program)
import os
from io import BytesIO

import speech_recognition as sr
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pydub import AudioSegment
# Application object for this module, created with FastAPI's default settings.
app = FastAPI()
class TranscriptionResponse(BaseModel):
    # Response payload returned by the transcription handler.
    # Documented with comments rather than a docstring on purpose: pydantic
    # copies a class docstring into the generated schema description, which
    # would change the published schema.
    text: str  # the recognised transcript
# Accepted upload MIME types, mapped to the container format name handed to
# pydub when the uploaded filename carries no usable extension.
_FORMAT_BY_CONTENT_TYPE = {
    "audio/wav": "wav",
    "audio/x-wav": "wav",
    "audio/mpeg": "mp3",
    "audio/mp3": "mp3",
    "audio/flac": "flac",
}


def _resolve_audio_format(filename, content_type: str) -> str:
    """Pick the pydub format name for an upload.

    The filename extension wins when there is one (this is what the handler
    has always used); otherwise the already-validated content type decides.
    ``filename`` may be ``None`` or lack a dot, in which case the content
    type is used instead of failing or passing a bogus format.
    """
    _, dot, extension = (filename or "").rpartition(".")
    if dot and extension:
        return extension.lower()
    return _FORMAT_BY_CONTENT_TYPE[content_type]


def _transcribe_bytes(audio_bytes: bytes, audio_format: str) -> str:
    """Convert raw audio to WAV in memory and return its transcript.

    Blocking: decoding goes through pydub and recognition performs a network
    request, so call this from a worker thread, not from the event loop.
    """
    segment = AudioSegment.from_file(BytesIO(audio_bytes), format=audio_format)
    wav_buffer = BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)  # rewind so the reader starts at the WAV header

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_buffer) as source:
        recording = recognizer.record(source)
    # Recognize speech using the Google Web Speech API.
    return recognizer.recognize_google(recording, language="en-US")


# NOTE(review): no route decorator is visible on this handler in the reviewed
# excerpt; confirm that it is registered on `app` somewhere.
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English (en-US) text.

    Args:
        file: The uploaded audio. Its content type must be one of the keys
            of ``_FORMAT_BY_CONTENT_TYPE`` (WAV, MP3/MPEG or FLAC).

    Returns:
        A ``TranscriptionResponse`` holding the recognised text.

    Raises:
        HTTPException: 400 when the content type is not supported;
            422 when the recogniser cannot make out any speech;
            502 when the speech recognition service request fails;
            500 for any other failure while reading or decoding the audio.
    """
    if file.content_type not in _FORMAT_BY_CONTENT_TYPE:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    audio_format = _resolve_audio_format(file.filename, file.content_type)
    try:
        audio_bytes = await file.read()
        # Decoding and recognition are blocking; run them off the event loop
        # so other requests keep being served meanwhile.
        text = await run_in_threadpool(
            _transcribe_bytes, audio_bytes, audio_format
        )
    except sr.UnknownValueError as exc:
        # str(exc) is empty for this error, so supply a meaningful message.
        raise HTTPException(
            status_code=422, detail="Could not understand the audio"
        ) from exc
    except sr.RequestError as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Speech recognition service error: {exc}",
        ) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return TranscriptionResponse(text=text)