sachin committed on
Commit
7c45c2d
·
1 Parent(s): 4b3b2f1
Files changed (1) hide show
  1. src/server/main.py +73 -0
src/server/main.py CHANGED
@@ -615,6 +615,79 @@ async def visual_query(
615
  except ValueError as e:
616
  logger.error(f"Invalid JSON response: {str(e)}")
617
  raise HTTPException(status_code=500, detail="Invalid response format from visual query service")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
 
619
  if __name__ == "__main__":
620
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")
 
615
  except ValueError as e:
616
  logger.error(f"Invalid JSON response: {str(e)}")
617
  raise HTTPException(status_code=500, detail="Invalid response format from visual query service")
618
+
619
+
620
+ # TODO: move this import to the top of the file with the other imports
621
+ from fastapi.responses import StreamingResponse
622
+
623
+ # Define supported languages for validation
624
+ from enum import Enum
625
class SupportedLanguage(str, Enum):
    """Languages accepted by the speech-to-speech endpoint.

    The ``str`` mixin makes each member compare equal to its plain string
    value, so a member can be used wherever the raw language name is expected.
    """

    kannada = "kannada"
    hindi = "hindi"
    tamil = "tamil"

    def __str__(self) -> str:
        """Return the bare language name (e.g. ``"kannada"``).

        Since Python 3.11, ``format()`` and f-strings of a mixed-in Enum follow
        ``__str__``, whose default is ``"SupportedLanguage.kannada"``. Returning
        the value keeps string interpolation (such as building a query string)
        stable across Python versions.
        """
        return self.value
629
+
630
+ # Add the new endpoint
631
# Upstream service that performs the actual speech-to-speech conversion.
_SPEECH_TO_SPEECH_URL = "https://slabstech-dhwani-internal-api-server.hf.space/v1/speech_to_speech"


def _stream_and_close(upstream, chunk_size: int = 8192):
    """Yield the upstream response body in chunks, then release the connection."""
    try:
        yield from upstream.iter_content(chunk_size=chunk_size)
    finally:
        # With stream=True the connection stays checked out until it is closed.
        upstream.close()


@app.post("/v1/speech_to_speech",
          summary="Speech-to-Speech Conversion",
          description="Convert input speech to processed speech by calling an external speech-to-speech API. Rate limited to 5 requests per minute per user. Requires authentication.",
          tags=["Audio"],
          responses={
              200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
              400: {"description": "Invalid input"},
              401: {"description": "Unauthorized - Token required"},
              429: {"description": "Rate limit exceeded"},
              504: {"description": "External API timeout"},
              500: {"description": "External API error"}
          })
@limiter.limit(settings.speech_rate_limit)
async def speech_to_speech(
    request: Request,
    file: UploadFile = File(..., description="Audio file to process"),
    language: SupportedLanguage = Query(..., description="Language of the audio (kannada, hindi, tamil)"),
    credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)
) -> StreamingResponse:
    """Forward an uploaded audio file to the external speech-to-speech API.

    The caller is authenticated via ``get_current_user``, the upload is posted
    to the upstream service together with the selected language, and the
    upstream body is streamed back to the client as ``audio/mp3``.

    Args:
        request: Incoming request; used for the client address in the log entry.
        file: Uploaded audio file to convert.
        language: Language of the audio.
        credentials: Bearer credentials passed to ``get_current_user``.

    Returns:
        A ``StreamingResponse`` relaying the upstream audio bytes.

    Raises:
        HTTPException: 400 if the upload is empty, 504 if the upstream call
            times out, 500 for any other upstream request failure.
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    from functools import partial

    user_id = await get_current_user(credentials)
    # NOTE: "filename" is a reserved LogRecord attribute; passing it in `extra`
    # makes the standard logging module raise KeyError, so a distinct key is used.
    logger.info("Processing speech-to-speech request", extra={
        "endpoint": "/v1/speech_to_speech",
        "upload_filename": file.filename,
        "language": language.value,
        "client_ip": get_remote_address(request),
        "user_id": user_id
    })

    file_content = await file.read()
    if not file_content:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

    send_upstream = partial(
        requests.post,
        _SPEECH_TO_SPEECH_URL,
        # .value avoids "SupportedLanguage.kannada" ending up in the query
        # string on Python 3.11+, where f-strings follow Enum.__str__.
        params={"language": language.value},
        files={"file": (file.filename, file_content, file.content_type)},
        headers={"accept": "application/json"},
        stream=True,
        timeout=60,
    )

    try:
        # requests is synchronous: run it in the default executor so the event
        # loop keeps serving other requests while the upstream call is pending.
        upstream = await asyncio.get_running_loop().run_in_executor(None, send_upstream)
        try:
            upstream.raise_for_status()
        except requests.RequestException:
            upstream.close()
            raise
    except requests.Timeout as exc:
        logger.error("External speech-to-speech API timed out")
        raise HTTPException(status_code=504, detail="External API timeout") from exc
    except requests.RequestException as exc:
        # Full details (which include the internal upstream URL) stay in the
        # server log; the client only receives a generic message.
        logger.error("External speech-to-speech API error: %s", exc)
        raise HTTPException(status_code=500, detail="External API error") from exc

    return StreamingResponse(
        _stream_and_close(upstream),
        media_type="audio/mp3",
        headers={
            "Content-Disposition": "inline; filename=\"speech.mp3\"",
            "Cache-Control": "no-cache",
        },
    )
691
 
692
  if __name__ == "__main__":
693
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")