sachin committed on
Commit
95359fb
·
1 Parent(s): cd53954
Files changed (1) hide show
  1. src/server/main.py +74 -0
src/server/main.py CHANGED
@@ -997,6 +997,80 @@ async def speech_to_speech(
997
  logger.error(f"External speech-to-speech API error: {str(e)}", extra={"user_id": user_id})
998
  raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
  if __name__ == "__main__":
1001
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")
1002
  parser.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")
 
997
  logger.error(f"External speech-to-speech API error: {str(e)}", extra={"user_id": user_id})
998
  raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
999
 
1000
+
1001
# NOTE(review): the OpenAPI description and the 401/429 responses below advertise
# rate limiting, authentication and an X-Session-Key header, but nothing in this
# handler enforces them. Confirm they are applied elsewhere (middleware or
# dependencies) or add them here.
@app.post("/v1/speech_to_speech_v2",
          summary="Speech-to-Speech Conversion",
          description="Convert input encrypted speech to processed speech in the specified encrypted language by calling an external speech-to-speech API. Rate limited to 5 requests per minute per user. Requires authentication and X-Session-Key header.",
          tags=["Audio"],
          responses={
              200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
              400: {"description": "Invalid input, encrypted audio, or language"},
              401: {"description": "Unauthorized - Token required"},
              429: {"description": "Rate limit exceeded"},
              504: {"description": "External API timeout"},
              500: {"description": "External API error"}
          })
async def speech_to_speech_v2(
    request: Request,
    file: UploadFile = File(..., description="Encrypted audio file to process"),
    language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
) -> StreamingResponse:
    """Forward an uploaded audio file to the external speech-to-speech API and stream the reply.

    Args:
        request: Incoming request; only used to log the client address.
        file: Uploaded audio file, forwarded unchanged to the external API.
        language: Language of the audio; must be one of the ``SupportedLanguage`` values.

    Returns:
        A ``StreamingResponse`` relaying the external API's body as ``audio/mp3``.

    Raises:
        HTTPException: 400 if ``language`` is not supported, 504 if the external
            API times out, 500 for any other ``requests`` failure (including a
            non-2xx status from the external API).
    """
    # Local imports: the file's top-level import block is not visible from here.
    import asyncio
    import functools

    # NOTE(review): despite the "encrypted" wording in the API docs, no decryption
    # is performed on the language or the file; both are used as received.
    decrypted_language = language

    # Validate language
    allowed_languages = [lang.value for lang in SupportedLanguage]
    if decrypted_language not in allowed_languages:
        raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")

    logger.info("Processing speech-to-speech request", extra={
        "endpoint": "/v1/speech_to_speech_v2",
        "audio_filename": file.filename,
        "language": decrypted_language,
        "client_ip": get_remote_address(request),
    })

    file_content = await file.read()
    files = {"file": (file.filename, file_content, file.content_type)}
    external_url = f"{settings.external_api_base_url}/v1/speech_to_speech"

    # requests.post is blocking; run it in the default executor so the event
    # loop stays free while waiting (up to the 60 s timeout) on the external API.
    send_request = functools.partial(
        requests.post,
        external_url,
        params={"language": decrypted_language},
        files=files,
        headers={"accept": "application/json"},
        stream=True,
        timeout=60,
    )

    try:
        response = await asyncio.get_running_loop().run_in_executor(None, send_request)
    except requests.Timeout as e:
        logger.error("External speech-to-speech API timed out")
        raise HTTPException(status_code=504, detail="External API timeout") from e
    except requests.RequestException as e:
        logger.error(f"External speech-to-speech API error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"External API error: {str(e)}") from e

    try:
        response.raise_for_status()
    except requests.RequestException as e:
        # With stream=True the connection stays open until the body is consumed
        # or the response is closed, so release it before reporting the error.
        response.close()
        logger.error(f"External speech-to-speech API error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"External API error: {str(e)}") from e

    def stream_and_close():
        """Yield the upstream body in chunks, then release the upstream connection."""
        try:
            yield from response.iter_content(chunk_size=8192)
        finally:
            response.close()

    headers = {
        "Content-Disposition": 'inline; filename="speech.mp3"',
        "Cache-Control": "no-cache",
        "Content-Type": "audio/mp3",
    }

    return StreamingResponse(
        stream_and_close(),
        media_type="audio/mp3",
        headers=headers,
    )
1072
+
1073
+
1074
  if __name__ == "__main__":
1075
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")
1076
  parser.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")