sachin committed on
Commit
636e178
·
1 Parent(s): 120ac19

update-endpoints

Browse files
Files changed (2) hide show
  1. docs/issues.md +1 -6
  2. src/server/main.py +3 -3
docs/issues.md CHANGED
@@ -1,6 +1 @@
1
- 2025-03-17 22:33:24,340 - parler_tts.modeling_parler_tts - WARNING - `prompt_attention_mask` is specified but `attention_mask` is not. A full `attention_mask` will be created. Make sure this is the intended behaviour.
2
- W0317 22:33:36.322000 1 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode
3
- CUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed 51 distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.
4
-
5
-
6
-
 
1
+ https://github.com/codelion/optillm
 
 
 
 
 
src/server/main.py CHANGED
@@ -579,7 +579,7 @@ limiter = Limiter(key_func=get_remote_address)
579
  app.state.limiter = limiter
580
 
581
  # Endpoints
582
- @app.post("/audio/speech", response_class=StreamingResponse)
583
  async def synthesize_kannada(request: KannadaSynthesizeRequest):
584
  if not tts_manager.model:
585
  raise HTTPException(status_code=503, detail="TTS model not loaded")
@@ -593,7 +593,7 @@ async def synthesize_kannada(request: KannadaSynthesizeRequest):
593
  headers={"Content-Disposition": "attachment; filename=synthesized_kannada_speech.wav"}
594
  )
595
 
596
- @app.post("/translate", response_model=TranslationResponse)
597
  async def translate(request: TranslationRequest, translate_manager: TranslateManager = Depends(get_translate_manager)):
598
  if not request.sentences:
599
  raise HTTPException(status_code=400, detail="Input sentences are required")
@@ -827,7 +827,7 @@ async def chat_v2(
827
  logger.error(f"Error processing request: {str(e)}")
828
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
829
 
830
- @app.post("/transcribe/", response_model=TranscriptionResponse)
831
  async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
832
  async with request_queue:
833
  if not asr_manager.model:
 
579
  app.state.limiter = limiter
580
 
581
  # Endpoints
582
+ @app.post("/v1/audio/speech", response_class=StreamingResponse)
583
  async def synthesize_kannada(request: KannadaSynthesizeRequest):
584
  if not tts_manager.model:
585
  raise HTTPException(status_code=503, detail="TTS model not loaded")
 
593
  headers={"Content-Disposition": "attachment; filename=synthesized_kannada_speech.wav"}
594
  )
595
 
596
+ @app.post("/v1/translate", response_model=TranslationResponse)
597
  async def translate(request: TranslationRequest, translate_manager: TranslateManager = Depends(get_translate_manager)):
598
  if not request.sentences:
599
  raise HTTPException(status_code=400, detail="Input sentences are required")
 
827
  logger.error(f"Error processing request: {str(e)}")
828
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
829
 
830
+ @app.post("/v1/transcribe/", response_model=TranscriptionResponse)
831
  async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
832
  async with request_queue:
833
  if not asr_manager.model: