sachin committed · update-endpoints
Commit: 636e178 · Parent: 120ac19

Files changed:
- docs/issues.md (+1 -6)
- src/server/main.py (+3 -3)
docs/issues.md
CHANGED
@@ -1,6 +1 @@
-
-W0317 22:33:36.322000 1 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode
-CUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed 51 distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.
-
-
-
+https://github.com/codelion/optillm
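The log lines removed from docs/issues.md name two torch._inductor settings for limiting CUDAGraph re-recording under dynamic shapes. A minimal sketch of applying exactly the knobs the warning mentions; the model and compile call are illustrative placeholders, not taken from this repo:

import torch
import torch._inductor.config as inductor_config

# Both settings come straight from the removed warning text; availability and
# defaults can vary across torch versions.
inductor_config.triton.cudagraph_skip_dynamic_graphs = True        # skip CUDAGraph recording for dynamic-shape graphs
inductor_config.triton.cudagraph_dynamic_shape_warn_limit = None   # silence the "51 distinct sizes" warning

# Illustrative placeholder module; the real model is whatever this server loads.
model = torch.nn.Linear(8, 8)
compiled_model = torch.compile(model, mode="reduce-overhead")

The warning also suggests padding inputs to a small set of fixed shapes, which avoids re-recording entirely at the cost of some wasted compute per batch.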
src/server/main.py
CHANGED
@@ -579,7 +579,7 @@ limiter = Limiter(key_func=get_remote_address)
 app.state.limiter = limiter
 
 # Endpoints
-@app.post("/audio/speech", response_class=StreamingResponse)
+@app.post("/v1/audio/speech", response_class=StreamingResponse)
 async def synthesize_kannada(request: KannadaSynthesizeRequest):
     if not tts_manager.model:
         raise HTTPException(status_code=503, detail="TTS model not loaded")
@@ -593,7 +593,7 @@ async def synthesize_kannada(request: KannadaSynthesizeRequest):
         headers={"Content-Disposition": "attachment; filename=synthesized_kannada_speech.wav"}
     )
 
-@app.post("/translate", response_model=TranslationResponse)
+@app.post("/v1/translate", response_model=TranslationResponse)
 async def translate(request: TranslationRequest, translate_manager: TranslateManager = Depends(get_translate_manager)):
     if not request.sentences:
         raise HTTPException(status_code=400, detail="Input sentences are required")
@@ -827,7 +827,7 @@ async def chat_v2(
         logger.error(f"Error processing request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
-@app.post("/transcribe/", response_model=TranscriptionResponse)
+@app.post("/v1/transcribe/", response_model=TranscriptionResponse)
 async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
     async with request_queue:
         if not asr_manager.model:
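With this commit the three routes gain a /v1 prefix, so existing clients must update their URLs. A hedged client-side sketch: the base URL, the payload field names other than `sentences`, and the language value are assumptions inferred from the request models visible in the diff, not verified against the full schema.

import requests

BASE_URL = "http://localhost:8000"  # assumption: adjust to wherever the server runs

# Speech synthesis moved from /audio/speech to /v1/audio/speech.
# The "text" field of KannadaSynthesizeRequest is an assumption.
resp = requests.post(f"{BASE_URL}/v1/audio/speech", json={"text": "ನಮಸ್ಕಾರ"})
resp.raise_for_status()
with open("synthesized_kannada_speech.wav", "wb") as out:
    out.write(resp.content)

# Translation moved from /translate to /v1/translate; "sentences" is shown in the
# diff, the language fields are assumptions.
resp = requests.post(
    f"{BASE_URL}/v1/translate",
    json={"sentences": ["Hello"], "src_lang": "eng_Latn", "tgt_lang": "kan_Knda"},
)
print(resp.json())

# Transcription moved from /transcribe/ to /v1/transcribe/; the language is a query
# parameter restricted to asr_manager.model_language keys ("kannada" is an assumption).
with open("sample.wav", "rb") as audio:
    resp = requests.post(
        f"{BASE_URL}/v1/transcribe/",
        params={"language": "kannada"},
        files={"file": ("sample.wav", audio, "audio/wav")},
    )
print(resp.json())

An alternative to editing each decorator would be registering these handlers on a fastapi.APIRouter(prefix="/v1") and including it on the app; the commit instead rewrites the paths in place, which keeps the diff to three lines.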
|