sachin committed on
Commit
460983d
·
1 Parent(s): 564e070

Add LLM optimisation

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. requirements.txt +2 -1
  3. src/server/main.py +152 -162
Dockerfile CHANGED
@@ -6,6 +6,6 @@ COPY . .
6
  ENV HF_HOME=/data/huggingface
7
  # Expose port
8
  EXPOSE 7860
9
-
10
  # Start the server
11
  CMD ["python", "/app/src/server/main.py", "--host", "0.0.0.0", "--port", "7860", "--config", "config_two"]
 
6
  ENV HF_HOME=/data/huggingface
7
  # Expose port
8
  EXPOSE 7860
9
+ RUN pip install torchvision
10
  # Start the server
11
  CMD ["python", "/app/src/server/main.py", "--host", "0.0.0.0", "--port", "7860", "--config", "config_two"]
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  torch
 
2
  accelerate
3
  bitsandbytes
4
  pillow
@@ -175,7 +176,7 @@ torch==2.6.0
175
  torchaudio==2.6.0
176
  torchdiffeq==0.2.5
177
  tqdm==4.67.1
178
- transformers==4.50.3
179
  transformers-stream-generator==0.0.5
180
  triton==3.2.0
181
  typer==0.15.2
 
1
  torch
2
+ torchvision
3
  accelerate
4
  bitsandbytes
5
  pillow
 
176
  torchaudio==2.6.0
177
  torchdiffeq==0.2.5
178
  tqdm==4.67.1
179
+ transformers
180
  transformers-stream-generator==0.0.5
181
  triton==3.2.0
182
  typer==0.15.2
src/server/main.py CHANGED
@@ -5,7 +5,7 @@ from time import time
5
  from typing import List
6
  import tempfile
7
  import uvicorn
8
- from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
11
  from PIL import Image
@@ -593,91 +593,10 @@ async def add_request_timing(request: Request, call_next):
593
  limiter = Limiter(key_func=get_remote_address)
594
  app.state.limiter = limiter
595
 
596
- # API Endpoints
597
- @app.post("/audio/speech", response_class=StreamingResponse)
598
- async def synthesize_kannada(request: KannadaSynthesizeRequest):
599
- if not tts_manager.model:
600
- raise HTTPException(status_code=503, detail="TTS model not loaded")
601
- kannada_example = next(ex for ex in EXAMPLES if ex["audio_name"] == "KAN_F (Happy)")
602
- if not request.text.strip():
603
- raise HTTPException(status_code=400, detail="Text to synthesize cannot be empty.")
604
-
605
- audio_buffer = synthesize_speech(
606
- tts_manager,
607
- text=request.text,
608
- ref_audio_name="KAN_F (Happy)",
609
- ref_text=kannada_example["ref_text"]
610
- )
611
-
612
- return StreamingResponse(
613
- audio_buffer,
614
- media_type="audio/wav",
615
- headers={"Content-Disposition": "attachment; filename=synthesized_kannada_speech.wav"}
616
- )
617
-
618
- @app.post("/translate", response_model=TranslationResponse)
619
- async def translate(request: TranslationRequest, translate_manager: TranslateManager = Depends(get_translate_manager)):
620
- input_sentences = request.sentences
621
- src_lang = request.src_lang
622
- tgt_lang = request.tgt_lang
623
-
624
- if not input_sentences:
625
- raise HTTPException(status_code=400, detail="Input sentences are required")
626
 
627
- batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)
628
- inputs = translate_manager.tokenizer(
629
- batch,
630
- truncation=True,
631
- padding="longest",
632
- return_tensors="pt",
633
- return_attention_mask=True,
634
- ).to(translate_manager.device_type)
635
-
636
- with torch.no_grad():
637
- generated_tokens = translate_manager.model.generate(
638
- **inputs,
639
- use_cache=True,
640
- min_length=0,
641
- max_length=256,
642
- num_beams=5,
643
- num_return_sequences=1,
644
- )
645
-
646
- with translate_manager.tokenizer.as_target_tokenizer():
647
- generated_tokens = translate_manager.tokenizer.batch_decode(
648
- generated_tokens.detach().cpu().tolist(),
649
- skip_special_tokens=True,
650
- clean_up_tokenization_spaces=True,
651
- )
652
-
653
- translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
654
- return TranslationResponse(translations=translations)
655
-
656
- async def perform_internal_translation(sentences: List[str], src_lang: str, tgt_lang: str) -> List[str]:
657
- try:
658
- translate_manager = model_manager.get_model(src_lang, tgt_lang)
659
- except ValueError as e:
660
- logger.info(f"Model not preloaded: {str(e)}, loading now...")
661
- key = model_manager._get_model_key(src_lang, tgt_lang)
662
- model_manager.load_model(src_lang, tgt_lang, key)
663
- translate_manager = model_manager.get_model(src_lang, tgt_lang)
664
-
665
- if not translate_manager.model:
666
- translate_manager.load()
667
-
668
- request = TranslationRequest(sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
669
- response = await translate(request, translate_manager)
670
- return response.translations
671
-
672
- @app.get("/v1/health")
673
- async def health_check():
674
- return {"status": "healthy", "model": settings.llm_model_name}
675
-
676
- @app.get("/")
677
- async def home():
678
- return RedirectResponse(url="/docs")
679
-
680
- @app.post("/v1/unload_all_models")
681
  async def unload_all_models():
682
  try:
683
  logger.info("Starting to unload all models...")
@@ -688,7 +607,7 @@ async def unload_all_models():
688
  logger.error(f"Error unloading models: {str(e)}")
689
  raise HTTPException(status_code=500, detail=f"Failed to unload models: {str(e)}")
690
 
691
- @app.post("/v1/load_all_models")
692
  async def load_all_models():
693
  try:
694
  logger.info("Starting to load all models...")
@@ -699,32 +618,15 @@ async def load_all_models():
699
  logger.error(f"Error loading models: {str(e)}")
700
  raise HTTPException(status_code=500, detail=f"Failed to load models: {str(e)}")
701
 
702
- @app.post("/v1/translate", response_model=TranslationResponse)
703
- async def translate_endpoint(request: TranslationRequest):
704
- logger.info(f"Received translation request: {request.dict()}")
705
- try:
706
- translations = await perform_internal_translation(
707
- sentences=request.sentences,
708
- src_lang=request.src_lang,
709
- tgt_lang=request.tgt_lang
710
- )
711
- logger.info(f"Translation successful: {translations}")
712
- return TranslationResponse(translations=translations)
713
- except Exception as e:
714
- logger.error(f"Unexpected error during translation: {str(e)}")
715
- raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
716
-
717
- @app.post("/v1/chat", response_model=ChatResponse)
718
  @limiter.limit(settings.chat_rate_limit)
719
  async def chat(request: Request, chat_request: ChatRequest):
720
  if not chat_request.prompt:
721
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
722
  logger.info(f"Received prompt: {chat_request.prompt}, src_lang: {chat_request.src_lang}, tgt_lang: {chat_request.tgt_lang}")
723
-
724
- EUROPEAN_LANGUAGES = {"deu_Latn", "fra_Latn", "nld_Latn", "spa_Latn", "ita_Latn", "por_Latn", "rus_Cyrl", "pol_Latn"}
725
-
726
  try:
727
- if chat_request.src_lang != "eng_Latn" and chat_request.src_lang not in EUROPEAN_LANGUAGES:
 
728
  translated_prompt = await perform_internal_translation(
729
  sentences=[chat_request.prompt],
730
  src_lang=chat_request.src_lang,
@@ -734,12 +636,14 @@ async def chat(request: Request, chat_request: ChatRequest):
734
  logger.info(f"Translated prompt to English: {prompt_to_process}")
735
  else:
736
  prompt_to_process = chat_request.prompt
737
- logger.info("Prompt in English or European language, no translation needed")
738
 
 
739
  response = await llm_manager.generate(prompt_to_process, settings.max_tokens)
740
- logger.info(f"Generated response: {response}")
741
 
742
- if chat_request.tgt_lang != "eng_Latn" and chat_request.tgt_lang not in EUROPEAN_LANGUAGES:
 
743
  translated_response = await perform_internal_translation(
744
  sentences=[response],
745
  src_lang="eng_Latn",
@@ -749,14 +653,14 @@ async def chat(request: Request, chat_request: ChatRequest):
749
  logger.info(f"Translated response to {chat_request.tgt_lang}: {final_response}")
750
  else:
751
  final_response = response
752
- logger.info(f"Response in {chat_request.tgt_lang}, no translation needed")
753
 
754
  return ChatResponse(response=final_response)
755
  except Exception as e:
756
  logger.error(f"Error processing request: {str(e)}")
757
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
758
 
759
- @app.post("/v1/visual_query/")
760
  async def visual_query(
761
  file: UploadFile = File(...),
762
  query: str = Body(...),
@@ -768,6 +672,7 @@ async def visual_query(
768
  if image.size == (0, 0):
769
  raise HTTPException(status_code=400, detail="Uploaded image is empty or invalid")
770
 
 
771
  if src_lang != "eng_Latn":
772
  translated_query = await perform_internal_translation(
773
  sentences=[query],
@@ -780,9 +685,11 @@ async def visual_query(
780
  query_to_process = query
781
  logger.info("Query already in English, no translation needed")
782
 
 
783
  answer = await llm_manager.vision_query(image, query_to_process)
784
  logger.info(f"Generated English answer: {answer}")
785
 
 
786
  if tgt_lang != "eng_Latn":
787
  translated_answer = await perform_internal_translation(
788
  sentences=[answer],
@@ -800,7 +707,7 @@ async def visual_query(
800
  logger.error(f"Error processing request: {str(e)}")
801
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
802
 
803
- @app.post("/v1/chat_v2", response_model=ChatResponse)
804
  @limiter.limit(settings.chat_rate_limit)
805
  async def chat_v2(
806
  request: Request,
@@ -817,71 +724,154 @@ async def chat_v2(
817
  logger.info(f"Received prompt: {prompt}, src_lang: {src_lang}, tgt_lang: {tgt_lang}, Image provided: {image is not None}")
818
 
819
  try:
 
 
820
  if image:
821
  image_data = await image.read()
822
  if not image_data:
823
  raise HTTPException(status_code=400, detail="Uploaded image is empty")
824
  img = Image.open(io.BytesIO(image_data))
825
 
826
- if src_lang != "eng_Latn":
827
- translated_prompt = await perform_internal_translation(
828
- sentences=[prompt],
829
- src_lang=src_lang,
830
- tgt_lang="eng_Latn"
831
- )
832
- prompt_to_process = translated_prompt[0]
833
- logger.info(f"Translated prompt to English: {prompt_to_process}")
834
- else:
835
- prompt_to_process = prompt
836
- logger.info("Prompt already in English, no translation needed")
837
-
838
- decoded = await llm_manager.chat_v2(img, prompt_to_process)
839
- logger.info(f"Generated English response: {decoded}")
840
-
841
- if tgt_lang != "eng_Latn":
842
- translated_response = await perform_internal_translation(
843
- sentences=[decoded],
844
- src_lang="eng_Latn",
845
- tgt_lang=tgt_lang
846
- )
847
- final_response = translated_response[0]
848
- logger.info(f"Translated response to {tgt_lang}: {final_response}")
849
- else:
850
- final_response = decoded
851
- logger.info("Response kept in English, no translation needed")
852
  else:
853
- if src_lang != "eng_Latn":
854
- translated_prompt = await perform_internal_translation(
855
- sentences=[prompt],
856
- src_lang=src_lang,
857
- tgt_lang="eng_Latn"
858
- )
859
- prompt_to_process = translated_prompt[0]
860
- logger.info(f"Translated prompt to English: {prompt_to_process}")
861
- else:
862
- prompt_to_process = prompt
863
- logger.info("Prompt already in English, no translation needed")
864
 
865
- decoded = await llm_manager.generate(prompt_to_process, settings.max_tokens)
866
- logger.info(f"Generated English response: {decoded}")
 
 
 
 
867
 
868
- if tgt_lang != "eng_Latn":
869
- translated_response = await perform_internal_translation(
870
- sentences=[decoded],
871
- src_lang="eng_Latn",
872
- tgt_lang=tgt_lang
873
- )
874
- final_response = translated_response[0]
875
- logger.info(f"Translated response to {tgt_lang}: {final_response}")
876
- else:
877
- final_response = decoded
878
- logger.info("Response kept in English, no translation needed")
 
879
 
880
  return ChatResponse(response=final_response)
881
  except Exception as e:
882
  logger.error(f"Error processing request: {str(e)}")
883
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  @app.post("/transcribe/", response_model=TranscriptionResponse)
886
  async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
887
  if not asr_manager.model:
 
5
  from typing import List
6
  import tempfile
7
  import uvicorn
8
+ from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
11
  from PIL import Image
 
593
  limiter = Limiter(key_func=get_remote_address)
594
  app.state.limiter = limiter
595
 
596
+ # LLM Router
597
+ llm_router = APIRouter(prefix="/v1", tags=["LLM"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
 
599
+ @llm_router.post("/unload_all_models")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  async def unload_all_models():
601
  try:
602
  logger.info("Starting to unload all models...")
 
607
  logger.error(f"Error unloading models: {str(e)}")
608
  raise HTTPException(status_code=500, detail=f"Failed to unload models: {str(e)}")
609
 
610
+ @llm_router.post("/load_all_models")
611
  async def load_all_models():
612
  try:
613
  logger.info("Starting to load all models...")
 
618
  logger.error(f"Error loading models: {str(e)}")
619
  raise HTTPException(status_code=500, detail=f"Failed to load models: {str(e)}")
620
 
621
+ @llm_router.post("/chat", response_model=ChatResponse)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
  @limiter.limit(settings.chat_rate_limit)
623
  async def chat(request: Request, chat_request: ChatRequest):
624
  if not chat_request.prompt:
625
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
626
  logger.info(f"Received prompt: {chat_request.prompt}, src_lang: {chat_request.src_lang}, tgt_lang: {chat_request.tgt_lang}")
 
 
 
627
  try:
628
+ # Step 1: Translate prompt to English if needed
629
+ if chat_request.src_lang != "eng_Latn":
630
  translated_prompt = await perform_internal_translation(
631
  sentences=[chat_request.prompt],
632
  src_lang=chat_request.src_lang,
 
636
  logger.info(f"Translated prompt to English: {prompt_to_process}")
637
  else:
638
  prompt_to_process = chat_request.prompt
639
+ logger.info("Prompt already in English, no translation needed")
640
 
641
+ # Step 2: Generate response in English
642
  response = await llm_manager.generate(prompt_to_process, settings.max_tokens)
643
+ logger.info(f"Generated English response: {response}")
644
 
645
+ # Step 3: Translate response to target language if needed
646
+ if chat_request.tgt_lang != "eng_Latn":
647
  translated_response = await perform_internal_translation(
648
  sentences=[response],
649
  src_lang="eng_Latn",
 
653
  logger.info(f"Translated response to {chat_request.tgt_lang}: {final_response}")
654
  else:
655
  final_response = response
656
+ logger.info("Response kept in English, no translation needed")
657
 
658
  return ChatResponse(response=final_response)
659
  except Exception as e:
660
  logger.error(f"Error processing request: {str(e)}")
661
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
662
 
663
+ @llm_router.post("/visual_query/")
664
  async def visual_query(
665
  file: UploadFile = File(...),
666
  query: str = Body(...),
 
672
  if image.size == (0, 0):
673
  raise HTTPException(status_code=400, detail="Uploaded image is empty or invalid")
674
 
675
+ # Step 1: Translate query to English if needed
676
  if src_lang != "eng_Latn":
677
  translated_query = await perform_internal_translation(
678
  sentences=[query],
 
685
  query_to_process = query
686
  logger.info("Query already in English, no translation needed")
687
 
688
+ # Step 2: Generate answer in English
689
  answer = await llm_manager.vision_query(image, query_to_process)
690
  logger.info(f"Generated English answer: {answer}")
691
 
692
+ # Step 3: Translate answer to target language if needed
693
  if tgt_lang != "eng_Latn":
694
  translated_answer = await perform_internal_translation(
695
  sentences=[answer],
 
707
  logger.error(f"Error processing request: {str(e)}")
708
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
709
 
710
+ @llm_router.post("/chat_v2", response_model=ChatResponse)
711
  @limiter.limit(settings.chat_rate_limit)
712
  async def chat_v2(
713
  request: Request,
 
724
  logger.info(f"Received prompt: {prompt}, src_lang: {src_lang}, tgt_lang: {tgt_lang}, Image provided: {image is not None}")
725
 
726
  try:
727
+ # Step 1: Handle image if provided
728
+ img = None
729
  if image:
730
  image_data = await image.read()
731
  if not image_data:
732
  raise HTTPException(status_code=400, detail="Uploaded image is empty")
733
  img = Image.open(io.BytesIO(image_data))
734
 
735
+ # Step 2: Translate prompt to English if needed
736
+ if src_lang != "eng_Latn":
737
+ translated_prompt = await perform_internal_translation(
738
+ sentences=[prompt],
739
+ src_lang=src_lang,
740
+ tgt_lang="eng_Latn"
741
+ )
742
+ prompt_to_process = translated_prompt[0]
743
+ logger.info(f"Translated prompt to English: {prompt_to_process}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
  else:
745
+ prompt_to_process = prompt
746
+ logger.info("Prompt already in English, no translation needed")
 
 
 
 
 
 
 
 
 
747
 
748
+ # Step 3: Generate response in English
749
+ if img:
750
+ response = await llm_manager.chat_v2(img, prompt_to_process)
751
+ else:
752
+ response = await llm_manager.generate(prompt_to_process, settings.max_tokens)
753
+ logger.info(f"Generated English response: {response}")
754
 
755
+ # Step 4: Translate response to target language if needed
756
+ if tgt_lang != "eng_Latn":
757
+ translated_response = await perform_internal_translation(
758
+ sentences=[response],
759
+ src_lang="eng_Latn",
760
+ tgt_lang=tgt_lang
761
+ )
762
+ final_response = translated_response[0]
763
+ logger.info(f"Translated response to {tgt_lang}: {final_response}")
764
+ else:
765
+ final_response = response
766
+ logger.info("Response kept in English, no translation needed")
767
 
768
  return ChatResponse(response=final_response)
769
  except Exception as e:
770
  logger.error(f"Error processing request: {str(e)}")
771
  raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
772
 
773
+ # Include LLM Router
774
+ app.include_router(llm_router)
775
+
776
+ # Other API Endpoints
777
+ @app.post("/audio/speech", response_class=StreamingResponse)
778
+ async def synthesize_kannada(request: KannadaSynthesizeRequest):
779
+ if not tts_manager.model:
780
+ raise HTTPException(status_code=503, detail="TTS model not loaded")
781
+ kannada_example = next(ex for ex in EXAMPLES if ex["audio_name"] == "KAN_F (Happy)")
782
+ if not request.text.strip():
783
+ raise HTTPException(status_code=400, detail="Text to synthesize cannot be empty.")
784
+
785
+ audio_buffer = synthesize_speech(
786
+ tts_manager,
787
+ text=request.text,
788
+ ref_audio_name="KAN_F (Happy)",
789
+ ref_text=kannada_example["ref_text"]
790
+ )
791
+
792
+ return StreamingResponse(
793
+ audio_buffer,
794
+ media_type="audio/wav",
795
+ headers={"Content-Disposition": "attachment; filename=synthesized_kannada_speech.wav"}
796
+ )
797
+
798
+ @app.post("/translate", response_model=TranslationResponse)
799
+ async def translate(request: TranslationRequest, translate_manager: TranslateManager = Depends(get_translate_manager)):
800
+ input_sentences = request.sentences
801
+ src_lang = request.src_lang
802
+ tgt_lang = request.tgt_lang
803
+
804
+ if not input_sentences:
805
+ raise HTTPException(status_code=400, detail="Input sentences are required")
806
+
807
+ batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)
808
+ inputs = translate_manager.tokenizer(
809
+ batch,
810
+ truncation=True,
811
+ padding="longest",
812
+ return_tensors="pt",
813
+ return_attention_mask=True,
814
+ ).to(translate_manager.device_type)
815
+
816
+ with torch.no_grad():
817
+ generated_tokens = translate_manager.model.generate(
818
+ **inputs,
819
+ use_cache=True,
820
+ min_length=0,
821
+ max_length=256,
822
+ num_beams=5,
823
+ num_return_sequences=1,
824
+ )
825
+
826
+ with translate_manager.tokenizer.as_target_tokenizer():
827
+ generated_tokens = translate_manager.tokenizer.batch_decode(
828
+ generated_tokens.detach().cpu().tolist(),
829
+ skip_special_tokens=True,
830
+ clean_up_tokenization_spaces=True,
831
+ )
832
+
833
+ translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
834
+ return TranslationResponse(translations=translations)
835
+
836
+ async def perform_internal_translation(sentences: List[str], src_lang: str, tgt_lang: str) -> List[str]:
837
+ try:
838
+ translate_manager = model_manager.get_model(src_lang, tgt_lang)
839
+ except ValueError as e:
840
+ logger.info(f"Model not preloaded: {str(e)}, loading now...")
841
+ key = model_manager._get_model_key(src_lang, tgt_lang)
842
+ model_manager.load_model(src_lang, tgt_lang, key)
843
+ translate_manager = model_manager.get_model(src_lang, tgt_lang)
844
+
845
+ if not translate_manager.model:
846
+ translate_manager.load()
847
+
848
+ request = TranslationRequest(sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
849
+ response = await translate(request, translate_manager)
850
+ return response.translations
851
+
852
+ @app.get("/v1/health")
853
+ async def health_check():
854
+ return {"status": "healthy", "model": settings.llm_model_name}
855
+
856
+ @app.get("/")
857
+ async def home():
858
+ return RedirectResponse(url="/docs")
859
+
860
+ @app.post("/v1/translate", response_model=TranslationResponse)
861
+ async def translate_endpoint(request: TranslationRequest):
862
+ logger.info(f"Received translation request: {request.dict()}")
863
+ try:
864
+ translations = await perform_internal_translation(
865
+ sentences=request.sentences,
866
+ src_lang=request.src_lang,
867
+ tgt_lang=request.tgt_lang
868
+ )
869
+ logger.info(f"Translation successful: {translations}")
870
+ return TranslationResponse(translations=translations)
871
+ except Exception as e:
872
+ logger.error(f"Unexpected error during translation: {str(e)}")
873
+ raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
874
+
875
  @app.post("/transcribe/", response_model=TranscriptionResponse)
876
  async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
877
  if not asr_manager.model: