sachin committed on
Commit
843c466
·
1 Parent(s): 6a6d015
Files changed (1) hide show
  1. src/server/main.py +13 -2
src/server/main.py CHANGED
@@ -602,6 +602,10 @@ class TranscriptionResponse(BaseModel):
602
  class ASRModelManager:
603
  def __init__(self, device_type="cuda"):
604
  self.device_type = device_type
 
 
 
 
605
  self.model_language = {
606
  "kannada": "kn", "hindi": "hi", "malayalam": "ml", "assamese": "as", "bengali": "bn",
607
  "bodo": "brx", "dogri": "doi", "gujarati": "gu", "kashmiri": "ks", "konkani": "kok",
@@ -609,6 +613,7 @@ class ASRModelManager:
609
  "punjabi": "pa", "sanskrit": "sa", "santali": "sat", "sindhi": "sd", "tamil": "ta",
610
  "telugu": "te", "urdu": "ur"
611
  }
 
612
 
613
  from fastapi import FastAPI, UploadFile
614
  import torch
@@ -628,12 +633,17 @@ model = AutoModel.from_pretrained("ai4bharat/indic-conformer-600m-multilingual",
628
  asr_manager = ASRModelManager()
629
 
630
  # Language to script mapping
 
 
 
 
631
  LANGUAGE_TO_SCRIPT = {
632
  "kannada": "kan_Knda", "hindi": "hin_Deva", "malayalam": "mal_Mlym", "tamil": "tam_Taml",
633
  "telugu": "tel_Telu", "assamese": "asm_Beng", "bengali": "ben_Beng", "gujarati": "guj_Gujr",
634
  "marathi": "mar_Deva", "odia": "ory_Orya", "punjabi": "pan_Guru", "urdu": "urd_Arab",
635
  # Add more as needed
636
  }
 
637
 
638
  @app.post("/transcribe/", response_model=TranscriptionResponse)
639
  async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
@@ -654,7 +664,6 @@ async def speech_to_speech(
654
  request: Request, # Inject Request object from FastAPI
655
  file: UploadFile = File(...),
656
  language: str = Query(..., enum=list(asr_manager.model_language.keys())),
657
- voice: str = Body(default=config.voice)
658
  ) -> StreamingResponse:
659
  # Step 1: Transcribe audio to text
660
  transcription = await transcribe_audio(file, language)
@@ -669,9 +678,11 @@ async def speech_to_speech(
669
  processed_text = await chat(request, chat_request) # Pass the injected request
670
  logger.info(f"Processed text: {processed_text.response}")
671
 
 
 
672
  # Step 3: Convert processed text to speech
673
  audio_response = await synthesize_kannada(
674
- input=processed_text.response,
675
  )
676
  return audio_response
677
 
 
602
  class ASRModelManager:
603
  def __init__(self, device_type="cuda"):
604
  self.device_type = device_type
605
+ self.model_language = {
606
+ "kannada": "kn"
607
+ }
608
+ '''
609
  self.model_language = {
610
  "kannada": "kn", "hindi": "hi", "malayalam": "ml", "assamese": "as", "bengali": "bn",
611
  "bodo": "brx", "dogri": "doi", "gujarati": "gu", "kashmiri": "ks", "konkani": "kok",
 
613
  "punjabi": "pa", "sanskrit": "sa", "santali": "sat", "sindhi": "sd", "tamil": "ta",
614
  "telugu": "te", "urdu": "ur"
615
  }
616
+ '''
617
 
618
  from fastapi import FastAPI, UploadFile
619
  import torch
 
633
  asr_manager = ASRModelManager()
634
 
635
  # Language to script mapping
636
+ LANGUAGE_TO_SCRIPT = {
637
+ "kannada": "kan_Knda"
638
+ }
639
+ '''
640
  LANGUAGE_TO_SCRIPT = {
641
  "kannada": "kan_Knda", "hindi": "hin_Deva", "malayalam": "mal_Mlym", "tamil": "tam_Taml",
642
  "telugu": "tel_Telu", "assamese": "asm_Beng", "bengali": "ben_Beng", "gujarati": "guj_Gujr",
643
  "marathi": "mar_Deva", "odia": "ory_Orya", "punjabi": "pan_Guru", "urdu": "urd_Arab",
644
  # Add more as needed
645
  }
646
+ '''
647
 
648
  @app.post("/transcribe/", response_model=TranscriptionResponse)
649
  async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
 
664
  request: Request, # Inject Request object from FastAPI
665
  file: UploadFile = File(...),
666
  language: str = Query(..., enum=list(asr_manager.model_language.keys())),
 
667
  ) -> StreamingResponse:
668
  # Step 1: Transcribe audio to text
669
  transcription = await transcribe_audio(file, language)
 
678
  processed_text = await chat(request, chat_request) # Pass the injected request
679
  logger.info(f"Processed text: {processed_text.response}")
680
 
681
+ voice_request = KannadaSynthesizeRequest(text=processed_text.response)
682
+
683
  # Step 3: Convert processed text to speech
684
  audio_response = await synthesize_kannada(
685
+ voice_request
686
  )
687
  return audio_response
688