sachin committed
Commit: 843c466
Parent(s): 6a6d015
update

Files changed: src/server/main.py (+13 -2)

src/server/main.py CHANGED
@@ -602,6 +602,10 @@ class TranscriptionResponse(BaseModel):
 class ASRModelManager:
     def __init__(self, device_type="cuda"):
         self.device_type = device_type
+        self.model_language = {
+            "kannada": "kn"
+        }
+        '''
         self.model_language = {
             "kannada": "kn", "hindi": "hi", "malayalam": "ml", "assamese": "as", "bengali": "bn",
             "bodo": "brx", "dogri": "doi", "gujarati": "gu", "kashmiri": "ks", "konkani": "kok",
@@ -609,6 +613,7 @@ class ASRModelManager:
             "punjabi": "pa", "sanskrit": "sa", "santali": "sat", "sindhi": "sd", "tamil": "ta",
             "telugu": "te", "urdu": "ur"
         }
+        '''
 
 from fastapi import FastAPI, UploadFile
 import torch
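Note: with the override above, asr_manager.model_language now contains only "kannada", so that is the only value advertised for the language query parameter on the endpoints below. A minimal client sketch of the narrowed /transcribe/ call; the host, port, and file name are illustrative assumptions, not taken from this diff:

# Hypothetical client call; "kannada" is the only language listed after this commit.
import requests

with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/transcribe/",
        params={"language": "kannada"},
        files={"file": ("sample.wav", f, "audio/wav")},
    )
print(resp.status_code, resp.json())  # TranscriptionResponse payload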
@@ -628,12 +633,17 @@ model = AutoModel.from_pretrained("ai4bharat/indic-conformer-600m-multilingual",
 asr_manager = ASRModelManager()
 
 # Language to script mapping
+LANGUAGE_TO_SCRIPT = {
+    "kannada": "kan_Knda"
+}
+'''
 LANGUAGE_TO_SCRIPT = {
     "kannada": "kan_Knda", "hindi": "hin_Deva", "malayalam": "mal_Mlym", "tamil": "tam_Taml",
     "telugu": "tel_Telu", "assamese": "asm_Beng", "bengali": "ben_Beng", "gujarati": "guj_Gujr",
     "marathi": "mar_Deva", "odia": "ory_Orya", "punjabi": "pan_Guru", "urdu": "urd_Arab",
     # Add more as needed
 }
+'''
 
 @app.post("/transcribe/", response_model=TranscriptionResponse)
 async def transcribe_audio(file: UploadFile = File(...), language: str = Query(..., enum=list(asr_manager.model_language.keys()))):
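Note: LANGUAGE_TO_SCRIPT is narrowed in the same way, with the full table parked in a string literal. Any lookup for a non-Kannada language now misses, so whatever consumes this mapping (not shown in the diff) needs a fallback; a hypothetical illustration, with script_for being an invented helper rather than part of main.py:

# Illustrative only; the code that consumes LANGUAGE_TO_SCRIPT is outside this diff.
LANGUAGE_TO_SCRIPT = {
    "kannada": "kan_Knda"
}

def script_for(language: str) -> str:
    # Default to the Kannada script code, the only entry left after this commit.
    return LANGUAGE_TO_SCRIPT.get(language, "kan_Knda")

assert script_for("kannada") == "kan_Knda"
assert script_for("hindi") == "kan_Knda"  # unsupported languages fall back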
@@ -654,7 +664,6 @@ async def speech_to_speech(
     request: Request,  # Inject Request object from FastAPI
     file: UploadFile = File(...),
     language: str = Query(..., enum=list(asr_manager.model_language.keys())),
-    voice: str = Body(default=config.voice)
 ) -> StreamingResponse:
     # Step 1: Transcribe audio to text
     transcription = await transcribe_audio(file, language)
@@ -669,9 +678,11 @@ async def speech_to_speech(
     processed_text = await chat(request, chat_request)  # Pass the injected request
     logger.info(f"Processed text: {processed_text.response}")
 
+    voice_request = KannadaSynthesizeRequest(text=processed_text.response)
+
     # Step 3: Convert processed text to speech
     audio_response = await synthesize_kannada(
-
+        voice_request
     )
     return audio_response
 
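Note: the reworked speech-to-speech flow drops the voice body parameter and instead wraps the chat output in a KannadaSynthesizeRequest before handing it to synthesize_kannada. Both names are defined elsewhere in src/server/main.py; the sketch below only shows the shapes this hunk relies on, inferred from the call sites, so the real definitions may differ:

# Inferred shapes; the actual model and handler live elsewhere in main.py.
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

class KannadaSynthesizeRequest(BaseModel):
    text: str  # chat response text to be synthesized

async def synthesize_kannada(request: KannadaSynthesizeRequest) -> StreamingResponse:
    # Placeholder: the real handler converts request.text to Kannada speech
    # and streams the audio back to the caller.
    raise NotImplementedError

With the voice parameter removed from the signature, callers of speech_to_speech no longer pass a voice value through this route.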