Spaces:

slabstech
/

dhwani-internal-api-server

Sleeping

App Files Community

sachin committed 20 days ago

Commit

af923b7

1 Parent(s): 3f6f875

add-tts

Browse files

Files changed (1) hide show

src/server/main.py +4 -4

src/server/main.py CHANGED Viewed

@@ -176,7 +176,7 @@ async def generate_audio(
     response_format: Annotated[ResponseFormat, Body(include_in_schema=False)] = config.response_format,
     speed: Annotated[float, Body(include_in_schema=False)] = SPEED,
 ) -> StreamingResponse:
-    tts, tokenizer, description_tokenizer = model_manager.get_or_load_model(model)
     if speed != SPEED:
         logger.warning(
             "Specifying speed isn't supported by this model. Audio will be generated with the default speed"
@@ -190,11 +190,11 @@ async def generate_audio(
         desc_inputs = description_tokenizer(voice,
                                           return_tensors="pt",
                                           padding="max_length",
-                                          max_length=model_manager.max_length).to(device)
         prompt_inputs = tokenizer(input,
                                 return_tensors="pt",
                                 padding="max_length",
-                                max_length=model_manager.max_length).to(device)
         # Use the tensor fields directly instead of BatchEncoding object
         input_ids = desc_inputs["input_ids"]
@@ -262,7 +262,7 @@ async def generate_audio_batch(
     response_format: Annotated[ResponseFormat, Body()] = config.response_format,
     speed: Annotated[float, Body(include_in_schema=False)] = SPEED,
 ) -> StreamingResponse:
-    tts, tokenizer, description_tokenizer = model_manager.get_or_load_model(model)
     if speed != SPEED:
         logger.warning(
             "Specifying speed isn't supported by this model. Audio will be generated with the default speed"

     response_format: Annotated[ResponseFormat, Body(include_in_schema=False)] = config.response_format,
     speed: Annotated[float, Body(include_in_schema=False)] = SPEED,
 ) -> StreamingResponse:
+    tts, tokenizer, description_tokenizer = tts_model_manager.get_or_load_model(model)
     if speed != SPEED:
         logger.warning(
             "Specifying speed isn't supported by this model. Audio will be generated with the default speed"
         desc_inputs = description_tokenizer(voice,
                                           return_tensors="pt",
                                           padding="max_length",
+                                          max_length=tts_model_manager.max_length).to(device)
         prompt_inputs = tokenizer(input,
                                 return_tensors="pt",
                                 padding="max_length",
+                                max_length=tts_model_manager.max_length).to(device)
         # Use the tensor fields directly instead of BatchEncoding object
         input_ids = desc_inputs["input_ids"]
     response_format: Annotated[ResponseFormat, Body()] = config.response_format,
     speed: Annotated[float, Body(include_in_schema=False)] = SPEED,
 ) -> StreamingResponse:
+    tts, tokenizer, description_tokenizer = tts_model_manager.get_or_load_model(model)
     if speed != SPEED:
         logger.warning(
             "Specifying speed isn't supported by this model. Audio will be generated with the default speed"