kumararvindibs
/

ibs_textToSpeechGeneration

Text-to-Audio

Transformers

vits

Model card Files and versions Community

kumararvindibs committed on Apr 26, 2024

Commit

5c6e3ec

verified ·

1 Parent(s): ee3fbca

Update handlerForAudio.py

Browse files

Files changed (1) hide show

handlerForAudio.py +18 -24

handlerForAudio.py CHANGED Viewed

@@ -1,11 +1,9 @@
-import requests
 from typing import Dict, Any
-from dotenv import load_dotenv, find_dotenv
-import os
-import streamlit as st
-import json
 from textToStoryGeneration import *
 import logging
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
@@ -14,33 +12,29 @@ logging.basicConfig(level=logging.ERROR)
 # Configure logging
 logging.basicConfig(level=logging.WARNING)
-load_dotenv(find_dotenv())
-HUGGINFACE_API = os.getenv("HUGNINGFACEHUB_API_TOKEN")
 class CustomHandler:
     def __init__(self):
-        self.model_name = "espnet/kan-bayashi_ljspeech_vits"
-        self.endpoint = f"https://api-inference.huggingface.co/models/{self.model_name}"
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         # Prepare the payload with input data
         logging.warning(f"------input_data-- {str(data)}")
-        payload = {"inputs": data}
-        print("payload----", payload)
         # Set headers with API token
-        headers = {"Authorization": f"Bearer {HUGGINFACE_API}"}
-        # Send POST request to the Hugging Face model endpoint
-        response = requests.post(self.endpoint, json=payload, headers=headers)
-        with open('StoryAudio.mp3', 'wb') as file:
-            file.write(response.content)
-        return 'StoryAudio.mp3'
         # Check if the request was successful
-# Example usage
-# if __name__ == "__main__":
-#     handler = CustomHandler()
-#     input_data =  "Today I have tried with many model but I didnt find the any model which gives us better result and can be deployed on the endpoints. I think we need to Create custom Inference Handler and then it can be deployed on the interfernce end poitn.As I have deployed on model on interfernce endpoint i,e. text-to-story generation. I have also compared the result created with this endpoint and my local server as well that is not same. The endpoint is generating the different stroy."
-#     result = handler(input_data)
-#     print(result)dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddv 4

 from typing import Dict, Any
 from textToStoryGeneration import *
 import logging
+import torch
+import soundfile as sf
+from transformers import AutoTokenizer, AutoModelForTextToWaveform
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
 # Configure logging
 logging.basicConfig(level=logging.WARNING)
 class CustomHandler:
     def __init__(self):
+        self.tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+        self.model= AutoModelForTextToWaveform.from_pretrained("facebook/mms-tts-eng")
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         # Prepare the payload with input data
         logging.warning(f"------input_data-- {str(data)}")
+        payload = str(data)
+        logging.warning(f"payload----{str(payload)}")
         # Set headers with API token
+        inputs = self.tokenizer(payload, return_tensors="pt")
+        # Generate the waveform from the input text
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        # Save the audio to a file
+        sf.write("StoryAudio.wav", outputs["waveform"][0].numpy(), self.model.config.sampling_rate)
+        return 'StoryAudio.wav'
         # Check if the request was successful