unity
/

inference-engine-jets-text-to-speech

alexsuvorov committed on Apr 1

Commit

148e5e2

verified ·

1 Parent(s): e97b228

Upload 3 files

Files changed (3) hide show

README.md CHANGED Viewed

@@ -3,16 +3,15 @@ license: cc-by-4.0
 library_name: unity-sentis
 ---
-# Jets Text-to-Speech Model validated for Unity Sentis (Version 1.4.0-pre.2*)
-*Version 1.3.0 Sentis files are not compatible with version 1.4.0 and above and will need to be recreated
 This is a text to speech model called [Jets](https://huggingface.co/imdanboy/jets). It takes in a text string which you convert to phonemes using a dictionary and then outputs a wav to play the voice.
 ## How to Use
-* Create a new scene in Unity 2023
-* Install `com.unity.sentis` version `1.4.0-pre.2` package
 * Put the c# script on the Main Camera
-* Put the `sentis` file and the `phoneme_dict.txt` file in the `Assets/StreamingAssets` folder
 * Add an AudioSource component on the Main Camera
 * Set the `inputText` string for what you want it to say
 * Press play

 library_name: unity-sentis
 ---
+# Jets Text-to-Speech Model validated for Sentis 2.1.2 in Unity 6
 This is a text-to-speech model called [Jets](https://huggingface.co/imdanboy/jets). It takes a text string, which you first convert to phonemes using a dictionary, and outputs a waveform (wav) that can be played back as speech.
 ## How to Use
+* Create a new scene in Unity 6
+* Install `com.unity.sentis` version `2.1.2` package
 * Put the `RunJets.cs` C# script on the Main Camera
+* Put the `jets-text-to-speech.sentis` file and the `phoneme_dict.txt` file in the `Assets/StreamingAssets` folder
 * Add an AudioSource component on the Main Camera
 * Set the `inputText` string for what you want it to say
 * Press play

RunJets.cs CHANGED Viewed

@@ -38,7 +38,7 @@ public class RunJets : MonoBehaviour
     Dictionary<string, string> dict = new ();
-    IWorker engine;
     AudioClip clip;
@@ -51,8 +51,8 @@ public class RunJets : MonoBehaviour
     void LoadModel()
     {
-        var model = ModelLoader.Load(Path.Join(Application.streamingAssetsPath ,"jets-text-to-speech.sentis"));
-        engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
     }
     void TextToSpeech()
@@ -167,12 +167,11 @@ public class RunJets : MonoBehaviour
     {
         int[] tokens = GetTokens(ptext);
-        using var input = new TensorInt(new TensorShape(tokens.Length), tokens);
-        var result = engine.Execute(input);
-        var output = result.PeekOutput("wav") as TensorFloat;
-        output.CompleteOperationsAndDownload();
-        var samples = output.ToReadOnlyArray();
         Debug.Log($"Audio size = {samples.Length / samplerate} seconds");
@@ -205,6 +204,6 @@ public class RunJets : MonoBehaviour
     private void OnDestroy()
     {
-        engine?.Dispose();
     }
 }

     Dictionary<string, string> dict = new ();
+    Worker worker;
     AudioClip clip;
     void LoadModel()
     {
+        var model = ModelLoader.Load(Path.Join(Application.streamingAssetsPath, "jets-text-to-speech.sentis"));
+        worker = new Worker(model, BackendType.GPUCompute);
     }
     void TextToSpeech()
     {
         int[] tokens = GetTokens(ptext);
+        using var input = new Tensor<int>(new TensorShape(tokens.Length), tokens);
+        worker.Schedule(input);
+        using var samplesTensor = (worker.PeekOutput("wav") as Tensor<float>).ReadbackAndClone();
+        var samples = samplesTensor.AsReadOnlySpan();
         Debug.Log($"Audio size = {samples.Length / samplerate} seconds");
     private void OnDestroy()
     {
+        worker?.Dispose();
     }
 }

info.json CHANGED Viewed

@@ -1,6 +1,15 @@
 {
-   "version" : [
-       "1.4.0"
-   ]
 }

 {
+    "code": [
+        "RunJets.cs"
+    ],
+    "models": [
+        "jets-text-to-speech.onnx",
+        "jets-text-to-speech.sentis"
+    ],
+    "data": [
+        "phoneme_dict.txt"
+    ],
+    "version": [
+        "2.1.2"
+    ]
 }