yaara1 committed on
Commit
cb8c5a6
·
verified ·
1 Parent(s): c6068ea

Upload 2 files

Browse files
Files changed (2) hide show
  1. piper_onnx/__init__.py +106 -0
  2. piper_onnx/py.typed +0 -0
piper_onnx/__init__.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from numpy.typing import NDArray
3
+ import json
4
+ from phonemizer.backend.espeak.wrapper import EspeakWrapper
5
+ from phonemizer import phonemize
6
+ import espeakng_loader
7
+ import onnxruntime as ort
8
+
9
+
10
+ _BOS = "^"
11
+ _EOS = "$"
12
+ _PAD = "_"
13
+
14
+
15
+ class Piper:
16
+ def __init__(
17
+ self,
18
+ model_path: str,
19
+ config_path: str,
20
+ ):
21
+ self.setup(model_path, config_path)
22
+
23
+ def setup(self, model_path, config_path, session = None):
24
+ with open(config_path) as fp:
25
+ self.config: dict = json.load(fp)
26
+ self.sample_rate: int = self.config['audio']['sample_rate']
27
+ self.phoneme_id_map: dict = self.config['phoneme_id_map']
28
+ self._voices: dict = self.config.get('speaker_id_map')
29
+
30
+ EspeakWrapper.set_library(espeakng_loader.get_library_path())
31
+ EspeakWrapper.set_data_path(espeakng_loader.get_data_path())
32
+ self.sess = session or ort.InferenceSession(
33
+ model_path,
34
+ sess_options=ort.SessionOptions(),
35
+ providers=['CPUExecutionProvider']
36
+ )
37
+ self.sess_inputs_names = [i.name for i in self.sess.get_inputs()]
38
+
39
+ @classmethod
40
+ def from_session(
41
+ cls,
42
+ session: ort.InferenceSession,
43
+ config_path: str,
44
+ ):
45
+ instance = cls.__new__(cls)
46
+ instance.setup(model_path='', config_path=config_path, session=session)
47
+ return instance
48
+
49
+ def create(
50
+ self,
51
+ text: str,
52
+ speaker_id: str | int = None,
53
+ is_phonemes = False,
54
+ length_scale: int = None,
55
+ noise_scale: int = None,
56
+ noise_w: int = None,
57
+ ) -> tuple[NDArray[np.float32], int]:
58
+
59
+ inference_cfg = self.config['inference']
60
+ length_scale = length_scale or inference_cfg['length_scale']
61
+ noise_scale = noise_scale or inference_cfg['noise_scale']
62
+ noise_w = noise_w or inference_cfg['noise_w']
63
+
64
+ sid = 0
65
+ if isinstance(speaker_id, str) and speaker_id in self._voices:
66
+ sid = self._voices[speaker_id]
67
+ elif isinstance(speaker_id, int):
68
+ sid = speaker_id
69
+
70
+ phonemes = text if is_phonemes else phonemize(text)
71
+ phonemes = list(phonemes)
72
+ phonemes.insert(0, _BOS)
73
+
74
+ ids = self._phoneme_to_ids(phonemes)
75
+
76
+ inputs = self._create_input(ids, length_scale, noise_w, noise_scale, sid)
77
+
78
+ samples = self.sess.run(None, inputs)[0].squeeze((0,1)).squeeze()
79
+ return samples, self.sample_rate
80
+
81
+ def get_voices(self) -> dict | None:
82
+ return self._voices
83
+
84
+ def _phoneme_to_ids(self, phonemes: str) -> list[int]:
85
+ ids = []
86
+ for p in phonemes:
87
+ if p in self.phoneme_id_map:
88
+ ids.extend(self.phoneme_id_map[p])
89
+ ids.extend(self.phoneme_id_map[_PAD])
90
+ ids.extend(self.phoneme_id_map[_EOS])
91
+ return ids
92
+
93
+ def _create_input(self, ids: list[int], length_scale: int, noise_w: int, noise_scale: int, sid: int) -> dict:
94
+ ids = np.expand_dims(np.array(ids, dtype=np.int64), 0)
95
+ length = np.array([ids.shape[1]], dtype=np.int64)
96
+ scales = np.array([noise_scale, length_scale, noise_w],dtype=np.float32)
97
+
98
+ sid = np.array([sid], dtype=np.int64) if sid is not None else None
99
+ input = {
100
+ 'input': ids,
101
+ 'input_lengths': length,
102
+ 'scales': scales,
103
+ }
104
+ if 'sid' in self.sess_inputs_names:
105
+ input['sid'] = sid
106
+ return input
piper_onnx/py.typed ADDED
File without changes