yaara1 committed on
Commit
abaaea9
·
verified ·
1 Parent(s): 0ac98ca

Upload 6 files

Browse files
acronym-phonemes-dict.xlsx ADDED
Binary file (13.2 kB). View file
 
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+ from phonikud_onnx import Phonikud
4
+ from phonikud import phonemize
5
+ from phonikud_tts import Piper
6
+ import soundfile as sf
7
+ import gradio as gr
8
+
9
+
10
# Models are loaded once, when the module is imported.
# Diacritization model; convert_txt_to_phonemes calls its add_diacritics().
phonikud_onnx = Phonikud("phonikud-1.0.int8.onnx")
# TTS voice (ONNX model + its JSON config); heb_to_speech calls its create().
piper = Piper('tts-model.onnx', 'tts-model.config.json')
12
+
13
+ # Letter-to-phoneme mapping for acronyms
14
# Phoneme string substituted for each Hebrew letter when an acronym is
# spelled out by handle_acronym. Regular letters carry a trailing "a";
# the final-form letters at the end map to a bare consonant.
LETTER_TO_PHONEME = {
    "א": "ʔa",
    "ב": "ba",
    "ג": "ɡa",
    "ד": "da",
    "ה": "ha",
    "ו": "va",
    "ז": "za",
    "ח": "χa",
    "ט": "ta",
    "י": "ja",
    "כ": "ka",
    "ל": "la",
    "מ": "ma",
    "נ": "na",
    "ס": "sa",
    "ע": "ʔa",
    "פ": "pa",
    "צ": "tsa",
    "ק": "ka",
    "ר": "ʁa",
    "ש": "ʃa",
    "ת": "ta",
    # Final forms
    "ם": "m",
    "ן": "n",
    "ף": "f",
    "ך": "χ",
    "ץ": "ts",
}
21
+
22
+ # Split text into acronyms and regular chunks
23
def split_text(text):
    """Classify every whitespace-separated token of *text*.

    Returns a list of ``(kind, value)`` tuples in input order:

    * ``("in_dict", stripped)`` - the token, with quote marks and the
      punctuation ``: , . ! ?`` removed, is a key of the module-level
      ``acronym_dict``; *stripped* is that cleaned key.
    * ``("acronym", token)`` - the token has a quote mark between word
      characters; the token is returned unmodified.
    * ``("text", token)`` - everything else, unmodified.
    """
    classified = []
    for token in text.split():
        # The dictionary lookup takes precedence over the generic
        # quote-mark acronym pattern.
        bare = re.sub(r'[״":,.!?]', '', token)
        if bare in acronym_dict:
            classified.append(("in_dict", bare))
            continue

        # str.split() never yields empty or whitespace-only tokens, so any
        # token that is not an acronym is plain text.
        looks_like_acronym = re.search(r'\w+["״]\w+', token) is not None
        classified.append(("acronym" if looks_like_acronym else "text", token))
    return classified
34
+
35
+
36
def handle_acronym(acronym):
    """Spell out an acronym letter by letter as a phoneme string.

    Quote marks and the punctuation ``: , . ! ?`` are removed first. Every
    character is then replaced by its ``LETTER_TO_PHONEME`` entry; characters
    without an entry are kept as they are. The last letter's phoneme loses
    its trailing "a".

    Args:
        acronym: The raw acronym token, possibly containing quote marks
            and punctuation.

    Returns:
        The concatenated phoneme string, or an empty string when nothing is
        left after the marks are stripped.
    """
    acronym = re.sub(r'[״":,.!?]', '', acronym)
    if not acronym:
        # Nothing to spell out (e.g. the token was only punctuation).
        return ''

    *leading, last = acronym
    phonemes = ''.join(
        LETTER_TO_PHONEME.get(letter, letter) for letter in leading
    )

    last_phoneme = LETTER_TO_PHONEME.get(last)
    if last_phoneme is None:
        # Unmapped character: keep it untouched, even if it is a literal "a".
        last_phoneme = last
    elif last_phoneme.endswith('a'):
        # Drop only the trailing vowel of a mapped letter.
        last_phoneme = last_phoneme[:-1]
    phonemes += last_phoneme

    print(f"Acronym: {acronym} → Phonemes: {phonemes}")  # Optional debug
    return phonemes
47
+
48
def convert_txt_to_phonemes(text):
    """Turn *text* into one space-separated phoneme string.

    Each chunk produced by ``split_text`` is converted according to its kind:
    dictionary acronyms use their ``acronym_dict`` entry, other acronyms are
    spelled out by ``handle_acronym``, and plain text is diacritized with
    ``phonikud_onnx`` and then passed to ``phonemize``. Progress is printed
    to stdout.

    NOTE(review): every plain-text token is diacritized on its own, so the
    model receives no surrounding sentence context - confirm this is intended.
    """
    pieces = []
    for kind, chunk in split_text(text):
        if kind == "in_dict":
            mapped = acronym_dict[chunk]
            print(f"Found! {chunk} → {mapped}")
            pieces.append(mapped)
            continue
        if kind == "acronym":
            pieces.append(handle_acronym(chunk))
            continue
        # Plain text: add diacritics first, then convert to phonemes.
        pieces.append(phonemize(phonikud_onnx.add_diacritics(chunk)))

    phonemes = ' '.join(pieces)
    print(phonemes)
    return phonemes
62
+
63
def heb_to_speech(text, temp_word=" רות", temp_duration=0.36):
    """Synthesize *text* to a WAV file and return the file's path.

    *temp_word* is appended to the text before synthesis and the last
    *temp_duration* seconds of audio are cut off afterwards.
    NOTE(review): presumably the padding word keeps the real last word from
    being clipped by the voice model - confirm.

    Args:
        text: Text to speak.
        temp_word: Padding appended to *text* before phonemization.
        temp_duration: Seconds of audio removed from the end of the result.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the trimmed
        audio. The file is not deleted by this function.
    """
    import tempfile

    # Step 1: phonemize the text together with the temporary word.
    phonemes = convert_txt_to_phonemes(text + temp_word)

    # Step 2: generate audio.
    samples, sample_rate = piper.create(phonemes, is_phonemes=True)

    # Step 3: trim the temporary word. Slice with an explicit end index:
    # samples[:-0] would be empty, so a zero trim must keep everything, and
    # a trim longer than the clip must not wrap around.
    trim_samples = max(0, int(temp_duration * sample_rate))
    keep = max(0, len(samples) - trim_samples)
    trimmed_samples = samples[:keep]

    # A unique file per call, so concurrent requests cannot overwrite each
    # other's output. No intermediate (untrimmed) file is written.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, trimmed_samples, sample_rate)
    return output_path
77
+
78
+
79
# Acronym overrides: spreadsheet column "acronym" is the lookup key and
# column "phonemes" the replacement used by convert_txt_to_phonemes.
# If an acronym appears more than once, the last row wins.
# NOTE(review): reading .xlsx needs an Excel engine such as openpyxl, and
# requirements.txt lists neither pandas nor openpyxl (only odfpy) - confirm
# both are available in the deployment image.
acronym_df = pd.read_excel("acronym-phonemes-dict.xlsx")
acronym_dict = dict(zip(acronym_df['acronym'], acronym_df['phonemes']))
81
+
82
# Gradio UI: one text box, one button and an audio player. Clicking the
# button passes the text box contents to heb_to_speech and plays the WAV
# file whose path it returns.
with gr.Blocks() as demo:
    text_input = gr.Textbox(lines=2, label="Insert Hebrew text")
    generate_btn = gr.Button("Generate")
    audio_output = gr.Audio(type="filepath", label="🔊", interactive=False)

    generate_btn.click(heb_to_speech, text_input, audio_output)

# Runs at import time; share=True requests a public share link.
demo.launch(share=True)
phonikud-1.0.int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fa2624b1e8202a0c0a23259b560b0c41ad92a3a6750bd0e322ce5a2b1acdb6
3
+ size 307844158
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ soundfile
3
+ numpy
4
+ onnxruntime
5
+ phonikud
6
+ phonikud-onnx
7
+ phonikud-tts
8
+ odfpy
tts-model.config.json ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "",
3
+ "audio": {
4
+ "sample_rate": 22050,
5
+ "quality": "train"
6
+ },
7
+ "espeak": {
8
+ "voice": "he"
9
+ },
10
+ "language": {
11
+ "code": "he"
12
+ },
13
+ "inference": {
14
+ "noise_scale": 0.667,
15
+ "length_scale": 1,
16
+ "noise_w": 0.8
17
+ },
18
+ "phoneme_type": "raw",
19
+ "phoneme_map": {},
20
+ "phoneme_id_map": {
21
+ " ": [
22
+ 3
23
+ ],
24
+ "!": [
25
+ 4
26
+ ],
27
+ "\"": [
28
+ 150
29
+ ],
30
+ "#": [
31
+ 149
32
+ ],
33
+ "$": [
34
+ 2
35
+ ],
36
+ "'": [
37
+ 5
38
+ ],
39
+ "(": [
40
+ 6
41
+ ],
42
+ ")": [
43
+ 7
44
+ ],
45
+ ",": [
46
+ 8
47
+ ],
48
+ "-": [
49
+ 9
50
+ ],
51
+ ".": [
52
+ 10
53
+ ],
54
+ "0": [
55
+ 130
56
+ ],
57
+ "1": [
58
+ 131
59
+ ],
60
+ "2": [
61
+ 132
62
+ ],
63
+ "3": [
64
+ 133
65
+ ],
66
+ "4": [
67
+ 134
68
+ ],
69
+ "5": [
70
+ 135
71
+ ],
72
+ "6": [
73
+ 136
74
+ ],
75
+ "7": [
76
+ 137
77
+ ],
78
+ "8": [
79
+ 138
80
+ ],
81
+ "9": [
82
+ 139
83
+ ],
84
+ ":": [
85
+ 11
86
+ ],
87
+ ";": [
88
+ 12
89
+ ],
90
+ "?": [
91
+ 13
92
+ ],
93
+ "X": [
94
+ 156
95
+ ],
96
+ "^": [
97
+ 1
98
+ ],
99
+ "_": [
100
+ 0
101
+ ],
102
+ "a": [
103
+ 14
104
+ ],
105
+ "b": [
106
+ 15
107
+ ],
108
+ "c": [
109
+ 16
110
+ ],
111
+ "d": [
112
+ 17
113
+ ],
114
+ "e": [
115
+ 18
116
+ ],
117
+ "f": [
118
+ 19
119
+ ],
120
+ "g": [
121
+ 154
122
+ ],
123
+ "h": [
124
+ 20
125
+ ],
126
+ "i": [
127
+ 21
128
+ ],
129
+ "j": [
130
+ 22
131
+ ],
132
+ "k": [
133
+ 23
134
+ ],
135
+ "l": [
136
+ 24
137
+ ],
138
+ "m": [
139
+ 25
140
+ ],
141
+ "n": [
142
+ 26
143
+ ],
144
+ "o": [
145
+ 27
146
+ ],
147
+ "p": [
148
+ 28
149
+ ],
150
+ "q": [
151
+ 29
152
+ ],
153
+ "r": [
154
+ 30
155
+ ],
156
+ "s": [
157
+ 31
158
+ ],
159
+ "t": [
160
+ 32
161
+ ],
162
+ "u": [
163
+ 33
164
+ ],
165
+ "v": [
166
+ 34
167
+ ],
168
+ "w": [
169
+ 35
170
+ ],
171
+ "x": [
172
+ 36
173
+ ],
174
+ "y": [
175
+ 37
176
+ ],
177
+ "z": [
178
+ 38
179
+ ],
180
+ "æ": [
181
+ 39
182
+ ],
183
+ "ç": [
184
+ 40
185
+ ],
186
+ "ð": [
187
+ 41
188
+ ],
189
+ "ø": [
190
+ 42
191
+ ],
192
+ "ħ": [
193
+ 43
194
+ ],
195
+ "ŋ": [
196
+ 44
197
+ ],
198
+ "œ": [
199
+ 45
200
+ ],
201
+ "ǀ": [
202
+ 46
203
+ ],
204
+ "ǁ": [
205
+ 47
206
+ ],
207
+ "ǂ": [
208
+ 48
209
+ ],
210
+ "ǃ": [
211
+ 49
212
+ ],
213
+ "ɐ": [
214
+ 50
215
+ ],
216
+ "ɑ": [
217
+ 51
218
+ ],
219
+ "ɒ": [
220
+ 52
221
+ ],
222
+ "ɓ": [
223
+ 53
224
+ ],
225
+ "ɔ": [
226
+ 54
227
+ ],
228
+ "ɕ": [
229
+ 55
230
+ ],
231
+ "ɖ": [
232
+ 56
233
+ ],
234
+ "ɗ": [
235
+ 57
236
+ ],
237
+ "ɘ": [
238
+ 58
239
+ ],
240
+ "ə": [
241
+ 59
242
+ ],
243
+ "ɚ": [
244
+ 60
245
+ ],
246
+ "ɛ": [
247
+ 61
248
+ ],
249
+ "ɜ": [
250
+ 62
251
+ ],
252
+ "ɞ": [
253
+ 63
254
+ ],
255
+ "ɟ": [
256
+ 64
257
+ ],
258
+ "ɠ": [
259
+ 65
260
+ ],
261
+ "ɡ": [
262
+ 66
263
+ ],
264
+ "ɢ": [
265
+ 67
266
+ ],
267
+ "ɣ": [
268
+ 68
269
+ ],
270
+ "ɤ": [
271
+ 69
272
+ ],
273
+ "ɥ": [
274
+ 70
275
+ ],
276
+ "ɦ": [
277
+ 71
278
+ ],
279
+ "ɧ": [
280
+ 72
281
+ ],
282
+ "ɨ": [
283
+ 73
284
+ ],
285
+ "ɪ": [
286
+ 74
287
+ ],
288
+ "ɫ": [
289
+ 75
290
+ ],
291
+ "ɬ": [
292
+ 76
293
+ ],
294
+ "ɭ": [
295
+ 77
296
+ ],
297
+ "ɮ": [
298
+ 78
299
+ ],
300
+ "ɯ": [
301
+ 79
302
+ ],
303
+ "ɰ": [
304
+ 80
305
+ ],
306
+ "ɱ": [
307
+ 81
308
+ ],
309
+ "ɲ": [
310
+ 82
311
+ ],
312
+ "ɳ": [
313
+ 83
314
+ ],
315
+ "ɴ": [
316
+ 84
317
+ ],
318
+ "ɵ": [
319
+ 85
320
+ ],
321
+ "ɶ": [
322
+ 86
323
+ ],
324
+ "ɸ": [
325
+ 87
326
+ ],
327
+ "ɹ": [
328
+ 88
329
+ ],
330
+ "ɺ": [
331
+ 89
332
+ ],
333
+ "ɻ": [
334
+ 90
335
+ ],
336
+ "ɽ": [
337
+ 91
338
+ ],
339
+ "ɾ": [
340
+ 92
341
+ ],
342
+ "ʀ": [
343
+ 93
344
+ ],
345
+ "ʁ": [
346
+ 94
347
+ ],
348
+ "ʂ": [
349
+ 95
350
+ ],
351
+ "ʃ": [
352
+ 96
353
+ ],
354
+ "ʄ": [
355
+ 97
356
+ ],
357
+ "ʈ": [
358
+ 98
359
+ ],
360
+ "ʉ": [
361
+ 99
362
+ ],
363
+ "ʊ": [
364
+ 100
365
+ ],
366
+ "ʋ": [
367
+ 101
368
+ ],
369
+ "ʌ": [
370
+ 102
371
+ ],
372
+ "ʍ": [
373
+ 103
374
+ ],
375
+ "ʎ": [
376
+ 104
377
+ ],
378
+ "ʏ": [
379
+ 105
380
+ ],
381
+ "ʐ": [
382
+ 106
383
+ ],
384
+ "ʑ": [
385
+ 107
386
+ ],
387
+ "ʒ": [
388
+ 108
389
+ ],
390
+ "ʔ": [
391
+ 109
392
+ ],
393
+ "ʕ": [
394
+ 110
395
+ ],
396
+ "ʘ": [
397
+ 111
398
+ ],
399
+ "ʙ": [
400
+ 112
401
+ ],
402
+ "ʛ": [
403
+ 113
404
+ ],
405
+ "ʜ": [
406
+ 114
407
+ ],
408
+ "ʝ": [
409
+ 115
410
+ ],
411
+ "ʟ": [
412
+ 116
413
+ ],
414
+ "ʡ": [
415
+ 117
416
+ ],
417
+ "ʢ": [
418
+ 118
419
+ ],
420
+ "ʦ": [
421
+ 155
422
+ ],
423
+ "ʰ": [
424
+ 145
425
+ ],
426
+ "ʲ": [
427
+ 119
428
+ ],
429
+ "ˈ": [
430
+ 120
431
+ ],
432
+ "ˌ": [
433
+ 121
434
+ ],
435
+ "ː": [
436
+ 122
437
+ ],
438
+ "ˑ": [
439
+ 123
440
+ ],
441
+ "˞": [
442
+ 124
443
+ ],
444
+ "ˤ": [
445
+ 146
446
+ ],
447
+ "̃": [
448
+ 141
449
+ ],
450
+ "̧": [
451
+ 140
452
+ ],
453
+ "̩": [
454
+ 144
455
+ ],
456
+ "̪": [
457
+ 142
458
+ ],
459
+ "̯": [
460
+ 143
461
+ ],
462
+ "̺": [
463
+ 152
464
+ ],
465
+ "̻": [
466
+ 153
467
+ ],
468
+ "β": [
469
+ 125
470
+ ],
471
+ "ε": [
472
+ 147
473
+ ],
474
+ "θ": [
475
+ 126
476
+ ],
477
+ "χ": [
478
+ 127
479
+ ],
480
+ "ᵻ": [
481
+ 128
482
+ ],
483
+ "↑": [
484
+ 151
485
+ ],
486
+ "↓": [
487
+ 148
488
+ ],
489
+ "ⱱ": [
490
+ 129
491
+ ]
492
+ },
493
+ "num_symbols": 256,
494
+ "num_speakers": 1,
495
+ "speaker_id_map": {},
496
+ "piper_version": "1.0.0"
497
+ }
tts-model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfe0a8f33002654fa560c4cdb796d934b6aa84b3bfb16779646a5b0f1bd9d968
3
+ size 63511038