Spaces:

yaara1
/

TTS

Running

App Files Files Community

yaara1 committed on Jul 6

Commit

abaaea9

verified ·

1 Parent(s): 0ac98ca

Upload 6 files

Browse files

Files changed (6) hide show

acronym-phonemes-dict.xlsx +0 -0
app.py +89 -0
phonikud-1.0.int8.onnx +3 -0
requirements.txt +8 -0
tts-model.config.json +497 -0
tts-model.onnx +3 -0

acronym-phonemes-dict.xlsx ADDED Viewed

Binary file (13.2 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import re
+import pandas as pd
+from phonikud_onnx import Phonikud
+from phonikud import phonemize
+from phonikud_tts import Piper
+import soundfile as sf
+import gradio as gr
+# Diacritization model loaded once at import time; convert_txt_to_phonemes calls its add_diacritics().
+phonikud_onnx = Phonikud("phonikud-1.0.int8.onnx")
+# TTS voice built from the ONNX model plus its JSON config; heb_to_speech calls piper.create().
+piper = Piper('tts-model.onnx', 'tts-model.config.json')
+# Letter-to-phoneme mapping for acronyms
+# Regular letters map to a consonant followed by 'a'; the final-form letters
+# (ם ן ף ך ץ) map to a bare consonant. Several letters share one value
+# (א/ע -> 'ʔa', ט/ת -> 'ta', כ/ק -> 'ka').
+LETTER_TO_PHONEME = {
+    'א': 'ʔa', 'ב': 'ba', 'ג': 'ɡa', 'ד': 'da', 'ה': 'ha', 'ו': 'va',
+    'ז': 'za', 'ח': 'χa', 'ט': 'ta', 'י': 'ja', 'כ': 'ka', 'ל': 'la',
+    'מ': 'ma', 'נ': 'na', 'ס': 'sa', 'ע': 'ʔa', 'פ': 'pa', 'צ': 'tsa',
+    'ק': 'ka', 'ר': 'ʁa', 'ש': 'ʃa', 'ת': 'ta', 'ם': 'm', 'ן': 'n',
+    'ף': 'f', 'ך': 'χ', 'ץ': 'ts'
+}
+# Split text into acronyms and regular chunks
+def split_text(text):
+    words = []
+    tokens = text.split()
+    for token in tokens:
+        if re.sub(r'[״":,.!?]', '', token) in acronym_dict:
+            words.append(("in_dict", re.sub(r'[״":,.!?]', '', token))) # Remove quotes here
+        elif re.search(r'\w+["״]\w+', token):
+            words.append(("acronym", token))
+        elif token.strip():
+            words.append(("text", token))
+    return words
+def handle_acronym(acronym):
+    acronym = re.sub(r'[״":,.!?]', '', acronym) # Remove quotes here
+    # Convert all letters except the last one
+    phonemes = ''.join(LETTER_TO_PHONEME.get(letter, letter) for letter in acronym[:-1])
+    # Convert the last letter and remove trailing 'a' if present
+    last_phoneme = LETTER_TO_PHONEME.get(acronym[-1], acronym[-1]).replace('a', '')  # Removes ALL 'a's
+    phonemes += last_phoneme
+    print(f"Acronym: {acronym} → Phonemes: {phonemes}")  # Optional debug
+    return phonemes
+def convert_txt_to_phonemes(text):
+    result = []
+    for kind, chunk in split_text(text):
+        if kind== "in_dict":
+            print(f"Found! {chunk} → {acronym_dict[chunk]}")
+            result.append(acronym_dict[chunk])
+        elif kind == "acronym":
+            result.append(handle_acronym(chunk))
+        else:
+            diacritized = phonikud_onnx.add_diacritics(chunk)
+            result.append(phonemize(diacritized))
+    phonemes = ' '.join(result)
+    print(phonemes)
+    return phonemes
+def heb_to_speech(text, temp_word=" רות", temp_duration=0.36):
+    # Step 1: Add temporary word
+    text_with_temp = text + temp_word
+    phonemes = convert_txt_to_phonemes(text_with_temp)
+    # Step 2: Generate audio
+    samples, sample_rate = piper.create(phonemes, is_phonemes=True)
+    sf.write("raw_audio.wav", samples, sample_rate)
+    # Step 3: Trim temporary word
+    trim_samples = int(temp_duration * sample_rate)
+    trimmed_samples = samples[:-trim_samples]
+    sf.write("final_audio.wav", trimmed_samples, sample_rate)
+    return "final_audio.wav"
+# Load the acronym -> phonemes lookup table from the bundled spreadsheet.
+# NOTE(review): pandas reads .xlsx files through openpyxl, but requirements.txt
+# lists odfpy and neither pandas nor openpyxl — confirm the dependency list.
+acronym_df = pd.read_excel("acronym-phonemes-dict.xlsx")
+# One entry per row, keyed by the 'acronym' column; a repeated acronym keeps its last row.
+acronym_dict = {row['acronym']: row['phonemes'] for _, row in acronym_df.iterrows()}
+# Minimal UI: a text box, a button, and an audio player fed by the file path heb_to_speech returns.
+with gr.Blocks() as demo:
+    text_input = gr.Textbox(label="Insert Hebrew text", lines=2)
+    generate_btn = gr.Button("Generate")
+    audio_output = gr.Audio(label="🔊", type="filepath", interactive=False)
+    generate_btn.click(fn=heb_to_speech, inputs=text_input, outputs=audio_output)
+# NOTE(review): share=True also requests a public share link — confirm that is intended here.
+demo.launch(share=True)

phonikud-1.0.int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1fa2624b1e8202a0c0a23259b560b0c41ad92a3a6750bd0e322ce5a2b1acdb6
+size 307844158

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+soundfile
+numpy
+onnxruntime
+phonikud
+phonikud-onnx
+phonikud-tts
+odfpy

tts-model.config.json ADDED Viewed

	@@ -0,0 +1,497 @@

+{
+    "dataset": "",
+    "audio": {
+        "sample_rate": 22050,
+        "quality": "train"
+    },
+    "espeak": {
+        "voice": "he"
+    },
+    "language": {
+        "code": "he"
+    },
+    "inference": {
+        "noise_scale": 0.667,
+        "length_scale": 1,
+        "noise_w": 0.8
+    },
+    "phoneme_type": "raw",
+    "phoneme_map": {},
+    "phoneme_id_map": {
+        " ": [
+            3
+        ],
+        "!": [
+            4
+        ],
+        "\"": [
+            150
+        ],
+        "#": [
+            149
+        ],
+        "$": [
+            2
+        ],
+        "'": [
+            5
+        ],
+        "(": [
+            6
+        ],
+        ")": [
+            7
+        ],
+        ",": [
+            8
+        ],
+        "-": [
+            9
+        ],
+        ".": [
+            10
+        ],
+        "0": [
+            130
+        ],
+        "1": [
+            131
+        ],
+        "2": [
+            132
+        ],
+        "3": [
+            133
+        ],
+        "4": [
+            134
+        ],
+        "5": [
+            135
+        ],
+        "6": [
+            136
+        ],
+        "7": [
+            137
+        ],
+        "8": [
+            138
+        ],
+        "9": [
+            139
+        ],
+        ":": [
+            11
+        ],
+        ";": [
+            12
+        ],
+        "?": [
+            13
+        ],
+        "X": [
+            156
+        ],
+        "^": [
+            1
+        ],
+        "_": [
+            0
+        ],
+        "a": [
+            14
+        ],
+        "b": [
+            15
+        ],
+        "c": [
+            16
+        ],
+        "d": [
+            17
+        ],
+        "e": [
+            18
+        ],
+        "f": [
+            19
+        ],
+        "g": [
+            154
+        ],
+        "h": [
+            20
+        ],
+        "i": [
+            21
+        ],
+        "j": [
+            22
+        ],
+        "k": [
+            23
+        ],
+        "l": [
+            24
+        ],
+        "m": [
+            25
+        ],
+        "n": [
+            26
+        ],
+        "o": [
+            27
+        ],
+        "p": [
+            28
+        ],
+        "q": [
+            29
+        ],
+        "r": [
+            30
+        ],
+        "s": [
+            31
+        ],
+        "t": [
+            32
+        ],
+        "u": [
+            33
+        ],
+        "v": [
+            34
+        ],
+        "w": [
+            35
+        ],
+        "x": [
+            36
+        ],
+        "y": [
+            37
+        ],
+        "z": [
+            38
+        ],
+        "æ": [
+            39
+        ],
+        "ç": [
+            40
+        ],
+        "ð": [
+            41
+        ],
+        "ø": [
+            42
+        ],
+        "ħ": [
+            43
+        ],
+        "ŋ": [
+            44
+        ],
+        "œ": [
+            45
+        ],
+        "ǀ": [
+            46
+        ],
+        "ǁ": [
+            47
+        ],
+        "ǂ": [
+            48
+        ],
+        "ǃ": [
+            49
+        ],
+        "ɐ": [
+            50
+        ],
+        "ɑ": [
+            51
+        ],
+        "ɒ": [
+            52
+        ],
+        "ɓ": [
+            53
+        ],
+        "ɔ": [
+            54
+        ],
+        "ɕ": [
+            55
+        ],
+        "ɖ": [
+            56
+        ],
+        "ɗ": [
+            57
+        ],
+        "ɘ": [
+            58
+        ],
+        "ə": [
+            59
+        ],
+        "ɚ": [
+            60
+        ],
+        "ɛ": [
+            61
+        ],
+        "ɜ": [
+            62
+        ],
+        "ɞ": [
+            63
+        ],
+        "ɟ": [
+            64
+        ],
+        "ɠ": [
+            65
+        ],
+        "ɡ": [
+            66
+        ],
+        "ɢ": [
+            67
+        ],
+        "ɣ": [
+            68
+        ],
+        "ɤ": [
+            69
+        ],
+        "ɥ": [
+            70
+        ],
+        "ɦ": [
+            71
+        ],
+        "ɧ": [
+            72
+        ],
+        "ɨ": [
+            73
+        ],
+        "ɪ": [
+            74
+        ],
+        "ɫ": [
+            75
+        ],
+        "ɬ": [
+            76
+        ],
+        "ɭ": [
+            77
+        ],
+        "ɮ": [
+            78
+        ],
+        "ɯ": [
+            79
+        ],
+        "ɰ": [
+            80
+        ],
+        "ɱ": [
+            81
+        ],
+        "ɲ": [
+            82
+        ],
+        "ɳ": [
+            83
+        ],
+        "ɴ": [
+            84
+        ],
+        "ɵ": [
+            85
+        ],
+        "ɶ": [
+            86
+        ],
+        "ɸ": [
+            87
+        ],
+        "ɹ": [
+            88
+        ],
+        "ɺ": [
+            89
+        ],
+        "ɻ": [
+            90
+        ],
+        "ɽ": [
+            91
+        ],
+        "ɾ": [
+            92
+        ],
+        "ʀ": [
+            93
+        ],
+        "ʁ": [
+            94
+        ],
+        "ʂ": [
+            95
+        ],
+        "ʃ": [
+            96
+        ],
+        "ʄ": [
+            97
+        ],
+        "ʈ": [
+            98
+        ],
+        "ʉ": [
+            99
+        ],
+        "ʊ": [
+            100
+        ],
+        "ʋ": [
+            101
+        ],
+        "ʌ": [
+            102
+        ],
+        "ʍ": [
+            103
+        ],
+        "ʎ": [
+            104
+        ],
+        "ʏ": [
+            105
+        ],
+        "ʐ": [
+            106
+        ],
+        "ʑ": [
+            107
+        ],
+        "ʒ": [
+            108
+        ],
+        "ʔ": [
+            109
+        ],
+        "ʕ": [
+            110
+        ],
+        "ʘ": [
+            111
+        ],
+        "ʙ": [
+            112
+        ],
+        "ʛ": [
+            113
+        ],
+        "ʜ": [
+            114
+        ],
+        "ʝ": [
+            115
+        ],
+        "ʟ": [
+            116
+        ],
+        "ʡ": [
+            117
+        ],
+        "ʢ": [
+            118
+        ],
+        "ʦ": [
+            155
+        ],
+        "ʰ": [
+            145
+        ],
+        "ʲ": [
+            119
+        ],
+        "ˈ": [
+            120
+        ],
+        "ˌ": [
+            121
+        ],
+        "ː": [
+            122
+        ],
+        "ˑ": [
+            123
+        ],
+        "˞": [
+            124
+        ],
+        "ˤ": [
+            146
+        ],
+        "̃": [
+            141
+        ],
+        "̧": [
+            140
+        ],
+        "̩": [
+            144
+        ],
+        "̪": [
+            142
+        ],
+        "̯": [
+            143
+        ],
+        "̺": [
+            152
+        ],
+        "̻": [
+            153
+        ],
+        "β": [
+            125
+        ],
+        "ε": [
+            147
+        ],
+        "θ": [
+            126
+        ],
+        "χ": [
+            127
+        ],
+        "ᵻ": [
+            128
+        ],
+        "↑": [
+            151
+        ],
+        "↓": [
+            148
+        ],
+        "ⱱ": [
+            129
+        ]
+    },
+    "num_symbols": 256,
+    "num_speakers": 1,
+    "speaker_id_map": {},
+    "piper_version": "1.0.0"
+}

tts-model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dfe0a8f33002654fa560c4cdb796d934b6aa84b3bfb16779646a5b0f1bd9d968
+size 63511038