File size: 13,273 Bytes
5592e3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#@title <-- Just run the cell (config edge TTS)
# Base directory under which this module creates its "edge_tts_voice"
# (per-chunk clips) and "audio" (final output) folders.
edge_folder="."
import nltk
# Fetch the Punkt sentence-tokenizer data at import time; sent_tokenize (used
# by chunks_sentences) relies on it.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' instead -- confirm
# against the installed NLTK version.
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

from deep_translator import GoogleTranslator

from lang_data import languages,male_voice_list,female_voice_list

def translate_text(text, Language):
    """Translate ``text`` into ``Language`` using ``GoogleTranslator``.

    ``Language`` is a key of the ``languages`` table, which supplies the
    target code handed to the translator. "Chinese" is always sent as
    ``zh-CN``. The input is stripped before translation and the result is
    returned as ``str``.
    """
    # The table lookup runs first, so an unknown language name raises
    # KeyError even though "Chinese" replaces the looked-up code afterwards.
    looked_up_code = languages[Language]
    target_language = 'zh-CN' if Language == "Chinese" else looked_up_code
    translated = GoogleTranslator(target=target_language).translate(text.strip())
    return str(translated)


def chunks_sentences(paragraph, join_limit=2):
    """Split ``paragraph`` into sentences and regroup them ``join_limit`` at a time.

    Each returned string holds up to ``join_limit`` consecutive sentences
    joined by a single space; the last group may hold fewer.
    """
    sentences = sent_tokenize(paragraph)
    group_starts = range(0, len(sentences), join_limit)
    return [' '.join(sentences[start:start + join_limit]) for start in group_starts]


def calculate_rate_string(input_value):
    """Convert a speed multiplier into a signed whole-percent string.

    ``1`` means unchanged speed, so ``1.2`` becomes ``"+20"`` and ``0.5``
    becomes ``"-50"``. The caller appends the ``%`` sign.
    """
    # Round instead of truncating: (1.2 - 1) * 100 evaluates to
    # 19.999999999999996 in binary floating point, which int() alone would
    # turn into 19 (and 0.9 into -9).
    rate = int(round((input_value - 1) * 100))
    # Take the sign from the rounded value so a rate that rounds to zero is
    # reported as "+0".
    sign = '+' if rate >= 0 else '-'
    return f"{sign}{abs(rate)}"


def make_chunks(input_text, language):
    """Split ``input_text`` into chunks of two sentences each.

    ``language`` is accepted so existing callers keep working, but it does
    not influence the result: every language goes through the same
    sentence-based chunking. Returns the list built by ``chunks_sentences``.
    """
    # NOTE: the argument used to be overwritten with "English" and then
    # compared against "English", so this was the only path ever taken and the
    # fall-through ``None`` return was unreachable. Keep that behavior, minus
    # the dead branch.
    return chunks_sentences(input_text, join_limit=2)




import re
import uuid
def tts_file_name(text):
    """Build an mp3 path under ``edge_tts_voice`` whose name is derived from ``text``.

    A trailing "." is dropped, then the text is lower-cased, stripped, has
    its spaces replaced by underscores and is cut to 25 characters ("empty"
    when nothing is left). Eight upper-case hex characters from a fresh UUID
    are appended before the ``.mp3`` extension.
    """
    stem = text[:-1] if text.endswith(".") else text
    stem = stem.lower().strip().replace(" ", "_")
    # Slicing returns the whole string when it is 25 characters or shorter,
    # so only the empty case needs a fallback.
    stem = stem[:25] or "empty"
    suffix = uuid.uuid4().hex[:8].upper()
    return f"{edge_folder}/edge_tts_voice/{stem}_{suffix}.mp3"


from pydub import AudioSegment
import shutil
import os
def merge_audio_files(audio_paths, output_path):
    """Concatenate the audio files in ``audio_paths`` and export them as one mp3.

    Clips are appended in the order given, starting from a zero-length
    silent segment, and the result is written to ``output_path``.
    """
    clips = (AudioSegment.from_file(path) for path in audio_paths)
    # sum() applies ``+`` left to right from the explicit start value, which
    # is the same accumulation as appending clip after clip.
    combined = sum(clips, AudioSegment.silent(duration=0))
    combined.export(output_path, format="mp3")

def edge_free_tts(chunks_list,speed,voice_name,save_path,translate_text_flag,Language):
    """Synthesize text chunks with the ``edge-tts`` CLI into one mp3 file.

    Args:
        chunks_list: Text chunks to speak, in order.
        speed: Speed multiplier, converted with ``calculate_rate_string``.
        voice_name: Voice handed to ``edge-tts --voice``.
        save_path: Path of the mp3 file to produce.
        translate_text_flag: When true, every chunk goes through
            ``translate_text`` before synthesis.
        Language: Target language name passed to ``translate_text``.

    Returns:
        ``save_path``.

    Side effects:
        With more than one chunk, the ``edge_tts_voice`` folder under
        ``edge_folder`` is wiped and refilled with one numbered mp3 per chunk,
        and the clips are merged into ``save_path``. A chunk whose synthesis
        fails is reported on stdout and left out of the merge. The text that
        was sent to the synthesizer is written to ``./temp.txt``.

    Raises:
        IndexError: if ``chunks_list`` is empty.
    """
    import subprocess  # function-scope import: the module itself does not import it

    def run_edge_tts(text, out_path):
        """Run edge-tts for one piece of text; return ``(succeeded, command)``."""
        # An argument list (no shell) means quotes, ``$`` or backticks in the
        # text can neither break the command nor be executed. The ``--text=``
        # form keeps text that starts with "-" from being parsed as an option.
        command = [
            "edge-tts",
            f"--rate={calculate_rate_string(speed)}%",
            "--voice", str(voice_name),
            f"--text={text}",
            "--write-media", str(out_path),
        ]
        try:
            succeeded = subprocess.run(command).returncode == 0
        except OSError:
            # For example the edge-tts executable is not installed; report it
            # like any other failed synthesis instead of raising.
            succeeded = False
        return succeeded, command

    spoken_parts = []
    if len(chunks_list) > 1:
        chunk_dir = f"{edge_folder}/edge_tts_voice"
        # Start from an empty folder so clips from an earlier call are never merged.
        if os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir)
        os.mkdir(chunk_dir)
        chunk_audio_list = []
        for k, chunk in enumerate(chunks_list, start=1):
            text = translate_text(chunk, Language) if translate_text_flag else chunk
            spoken_parts.append(text)
            chunk_path = f"{chunk_dir}/{k}.mp3"
            # Double quotes are still stripped so the spoken text is the same
            # as before the switch away from a shell command.
            succeeded, command = run_edge_tts(text.replace('"', ""), chunk_path)
            if succeeded:
                chunk_audio_list.append(chunk_path)
            else:
                # Leave the failed chunk out: its file does not exist and
                # would make the merge below fail.
                print(f"Failed: {chunk}")
                print(" ".join(command))
        merge_audio_files(chunk_audio_list, save_path)
    else:
        text = translate_text(chunks_list[0], Language) if translate_text_flag else chunks_list[0]
        text = text.replace('"', "")
        spoken_parts.append(text)
        succeeded, command = run_edge_tts(text, save_path)
        if not succeeded:
            print(f"Failed: {chunks_list[0]}")
            print(" ".join(command))
    # Each part is followed by one space, matching the previous file format.
    with open("./temp.txt", "w", encoding="utf-8") as text_file:
        text_file.write("".join(part + " " for part in spoken_parts))
    return save_path


# speed = 1  # @param {type: "number"}
# translate_text_flag  = True # @param {type:"boolean"}
# long_sentence = True # @param {type:"boolean"}








# from IPython.display import clear_output
# from IPython.display import Audio
# Make sure the output folder exists at import time. exist_ok=True replaces the
# separate exists() check, so there is no window between checking and creating.
os.makedirs(f"{edge_folder}/audio", exist_ok=True)
import uuid
def random_audio_name_generate():
    """Return a random mp3 file name: the first eight characters of a fresh UUID plus ".mp3"."""
    prefix = str(uuid.uuid4())[:8]
    return f"{prefix}.mp3"
def edge_tts_pipeline(input_text,Language='English',voice_name=None,Gender='Male',translate_text_flag=True,no_silence=False,speed=1,tts_save_path="",long_sentence=True):
    """Turn ``input_text`` into a WAV file and return its path.

    Args:
        input_text: Text to speak.
        Language: Key into the voice tables; also the translation target.
        voice_name: Explicit edge-tts voice. When ``None`` the voice is taken
            from ``male_voice_list`` / ``female_voice_list`` by ``Gender``.
        Gender: ``"Male"`` or ``"Female"``; only used when ``voice_name`` is
            ``None``.
        translate_text_flag: Translate the text into ``Language`` before
            synthesis.
        no_silence: Also run ``remove_silence`` and return the cleaned file.
        speed: Speed multiplier (``1`` is normal).
        tts_save_path: When non-empty, the resulting WAV is additionally
            copied to this path.
        long_sentence: Split the text into sentence chunks. Text longer than
            500 characters is always split.

    Returns:
        Path of the generated WAV inside ``{edge_folder}/audio``. This is the
        generated file even when a copy was written to ``tts_save_path``.

    Raises:
        ValueError: if no voice is given and ``Gender`` is not "Male"/"Female".
        KeyError: if ``Language`` is missing from the selected voice table.
    """
    if voice_name is None:
        if Gender == "Male":
            voice_name = male_voice_list[Language]
        elif Gender == "Female":
            voice_name = female_voice_list[Language]
        else:
            # Previously the voice silently stayed None and the synthesis
            # failed later with an unrelated error.
            raise ValueError(f"Gender must be 'Male' or 'Female', got {Gender!r}")

    if long_sentence or len(input_text) > 500:
        chunks_list = make_chunks(input_text, Language if translate_text_flag else "English")
    else:
        chunks_list = [input_text]

    temp_save_path = f"{edge_folder}/audio/" + random_audio_name_generate()
    # Swap only the extension. Lower-casing the whole path, as was done
    # before, would point at a different location whenever edge_folder
    # contains upper-case characters on a case-sensitive file system.
    save_path = os.path.splitext(temp_save_path)[0] + ".wav"

    edge_save_path = edge_free_tts(chunks_list, speed, voice_name, temp_save_path, translate_text_flag, Language)
    mp3_to_wav(edge_save_path, save_path)

    audio_return_path = save_path
    if no_silence:
        clean_path = f"{edge_folder}/audio/" + random_audio_name_generate().replace(".mp3", ".wav")
        remove_silence(save_path, clean_path)
        audio_return_path = clean_path

    if tts_save_path:
        shutil.copyfile(audio_return_path, tts_save_path)
    return audio_return_path



def talk(input_text):
    """Speak ``input_text`` using module-level settings and return the WAV path.

    Reads the module globals ``Language``, ``Gender`` and ``no_silence``.
    They are not assigned anywhere in the visible part of this module, so the
    caller must define them before calling. The text is not translated, is
    always split into sentence chunks, and is spoken at normal speed.

    Returns the path produced by ``edge_tts_pipeline``: the silence-stripped
    file when ``no_silence`` is true, otherwise the plain WAV.
    """
    # Delegate to the pipeline instead of repeating its steps. The voice is
    # chosen from Gender/Language there, the same way this function used to.
    return edge_tts_pipeline(
        input_text,
        Language=Language,
        Gender=Gender,
        translate_text_flag=False,
        no_silence=no_silence,
        speed=1,
        long_sentence=True,
    )

from pydub import AudioSegment
from pydub.silence import split_on_silence
import os
 
def remove_silence(file_path,output_path):
    """Strip silent stretches from a WAV file and write the result to ``output_path``.

    The audio is split with pydub's ``split_on_silence`` and the chunks it
    returns are joined back together in order. The input is read as WAV and
    the output is written as WAV.

    Returns:
        ``output_path``.
    """
    audio_format = "wav"

    sound = AudioSegment.from_file(file_path, format=audio_format)
    audio_chunks = split_on_silence(sound,
                                    min_silence_len=100,
                                    silence_thresh=-45,
                                    keep_silence=50)

    # Join the chunks in order. Starting from an empty segment means an empty
    # chunk list still exports an (empty) file, as before.
    combined = AudioSegment.empty()
    for chunk in audio_chunks:
        combined += chunk

    combined.export(output_path, format=audio_format)
    print(f"Remove silence successfully: {output_path}")

    return output_path


from pydub import AudioSegment

def mp3_to_wav(mp3_file, wav_file):
    """Load ``mp3_file`` with pydub and export it to ``wav_file`` in WAV format."""
    AudioSegment.from_mp3(mp3_file).export(wav_file, format="wav")


###use case

# from microsoft_tts import edge_tts_pipeline
# def tts(text, Language='English',voice_name=None, tts_save_path='', Gender='Male', translate_text_flag=False, no_silence=True, speed=1.0, long_sentence=True):          
#     edge_save_path = edge_tts_pipeline(text, Language,voice_name, Gender, translate_text_flag=translate_text_flag, 
#                                         no_silence=no_silence, speed=speed, tts_save_path=tts_save_path, 
#                                         long_sentence=long_sentence)
#     return edge_save_path

# text="Machine learning is the study of computer algorithms that improve automatically through experience. It is seen as a subset of artificial intelligence. Machine learning algorithms build a model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to do so. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop conventional algorithms to perform the needed tasks."
# save_path = tts(text, Language='English',Gender="Male")
# print(save_path)
# import simpleaudio as sa
# def play_sound(filename):
#     wave_obj = sa.WaveObject.from_wave_file(filename)
#     play_obj = wave_obj.play()
#     play_obj.wait_done()
# play_sound(save_path)



























# edge_save_path=talk(text)
# print(f"Audio File Save at: {edge_save_path}")

# text = "a quick brown fox jumps over the lazy dog and the dog barks loudly"
# Language = "English" # @param ['English','Hindi','Bengali','Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Bulgarian', 'Bosnian', 'Catalan', 'Czech', 'Welsh', 'Danish', 'German', 'Greek', 'Spanish', 'French', 'Irish', 'Galician', 'Gujarati', 'Hebrew', 'Croatian', 'Hungarian', 'Indonesian', 'Icelandic', 'Italian', 'Japanese', 'Javanese', 'Georgian', 'Kazakh', 'Khmer', 'Kannada', 'Korean', 'Lao', 'Lithuanian', 'Latvian', 'Macedonian', 'Malayalam', 'Mongolian', 'Marathi', 'Malay', 'Maltese', 'Burmese', 'Norwegian Bokmål', 'Nepali', 'Dutch', 'Polish', 'Pashto', 'Portuguese', 'Romanian', 'Russian', 'Sinhala', 'Slovak', 'Slovenian', 'Somali', 'Albanian', 'Serbian', 'Sundanese', 'Swedish', 'Swahili', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Chinese', 'Zulu']
# no_silence = False
# Gender = "Male"# @param ['Male', 'Female']
# translate_text_flag=True
# no_silence=True
# speed=1
# tts_save_path='temp.wav'
# edge_save_path=edge_tts_pipeline(text,Language,Gender=Gender,translate_text_flag=translate_text_flag,no_silence=no_silence,speed=speed,tts_save_path=tts_save_path)
# print(f"Audio File Save at: {edge_save_path}")

# from microsoft_tts import edge_tts_pipeline
# def tts(text,tts_save_path=''):
#     # text = "a quick brown fox jumps over the lazy dog and the dog barks loudly"
#     Language = "English" # @param ['English','Hindi','Bengali','Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Bulgarian', 'Bosnian', 'Catalan', 'Czech', 'Welsh', 'Danish', 'German', 'Greek', 'Spanish', 'French', 'Irish', 'Galician', 'Gujarati', 'Hebrew', 'Croatian', 'Hungarian', 'Indonesian', 'Icelandic', 'Italian', 'Japanese', 'Javanese', 'Georgian', 'Kazakh', 'Khmer', 'Kannada', 'Korean', 'Lao', 'Lithuanian', 'Latvian', 'Macedonian', 'Malayalam', 'Mongolian', 'Marathi', 'Malay', 'Maltese', 'Burmese', 'Norwegian Bokmål', 'Nepali', 'Dutch', 'Polish', 'Pashto', 'Portuguese', 'Romanian', 'Russian', 'Sinhala', 'Slovak', 'Slovenian', 'Somali', 'Albanian', 'Serbian', 'Sundanese', 'Swedish', 'Swahili', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Chinese', 'Zulu']
#     no_silence = False
#     Gender = "Male"# @param ['Male', 'Female']
#     translate_text_flag=True
#     no_silence=True
#     speed=1
#     # tts_save_path='temp.wav'
#     long_sentence=True
#     edge_save_path=edge_tts_pipeline(text,Language,Gender=Gender,translate_text_flag=translate_text_flag,no_silence=no_silence,speed=speed,tts_save_path=tts_save_path,long_sentence=long_sentence)
#     print(f"Audio File Save at: {edge_save_path}")
#     return edge_save_path