englissi committed on
Commit
2e7a42e
·
verified ·
1 Parent(s): d1e7af9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -26
app.py CHANGED
@@ -3,41 +3,45 @@ from gtts import gTTS
3
  from pydub import AudioSegment
4
  import numpy as np
5
  import os
6
- import io
7
 
8
  def text_to_speech(prompt):
9
- # gTTS๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Bulgarian ํ…์ŠคํŠธ๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
10
- tts = gTTS(text=prompt, lang="bg")
11
- audio_file = "output.mp3"
12
- tts.save(audio_file)
13
-
14
- # pydub์„ ์‚ฌ์šฉํ•˜์—ฌ mp3 ํŒŒ์ผ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
15
- sound = AudioSegment.from_mp3(audio_file)
16
-
17
- # pydub์˜ raw data๋ฅผ numpy ๋ฐฐ์—ด๋กœ ๋ณ€ํ™˜ (16๋น„ํŠธ ์ •์ˆ˜ํ˜•)
18
- samples = np.array(sound.get_array_of_samples())
19
-
20
- # 모노 채널이 아니라면, 채널을 합칩니다.
21
- if sound.channels > 1:
22
- samples = samples.reshape((-1, sound.channels))
23
- samples = samples.mean(axis=1).astype(np.int16)
24
-
25
- sample_rate = sound.frame_rate
26
 
27
- # 임시 mp3 파일 삭제
28
- os.remove(audio_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # gr.Audio(type="numpy")는 (numpy array, sample_rate) 튜플을 기대합니다.
31
  return samples, sample_rate
32
 
33
  with gr.Blocks() as demo:
34
  gr.Markdown("## Bulgarian Text-to-Speech (TTS)")
35
- with gr.Row():
36
- input_prompt = gr.Textbox(label="Enter a prompt in Bulgarian:")
37
- output_audio = gr.Audio(label="Generated Speech", type="numpy")
38
  generate_button = gr.Button("Generate Speech")
39
 
40
  generate_button.click(text_to_speech, inputs=input_prompt, outputs=output_audio)
41
 
42
- if __name__ == "__main__":
43
- demo.launch()
 
3
  from pydub import AudioSegment
4
  import numpy as np
5
  import os
 
6
 
7
  def text_to_speech(prompt):
8
+ try:
9
+ # gTTS๋ฅผ ์ด์šฉํ•ด Bulgarian ํ…์ŠคํŠธ๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
10
+ tts = gTTS(text=prompt, lang="bg")
11
+ audio_file = "output.mp3"
12
+ tts.save(audio_file)
13
+ except Exception as e:
14
+ return f"TTS 생성 오류: {e}"
 
 
 
 
 
 
 
 
 
 
15
 
16
+ try:
17
+ # pydub์œผ๋กœ mp3 ํŒŒ์ผ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
18
+ sound = AudioSegment.from_mp3(audio_file)
19
+
20
+ # pydub๊ฐ€ ์ œ๊ณตํ•˜๋Š” raw ๋ฐ์ดํ„ฐ๋ฅผ NumPy ๋ฐฐ์—ด๋กœ ๋ณ€ํ™˜ (int16)
21
+ samples = np.array(sound.get_array_of_samples())
22
+
23
+ # 만약 스테레오라면 모노로 변환 (채널 평균)
24
+ if sound.channels > 1:
25
+ samples = samples.reshape((-1, sound.channels))
26
+ samples = samples.mean(axis=1)
27
+
28
+ # int16 데이터를 float32로 정규화 (범위: [-1.0, 1.0])
29
+ samples = samples.astype(np.float32) / 32768.0
30
+ sample_rate = sound.frame_rate
31
+ except Exception as e:
32
+ return f"오디오 처리 오류: {e}"
33
+ finally:
34
+ if os.path.exists(audio_file):
35
+ os.remove(audio_file)
36
 
 
37
  return samples, sample_rate
38
 
39
  with gr.Blocks() as demo:
40
  gr.Markdown("## Bulgarian Text-to-Speech (TTS)")
41
+ input_prompt = gr.Textbox(label="Enter a prompt in Bulgarian:")
42
+ output_audio = gr.Audio(label="Generated Speech", type="numpy")
 
43
  generate_button = gr.Button("Generate Speech")
44
 
45
  generate_button.click(text_to_speech, inputs=input_prompt, outputs=output_audio)
46
 
47
+ demo.launch()