englissi committed on
Commit
80d2986
·
verified ·
1 Parent(s): 564910c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -23
app.py CHANGED
@@ -5,37 +5,41 @@ import numpy as np
5
  import os
6
 
7
  def text_to_speech(prompt):
8
- # gTTS๋ฅผ ์ด์šฉํ•ด Bulgarian ํ…์ŠคํŠธ๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
9
- tts = gTTS(text=prompt, lang="bg")
10
- audio_file = "output.mp3"
11
- tts.save(audio_file)
 
 
 
 
12
 
13
- # pydub๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ mp3 ํŒŒ์ผ์„ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.
14
- sound = AudioSegment.from_mp3(audio_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # pydub์˜ raw ๋ฐ์ดํ„ฐ๋ฅผ numpy ๋ฐฐ์—ด๋กœ ๋ณ€ํ™˜ (int16)
17
- samples = np.array(sound.get_array_of_samples())
18
-
19
- # 만약 스테레오라면 모노로 변환 (채널 평균)
20
- if sound.channels > 1:
21
- samples = samples.reshape((-1, sound.channels))
22
- samples = samples.mean(axis=1)
23
-
24
- # int16 데이터를 float32로 정규화 (범위: [-1.0, 1.0])
25
- samples = samples.astype(np.float32) / 32768.0
26
- sample_rate = sound.frame_rate
27
-
28
- # 임시로 생성한 mp3 파일 삭제
29
- os.remove(audio_file)
30
-
31
- # gr.Audio(type="numpy")는 (numpy_array, sample_rate) 튜플을 기대합니다.
32
  return samples, sample_rate
33
 
34
  with gr.Blocks() as demo:
35
  gr.Markdown("## Bulgarian Text-to-Speech (TTS)")
36
  with gr.Row():
37
  input_prompt = gr.Textbox(label="Enter a prompt in Bulgarian:")
38
- # type์„ "numpy"๋กœ ์„ค์ •ํ•˜์—ฌ numpy ๋ฐฐ์—ด์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
39
  output_audio = gr.Audio(label="Generated Speech", type="numpy")
40
  generate_button = gr.Button("Generate Speech")
41
 
 
5
  import os
6
 
7
  def text_to_speech(prompt):
8
+ try:
9
+ # 1) gTTS๋กœ mp3 ์ƒ์„ฑ
10
+ tts = gTTS(text=prompt, lang="bg")
11
+ audio_file = "output.mp3"
12
+ tts.save(audio_file)
13
+ except Exception as e:
14
+ print("gTTS 생성 오류:", e)
15
+ raise e # ์—๋Ÿฌ๋ฅผ ๋‹ค์‹œ ๋ฐœ์ƒ์‹œ์ผœ Gradio์—์„œ ๊ฐ์ง€ํ•˜๋„๋ก ํ•จ
16
 
17
+ try:
18
+ # 2) mp3 -> numpy 변환 (pydub 사용)
19
+ sound = AudioSegment.from_mp3(audio_file)
20
+ samples = np.array(sound.get_array_of_samples())
21
+
22
+ # 스테레오면 모노로 변환
23
+ if sound.channels > 1:
24
+ samples = samples.reshape((-1, sound.channels))
25
+ samples = samples.mean(axis=1)
26
+
27
+ # int16 -> float32 정규화
28
+ samples = samples.astype(np.float32) / 32768.0
29
+ sample_rate = sound.frame_rate
30
+ except Exception as e:
31
+ print("pydub 로딩/처리 오류:", e)
32
+ raise e
33
+ finally:
34
+ if os.path.exists(audio_file):
35
+ os.remove(audio_file)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  return samples, sample_rate
38
 
39
  with gr.Blocks() as demo:
40
  gr.Markdown("## Bulgarian Text-to-Speech (TTS)")
41
  with gr.Row():
42
  input_prompt = gr.Textbox(label="Enter a prompt in Bulgarian:")
 
43
  output_audio = gr.Audio(label="Generated Speech", type="numpy")
44
  generate_button = gr.Button("Generate Speech")
45