englissi committed on
Commit
2ec67ac
·
verified ·
1 Parent(s): b8b1773

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -1,29 +1,39 @@
1
  import gradio as gr
2
  from gtts import gTTS
3
- import librosa
 
4
  import os
 
5
 
6
  def text_to_speech(prompt):
7
  # gTTS๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Bulgarian ํ…์ŠคํŠธ๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
8
- tts = gTTS(text=prompt, lang="bg") # 'bg'๋Š” Bulgarian ์–ธ์–ด ์ฝ”๋“œ์ž…๋‹ˆ๋‹ค.
9
  audio_file = "output.mp3"
10
  tts.save(audio_file)
11
 
12
- # librosa๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ mp3 ํŒŒ์ผ์„ numpy ๋ฐฐ์—ด๊ณผ ์ƒ˜ํ”Œ๋ ˆ์ดํŠธ๋กœ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
13
- # sr=None๋กœ ์„ค์ •ํ•˜๋ฉด ์›๋ณธ ์ƒ˜ํ”Œ๋ ˆ์ดํŠธ๋ฅผ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค.
14
- audio_array, sample_rate = librosa.load(audio_file, sr=None)
15
 
 
 
 
 
 
 
 
 
 
 
16
  # ์ž„์‹œ mp3 ํŒŒ์ผ ์‚ญ์ œ
17
  os.remove(audio_file)
18
 
19
  # gr.Audio(type="numpy")๋Š” (numpy array, sample_rate) ํŠœํ”Œ์„ ๊ธฐ๋Œ€ํ•ฉ๋‹ˆ๋‹ค.
20
- return audio_array, sample_rate
21
 
22
  with gr.Blocks() as demo:
23
  gr.Markdown("## Bulgarian Text-to-Speech (TTS)")
24
  with gr.Row():
25
  input_prompt = gr.Textbox(label="Enter a prompt in Bulgarian:")
26
- # Set type to "numpy" so the component works with numpy arrays.
27
  output_audio = gr.Audio(label="Generated Speech", type="numpy")
28
  generate_button = gr.Button("Generate Speech")
29
 
 
1
  import gradio as gr
2
  from gtts import gTTS
3
+ from pydub import AudioSegment
4
+ import numpy as np
5
  import os
6
+ import io
7
 
8
  def text_to_speech(prompt):
9
  # gTTS๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Bulgarian ํ…์ŠคํŠธ๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
10
+ tts = gTTS(text=prompt, lang="bg")
11
  audio_file = "output.mp3"
12
  tts.save(audio_file)
13
 
14
+ # pydub์„ ์‚ฌ์šฉํ•˜์—ฌ mp3 ํŒŒ์ผ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
15
+ sound = AudioSegment.from_mp3(audio_file)
 
16
 
17
+ # pydub์˜ raw data๋ฅผ numpy ๋ฐฐ์—ด๋กœ ๋ณ€ํ™˜ (16๋น„ํŠธ ์ •์ˆ˜ํ˜•)
18
+ samples = np.array(sound.get_array_of_samples())
19
+
20
+ # ๋ชจ๋…ธ ์ฑ„๋„์ด ์•„๋‹ˆ๋ผ๋ฉด, ์ฑ„๋„์„ ํ•ฉ์นฉ๋‹ˆ๋‹ค.
21
+ if sound.channels > 1:
22
+ samples = samples.reshape((-1, sound.channels))
23
+ samples = samples.mean(axis=1).astype(np.int16)
24
+
25
+ sample_rate = sound.frame_rate
26
+
27
  # ์ž„์‹œ mp3 ํŒŒ์ผ ์‚ญ์ œ
28
  os.remove(audio_file)
29
 
30
  # gr.Audio(type="numpy")๋Š” (numpy array, sample_rate) ํŠœํ”Œ์„ ๊ธฐ๋Œ€ํ•ฉ๋‹ˆ๋‹ค.
31
+ return samples, sample_rate
32
 
33
  with gr.Blocks() as demo:
34
  gr.Markdown("## Bulgarian Text-to-Speech (TTS)")
35
  with gr.Row():
36
  input_prompt = gr.Textbox(label="Enter a prompt in Bulgarian:")
 
37
  output_audio = gr.Audio(label="Generated Speech", type="numpy")
38
  generate_button = gr.Button("Generate Speech")
39