rayl-aoit committed
Commit f1908d5 · verified · 1 parent: 7af3726

Update app.py

Files changed (1)
  app.py  +14 -14
app.py CHANGED
@@ -1,23 +1,23 @@
 import gradio as gr
 from transformers import pipeline
-from IPython.display import Audio as IPythonAudio
+# from IPython.display import Audio as IPythonAudio
 
 playground = gr.Blocks()
 
 image_pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 summary_pipe = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 ner_pipe = pipeline("ner", model="dslim/bert-base-NER")
-narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
+# narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
 
-def generate_audio(text):
-    # Generate speech from text
-    narrated_text = narrator(text)
-    audio_data = narrated_text["audio"][0]
-    sampling_rate = narrated_text["sampling_rate"]
+# def generate_audio(text):
+#     # Generate speech from text
+#     narrated_text = narrator(text)
+#     audio_data = narrated_text["audio"][0]
+#     sampling_rate = narrated_text["sampling_rate"]
 
-    # Use IPythonAudio to play the audio
-    audio = IPythonAudio(audio_data, rate=sampling_rate)
-    return audio_data, sampling_rate
+#     # Use IPythonAudio to play the audio
+#     audio = IPythonAudio(audio_data, rate=sampling_rate)
+#     return audio_data, sampling_rate
 
 def launch_image_pipe(input):
     out = image_pipe(input)
@@ -117,12 +117,12 @@ with playground:
                 img = gr.Image(type='pil')
             with gr.Column():
                 generated_textbox = gr.Textbox(lines=2, placeholder="", label="Generated Text")
-                generate_audio_button = gr.Button(value="Generate Audio", variant="primary")
-                audio_output = gr.Audio(label="Generated Audio")
-                ITT_Clear_button = gr.ClearButton(components=[img, generated_textbox, audio_output], value="Clear")
+                # generate_audio_button = gr.Button(value="Generate Audio", variant="primary")
+                # audio_output = gr.Audio(label="Generated Audio")
+                ITT_Clear_button = gr.ClearButton(components=[img, generated_textbox], value="Clear")
 
         ITT_button.click(launch_image_pipe, inputs=[img], outputs=[generated_textbox])
-        generate_audio_button.click(generate_audio, inputs=[generated_textbox], outputs=[audio_output])
+        # generate_audio_button.click(generate_audio, inputs=[generated_textbox], outputs=[audio_output])
 
     with gr.TabItem("Text"):
         with gr.Row():
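
Note (not part of this commit): the commit disables the narration path by commenting it out rather than fixing it. If it were re-enabled without the IPython dependency, the text-to-speech output could feed gr.Audio directly. Below is a minimal sketch under that assumption, reusing the same kakao-enterprise/vits-ljs pipeline; gr.Audio expects a (sampling_rate, data) tuple, whereas the removed generate_audio returned (data, sampling_rate).

# Sketch only: a possible narration path without IPython, not the code in this commit.
import numpy as np
from transformers import pipeline

narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")

def generate_audio(text):
    # The TTS pipeline returns {"audio": ndarray, "sampling_rate": int}.
    narrated = narrator(text)
    audio_data = np.squeeze(narrated["audio"])
    sampling_rate = narrated["sampling_rate"]
    # gr.Audio takes (sampling_rate, data), so return in that order.
    return sampling_rate, audio_data

# Wiring, if the commented-out button and audio output were restored:
# generate_audio_button.click(generate_audio, inputs=[generated_textbox], outputs=[audio_output])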