rayl-aoit committed on
Commit
e1c8796
·
verified ·
1 Parent(s): a55bcb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -5
app.py CHANGED
@@ -1,17 +1,30 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
  playground = gr.Blocks()
5
 
6
  image_pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
7
  summary_pipe = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
  ner_pipe = pipeline("ner", model="dslim/bert-base-NER")
 
9
 
 
 
 
 
 
 
 
 
 
10
 
11
  def launch_image_pipe(input):
12
  out = image_pipe(input)
13
- return out[0]['generated_text']
14
-
 
 
15
  def translate(input_text, source, target):
16
  try:
17
  model = f"Helsinki-NLP/opus-mt-{source}-{target}"
@@ -99,16 +112,16 @@ with playground:
99
  """)
100
  with gr.Column(scale=1):
101
  ITT_button = gr.Button(value="Start Process", variant="primary")
102
-
103
-
104
  with gr.Row():
105
  with gr.Column():
106
  img = gr.Image(type='pil')
107
  with gr.Column():
108
  generated_textbox = gr.Textbox(lines=2, placeholder="", label="Generated Text")
 
109
  ITT_Clear_button = gr.ClearButton(components=[img, generated_textbox], value="Clear")
110
 
111
- ITT_button.click(launch_image_pipe, inputs=[img], outputs=[generated_textbox])
112
 
113
  with gr.TabItem("Text"):
114
  with gr.Row():
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from IPython.display import Audio as IPythonAudio
4
 
5
  playground = gr.Blocks()
6
 
7
  image_pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
8
  summary_pipe = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
9
  ner_pipe = pipeline("ner", model="dslim/bert-base-NER")
10
+ narrator = pipeline("text-to-speech", model="./models/kakao-enterprise/vits-ljs")
11
 
12
+ def generate_audio(text):
13
+ # Generate speech from text
14
+ narrated_text = narrator(text)
15
+ audio_data = narrated_text["audio"][0]
16
+ sampling_rate = narrated_text["sampling_rate"]
17
+
18
+ # Use IPythonAudio to play the audio
19
+ audio = IPythonAudio(audio_data, rate=sampling_rate)
20
+ return audio
21
 
22
  def launch_image_pipe(input):
23
  out = image_pipe(input)
24
+ text = out[0]['generated_text']
25
+ audio = generate_audio(text)
26
+ return text, audio
27
+
28
  def translate(input_text, source, target):
29
  try:
30
  model = f"Helsinki-NLP/opus-mt-{source}-{target}"
 
112
  """)
113
  with gr.Column(scale=1):
114
  ITT_button = gr.Button(value="Start Process", variant="primary")
115
+
 
116
  with gr.Row():
117
  with gr.Column():
118
  img = gr.Image(type='pil')
119
  with gr.Column():
120
  generated_textbox = gr.Textbox(lines=2, placeholder="", label="Generated Text")
121
+ audio_output = gr.Audio(label="Generated Audio")
122
  ITT_Clear_button = gr.ClearButton(components=[img, generated_textbox], value="Clear")
123
 
124
+ ITT_button.click(launch_image_pipe, inputs=[img], outputs=[generated_textbox, audio_output])
125
 
126
  with gr.TabItem("Text"):
127
  with gr.Row():