rathapech committed on
Commit
722f574
·
verified ·
1 Parent(s): 50a2f63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -1,24 +1,32 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- from IPython.display import Audio as IPythonAudio
4
 
5
- itt_pipe = pipeline("image-to-text",
6
- model="Salesforce/blip-image-captioning-base")
 
7
 
8
  def get_pipeline_prediction(pil_image):
9
 
10
- pipeline_output = itt_pipe(pil_image)
11
-
 
 
12
  processed_image = render_results_in_image(pil_image,
13
  pipeline_output)
14
- return processed_image
15
-
16
- text = gr.Interface(
17
- fn=get_pipeline_prediction,
18
- inputs=gr.Image(label="Input image", type="pil"), outputs=gr.Image(label="Text describe the image", type="pil")
 
 
 
 
 
 
19
  )
20
- text.launch()
21
- print(text)
22
  #text = itt_pipe(input)
23
 
24
 
 
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
+ od_pipe = pipeline("object-detection", model="facebook/detr-resnet-50")
5
+ tts_pipe = pipeline("text-to-speech",
6
+ model="kakao-enterprise/vits-ljs")
7
 
8
  def get_pipeline_prediction(pil_image):
9
 
10
+ pipeline_output = od_pipe(pil_image)
11
+ text = summarize_predictions_natural_language(pipeline_output)
12
+ #text = "Hello, my name is Ratha"
13
+ gen_audio = tts_pipe(text)
14
  processed_image = render_results_in_image(pil_image,
15
  pipeline_output)
16
+ rate= gen_audio["sampling_rate"]
17
+ return processed_image, text, (rate, gen_audio["audio"][0])
18
+
19
+ demo = gr.Interface(
20
+ fn=get_pipeline_prediction,
21
+ inputs=gr.Image(label="Input image",
22
+ type="pil"),
23
+ outputs= [
24
+ gr.Image(label="Output image with predicted instances", type="pil"),
25
+ gr.Textbox(label="Prediction Summary"),
26
+ gr.Audio(label="Generated Speech")]
27
  )
28
+
29
+ demo.launch()
30
  #text = itt_pipe(input)
31
 
32