Nitzz4952 committed
Commit 995d7c0 · verified · 1 Parent(s): f9ed277

Update app.py

Files changed (1)
app.py +0 -23
app.py CHANGED
@@ -2,32 +2,14 @@ import gradio as gr
 from PIL import Image, ImageDraw, ImageFont
 import scipy.io.wavfile as wavfile
 
-
-# Use a pipeline as a high-level helper
 from transformers import pipeline
 
-# model_path = ("../Models/models--facebook--detr-resnet-50/snapshots"
-#               "/1d5f47bd3bdd2c4bbfa585418ffe6da5028b4c0b")
-#
-# tts_model_path = ("../Models/models--kakao-enterprise--vits-ljs/snapshots"
-#                   "/3bcb8321394f671bd948ebf0d086d694dda95464")
-
-
 narrator = pipeline("text-to-speech",
                     model="kakao-enterprise/vits-ljs")
 
 object_detector = pipeline("object-detection",
                            model="facebook/detr-resnet-50")
 
-# object_detector = pipeline("object-detection",
-#                            model=model_path)
-#
-# narrator = pipeline("text-to-speech",
-#                     model=tts_model_path)
-
-# [{'score': 0.9996405839920044, 'label': 'person', 'box': {'xmin': 435, 'ymin': 282, 'xmax': 636, 'ymax': 927}}, {'score': 0.9995879530906677, 'label': 'dog', 'box': {'xmin': 570, 'ymin': 694, 'xmax': 833, 'ymax': 946}}]
-
-# Define the function to generate audio from text
 def generate_audio(text):
     # Generate the narrated text
     narrated_text = narrator(text)
@@ -39,10 +21,6 @@ def generate_audio(text):
     # Return the path to the saved audio file
     return "output.wav"
 
-# Could you please write me a python code that will take list of detection object as an input and it will give the response that will include all the objects (labels) provided in the input. For example if the input is like this: [{'score': 0.9996405839920044, 'label': 'person', 'box': {'xmin': 435, 'ymin': 282, 'xmax': 636, 'ymax': 927}}, {'score': 0.9995879530906677, 'label': 'dog', 'box': {'xmin': 570, 'ymin': 694, 'xmax': 833, 'ymax': 946}}]
-# The output should be, This pictuture contains 1 person and 1 dog. If there are multiple objects, do not add 'and' between every objects but 'and' should be at the end only
-
-
 def read_objects(detection_objects):
     # Initialize counters for each object label
     object_counts = {}
@@ -136,6 +114,5 @@ def detect_object(image):
 demo = gr.Interface(fn=detect_object,
                     inputs=[gr.Image(label="Select Image",type="pil")],
                     outputs=[gr.Image(label="Processed Image", type="pil"), gr.Audio(label="Generated Audio")],
-                    title="@GenAILearniverse Project 7: Object Detector with Audio",
                     description="THIS APPLICATION WILL BE USED TO HIGHLIGHT OBJECTS AND GIVES AUDIO DESCRIPTION FOR THE PROVIDED INPUT IMAGE.")
 demo.launch()
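Note: the diff elides the middle of generate_audio (old lines 34-38), where the narration is written to disk. A minimal sketch of what that elided body plausibly looks like, assuming the transformers text-to-speech pipeline returns a dict of the form {"audio": ndarray, "sampling_rate": int} and that VITS emits a batched (1, num_samples) array; this is a reconstruction, not the verbatim file contents:

import scipy.io.wavfile as wavfile

def generate_audio(text):
    # Generate the narrated text
    narrated_text = narrator(text)

    # Assumption: the TTS pipeline output is {"audio": np.ndarray, "sampling_rate": int};
    # take the first row of the batched audio array before writing the WAV file.
    wavfile.write("output.wav",
                  rate=narrated_text["sampling_rate"],
                  data=narrated_text["audio"][0])

    # Return the path to the saved audio file
    return "output.wav"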
 
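The prompt comment removed in this commit spells out what read_objects should produce ("This picture contains 1 person and 1 dog.", with 'and' only before the last item), but the diff elides the function body beyond its first two lines. A minimal sketch that satisfies that description; everything beyond the read_objects and detection_objects names shown in the diff is illustrative:

def read_objects(detection_objects):
    # Initialize counters for each object label
    object_counts = {}
    for obj in detection_objects:
        label = obj["label"]
        object_counts[label] = object_counts.get(label, 0) + 1

    # Build phrases like "1 person" or "2 dogs"
    parts = [f"{count} {label}{'s' if count > 1 else ''}"
             for label, count in object_counts.items()]

    # Join with commas, putting "and" only before the final item
    if not parts:
        return "This picture contains no recognizable objects."
    if len(parts) == 1:
        return f"This picture contains {parts[0]}."
    return f"This picture contains {', '.join(parts[:-1])} and {parts[-1]}."

On the detection list quoted in the removed comment, this returns "This picture contains 1 person and 1 dog."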