Hantr committed on
Commit
de591bd
·
1 Parent(s): c4854be
Files changed (1) hide show
  1. app.py +8 -20
app.py CHANGED
@@ -5,16 +5,15 @@ import matplotlib.pyplot as plt
5
  import numpy as np
6
  from PIL import Image
7
  import tensorflow as tf
8
- from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation, AutoFeatureExtractor, AutoModelForImageCaptioning
9
 
10
  feature_extractor = SegformerFeatureExtractor.from_pretrained(
11
  "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
12
  )
13
- seg_model = TFSegformerForSemanticSegmentation.from_pretrained(
14
  "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
15
  )
16
 
17
- caption_model = AutoModelForImageCaptioning.from_pretrained("facebook/deit-base-cc-turbo")
18
 
19
  def ade_palette():
20
  """ADE20K palette that maps each class to RGB values."""
@@ -83,7 +82,7 @@ def sepia(input_img):
83
  input_img = Image.fromarray(input_img)
84
 
85
  inputs = feature_extractor(images=input_img, return_tensors="tf")
86
- outputs = seg_model(**inputs)
87
  logits = outputs.logits
88
 
89
  logits = tf.transpose(logits, [0, 2, 3, 1])
@@ -106,12 +105,12 @@ def sepia(input_img):
106
  return fig
107
 
108
 
109
- def segment_and_caption(input_img):
110
  input_img = Image.fromarray(input_img)
111
 
112
  # ์„ธ๊ทธ๋ฉ˜ํ…Œ์ด์…˜ ์ˆ˜ํ–‰
113
  inputs = feature_extractor(images=input_img, return_tensors="tf")
114
- outputs = seg_model(**inputs)
115
  logits = outputs.logits
116
 
117
  logits = tf.transpose(logits, [0, 2, 3, 1])
@@ -120,23 +119,12 @@ def segment_and_caption(input_img):
120
  )
121
  seg = tf.math.argmax(logits, axis=-1)[0]
122
 
123
- # ์„ธ๊ทธ๋ฉ˜ํ…Œ์ด์…˜ ๊ฒฐ๊ณผ๋ฅผ ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜
124
- seg_text = ""
125
- for label, label_name in enumerate(labels_list):
126
- count = np.sum(seg.numpy() == label)
127
- seg_text += f"{label_name}: {count} pixels\n"
128
 
129
- # ์ด๋ฏธ์ง€ ์บก์…˜ ์ƒ์„ฑ
130
- caption_input = caption_model.generate(input_img, max_length=20, num_return_sequences=1)
131
- caption_text = caption_input[0]['text']
132
 
133
- # ์„ธ๊ทธ๋ฉ˜ํ…Œ์ด์…˜ ๊ฒฐ๊ณผ์™€ ์บก์…˜์„ ๋ฐ˜ํ™˜
134
- return input_img, seg_text, caption_text
135
-
136
-
137
- demo = gr.Interface(fn=segment_and_caption,
138
  inputs=gr.Image(shape=(1024, 1024)),
139
- outputs=["image", "text", "text"],
140
  examples=["city-1.jpg", "city-2.jpg", "city-3.jpg", "city-4.jpg", "city-5.jpg"],
141
  allow_flagging='never')
142
 
 
5
  import numpy as np
6
  from PIL import Image
7
  import tensorflow as tf
8
+ from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation
9
 
10
  feature_extractor = SegformerFeatureExtractor.from_pretrained(
11
  "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
12
  )
13
+ model = TFSegformerForSemanticSegmentation.from_pretrained(
14
  "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
15
  )
16
 
 
17
 
18
  def ade_palette():
19
  """ADE20K palette that maps each class to RGB values."""
 
82
  input_img = Image.fromarray(input_img)
83
 
84
  inputs = feature_extractor(images=input_img, return_tensors="tf")
85
+ outputs = model(**inputs)
86
  logits = outputs.logits
87
 
88
  logits = tf.transpose(logits, [0, 2, 3, 1])
 
105
  return fig
106
 
107
 
108
+ def segment_image(input_img):
109
  input_img = Image.fromarray(input_img)
110
 
111
  # ์„ธ๊ทธ๋ฉ˜ํ…Œ์ด์…˜ ์ˆ˜ํ–‰
112
  inputs = feature_extractor(images=input_img, return_tensors="tf")
113
+ outputs = model(**inputs)
114
  logits = outputs.logits
115
 
116
  logits = tf.transpose(logits, [0, 2, 3, 1])
 
119
  )
120
  seg = tf.math.argmax(logits, axis=-1)[0]
121
 
122
+ return input_img, seg
 
 
 
 
123
 
 
 
 
124
 
125
+ demo = gr.Interface(fn=segment_image,
 
 
 
 
126
  inputs=gr.Image(shape=(1024, 1024)),
127
+ outputs=["image", "image"],
128
  examples=["city-1.jpg", "city-2.jpg", "city-3.jpg", "city-4.jpg", "city-5.jpg"],
129
  allow_flagging='never')
130