Spaces:

Hantr
/

Hugging_Face_space_2

Runtime error

Hantr committed on Nov 7, 2023

Commit

c4854be

1 Parent(s): c421198

finish

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 from PIL import Image
 import tensorflow as tf
-from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation, BigBirdForImageCaptioning
 feature_extractor = SegformerFeatureExtractor.from_pretrained(
     "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
@@ -14,7 +14,7 @@ seg_model = TFSegformerForSemanticSegmentation.from_pretrained(
     "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
 )
-caption_model = BigBirdForImageCaptioning.from_pretrained("bigbird/image-captioning-base")
 def ade_palette():
     """ADE20K palette that maps each class to RGB values."""
@@ -108,20 +108,29 @@ def sepia(input_img):
 def segment_and_caption(input_img):
     input_img = Image.fromarray(input_img)
     inputs = feature_extractor(images=input_img, return_tensors="tf")
     outputs = seg_model(**inputs)
     logits = outputs.logits
     logits = tf.transpose(logits, [0, 2, 3, 1])
     logits = tf.image.resize(
         logits, input_img.size[::-1]
     )
     seg = tf.math.argmax(logits, axis=-1)[0]
     seg_text = ""
     for label, label_name in enumerate(labels_list):
         count = np.sum(seg.numpy() == label)
         seg_text += f"{label_name}: {count} pixels\n"
-    caption = caption_model.generate(input_img, max_length=20, num_return_sequences=1, return_dict_in_generate=True)
-    caption_text = caption[0]['text']
     return input_img, seg_text, caption_text

 import numpy as np
 from PIL import Image
 import tensorflow as tf
+from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation, AutoFeatureExtractor, AutoModelForImageCaptioning
 feature_extractor = SegformerFeatureExtractor.from_pretrained(
     "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
     "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"
 )
+caption_model = AutoModelForImageCaptioning.from_pretrained("facebook/deit-base-cc-turbo")
 def ade_palette():
     """ADE20K palette that maps each class to RGB values."""
 def segment_and_caption(input_img):
     input_img = Image.fromarray(input_img)
+    # Run segmentation
     inputs = feature_extractor(images=input_img, return_tensors="tf")
     outputs = seg_model(**inputs)
     logits = outputs.logits
     logits = tf.transpose(logits, [0, 2, 3, 1])
     logits = tf.image.resize(
         logits, input_img.size[::-1]
     )
     seg = tf.math.argmax(logits, axis=-1)[0]
+    # Convert the segmentation result to text
     seg_text = ""
     for label, label_name in enumerate(labels_list):
         count = np.sum(seg.numpy() == label)
         seg_text += f"{label_name}: {count} pixels\n"
+    # Generate the image caption
+    caption_input = caption_model.generate(input_img, max_length=20, num_return_sequences=1)
+    caption_text = caption_input[0]['text']
+    # Return the segmentation result and the caption
     return input_img, seg_text, caption_text