# Hugging Face Space application file (file size: 5,846 bytes).
# Revisions shown by the page viewer: 569f484, 948bae2, 26aaba4.
import gradio as gr
from gradio_image_prompter import ImagePrompter
import Predict
import XGBoost_utils
import numpy as np
import cv2 as cv
# Product-category label -> position of that category in the one-hot
# product-group vector that `attention` builds.
GENERAL_CATEGORY = {
    'Potatoes / Vegetables / Fruit': 0,
    'Chemical products': 1,
    'Photo / Film / Optical items': 2,
    'Catering industry': 3,
    'Industrial products other': 4,
    'Media': 5,
    'Real estate': 6,
    'Government': 7,
    'Personnel advertisements': 8,
    'Cars / Commercial vehicles': 9,
    'Cleaning products': 10,
    'Retail': 11,
    'Fragrances': 12,
    'Footwear / Leather goods': 13,
    'Software / Automation': 14,
    'Telecommunication equipment': 15,
    'Tourism': 16,
    'Transport/Communication companies': 17,
    'Transport services': 18,
    'Insurances': 19,
    'Meat / Fish / Poultry': 20,
    'Detergents': 21,
    'Foods General': 22,
    'Other services': 23,
    'Banks and Financial Services': 24,
    'Office Products': 25,
    'Household Items': 26,
    'Non-alcoholic beverages': 27,
    'Hair, Oral and Personal Care': 28,
    'Fashion and Clothing': 29,
    'Other products and Services': 30,
    'Paper products': 31,
    'Alcohol and Other Stimulants': 32,
    'Medicines': 33,
    'Recreation and Leisure': 34,
    'Electronics': 35,
    'Home Furnishings': 36,
    'Products for Business Use': 37,
}

# Alphabetically sorted labels, used as the choices of the category dropdown.
CATEGORIES = sorted(GENERAL_CATEGORY)
def _as_count(value):
    """Coerce a box count coming from the UI to a non-negative int.

    An empty field (``None``) counts as zero and a float such as ``2.0`` is
    truncated to ``2``; negative values are clamped to zero.
    """
    return max(int(value or 0), 0)


def _box_area(box):
    """Return the absolute area of one bounding-box entry.

    The two corners are read from indices 0/1 and 3/4 of ``box``; the other
    indices are not used here (presumably point-type markers emitted by
    ImagePrompter -- confirm against the component's output format).
    """
    x1, y1 = box[0], box[1]
    x2, y2 = box[3], box[4]
    return np.abs((x1 - x2) * (y1 - y2))


def calculate_areas(prompts, brand_num, pictorial_num, text_num):
    """Compute the surface share of each ad element relative to the whole image.

    The boxes in ``prompts["points"]`` are expected in drawing order: brand
    boxes, then pictorial boxes, then text boxes, followed by the
    advertisement box (second to last) and the whole-image box (last).

    Args:
        prompts: Mapping with a ``"points"`` sequence of box entries.
        brand_num: Number of leading brand boxes (int, float or ``None``).
        pictorial_num: Number of pictorial boxes following the brand boxes.
        text_num: Number of text boxes following the pictorial boxes.

    Returns:
        A 4-tuple ``(brand, pictorial, text, ad)`` with each summed area
        expressed as a percentage of the whole-image box area.

    Raises:
        ValueError: If fewer boxes were supplied than the counts require
            (element boxes + advertisement box + whole-image box), or if the
            whole-image box has zero area.
    """
    boxes = prompts["points"]

    # Counts may arrive as floats or None from a numeric UI field; range()
    # and slicing need real ints.
    n_brand = _as_count(brand_num)
    n_pictorial = _as_count(pictorial_num)
    n_text = _as_count(text_num)

    n_elements = n_brand + n_pictorial + n_text
    if len(boxes) < n_elements + 2:
        # Without this check the advertisement / whole-image boxes would
        # silently alias one of the element boxes.
        raise ValueError(
            f"Expected at least {n_elements + 2} bounding boxes "
            f"({n_elements} element boxes + advertisement + whole image), "
            f"got {len(boxes)}."
        )

    pictorial_start = n_brand
    text_start = n_brand + n_pictorial

    brand_surf = sum(_box_area(box) for box in boxes[:pictorial_start])
    pictorial_surf = sum(_box_area(box) for box in boxes[pictorial_start:text_start])
    text_surf = sum(_box_area(box) for box in boxes[text_start:n_elements])

    ad_size = _box_area(boxes[-2])
    whole_size = _box_area(boxes[-1])
    if whole_size == 0:
        raise ValueError("The whole-image bounding box has zero area.")

    return (brand_surf / whole_size * 100,
            pictorial_surf / whole_size * 100,
            text_surf / whole_size * 100,
            ad_size / whole_size * 100)
def attention(whole_display_prompt, ad, context,
              brand_num, pictorial_num, text_num,
              category, ad_location, gaze_type):
    """Predict gaze time for an advertisement shown with its context page.

    Args:
        whole_display_prompt: ImagePrompter value for the full (ad + context)
            image; its ``"points"`` entry is passed to ``calculate_areas``.
        ad: PIL image of the advertisement alone.
        context: PIL image of the context page alone.
        brand_num: Number of brand bounding boxes drawn.
        pictorial_num: Number of pictorial bounding boxes drawn.
        text_num: Number of text bounding boxes drawn.
        category: Product category label; must be a key of
            ``GENERAL_CATEGORY``.
        ad_location: ``"left"`` or ``"right"`` (case and surrounding
            whitespace are ignored); any other value is forwarded to the
            predictor as ``None``.
        gaze_type: Forwarded unchanged as ``Gaze_Time_Type`` (the UI asks for
            "Ad" or "Brand").

    Returns:
        The prediction from ``Predict.Ad_Gaze_Prediction`` rounded to two
        decimals.

    Raises:
        KeyError: If ``category`` is not a key of ``GENERAL_CATEGORY``.
    """
    text_detection_model_path = 'EAST-Text-Detection/frozen_east_text_detection.pb'
    LDA_model_pth = 'LDA_Model_trained/lda_model_best_tot.model'
    training_ad_text_dictionary_path = 'LDA_Model_trained/object_word_dictionary'
    training_lang_preposition_path = 'LDA_Model_trained/dutch_preposition'

    # NOTE(review): the caption-based topic embeddings are disabled and
    # replaced by random vectors below, so repeated calls with identical
    # inputs can give different predictions. Re-enable once available:
    # caption_ad = XGBoost_utils.Caption_Generation(ad)
    # caption_context = XGBoost_utils.Caption_Generation(context)
    # ad_topic = XGBoost_utils.Topic_emb(caption_ad)
    # ctpg_topic = XGBoost_utils.Topic_emb(caption_context)
    ad_topic = np.random.randn(1, 768)
    ctpg_topic = np.random.randn(1, 768)

    # One-hot encoding of the product category, sized from the category table
    # so the two cannot drift apart.
    prod_group = np.zeros(len(GENERAL_CATEGORY))
    prod_group[GENERAL_CATEGORY[category]] = 1

    # The location comes from a free-text box, so tolerate "Left", " right ",
    # etc. Anything other than left/right (e.g. "full") maps to None.
    normalized_location = (ad_location or "").strip().lower()
    ad_loc = {'left': 0, 'right': 1}.get(normalized_location)

    brand_percent, visual_percent, text_percent, adv_size_percent = calculate_areas(
        whole_display_prompt, brand_num, pictorial_num, text_num)
    # The ad-size share is rescaled from a 0-100 to a 0-10 range before being
    # handed to the model (presumably matching the training features --
    # TODO confirm).
    surfaces = [brand_percent, visual_percent, text_percent, adv_size_percent * 10 / 100]

    # Both images are converted to RGB arrays and resized with
    # cv.resize(..., (640, 832)), i.e. width 640 and height 832.
    ad = ad.convert('RGB')
    ad = cv.resize(np.array(ad), (640, 832))
    context = context.convert('RGB')
    context = cv.resize(np.array(context), (640, 832))

    Gaze = Predict.Ad_Gaze_Prediction(
        input_ad_path=ad, input_ctpg_path=context, ad_location=ad_loc,
        text_detection_model_path=text_detection_model_path,
        LDA_model_pth=LDA_model_pth,
        training_ad_text_dictionary_path=training_ad_text_dictionary_path,
        training_lang_preposition_path=training_lang_preposition_path,
        training_language='dutch',
        ad_embeddings=ad_topic, ctpg_embeddings=ctpg_topic,
        surface_sizes=surfaces, Product_Group=prod_group,
        obj_detection_model_pth=None, num_topic=20, Gaze_Time_Type=gaze_type)

    return np.round(Gaze, 2)
def greet(name, intensity):
    """Return ``"Hello "`` repeated ``intensity`` times, then ``name`` and ``"!"``."""
    repeated_hello = "Hello " * intensity
    pieces = [repeated_hello, name, "!"]
    return "".join(pieces)
# Gradio UI wiring: the inputs are listed in the same order as the
# parameters of `attention`.
demo = gr.Interface(
    fn=attention,
    inputs=[
        ImagePrompter(label="Upload Entire (Ad+Context) Image, and Draw Bounding Boxes"),
        gr.Image(label="Ad Image", sources=['upload', 'webcam'], type="pil"),
        gr.Image(label="Context Image", sources=['upload', 'webcam'], type="pil"),
        # precision=0 makes Gradio deliver the counts as ints (they are used
        # as range() bounds); value=0 / minimum=0 avoid empty or negative
        # counts.
        gr.Number(label="Number of brand bounding boxes drawn",
                  value=0, minimum=0, precision=0),
        gr.Number(label="Number of pictorial bounding boxes drawn",
                  value=0, minimum=0, precision=0),
        gr.Number(label="Number of text bounding boxes drawn",
                  value=0, minimum=0, precision=0),
        gr.Dropdown(CATEGORIES, label="Product Category"),
        gr.Textbox(label="Ad Location", info="Enter left or right or full"),
        gr.Textbox(label="Gaze Type", info="Enter Ad or Brand"),
    ],
    outputs=[gr.Number(label="Predicted Gaze (sec)")],
    title="Gaze Prediction",
    description="In the section right below, please first upload the entire image that contains both ad and context, then draw bounding boxes. Please draw ALL Bounding Boxes in the order of: (1) brand, (2) pictorial, (3) text elements, (4) advertisement and (5) the entire image here. NOTE: Each ad element can have more than 1 boxes.",
    theme=gr.themes.Soft()
)

# Start the web server when the file is executed.
demo.launch(share=True)