Spaces:

Demo750
/

XGBoost_Gaze

Running

App Files Files Community

XGBoost_Gaze / Webpage.py

Demo750

Update Webpage.py

a4bd6f3 verified 2 months ago

raw

history blame

7.36 kB

	import gradio as gr
	from gradio_image_prompter import ImagePrompter
	import Predict
	import XGBoost_utils
	import numpy as np
	import cv2 as cv
	import torch
	from PIL import Image

	# Product-category label -> integer index. The index selects which slot of
	# the 38-element product-group vector is set to 1 in `attention`.
	GENERAL_CATEGORY = {
	    'Potatoes / Vegetables / Fruit': 0,
	    'Chemical products': 1,
	    'Photo / Film / Optical items': 2,
	    'Catering industry': 3,
	    'Industrial products other': 4,
	    'Media': 5,
	    'Real estate': 6,
	    'Government': 7,
	    'Personnel advertisements': 8,
	    'Cars / Commercial vehicles': 9,
	    'Cleaning products': 10,
	    'Retail': 11,
	    'Fragrances': 12,
	    'Footwear / Leather goods': 13,
	    'Software / Automation': 14,
	    'Telecommunication equipment': 15,
	    'Tourism': 16,
	    'Transport/Communication companies': 17,
	    'Transport services': 18,
	    'Insurances': 19,
	    'Meat / Fish / Poultry': 20,
	    'Detergents': 21,
	    'Foods General': 22,
	    'Other services': 23,
	    'Banks and Financial Services': 24,
	    'Office Products': 25,
	    'Household Items': 26,
	    'Non-alcoholic beverages': 27,
	    'Hair, Oral and Personal Care': 28,
	    'Fashion and Clothing': 29,
	    'Other products and Services': 30,
	    'Paper products': 31,
	    'Alcohol and Other Stimulants': 32,
	    'Medicines': 33,
	    'Recreation and Leisure': 34,
	    'Electronics': 35,
	    'Home Furnishings': 36,
	    'Products for Business Use': 37,
	}
	# Category labels in ascending string order, used as the dropdown choices.
	CATEGORIES = sorted(GENERAL_CATEGORY)
	# Choices offered by the "Ad Location" dropdown.
	LOCATIONS = ['Left', 'Right', 'Full']
	# Choices offered by the "Gaze Type" dropdown.
	GAZE_TYPE = ['Ad', 'Brand']

	def _box_area(box):
	    """Return the absolute area of the rectangle with corners (box[0], box[1]) and (box[3], box[4])."""
	    return np.abs((box[0] - box[3]) * (box[1] - box[4]))


	def _as_count(value):
	    """Coerce a box count to ``int``; a missing value (``None``) counts as zero."""
	    return 0 if value is None else int(value)


	def calculate_areas(prompts, brand_num, pictorial_num, text_num):
	    """Compute element surface percentages and crop the ad and its context.

	    The boxes in ``prompts["points"]`` are consumed in order: first
	    ``brand_num`` brand boxes, then ``pictorial_num`` pictorial boxes, then
	    ``text_num`` text boxes. The LAST box is always taken as the
	    advertisement itself. For each box, indices 0/1 and 3/4 are read as the
	    two opposite corners (presumably the ImagePrompter box layout
	    ``[x1, y1, type, x2, y2, type]`` -- confirm against the component docs).

	    Args:
	        prompts: Mapping with an ``"image"`` entry (object exposing ``.size``
	            as ``(width, height)`` and ``.convert('RGB')``) and a
	            ``"points"`` entry (sequence of boxes).
	        brand_num: Number of brand boxes; float or ``None`` is accepted.
	        pictorial_num: Number of pictorial boxes; float or ``None`` accepted.
	        text_num: Number of text boxes; float or ``None`` accepted.

	    Returns:
	        Tuple ``(brand_pct, pictorial_pct, text_pct, ad_pct, ad_image,
	        context_image)``. The four percentages are relative to the whole
	        image area. ``ad_image`` is the crop of the ad box. ``context_image``
	        is the full-height strip on whichever side of the ad is wider (left
	        wins ties).

	    Raises:
	        IndexError: If ``points`` holds fewer boxes than the counts require,
	            or is empty.
	    """
	    image_entire = prompts["image"]
	    w, h = image_entire.size
	    image_entire = np.array(image_entire.convert('RGB'))
	    points_all = prompts["points"]

	    # UI number fields may deliver floats (or None when left blank), neither
	    # of which `range()` accepts, so normalise to int up front.
	    brand_num = _as_count(brand_num)
	    pictorial_num = _as_count(pictorial_num)
	    text_num = _as_count(text_num)

	    # Consecutive index windows into `points_all`, one per element type.
	    brand_end = brand_num
	    pictorial_end = brand_end + pictorial_num
	    text_end = pictorial_end + text_num

	    brand_surf = sum(_box_area(points_all[i]) for i in range(0, brand_end))
	    pictorial_surf = sum(_box_area(points_all[i]) for i in range(brand_end, pictorial_end))
	    text_surf = sum(_box_area(points_all[i]) for i in range(pictorial_end, text_end))

	    ad_box = points_all[-1]
	    ad_size = _box_area(ad_box)
	    # Order the corners so the crop is non-empty regardless of the direction
	    # in which the box was drawn.
	    x1, x2 = sorted((ad_box[0], ad_box[3]))
	    y1, y2 = sorted((ad_box[1], ad_box[4]))
	    ad_image = image_entire[int(y1):int(y2), int(x1):int(x2), :]

	    # The context is whatever lies on the wider side of the ad.
	    left_margin = x1
	    right_margin = w - x2
	    if left_margin >= right_margin:
	        context_image = image_entire[:, :int(x1), :]
	    else:
	        context_image = image_entire[:, int(x2):, :]

	    whole_size = w * h

	    return (brand_surf / whole_size * 100,
	            pictorial_surf / whole_size * 100,
	            text_surf / whole_size * 100,
	            ad_size / whole_size * 100,
	            ad_image,
	            context_image)


	def attention(notes, whole_display_prompt,
	              brand_num, pictorial_num, text_num,
	              category, ad_location, gaze_type):
	    """Predict gaze time and a heatmap for an annotated ad-plus-context image.

	    Args:
	        notes: Value of the instruction Markdown component; unused.
	        whole_display_prompt: ImagePrompter value (image plus drawn boxes),
	            forwarded to ``calculate_areas``.
	        brand_num: Number of brand boxes drawn.
	        pictorial_num: Number of pictorial boxes drawn.
	        text_num: Number of text boxes drawn.
	        category: Product-category label; must be a key of
	            ``GENERAL_CATEGORY``.
	        ad_location: One of the ``LOCATIONS`` labels. ``'Left'`` maps to 0,
	            ``'Right'`` to 1, and anything else (e.g. ``'Full'``) to ``None``.
	        gaze_type: Gaze type label, passed through as ``Gaze_Time_Type``.

	    Returns:
	        Tuple of the predicted gaze value rounded to two decimals and a PIL
	        image built from the heatmap with its last axis reversed (presumably
	        a BGR -> RGB channel swap -- confirm against ``Predict.HeatMap_CNN``).

	    Raises:
	        KeyError: If ``category`` is not a known category label.
	    """
	    text_detection_model_path = '../XGBoost_Prediction_Model/EAST-Text-Detection/frozen_east_text_detection.pb'
	    LDA_model_pth = '../XGBoost_Prediction_Model/LDA_Model_trained/lda_model_best_tot.model'
	    training_ad_text_dictionary_path = '../XGBoost_Prediction_Model/LDA_Model_trained/object_word_dictionary'
	    training_lang_preposition_path = '../XGBoost_Prediction_Model/LDA_Model_trained/dutch_preposition'

	    # One-hot encoding of the selected product category.
	    prod_group = np.zeros(len(GENERAL_CATEGORY))
	    prod_group[GENERAL_CATEGORY[category]] = 1

	    # The dropdown supplies capitalised labels ('Left'/'Right'/'Full'), so
	    # match case-insensitively; a case-sensitive check against 'left'/'right'
	    # would never match and every ad would be treated as having no location.
	    location_codes = {'left': 0, 'right': 1}
	    ad_loc = location_codes.get(str(ad_location).strip().lower())

	    brand_percent, visual_percent, text_percent, adv_size_percent, ad_image, context_image = calculate_areas(
	        whole_display_prompt, brand_num, pictorial_num, text_num)
	    surfaces = [brand_percent, visual_percent, text_percent, adv_size_percent*10/100]

	    # LLM-based topic embeddings are disabled; fixed-seed random vectors are
	    # used as stand-ins so the output is reproducible between calls.
	    # caption_ad = XGBoost_utils.Caption_Generation(Image.fromarray(np.uint8(ad_image)))
	    # caption_context = XGBoost_utils.Caption_Generation(Image.fromarray(np.uint8(context_image)))
	    # ad_topic = XGBoost_utils.Topic_emb(caption_ad)
	    # ctpg_topic = XGBoost_utils.Topic_emb(caption_context)
	    np.random.seed(42)
	    ad_topic = np.random.randn(1,768)
	    ctpg_topic = np.random.randn(1,768)

	    # cv.resize takes (width, height), so both crops become 832 rows x 640 cols.
	    ad = cv.resize(ad_image, (640, 832))
	    print('ad shape: ', ad.shape)
	    context = cv.resize(context_image, (640, 832))

	    # HWC -> CHW, plus a leading batch axis of size 1.
	    adv_imgs = torch.permute(torch.tensor(ad), (2,0,1)).unsqueeze(0)
	    ctpg_imgs = torch.permute(torch.tensor(context), (2,0,1)).unsqueeze(0)
	    # NOTE(review): this location tensor and Gaze_Type='AG' are hard-coded and
	    # ignore the user's `ad_location` / `gaze_type` selections -- confirm
	    # whether that is intended for the heatmap model.
	    ad_locations = torch.tensor([1,0]).unsqueeze(0)
	    heatmap = Predict.HeatMap_CNN(adv_imgs, ctpg_imgs, ad_locations, Gaze_Type='AG')

	    Gaze = Predict.Ad_Gaze_Prediction(
	        input_ad_path=ad, input_ctpg_path=context, ad_location=ad_loc,
	        text_detection_model_path=text_detection_model_path, LDA_model_pth=LDA_model_pth,
	        training_ad_text_dictionary_path=training_ad_text_dictionary_path,
	        training_lang_preposition_path=training_lang_preposition_path, training_language='dutch',
	        ad_embeddings=ad_topic, ctpg_embeddings=ctpg_topic,
	        surface_sizes=surfaces, Product_Group=prod_group,
	        obj_detection_model_pth=None, num_topic=20, Gaze_Time_Type=gaze_type)
	    return np.round(Gaze,2), Image.fromarray(np.flip(heatmap, axis=2))

	def greet(name, intensity):
	    """Return ``name`` preceded by ``intensity`` copies of "Hello " and followed by "!".

	    Not referenced by any other code visible in this file.
	    """
	    prefix = "Hello " * intensity
	    return "".join((prefix, name, "!"))

	# Gradio front end: the inputs are passed positionally to `attention`, so the
	# order of this list must match that function's parameter order.
	demo = gr.Interface(
	    fn=attention,
	    inputs=[
	        # Static instructions; its value arrives as the unused `notes` argument.
	        gr.Markdown("""
	Instruction:
	1. Click to upload or drag the entire image that contains BOTH ad and its context;
	2. Draw bounding boxes in the order of:
	(a) Brand element(s) (skip if N.A.)
	(b) Pictorial element(s) (skip if N.A.)
	(c) Text element(s) (skip if N.A.)
	(d) The advertisement.
	NOTE: Each ad element can have more than 1 boxes."""),
	        ImagePrompter(label="Upload Entire (Ad+Context) Image, and Draw Bounding Boxes", sources=['upload'], type="pil"),
	        # precision=0 makes Gradio deliver integers; the box counts are used
	        # as `range()` bounds downstream, which rejects floats.
	        gr.Number(label="Number of brand bounding boxes drawn", precision=0),
	        gr.Number(label="Number of pictorial bounding boxes drawn", precision=0),
	        gr.Number(label="Number of text bounding boxes drawn", precision=0),
	        gr.Dropdown(CATEGORIES, label="Product Category"),
	        gr.Dropdown(LOCATIONS, label='Ad Location'),
	        gr.Dropdown(GAZE_TYPE, label='Gaze Type'),
	    ],
	    # Matches the (gaze value, heatmap image) tuple returned by `attention`.
	    outputs=[
	        gr.Number(label="Predicted Gaze (sec)"),
	        gr.Image(label="Heatmap by ResNet50 (Hotter/Redder regions show more contribution)"),
	    ],
	    title="Ad Gaze Prediction",
	    description="""This app accompanies: "Contextual Advertising with Theory-Informed Machine Learning", manuscript submitted to the Journal of Marketing.
	App Version: 1.0, Date: 10/24/2024.
	Warning: Due to computational efficiency, current version has not activated LLM generated ad topics. In future version, LLM topics will be activated in GPU environment.""",
	    theme=gr.themes.Soft()
	)

	# Starts the web server at import time and requests a public share link.
	demo.launch(share=True)