Niki Zhang committed: Update app.py

app.py CHANGED
@@ -349,9 +349,12 @@ def extract_features_siglip(image):
 @spaces.GPU
 def infer(crop_image_path,full_image_path,state,language,task_type=None):
     print("task type",task_type)
-
-
-
+    style_gallery_output = []
+    item_gallery_output=[]
+
+    if task_type=="task 1":
+        item_gallery_output.append("recomendation_pic/1.8.jpg")
+        item_gallery_output.append("recomendation_pic/1.9.jpg")
         input_image = Image.open(full_image_path).convert("RGB")
         input_features = extract_features_siglip(input_image.convert("RGB"))
         input_features = input_features.detach().cpu().numpy()
@@ -362,16 +365,17 @@ def infer(crop_image_path,full_image_path,state,language,task_type=None):
             sim = -distances[0][i]
             image_url = df.iloc[v]["Link"]
             img_retrieved = read_image_from_url(image_url)
-
+            style_gallery_output.append(img_retrieved)
         if language=="English":
             msg="🖼️ Please refer to the section below to see the recommended results."
         else:
             msg="🖼️ 请到下方查看推荐结果。"
         state+=[(None,msg)]

-        return
-    elif task_type==2:
-
+        return item_gallery_output, style_gallery_output,state,state
+    elif task_type=="task 2":
+        item_gallery_output.append("recomendation_pic/2.8.jpg")
+        item_gallery_output.append("recomendation_pic/2.9.png")
         input_image = Image.open(full_image_path).convert("RGB")
         input_features = extract_features_siglip(input_image.convert("RGB"))
         input_features = input_features.detach().cpu().numpy()
@@ -382,17 +386,18 @@ def infer(crop_image_path,full_image_path,state,language,task_type=None):
             sim = -distances[0][i]
             image_url = df.iloc[v]["Link"]
             img_retrieved = read_image_from_url(image_url)
-
+            style_gallery_output.append(img_retrieved)
         if language=="English":
             msg="🖼️ Please refer to the section below to see the recommended results."
         else:
             msg="🖼️ 请到下方查看推荐结果。"
         state+=[(None,msg)]

-        return
+        return item_gallery_output, style_gallery_output,state,state

-    elif task_type==3:
-
+    elif task_type=="task 3":
+        item_gallery_output.append("recomendation_pic/3.8.png")
+        item_gallery_output.append("recomendation_pic/basket-2.png")
         input_image = Image.open(full_image_path).convert("RGB")
         input_features = extract_features_siglip(input_image.convert("RGB"))
         input_features = input_features.detach().cpu().numpy()
@@ -403,14 +408,15 @@ def infer(crop_image_path,full_image_path,state,language,task_type=None):
             sim = -distances[0][i]
             image_url = df.iloc[v]["Link"]
             img_retrieved = read_image_from_url(image_url)
-
+            style_gallery_output.append(img_retrieved)
         if language=="English":
             msg="🖼️ Please refer to the section below to see the recommended results."
         else:
             msg="🖼️ 请到下方查看推荐结果。"
         state+=[(None,msg)]

-        return
+        return item_gallery_output, style_gallery_output,state,state
+
     elif crop_image_path:
         input_image = Image.open(crop_image_path).convert("RGB")
         input_features = extract_features_siglip(input_image.convert("RGB"))
@@ -422,7 +428,7 @@ def infer(crop_image_path,full_image_path,state,language,task_type=None):
             sim = -distances[0][i]
             image_url = df.iloc[v]["Link"]
             img_retrieved = read_image_from_url(image_url)
-
+            item_gallery_output.append(img_retrieved)

         input_image = Image.open(full_image_path).convert("RGB")
         input_features = extract_features_siglip(input_image.convert("RGB"))
@@ -434,14 +440,14 @@ def infer(crop_image_path,full_image_path,state,language,task_type=None):
             sim = -distances[0][i]
             image_url = df.iloc[v]["Link"]
             img_retrieved = read_image_from_url(image_url)
-
+            style_gallery_output.append(img_retrieved)
         if language=="English":
             msg="🖼️ Please refer to the section below to see the recommended results."
         else:
             msg="🖼️ 请到下方查看推荐结果。"
         state+=[(None,msg)]

-        return
+        return item_gallery_output, style_gallery_output,state,state
     else:
         input_image = Image.open(full_image_path).convert("RGB")
         input_features = extract_features_siglip(input_image.convert("RGB"))
@@ -453,14 +459,15 @@ def infer(crop_image_path,full_image_path,state,language,task_type=None):
             sim = -distances[0][i]
             image_url = df.iloc[v]["Link"]
             img_retrieved = read_image_from_url(image_url)
-
+            style_gallery_output.append(img_retrieved)
         if language=="English":
             msg="🖼️ Please refer to the section below to see the recommended results."
         else:
             msg="🖼️ 请到下方查看推荐结果。"
         state+=[(None,msg)]

-        return
+        return item_gallery_output, style_gallery_output,state,state
+


 ###############################################################################
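For context, `infer` leans on a SigLIP embedding plus nearest-neighbour search that sits outside this hunk: `extract_features_siglip`, the `distances`/`indices` pair, `df.iloc[v]["Link"]`, and `read_image_from_url`. Below is a minimal sketch of that retrieval pipeline; the checkpoint name, the index file, and the CSV schema are assumptions for illustration, not the Space's actual setup.

```python
# Hedged sketch of the retrieval pipeline infer() assumes.
import faiss
import pandas as pd
import requests
import torch
from io import BytesIO
from PIL import Image
from transformers import AutoImageProcessor, SiglipVisionModel

# Assumed checkpoint; the Space may pin a different SigLIP variant.
processor = AutoImageProcessor.from_pretrained("google/siglip-base-patch16-224")
model = SiglipVisionModel.from_pretrained("google/siglip-base-patch16-224")

def extract_features_siglip(image: Image.Image) -> torch.Tensor:
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.pooler_output  # (1, hidden_dim) image embedding

def read_image_from_url(url: str) -> Image.Image:
    resp = requests.get(url, timeout=10)
    return Image.open(BytesIO(resp.content)).convert("RGB")

# Assumed artefacts: one artwork per row with a "Link" column, and a FAISS
# index built over the same embeddings, so search() yields (distances, indices).
df = pd.read_csv("artworks.csv")
index = faiss.read_index("artworks.index")

query = extract_features_siglip(Image.open("query.jpg").convert("RGB"))
distances, indices = index.search(query.detach().cpu().numpy(), 4)
for i, v in enumerate(indices[0]):
    print(-distances[0][i], df.iloc[v]["Link"])  # mirrors the sim/image_url loop above
```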
@@ -614,7 +621,7 @@ css = """


 .info_btn {
-    background:
+    background: rgb(245, 245, 245) !important;
     border: none !important;
     box-shadow: none !important;
     font-size: 15px !important;
@@ -623,7 +630,7 @@ css = """
 }

 .info_btn_interact {
-    background: rgb(
+    background: rgb(217, 217, 217) !important;
     box-shadow: none !important;
     font-size: 15px !important;
     min-width: 6rem !important;
@@ -631,16 +638,24 @@ css = """
 }

 .function_button {
+    background: rgb(227, 226, 226) !important;
     border: none !important;
     box-shadow: none !important;
 }

 .function_button_rec {
-    background: rgb(
+    background: rgb(189, 189, 189) !important;
     border: none !important;
     box-shadow: none !important;
 }

+.small_button {
+    font-size: 12px !important;
+    padding: 2px 8px !important;
+    min-width: 60px !important;
+    height: 30px !important;
+}
+
 #tool_box {max-width: 50px}

 """
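These class rules only take effect once the `css` string is handed to the app at construction time. A minimal sketch of the standard Gradio pattern (this wiring is illustrative, not code from this commit):

```python
import gradio as gr

css = """
.function_button_rec { background: rgb(189, 189, 189) !important; }
.small_button { font-size: 12px !important; min-width: 60px !important; }
"""

with gr.Blocks(css=css) as demo:
    # elem_classes ties a component to the custom rules defined above
    recommend = gr.Button("Recommend", elem_classes="function_button_rec")
    small = gr.Button("OK", elem_classes="small_button")

demo.launch()
```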
@@ -703,26 +718,65 @@ Use a markdown outline format with appropriate emojis based on the image and Wik
 ]

 recommendation_prompt=[
-
-
+
+[
+'''
+First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
+Recommendation reason: {{Recommendation based on {{object}} in the painting you saw earlier. Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate in three points. }}
+Each bullet point should be in {language} language, with a response length of about {length} words.
+''',
+'''
+When generating answers, you should tell people that I am the creator of painting you were looking at earlier itself, and generate text in the tone and manner in which you are the creator of painting were looking at earlier.
+
+First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
+
+Recommendation reason: {{I'm the creator of that painting you saw earlier. I'm an artist. and I'm recommending this painting based on the fact that the {{object}} I've drawn also appear in the painting you're looking at. }} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the creator of painting were looking at earlier and start every sentence with I.
+
+Each bullet point should be in {language} language, with a response length of about {length} words.
+
+''',
+'''
+When generating answers, you should tell people that you are the object itself that was selected in the painting, and generate text in the tone and manner in which you are the object
+
+First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
+
+Recommendation reason: {{I'm the {{object}} in the painting you were looking at earlier, and I'm recommending this painting based on the fact that I'm also present in the one you're looking at.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the object of this painting and start every sentence with I.
+
+Each bullet point should be in {language} language, with a response length of about {length} words.
+
+'''],
+
+[
 '''
-
-
-Recommendation reason: {{ As the author of the first painting, I recommend based on the object I painted OR As the author of the first painting, I recommend based on the overall similarity in appearance}}
-Detailed analysis: Based on the recommendation reason, explain why you recommend image 2 after viewing image 1. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I.
+First identify what the name of the first painting is, you save yourself as the parameter: {{name}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
+Recommendation reason: {{Recommendation based on the painting {{name}}.Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate in three points.}}
 Each bullet point should be in {language} language, with a response length of about {length} words.
 ''',
 '''
-When generating answers, you should tell people that
-
-
-
+When generating answers, you should tell people that I am the creator of painting you were looking at earlier itself, and generate text in the tone and manner in which you are the creator of painting were looking at earlier.
+
+First identify what the creator of the first painting is, you save yourself as the parameter: {artist}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
+
+Recommendation reason: {{I'm the creator of that painting you saw earlier, {artist}. I'm an artist. and I'm recommending this painting based on the fact that the painting you're looking at is similar to the one you just saw of me.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the creator of painting were looking at earlier and start every sentence with I.
+
 Each bullet point should be in {language} language, with a response length of about {length} words.
+
+''',
 '''
+When generating answers, you should tell people that I am the painting you were looking at earlier itself, and generate text in the tone and manner in which you are the painting were looking at earlier.
+
+First identify what the name of the first painting is, you save yourself as the parameter: {{name}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
+
+Recommendation reason: {{I'm the painting {{name}} you were looking at earlier, and I'm recommending this painting based on the fact that I'm similar to the one you're looking at.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the painting were looking at earlier and start every sentence with I.
+
+Each bullet point should be in {language} language, with a response length of about {length} words.
+
+'''],
+



-
+]

 gpt_state = 0
 VOICE = "en-GB-SoniaNeural"
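The new list is effectively a 2×3 matrix: the outer index picks the recommendation type (object-based vs. style-based), the inner index the persona. A hedged sketch of the lookup that `get_recommendation` performs later in this commit; `build_recommendation_prompt` is an illustrative helper, not a function in app.py:

```python
# Persona names map to inner indices; the {{...}} escapes in the templates
# survive .format(), so only {language}, {length} and {artist} are filled in.
naritive_mapping = {"Narrator": 0, "Artist": 1, "In-Situ": 2}

def build_recommendation_prompt(recommend_type, narritive, language, length, artist=None):
    type_idx = 0 if recommend_type == "Item" else 1  # object-based vs. style-based
    persona = naritive_mapping[narritive]
    template = recommendation_prompt[type_idx][persona]
    if type_idx == 1 and persona == 1:
        # only the style/Artist template carries an {artist} placeholder
        return template.format(language=language, length=length, artist=artist)
    return template.format(language=language, length=length)
```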
@@ -904,9 +958,14 @@ def update_click_state(click_state, caption, click_mode):
     raise NotImplementedError

 async def chat_input_callback(*args):
-    visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender,api_key,image_input,log_state,history = args
+    visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender,api_key,image_input,log_state,history,persona = args
     message = chat_input["text"]
-
+    if persona == "Narrator":
+        prompt="Please help me answer the question with this painting {question} in {language}."
+    elif persona =="Artist":
+        prompt="When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. Please help me answer the question with this painting {question} in {language}."
+    else:
+        prompt="When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. Please help me answer the question with this painting {question} in {language}."
     prompt=prompt.format(question=message, language=language)

     if visual_chatgpt is not None:
@@ -914,7 +973,8 @@ async def chat_input_callback(*args):
     read_info = re.sub(r'[#[\]!*]','',result)
     read_info = emoji.replace_emoji(read_info,replace="")
     state = state + [(message,result)]
-    log_state += [(message,
+    log_state += [(message,"/////")]
+    log_state += [("/////",result)]
     # log_state += [("%% chat messahe %%",None)]

     history.append({"role": "user", "content": message})
@@ -933,9 +993,8 @@ async def chat_input_callback(*args):
     return state, state, None, audio,log_state,history


-async def upload_callback(image_input,state, log_state, visual_chatgpt=None, openai_api_key=None,language="English",narritive=None,history=None,autoplay=
+async def upload_callback(image_input,state, log_state, task_type, visual_chatgpt=None, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
     print("narritive", narritive)
-    print("image input",image_input)
     if isinstance(image_input, dict): # if upload from sketcher_input, input contains image and mask
         image_input = image_input['background']

@@ -944,7 +1003,7 @@ async def upload_callback(image_input,state, log_state, visual_chatgpt=None, ope
     elif isinstance(image_input, bytes):
         image_input = Image.open(io.BytesIO(image_input))

-
+
     click_state = [[], [], []]


@@ -984,16 +1043,34 @@ async def upload_callback(image_input,state, log_state, visual_chatgpt=None, ope
         visual_chatgpt.current_image = new_image_path
         paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
         # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
-
-
-
-
-
-
-
-
-
-
+    if task_type=="task 3":
+        name="Along the River During the Qingming Festival"
+        artist="Zhang Zeduan"
+        year="12th century (Song Dynasty)"
+        material="Chinese painting"
+        gender="male"
+
+    elif task_type=="task 1":
+        name ="The Ambassadors"
+        artist ="Hans Holbein the Younger"
+        year = "1533 (Northern Renaissance)"
+        material="Realism"
+        gender = "male"
+
+    elif task_type=="task 2":
+        name = "The Football Players"
+        artist= "Albert Gleizes"
+        year= "1912 (Cubism)"
+        material="Cubism"
+        gender= "male"
+
+    else:
+        parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\",\"gender\": \"The gender of the author\"}")
+        print(parsed_data)
+        parsed_data = json.loads(parsed_data.replace("'", "\""))
+        name, artist, year, material,gender= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"],parsed_data['gender']
+        gender=gender.lower()
+
     if language=="English":
         if naritive_mapping[narritive]==0 :
             msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
@@ -1033,7 +1110,7 @@ async def upload_callback(image_input,state, log_state, visual_chatgpt=None, ope


     return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
-            original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender,new_image_path,log_state,history,audio_output
+            original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender,new_image_path,log_state,history,audio_output]


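The new `else:` branch trusts the model to return a single-line dict and patches single quotes before `json.loads`. A slightly defensive sketch of that round-trip; the fallback slicing is an addition of this sketch, not behaviour of the committed code:

```python
import json

def parse_artwork_metadata(raw: str) -> dict:
    """Parse the dictionary-formatted reply requested from the model."""
    try:
        return json.loads(raw.replace("'", "\""))
    except json.JSONDecodeError:
        # keep the app alive if the model wraps the dict in prose or markdown
        start, end = raw.find("{"), raw.rfind("}") + 1
        return json.loads(raw[start:end].replace("'", "\""))

meta = parse_artwork_metadata(
    '{ "name": "The Ambassadors", "artist": "Hans Holbein the Younger", '
    '"year": "1533", "style": "Realism", "gender": "Male" }'
)
name, artist = meta["name"], meta["artist"]
gender = meta["gender"].lower()  # normalised, as upload_callback does
```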
@@ -1103,7 +1180,7 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     Image.open(out["crop_save_path"]).save(new_crop_save_path)
     print("new crop save",new_crop_save_path)

-
+    return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground


 query_focus_en = [
@@ -1134,7 +1211,11 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
     print("input_labels_state",input_labels_state)

     prompt=generate_prompt(focus_type,paragraph,length,sentiment,factuality,language, naritive)
-
+    print("log state",log_state[-1])
+    if log_state[-1][0] is None or not log_state[-1][0].startswith("%%"):
+        log_state = log_state + [("No like/dislike", None)]
+        log_state = log_state + [("%% user interaction %%",None)]
+
     log_state = log_state + [("Selected image point: {}, Input label: {}".format(input_points_state, input_labels_state), None)]


@@ -1147,7 +1228,7 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
     # if not args.disable_gpt and text_refiner:
     if not args.disable_gpt:
         print("new crop save",new_crop_save_path)
-        focus_info=get_gpt_response(openai_api_key,new_crop_save_path,prompt
+        focus_info=get_gpt_response(openai_api_key,new_crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
             focus_info=focus_info[1:-1]
         focus_info=focus_info.replace('#', '')
@@ -1204,7 +1285,7 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
     return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,None,log_state,history


-naritive_mapping = {"
+naritive_mapping = {"Narrator": 0, "Artist": 1, "In-Situ": 2}

 def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language,naritive):

@@ -1658,51 +1739,81 @@ async def texttospeech(text, language,gender='female'):
         print(f"Error in texttospeech: {e}")
         return None

+async def get_recommendation(new_crop,image_path,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,recommend_type,artist,recomended_path):
+
+    if recommend_type=="Item":
+        persona=naritive_mapping[narritive]
+        prompt=recommendation_prompt[0][persona].format(language=language,length=length)
+        image_paths=[new_crop,recomended_path]
+        result=get_gpt_response(openai_api_key, image_paths, prompt)
+        print("recommend result",result)
+        state += [(None, f"{result}")]
+        log_state += [("User wants to know object recomendation reason", None)]
+        log_state = log_state + [(narritive, None)]
+        log_state = log_state + [(f"image sort ranking {sort_score}", None)]
+        log_state = log_state + [(None, f"{result}")]
+        read_info = re.sub(r'[#[\]!*]','',result)
+        read_info = emoji.replace_emoji(read_info,replace="")
+        print("associate",read_info)
+        audio_output=None
+        if autoplay:
+            audio_output = await texttospeech(read_info, language)
+        return state,state,audio_output,log_state,index,gr.update(value=[])
+    else:
+        persona=naritive_mapping[narritive]
+
+        if persona==1:
+            prompt=recommendation_prompt[1][persona].format(language=language,length=length,artist=artist[8:])
+        else:
+            prompt=recommendation_prompt[1][persona].format(language=language,length=length)
+        image_paths=[image_path,recomended_path]
+        result=get_gpt_response(openai_api_key, image_paths, prompt )
+        print("recommend result",result)
+        state += [(None, f"{result}")]
+        log_state += [("User wants to know style recomendation reason", None)]
+        log_state = log_state + [(narritive, None)]
+        log_state = log_state + [(f"image sort ranking {sort_score}", None)]
+        log_state = log_state + [(None, f"{result}")]
+        read_info = re.sub(r'[#[\]!*]','',result)
+        read_info = emoji.replace_emoji(read_info,replace="")
+        print("associate",read_info)
+        audio_output=None
+        if autoplay:
+            audio_output = await texttospeech(read_info, language)
+        return state,state,audio_output,log_state,index,gr.update(value=[])
+
+
 # give the reason of recommendation
-async def
-    persona=naritive_mapping[narritive]
+async def item_associate(new_crop,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,evt: gr.SelectData):
     rec_path=evt._data['value']['image']['path']
-
-
-
-
-
-
-
-    result=get_gpt_response(openai_api_key, image_paths, prompt)
-    print("recommend result",result)
-    reason = [(None, f"{result}")]
-    log_state = log_state + [(narritive, None)]
-    log_state = log_state + [(f"image sort ranking {sort_score}", None)]
-    log_state = log_state + [(None, f"{result}")]
-    read_info = re.sub(r'[#[\]!*]','',result)
-    read_info = emoji.replace_emoji(read_info,replace="")
-    print("associate",read_info)
-    audio_output=None
-    if autoplay:
-        audio_output = await texttospeech(read_info, language)
-    return reason,audio_output,log_state,index,gr.update(value=[])
+    return state,state,None,log_state,None,gr.update(value=[]),rec_path,rec_path,"Item"
+
+
+async def style_associate(image_path,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,artist,evt: gr.SelectData):
+    rec_path=evt._data['value']['image']['path']
+    return state,state,None,log_state,None,gr.update(value=[]),rec_path, rec_path,"Style"
+

-def change_naritive(session_type,image_input,
+def change_naritive(session_type,image_input, state, click_state, paragraph, origin_image,narritive,task_instruct,gallery_output,style_gallery_result,reco_reasons,language="English"):
     if session_type=="Session 1":
-        return None, [], [], [[], [], []], "", None, []
+        return None, [], [], [[], [], []], "", None, None, [], [],[],[],gr.update(value="Preview")
     else:
         if language=="English":
-            if narritive=="
+            if narritive=="Narrator" :
                 state += [
                     (
                         None,
                         f"🤖 Hi, I am EyeSee. Let's explore this painting together."
                     )
                 ]
-            elif narritive=="
+            elif narritive=="Artist":
                 state += [
                     (
                         None,
                         f"🧑‍🎨 Let's delve into it from the perspective of the artist."
                     )
                 ]
-            elif narritive=="
+            elif narritive=="In-Situ":
                 state += [
                     (
                         None,
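Worth noting about the new flow: `item_associate`/`style_associate` only record which gallery image was picked; the GPT call happens later, in `get_recommendation`, once the preview image itself is clicked. A minimal, self-contained sketch of that two-step `gr.SelectData` pattern with simplified signatures (not the app's real ones; the shape of `evt.value` for a Gallery is the Gradio 4.x form):

```python
import gradio as gr

def on_gallery_select(evt: gr.SelectData):
    # step 1: stash the selected item; no model call yet
    path = evt.value["image"]["path"]  # assumed Gallery payload shape
    return path, path

def on_preview_click(rec_path):
    # step 2: only now generate (or fetch) the recommendation reason
    return f"Reasons for recommending {rec_path} would be generated here."

with gr.Blocks() as demo:
    gallery = gr.Gallery(value=["recomendation_pic/1.8.jpg", "recomendation_pic/1.9.jpg"])
    selected = gr.Image(label="Selected Image", interactive=False)
    rec_state = gr.State(None)
    reason = gr.Textbox(label="Reason")
    gallery.select(on_gallery_select, inputs=None, outputs=[selected, rec_state])
    selected.select(on_preview_click, inputs=rec_state, outputs=reason)

demo.launch()
```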
@@ -1710,21 +1821,21 @@ def change_naritive(session_type,image_input, chatbot, state, click_state, parag
                     )
                 ]
         elif language=="Chinese":
-            if narritive=="
+            if narritive=="Narrator" :
                 state += [
                     (
                         None,
                         "🤖 让我们从第三方视角一起探索这幅画吧。"
                     )
                 ]
-            elif narritive == "
+            elif narritive == "Artist":
                 state += [
                     (
                         None,
                         "🧑‍🎨 让我们从艺术家的视角深入探索这幅画。"
                     )
                 ]
-            elif narritive == "
+            elif narritive == "In-Situ":
                 state += [
                     (
                         None,
@@ -1732,7 +1843,8 @@ def change_naritive(session_type,image_input, chatbot, state, click_state, parag
                     )
                 ]

-
+
+    return image_input, state, state, click_state, paragraph, origin_image,task_instruct,gallery_output,style_gallery_result,reco_reasons,reco_reasons,gr.update(value="Preview")


 def print_like_dislike(x: gr.LikeData,state,log_state):
@@ -1748,7 +1860,7 @@ def print_like_dislike(x: gr.LikeData,state,log_state):
     return log_state,state

 def get_recommendationscore(index,score,log_state):
-    log_state+=[(f"
+    log_state+=[(f"{index} : {score}",None)]
     log_state+=[("%% recommendation %%",None)]
     return log_state

@@ -1776,10 +1888,9 @@ def create_ui():
    description = """<p>Gradio demo for EyeSee Anything in Art, image to dense captioning generation with various language styles. To use it, simply upload your image, or click one of the examples to load them. """

    examples = [
-        ["test_images/1.The Ambassadors.jpg","test_images/task1.jpg"],
-        ["test_images/2.Football Players.jpg","test_images/task2.jpg"],
-        ["test_images/3.
-        # ["test_images/test3.jpg"],
+        ["test_images/1.The Ambassadors.jpg","test_images/task1.jpg","task 1"],
+        ["test_images/2.Football Players.jpg","test_images/task2.jpg","task 2"],
+        ["test_images/3-square.jpg","test_images/task3.jpg","task 3"],
        # ["test_images/test4.jpg"],
        # ["test_images/test5.jpg"],
        # ["test_images/Picture5.png"],
@@ -1796,7 +1907,7 @@ def create_ui():
    log_state=gr.State([])
    # history log for gpt
    history_log=gr.State([])
-
+
    out_state = gr.State(None)
    click_state = gr.State([[], [], []])
    origin_image = gr.State(None)
@@ -1823,21 +1934,34 @@ def create_ui():
        # store the whole image path
        image_path=gr.State('')
        pic_index=gr.State(None)
+        recomended_state=gr.State([])
+
+        recomended_path=gr.State(None)
+        recomended_type=gr.State(None)
+


-
-        auto_play = gr.Checkbox(
-            label="Check to autoplay audio", value=False, elem_classes="custom-autoplay"
-        )
-        output_audio = gr.HTML(
-            label="Synthesised Audio", elem_classes="custom-output"
-        )
+
        with gr.Row():
-
-            task_instuction=gr.Image(type="pil", interactive=True, elem_classes="task_instruct",height=650,label=None)
+
            with gr.Column(scale=6):
                with gr.Column(visible=False) as modules_not_need_gpt:
-
+                    with gr.Row():
+                        naritive = gr.Radio(
+                            choices=["Narrator", "Artist","In-Situ"],
+                            value="Narrator",
+                            label="Select Mode",
+                            scale=5,
+                            interactive=True)
+
+                        add_button = gr.Button(value="Extend Area", interactive=True,elem_classes="tools_button_add",icon=add_icon_path)
+                        minus_button = gr.Button(value="Remove Area", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
+                        clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button",icon="assets/icons/recycle.png")
+
+                    auto_play = gr.Checkbox(
+                        label="Check to autoplay audio", value=True, elem_classes="custom-autoplay",visible=False)
+                    output_audio = gr.HTML(
+                        label="Synthesised Audio", elem_classes="custom-output", visible=False)
                    with gr.Tab("Base(GPT Power)",visible=False) as base_tab:
                        image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
                        with gr.Row():
@@ -1854,49 +1978,32 @@ def create_ui():
                            year_label_base2 = gr.Button(value="Year: ",elem_classes="info_btn_interact")
                            material_label_base2 = gr.Button(value="Style: ",elem_classes="info_btn")

-                    with gr.
-                        with gr.
-
-
-
+                    with gr.Row():
+                        with gr.Column(scale=1,min_width=50,visible=False) as instruct:
+                            task_instuction=gr.Image(type="pil", interactive=False, elem_classes="task_instruct",height=650,label="Instruction")
+                        with gr.Column(scale=6):
+                            with gr.Tab("Click") as click_tab:
                                with gr.Row():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            focus_dda = gr.Button(value="Judge",interactive=True,elem_classes="function_button",variant="primary")
-
-                            recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button_rec")
+                                    with gr.Column(scale=10,min_width=600):
+                                        image_input = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
+                                        example_image = gr.Image(type="pil", interactive=False, visible=False)
+                                        # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
+                                    # the tool column
+                                    with gr.Column(scale=1,elem_id="tool_box",min_width=80):
+                                        name_label = gr.Button(value="Name: ",elem_classes="info_btn")
+                                        artist_label = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
+                                        year_label = gr.Button(value="Year: ",elem_classes="info_btn_interact")
+                                        material_label = gr.Button(value="Style: ",elem_classes="info_btn")
+
+                                        focus_d = gr.Button(value="Describe",interactive=True,elem_classes="function_button")
+                                        focus_da = gr.Button(value="D+Analysis",interactive=True,elem_classes="function_button")
+                                        focus_dai = gr.Button(value="DA+Interprete",interactive=True,elem_classes="function_button")
+                                        focus_dda = gr.Button(value="Judge",interactive=True,elem_classes="function_button")
+
+                                        recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button_rec")
                            # focus_asso = gr.Button(value="Associate",interactive=True,elem_classes="function_button",variant="primary")
+

-            with gr.Row(visible=False):
-                with gr.Column():
-                    with gr.Row():
-                        # point_prompt = gr.Radio(
-                        #     choices=["Positive", "Negative"],
-                        #     value="Positive",
-                        #     label="Point Prompt",
-                        #     scale=5,
-                        #     interactive=True)
-                        click_mode = gr.Radio(
-                            choices=["Continuous", "Single"],
-                            value="Continuous",
-                            label="Clicking Mode",
-                            scale=5,
-                            interactive=True)


        with gr.Tab("Trajectory (beta)", visible=False) as traj_tab:
@@ -1953,6 +2060,7 @@ def create_ui():
            with gr.Column(scale=4):
                with gr.Column(visible=True) as module_key_input:
                    openai_api_key = gr.Textbox(
+                        value="sk-proj-bxHhgjZV8TVgd1IupZrUT3BlbkFJvrthq6zIxpZVk3vwsvJ9",
                        placeholder="Input openAI API key",
                        show_label=False,
                        label="OpenAI API Key",
|
|
1969 |
# with gr.Column(visible=False) as modules_need_gpt2:
|
1970 |
# paragraph_output = gr.Textbox(lines=16, label="Describe Everything", max_lines=16)
|
1971 |
# cap_everything_button = gr.Button(value="Caption Everything in a Paragraph", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1972 |
|
1973 |
|
1974 |
|
1975 |
-
|
1976 |
-
|
1977 |
-
naritive = gr.Radio(
|
1978 |
-
choices=["Third-person", "Single-Persona: Artist","Multi-Persona: Objects"],
|
1979 |
-
value="Third-person",
|
1980 |
-
label="Persona",
|
1981 |
-
scale=5,
|
1982 |
-
interactive=True)
|
1983 |
-
with gr.Blocks():
|
1984 |
-
chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600,bubble_full_width=False)
|
1985 |
-
with gr.Column() as modules_need_gpt3:
|
1986 |
-
chat_input = gr.MultimodalTextbox(interactive=True, file_types=[".txt"], placeholder="Message EyeSee...", show_label=False)
|
1987 |
-
with gr.Row():
|
1988 |
-
clear_button_text = gr.Button(value="Clear Chat", interactive=True)
|
1989 |
-
export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
|
1990 |
-
# submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
|
1991 |
-
# upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
|
1992 |
-
# downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
|
1993 |
|
1994 |
|
1995 |
|
@@ -2012,27 +2121,59 @@ def create_ui():

        with gr.Row():
            with gr.Column(scale=6):
-                with gr.
-
-
-
-
-
+                with gr.Row():
+                    with gr.Column(visible=False) as recommend:
+
+                        # sort_rec=gr.Dropdown(["1", "2", "3", "4"], visible=False,
+                        #                      value=[],
+                        #                      multiselect=True,
+                        #                      label="Score", info="Please sort the pictures according to your preference"
+                        #                      )
+
+                        gallery_result = gr.Gallery(
+                            label="Object-based Recommendation",
+                            height="auto",
+                            columns=2,
+                            interactive=False
+                            # columns=4,
+                            # rows=2,
+                            # show_label=False,
+                            # allow_preview=True,
+                            # object_fit="contain",
+                            # height="auto",
+                            # preview=True,
+                            # show_share_button=True,
+                            # show_download_button=True
+                        )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        style_gallery_result = gr.Gallery(
+                            label="Style-based Recommendation",
+                            height="auto",
+                            columns=2,
+                            interactive=False
+                            # columns=4,
+                            # rows=2,
+                            # show_label=False,
+                            # allow_preview=True,
+                            # object_fit="contain",
+                            # height="auto",
+                            # preview=True,
+                            # show_share_button=True,
+                            # show_download_button=True
+                        )
+                    with gr.Column(scale=3):
+                        selected_image = gr.Image(label="Selected Image", interactive=False)
+
+                        sort_rec = gr.Radio(
+                            choices=[1,2,3,4,5,6,7],
+                            label="Score",
+                            interactive=True,info="Please sort the recommendation artwork")
+
+                        recommend_type = gr.Radio(
+                            choices=["Preview","Reasons"],
+                            label="Information Type",
+                            value="Preview",
+                            interactive=True,visible=False)


            with gr.Column(scale=4,visible=False) as reco_reasons:
@@ -2040,12 +2181,13 @@ def create_ui():
                recommend_score = gr.Radio(
                    choices=[1,2,3,4,5,6,7],
                    label="Score",
-                    interactive=True)
+                    interactive=True,info='Please score the recommendation reasons')

        with gr.Row():
+            task_type = gr.Textbox(visible=False)
            gr.Examples(
                examples=examples,
-                inputs=[example_image,task_instuction],
+                inputs=[example_image,task_instuction,task_type],
            )

@@ -2170,6 +2312,7 @@ def create_ui():
                        interactive=True,
                        label="Generated Caption Length",
                    )
+
                    # auto_play = gr.Checkbox(
                    #     label="Check to autoplay audio", value=False, elem_classes="custom-autoplay"
                    # )
@@ -2217,17 +2360,30 @@ def create_ui():
        recommend_btn.click(
            fn=infer,
            inputs=[new_crop_save_path,image_path,state,language,task_type],
-            outputs=[gallery_result,chatbot,state]
+            outputs=[gallery_result,style_gallery_result,chatbot,state]
        )

        gallery_result.select(
-
-            inputs=[
-            outputs=[recommend_bot,output_audio,log_state,pic_index,recommend_score],
+            item_associate,
+            inputs=[new_crop_save_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state],
+            outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path, recomended_type],


        )

+        style_gallery_result.select(
+            style_associate,
+            inputs=[image_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state,artist_label],
+            outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path,recomended_type],
+
+
+        )
+
+        selected_image.select(
+            get_recommendation,
+            inputs=[new_crop_save_path,image_path, openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state,recomended_type,artist_label,recomended_path],
+            outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score])
+
        ###############################################################################
        ############# above part is for text to image #############
        ###############################################################################
@@ -2444,11 +2600,18 @@ def create_ui():

        # cap_everything_button.click(cap_everything, [paragraph, visual_chatgpt, language,auto_play],
        #                  [paragraph_output,output_audio])
-
+        def reset_and_add(origin_image):
+            new_prompt = "Positive"
+            new_add_icon = "assets/icons/plus-square-blue.png"
+            new_add_css = "tools_button_clicked"
+            new_minus_icon = "assets/icons/minus-square.png"
+            new_minus_css= "tools_button"
+            return [[],[],[]],origin_image, new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
+
        clear_button_click.click(
-
+            reset_and_add,
            [origin_image],
-            [click_state, image_input],
+            [click_state, image_input,point_prompt,add_button,minus_button],
            queue=False,
            show_progress=False
        )
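`reset_and_add` restores the point-prompt tooling by returning a `gr.update(...)` per component instead of rebuilding the UI. The same idiom in isolation, with illustrative component names (not the app's):

```python
import gradio as gr

def reset_tools():
    # gr.update patches properties of existing components in place
    return (
        gr.update(icon="assets/icons/plus-square-blue.png", elem_classes="tools_button_clicked"),
        gr.update(icon="assets/icons/minus-square.png", elem_classes="tools_button"),
    )

with gr.Blocks() as demo:
    add_button = gr.Button("Extend Area")
    minus_button = gr.Button("Remove Area")
    reset = gr.Button("Reset")
    reset.click(reset_tools, inputs=None, outputs=[add_button, minus_button])

demo.launch()
```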
@@ -2503,11 +2666,11 @@ def create_ui():
        #                   name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
        #                   paragraph,artist,gender,image_path])

-        image_input.upload(upload_callback, [image_input, state, log_state,visual_chatgpt,openai_api_key,language,naritive,history_log,auto_play,session_type
                        [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
                        image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
                        name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
-                        paragraph,artist,gender,image_path,log_state,history_log,output_audio

        # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
        #                 [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
@@ -2515,26 +2678,23 @@ def create_ui():
        #                   name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
        #                   paragraph,artist])

-
-
-        # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
-        # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
-        #                 [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-        #                 image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
-        chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play,gender,openai_api_key,image_path,log_state,history_log],
                        [chatbot, state, aux_state,output_audio,log_state,history_log])
        # chat_input.submit(lambda: "", None, chat_input)
        chat_input.submit(lambda: {"text": ""}, None, chat_input)
-
-
-        # submit_button_text.click(lambda: "", None, chat_input)
-        example_image.change(upload_callback, [example_image, state, log_state, visual_chatgpt, openai_api_key,language,naritive,history_log,auto_play,session_type,task_type],
                        [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
                        image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
                        name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
-                        paragraph,artist,gender,image_path, log_state,history_log,output_audio

        example_image.change(clear_chat_memory, inputs=[visual_chatgpt])

@@ -2560,6 +2720,12 @@ def create_ui():
        # click_tab.select(on_click_tab_selected, outputs=[modules_need_gpt1,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt2])
        # base_tab.select(on_base_selected, outputs=[modules_need_gpt0,modules_need_gpt2,modules_not_need_gpt2,modules_need_gpt1])
        # base_tab2.select(on_base_selected, outputs=[modules_not_need_gpt2,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt1])



@@ -2678,21 +2844,21 @@ def create_ui():

        naritive.change(
            change_naritive,
-            [session_type, image_input,
-
            queue=False,
            show_progress=False

        )
-        def
+
            instruction=Image.open('test_images/task4.jpg')
-            return [],instruction
-

        session_type.change(
-
            [],
-            [log_state,task_instuction]
        )

        # upvote_btn.click(
349 |
@spaces.GPU
|
350 |
def infer(crop_image_path,full_image_path,state,language,task_type=None):
|
351 |
print("task type",task_type)
|
352 |
+
style_gallery_output = []
|
353 |
+
item_gallery_output=[]
|
354 |
+
|
355 |
+
if task_type=="task 1":
|
356 |
+
item_gallery_output.append("recomendation_pic/1.8.jpg")
|
357 |
+
item_gallery_output.append("recomendation_pic/1.9.jpg")
|
358 |
input_image = Image.open(full_image_path).convert("RGB")
|
359 |
input_features = extract_features_siglip(input_image.convert("RGB"))
|
360 |
input_features = input_features.detach().cpu().numpy()
|
|
|
365 |
sim = -distances[0][i]
|
366 |
image_url = df.iloc[v]["Link"]
|
367 |
img_retrieved = read_image_from_url(image_url)
|
368 |
+
style_gallery_output.append(img_retrieved)
|
369 |
if language=="English":
|
370 |
msg="🖼️ Please refer to the section below to see the recommended results."
|
371 |
else:
|
372 |
msg="🖼️ 请到下方查看推荐结果。"
|
373 |
state+=[(None,msg)]
|
374 |
|
375 |
+
return item_gallery_output, style_gallery_output,state,state
|
376 |
+
elif task_type=="task 2":
|
377 |
+
item_gallery_output.append("recomendation_pic/2.8.jpg")
|
378 |
+
item_gallery_output.append("recomendation_pic/2.9.png")
|
379 |
input_image = Image.open(full_image_path).convert("RGB")
|
380 |
input_features = extract_features_siglip(input_image.convert("RGB"))
|
381 |
input_features = input_features.detach().cpu().numpy()
|
|
|
386 |
sim = -distances[0][i]
|
387 |
image_url = df.iloc[v]["Link"]
|
388 |
img_retrieved = read_image_from_url(image_url)
|
389 |
+
style_gallery_output.append(img_retrieved)
|
390 |
if language=="English":
|
391 |
msg="🖼️ Please refer to the section below to see the recommended results."
|
392 |
else:
|
393 |
msg="🖼️ 请到下方查看推荐结果。"
|
394 |
state+=[(None,msg)]
|
395 |
|
396 |
+
return item_gallery_output, style_gallery_output,state,state
|
397 |
|
398 |
+
elif task_type=="task 3":
|
399 |
+
item_gallery_output.append("recomendation_pic/3.8.png")
|
400 |
+
item_gallery_output.append("recomendation_pic/basket-2.png")
|
401 |
input_image = Image.open(full_image_path).convert("RGB")
|
402 |
input_features = extract_features_siglip(input_image.convert("RGB"))
|
403 |
input_features = input_features.detach().cpu().numpy()
|
|
|
408 |
sim = -distances[0][i]
|
409 |
image_url = df.iloc[v]["Link"]
|
410 |
img_retrieved = read_image_from_url(image_url)
|
411 |
+
style_gallery_output.append(img_retrieved)
|
412 |
if language=="English":
|
413 |
msg="🖼️ Please refer to the section below to see the recommended results."
|
414 |
else:
|
415 |
msg="🖼️ 请到下方查看推荐结果。"
|
416 |
state+=[(None,msg)]
|
417 |
|
418 |
+
return item_gallery_output, style_gallery_output,state,state
|
419 |
+
|
420 |
elif crop_image_path:
|
421 |
input_image = Image.open(crop_image_path).convert("RGB")
|
422 |
input_features = extract_features_siglip(input_image.convert("RGB"))
|
|
|
428 |
sim = -distances[0][i]
|
429 |
image_url = df.iloc[v]["Link"]
|
430 |
img_retrieved = read_image_from_url(image_url)
|
431 |
+
item_gallery_output.append(img_retrieved)
|
432 |
|
433 |
input_image = Image.open(full_image_path).convert("RGB")
|
434 |
input_features = extract_features_siglip(input_image.convert("RGB"))
|
|
|
440 |
sim = -distances[0][i]
|
441 |
image_url = df.iloc[v]["Link"]
|
442 |
img_retrieved = read_image_from_url(image_url)
|
443 |
+
style_gallery_output.append(img_retrieved)
|
444 |
if language=="English":
|
445 |
msg="🖼️ Please refer to the section below to see the recommended results."
|
446 |
else:
|
447 |
msg="🖼️ 请到下方查看推荐结果。"
|
448 |
state+=[(None,msg)]
|
449 |
|
450 |
+
return item_gallery_output, style_gallery_output,state,state
|
451 |
else:
|
452 |
input_image = Image.open(full_image_path).convert("RGB")
|
453 |
input_features = extract_features_siglip(input_image.convert("RGB"))
|
|
|
459 |
sim = -distances[0][i]
|
460 |
image_url = df.iloc[v]["Link"]
|
461 |
img_retrieved = read_image_from_url(image_url)
|
462 |
+
style_gallery_output.append(img_retrieved)
|
463 |
if language=="English":
|
464 |
msg="🖼️ Please refer to the section below to see the recommended results."
|
465 |
else:
|
466 |
msg="🖼️ 请到下方查看推荐结果。"
|
467 |
state+=[(None,msg)]
|
468 |
|
469 |
+
return item_gallery_output, style_gallery_output,state,state
|
470 |
+
|
471 |
|
472 |
|
473 |
###############################################################################
|
|
|
621 |
|
622 |
|
623 |
.info_btn {
|
624 |
+
background: rgb(245, 245, 245) !important;
|
625 |
border: none !important;
|
626 |
box-shadow: none !important;
|
627 |
font-size: 15px !important;
|
|
|
630 |
}
|
631 |
|
632 |
.info_btn_interact {
|
633 |
+
background: rgb(217, 217, 217) !important;
|
634 |
box-shadow: none !important;
|
635 |
font-size: 15px !important;
|
636 |
min-width: 6rem !important;
|
|
|
638 |
}
|
639 |
|
640 |
.function_button {
|
641 |
+
background: rgb(227, 226, 226) !important;
|
642 |
border: none !important;
|
643 |
box-shadow: none !important;
|
644 |
}
|
645 |
|
646 |
.function_button_rec {
|
647 |
+
background: rgb(189, 189, 189) !important;
|
648 |
border: none !important;
|
649 |
box-shadow: none !important;
|
650 |
}
|
651 |
|
652 |
+
.small_button {
|
653 |
+
font-size: 12px !important;
|
654 |
+
padding: 2px 8px !important;
|
655 |
+
min-width: 60px !important;
|
656 |
+
height: 30px !important;
|
657 |
+
}
|
658 |
+
|
659 |
#tool_box {max-width: 50px}
|
660 |
|
661 |
"""
|
|
|
718 |
]
|
719 |
|
720 |
recommendation_prompt=[
|
721 |
+
|
722 |
+
[
|
723 |
+
'''
|
724 |
+
First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
|
725 |
+
Recommendation reason: {{Recommendation based on {{object}} in the painting you saw earlier. Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate in three points. }}
|
726 |
+
Each bullet point should be in {language} language, with a response length of about {length} words.
|
727 |
+
''',
|
728 |
+
'''
|
729 |
+
When generating answers, you should tell people that I am the creator of painting you were looking at earlier itself, and generate text in the tone and manner in which you are the creator of painting were looking at earlier.
|
730 |
+
|
731 |
+
First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
|
732 |
+
|
733 |
+
Recommendation reason: {{I'm the creator of that painting you saw earlier. I'm an artist. and I'm recommending this painting based on the fact that the {{object}} I've drawn also appear in the painting you're looking at. }} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the creator of painting were looking at earlier and start every sentence with I.
|
734 |
+
|
735 |
+
Each bullet point should be in {language} language, with a response length of about {length} words.
|
736 |
+
|
737 |
+
''',
|
738 |
+
'''
|
739 |
+
When generating answers, you should tell people that you are the object itself that was selected in the painting, and generate text in the tone and manner in which you are the object
|
740 |
+
|
741 |
+
First identify what the object of the first painting is, you save yourself as the parameter: {{object}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
|
742 |
+
|
743 |
+
Recommendation reason: {{I'm the {{object}} in the painting you were looking at earlier, and I'm recommending this painting based on the fact that I'm also present in the one you're looking at.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the object of this painting and start every sentence with I.
|
744 |
+
|
745 |
+
Each bullet point should be in {language} language, with a response length of about {length} words.
|
746 |
+
|
747 |
+
'''],
|
748 |
+
|
749 |
+
[
|
750 |
'''
|
751 |
+
First identify what the name of the first painting is, you save yourself as the parameter: {{name}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
|
752 |
+
Recommendation reason: {{Recommendation based on the painting {{name}}.Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate in three points.}}
|
|
|
|
|
753 |
Each bullet point should be in {language} language, with a response length of about {length} words.
|
754 |
''',
|
755 |
'''
|
756 |
+
When generating answers, you should tell people that I am the creator of painting you were looking at earlier itself, and generate text in the tone and manner in which you are the creator of painting were looking at earlier.
|
757 |
+
|
758 |
+
First identify what the creator of the first painting is, you save yourself as the parameter: {artist}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
|
759 |
+
|
760 |
+
Recommendation reason: {{I'm the creator of that painting you saw earlier, {artist}. I'm an artist. and I'm recommending this painting based on the fact that the painting you're looking at is similar to the one you just saw of me.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the creator of painting were looking at earlier and start every sentence with I.
|
761 |
+
|
762 |
Each bullet point should be in {language} language, with a response length of about {length} words.
|
763 |
+
|
764 |
+
''',
|
765 |
'''
|
766 |
+
When generating answers, you should tell people that I am the painting you were looking at earlier itself, and generate text in the tone and manner in which you are the painting were looking at earlier.
|
767 |
+
|
768 |
+
First identify what the name of the first painting is, you save yourself as the parameter: {{name}}, do not need to tell me, the following will use the parameter. I want you to write the recommendation reason according to the following content, as a markdown outline with appropriate emojis that describe what you see according to the painting:
|
769 |
+
|
770 |
+
Recommendation reason: {{I'm the painting {{name}} you were looking at earlier, and I'm recommending this painting based on the fact that I'm similar to the one you're looking at.}} Detailed analysis: Based on the recommendation reason and the relationship between the two paintings, explain why you recommend another painting. Please generate the three points in the tone and manner as if you are the painting were looking at earlier and start every sentence with I.
|
771 |
+
|
772 |
+
Each bullet point should be in {language} language, with a response length of about {length} words.
|
773 |
+
|
774 |
+
'''],
|
775 |
+
|
776 |
|
777 |
|
778 |
|
779 |
+
]
|
780 |
|
781 |
gpt_state = 0
|
782 |
VOICE = "en-GB-SoniaNeural"
|
|
|
958 |
raise NotImplementedError
|
959 |
|
960 |
async def chat_input_callback(*args):
|
961 |
+
visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender,api_key,image_input,log_state,history,persona = args
|
962 |
message = chat_input["text"]
|
963 |
+
if persona == "Narrator":
|
964 |
+
prompt="Please help me answer the question with this painting {question} in {language}."
|
965 |
+
elif persona =="Artist":
|
966 |
+
prompt="When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. Please help me answer the question with this painting {question} in {language}."
|
967 |
+
else:
|
968 |
+
prompt="When generating answers, you should tell people that you are the object itself that was selected, and generate text in the tone and manner in which you are the object or the person. Please help me answer the question with this painting {question} in {language}."
|
969 |
prompt=prompt.format(question=message, language=language)
|
970 |
|
971 |
if visual_chatgpt is not None:
|
|
|
973 |
read_info = re.sub(r'[#[\]!*]','',result)
|
974 |
read_info = emoji.replace_emoji(read_info,replace="")
|
975 |
state = state + [(message,result)]
|
976 |
+
log_state += [(message,"/////")]
|
977 |
+
log_state += [("/////",result)]
|
978 |
# log_state += [("%% chat messahe %%",None)]
|
979 |
|
980 |
history.append({"role": "user", "content": message})
|
|
|
993 |
return state, state, None, audio,log_state,history
|
994 |
|
995 |
|
996 |
+
async def upload_callback(image_input,state, log_state, task_type, visual_chatgpt=None, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
|
997 |
print("narritive", narritive)
|
|
|
998 |
if isinstance(image_input, dict): # if upload from sketcher_input, input contains image and mask
|
999 |
image_input = image_input['background']
|
1000 |
|
|
|
1003 |
elif isinstance(image_input, bytes):
|
1004 |
image_input = Image.open(io.BytesIO(image_input))
|
1005 |
|
1006 |
+
|
1007 |
click_state = [[], [], []]
|
1008 |
|
1009 |
|
|
|
1043 |
visual_chatgpt.current_image = new_image_path
|
1044 |
paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
|
1045 |
# visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
|
1046 |
+
if task_type=="task 3":
|
1047 |
+
name="Along the River During the Qingming Festival"
|
1048 |
+
artist="Zhang Zeduan"
|
1049 |
+
year="12th century (Song Dynasty)"
|
1050 |
+
material="Chinese painting"
|
1051 |
+
gender="male"
|
1052 |
+
|
1053 |
+
elif task_type=="task 1":
|
1054 |
+
name ="The Ambassadors"
|
1055 |
+
artist ="Hans Holbein the Younger"
|
1056 |
+
year = "1533 (Northern Renaissance)"
|
1057 |
+
material="Realism"
|
1058 |
+
gender = "male"
|
1059 |
+
|
1060 |
+
elif task_type=="task 2":
|
1061 |
+
name = "The Football Players"
|
1062 |
+
artist= "Albert Gleizes"
|
1063 |
+
year= "1912 (Cubism)"
|
1064 |
+
material="Cubism"
|
1065 |
+
gender= "male"
|
1066 |
+
|
1067 |
+
else:
|
1068 |
+
parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\",\"gender\": \"The gender of the author\"}")
|
1069 |
+
print(parsed_data)
|
1070 |
+
parsed_data = json.loads(parsed_data.replace("'", "\""))
|
1071 |
+
name, artist, year, material,gender= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"],parsed_data['gender']
|
1072 |
+
gender=gender.lower()
|
1073 |
+
|
1074 |
if language=="English":
|
1075 |
if naritive_mapping[narritive]==0 :
|
1076 |
msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
|
|
|
1110 |
|
1111 |
|
1112 |
return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
|
1113 |
+
original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender,new_image_path,log_state,history,audio_output]
|
1114 |
|
1115 |
|
1116 |
|
|
|
1180 |
Image.open(out["crop_save_path"]).save(new_crop_save_path)
|
1181 |
print("new crop save",new_crop_save_path)
|
1182 |
|
1183 |
+
return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
1184 |
|
1185 |
|
1186 |
query_focus_en = [
|
|
|
1211 |
print("input_labels_state",input_labels_state)
|
1212 |
|
1213 |
prompt=generate_prompt(focus_type,paragraph,length,sentiment,factuality,language, naritive)
|
1214 |
+
print("log state",log_state[-1])
|
1215 |
+
if log_state[-1][0] is None or not log_state[-1][0].startswith("%%"):
|
1216 |
+
log_state = log_state + [("No like/dislike", None)]
|
1217 |
+
log_state = log_state + [("%% user interaction %%",None)]
|
1218 |
+
|
1219 |
log_state = log_state + [("Selected image point: {}, Input label: {}".format(input_points_state, input_labels_state), None)]
|
1220 |
|
1221 |
|
|
|
1228 |
# if not args.disable_gpt and text_refiner:
|
1229 |
if not args.disable_gpt:
|
1230 |
print("new crop save",new_crop_save_path)
|
1231 |
+
focus_info=get_gpt_response(openai_api_key,new_crop_save_path,prompt)
|
1232 |
if focus_info.startswith('"') and focus_info.endswith('"'):
|
1233 |
focus_info=focus_info[1:-1]
|
1234 |
focus_info=focus_info.replace('#', '')
|
|
|
1285 |
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,None,log_state,history
|
1286 |
|
1287 |
|
1288 |
+
naritive_mapping = {"Narrator": 0, "Artist": 1, "In-Situ": 2}
|
1289 |
|
1290 |
def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language,naritive):
|
1291 |
|
|
|
1739 |
print(f"Error in texttospeech: {e}")
|
1740 |
return None
|
1741 |
|
1742 |
+
async def get_recommendation(new_crop,image_path,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,recommend_type,artist,recomended_path):
|
1743 |
+
|
1744 |
+
if recommend_type=="Item":
|
1745 |
+
persona=naritive_mapping[narritive]
|
1746 |
+
prompt=recommendation_prompt[0][persona].format(language=language,length=length)
|
1747 |
+
image_paths=[new_crop,recomended_path]
|
1748 |
+
result=get_gpt_response(openai_api_key, image_paths, prompt)
|
1749 |
+
print("recommend result",result)
|
1750 |
+
state += [(None, f"{result}")]
|
1751 |
+
log_state += [("User wants to know object recomendation reason", None)]
|
1752 |
+
log_state = log_state + [(narritive, None)]
|
1753 |
+
log_state = log_state + [(f"image sort ranking {sort_score}", None)]
|
1754 |
+
log_state = log_state + [(None, f"{result}")]
|
1755 |
+
read_info = re.sub(r'[#[\]!*]','',result)
|
1756 |
+
read_info = emoji.replace_emoji(read_info,replace="")
|
1757 |
+
print("associate",read_info)
|
1758 |
+
audio_output=None
|
1759 |
+
if autoplay:
|
1760 |
+
audio_output = await texttospeech(read_info, language)
|
1761 |
+
return state,state,audio_output,log_state,index,gr.update(value=[])
|
1762 |
+
else:
|
1763 |
+
persona=naritive_mapping[narritive]
|
1764 |
+
|
1765 |
+
if persona==1:
|
1766 |
+
prompt=recommendation_prompt[1][persona].format(language=language,length=length,artist=artist[8:])
|
1767 |
+
else:
|
1768 |
+
prompt=recommendation_prompt[1][persona].format(language=language,length=length)
|
1769 |
+
image_paths=[image_path,recomended_path]
|
1770 |
+
result=get_gpt_response(openai_api_key, image_paths, prompt )
|
1771 |
+
print("recommend result",result)
|
1772 |
+
state += [(None, f"{result}")]
|
1773 |
+
log_state += [("User wants to know style recomendation reason", None)]
|
1774 |
+
log_state = log_state + [(narritive, None)]
|
1775 |
+
log_state = log_state + [(f"image sort ranking {sort_score}", None)]
|
1776 |
+
log_state = log_state + [(None, f"{result}")]
|
1777 |
+
read_info = re.sub(r'[#[\]!*]','',result)
|
1778 |
+
read_info = emoji.replace_emoji(read_info,replace="")
|
1779 |
+
print("associate",read_info)
|
1780 |
+
audio_output=None
|
1781 |
+
if autoplay:
|
1782 |
+
audio_output = await texttospeech(read_info, language)
|
1783 |
+
return state,state,audio_output,log_state,index,gr.update(value=[])
|
1784 |
+
|
1785 |
+
|
1786 |
# give the reason of recommendation
|
1787 |
+
async def item_associate(new_crop,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,evt: gr.SelectData):
|
|
|
1788 |
rec_path=evt._data['value']['image']['path']
|
1789 |
+
return state,state,None,log_state,None,gr.update(value=[]),rec_path,rec_path,"Item"
|
1790 |
+
|
1791 |
+
|
1792 |
+
async def style_associate(image_path,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,artist,evt: gr.SelectData):
|
1793 |
+
rec_path=evt._data['value']['image']['path']
|
1794 |
+
return state,state,None,log_state,None,gr.update(value=[]),rec_path, rec_path,"Style"
|
1795 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1796 |
|
1797 |
+
def change_naritive(session_type,image_input, state, click_state, paragraph, origin_image,narritive,task_instruct,gallery_output,style_gallery_result,reco_reasons,language="English"):
|
1798 |
if session_type=="Session 1":
|
1799 |
+
return None, [], [], [[], [], []], "", None, None, [], [],[],[],gr.update(value="Preview")
|
1800 |
else:
|
1801 |
if language=="English":
|
1802 |
+
if narritive=="Narrator" :
|
1803 |
state += [
|
1804 |
(
|
1805 |
None,
|
1806 |
f"🤖 Hi, I am EyeSee. Let's explore this painting together."
|
1807 |
)
|
1808 |
]
|
1809 |
+
elif narritive=="Artist":
|
1810 |
state += [
|
1811 |
(
|
1812 |
None,
|
1813 |
f"🧑🎨 Let's delve into it from the perspective of the artist."
|
1814 |
)
|
1815 |
]
|
1816 |
+
elif narritive=="In-Situ":
|
1817 |
state += [
|
1818 |
(
|
1819 |
None,
|
|
|
1821 |
)
|
1822 |
]
|
1823 |
elif language=="Chinese":
|
1824 |
+
if narritive=="Narrator" :
|
1825 |
state += [
|
1826 |
(
|
1827 |
None,
|
1828 |
"🤖 让我们从第三方视角一起探索这幅画吧。"
|
1829 |
)
|
1830 |
]
|
1831 |
+
elif narritive == "Artist":
|
1832 |
state += [
|
1833 |
(
|
1834 |
None,
|
1835 |
"🧑🎨 让我们从艺术家的视角深入探索这幅画。"
|
1836 |
)
|
1837 |
]
|
1838 |
+
elif narritive == "In-Situ":
|
1839 |
state += [
|
1840 |
(
|
1841 |
None,
|
|
|
1843 |
)
|
1844 |
]
|
1845 |
|
1846 |
+
|
1847 |
+
return image_input, state, state, click_state, paragraph, origin_image,task_instruct,gallery_output,style_gallery_result,reco_reasons,reco_reasons,gr.update(value="Preview")
|
1848 |
|
1849 |
|
1850 |
def print_like_dislike(x: gr.LikeData,state,log_state):
|
|
|
1860 |
return log_state,state
|
1861 |
|
1862 |
def get_recommendationscore(index,score,log_state):
|
1863 |
+
log_state+=[(f"{index} : {score}",None)]
|
1864 |
log_state+=[("%% recommendation %%",None)]
|
1865 |
return log_state
|
1866 |
|
|
|
1888 |
description = """<p>Gradio demo for EyeSee Anything in Art, image to dense captioning generation with various language styles. To use it, simply upload your image, or click one of the examples to load them. """
|
1889 |
|
1890 |
examples = [
|
1891 |
+
["test_images/1.The Ambassadors.jpg","test_images/task1.jpg","task 1"],
|
1892 |
+
["test_images/2.Football Players.jpg","test_images/task2.jpg","task 2"],
|
1893 |
+
["test_images/3-square.jpg","test_images/task3.jpg","task 3"],
|
|
|
1894 |
# ["test_images/test4.jpg"],
|
1895 |
# ["test_images/test5.jpg"],
|
1896 |
# ["test_images/Picture5.png"],
|
|
|
1907 |
log_state=gr.State([])
|
1908 |
# history log for gpt
|
1909 |
history_log=gr.State([])
|
1910 |
+
|
1911 |
out_state = gr.State(None)
|
1912 |
click_state = gr.State([[], [], []])
|
1913 |
origin_image = gr.State(None)
|
|
|
1934 |
# store the whole image path
|
1935 |
image_path=gr.State('')
|
1936 |
pic_index=gr.State(None)
|
1937 |
+
recomended_state=gr.State([])
|
1938 |
+
|
1939 |
+
recomended_path=gr.State(None)
|
1940 |
+
recomended_type=gr.State(None)
|
1941 |
+
|
1942 |
|
1943 |
|
1944 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
1945 |
with gr.Row():
|
1946 |
+
|
|
|
1947 |
with gr.Column(scale=6):
|
1948 |
with gr.Column(visible=False) as modules_not_need_gpt:
|
1949 |
+
with gr.Row():
|
1950 |
+
naritive = gr.Radio(
|
1951 |
+
choices=["Narrator", "Artist","In-Situ"],
|
1952 |
+
value="Narrator",
|
1953 |
+
label="Select Mode",
|
1954 |
+
scale=5,
|
1955 |
+
interactive=True)
|
1956 |
+
|
1957 |
+
add_button = gr.Button(value="Extend Area", interactive=True,elem_classes="tools_button_add",icon=add_icon_path)
|
1958 |
+
minus_button = gr.Button(value="Remove Area", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
|
1959 |
+
clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button",icon="assets/icons/recycle.png")
|
1960 |
+
|
1961 |
+
auto_play = gr.Checkbox(
|
1962 |
+
label="Check to autoplay audio", value=True, elem_classes="custom-autoplay",visible=False)
|
1963 |
+
output_audio = gr.HTML(
|
1964 |
+
label="Synthesised Audio", elem_classes="custom-output", visible=False)
|
1965 |
with gr.Tab("Base(GPT Power)",visible=False) as base_tab:
|
1966 |
image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
|
1967 |
with gr.Row():
|
|
|
1978 |
year_label_base2 = gr.Button(value="Year: ",elem_classes="info_btn_interact")
|
1979 |
material_label_base2 = gr.Button(value="Style: ",elem_classes="info_btn")
|
1980 |
|
1981 |
+
with gr.Row():
|
1982 |
+
with gr.Column(scale=1,min_width=50,visible=False) as instruct:
|
1983 |
+
task_instuction=gr.Image(type="pil", interactive=False, elem_classes="task_instruct",height=650,label="Instruction")
|
1984 |
+
with gr.Column(scale=6):
|
1985 |
+
with gr.Tab("Click") as click_tab:
|
1986 |
with gr.Row():
|
1987 |
+
with gr.Column(scale=10,min_width=600):
|
1988 |
+
image_input = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
|
1989 |
+
example_image = gr.Image(type="pil", interactive=False, visible=False)
|
1990 |
+
# example_image_click = gr.Image(type="pil", interactive=False, visible=False)
|
1991 |
+
# the tool column
|
1992 |
+
with gr.Column(scale=1,elem_id="tool_box",min_width=80):
|
1993 |
+
name_label = gr.Button(value="Name: ",elem_classes="info_btn")
|
1994 |
+
artist_label = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
|
1995 |
+
year_label = gr.Button(value="Year: ",elem_classes="info_btn_interact")
|
1996 |
+
material_label = gr.Button(value="Style: ",elem_classes="info_btn")
|
1997 |
+
|
1998 |
+
focus_d = gr.Button(value="Describe",interactive=True,elem_classes="function_button")
|
1999 |
+
focus_da = gr.Button(value="D+Analysis",interactive=True,elem_classes="function_button")
|
2000 |
+
focus_dai = gr.Button(value="DA+Interprete",interactive=True,elem_classes="function_button")
|
2001 |
+
focus_dda = gr.Button(value="Judge",interactive=True,elem_classes="function_button")
|
2002 |
+
|
2003 |
+
recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button_rec")
|
|
|
|
|
|
|
2004 |
# focus_asso = gr.Button(value="Associate",interactive=True,elem_classes="function_button",variant="primary")
|
2005 |
+
|
2006 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2007 |
|
2008 |
|
2009 |
with gr.Tab("Trajectory (beta)", visible=False) as traj_tab:
|
|
|
2060 |
with gr.Column(scale=4):
|
2061 |
with gr.Column(visible=True) as module_key_input:
|
2062 |
openai_api_key = gr.Textbox(
|
2063 |
+
value="sk-proj-bxHhgjZV8TVgd1IupZrUT3BlbkFJvrthq6zIxpZVk3vwsvJ9",
|
2064 |
placeholder="Input openAI API key",
|
2065 |
show_label=False,
|
2066 |
label="OpenAI API Key",
|
|
|
2077 |
# with gr.Column(visible=False) as modules_need_gpt2:
|
2078 |
# paragraph_output = gr.Textbox(lines=16, label="Describe Everything", max_lines=16)
|
2079 |
# cap_everything_button = gr.Button(value="Caption Everything in a Paragraph", interactive=True)
|
2080 |
+
with gr.Column(visible=False) as modules_not_need_gpt2:
|
2081 |
+
with gr.Blocks():
|
2082 |
+
chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=750,bubble_full_width=False)
|
2083 |
+
with gr.Column() as modules_need_gpt3:
|
2084 |
+
chat_input = gr.MultimodalTextbox(interactive=True, file_types=[".txt"], placeholder="Message EyeSee...", show_label=False)
|
2085 |
+
with gr.Row():
|
2086 |
+
clear_button_text = gr.Button(value="Clear Chat", interactive=True)
|
2087 |
+
export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
|
2088 |
+
with gr.Row(visible=False):
|
2089 |
+
with gr.Column():
|
2090 |
+
with gr.Row():
|
2091 |
+
click_mode = gr.Radio(
|
2092 |
+
choices=["Continuous", "Single"],
|
2093 |
+
value="Continuous",
|
2094 |
+
label="Clicking Mode",
|
2095 |
+
scale=5,
|
2096 |
+
interactive=True)
|
2097 |
|
2098 |
|
2099 |
|
2100 |
+
|
2101 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2102 |
|
2103 |
|
2104 |
|
|
|
2121 |
|
2122 |
 with gr.Row():
     with gr.Column(scale=6):
+        with gr.Row():
+            with gr.Column(visible=False) as recommend:
+                # sort_rec=gr.Dropdown(["1", "2", "3", "4"], visible=False,
+                #     value=[],
+                #     multiselect=True,
+                #     label="Score", info="Please sort the pictures according to your preference"
+                # )
+                gallery_result = gr.Gallery(
+                    label="Object-based Recommendation",
+                    height="auto",
+                    columns=2,
+                    interactive=False
+                    # columns=4,
+                    # rows=2,
+                    # show_label=False,
+                    # allow_preview=True,
+                    # object_fit="contain",
+                    # height="auto",
+                    # preview=True,
+                    # show_share_button=True,
+                    # show_download_button=True
+                )
+                style_gallery_result = gr.Gallery(
+                    label="Style-based Recommendation",
+                    height="auto",
+                    columns=2,
+                    interactive=False
+                    # columns=4,
+                    # rows=2,
+                    # show_label=False,
+                    # allow_preview=True,
+                    # object_fit="contain",
+                    # height="auto",
+                    # preview=True,
+                    # show_share_button=True,
+                    # show_download_button=True
+                )
+            with gr.Column(scale=3):
+                selected_image = gr.Image(label="Selected Image", interactive=False)
+                sort_rec = gr.Radio(
+                    choices=[1,2,3,4,5,6,7],
+                    label="Score",
+                    interactive=True, info="Please rank the recommended artwork")
+                recommend_type = gr.Radio(
+                    choices=["Preview","Reasons"],
+                    label="Information Type",
+                    value="Preview",
+                    interactive=True, visible=False)

     with gr.Column(scale=4,visible=False) as reco_reasons:
         recommend_score = gr.Radio(
             choices=[1,2,3,4,5,6,7],
             label="Score",
+            interactive=True, info="Please score the recommendation reasons")

 with gr.Row():
+    task_type = gr.Textbox(visible=False)
 gr.Examples(
     examples=examples,
+    inputs=[example_image,task_instuction,task_type],
 )
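# A small sketch (names hypothetical) of how a click handler can fill the two
# galleries defined above: gr.Gallery accepts a list of images or (image, caption)
# pairs, so a retrieval function simply returns such lists in output order.
import gradio as gr
from PIL import Image

def fake_retrieval():
    red = Image.new("RGB", (64, 64), "red")
    blue = Image.new("RGB", (64, 64), "blue")
    return [(red, "object match")], [(blue, "style match")]

with gr.Blocks() as demo:
    btn = gr.Button("Recommend")
    object_gallery = gr.Gallery(label="Object-based Recommendation", columns=2)
    style_gallery = gr.Gallery(label="Style-based Recommendation", columns=2)
    btn.click(fake_retrieval, inputs=[], outputs=[object_gallery, style_gallery])

demo.launch()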
...
     interactive=True,
     label="Generated Caption Length",
 )
 # auto_play = gr.Checkbox(
 #     label="Check to autoplay audio", value=False, elem_classes="custom-autoplay"
 # )
...
 recommend_btn.click(
     fn=infer,
     inputs=[new_crop_save_path,image_path,state,language,task_type],
+    outputs=[gallery_result,style_gallery_result,chatbot,state]
 )

 gallery_result.select(
+    item_associate,
+    inputs=[new_crop_save_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state],
+    outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path,recomended_type],
 )

+style_gallery_result.select(
+    style_associate,
+    inputs=[image_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state,artist_label],
+    outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score,selected_image,recomended_path,recomended_type],
+)

+selected_image.select(
+    get_recommendation,
+    inputs=[new_crop_save_path,image_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state,recomended_type,artist_label,recomended_path],
+    outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score])

 ###############################################################################
 ############# above part is for text to image #############
 ###############################################################################
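# A minimal sketch (illustrative only) of the Gallery.select pattern wired above:
# Gradio injects a gr.SelectData whose .index tells the handler which thumbnail
# was clicked, which is how item_associate/style_associate can know the choice.
import gradio as gr

with gr.Blocks() as demo:
    gallery = gr.Gallery(value=["https://example.com/a.jpg", "https://example.com/b.jpg"], columns=2)
    picked = gr.Textbox(label="Picked index")

    def on_pick(evt: gr.SelectData):
        # evt.index is the position of the selected image in the gallery
        return str(evt.index)

    gallery.select(on_pick, inputs=[], outputs=[picked])

demo.launch()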
...
 # cap_everything_button.click(cap_everything, [paragraph, visual_chatgpt, language,auto_play],
 #                             [paragraph_output,output_audio])
+def reset_and_add(origin_image):
+    new_prompt = "Positive"
+    new_add_icon = "assets/icons/plus-square-blue.png"
+    new_add_css = "tools_button_clicked"
+    new_minus_icon = "assets/icons/minus-square.png"
+    new_minus_css = "tools_button"
+    return [[],[],[]],origin_image, new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)

 clear_button_click.click(
+    reset_and_add,
     [origin_image],
+    [click_state, image_input,point_prompt,add_button,minus_button],
     queue=False,
     show_progress=False
 )
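# A sketch (component names illustrative) of the convention reset_and_add relies
# on: a handler's return values map positionally onto the outputs list, and
# gr.update(...) changes properties of an existing component in place.
import gradio as gr

with gr.Blocks() as demo:
    text = gr.Textbox(value="hello")
    btn = gr.Button("Reset")

    def reset():
        # first return value -> text, second -> btn
        return "", gr.update(interactive=False)

    btn.click(reset, inputs=[], outputs=[text, btn])

demo.launch()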
...
 # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
 # paragraph,artist,gender,image_path])

+image_input.upload(upload_callback, [image_input, state, log_state,task_type, visual_chatgpt,openai_api_key,language,naritive,history_log,auto_play,session_type],
     [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
     image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
     name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+    paragraph,artist,gender,image_path,log_state,history_log,output_audio])

 # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
 #     [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
...
 #     name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
 #     paragraph,artist])
+chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play,gender,openai_api_key,image_path,log_state,history_log,naritive],
     [chatbot, state, aux_state,output_audio,log_state,history_log])
 # chat_input.submit(lambda: "", None, chat_input)
 chat_input.submit(lambda: {"text": ""}, None, chat_input)
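# Why the lambda above returns {"text": ""}: a MultimodalTextbox holds a dict
# value (text plus attached files), so clearing it means writing back an empty
# dict payload rather than an empty string. A minimal sketch:
import gradio as gr

with gr.Blocks() as demo:
    box = gr.MultimodalTextbox(placeholder="type here")
    # on submit, reset the box to an empty text payload with no files
    box.submit(lambda: {"text": "", "files": []}, None, box)

demo.launch()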
+example_image.change(upload_callback, [example_image, state, log_state, task_type, visual_chatgpt, openai_api_key,language,naritive,history_log,auto_play,session_type],
     [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
     image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
     name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+    paragraph,artist,gender,image_path, log_state,history_log,output_audio])

 example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
+example_image.change(
+    lambda: ([],[],[],None,[],gr.update(value="Preview")),
+    [],
+    [gallery_result,style_gallery_result,recommend_bot,new_crop_save_path,chatbot,recommend_type])
 # def on_click_tab_selected():
 #     if gpt_state == 1:
...
 # click_tab.select(on_click_tab_selected, outputs=[modules_need_gpt1,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt2])
 # base_tab.select(on_base_selected, outputs=[modules_need_gpt0,modules_need_gpt2,modules_not_need_gpt2,modules_need_gpt1])
 # base_tab2.select(on_base_selected, outputs=[modules_not_need_gpt2,modules_not_need_gpt2,modules_need_gpt0,modules_need_gpt1])
+def print_reason():
+    print("reason")
...
 naritive.change(
     change_naritive,
+    [session_type, image_input, state, click_state, paragraph, origin_image,naritive,
+     task_instuction,gallery_result,style_gallery_result,recomended_state,language],
+    [image_input, chatbot, state, click_state, paragraph, origin_image,task_instuction,gallery_result,style_gallery_result,recomended_state,recommend_bot,recommend_type],
     queue=False,
     show_progress=False
 )

+def change_session():
     instruction = Image.open('test_images/task4.jpg')
+    return None, [], [], [[], [], []], "", None, [],[],instruction,"task 4",[],[],[]

 session_type.change(
+    change_session,
     [],
+    [image_input, chatbot, state, click_state, paragraph, origin_image,history_log,log_state,task_instuction,task_type,gallery_result,style_gallery_result,recommend_bot]
 )
 # upvote_btn.click(
|