Niki Zhang committed: Update app.py

app.py CHANGED
Original app.py (left-hand side of the diff; removed lines are marked -):

@@ -476,15 +476,55 @@ print("4")
 476  css = """
 477  #warning {background-color: #FFCCCB}
 478  .tools_button {
 479  background: white;
 480  border: none !important;
 481  box-shadow: none !important;
 482  }
 483
-484  .
 485  background: white;
 486  border: none !important;
 487  box-shadow: none !important;
 488  }
 489
 490  .function_button {
@@ -496,20 +536,20 @@ css = """
 496
 497  """
 498  filtered_language_dict = {
-499  'English': 'en-US-JennyNeural',
-500  'Chinese': 'zh-CN-XiaoxiaoNeural',
-501  'French': 'fr-FR-DeniseNeural',
-502  'Spanish': 'es-MX-DaliaNeural',
-503  'Arabic': 'ar-SA-ZariyahNeural',
-504  'Portuguese': 'pt-BR-FranciscaNeural',
-505  'Cantonese': 'zh-HK-HiuGaaiNeural'
 506  }
 507
 508  focus_map = {
 509  "D":0,
 510  "DA":1,
 511  "DAI":2,
-512  "
 513  }
 514
 515  '''

@@ -528,11 +568,27 @@ prompt_list = [
 528  ]
 529  '''
 530  prompt_list = [
-531
-532
-533  'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact
-534  'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and
 535  ]
 536
 537
 538  gpt_state = 0
@@ -665,11 +721,11 @@ def init_openai_api_key(api_key=""):
 665  global gpt_state
 666  gpt_state=1
 667  # return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
-668  return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
 669  else:
 670  gpt_state=0
 671  # return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
-672  return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
 673
 674  def init_wo_openai_api_key():
 675  global gpt_state

@@ -714,7 +770,7 @@ def update_click_state(click_state, caption, click_mode):
 714  raise NotImplementedError
 715
 716  async def chat_input_callback(*args):
-717  visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
 718  message = chat_input["text"]
 719  if visual_chatgpt is not None:
 720  state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)

@@ -724,12 +780,12 @@ async def chat_input_callback(*args):
 724  return state, state, aux_state, None
 725
 726  else:
-727  audio = await texttospeech(last_response,language,autoplay)
 728  return state, state, aux_state, audio
 729  else:
 730  response = "Text refiner is not initilzed, please input openai api key."
 731  state = state + [(chat_input, response)]
-732  audio = await texttospeech(response,language,autoplay)
 733  return state, state, None, audio
 734
 735
@@ -774,37 +830,63 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
 774  visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
 775  print("memory",visual_chatgpt.agent.memory)
 776  # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
-777  parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\" }")
 778  parsed_data = json.loads(parsed_data.replace("'", "\""))
-779  name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"]
-780
 781
-782  if
-783 through -802  (removed lines; content not recoverable from the diff view)
 803
 804
 805
 806  return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
-807  original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist]
 808
 809
 810
@@ -842,14 +924,23 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
 842
 843  enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
 844  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
-845  state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
 846
-847
-848
 849  text = out['generated_captions']['raw_caption']
 850  input_mask = np.array(out['mask'].convert('P'))
 851  image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
-852  image_input_withbackground=mask_painter(np.array(image_input), input_mask)
 853
 854  click_index_state = click_index
 855  input_mask_state = input_mask

@@ -878,9 +969,9 @@ query_focus = {
 878  }
 879
 880
-881  async def submit_caption(state,length, sentiment, factuality, language,
 882  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
-883  autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
 884
 885
 886  state = state + [(query_focus[focus_type], None)]

@@ -896,7 +987,7 @@ async def submit_caption(state,length, sentiment, factuality, language,
 896  print("input_points_state",input_points_state)
 897  print("input_labels_state",input_labels_state)
 898
-899  prompt=generate_prompt(focus_type,paragraph,length,sentiment,factuality,language)
 900
 901  print("Prompt:", prompt)
 902  print("click",click_index)
@@ -918,6 +1009,16 @@ async def submit_caption(state,length, sentiment, factuality, language,
 918  read_info = re.sub(r'[#[\]!*]','',focus_info)
 919  read_info = emoji.replace_emoji(read_info,replace="")
 920  print("read info",read_info)
 921
 922  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 923  # input_points=input_points, input_labels=input_labels)

@@ -925,25 +1026,26 @@ async def submit_caption(state,length, sentiment, factuality, language,
 925  if autoplay==False:
 926  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
 927
-928  audio_output = await texttospeech(read_info, language, autoplay)
 929  print("done")
 930  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
-931  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
 932
 933  except Exception as e:
 934  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
 935  print(f"Error during TTS prediction: {str(e)}")
 936  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
-937  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
 938
 939  else:
 940  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
 941  print(f"Error during TTS prediction: {str(e)}")
-942  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
 943
 944
 945
-946
 947
 948  mapped_value = focus_map.get(focus_type, -1)
 949

@@ -953,9 +1055,13 @@ def generate_prompt(focus_type, paragraph,length, sentiment, factuality, languag
 953  'factuality': factuality,
 954  'language': language
 955  }
 956
 957  if mapped_value != -1:
-958  prompt = prompt_list[mapped_value].format(
 959  Wiki_caption=paragraph,
 960  length=controls['length'],
 961  sentiment=controls['sentiment'],

@@ -964,8 +1070,8 @@ def generate_prompt(focus_type, paragraph,length, sentiment, factuality, languag
 964  else:
 965  prompt = "Invalid focus type."
 966
-967  if controls['factuality'] == "Imagination":
-968
 969
 970  return prompt
 971
@@ -1061,103 +1167,103 @@ def get_sketch_prompt(mask: Image.Image):
 1061
 1062  submit_traj=0
 1063
-1064  async def inference_traject(origin_image,sketcher_image, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
-1065 through -1148  (removed body of inference_traject; content not recoverable from the diff view)
-1149  else:
-1150  try:
-1151  audio_output = await texttospeech(focus_info, language, autoplay)
-1152  # waveform_visual, audio_output = tts.predict(generated_caption, input_language, input_audio, input_mic, use_mic, agree)
-1153  # return state, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
-1154  return state, state, image_input,audio_output
 1155
 1156
-1157 through -1160  (removed lines; content not shown in the diff view)
 1161
 1162
 1163  def clear_chat_memory(visual_chatgpt, keep_global=False):
@@ -1172,7 +1278,7 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
 1172  visual_chatgpt.global_prompt = ""
 1173
 1174
-1175  def export_chat_log(chat_state, paragraph, liked, disliked):
 1176  try:
 1177  if not chat_state:
 1178  return None

@@ -1201,41 +1307,44 @@ def export_chat_log(chat_state, paragraph, liked, disliked):
 1201  temp_file.write(chat_log.encode('utf-8'))
 1202  temp_file_path = temp_file.name
 1203  print(temp_file_path)
-1204
 1205  except Exception as e:
 1206  print(f"An error occurred while exporting the chat log: {e}")
 1207  return None
 1208
 1209  async def get_artistinfo(artist_name,api_key,state,language,autoplay,length):
-1210  prompt=f"Provide a concise summary of about {length} words in {language} on the painter {artist_name}, covering his biography, major works, artistic style, significant contributions to the art world, and any major awards or recognitions he has received."
 1211  res=get_gpt_response(api_key,None,prompt)
-1212  state = state + [(None,
 1213  read_info = re.sub(r'[#[\]!*]','',res)
 1214  read_info = emoji.replace_emoji(read_info,replace="")
 1215
 1216
 1217  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 1218  # input_points=input_points, input_labels=input_labels)
-1219
-1220
 1221  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
-1222
 1223
 1224
 1225  async def get_yearinfo(year,api_key,state,language,autoplay,length):
-1226  prompt = f"Provide a concise summary of about {length} words in {language} on the art historical period associated with the year {year}, covering its major characteristics, influential artists, notable works, and its significance in the broader context of art history."
 1227  res=get_gpt_response(api_key,None,prompt)
-1228  state = state + [(None,
 1229  read_info = re.sub(r'[#[\]!*]','',res)
 1230  read_info = emoji.replace_emoji(read_info,replace="")
 1231
 1232
 1233  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 1234  # input_points=input_points, input_labels=input_labels)
-1235
-1236
 1237  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
-1238
 1239
 1240
 1241
@@ -1364,10 +1473,10 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
 1364
 1365  # return like_state, dislike_state
 1366
-1367  async def texttospeech(text, language, autoplay):
 1368  try:
 1369  if autoplay:
-1370  voice = filtered_language_dict[language]
 1371  communicate = edge_tts.Communicate(text, voice)
 1372  file_path = "output.wav"
 1373  await communicate.save(file_path)

@@ -1385,6 +1494,35 @@ async def texttospeech(text, language, autoplay):
 1385  print(f"Error in texttospeech: {e}")
 1386  return None
 1387
 1388  def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
 1389  print(x.index, x.value, x.liked)
 1390  if x.liked == True:
@@ -1398,14 +1536,15 @@ def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
 1398  return like_res,dislike_res,state
 1399
 1400
 1401  def toggle_icons_and_update_prompt(point_prompt):
 1402  new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
 1403  new_add_icon = "assets/icons/plus-square-blue.png" if new_prompt == "Positive" else "assets/icons/plus-square.png"
 1404  new_minus_icon = "assets/icons/minus-square.png" if new_prompt == "Positive" else "assets/icons/minus-square-blue.png"
-1405
-1406
-1407
-1408  return new_prompt, gr.update(icon=new_add_icon), gr.update(icon=new_minus_icon)
 1409
 1410  add_icon_path="assets/icons/plus-square-blue.png"
 1411  minus_icon_path="assets/icons/minus-square.png"

@@ -1420,12 +1559,11 @@ def create_ui():
 1420
 1421  examples = [
 1422  ["test_images/ambass.jpg"],
-1423  ["test_images/
-1424  ["test_images/
-1425  ["test_images/
-1426  ["test_images/
-1427  ["test_images/
-1428  ["test_images/Picture4.png"],
 1429  ["test_images/Picture5.png"],
 1430
 1431  ]
@@ -1457,6 +1595,9 @@ def create_ui():
 1457  gr.Markdown(title)
 1458  gr.Markdown(description)
 1459  point_prompt = gr.State("Positive")
 1460  # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
 1461  # with gr.Column(scale=0.5):
 1462  # # gr.Markdown("Left side content")

@@ -1479,8 +1620,8 @@ def create_ui():
 1479  value="English", label="Language", interactive=True, elem_classes="custom-language"
 1480  )
 1481  length = gr.Slider(
-1482  minimum=
-1483  maximum=
 1484  value=80,
 1485  step=1,
 1486  interactive=True,
@@ -1507,45 +1648,47 @@ def create_ui():
 1507  with gr.Column(scale=6):
 1508  with gr.Column(visible=False) as modules_not_need_gpt:
 1509  with gr.Tab("Base(GPT Power)") as base_tab:
-1510  image_input_base = gr.Image(type="pil", interactive=True,
 1511  with gr.Row():
 1512  name_label_base = gr.Button(value="Name: ",elem_classes="info_btn")
-1513  artist_label_base = gr.Button(value="Artist: ",elem_classes="
-1514  year_label_base = gr.Button(value="Year: ",elem_classes="
 1515  material_label_base = gr.Button(value="Style: ",elem_classes="info_btn")
 1516
 1517  with gr.Tab("Base2") as base_tab2:
-1518  image_input_base_2 = gr.Image(type="pil", interactive=True,
 1519  with gr.Row():
 1520  name_label_base2 = gr.Button(value="Name: ",elem_classes="info_btn")
-1521  artist_label_base2 = gr.Button(value="Artist: ",elem_classes="
-1522  year_label_base2 = gr.Button(value="Year: ",elem_classes="
 1523  material_label_base2 = gr.Button(value="Style: ",elem_classes="info_btn")
 1524
 1525  with gr.Tab("Click") as click_tab:
 1526  with gr.Row():
 1527  with gr.Column(scale=10,min_width=600):
-1528  image_input = gr.Image(type="pil", interactive=True,
 1529  example_image = gr.Image(type="pil", interactive=False, visible=False)
 1530  with gr.Row():
 1531  name_label = gr.Button(value="Name: ",elem_classes="info_btn")
-1532  artist_label = gr.Button(value="Artist: ",elem_classes="
-1533  year_label = gr.Button(value="Year: ",elem_classes="
 1534  material_label = gr.Button(value="Style: ",elem_classes="info_btn")
 1535
 1536
 1537  # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
 1538  # the tool column
 1539  with gr.Column(scale=1,elem_id="tool_box",min_width=80):
-1540  add_button = gr.Button(value="", interactive=True,elem_classes="
-1541  minus_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
 1542  clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button")
 1543  clear_button_image = gr.Button(value="Change", interactive=True,elem_classes="tools_button")
-1544  focus_d = gr.Button(value="D",interactive=True,elem_classes="function_button")
-1545  focus_da = gr.Button(value="DA",interactive=True,elem_classes="function_button")
-1546  focus_dai = gr.Button(value="DAI",interactive=True,elem_classes="function_button")
-1547  focus_dda = gr.Button(value="
-1548
 1549
 1550  with gr.Row(visible=False):
 1551  with gr.Column():
@@ -1608,7 +1751,22 @@ def create_ui():
 1608  value="No",
 1609  label="Expert",
 1610  interactive=True)
-1611
 1612
 1613  with gr.Column(visible=True) as modules_not_need_gpt3:
 1614  gr.Examples(

@@ -1671,7 +1829,7 @@ def create_ui():
 1671  ############# this part is for text to image #############
 1672  ###############################################################################
 1673
-1674  with gr.Row(variant="panel") as text2image_model:
 1675
 1676  with gr.Column():
 1677  with gr.Column():

@@ -1719,7 +1877,7 @@ def create_ui():
 1719  value=0,
 1720  )
 1721  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-1722  with gr.Row(
 1723  width = gr.Slider(
 1724  label="Width",
 1725  minimum=100,
@@ -1749,21 +1907,21 @@ def create_ui():
 1749  step=1,
 1750  value=8,
 1751  )
-1752  with gr.Column():
-1753 through -1766  (removed lines; content not recoverable from the diff view)
 1767
 1768  with gr.Row():
 1769  naritive = gr.Radio(
@@ -1814,8 +1972,16 @@ def create_ui():
 1814  recommend_btn.click(
 1815  fn=infer,
 1816  inputs=[new_crop_save_path],
-1817  outputs=[
 1818  )
 1819
 1820  ###############################################################################
 1821  ############# above part is for text to image #############
@@ -1966,11 +2132,11 @@ def create_ui():
 1966
 1967  openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
 1968  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
-1969  modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
 1970  enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
 1971  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
 1972  modules_not_need_gpt,
-1973  modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
 1974  # openai_api_key.submit(init_openai_api_key,
 1975  # outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
 1976  # modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])

@@ -2077,7 +2243,7 @@ def create_ui():
 2077  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
 2078  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
 2079  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
-2080  paragraph,artist])
 2081
 2082  # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
 2083  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,

@@ -2103,7 +2269,7 @@ def create_ui():
 2103  # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
 2104  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
 2105  # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
-2106  chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
 2107  [chatbot, state, aux_state,output_audio])
 2108  # chat_input.submit(lambda: "", None, chat_input)
 2109  chat_input.submit(lambda: {"text": ""}, None, chat_input)
@@ -2114,7 +2280,7 @@ def create_ui():
 2114  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
 2115  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
 2116  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
-2117  paragraph,artist])
 2118
 2119  example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
 2120

@@ -2161,11 +2327,11 @@ def create_ui():
 2161  focus_d.click(
 2162  submit_caption,
 2163  inputs=[
-2164  state,length, sentiment, factuality, language,
-2165  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path
 2166  ],
 2167  outputs=[
-2168  chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
 2169  ],
 2170  show_progress=True,
 2171  queue=True

@@ -2178,7 +2344,7 @@ def create_ui():
 2178  focus_da.click(
 2179  submit_caption,
 2180  inputs=[
-2181  state,length, sentiment, factuality, language,
 2182  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,auto_play, paragraph,focus_da,openai_api_key,new_crop_save_path
 2183  ],
 2184  outputs=[

@@ -2192,7 +2358,7 @@ def create_ui():
 2192  focus_dai.click(
 2193  submit_caption,
 2194  inputs=[
-2195  state,length, sentiment, factuality, language,
 2196  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
 2197  auto_play, paragraph,focus_dai,openai_api_key,new_crop_save_path
 2198  ],

@@ -2207,7 +2373,7 @@ def create_ui():
 2207  focus_dda.click(
 2208  submit_caption,
 2209  inputs=[
-2210  state,length, sentiment, factuality, language,
 2211  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
 2212  auto_play, paragraph,focus_dda,openai_api_key,new_crop_save_path
 2213  ],
@@ -2252,11 +2418,20 @@ def create_ui():
 2252
 2253  export_button.click(
 2254  export_chat_log,
-2255  inputs=[state,paragraph,like_res,dislike_res],
-2256  outputs=[chat_log_file],
 2257  queue=True
 2258  )
 2259
 2260  # upvote_btn.click(
 2261  # handle_liked,
 2262  # inputs=[state,like_res],

@@ -2281,4 +2456,4 @@ if __name__ == '__main__':
 2281  iface = create_ui()
 2282  iface.queue(api_open=False, max_size=10)
 2283  # iface.queue(concurrency_count=5, api_open=False, max_size=10)
-2284  iface.launch(server_name="0.0.0.0"
css = """
|
477 |
#warning {background-color: #FFCCCB}
|
478 |
.tools_button {
|
479 |
+
display: flex;
|
480 |
+
flex-direction: column;
|
481 |
+
align-items: center;
|
482 |
+
justify-content: center;
|
483 |
background: white;
|
484 |
border: none !important;
|
485 |
box-shadow: none !important;
|
486 |
+
text-align: center;
|
487 |
+
color: black;
|
488 |
}
|
489 |
|
490 |
+
.tools_button_clicked {
|
491 |
+
display: flex;
|
492 |
+
flex-direction: column;
|
493 |
+
align-items: center;
|
494 |
+
justify-content: center;
|
495 |
+
background: white;
|
496 |
+
border: none !important;
|
497 |
+
box-shadow: none !important;
|
498 |
+
text-align: center;
|
499 |
+
color: rgb(18,150,219);
|
500 |
+
}
|
501 |
+
|
502 |
+
.tools_button_add {
|
503 |
+
display: flex;
|
504 |
+
flex-direction: column;
|
505 |
+
align-items: center;
|
506 |
+
justify-content: center;
|
507 |
background: white;
|
508 |
border: none !important;
|
509 |
box-shadow: none !important;
|
510 |
+
text-align: center;
|
511 |
+
color: rgb(18,150,219);
|
512 |
+
}
|
513 |
+
|
514 |
+
|
515 |
+
.image_upload {
|
516 |
+
height: 650px;
|
517 |
+
}
|
518 |
+
|
519 |
+
.info_btn {
|
520 |
+
background: white !important;
|
521 |
+
border: none !important;
|
522 |
+
box-shadow: none !important;
|
523 |
+
}
|
524 |
+
|
525 |
+
info_btn_interact {
|
526 |
+
background: white !important;
|
527 |
+
box-shadow: none !important;
|
528 |
}
|
529 |
|
530 |
.function_button {
|
|
|
536 |
|
537 |
"""
|
538 |
 538  filtered_language_dict = {
+539  'English': {'female': 'en-US-JennyNeural', 'male': 'en-US-GuyNeural'},
+540  'Chinese': {'female': 'zh-CN-XiaoxiaoNeural', 'male': 'zh-CN-YunxiNeural'},
+541  'French': {'female': 'fr-FR-DeniseNeural', 'male': 'fr-FR-HenriNeural'},
+542  'Spanish': {'female': 'es-MX-DaliaNeural', 'male': 'es-MX-JorgeNeural'},
+543  'Arabic': {'female': 'ar-SA-ZariyahNeural', 'male': 'ar-SA-HamedNeural'},
+544  'Portuguese': {'female': 'pt-BR-FranciscaNeural', 'male': 'pt-BR-AntonioNeural'},
+545  'Cantonese': {'female': 'zh-HK-HiuGaaiNeural', 'male': 'zh-HK-WanLungNeural'}
 546  }
 547
 548  focus_map = {
 549  "D":0,
 550  "DA":1,
 551  "DAI":2,
+552  "Judge":3
 553  }
 554
 555  '''
 568  ]
 569  '''
 570  prompt_list = [
+571  [
+572
+573  'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
+574  'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
+575  'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
+576  'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
+577  ],
+578  [
+579  'When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
+580  'When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
+581  'When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
+582  'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
+583  ],
+584  [
+585  'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
+586  'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
+587  'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
+588  'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
+589  ]
 590  ]
+591
 592
 593
 594  gpt_state = 0
 721  global gpt_state
 722  gpt_state=1
 723  # return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
+724  return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*2
 725  else:
 726  gpt_state=0
 727  # return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
+728  return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*2
 729
 730  def init_wo_openai_api_key():
 731  global gpt_state
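Note: the only change in this hunk is the trailing multiplier on the returned list of gr.update objects (now *2), which has to track the number of output components wired to init_openai_api_key. A minimal, self-contained sketch of that contract, with illustrative component names rather than the app's own:

```python
import gradio as gr

def toggle(show):
    # One return value per output component; lists concatenate exactly as in app.py.
    return [gr.update(visible=show)] * 2 + [gr.update(visible=not show)]

with gr.Blocks() as demo:
    show_box = gr.Checkbox(value=True, label="Show first two")
    a, b, c = gr.Textbox(label="A"), gr.Textbox(label="B"), gr.Textbox(label="C")
    # Three outputs, three returned updates; a mismatch raises an error at event time.
    show_box.change(toggle, inputs=[show_box], outputs=[a, b, c])
# demo.launch()
```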
 770  raise NotImplementedError
 771
 772  async def chat_input_callback(*args):
+773  visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender = args
 774  message = chat_input["text"]
 775  if visual_chatgpt is not None:
 776  state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
 780  return state, state, aux_state, None
 781
 782  else:
+783  audio = await texttospeech(last_response,language,autoplay,gender)
 784  return state, state, aux_state, audio
 785  else:
 786  response = "Text refiner is not initilzed, please input openai api key."
 787  state = state + [(chat_input, response)]
+788  audio = await texttospeech(response,language,autoplay,gender)
 789  return state, state, None, audio
 790
 791
 830  visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
 831  print("memory",visual_chatgpt.agent.memory)
 832  # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+833  parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\",\"gender\": \"The gender of the author\" }")
 834  parsed_data = json.loads(parsed_data.replace("'", "\""))
+835  name, artist, year, material,gender= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"],parsed_data['gender']
+836  gender=gender.lower()
+837  print("gender",gender)
+838
 839
+840  if language=="English":
+841  if narritive=="Third" :
+842  state = [
+843  (
+844  None,
+845  f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
+846  )
+847  ]
+848  elif narritive=="Artist":
+849  state = [
+850  (
+851  None,
+852  f"🧑🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
+853  )
+854  ]
+855  elif narritive=="Item":
+856  state = [
+857  (
+858  None,
+859  f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
+860  )
+861  ]
+862  elif language=="Chinese":
+863  if narritive == "Third":
+864  state = [
+865  (
+866  None,
+867  f"🤖 你好,我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供相关的信息。"
+868  )
+869  ]
+870  elif narritive == "Artist":
+871  state = [
+872  (
+873  None,
+874  f"🧑🎨 你好,我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供我的创作背后的相关见解和想法。"
+875  )
+876  ]
+877  elif narritive == "Item":
+878  state = [
+879  (
+880  None,
+881  f"🎨 你好,让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会从画面上事物的视角为你提供相关的见解和想法。"
+882  )
+883  ]
+884
 885
 886
 887
 888  return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
+889  original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender]
 890
 891
 892
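Note: upload_callback now asks GPT for a fifth field, "gender", and lowercases it before storing it in state. A hedged sketch of just the parsing step; the get() fallback default is an assumption for robustness, not something the commit adds:

```python
import json

def parse_painting_info(raw: str):
    # The GPT reply sometimes uses single quotes, hence the replace() before
    # json.loads, mirroring the call in upload_callback.
    data = json.loads(raw.replace("'", '"'))
    gender = str(data.get("gender", "female")).lower()
    return data["name"], data["artist"], data["year"], data["style"], gender

print(parse_painting_info(
    '{"name": "Mona Lisa", "artist": "Leonardo da Vinci", '
    '"year": "c. 1503 (High Renaissance)", "style": "Sfumato", "gender": "Male"}'))
```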
 925  enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
 926  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
+927  # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
+928
+929  # state = state + [("Selected image point: {}, Input label: {}".format(
+930  # prompt["input_point"],
+931  # '+' if prompt["input_label"] == "1" else '-'
+932  # ), None)]
 933
+934  output_label = ['+' if label == 1 else '-' for label in prompt["input_label"]]
+935
+936  state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], output_label), None)]
+937
+938
+939
+940  # update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
 941  text = out['generated_captions']['raw_caption']
 942  input_mask = np.array(out['mask'].convert('P'))
 943  image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
 944
 945  click_index_state = click_index
 946  input_mask_state = input_mask
 969  }
 970
 971
+972  async def submit_caption(naritive, state,length, sentiment, factuality, language,
 973  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
+974  autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender):
 975
 976
 977  state = state + [(query_focus[focus_type], None)]
 987  print("input_points_state",input_points_state)
 988  print("input_labels_state",input_labels_state)
 989
+990  prompt=generate_prompt(focus_type,paragraph,length,sentiment,factuality,language, naritive)
 991
 992  print("Prompt:", prompt)
 993  print("click",click_index)
 1009  read_info = re.sub(r'[#[\]!*]','',focus_info)
 1010  read_info = emoji.replace_emoji(read_info,replace="")
 1011  print("read info",read_info)
+1012  if naritive=="Item":
+1013  parsed_data = get_gpt_response(openai_api_key, new_crop_save_path,prompt = f"Based on the information {focus_info}, return the gender of this item, returns its most likely gender, do not return unknown, in the format {{\"gender\": \"<gender>\"}}")
+1014  parsed_data = json.loads(parsed_data)
+1015
+1016  try:
+1017  gender=parsed_data['gender']
+1018  gender=gender.lower()
+1019  except:
+1020  print("error gpt responese")
+1021  print("item gender",gender)
 1022
 1023  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 1024  # input_points=input_points, input_labels=input_labels)
 1026  if autoplay==False:
 1027  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
 1028
+1029  audio_output = await texttospeech(read_info, language, autoplay,gender)
 1030  print("done")
 1031  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
+1032  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output,gender,focus_info
 1033
 1034  except Exception as e:
 1035  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
 1036  print(f"Error during TTS prediction: {str(e)}")
 1037  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
+1038  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output,gender,focus_info
 1039
 1040  else:
 1041  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
 1042  print(f"Error during TTS prediction: {str(e)}")
+1043  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,None,focus_info
 1044
 1045
 1046
+1047
+1048  def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language,naritive):
 1049
 1050  mapped_value = focus_map.get(focus_type, -1)
 1051
 1055  'factuality': factuality,
 1056  'language': language
 1057  }
+1058
+1059  naritive_mapping = {"Third": 0, "Artist": 1, "Item": 2}
+1060
+1061  naritive_value=naritive_mapping[naritive]
 1062
 1063  if mapped_value != -1:
+1064  prompt = prompt_list[naritive_value][mapped_value].format(
 1065  Wiki_caption=paragraph,
 1066  length=controls['length'],
 1067  sentiment=controls['sentiment'],
 1070  else:
 1071  prompt = "Invalid focus type."
 1072
+1073  # if controls['factuality'] == "Imagination":
+1074  # prompt += " Assuming that I am someone who has viewed a lot of art and has a lot of experience viewing art. Explain artistic features (composition, color, style, or use of light) and discuss the symbolism of the content and its influence on later artistic movements."
 1075
 1076  return prompt
 1077
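Note: generate_prompt now indexes prompt_list twice: the narrative ("Third" / "Artist" / "Item") picks the row and the focus button ("D" / "DA" / "DAI" / "Judge") picks the column. A trimmed sketch of that lookup, with placeholder strings standing in for the full templates above:

```python
focus_map = {"D": 0, "DA": 1, "DAI": 2, "Judge": 3}
naritive_mapping = {"Third": 0, "Artist": 1, "Item": 2}

# Placeholders instead of the real prompt templates.
prompt_list = [[f"{n}/{f} ..." for f in focus_map] for n in naritive_mapping]

def pick_prompt(naritive, focus_type):
    mapped_value = focus_map.get(focus_type, -1)
    if mapped_value == -1:
        return "Invalid focus type."
    # Row = narrative, column = focus type, exactly as in generate_prompt.
    return prompt_list[naritive_mapping[naritive]][mapped_value]

print(pick_prompt("Artist", "DA"))  # -> "Artist/DA ..."
```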
 1168  submit_traj=0
 1169
+1170  # async def inference_traject(naritive, origin_image,sketcher_image, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
+1171  # original_size, input_size, text_refiner,focus_type,paragraph,openai_api_key,autoplay,trace_type):
+1172  # image_input, mask = sketcher_image['background'], sketcher_image['layers'][0]
 1173
+1174  # crop_save_path=""
 1175
+1176  # prompt = get_sketch_prompt(mask)
+1177  # boxes = prompt['input_boxes']
+1178  # boxes = boxes[0]
+1179
+1180  # controls = {'length': length,
+1181  # 'sentiment': sentiment,
+1182  # 'factuality': factuality,
+1183  # 'language': language}
+1184
+1185  # model = build_caption_anything_with_models(
+1186  # args,
+1187  # api_key="",
+1188  # captioner=shared_captioner,
+1189  # sam_model=shared_sam_model,
+1190  # ocr_reader=shared_ocr_reader,
+1191  # text_refiner=text_refiner,
+1192  # session_id=iface.app_id
+1193  # )
+1194
+1195  # model.setup(image_embedding, original_size, input_size, is_image_set=True)
+1196
+1197  # enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
+1198  # out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki,verbose=True)[0]
 1199
+1200  # print(trace_type)
 1201
+1202  # if trace_type=="Trace+Seg":
+1203  # input_mask = np.array(out['mask'].convert('P'))
+1204  # image_input = mask_painter(np.array(image_input), input_mask, background_alpha=0)
+1205  # d3_input=mask_painter(np.array(image_input), input_mask)
+1206  # crop_save_path=out['crop_save_path']
 1207
+1208  # else:
+1209  # image_input = Image.fromarray(np.array(origin_image))
+1210  # draw = ImageDraw.Draw(image_input)
+1211  # draw.rectangle(boxes, outline='red', width=2)
+1212  # d3_input=image_input
+1213  # cropped_image = origin_image.crop(boxes)
+1214  # cropped_image.save('temp.png')
+1215  # crop_save_path='temp.png'
 1216
+1217  # print("crop_svae_path",out['crop_save_path'])
 1218
+1219  # # Update components and states
+1220  # state.append((f'Box: {boxes}', None))
 1221
+1222  # # fake_click_index = (int((boxes[0][0] + boxes[0][2]) / 2), int((boxes[0][1] + boxes[0][3]) / 2))
+1223  # # image_input = create_bubble_frame(image_input, "", fake_click_index, input_mask)
 1224
+1225  # prompt=generate_prompt(focus_type, paragraph, length, sentiment, factuality, language,naritive)
 1226
 1227
+1228  # # if not args.disable_gpt and text_refiner:
+1229  # if not args.disable_gpt:
+1230  # focus_info=get_gpt_response(openai_api_key,crop_save_path,prompt)
+1231  # if focus_info.startswith('"') and focus_info.endswith('"'):
+1232  # focus_info=focus_info[1:-1]
+1233  # focus_info=focus_info.replace('#', '')
+1234  # state = state + [(None, f"{focus_info}")]
+1235  # print("new_cap",focus_info)
+1236  # read_info = re.sub(r'[#[\]!*]','',focus_info)
+1237  # read_info = emoji.replace_emoji(read_info,replace="")
+1238  # print("read info",read_info)
+1239
+1240  # # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
+1241  # # input_points=input_points, input_labels=input_labels)
+1242  # try:
+1243  # audio_output = await texttospeech(read_info, language,autoplay,gender)
+1244  # # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
+1245  # return state, state,image_input,audio_output,crop_save_path,d3_input
+1246
+1247
+1248  # except Exception as e:
+1249  # state = state + [(None, f"Error during TTS prediction: {str(e)}")]
+1250  # print(f"Error during TTS prediction: {str(e)}")
+1251  # # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
+1252  # return state, state, image_input,audio_output,crop_save_path
 1253
 1254
+1255  # else:
+1256  # try:
+1257  # audio_output = await texttospeech(focus_info, language, autoplay)
+1258  # # waveform_visual, audio_output = tts.predict(generated_caption, input_language, input_audio, input_mic, use_mic, agree)
+1259  # # return state, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
+1260  # return state, state, image_input,audio_output
 1261
 1262
+1263  # except Exception as e:
+1264  # state = state + [(None, f"Error during TTS prediction: {str(e)}")]
+1265  # print(f"Error during TTS prediction: {str(e)}")
+1266  # return state, state, image_input,audio_output
 1267
 1268
 1269  def clear_chat_memory(visual_chatgpt, keep_global=False):
 1278  visual_chatgpt.global_prompt = ""
 1279
 1280
+1281  def export_chat_log(chat_state, paragraph, liked, disliked,log_list):
 1282  try:
 1283  if not chat_state:
 1284  return None
 1307  temp_file.write(chat_log.encode('utf-8'))
 1308  temp_file_path = temp_file.name
 1309  print(temp_file_path)
+1310  log_list.append(temp_file_path)
+1311  return log_list,log_list
 1312  except Exception as e:
 1313  print(f"An error occurred while exporting the chat log: {e}")
 1314  return None
 1315
 1316  async def get_artistinfo(artist_name,api_key,state,language,autoplay,length):
+1317  prompt = f"Provide a concise summary of about {length} words in {language} on the painter {artist_name}, covering his biography, major works, artistic style, significant contributions to the art world, and any major awards or recognitions he has received. Start your response with 'Artist Background: '."
 1318  res=get_gpt_response(api_key,None,prompt)
+1319  state = state + [(None, res)]
 1320  read_info = re.sub(r'[#[\]!*]','',res)
 1321  read_info = emoji.replace_emoji(read_info,replace="")
 1322
 1323
 1324  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 1325  # input_points=input_points, input_labels=input_labels)
+1326  if autoplay:
+1327  audio_output = await texttospeech(read_info, language,autoplay)
 1328  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
+1329  return state, state,audio_output
+1330  return state, state,None
 1331
 1332
 1333  async def get_yearinfo(year,api_key,state,language,autoplay,length):
+1334  prompt = f"Provide a concise summary of about {length} words in {language} on the art historical period associated with the year {year}, covering its major characteristics, influential artists, notable works, and its significance in the broader context of art history with 'History Background: '."
 1335  res=get_gpt_response(api_key,None,prompt)
+1336  state = state + [(None, res)]
 1337  read_info = re.sub(r'[#[\]!*]','',res)
 1338  read_info = emoji.replace_emoji(read_info,replace="")
 1339
 1340
 1341  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 1342  # input_points=input_points, input_labels=input_labels)
+1343  if autoplay:
+1344  audio_output = await texttospeech(read_info, language,autoplay)
 1345  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
+1346  return state, state,audio_output
+1347  return state, state,None
 1348
 1349
 1350
 1473
 1474  # return like_state, dislike_state
 1475
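Note: export_chat_log (shown above) now takes a log_list state, appends each exported temp-file path to it, and returns the list twice so it can refresh both the state and the file component. A minimal sketch of that accumulation pattern; the function and variable names mirror the commit, the surrounding Gradio wiring is assumed:

```python
import tempfile

def export_chat_log_sketch(chat_log: str, log_list: list):
    # Write the log to a temp file and remember its path across repeated exports.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
        temp_file.write(chat_log.encode("utf-8"))
        temp_file_path = temp_file.name
    log_list.append(temp_file_path)
    return log_list, log_list  # one copy for the gr.State, one for the output component

logs = []
export_chat_log_sketch("User: hi\nBot: hello", logs)
print(logs)
```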
+1476  async def texttospeech(text, language, autoplay,gender='female'):
 1477  try:
 1478  if autoplay:
+1479  voice = filtered_language_dict[language][gender]
 1480  communicate = edge_tts.Communicate(text, voice)
 1481  file_path = "output.wav"
 1482  await communicate.save(file_path)
 1494  print(f"Error in texttospeech: {e}")
 1495  return None
 1496
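Note: texttospeech now does a two-level lookup, language then gender, before handing the voice to edge-tts. A short sketch of the lookup and the edge-tts call; the defensive fallback to the female voice is an assumption the commit itself does not add:

```python
import asyncio
import edge_tts

filtered_language_dict = {
    'English': {'female': 'en-US-JennyNeural', 'male': 'en-US-GuyNeural'},
}

async def speak(text, language='English', gender='female', out_path='output.wav'):
    voices = filtered_language_dict[language]
    voice = voices.get(gender, voices['female'])  # fallback is an assumption
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(out_path)
    return out_path

# asyncio.run(speak("Hello from EyeSee"))
```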
+1497  async def associate(focus_info,openai_api_key,language,state,autoplay,evt: gr.SelectData):
+1498  rec_path=evt._data['value']['image']['path']
+1499  print("rec_path",rec_path)
+1500  prompt="""
+1501  The information and image I gave you are 2 different paintings. Please analyze the relationship between the image and the information {focus_info}. Discuss their similarities and differences in terms of style, themes, colors, and any other relevant aspects. Provide a detailed analysis that highlights how the information fits into or contrasts with the recommended painting. Consider the following points in your analysis:
+1502  - Artistic style and techniques
+1503  - Themes and subjects
+1504  - Color palettes and compositions
+1505  - Historical and cultural contexts
+1506  - Symbolism and meanings
+1507
+1508  Based on your analysis, provide insights into how the information enhances or contrasts with the recommended painting, and suggest any interesting interpretations or observations. Return your response in {language}
+1509
+1510  """
+1511  prompt=prompt.format(focus_info=focus_info,language=language)
+1512  result=get_gpt_response(openai_api_key, rec_path, prompt)
+1513  state = state + [(None, f"{result}")]
+1514  read_info = re.sub(r'[#[\]!*]','',result)
+1515  read_info = emoji.replace_emoji(result,replace="")
+1516  if autoplay:
+1517  audio_output = await texttospeech(read_info, language, autoplay)
+1518  return state,state,audio_output
+1519  return state,state,None
+1520
+1521
+1522
+1523
+1524
+1525
 1526  def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
 1527  print(x.index, x.value, x.liked)
 1528  if x.liked == True:
 1536  return like_res,dislike_res,state
 1537
 1538
+1539
 1540  def toggle_icons_and_update_prompt(point_prompt):
 1541  new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
 1542  new_add_icon = "assets/icons/plus-square-blue.png" if new_prompt == "Positive" else "assets/icons/plus-square.png"
+1543  new_add_css = "tools_button_clicked" if new_prompt == "Positive" else "tools_button"
 1544  new_minus_icon = "assets/icons/minus-square.png" if new_prompt == "Positive" else "assets/icons/minus-square-blue.png"
+1545  new_minus_css= "tools_button" if new_prompt == "Positive" else "tools_button_clicked"
+1546
+1547  return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
 1548
 1549  add_icon_path="assets/icons/plus-square-blue.png"
 1550  minus_icon_path="assets/icons/minus-square.png"
 1559
 examples = [
     ["test_images/ambass.jpg"],
+    ["test_images/test1.png"],
+    ["test_images/test2.png"],
+    ["test_images/test3.png"],
+    ["test_images/test4.png"],
+    ["test_images/test5.png"],
     ["test_images/Picture5.png"],

 ]

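The `examples` list only stores image paths; it is handed to a `gr.Examples` block later in create_ui(). A minimal illustration of that hand-off (the input component name here is illustrative):

    # Hypothetical usage; the real gr.Examples call appears further down in the UI code.
    gr.Examples(examples=examples, inputs=[image_input])
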
         gr.Markdown(title)
         gr.Markdown(description)
         point_prompt = gr.State("Positive")
+        log_list = gr.State([])
+        gender = gr.State('female')
+        focus_info = gr.State('')
         # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
         #     with gr.Column(scale=0.5):
         #         # gr.Markdown("Left side content")
[... lines 1604-1619 unchanged ...]
value="English", label="Language", interactive=True, elem_classes="custom-language"
|
1621 |
)
|
1622 |
length = gr.Slider(
|
1623 |
+
minimum=60,
|
1624 |
+
maximum=120,
|
1625 |
value=80,
|
1626 |
step=1,
|
1627 |
interactive=True,
|
|
|
1648 |
         with gr.Column(scale=6):
             with gr.Column(visible=False) as modules_not_need_gpt:
                 with gr.Tab("Base(GPT Power)") as base_tab:
+                    image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
                     with gr.Row():
                         name_label_base = gr.Button(value="Name: ", elem_classes="info_btn")
+                        artist_label_base = gr.Button(value="Artist: ", elem_classes="info_btn_interact")
+                        year_label_base = gr.Button(value="Year: ", elem_classes="info_btn_interact")
                         material_label_base = gr.Button(value="Style: ", elem_classes="info_btn")

                 with gr.Tab("Base2") as base_tab2:
+                    image_input_base_2 = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
                     with gr.Row():
                         name_label_base2 = gr.Button(value="Name: ", elem_classes="info_btn")
+                        artist_label_base2 = gr.Button(value="Artist: ", elem_classes="info_btn_interact")
+                        year_label_base2 = gr.Button(value="Year: ", elem_classes="info_btn_interact")
                         material_label_base2 = gr.Button(value="Style: ", elem_classes="info_btn")

                 with gr.Tab("Click") as click_tab:
                     with gr.Row():
                         with gr.Column(scale=10, min_width=600):
+                            image_input = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
                             example_image = gr.Image(type="pil", interactive=False, visible=False)
                             with gr.Row():
                                 name_label = gr.Button(value="Name: ", elem_classes="info_btn")
+                                artist_label = gr.Button(value="Artist: ", elem_classes="info_btn_interact")
+                                year_label = gr.Button(value="Year: ", elem_classes="info_btn_interact")
                                 material_label = gr.Button(value="Style: ", elem_classes="info_btn")


                             # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
                         # the tool column
                         with gr.Column(scale=1, elem_id="tool_box", min_width=80):
+                            add_button = gr.Button(value="Extend Area", interactive=True, elem_classes="tools_button_add", icon=add_icon_path)
+                            minus_button = gr.Button(value="Remove Area", interactive=True, elem_classes="tools_button", icon=minus_icon_path)
                             clear_button_click = gr.Button(value="Reset", interactive=True, elem_classes="tools_button")
                             clear_button_image = gr.Button(value="Change", interactive=True, elem_classes="tools_button")
+                            focus_d = gr.Button(value="D", interactive=True, elem_classes="function_button", variant="primary")
+                            focus_da = gr.Button(value="DA", interactive=True, elem_classes="function_button", variant="primary")
+                            focus_dai = gr.Button(value="DAI", interactive=True, elem_classes="function_button", variant="primary")
+                            focus_dda = gr.Button(value="Judge", interactive=True, elem_classes="function_button", variant="primary")
+
+                            recommend_btn = gr.Button(value="Recommend", interactive=True, elem_classes="function_button", variant="primary")
+                            # focus_asso = gr.Button(value="Associate", interactive=True, elem_classes="function_button", variant="primary")

             with gr.Row(visible=False):
                 with gr.Column():
[... lines 1695-1750 unchanged ...]
value="No",
|
1752 |
label="Expert",
|
1753 |
interactive=True)
|
1754 |
+
|
1755 |
+
with gr.Column(visible=False) as recommend:
|
1756 |
+
gallery_result = gr.Gallery(
|
1757 |
+
label="Result",
|
1758 |
+
height="auto",
|
1759 |
+
columns=4
|
1760 |
+
# columns=4,
|
1761 |
+
# rows=2,
|
1762 |
+
# show_label=False,
|
1763 |
+
# allow_preview=True,
|
1764 |
+
# object_fit="contain",
|
1765 |
+
# height="auto",
|
1766 |
+
# preview=True,
|
1767 |
+
# show_share_button=True,
|
1768 |
+
# show_download_button=True
|
1769 |
+
)
|
1770 |
|
1771 |
             with gr.Column(visible=True) as modules_not_need_gpt3:
                 gr.Examples(
[... lines 1773-1828 unchanged ...]
         ############# this part is for text to image #############
         ###############################################################################

+        with gr.Row(variant="panel", visible=False) as text2image_model:

             with gr.Column():
                 with gr.Column():
[... lines 1836-1876 unchanged ...]
                     value=0,
                 )
                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                with gr.Row():
                     width = gr.Slider(
                         label="Width",
                         minimum=100,
[... lines 1884-1906 unchanged ...]
                         step=1,
                         value=8,
                     )
+            # with gr.Column():
+            #     result = gr.Gallery(
+            #         label="Result",
+            #         height="auto",
+            #         columns=4
+            #         # columns=4,
+            #         # rows=2,
+            #         # show_label=False,
+            #         # allow_preview=True,
+            #         # object_fit="contain",
+            #         # height="auto",
+            #         # preview=True,
+            #         # show_share_button=True,
+            #         # show_download_button=True
+            #     )

         with gr.Row():
             naritive = gr.Radio(
[... lines 1928-1971 unchanged ...]
     recommend_btn.click(
         fn=infer,
         inputs=[new_crop_save_path],
+        outputs=[gallery_result]
     )
+
+    gallery_result.select(
+        associate,
+        inputs=[focus_info, openai_api_key, language, state, auto_play],
+        outputs=[chatbot, state, output_audio],
+    )

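Because `associate` declares an `evt: gr.SelectData` parameter, Gradio injects the selection event automatically, which is why only the five remaining arguments appear in `inputs` above. A self-contained sketch of the same pattern with hypothetical names:

    import gradio as gr

    def on_select(note_prefix, evt: gr.SelectData):
        # evt.index / evt.value describe the clicked gallery item
        return f"{note_prefix}: clicked item {evt.index}"

    with gr.Blocks() as demo:
        gallery = gr.Gallery()
        note = gr.Textbox()
        prefix = gr.State("selection")
        gallery.select(on_select, inputs=[prefix], outputs=[note])
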
     ###############################################################################
     ############# above part is for text to image #############
[... lines 1988-2131 unchanged ...]

     openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
                           outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
+                                   modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, top_row, recommend])
     enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
                                 outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
                                          modules_not_need_gpt,
+                                         modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, top_row, recommend])
     # openai_api_key.submit(init_openai_api_key,
     #                       outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
     #                                modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, d3_model, top_row])

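Both bindings above rely on `init_openai_api_key` returning one value (mostly `gr.update(visible=...)`) per component in the `outputs` list, in the same order. A minimal standalone illustration of that pattern, not the app's actual handler:

    import gradio as gr

    def toggle(show_first):
        # one update per output component, in order
        return [gr.update(visible=show_first), gr.update(visible=not show_first)]

    with gr.Blocks() as demo:
        show_first = gr.Checkbox(label="Show panel A")
        panel_a = gr.Textbox(label="A", visible=False)
        panel_b = gr.Textbox(label="B", visible=True)
        show_first.change(toggle, inputs=[show_first], outputs=[panel_a, panel_b])
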
                          [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
                           image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, \
                           name_label_base2, artist_label_base2, year_label_base2, material_label_base2, name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+                          paragraph, artist, gender])

     # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt, openai_api_key],
     #                           [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
[... lines 2250-2268 unchanged ...]
     # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
     #                       [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
     #                        image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, paragraph, artist])
+    chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state, language, auto_play, gender],
                       [chatbot, state, aux_state, output_audio])
     # chat_input.submit(lambda: "", None, chat_input)
     chat_input.submit(lambda: {"text": ""}, None, chat_input)
[... lines 2276-2279 unchanged ...]
                           [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
                            image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, \
                            name_label_base2, artist_label_base2, year_label_base2, material_label_base2, name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+                           paragraph, artist, gender])

     example_image.change(clear_chat_memory, inputs=[visual_chatgpt])

[... lines 2287-2326 unchanged ...]
     focus_d.click(
         submit_caption,
         inputs=[
+            naritive, state, length, sentiment, factuality, language,
+            out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph, focus_d, openai_api_key, new_crop_save_path, gender
         ],
         outputs=[
+            chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, output_audio, focus_info
         ],
         show_progress=True,
         queue=True
[... lines 2338-2343 unchanged ...]
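All four focus buttons reuse `submit_caption`; each binding passes the clicked button component itself (here `focus_d`) as one of the inputs, so the handler receives that button's label and can pick the matching focus level. A standalone sketch of the pattern with illustrative names:

    import gradio as gr

    def describe(level):
        # `level` receives the value (label) of whichever button was clicked
        return f"Generating a '{level}' description..."

    with gr.Blocks() as demo:
        output = gr.Textbox()
        for label in ["D", "DA", "DAI", "Judge"]:
            button = gr.Button(value=label)
            button.click(describe, inputs=[button], outputs=[output])
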
     focus_da.click(
         submit_caption,
         inputs=[
+            naritive, state, length, sentiment, factuality, language,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph, focus_da, openai_api_key, new_crop_save_path
         ],
         outputs=[
[... lines 2351-2357 unchanged ...]

     focus_dai.click(
         submit_caption,
         inputs=[
+            naritive, state, length, sentiment, factuality, language,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
             auto_play, paragraph, focus_dai, openai_api_key, new_crop_save_path
         ],
[... lines 2365-2372 unchanged ...]
     focus_dda.click(
         submit_caption,
         inputs=[
+            naritive, state, length, sentiment, factuality, language,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
             auto_play, paragraph, focus_dda, openai_api_key, new_crop_save_path
         ],
[... lines 2380-2417 unchanged ...]

     export_button.click(
         export_chat_log,
+        inputs=[state, paragraph, like_res, dislike_res, log_list],
+        outputs=[chat_log_file, log_list],
         queue=True
     )

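`export_chat_log` itself is outside this hunk; given the binding above it has to return two values, something the file component can serve for download plus the updated `log_list` state. Purely as a hypothetical sketch of that contract (not the app's real implementation):

    import json, tempfile

    def export_chat_log(state, paragraph, like_res, dislike_res, log_list):
        # Hypothetical shape: append one record, dump it to a temp file,
        # and return (file_path, updated_log_list) for the two outputs.
        log_list = log_list + [{"chat": state, "paragraph": paragraph,
                                "likes": like_res, "dislikes": dislike_res}]
        with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False,
                                         encoding="utf-8") as f:
            json.dump(log_list, f, ensure_ascii=False, indent=2, default=str)
        return f.name, log_list
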
+    naritive.change(
+        # reset the session when the narrative perspective changes:
+        # six values, one per output component listed below
+        lambda: (None, [], [], [[], [], []], "", ""),
+        [],
+        [image_input, chatbot, state, click_state, paragraph_output, origin_image],
+        queue=False,
+        show_progress=False
+    )

     # upvote_btn.click(
     #     handle_liked,
     #     inputs=[state, like_res],
[... lines 2438-2455 unchanged ...]
 iface = create_ui()
 iface.queue(api_open=False, max_size=10)
 # iface.queue(concurrency_count=5, api_open=False, max_size=10)
+iface.launch(server_name="0.0.0.0")