Niki Zhang committed on
Commit 06cd0b8 · verified · 1 Parent(s): 36f6cb5

Update app.py

Files changed (1): app.py +390 -215

app.py CHANGED
@@ -476,15 +476,55 @@ print("4")
476
  css = """
477
  #warning {background-color: #FFCCCB}
478
  .tools_button {
479
  background: white;
480
  border: none !important;
481
  box-shadow: none !important;
482
  }
483
 
484
- .info_btn {
485
  background: white;
486
  border: none !important;
487
  box-shadow: none !important;
488
  }
489
 
490
  .function_button {
@@ -496,20 +536,20 @@ css = """
496
 
497
  """
498
  filtered_language_dict = {
499
- 'English': 'en-US-JennyNeural',
500
- 'Chinese': 'zh-CN-XiaoxiaoNeural',
501
- 'French': 'fr-FR-DeniseNeural',
502
- 'Spanish': 'es-MX-DaliaNeural',
503
- 'Arabic': 'ar-SA-ZariyahNeural',
504
- 'Portuguese': 'pt-BR-FranciscaNeural',
505
- 'Cantonese': 'zh-HK-HiuGaaiNeural'
506
  }
507
 
508
  focus_map = {
509
  "D":0,
510
  "DA":1,
511
  "DAI":2,
512
- "DDA":3
513
  }
514
 
515
  '''
@@ -528,11 +568,27 @@ prompt_list = [
528
  ]
529
  '''
530
  prompt_list = [
531
- 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis)as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
532
- 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
533
- 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
534
- 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and the objects that may be related to the selected object and list one fact of selected object, one fact of related object and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
535
  ]
536
 
537
 
538
  gpt_state = 0
@@ -665,11 +721,11 @@ def init_openai_api_key(api_key=""):
665
  global gpt_state
666
  gpt_state=1
667
  # return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
668
- return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
669
  else:
670
  gpt_state=0
671
  # return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
672
- return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
673
 
674
  def init_wo_openai_api_key():
675
  global gpt_state
@@ -714,7 +770,7 @@ def update_click_state(click_state, caption, click_mode):
714
  raise NotImplementedError
715
 
716
  async def chat_input_callback(*args):
717
- visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
718
  message = chat_input["text"]
719
  if visual_chatgpt is not None:
720
  state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
@@ -724,12 +780,12 @@ async def chat_input_callback(*args):
724
  return state, state, aux_state, None
725
 
726
  else:
727
- audio = await texttospeech(last_response,language,autoplay)
728
  return state, state, aux_state, audio
729
  else:
730
  response = "Text refiner is not initilzed, please input openai api key."
731
  state = state + [(chat_input, response)]
732
- audio = await texttospeech(response,language,autoplay)
733
  return state, state, None, audio
734
 
735
 
@@ -774,37 +830,63 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
774
  visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
775
  print("memory",visual_chatgpt.agent.memory)
776
  # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
777
- parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\" }")
778
  parsed_data = json.loads(parsed_data.replace("'", "\""))
779
- name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"]
780
- # artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
781
 
782
- if narritive==None or narritive=="Third":
783
- state = [
784
- (
785
- None,
786
- f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
787
- )
788
- ]
789
- elif narritive=="Artist":
790
- state = [
791
- (
792
- None,
793
- f"🧑‍🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
794
- )
795
- ]
796
- elif narritive=="Item":
797
- state = [
798
- (
799
- None,
800
- f"🎨 Hello, I am the Item. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
801
- )
802
- ]
803
 
804
 
805
 
806
  return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
807
- original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist]
808
 
809
 
810
 
@@ -842,14 +924,23 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
842
 
843
  enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
844
  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
845
- state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
846
 
847
- # state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], prompt["input_label"]), None)]
848
- update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
849
  text = out['generated_captions']['raw_caption']
850
  input_mask = np.array(out['mask'].convert('P'))
851
  image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
852
- image_input_withbackground=mask_painter(np.array(image_input), input_mask)
853
 
854
  click_index_state = click_index
855
  input_mask_state = input_mask
@@ -878,9 +969,9 @@ query_focus = {
878
  }
879
 
880
 
881
- async def submit_caption(state,length, sentiment, factuality, language,
882
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
883
- autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
884
 
885
 
886
  state = state + [(query_focus[focus_type], None)]
@@ -896,7 +987,7 @@ async def submit_caption(state,length, sentiment, factuality, language,
896
  print("input_points_state",input_points_state)
897
  print("input_labels_state",input_labels_state)
898
 
899
- prompt=generate_prompt(focus_type,paragraph,length,sentiment,factuality,language)
900
 
901
  print("Prompt:", prompt)
902
  print("click",click_index)
@@ -918,6 +1009,16 @@ async def submit_caption(state,length, sentiment, factuality, language,
918
  read_info = re.sub(r'[#[\]!*]','',focus_info)
919
  read_info = emoji.replace_emoji(read_info,replace="")
920
  print("read info",read_info)
921
 
922
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
923
  # input_points=input_points, input_labels=input_labels)
@@ -925,25 +1026,26 @@ async def submit_caption(state,length, sentiment, factuality, language,
925
  if autoplay==False:
926
  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
927
 
928
- audio_output = await texttospeech(read_info, language, autoplay)
929
  print("done")
930
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
931
- return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
932
 
933
  except Exception as e:
934
  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
935
  print(f"Error during TTS prediction: {str(e)}")
936
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
937
- return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
938
 
939
  else:
940
  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
941
  print(f"Error during TTS prediction: {str(e)}")
942
- return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
943
 
944
 
945
 
946
- def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language):
947
 
948
  mapped_value = focus_map.get(focus_type, -1)
949
 
@@ -953,9 +1055,13 @@ def generate_prompt(focus_type, paragraph,length, sentiment, factuality, languag
953
  'factuality': factuality,
954
  'language': language
955
  }
956
 
957
  if mapped_value != -1:
958
- prompt = prompt_list[mapped_value].format(
959
  Wiki_caption=paragraph,
960
  length=controls['length'],
961
  sentiment=controls['sentiment'],
@@ -964,8 +1070,8 @@ def generate_prompt(focus_type, paragraph,length, sentiment, factuality, languag
964
  else:
965
  prompt = "Invalid focus type."
966
 
967
- if controls['factuality'] == "Imagination":
968
- prompt += " Assuming that I am someone who has viewed a lot of art and has a lot of experience viewing art. Explain artistic features (composition, color, style, or use of light) and discuss the symbolism of the content and its influence on later artistic movements."
969
 
970
  return prompt
971
 
@@ -1061,103 +1167,103 @@ def get_sketch_prompt(mask: Image.Image):
1061
 
1062
  submit_traj=0
1063
 
1064
- async def inference_traject(origin_image,sketcher_image, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
1065
- original_size, input_size, text_refiner,focus_type,paragraph,openai_api_key,autoplay,trace_type):
1066
- image_input, mask = sketcher_image['background'], sketcher_image['layers'][0]
1067
 
1068
- crop_save_path=""
1069
 
1070
- prompt = get_sketch_prompt(mask)
1071
- boxes = prompt['input_boxes']
1072
- boxes = boxes[0]
1073
-
1074
- controls = {'length': length,
1075
- 'sentiment': sentiment,
1076
- 'factuality': factuality,
1077
- 'language': language}
1078
-
1079
- model = build_caption_anything_with_models(
1080
- args,
1081
- api_key="",
1082
- captioner=shared_captioner,
1083
- sam_model=shared_sam_model,
1084
- ocr_reader=shared_ocr_reader,
1085
- text_refiner=text_refiner,
1086
- session_id=iface.app_id
1087
- )
1088
-
1089
- model.setup(image_embedding, original_size, input_size, is_image_set=True)
1090
-
1091
- enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
1092
- out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki,verbose=True)[0]
1093
 
1094
- print(trace_type)
1095
 
1096
- if trace_type=="Trace+Seg":
1097
- input_mask = np.array(out['mask'].convert('P'))
1098
- image_input = mask_painter(np.array(image_input), input_mask, background_alpha=0)
1099
- d3_input=mask_painter(np.array(image_input), input_mask)
1100
- crop_save_path=out['crop_save_path']
1101
 
1102
- else:
1103
- image_input = Image.fromarray(np.array(origin_image))
1104
- draw = ImageDraw.Draw(image_input)
1105
- draw.rectangle(boxes, outline='red', width=2)
1106
- d3_input=image_input
1107
- cropped_image = origin_image.crop(boxes)
1108
- cropped_image.save('temp.png')
1109
- crop_save_path='temp.png'
1110
 
1111
- print("crop_svae_path",out['crop_save_path'])
1112
 
1113
- # Update components and states
1114
- state.append((f'Box: {boxes}', None))
1115
 
1116
- # fake_click_index = (int((boxes[0][0] + boxes[0][2]) / 2), int((boxes[0][1] + boxes[0][3]) / 2))
1117
- # image_input = create_bubble_frame(image_input, "", fake_click_index, input_mask)
1118
 
1119
- prompt=generate_prompt(focus_type, paragraph, length, sentiment, factuality, language)
1120
 
1121
 
1122
- # if not args.disable_gpt and text_refiner:
1123
- if not args.disable_gpt:
1124
- focus_info=get_gpt_response(openai_api_key,crop_save_path,prompt)
1125
- if focus_info.startswith('"') and focus_info.endswith('"'):
1126
- focus_info=focus_info[1:-1]
1127
- focus_info=focus_info.replace('#', '')
1128
- state = state + [(None, f"{focus_info}")]
1129
- print("new_cap",focus_info)
1130
- read_info = re.sub(r'[#[\]!*]','',focus_info)
1131
- read_info = emoji.replace_emoji(read_info,replace="")
1132
- print("read info",read_info)
1133
-
1134
- # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1135
- # input_points=input_points, input_labels=input_labels)
1136
- try:
1137
- audio_output = await texttospeech(read_info, language,autoplay)
1138
- # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1139
- return state, state,image_input,audio_output,crop_save_path,d3_input
1140
 
1141
 
1142
- except Exception as e:
1143
- state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1144
- print(f"Error during TTS prediction: {str(e)}")
1145
- # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
1146
- return state, state, image_input,audio_output,crop_save_path
1147
-
1148
-
1149
- else:
1150
- try:
1151
- audio_output = await texttospeech(focus_info, language, autoplay)
1152
- # waveform_visual, audio_output = tts.predict(generated_caption, input_language, input_audio, input_mic, use_mic, agree)
1153
- # return state, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1154
- return state, state, image_input,audio_output
1155
 
1156
 
1157
- except Exception as e:
1158
- state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1159
- print(f"Error during TTS prediction: {str(e)}")
1160
- return state, state, image_input,audio_output
1161
 
1162
 
1163
  def clear_chat_memory(visual_chatgpt, keep_global=False):
@@ -1172,7 +1278,7 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
1172
  visual_chatgpt.global_prompt = ""
1173
 
1174
 
1175
- def export_chat_log(chat_state, paragraph, liked, disliked):
1176
  try:
1177
  if not chat_state:
1178
  return None
@@ -1201,41 +1307,44 @@ def export_chat_log(chat_state, paragraph, liked, disliked):
1201
  temp_file.write(chat_log.encode('utf-8'))
1202
  temp_file_path = temp_file.name
1203
  print(temp_file_path)
1204
- return temp_file_path
1205
  except Exception as e:
1206
  print(f"An error occurred while exporting the chat log: {e}")
1207
  return None
1208
 
1209
  async def get_artistinfo(artist_name,api_key,state,language,autoplay,length):
1210
- prompt=f"Provide a concise summary of about {length} words in {language} on the painter {artist_name}, covering his biography, major works, artistic style, significant contributions to the art world, and any major awards or recognitions he has received."
1211
  res=get_gpt_response(api_key,None,prompt)
1212
- state = state + [(None, f"Artist Background:{res}")]
1213
  read_info = re.sub(r'[#[\]!*]','',res)
1214
  read_info = emoji.replace_emoji(read_info,replace="")
1215
 
1216
 
1217
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1218
  # input_points=input_points, input_labels=input_labels)
1219
-
1220
- audio_output = await texttospeech(read_info, language,autoplay)
1221
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1222
- return state, state,audio_output
1223
 
1224
 
1225
  async def get_yearinfo(year,api_key,state,language,autoplay,length):
1226
- prompt = f"Provide a concise summary of about {length} words in {language} on the art historical period associated with the year {year}, covering its major characteristics, influential artists, notable works, and its significance in the broader context of art history."
1227
  res=get_gpt_response(api_key,None,prompt)
1228
- state = state + [(None, f"History Background: {res}")]
1229
  read_info = re.sub(r'[#[\]!*]','',res)
1230
  read_info = emoji.replace_emoji(read_info,replace="")
1231
 
1232
 
1233
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1234
  # input_points=input_points, input_labels=input_labels)
1235
-
1236
- audio_output = await texttospeech(read_info, language,autoplay)
1237
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1238
- return state, state,audio_output
1239
 
1240
 
1241
 
@@ -1364,10 +1473,10 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
1364
 
1365
  # return like_state, dislike_state
1366
 
1367
- async def texttospeech(text, language, autoplay):
1368
  try:
1369
  if autoplay:
1370
- voice = filtered_language_dict[language]
1371
  communicate = edge_tts.Communicate(text, voice)
1372
  file_path = "output.wav"
1373
  await communicate.save(file_path)
@@ -1385,6 +1494,35 @@ async def texttospeech(text, language, autoplay):
1385
  print(f"Error in texttospeech: {e}")
1386
  return None
1387
 
 
1388
  def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
1389
  print(x.index, x.value, x.liked)
1390
  if x.liked == True:
@@ -1398,14 +1536,15 @@ def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
1398
  return like_res,dislike_res,state
1399
 
1400
 
1401
  def toggle_icons_and_update_prompt(point_prompt):
1402
  new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
1403
  new_add_icon = "assets/icons/plus-square-blue.png" if new_prompt == "Positive" else "assets/icons/plus-square.png"
1404
  new_minus_icon = "assets/icons/minus-square.png" if new_prompt == "Positive" else "assets/icons/minus-square-blue.png"
1405
- print(point_prompt,flush=True)
1406
- print(new_prompt,flush=True)
1407
-
1408
- return new_prompt, gr.update(icon=new_add_icon), gr.update(icon=new_minus_icon)
1409
 
1410
  add_icon_path="assets/icons/plus-square-blue.png"
1411
  minus_icon_path="assets/icons/minus-square.png"
@@ -1420,12 +1559,11 @@ def create_ui():
1420
 
1421
  examples = [
1422
  ["test_images/ambass.jpg"],
1423
- ["test_images/pearl.jpg"],
1424
- ["test_images/Picture0.png"],
1425
- ["test_images/Picture1.png"],
1426
- ["test_images/Picture2.png"],
1427
- ["test_images/Picture3.png"],
1428
- ["test_images/Picture4.png"],
1429
  ["test_images/Picture5.png"],
1430
 
1431
  ]
@@ -1457,6 +1595,9 @@ def create_ui():
1457
  gr.Markdown(title)
1458
  gr.Markdown(description)
1459
  point_prompt = gr.State("Positive")
1460
  # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
1461
  # with gr.Column(scale=0.5):
1462
  # # gr.Markdown("Left side content")
@@ -1479,8 +1620,8 @@ def create_ui():
1479
  value="English", label="Language", interactive=True, elem_classes="custom-language"
1480
  )
1481
  length = gr.Slider(
1482
- minimum=40,
1483
- maximum=200,
1484
  value=80,
1485
  step=1,
1486
  interactive=True,
@@ -1507,45 +1648,47 @@ def create_ui():
1507
  with gr.Column(scale=6):
1508
  with gr.Column(visible=False) as modules_not_need_gpt:
1509
  with gr.Tab("Base(GPT Power)") as base_tab:
1510
- image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
1511
  with gr.Row():
1512
  name_label_base = gr.Button(value="Name: ",elem_classes="info_btn")
1513
- artist_label_base = gr.Button(value="Artist: ",elem_classes="info_btn")
1514
- year_label_base = gr.Button(value="Year: ",elem_classes="info_btn")
1515
  material_label_base = gr.Button(value="Style: ",elem_classes="info_btn")
1516
 
1517
  with gr.Tab("Base2") as base_tab2:
1518
- image_input_base_2 = gr.Image(type="pil", interactive=True, elem_id="image_upload")
1519
  with gr.Row():
1520
  name_label_base2 = gr.Button(value="Name: ",elem_classes="info_btn")
1521
- artist_label_base2 = gr.Button(value="Artist: ",elem_classes="info_btn")
1522
- year_label_base2 = gr.Button(value="Year: ",elem_classes="info_btn")
1523
  material_label_base2 = gr.Button(value="Style: ",elem_classes="info_btn")
1524
 
1525
  with gr.Tab("Click") as click_tab:
1526
  with gr.Row():
1527
  with gr.Column(scale=10,min_width=600):
1528
- image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
1529
  example_image = gr.Image(type="pil", interactive=False, visible=False)
1530
  with gr.Row():
1531
  name_label = gr.Button(value="Name: ",elem_classes="info_btn")
1532
- artist_label = gr.Button(value="Artist: ",elem_classes="info_btn")
1533
- year_label = gr.Button(value="Year: ",elem_classes="info_btn")
1534
  material_label = gr.Button(value="Style: ",elem_classes="info_btn")
1535
 
1536
 
1537
  # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
1538
  # the tool column
1539
  with gr.Column(scale=1,elem_id="tool_box",min_width=80):
1540
- add_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=add_icon_path)
1541
- minus_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
1542
  clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button")
1543
  clear_button_image = gr.Button(value="Change", interactive=True,elem_classes="tools_button")
1544
- focus_d = gr.Button(value="D",interactive=True,elem_classes="function_button")
1545
- focus_da = gr.Button(value="DA",interactive=True,elem_classes="function_button")
1546
- focus_dai = gr.Button(value="DAI",interactive=True,elem_classes="function_button")
1547
- focus_dda = gr.Button(value="DDA",interactive=True,elem_classes="function_button")
1548
- recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button")
1549
 
1550
  with gr.Row(visible=False):
1551
  with gr.Column():
@@ -1608,7 +1751,22 @@ def create_ui():
1608
  value="No",
1609
  label="Expert",
1610
  interactive=True)
1611
-
1612
 
1613
  with gr.Column(visible=True) as modules_not_need_gpt3:
1614
  gr.Examples(
@@ -1671,7 +1829,7 @@ def create_ui():
1671
  ############# this part is for text to image #############
1672
  ###############################################################################
1673
 
1674
- with gr.Row(variant="panel") as text2image_model:
1675
 
1676
  with gr.Column():
1677
  with gr.Column():
@@ -1719,7 +1877,7 @@ def create_ui():
1719
  value=0,
1720
  )
1721
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
1722
- with gr.Row(visible=True):
1723
  width = gr.Slider(
1724
  label="Width",
1725
  minimum=100,
@@ -1749,21 +1907,21 @@ def create_ui():
1749
  step=1,
1750
  value=8,
1751
  )
1752
- with gr.Column():
1753
- result = gr.Gallery(
1754
- label="Result",
1755
- height="auto",
1756
- columns=4
1757
- # columns=4,
1758
- # rows=2,
1759
- # show_label=False,
1760
- # allow_preview=True,
1761
- # object_fit="contain",
1762
- # height="auto",
1763
- # preview=True,
1764
- # show_share_button=True,
1765
- # show_download_button=True
1766
- )
1767
 
1768
  with gr.Row():
1769
  naritive = gr.Radio(
@@ -1814,8 +1972,16 @@ def create_ui():
1814
  recommend_btn.click(
1815
  fn=infer,
1816
  inputs=[new_crop_save_path],
1817
- outputs=[result]
1818
  )
1819
 
1820
  ###############################################################################
1821
  ############# above part is for text to image #############
@@ -1966,11 +2132,11 @@ def create_ui():
1966
 
1967
  openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
1968
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
1969
- modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
1970
  enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
1971
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
1972
  modules_not_need_gpt,
1973
- modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
1974
  # openai_api_key.submit(init_openai_api_key,
1975
  # outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
1976
  # modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
@@ -2077,7 +2243,7 @@ def create_ui():
2077
  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2078
  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2079
  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2080
- paragraph,artist])
2081
 
2082
  # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
2083
  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
@@ -2103,7 +2269,7 @@ def create_ui():
2103
  # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
2104
  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
2105
  # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
2106
- chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
2107
  [chatbot, state, aux_state,output_audio])
2108
  # chat_input.submit(lambda: "", None, chat_input)
2109
  chat_input.submit(lambda: {"text": ""}, None, chat_input)
@@ -2114,7 +2280,7 @@ def create_ui():
2114
  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2115
  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2116
  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2117
- paragraph,artist])
2118
 
2119
  example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
2120
 
@@ -2161,11 +2327,11 @@ def create_ui():
2161
  focus_d.click(
2162
  submit_caption,
2163
  inputs=[
2164
- state,length, sentiment, factuality, language,
2165
- out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path
2166
  ],
2167
  outputs=[
2168
- chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
2169
  ],
2170
  show_progress=True,
2171
  queue=True
@@ -2178,7 +2344,7 @@ def create_ui():
2178
  focus_da.click(
2179
  submit_caption,
2180
  inputs=[
2181
- state,length, sentiment, factuality, language,
2182
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,auto_play, paragraph,focus_da,openai_api_key,new_crop_save_path
2183
  ],
2184
  outputs=[
@@ -2192,7 +2358,7 @@ def create_ui():
2192
  focus_dai.click(
2193
  submit_caption,
2194
  inputs=[
2195
- state,length, sentiment, factuality, language,
2196
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
2197
  auto_play, paragraph,focus_dai,openai_api_key,new_crop_save_path
2198
  ],
@@ -2207,7 +2373,7 @@ def create_ui():
2207
  focus_dda.click(
2208
  submit_caption,
2209
  inputs=[
2210
- state,length, sentiment, factuality, language,
2211
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
2212
  auto_play, paragraph,focus_dda,openai_api_key,new_crop_save_path
2213
  ],
@@ -2252,11 +2418,20 @@ def create_ui():
2252
 
2253
  export_button.click(
2254
  export_chat_log,
2255
- inputs=[state,paragraph,like_res,dislike_res],
2256
- outputs=[chat_log_file],
2257
  queue=True
2258
  )
2259
 
2260
  # upvote_btn.click(
2261
  # handle_liked,
2262
  # inputs=[state,like_res],
@@ -2281,4 +2456,4 @@ if __name__ == '__main__':
2281
  iface = create_ui()
2282
  iface.queue(api_open=False, max_size=10)
2283
  # iface.queue(concurrency_count=5, api_open=False, max_size=10)
2284
- iface.launch(server_name="0.0.0.0",show_error=True)
 
476
  css = """
477
  #warning {background-color: #FFCCCB}
478
  .tools_button {
479
+ display: flex;
480
+ flex-direction: column;
481
+ align-items: center;
482
+ justify-content: center;
483
  background: white;
484
  border: none !important;
485
  box-shadow: none !important;
486
+ text-align: center;
487
+ color: black;
488
  }
489
 
490
+ .tools_button_clicked {
491
+ display: flex;
492
+ flex-direction: column;
493
+ align-items: center;
494
+ justify-content: center;
495
+ background: white;
496
+ border: none !important;
497
+ box-shadow: none !important;
498
+ text-align: center;
499
+ color: rgb(18,150,219);
500
+ }
501
+
502
+ .tools_button_add {
503
+ display: flex;
504
+ flex-direction: column;
505
+ align-items: center;
506
+ justify-content: center;
507
  background: white;
508
  border: none !important;
509
  box-shadow: none !important;
510
+ text-align: center;
511
+ color: rgb(18,150,219);
512
+ }
513
+
514
+
515
+ .image_upload {
516
+ height: 650px;
517
+ }
518
+
519
+ .info_btn {
520
+ background: white !important;
521
+ border: none !important;
522
+ box-shadow: none !important;
523
+ }
524
+
525
+ info_btn_interact {
526
+ background: white !important;
527
+ box-shadow: none !important;
528
  }
529
 
530
  .function_button {
 
536
 
537
  """
538
  filtered_language_dict = {
539
+ 'English': {'female': 'en-US-JennyNeural', 'male': 'en-US-GuyNeural'},
540
+ 'Chinese': {'female': 'zh-CN-XiaoxiaoNeural', 'male': 'zh-CN-YunxiNeural'},
541
+ 'French': {'female': 'fr-FR-DeniseNeural', 'male': 'fr-FR-HenriNeural'},
542
+ 'Spanish': {'female': 'es-MX-DaliaNeural', 'male': 'es-MX-JorgeNeural'},
543
+ 'Arabic': {'female': 'ar-SA-ZariyahNeural', 'male': 'ar-SA-HamedNeural'},
544
+ 'Portuguese': {'female': 'pt-BR-FranciscaNeural', 'male': 'pt-BR-AntonioNeural'},
545
+ 'Cantonese': {'female': 'zh-HK-HiuGaaiNeural', 'male': 'zh-HK-WanLungNeural'}
546
  }
547
 
548
  focus_map = {
549
  "D":0,
550
  "DA":1,
551
  "DAI":2,
552
+ "Judge":3
553
  }
554
 
555
  '''
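For reference, a minimal sketch (not part of the commit) of how the gendered voice table added in this hunk can drive edge-tts synthesis. The speak() helper, the trimmed voice table, and the output path are illustrative assumptions; edge_tts.Communicate and Communicate.save are the real edge-tts calls used later in this diff.

    import asyncio
    import edge_tts

    # Trimmed copy of filtered_language_dict: language -> {gender: Edge TTS voice id}
    VOICES = {
        'English': {'female': 'en-US-JennyNeural', 'male': 'en-US-GuyNeural'},
        'Chinese': {'female': 'zh-CN-XiaoxiaoNeural', 'male': 'zh-CN-YunxiNeural'},
    }

    async def speak(text, language='English', gender='female', out_path='output.wav'):
        # Fall back to the female voice when the detected gender is missing or unknown.
        voice = VOICES[language].get(gender, VOICES[language]['female'])
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(out_path)  # write the synthesized speech to disk
        return out_path

    # Example: asyncio.run(speak("Let's explore this painting together.", 'English', 'male'))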
 
568
  ]
569
  '''
570
  prompt_list = [
571
+ [
572
+
573
+ 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
574
+ 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
575
+ 'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
576
+ 'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
577
+ ],
578
+ [
579
+ 'When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
580
+ 'When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
581
+ 'When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
582
+ 'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
583
+ ],
584
+ [
585
+ 'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
586
+ 'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
587
+ 'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
588
+ 'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
589
+ ]
590
  ]
591
+
592
 
593
 
594
  gpt_state = 0
 
721
  global gpt_state
722
  gpt_state=1
723
  # return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
724
+ return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*2
725
  else:
726
  gpt_state=0
727
  # return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
728
+ return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*2
729
 
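The only change in these two return statements is how many trailing gr.update(...) values are appended: a Gradio event handler must return exactly one value per component listed in its outputs, so the tail has to track the number of components now wired to init_openai_api_key. A tiny illustrative sketch of that rule (the component counts and handler name are assumptions, not the app's real wiring):

    import gradio as gr

    def set_ready(ok):
        # Two components are revealed and two hidden; the list length (4) must equal
        # the number of components bound to this handler's `outputs` list.
        if ok:
            return [gr.update(visible=True)] * 2 + [gr.update(visible=False)] * 2
        return [gr.update(visible=False)] * 2 + [gr.update(visible=True)] * 2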
730
  def init_wo_openai_api_key():
731
  global gpt_state
 
770
  raise NotImplementedError
771
 
772
  async def chat_input_callback(*args):
773
+ visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender = args
774
  message = chat_input["text"]
775
  if visual_chatgpt is not None:
776
  state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
 
780
  return state, state, aux_state, None
781
 
782
  else:
783
+ audio = await texttospeech(last_response,language,autoplay,gender)
784
  return state, state, aux_state, audio
785
  else:
786
  response = "Text refiner is not initilzed, please input openai api key."
787
  state = state + [(chat_input, response)]
788
+ audio = await texttospeech(response,language,autoplay,gender)
789
  return state, state, None, audio
790
 
791
 
 
830
  visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
831
  print("memory",visual_chatgpt.agent.memory)
832
  # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
833
+ parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\",\"gender\": \"The gender of the author\" }")
834
  parsed_data = json.loads(parsed_data.replace("'", "\""))
835
+ name, artist, year, material,gender= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"],parsed_data['gender']
836
+ gender=gender.lower()
837
+ print("gender",gender)
838
+
839
 
840
+ if language=="English":
841
+ if narritive=="Third" :
842
+ state = [
843
+ (
844
+ None,
845
+ f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
846
+ )
847
+ ]
848
+ elif narritive=="Artist":
849
+ state = [
850
+ (
851
+ None,
852
+ f"🧑‍🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
853
+ )
854
+ ]
855
+ elif narritive=="Item":
856
+ state = [
857
+ (
858
+ None,
859
+ f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
860
+ )
861
+ ]
862
+ elif language=="Chinese":
863
+ if narritive == "Third":
864
+ state = [
865
+ (
866
+ None,
867
+ f"🤖 你好,我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供相关的信息。"
868
+ )
869
+ ]
870
+ elif narritive == "Artist":
871
+ state = [
872
+ (
873
+ None,
874
+ f"🧑‍🎨 你好,我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供我的创作背后的相关见解和想法。"
875
+ )
876
+ ]
877
+ elif narritive == "Item":
878
+ state = [
879
+ (
880
+ None,
881
+ f"🎨 你好,让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会从画面上事物的视角为你提供相关的见解和想法。"
882
+ )
883
+ ]
884
+
885
 
886
 
887
 
888
  return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
889
+ original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender]
890
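For reference, a small sketch (illustrative values only, not part of the commit) of the parsing step used above: the model is asked for a single-quoted dict-like string, quotes are normalized before json.loads, and the new gender field is lower-cased as in upload_callback.

    import json

    # Example of the metadata string format requested from get_gpt_response (values are made up).
    raw = "{ 'name': 'Example Painting', 'artist': 'Example Artist', 'year': '1650 (Baroque)', 'style': 'Oil on canvas', 'gender': 'Male' }"
    # Naive quote replacement, as in the diff; it breaks if a value itself contains an apostrophe.
    parsed = json.loads(raw.replace("'", '"'))
    name, artist, year, style = parsed['name'], parsed['artist'], parsed['year'], parsed['style']
    gender = parsed.get('gender', 'female').lower()  # fall back to 'female' if the field is missing
    print(name, artist, year, style, gender)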
 
891
 
892
 
 
924
 
925
  enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
926
  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
927
+ # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
928
+
929
+ # state = state + [("Selected image point: {}, Input label: {}".format(
930
+ # prompt["input_point"],
931
+ # '+' if prompt["input_label"] == "1" else '-'
932
+ # ), None)]
933
 
934
+ output_label = ['+' if label == 1 else '-' for label in prompt["input_label"]]
935
+
936
+ state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], output_label), None)]
937
+
938
+
939
+
940
+ # update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
941
  text = out['generated_captions']['raw_caption']
942
  input_mask = np.array(out['mask'].convert('P'))
943
  image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
 
944
 
945
  click_index_state = click_index
946
  input_mask_state = input_mask
 
969
  }
970
 
971
 
972
+ async def submit_caption(naritive, state,length, sentiment, factuality, language,
973
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
974
+ autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender):
975
 
976
 
977
  state = state + [(query_focus[focus_type], None)]
 
987
  print("input_points_state",input_points_state)
988
  print("input_labels_state",input_labels_state)
989
 
990
+ prompt=generate_prompt(focus_type,paragraph,length,sentiment,factuality,language, naritive)
991
 
992
  print("Prompt:", prompt)
993
  print("click",click_index)
 
1009
  read_info = re.sub(r'[#[\]!*]','',focus_info)
1010
  read_info = emoji.replace_emoji(read_info,replace="")
1011
  print("read info",read_info)
1012
+ if naritive=="Item":
1013
+ parsed_data = get_gpt_response(openai_api_key, new_crop_save_path,prompt = f"Based on the information {focus_info}, return the gender of this item, returns its most likely gender, do not return unknown, in the format {{\"gender\": \"<gender>\"}}")
1014
+ parsed_data = json.loads(parsed_data)
1015
+
1016
+ try:
1017
+ gender=parsed_data['gender']
1018
+ gender=gender.lower()
1019
+ except:
1020
+ print("error gpt responese")
1021
+ print("item gender",gender)
1022
 
1023
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1024
  # input_points=input_points, input_labels=input_labels)
 
1026
  if autoplay==False:
1027
  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
1028
 
1029
+ audio_output = await texttospeech(read_info, language, autoplay,gender)
1030
  print("done")
1031
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1032
+ return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output,gender,focus_info
1033
 
1034
  except Exception as e:
1035
  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1036
  print(f"Error during TTS prediction: {str(e)}")
1037
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
1038
+ return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output,gender,focus_info
1039
 
1040
  else:
1041
  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1042
  print(f"Error during TTS prediction: {str(e)}")
1043
+ return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,None,focus_info
1044
 
1045
 
1046
 
1047
+
1048
+ def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language,naritive):
1049
 
1050
  mapped_value = focus_map.get(focus_type, -1)
1051
 
 
1055
  'factuality': factuality,
1056
  'language': language
1057
  }
1058
+
1059
+ naritive_mapping = {"Third": 0, "Artist": 1, "Item": 2}
1060
+
1061
+ naritive_value=naritive_mapping[naritive]
1062
 
1063
  if mapped_value != -1:
1064
+ prompt = prompt_list[naritive_value][mapped_value].format(
1065
  Wiki_caption=paragraph,
1066
  length=controls['length'],
1067
  sentiment=controls['sentiment'],
 
1070
  else:
1071
  prompt = "Invalid focus type."
1072
 
1073
+ # if controls['factuality'] == "Imagination":
1074
+ # prompt += " Assuming that I am someone who has viewed a lot of art and has a lot of experience viewing art. Explain artistic features (composition, color, style, or use of light) and discuss the symbolism of the content and its influence on later artistic movements."
1075
 
1076
  return prompt
1077
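A short sketch (illustrative only, not part of the commit) of the two-level lookup generate_prompt now performs: the narrative persona selects the row of the 3x4 prompt_list table defined earlier, the focus button selects the column, and the chosen template is formatted with the wiki caption and controls.

    # Mirrors generate_prompt's indexing; prompt_list is the 3x4 table of templates above.
    naritive_mapping = {"Third": 0, "Artist": 1, "Item": 2}
    focus_map = {"D": 0, "DA": 1, "DAI": 2, "Judge": 3}

    def pick_prompt(prompt_list, naritive, focus_type, paragraph, length, language):
        template = prompt_list[naritive_mapping[naritive]][focus_map[focus_type]]
        # str.format ignores keyword arguments a template does not use (e.g. the
        # Judge templates take no {Wiki_caption}), so one call covers all cells.
        return template.format(Wiki_caption=paragraph, length=length, language=language)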
 
 
1167
 
1168
  submit_traj=0
1169
 
1170
+ # async def inference_traject(naritive, origin_image,sketcher_image, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
1171
+ # original_size, input_size, text_refiner,focus_type,paragraph,openai_api_key,autoplay,trace_type):
1172
+ # image_input, mask = sketcher_image['background'], sketcher_image['layers'][0]
1173
 
1174
+ # crop_save_path=""
1175
 
1176
+ # prompt = get_sketch_prompt(mask)
1177
+ # boxes = prompt['input_boxes']
1178
+ # boxes = boxes[0]
1179
+
1180
+ # controls = {'length': length,
1181
+ # 'sentiment': sentiment,
1182
+ # 'factuality': factuality,
1183
+ # 'language': language}
1184
+
1185
+ # model = build_caption_anything_with_models(
1186
+ # args,
1187
+ # api_key="",
1188
+ # captioner=shared_captioner,
1189
+ # sam_model=shared_sam_model,
1190
+ # ocr_reader=shared_ocr_reader,
1191
+ # text_refiner=text_refiner,
1192
+ # session_id=iface.app_id
1193
+ # )
1194
+
1195
+ # model.setup(image_embedding, original_size, input_size, is_image_set=True)
1196
+
1197
+ # enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
1198
+ # out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki,verbose=True)[0]
1199
 
1200
+ # print(trace_type)
1201
 
1202
+ # if trace_type=="Trace+Seg":
1203
+ # input_mask = np.array(out['mask'].convert('P'))
1204
+ # image_input = mask_painter(np.array(image_input), input_mask, background_alpha=0)
1205
+ # d3_input=mask_painter(np.array(image_input), input_mask)
1206
+ # crop_save_path=out['crop_save_path']
1207
 
1208
+ # else:
1209
+ # image_input = Image.fromarray(np.array(origin_image))
1210
+ # draw = ImageDraw.Draw(image_input)
1211
+ # draw.rectangle(boxes, outline='red', width=2)
1212
+ # d3_input=image_input
1213
+ # cropped_image = origin_image.crop(boxes)
1214
+ # cropped_image.save('temp.png')
1215
+ # crop_save_path='temp.png'
1216
 
1217
+ # print("crop_svae_path",out['crop_save_path'])
1218
 
1219
+ # # Update components and states
1220
+ # state.append((f'Box: {boxes}', None))
1221
 
1222
+ # # fake_click_index = (int((boxes[0][0] + boxes[0][2]) / 2), int((boxes[0][1] + boxes[0][3]) / 2))
1223
+ # # image_input = create_bubble_frame(image_input, "", fake_click_index, input_mask)
1224
 
1225
+ # prompt=generate_prompt(focus_type, paragraph, length, sentiment, factuality, language,naritive)
1226
 
1227
 
1228
+ # # if not args.disable_gpt and text_refiner:
1229
+ # if not args.disable_gpt:
1230
+ # focus_info=get_gpt_response(openai_api_key,crop_save_path,prompt)
1231
+ # if focus_info.startswith('"') and focus_info.endswith('"'):
1232
+ # focus_info=focus_info[1:-1]
1233
+ # focus_info=focus_info.replace('#', '')
1234
+ # state = state + [(None, f"{focus_info}")]
1235
+ # print("new_cap",focus_info)
1236
+ # read_info = re.sub(r'[#[\]!*]','',focus_info)
1237
+ # read_info = emoji.replace_emoji(read_info,replace="")
1238
+ # print("read info",read_info)
1239
+
1240
+ # # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1241
+ # # input_points=input_points, input_labels=input_labels)
1242
+ # try:
1243
+ # audio_output = await texttospeech(read_info, language,autoplay,gender)
1244
+ # # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1245
+ # return state, state,image_input,audio_output,crop_save_path,d3_input
1246
+
1247
+
1248
+ # except Exception as e:
1249
+ # state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1250
+ # print(f"Error during TTS prediction: {str(e)}")
1251
+ # # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
1252
+ # return state, state, image_input,audio_output,crop_save_path
1253
 
1254
 
1255
+ # else:
1256
+ # try:
1257
+ # audio_output = await texttospeech(focus_info, language, autoplay)
1258
+ # # waveform_visual, audio_output = tts.predict(generated_caption, input_language, input_audio, input_mic, use_mic, agree)
1259
+ # # return state, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1260
+ # return state, state, image_input,audio_output
1261
 
1262
 
1263
+ # except Exception as e:
1264
+ # state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1265
+ # print(f"Error during TTS prediction: {str(e)}")
1266
+ # return state, state, image_input,audio_output
1267
 
1268
 
1269
  def clear_chat_memory(visual_chatgpt, keep_global=False):
 
1278
  visual_chatgpt.global_prompt = ""
1279
 
1280
 
1281
+ def export_chat_log(chat_state, paragraph, liked, disliked,log_list):
1282
  try:
1283
  if not chat_state:
1284
  return None
 
1307
  temp_file.write(chat_log.encode('utf-8'))
1308
  temp_file_path = temp_file.name
1309
  print(temp_file_path)
1310
+ log_list.append(temp_file_path)
1311
+ return log_list,log_list
1312
  except Exception as e:
1313
  print(f"An error occurred while exporting the chat log: {e}")
1314
  return None
1315
 
1316
  async def get_artistinfo(artist_name,api_key,state,language,autoplay,length):
1317
+ prompt = f"Provide a concise summary of about {length} words in {language} on the painter {artist_name}, covering his biography, major works, artistic style, significant contributions to the art world, and any major awards or recognitions he has received. Start your response with 'Artist Background: '."
1318
  res=get_gpt_response(api_key,None,prompt)
1319
+ state = state + [(None, res)]
1320
  read_info = re.sub(r'[#[\]!*]','',res)
1321
  read_info = emoji.replace_emoji(read_info,replace="")
1322
 
1323
 
1324
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1325
  # input_points=input_points, input_labels=input_labels)
1326
+ if autoplay:
1327
+ audio_output = await texttospeech(read_info, language,autoplay)
1328
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1329
+ return state, state,audio_output
1330
+ return state, state,None
1331
 
1332
 
1333
  async def get_yearinfo(year,api_key,state,language,autoplay,length):
1334
+ prompt = f"Provide a concise summary of about {length} words in {language} on the art historical period associated with the year {year}, covering its major characteristics, influential artists, notable works, and its significance in the broader context of art history with 'History Background: '."
1335
  res=get_gpt_response(api_key,None,prompt)
1336
+ state = state + [(None, res)]
1337
  read_info = re.sub(r'[#[\]!*]','',res)
1338
  read_info = emoji.replace_emoji(read_info,replace="")
1339
 
1340
 
1341
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
1342
  # input_points=input_points, input_labels=input_labels)
1343
+ if autoplay:
1344
+ audio_output = await texttospeech(read_info, language,autoplay)
1345
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
1346
+ return state, state,audio_output
1347
+ return state, state,None
1348
 
1349
 
1350
 
 
1473
 
1474
  # return like_state, dislike_state
1475
 
1476
+ async def texttospeech(text, language, autoplay,gender='female'):
1477
  try:
1478
  if autoplay:
1479
+ voice = filtered_language_dict[language][gender]
1480
  communicate = edge_tts.Communicate(text, voice)
1481
  file_path = "output.wav"
1482
  await communicate.save(file_path)
 
1494
  print(f"Error in texttospeech: {e}")
1495
  return None
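Because texttospeech now takes a gender argument and indexes the voice table twice (filtered_language_dict[language][gender]), the table is implicitly expected to map each language to a per-gender dict rather than to a single voice string. A minimal sketch of that assumed shape, using illustrative edge-tts voice names that may differ from the app's configuration:

voice_table = {
    'English': {'female': 'en-US-AriaNeural', 'male': 'en-US-GuyNeural'},
    'German':  {'female': 'de-DE-KatjaNeural', 'male': 'de-DE-ConradNeural'},
}
voice = voice_table['English']['female']  # -> 'en-US-AriaNeural'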
1496
 
1497
+ async def associate(focus_info,openai_api_key,language,state,autoplay,evt: gr.SelectData):
1498
+ rec_path=evt._data['value']['image']['path']
1499
+ print("rec_path",rec_path)
1500
+ prompt="""
1501
+ The information and the image I gave you refer to two different paintings. Please analyze the relationship between the image and the information {focus_info}. Discuss their similarities and differences in terms of style, themes, colors, and any other relevant aspects. Provide a detailed analysis that highlights how the information fits into or contrasts with the recommended painting. Consider the following points in your analysis:
1502
+ - Artistic style and techniques
1503
+ - Themes and subjects
1504
+ - Color palettes and compositions
1505
+ - Historical and cultural contexts
1506
+ - Symbolism and meanings
1507
+
1508
+ Based on your analysis, provide insights into how the information enhances or contrasts with the recommended painting, and suggest any interesting interpretations or observations. Return your response in {language}.
1509
+
1510
+ """
1511
+ prompt=prompt.format(focus_info=focus_info,language=language)
1512
+ result=get_gpt_response(openai_api_key, rec_path, prompt)
1513
+ state = state + [(None, f"{result}")]
1514
+ read_info = re.sub(r'[#[\]!*]','',result)
1515
+ read_info = emoji.replace_emoji(read_info,replace="")
1516
+ if autoplay:
1517
+ audio_output = await texttospeech(read_info, language, autoplay)
1518
+ return state,state,audio_output
1519
+ return state,state,None
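The associate handler above reads the clicked recommendation's path from evt._data, a private attribute of the event payload. Recent Gradio releases expose the same information on the public SelectData.value (and SelectData.index) fields, so a more version-tolerant accessor could look like the sketch below; the "image" -> "path" keys are an assumption that mirrors what the code above reads from the gallery payload:

import gradio as gr

def selected_gallery_path(evt: gr.SelectData):
    item = evt.value               # public payload for the clicked gallery item
    if isinstance(item, dict):     # gallery items usually arrive as dicts with image metadata
        return item.get("image", {}).get("path")
    return item                    # some versions pass the path or caption directly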
1520
+
1521
+
1522
+
1523
+
1524
+
1525
+
1526
  def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
1527
  print(x.index, x.value, x.liked)
1528
  if x.liked == True:
 
1536
  return like_res,dislike_res,state
1537
 
1538
 
1539
+
1540
  def toggle_icons_and_update_prompt(point_prompt):
1541
  new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
1542
  new_add_icon = "assets/icons/plus-square-blue.png" if new_prompt == "Positive" else "assets/icons/plus-square.png"
1543
+ new_add_css = "tools_button_clicked" if new_prompt == "Positive" else "tools_button"
1544
  new_minus_icon = "assets/icons/minus-square.png" if new_prompt == "Positive" else "assets/icons/minus-square-blue.png"
1545
+ new_minus_css= "tools_button" if new_prompt == "Positive" else "tools_button_clicked"
1546
+
1547
+ return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
 
1548
 
1549
  add_icon_path="assets/icons/plus-square-blue.png"
1550
  minus_icon_path="assets/icons/minus-square.png"
 
1559
 
1560
  examples = [
1561
  ["test_images/ambass.jpg"],
1562
+ ["test_images/test1.png"],
1563
+ ["test_images/test2.png"],
1564
+ ["test_images/test3.png"],
1565
+ ["test_images/test4.png"],
1566
+ ["test_images/test5.png"],
 
1567
  ["test_images/Picture5.png"],
1568
 
1569
  ]
 
1595
  gr.Markdown(title)
1596
  gr.Markdown(description)
1597
  point_prompt = gr.State("Positive")
1598
+ log_list=gr.State([])
1599
+ gender=gr.State('female')
1600
+ focus_info=gr.State('')
1601
  # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
1602
  # with gr.Column(scale=0.5):
1603
  # # gr.Markdown("Left side content")
 
1620
  value="English", label="Language", interactive=True, elem_classes="custom-language"
1621
  )
1622
  length = gr.Slider(
1623
+ minimum=60,
1624
+ maximum=120,
1625
  value=80,
1626
  step=1,
1627
  interactive=True,
 
1648
  with gr.Column(scale=6):
1649
  with gr.Column(visible=False) as modules_not_need_gpt:
1650
  with gr.Tab("Base(GPT Power)") as base_tab:
1651
+ image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
1652
  with gr.Row():
1653
  name_label_base = gr.Button(value="Name: ",elem_classes="info_btn")
1654
+ artist_label_base = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
1655
+ year_label_base = gr.Button(value="Year: ",elem_classes="info_btn_interact")
1656
  material_label_base = gr.Button(value="Style: ",elem_classes="info_btn")
1657
 
1658
  with gr.Tab("Base2") as base_tab2:
1659
+ image_input_base_2 = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
1660
  with gr.Row():
1661
  name_label_base2 = gr.Button(value="Name: ",elem_classes="info_btn")
1662
+ artist_label_base2 = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
1663
+ year_label_base2 = gr.Button(value="Year: ",elem_classes="info_btn_interact")
1664
  material_label_base2 = gr.Button(value="Style: ",elem_classes="info_btn")
1665
 
1666
  with gr.Tab("Click") as click_tab:
1667
  with gr.Row():
1668
  with gr.Column(scale=10,min_width=600):
1669
+ image_input = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
1670
  example_image = gr.Image(type="pil", interactive=False, visible=False)
1671
  with gr.Row():
1672
  name_label = gr.Button(value="Name: ",elem_classes="info_btn")
1673
+ artist_label = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
1674
+ year_label = gr.Button(value="Year: ",elem_classes="info_btn_interact")
1675
  material_label = gr.Button(value="Style: ",elem_classes="info_btn")
1676
 
1677
 
1678
  # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
1679
  # the tool column
1680
  with gr.Column(scale=1,elem_id="tool_box",min_width=80):
1681
+ add_button = gr.Button(value="Extend Area", interactive=True,elem_classes="tools_button_add",icon=add_icon_path)
1682
+ minus_button = gr.Button(value="Remove Area", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
1683
  clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button")
1684
  clear_button_image = gr.Button(value="Change", interactive=True,elem_classes="tools_button")
1685
+ focus_d = gr.Button(value="D",interactive=True,elem_classes="function_button",variant="primary")
1686
+ focus_da = gr.Button(value="DA",interactive=True,elem_classes="function_button",variant="primary")
1687
+ focus_dai = gr.Button(value="DAI",interactive=True,elem_classes="function_button",variant="primary")
1688
+ focus_dda = gr.Button(value="Judge",interactive=True,elem_classes="function_button",variant="primary")
1689
+
1690
+ recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button",variant="primary")
1691
+ # focus_asso = gr.Button(value="Associate",interactive=True,elem_classes="function_button",variant="primary")
1692
 
1693
  with gr.Row(visible=False):
1694
  with gr.Column():
 
1751
  value="No",
1752
  label="Expert",
1753
  interactive=True)
1754
+
1755
+ with gr.Column(visible=False) as recommend:
1756
+ gallery_result = gr.Gallery(
1757
+ label="Result",
1758
+ height="auto",
1759
+ columns=4
1760
+ # columns=4,
1761
+ # rows=2,
1762
+ # show_label=False,
1763
+ # allow_preview=True,
1764
+ # object_fit="contain",
1765
+ # height="auto",
1766
+ # preview=True,
1767
+ # show_share_button=True,
1768
+ # show_download_button=True
1769
+ )
1770
 
1771
  with gr.Column(visible=True) as modules_not_need_gpt3:
1772
  gr.Examples(
 
1829
  ############# this part is for text to image #############
1830
  ###############################################################################
1831
 
1832
+ with gr.Row(variant="panel",visible=False) as text2image_model:
1833
 
1834
  with gr.Column():
1835
  with gr.Column():
 
1877
  value=0,
1878
  )
1879
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
1880
+ with gr.Row():
1881
  width = gr.Slider(
1882
  label="Width",
1883
  minimum=100,
 
1907
  step=1,
1908
  value=8,
1909
  )
1910
+ # with gr.Column():
1911
+ # result = gr.Gallery(
1912
+ # label="Result",
1913
+ # height="auto",
1914
+ # columns=4
1915
+ # # columns=4,
1916
+ # # rows=2,
1917
+ # # show_label=False,
1918
+ # # allow_preview=True,
1919
+ # # object_fit="contain",
1920
+ # # height="auto",
1921
+ # # preview=True,
1922
+ # # show_share_button=True,
1923
+ # # show_download_button=True
1924
+ # )
1925
 
1926
  with gr.Row():
1927
  naritive = gr.Radio(
 
1972
  recommend_btn.click(
1973
  fn=infer,
1974
  inputs=[new_crop_save_path],
1975
+ outputs=[gallery_result]
1976
  )
1977
+
1978
+ gallery_result.select(
1979
+ associate,
1980
+ inputs=[focus_info,openai_api_key,language,state,auto_play],
1981
+ outputs=[chatbot,state,output_audio],
1982
+
1983
+
1984
+ )
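One wiring detail worth noting in the gallery_result.select call above: the inputs list supplies five components while associate declares six parameters. The trailing evt: gr.SelectData argument is never listed as an input; Gradio injects it automatically based on the type annotation, as in this minimal sketch with placeholder components:

import gradio as gr

def on_select(state, evt: gr.SelectData):   # evt is filled in from the annotation
    state = state + [(None, f"picked item {evt.index}")]
    return state

with gr.Blocks() as demo:
    state = gr.State([])
    gallery = gr.Gallery(label="Result", columns=4)
    gallery.select(on_select, inputs=[state], outputs=[state])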
1985
 
1986
  ###############################################################################
1987
  ############# above part is for text to image #############
 
2132
 
2133
  openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
2134
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
2135
+ modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row,recommend])
2136
  enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
2137
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
2138
  modules_not_need_gpt,
2139
+ modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row,recommend])
2140
  # openai_api_key.submit(init_openai_api_key,
2141
  # outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
2142
  # modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
 
2243
  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2244
  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2245
  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2246
+ paragraph,artist,gender])
2247
 
2248
  # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
2249
  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
 
2269
  # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
2270
  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
2271
  # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
2272
+ chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play,gender],
2273
  [chatbot, state, aux_state,output_audio])
2274
  # chat_input.submit(lambda: "", None, chat_input)
2275
  chat_input.submit(lambda: {"text": ""}, None, chat_input)
 
2280
  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2281
  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2282
  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2283
+ paragraph,artist,gender])
2284
 
2285
  example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
2286
 
 
2327
  focus_d.click(
2328
  submit_caption,
2329
  inputs=[
2330
+ naritive, state,length, sentiment, factuality, language,
2331
+ out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path,gender
2332
  ],
2333
  outputs=[
2334
+ chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio,focus_info
2335
  ],
2336
  show_progress=True,
2337
  queue=True
 
2344
  focus_da.click(
2345
  submit_caption,
2346
  inputs=[
2347
+ naritive,state,length, sentiment, factuality, language,
2348
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,auto_play, paragraph,focus_da,openai_api_key,new_crop_save_path
2349
  ],
2350
  outputs=[
 
2358
  focus_dai.click(
2359
  submit_caption,
2360
  inputs=[
2361
+ naritive,state,length, sentiment, factuality, language,
2362
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
2363
  auto_play, paragraph,focus_dai,openai_api_key,new_crop_save_path
2364
  ],
 
2373
  focus_dda.click(
2374
  submit_caption,
2375
  inputs=[
2376
+ naritive,state,length, sentiment, factuality, language,
2377
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
2378
  auto_play, paragraph,focus_dda,openai_api_key,new_crop_save_path
2379
  ],
 
2418
 
2419
  export_button.click(
2420
  export_chat_log,
2421
+ inputs=[state,paragraph,like_res,dislike_res,log_list],
2422
+ outputs=[chat_log_file,log_list],
2423
  queue=True
2424
  )
2425
 
2426
+ naritive.change(
2427
+ lambda: (None, [], [], [[], [], []], "", ""),
2428
+ [],
2429
+ [image_input, chatbot, state, click_state, paragraph_output, origin_image],
2430
+ queue=False,
2431
+ show_progress=False
2432
+
2433
+ )
2434
+
2435
  # upvote_btn.click(
2436
  # handle_liked,
2437
  # inputs=[state,like_res],
 
2456
  iface = create_ui()
2457
  iface.queue(api_open=False, max_size=10)
2458
  # iface.queue(concurrency_count=5, api_open=False, max_size=10)
2459
+ iface.launch(server_name="0.0.0.0")