Niki Zhang committed
Commit b6190c4 · verified · 1 Parent(s): 9029eb6

Update app.py

Files changed (1)
  1. app.py +557 -478
app.py CHANGED
@@ -28,7 +28,6 @@ import re
28
  import edge_tts
29
  from langchain import __version__
30
  import torch
31
- import gradio as gr
32
  from transformers import AutoProcessor, SiglipModel
33
  import faiss
34
  from huggingface_hub import hf_hub_download
@@ -38,6 +37,8 @@ import requests
38
  import spaces
39
  # Print the current version of LangChain
40
  print(f"Current LangChain version: {__version__}")
 
 
41
  # import tts
42
 
43
  ###############################################################################
@@ -46,9 +47,9 @@ print(f"Current LangChain version: {__version__}")
46
 
47
 
48
  # import spaces #
49
- import threading
50
 
51
- lock = threading.Lock()
52
  import os
53
  # import uuid
54
  # from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
@@ -94,220 +95,220 @@ from huggingface_hub import hf_hub_download
94
 
95
 
96
 
97
- def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
98
- """
99
- Get the rendering camera parameters.
100
- """
101
- c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
102
- if is_flexicubes:
103
- cameras = torch.linalg.inv(c2ws)
104
- cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
105
- else:
106
- extrinsics = c2ws.flatten(-2)
107
- intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
108
- cameras = torch.cat([extrinsics, intrinsics], dim=-1)
109
- cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
110
- return cameras
111
-
112
-
113
- def images_to_video(images, output_path, fps=30):
114
- # images: (N, C, H, W)
115
- os.makedirs(os.path.dirname(output_path), exist_ok=True)
116
- frames = []
117
- for i in range(images.shape[0]):
118
- frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255)
119
- assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
120
- f"Frame shape mismatch: {frame.shape} vs {images.shape}"
121
- assert frame.min() >= 0 and frame.max() <= 255, \
122
- f"Frame value out of range: {frame.min()} ~ {frame.max()}"
123
- frames.append(frame)
124
- imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')
125
-
126
-
127
- ###############################################################################
128
- # Configuration.
129
- ###############################################################################
130
-
131
- import shutil
132
-
133
- def find_cuda():
134
- # Check if CUDA_HOME or CUDA_PATH environment variables are set
135
- cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
136
-
137
- if cuda_home and os.path.exists(cuda_home):
138
- return cuda_home
139
-
140
- # Search for the nvcc executable in the system's PATH
141
- nvcc_path = shutil.which('nvcc')
142
-
143
- if nvcc_path:
144
- # Remove the 'bin/nvcc' part to get the CUDA installation path
145
- cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
146
- return cuda_path
147
-
148
- return None
149
-
150
- cuda_path = find_cuda()
151
-
152
- if cuda_path:
153
- print(f"CUDA installation found at: {cuda_path}")
154
- else:
155
- print("CUDA installation not found")
156
-
157
- config_path = 'configs/instant-nerf-base.yaml'
158
- config = OmegaConf.load(config_path)
159
- config_name = os.path.basename(config_path).replace('.yaml', '')
160
- model_config = config.model_config
161
- infer_config = config.infer_config
162
-
163
- IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
164
-
165
- device = torch.device('cuda')
166
-
167
- # load diffusion model
168
- print('Loading diffusion model ...')
169
- pipeline = DiffusionPipeline.from_pretrained(
170
- "sudo-ai/zero123plus-v1.2",
171
- custom_pipeline="zero123plus",
172
- torch_dtype=torch.float16,
173
- )
174
- pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
175
- pipeline.scheduler.config, timestep_spacing='trailing'
176
- )
177
-
178
- # load custom white-background UNet
179
- unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
180
- state_dict = torch.load(unet_ckpt_path, map_location='cpu')
181
- pipeline.unet.load_state_dict(state_dict, strict=True)
182
-
183
- pipeline = pipeline.to(device)
184
-
185
- # load reconstruction model
186
- print('Loading reconstruction model ...')
187
- model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model")
188
- model0 = instantiate_from_config(model_config)
189
- state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
190
- state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
191
- model0.load_state_dict(state_dict, strict=True)
192
-
193
- model0 = model0.to(device)
194
-
195
- print('Loading Finished!')
196
-
197
-
198
- def check_input_image(input_image):
199
- if input_image is None:
200
- raise gr.Error("No image uploaded!")
201
- image = None
202
- else:
203
- image = Image.open(input_image)
204
- return image
205
 
206
- def preprocess(input_image, do_remove_background):
207
 
208
- rembg_session = rembg.new_session() if do_remove_background else None
209
 
210
- if do_remove_background:
211
- input_image = remove_background(input_image, rembg_session)
212
- input_image = resize_foreground(input_image, 0.85)
213
 
214
- return input_image
215
 
216
 
217
- # @spaces.GPU
218
- def generate_mvs(input_image, sample_steps, sample_seed):
219
 
220
- seed_everything(sample_seed)
221
 
222
- # sampling
223
- z123_image = pipeline(
224
- input_image,
225
- num_inference_steps=sample_steps
226
- ).images[0]
227
 
228
- show_image = np.asarray(z123_image, dtype=np.uint8)
229
- show_image = torch.from_numpy(show_image) # (960, 640, 3)
230
- show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
231
- show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
232
- show_image = Image.fromarray(show_image.numpy())
233
 
234
- return z123_image, show_image
235
 
236
 
237
- # @spaces.GPU
238
- def make3d(images):
239
 
240
- global model0
241
- if IS_FLEXICUBES:
242
- model0.init_flexicubes_geometry(device)
243
- model0 = model0.eval()
244
 
245
- images = np.asarray(images, dtype=np.float32) / 255.0
246
- images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
247
- images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2) # (6, 3, 320, 320)
248
 
249
- input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
250
- render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
251
 
252
- images = images.unsqueeze(0).to(device)
253
- images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
254
 
255
- mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
256
- print(mesh_fpath)
257
- mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
258
- mesh_dirname = os.path.dirname(mesh_fpath)
259
- video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
260
- mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")
261
 
262
- with torch.no_grad():
263
- # get triplane
264
- planes = model0.forward_planes(images, input_cameras)
265
 
266
- # # get video
267
- # chunk_size = 20 if IS_FLEXICUBES else 1
268
- # render_size = 384
269
 
270
- # frames = []
271
- # for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
272
- # if IS_FLEXICUBES:
273
- # frame = model.forward_geometry(
274
- # planes,
275
- # render_cameras[:, i:i+chunk_size],
276
- # render_size=render_size,
277
- # )['img']
278
- # else:
279
- # frame = model.synthesizer(
280
- # planes,
281
- # cameras=render_cameras[:, i:i+chunk_size],
282
- # render_size=render_size,
283
- # )['images_rgb']
284
- # frames.append(frame)
285
- # frames = torch.cat(frames, dim=1)
286
-
287
- # images_to_video(
288
- # frames[0],
289
- # video_fpath,
290
- # fps=30,
291
- # )
292
-
293
- # print(f"Video saved to {video_fpath}")
294
-
295
- # get mesh
296
- mesh_out = model0.extract_mesh(
297
- planes,
298
- use_texture_map=False,
299
- **infer_config,
300
- )
301
-
302
- vertices, faces, vertex_colors = mesh_out
303
- vertices = vertices[:, [1, 2, 0]]
304
 
305
- save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
306
- save_obj(vertices, faces, vertex_colors, mesh_fpath)
307
 
308
- print(f"Mesh saved to {mesh_fpath}")
309
 
310
- return mesh_fpath, mesh_glb_fpath
311
 
312
 
313
  ###############################################################################
@@ -471,10 +472,13 @@ examples = [
471
 
472
  css = """
473
  #warning {background-color: #FFCCCB}
474
- .chatbot {
475
- padding: 0 !important;
476
- margin: 0 !important;
477
- }
478
  """
479
  filtered_language_dict = {
480
  'English': 'en-US-JennyNeural',
@@ -487,10 +491,10 @@ filtered_language_dict = {
487
  }
488
 
489
  focus_map = {
490
- "CFV-D":0,
491
- "CFV-DA":1,
492
- "CFV-DAI":2,
493
- "PFV-DDA":3
494
  }
495
 
496
  '''
@@ -616,17 +620,17 @@ def init_openai_api_key(api_key=""):
616
  global gpt_state
617
  gpt_state=1
618
  # return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
619
- return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*2
620
  else:
621
  gpt_state=0
622
  # return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
623
- return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*2
624
 
625
  def init_wo_openai_api_key():
626
  global gpt_state
627
  gpt_state=0
628
  # return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]*3
629
- return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]*2
630
 
631
  def get_click_prompt(chat_input, click_state, click_mode):
632
  inputs = json.loads(chat_input)
@@ -666,15 +670,17 @@ def update_click_state(click_state, caption, click_mode):
666
 
667
  async def chat_input_callback(*args):
668
  visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
 
669
  if visual_chatgpt is not None:
670
- state, _, aux_state, _ = visual_chatgpt.run_text(chat_input, state, aux_state)
671
  last_text, last_response = state[-1]
672
  print("last response",last_response)
673
- if autoplay:
674
- audio = await texttospeech(last_response,language,autoplay)
 
675
  else:
676
- audio=None
677
- return state, state, aux_state, audio
678
  else:
679
  response = "Text refiner is not initilzed, please input openai api key."
680
  state = state + [(chat_input, response)]
@@ -722,9 +728,9 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
722
  visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
723
  print("memory",visual_chatgpt.agent.memory)
724
  # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
725
- parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"material\": \"Material used in the painting\" }")
726
  parsed_data = json.loads(parsed_data.replace("'", "\""))
727
- name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
728
  # artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
729
 
730
 
@@ -736,7 +742,7 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
736
  ]
737
 
738
  return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
739
- original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}"]*4 + [paragraph,artist]
740
 
741
 
742
 
@@ -774,7 +780,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
774
 
775
  enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
776
  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
777
-
 
778
  state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], prompt["input_label"]), None)]
779
  update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
780
  text = out['generated_captions']['raw_caption']
@@ -798,13 +805,11 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
798
 
799
  print("new crop save",new_crop_save_path)
800
 
801
- yield state, state, click_state, image_input_nobackground, image_input_withbackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
802
 
803
 
804
 
805
-
806
-
807
- async def submit_caption(state, text_refiner, length, sentiment, factuality, language,
808
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
809
  autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
810
  print("state",state)
@@ -846,6 +851,9 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
846
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
847
  # input_points=input_points, input_labels=input_labels)
848
  try:
849
  audio_output = await texttospeech(read_info, language, autoplay)
850
  print("done")
851
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
@@ -858,16 +866,11 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
858
  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
859
 
860
  else:
861
- try:
862
- audio_output = await texttospeech(focus_info, language, autoplay)
863
- # waveform_visual, audio_output = tts.predict(generated_caption, input_language, input_audio, input_mic, use_mic, agree)
864
- # return state, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
865
- return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
866
 
867
- except Exception as e:
868
- state = state + [(None, f"Error during TTS prediction: {str(e)}")]
869
- print(f"Error during TTS prediction: {str(e)}")
870
- return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
871
 
872
  def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language):
873
 
@@ -1069,7 +1072,7 @@ async def inference_traject(origin_image,sketcher_image, enable_wiki, language,
1069
  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1070
  print(f"Error during TTS prediction: {str(e)}")
1071
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
1072
- return state, state, image_input,audio_output,crop_save_path,d3_input
1073
 
1074
 
1075
  else:
@@ -1222,58 +1225,58 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
1222
  # return state,dislike_res
1223
 
1224
 
1225
- def get_style():
1226
- current_version = version.parse(gr.__version__)
1227
- print(current_version)
1228
- if current_version <= version.parse('3.24.1'):
1229
- style = '''
1230
- #image_sketcher{min-height:500px}
1231
- #image_sketcher [data-testid="image"], #image_sketcher [data-testid="image"] > div{min-height: 500px}
1232
- #image_upload{min-height:500px}
1233
- #image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 500px}
1234
- .custom-language {
1235
- width: 20%;
1236
- }
1237
-
1238
- .custom-autoplay {
1239
- width: 40%;
1240
- }
1241
-
1242
- .custom-output {
1243
- width: 30%;
1244
- }
1245
-
1246
- '''
1247
- elif current_version <= version.parse('3.27'):
1248
- style = '''
1249
- #image_sketcher{min-height:500px}
1250
- #image_upload{min-height:500px}
1251
- .custom-language {
1252
- width: 20%;
1253
- }
1254
-
1255
- .custom-autoplay {
1256
- width: 40%;
1257
- }
1258
-
1259
- .custom-output {
1260
- width: 30%;
1261
- }
1262
- .custom-gallery {
1263
- display: flex;
1264
- flex-wrap: wrap;
1265
- justify-content: space-between;
1266
- }
1267
-
1268
- .custom-gallery img {
1269
- width: 48%;
1270
- margin-bottom: 10px;
1271
- }
1272
- '''
1273
- else:
1274
- style = None
1275
 
1276
- return style
1277
 
1278
  # def handle_like_dislike(like_data, like_state, dislike_state):
1279
  # if like_data.liked:
@@ -1323,9 +1326,21 @@ def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
1323
  dislike_res.append(x.value)
1324
  state = state + [(None, f"Disliked Received 👎")]
1325
  return like_res,dislike_res,state
1326
-
1327
-
1328
1329
 
1330
  def create_ui():
1331
  title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
@@ -1345,7 +1360,7 @@ def create_ui():
1345
  ]
1346
 
1347
  with gr.Blocks(
1348
- css=get_style(),
1349
  theme=gr.themes.Base()
1350
  ) as iface:
1351
  state = gr.State([])
@@ -1370,6 +1385,7 @@ def create_ui():
1370
  dislike_res=gr.State([])
1371
  gr.Markdown(title)
1372
  gr.Markdown(description)
 
1373
  # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
1374
  # with gr.Column(scale=0.5):
1375
  # # gr.Markdown("Left side content")
@@ -1392,9 +1408,9 @@ def create_ui():
1392
  value="English", label="Language", interactive=True, elem_classes="custom-language"
1393
  )
1394
  length = gr.Slider(
1395
- minimum=20,
1396
- maximum=100,
1397
- value=40,
1398
  step=1,
1399
  interactive=True,
1400
  label="Generated Caption Length",
@@ -1416,8 +1432,7 @@ def create_ui():
1416
  # auto_play = gr.Checkbox(label="Check to autoplay audio", value=False,scale=0.4)
1417
  # output_audio = gr.HTML(label="Synthesised Audio",scale=0.6)
1418
 
1419
- with gr.Row():
1420
-
1421
  with gr.Column(scale=6):
1422
  with gr.Column(visible=False) as modules_not_need_gpt:
1423
  with gr.Tab("Base(GPT Power)") as base_tab:
@@ -1426,7 +1441,7 @@ def create_ui():
1426
  name_label_base = gr.Button(value="Name: ")
1427
  artist_label_base = gr.Button(value="Artist: ")
1428
  year_label_base = gr.Button(value="Year: ")
1429
- material_label_base = gr.Button(value="Material: ")
1430
 
1431
  with gr.Tab("Base2") as base_tab2:
1432
  image_input_base_2 = gr.Image(type="pil", interactive=True, elem_id="image_upload")
@@ -1434,52 +1449,55 @@ def create_ui():
1434
  name_label_base2 = gr.Button(value="Name: ")
1435
  artist_label_base2 = gr.Button(value="Artist: ")
1436
  year_label_base2 = gr.Button(value="Year: ")
1437
- material_label_base2 = gr.Button(value="Material: ")
1438
 
1439
  with gr.Tab("Click") as click_tab:
1440
- image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
1441
- example_image = gr.Image(type="pil", interactive=False, visible=False)
1442
- # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
1443
- with gr.Row():
1444
- name_label = gr.Button(value="Name: ")
1445
- artist_label = gr.Button(value="Artist: ")
1446
- year_label = gr.Button(value="Year: ")
1447
- material_label = gr.Button(value="Material: ")
1448
  with gr.Row():
1449
- with gr.Column():
1450
  with gr.Row():
1451
- focus_type = gr.Radio(
1452
- choices=["CFV-D", "CFV-DA", "CFV-DAI","PFV-DDA"],
1453
- value="CFV-D",
1454
- label="Information Type",
1455
- interactive=True,
1456
- scale=4)
1457
-
1458
  with gr.Row():
1459
- point_prompt = gr.Radio(
1460
- choices=["Positive", "Negative"],
1461
- value="Positive",
1462
- label="Point Prompt",
1463
- scale=5,
1464
- interactive=True)
1465
  click_mode = gr.Radio(
1466
  choices=["Continuous", "Single"],
1467
  value="Continuous",
1468
  label="Clicking Mode",
1469
  scale=5,
1470
  interactive=True)
1471
- with gr.Column():
1472
- with gr.Row():
1473
- submit_button_click=gr.Button(value="Submit", interactive=True,variant='primary',scale=2)
1474
- with gr.Row():
1475
- clear_button_click = gr.Button(value="Clear Clicks", interactive=True,scale=2)
1476
- clear_button_image = gr.Button(value="Clear Image", interactive=True,scale=2)
1477
-
1478
- with gr.Tab("Trajectory (beta)") as traj_tab:
1479
  # sketcher_input = ImageSketcher(type="pil", interactive=True, brush_radius=10,
1480
  # elem_id="image_sketcher")
1481
- sketcher_input = gr.ImageEditor(type="pil", interactive=True,
1482
- elem_id="image_sketcher")
1483
  with gr.Row():
1484
  name_label_traj = gr.Button(value="Name: ")
1485
  artist_label_traj = gr.Button(value="Artist: ")
@@ -1489,28 +1507,16 @@ def create_ui():
1489
  with gr.Row():
1490
  clear_button_sketcher = gr.Button(value="Clear Sketch", interactive=True)
1491
  submit_button_sketcher = gr.Button(value="Submit", interactive=True)
1492
- with gr.Row():
1493
- with gr.Row():
1494
- focus_type_sketch = gr.Radio(
1495
- choices=["CFV-D", "CFV-DA", "CFV-DAI","PFV-DDA"],
1496
- value="CFV-D",
1497
- label="Information Type",
1498
- interactive=True)
1499
- Input_sketch = gr.Radio(
1500
- choices=["Trace+Seg", "Trace"],
1501
- value="Trace",
1502
- label="Trace Type",
1503
- interactive=True)
1504
 
1505
  with gr.Column(visible=False,scale=4) as modules_need_gpt1:
1506
- with gr.Row():
1507
  sentiment = gr.Radio(
1508
  choices=["Positive", "Natural", "Negative"],
1509
  value="Natural",
1510
  label="Sentiment",
1511
  interactive=True,
1512
  )
1513
- with gr.Row():
1514
  factuality = gr.Radio(
1515
  choices=["Factual", "Imagination"],
1516
  value="Factual",
@@ -1531,6 +1537,8 @@ def create_ui():
1531
  value="No",
1532
  label="Expert",
1533
  interactive=True)
1534
  with gr.Column(visible=True) as modules_not_need_gpt3:
1535
  gr.Examples(
1536
  examples=examples,
@@ -1541,7 +1549,7 @@ def create_ui():
1541
 
1542
 
1543
 
1544
- with gr.Column(scale=5):
1545
  with gr.Column(visible=True) as module_key_input:
1546
  openai_api_key = gr.Textbox(
1547
  placeholder="Input openAI API key",
@@ -1563,20 +1571,16 @@ def create_ui():
1563
 
1564
  with gr.Column(visible=False) as modules_not_need_gpt2:
1565
  with gr.Blocks():
1566
- chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600)
1567
  with gr.Column(visible=False) as modules_need_gpt3:
1568
- chat_input = gr.Textbox(show_label=False, placeholder="Enter text and press Enter",container=False)
1569
  with gr.Row():
1570
- clear_button_text = gr.Button(value="Clear Text", interactive=True)
1571
- submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
 
1572
  # upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
1573
  # downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
1574
-
1575
-
1576
- with gr.Row():
1577
- export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
1578
- with gr.Row():
1579
- chat_log_file = gr.File(label="Download Chat Log")
1580
 
1581
  # TTS interface hidden initially
1582
  with gr.Column(visible=False) as tts_interface:
@@ -1689,6 +1693,15 @@ def create_ui():
1689
  # show_share_button=True,
1690
  # show_download_button=True
1691
  )
1692
 
1693
 
1694
 
@@ -1727,7 +1740,7 @@ def create_ui():
1727
  # outputs=[result, seed],
1728
  # api_name="run",
1729
  # )
1730
- run_button.click(
1731
  fn=infer,
1732
  inputs=[new_crop_save_path],
1733
  outputs=[result]
@@ -1742,106 +1755,106 @@ def create_ui():
1742
  # this part is for 3d generate.
1743
  ###############################################################################
1744
 
1745
- with gr.Row(variant="panel",visible=False) as d3_model:
1746
- with gr.Column():
1747
- with gr.Row():
1748
- input_image = gr.Image(
1749
- label="Input Image",
1750
- image_mode="RGBA",
1751
- sources="upload",
1752
- #width=256,
1753
- #height=256,
1754
- type="pil",
1755
- elem_id="content_image",
1756
- )
1757
- processed_image = gr.Image(
1758
- label="Processed Image",
1759
- image_mode="RGBA",
1760
- #width=256,
1761
- #height=256,
1762
- type="pil",
1763
- interactive=False
1764
- )
1765
- with gr.Row():
1766
- with gr.Group():
1767
- do_remove_background = gr.Checkbox(
1768
- label="Remove Background", value=True
1769
- )
1770
- sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
1771
 
1772
- sample_steps = gr.Slider(
1773
- label="Sample Steps",
1774
- minimum=30,
1775
- maximum=75,
1776
- value=75,
1777
- step=5
1778
- )
1779
 
1780
- with gr.Row():
1781
- submit = gr.Button("Generate", elem_id="generate", variant="primary")
1782
 
1783
- with gr.Row(variant="panel"):
1784
- gr.Examples(
1785
- examples=[
1786
- os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
1787
- ],
1788
- inputs=[input_image],
1789
- label="Examples",
1790
- cache_examples=False,
1791
- examples_per_page=16
1792
- )
1793
-
1794
- with gr.Column():
1795
 
1796
- with gr.Row():
1797
 
1798
- with gr.Column():
1799
- mv_show_images = gr.Image(
1800
- label="Generated Multi-views",
1801
- type="pil",
1802
- width=379,
1803
- interactive=False
1804
- )
1805
 
1806
- # with gr.Column():
1807
- # output_video = gr.Video(
1808
- # label="video", format="mp4",
1809
- # width=379,
1810
- # autoplay=True,
1811
- # interactive=False
1812
- # )
1813
 
1814
- with gr.Row():
1815
- with gr.Tab("OBJ"):
1816
- output_model_obj = gr.Model3D(
1817
- label="Output Model (OBJ Format)",
1818
- interactive=False,
1819
- )
1820
- gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
1821
- with gr.Tab("GLB"):
1822
- output_model_glb = gr.Model3D(
1823
- label="Output Model (GLB Format)",
1824
- interactive=False,
1825
- )
1826
- gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
1827
 
1828
 
1829
 
1830
 
1831
- mv_images = gr.State()
1832
 
1833
- chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
1834
 
1835
- submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
1836
- fn=generate_mvs,
1837
- inputs=[processed_image, sample_steps, sample_seed],
1838
- outputs=[mv_images, mv_show_images]
1839
 
1840
- ).success(
1841
- fn=make3d,
1842
- inputs=[mv_images],
1843
- outputs=[output_model_obj, output_model_glb]
1844
- )
1845
 
1846
  ###############################################################################
1847
  # above part is for 3d generate.
@@ -1868,13 +1881,13 @@ def create_ui():
1868
 
1869
 
1870
 
1871
- clear_button_sketcher.click(
1872
- lambda x: (x),
1873
- [origin_image],
1874
- [sketcher_input],
1875
- queue=False,
1876
- show_progress=False
1877
- )
1878
 
1879
 
1880
 
@@ -1882,11 +1895,11 @@ def create_ui():
1882
 
1883
  openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
1884
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
1885
- modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
1886
  enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
1887
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
1888
  modules_not_need_gpt,
1889
- modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
1890
  # openai_api_key.submit(init_openai_api_key,
1891
  # outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
1892
  # modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
@@ -1898,7 +1911,7 @@ def create_ui():
1898
  disable_chatGPT_button.click(init_wo_openai_api_key,
1899
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
1900
  modules_not_need_gpt,
1901
- modules_not_need_gpt2, tts_interface,module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
1902
 
1903
  artist_label_base2.click(
1904
  get_artistinfo,
@@ -1995,23 +2008,23 @@ def create_ui():
1995
  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
1996
  paragraph,artist])
1997
 
1998
- image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
1999
- [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2000
- image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2001
- name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2002
- paragraph,artist])
2003
 
2004
- image_input.upload(upload_callback, [image_input, state, visual_chatgpt,openai_api_key],
2005
- [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2006
- image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2007
- name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2008
- paragraph,artist])
2009
 
2010
- sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
2011
- [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2012
- image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
2013
- name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
2014
- paragraph,artist])
2015
 
2016
  # image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
2017
  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
@@ -2022,9 +2035,9 @@ def create_ui():
2022
  chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
2023
  [chatbot, state, aux_state,output_audio])
2024
  chat_input.submit(lambda: "", None, chat_input)
2025
- submit_button_text.click(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
2026
- [chatbot, state, aux_state,output_audio])
2027
- submit_button_text.click(lambda: "", None, chat_input)
2028
  example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
2029
  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
2030
  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
@@ -2068,37 +2081,103 @@ def create_ui():
2068
  image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
2069
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
2070
  ],
2071
- outputs=[chatbot, state, click_state, image_input, input_image, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground],
2072
  show_progress=False, queue=True
2073
  )
2074
 
2075
 
2076
- submit_button_click.click(
2077
  submit_caption,
2078
  inputs=[
2079
- state, text_refiner,length, sentiment, factuality, language,
2080
- out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
2081
- auto_play,paragraph,focus_type,openai_api_key,new_crop_save_path
2082
  ],
2083
  outputs=[
2084
- chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,
2085
- output_audio
2086
  ],
2087
  show_progress=True,
2088
  queue=True
2089
  )
2090
-
2091
 
2092
- submit_button_sketcher.click(
2093
- inference_traject,
2094
- inputs=[
2095
- origin_image,sketcher_input, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
2096
- original_size, input_size, text_refiner,focus_type_sketch,paragraph,openai_api_key,auto_play,Input_sketch
2097
- ],
2098
- outputs=[chatbot, state, sketcher_input,output_audio,new_crop_save_path,input_image],
2099
- show_progress=False, queue=True
2100
  )
2101
2102
  export_button.click(
2103
  export_chat_log,
2104
  inputs=[state,paragraph,like_res,dislike_res],
@@ -2129,4 +2208,4 @@ if __name__ == '__main__':
2129
  iface = create_ui()
2130
  iface.queue(api_open=False, max_size=10)
2131
  # iface.queue(concurrency_count=5, api_open=False, max_size=10)
2132
- iface.launch(server_name="0.0.0.0")
 
28
  import edge_tts
29
  from langchain import __version__
30
  import torch
 
31
  from transformers import AutoProcessor, SiglipModel
32
  import faiss
33
  from huggingface_hub import hf_hub_download
 
37
  import spaces
38
  # Print the current version of LangChain
39
  print(f"Current LangChain version: {__version__}")
40
+
41
+ print("testing testing")
42
  # import tts
43
 
44
  ###############################################################################
 
47
 
48
 
49
  # import spaces #
50
+ # import threading
51
 
52
+ # lock = threading.Lock()
53
  import os
54
  # import uuid
55
  # from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
 
95
 
96
 
97
 
98
+ # def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
99
+ # """
100
+ # Get the rendering camera parameters.
101
+ # """
102
+ # c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
103
+ # if is_flexicubes:
104
+ # cameras = torch.linalg.inv(c2ws)
105
+ # cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
106
+ # else:
107
+ # extrinsics = c2ws.flatten(-2)
108
+ # intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
109
+ # cameras = torch.cat([extrinsics, intrinsics], dim=-1)
110
+ # cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
111
+ # return cameras
112
+
113
+
114
+ # def images_to_video(images, output_path, fps=30):
115
+ # # images: (N, C, H, W)
116
+ # os.makedirs(os.path.dirname(output_path), exist_ok=True)
117
+ # frames = []
118
+ # for i in range(images.shape[0]):
119
+ # frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255)
120
+ # assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
121
+ # f"Frame shape mismatch: {frame.shape} vs {images.shape}"
122
+ # assert frame.min() >= 0 and frame.max() <= 255, \
123
+ # f"Frame value out of range: {frame.min()} ~ {frame.max()}"
124
+ # frames.append(frame)
125
+ # imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')
126
+
127
+
128
+ # ###############################################################################
129
+ # # Configuration.
130
+ # ###############################################################################
131
+
132
+ # import shutil
133
+
134
+ # def find_cuda():
135
+ # # Check if CUDA_HOME or CUDA_PATH environment variables are set
136
+ # cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
137
+
138
+ # if cuda_home and os.path.exists(cuda_home):
139
+ # return cuda_home
140
+
141
+ # # Search for the nvcc executable in the system's PATH
142
+ # nvcc_path = shutil.which('nvcc')
143
+
144
+ # if nvcc_path:
145
+ # # Remove the 'bin/nvcc' part to get the CUDA installation path
146
+ # cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
147
+ # return cuda_path
148
+
149
+ # return None
150
+
151
+ # cuda_path = find_cuda()
152
+
153
+ # if cuda_path:
154
+ # print(f"CUDA installation found at: {cuda_path}")
155
+ # else:
156
+ # print("CUDA installation not found")
157
+
158
+ # config_path = 'configs/instant-nerf-base.yaml'
159
+ # config = OmegaConf.load(config_path)
160
+ # config_name = os.path.basename(config_path).replace('.yaml', '')
161
+ # model_config = config.model_config
162
+ # infer_config = config.infer_config
163
+
164
+ # IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
165
+
166
+ # device = torch.device('cuda')
167
+
168
+ # # load diffusion model
169
+ # print('Loading diffusion model ...')
170
+ # pipeline = DiffusionPipeline.from_pretrained(
171
+ # "sudo-ai/zero123plus-v1.2",
172
+ # custom_pipeline="zero123plus",
173
+ # torch_dtype=torch.float16,
174
+ # )
175
+ # pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
176
+ # pipeline.scheduler.config, timestep_spacing='trailing'
177
+ # )
178
+
179
+ # # load custom white-background UNet
180
+ # unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
181
+ # state_dict = torch.load(unet_ckpt_path, map_location='cpu')
182
+ # pipeline.unet.load_state_dict(state_dict, strict=True)
183
+
184
+ # pipeline = pipeline.to(device)
185
+
186
+ # # load reconstruction model
187
+ # print('Loading reconstruction model ...')
188
+ # model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model")
189
+ # model0 = instantiate_from_config(model_config)
190
+ # state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
191
+ # state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
192
+ # model0.load_state_dict(state_dict, strict=True)
193
+
194
+ # model0 = model0.to(device)
195
+
196
+ # print('Loading Finished!')
197
+
198
+
199
+ # def check_input_image(input_image):
200
+ # if input_image is None:
201
+ # raise gr.Error("No image uploaded!")
202
+ # image = None
203
+ # else:
204
+ # image = Image.open(input_image)
205
+ # return image
206
 
207
+ # def preprocess(input_image, do_remove_background):
208
 
209
+ # rembg_session = rembg.new_session() if do_remove_background else None
210
 
211
+ # if do_remove_background:
212
+ # input_image = remove_background(input_image, rembg_session)
213
+ # input_image = resize_foreground(input_image, 0.85)
214
 
215
+ # return input_image
216
 
217
 
218
+ # # @spaces.GPU
219
+ # def generate_mvs(input_image, sample_steps, sample_seed):
220
 
221
+ # seed_everything(sample_seed)
222
 
223
+ # # sampling
224
+ # z123_image = pipeline(
225
+ # input_image,
226
+ # num_inference_steps=sample_steps
227
+ # ).images[0]
228
 
229
+ # show_image = np.asarray(z123_image, dtype=np.uint8)
230
+ # show_image = torch.from_numpy(show_image) # (960, 640, 3)
231
+ # show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
232
+ # show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
233
+ # show_image = Image.fromarray(show_image.numpy())
234
 
235
+ # return z123_image, show_image
236
 
237
 
238
+ # # @spaces.GPU
239
+ # def make3d(images):
240
 
241
+ # global model0
242
+ # if IS_FLEXICUBES:
243
+ # model0.init_flexicubes_geometry(device)
244
+ # model0 = model0.eval()
245
 
246
+ # images = np.asarray(images, dtype=np.float32) / 255.0
247
+ # images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
248
+ # images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2) # (6, 3, 320, 320)
249
 
250
+ # input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
251
+ # render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
252
 
253
+ # images = images.unsqueeze(0).to(device)
254
+ # images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
255
 
256
+ # mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
257
+ # print(mesh_fpath)
258
+ # mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
259
+ # mesh_dirname = os.path.dirname(mesh_fpath)
260
+ # video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
261
+ # mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")
262
 
263
+ # with torch.no_grad():
264
+ # # get triplane
265
+ # planes = model0.forward_planes(images, input_cameras)
266
 
267
+ # # # get video
268
+ # # chunk_size = 20 if IS_FLEXICUBES else 1
269
+ # # render_size = 384
270
 
271
+ # # frames = []
272
+ # # for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
273
+ # # if IS_FLEXICUBES:
274
+ # # frame = model.forward_geometry(
275
+ # # planes,
276
+ # # render_cameras[:, i:i+chunk_size],
277
+ # # render_size=render_size,
278
+ # # )['img']
279
+ # # else:
280
+ # # frame = model.synthesizer(
281
+ # # planes,
282
+ # # cameras=render_cameras[:, i:i+chunk_size],
283
+ # # render_size=render_size,
284
+ # # )['images_rgb']
285
+ # # frames.append(frame)
286
+ # # frames = torch.cat(frames, dim=1)
287
+
288
+ # # images_to_video(
289
+ # # frames[0],
290
+ # # video_fpath,
291
+ # # fps=30,
292
+ # # )
293
+
294
+ # # print(f"Video saved to {video_fpath}")
295
+
296
+ # # get mesh
297
+ # mesh_out = model0.extract_mesh(
298
+ # planes,
299
+ # use_texture_map=False,
300
+ # **infer_config,
301
+ # )
302
+
303
+ # vertices, faces, vertex_colors = mesh_out
304
+ # vertices = vertices[:, [1, 2, 0]]
305
 
306
+ # save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
307
+ # save_obj(vertices, faces, vertex_colors, mesh_fpath)
308
 
309
+ # print(f"Mesh saved to {mesh_fpath}")
310
 
311
+ # return mesh_fpath, mesh_glb_fpath
312
 
313
 
314
  ###############################################################################
 
472
 
473
  css = """
474
  #warning {background-color: #FFCCCB}
475
+ .tools_button {
476
+ background: white;
477
+ border: none !important;
478
+ box-shadow: none !important;
479
+ }
480
+ #tool_box {max-width: 50px}
481
+
482
  """
483
  filtered_language_dict = {
484
  'English': 'en-US-JennyNeural',
 
491
  }
492
 
493
  focus_map = {
494
+ "D":0,
495
+ "DA":1,
496
+ "DAI":2,
497
+ "DDA":3
498
  }
499
 
500
  '''
 
620
  global gpt_state
621
  gpt_state=1
622
  # return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
623
+ return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
624
  else:
625
  gpt_state=0
626
  # return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
627
+ return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
628
 
629
  def init_wo_openai_api_key():
630
  global gpt_state
631
  gpt_state=0
632
  # return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]*3
633
+ return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]
634
 
635
  def get_click_prompt(chat_input, click_state, click_mode):
636
  inputs = json.loads(chat_input)
 
670
 
671
  async def chat_input_callback(*args):
672
  visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
673
+ message = chat_input["text"]
674
  if visual_chatgpt is not None:
675
+ state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
676
  last_text, last_response = state[-1]
677
  print("last response",last_response)
678
+ if autoplay==False:
679
+ return state, state, aux_state, None
680
+
681
  else:
682
+ audio = await texttospeech(last_response,language,autoplay)
683
+ return state, state, aux_state, audio
684
  else:
685
  response = "Text refiner is not initilzed, please input openai api key."
686
  state = state + [(chat_input, response)]
 
728
  visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
729
  print("memory",visual_chatgpt.agent.memory)
730
  # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
731
+ parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\" }")
732
  parsed_data = json.loads(parsed_data.replace("'", "\""))
733
+ name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"]
734
  # artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
735
 
736
 
 
742
  ]
743
 
744
  return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
745
+ original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist]
746
 
747
 
748
 
 
780
 
781
  enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
782
  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
783
+ # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
784
+
785
  state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], prompt["input_label"]), None)]
786
  update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
787
  text = out['generated_captions']['raw_caption']
 
805
 
806
  print("new crop save",new_crop_save_path)
807
 
808
+ yield state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
809
 
810
 
811
 
812
+ async def submit_caption(state,length, sentiment, factuality, language,
813
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
814
  autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
815
  print("state",state)
 
851
  # refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
852
  # input_points=input_points, input_labels=input_labels)
853
  try:
854
+ if autoplay==False:
855
+ return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
856
+
857
  audio_output = await texttospeech(read_info, language, autoplay)
858
  print("done")
859
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
 
866
  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
867
 
868
  else:
869
+ state = state + [(None, f"Error during TTS prediction: {str(e)}")]
870
+ print(f"Error during TTS prediction: {str(e)}")
871
+ return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
872
+
873
874
 
875
  def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language):
876
 
 
1072
  state = state + [(None, f"Error during TTS prediction: {str(e)}")]
1073
  print(f"Error during TTS prediction: {str(e)}")
1074
  # return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
1075
+ return state, state, image_input,audio_output,crop_save_path
1076
 
1077
 
1078
  else:
 
1225
  # return state,dislike_res
1226
 
1227
 
1228
+ # def get_style():
1229
+ # current_version = version.parse(gr.__version__)
1230
+ # print(current_version)
1231
+ # if current_version <= version.parse('3.24.1'):
1232
+ # style = '''
1233
+ # #image_sketcher{min-height:500px}
1234
+ # #image_sketcher [data-testid="image"], #image_sketcher [data-testid="image"] > div{min-height: 500px}
1235
+ # #image_upload{min-height:500px}
1236
+ # #image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 500px}
1237
+ # .custom-language {
1238
+ # width: 20%;
1239
+ # }
1240
+
1241
+ # .custom-autoplay {
1242
+ # width: 40%;
1243
+ # }
1244
+
1245
+ # .custom-output {
1246
+ # width: 30%;
1247
+ # }
1248
+
1249
+ # '''
1250
+ # elif current_version <= version.parse('3.27'):
1251
+ # style = '''
1252
+ # #image_sketcher{min-height:500px}
1253
+ # #image_upload{min-height:500px}
1254
+ # .custom-language {
1255
+ # width: 20%;
1256
+ # }
1257
+
1258
+ # .custom-autoplay {
1259
+ # width: 40%;
1260
+ # }
1261
+
1262
+ # .custom-output {
1263
+ # width: 30%;
1264
+ # }
1265
+ # .custom-gallery {
1266
+ # display: flex;
1267
+ # flex-wrap: wrap;
1268
+ # justify-content: space-between;
1269
+ # }
1270
+
1271
+ # .custom-gallery img {
1272
+ # width: 48%;
1273
+ # margin-bottom: 10px;
1274
+ # }
1275
+ # '''
1276
+ # else:
1277
+ # style = None
1278
 
1279
+ # return style
1280
 
1281
  # def handle_like_dislike(like_data, like_state, dislike_state):
1282
  # if like_data.liked:
 
1326
  dislike_res.append(x.value)
1327
  state = state + [(None, f"Disliked Received 👎")]
1328
  return like_res,dislike_res,state
1329
+
1330
+
1331
+ def toggle_icons_and_update_prompt(point_prompt):
1332
+ new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
1333
+ new_add_icon = "assets/icons/plus-square-blue.png" if point_prompt == "Positive" else "assets/icons/plus-square.png"
1334
+ new_minus_icon = "assets/icons/minus-square.png" if point_prompt == "Positive" else "assets/icons/minus-square-blue.png"
1335
+ print(point_prompt)
1336
+ print(new_prompt)
1337
 
1338
+ return new_prompt, gr.update(icon=new_add_icon), gr.update(icon=new_minus_icon)
1339
+
1340
+ add_icon_path="assets/icons/plus-square-blue.png"
1341
+ minus_icon_path="assets/icons/minus-square.png"
1342
+
1343
+ print("this is a print test")
1344
 
1345
  def create_ui():
1346
  title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
 
1360
  ]
1361
 
1362
  with gr.Blocks(
1363
+ css=css,
1364
  theme=gr.themes.Base()
1365
  ) as iface:
1366
  state = gr.State([])
 
1385
  dislike_res=gr.State([])
1386
  gr.Markdown(title)
1387
  gr.Markdown(description)
1388
+ point_prompt = gr.State("Positive")
1389
  # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
1390
  # with gr.Column(scale=0.5):
1391
  # # gr.Markdown("Left side content")
 
1408
  value="English", label="Language", interactive=True, elem_classes="custom-language"
1409
  )
1410
  length = gr.Slider(
1411
+ minimum=40,
1412
+ maximum=200,
1413
+ value=80,
1414
  step=1,
1415
  interactive=True,
1416
  label="Generated Caption Length",
 
1432
  # auto_play = gr.Checkbox(label="Check to autoplay audio", value=False,scale=0.4)
1433
  # output_audio = gr.HTML(label="Synthesised Audio",scale=0.6)
1434
 
1435
+ with gr.Row():
 
1436
  with gr.Column(scale=6):
1437
  with gr.Column(visible=False) as modules_not_need_gpt:
1438
  with gr.Tab("Base(GPT Power)") as base_tab:
 
1441
  name_label_base = gr.Button(value="Name: ")
1442
  artist_label_base = gr.Button(value="Artist: ")
1443
  year_label_base = gr.Button(value="Year: ")
1444
+ material_label_base = gr.Button(value="Style: ")
1445
 
1446
  with gr.Tab("Base2") as base_tab2:
1447
  image_input_base_2 = gr.Image(type="pil", interactive=True, elem_id="image_upload")
 
1449
  name_label_base2 = gr.Button(value="Name: ")
1450
  artist_label_base2 = gr.Button(value="Artist: ")
1451
  year_label_base2 = gr.Button(value="Year: ")
1452
+ material_label_base2 = gr.Button(value="Style: ")
1453
 
1454
  with gr.Tab("Click") as click_tab:
 
 
 
 
 
 
 
 
1455
  with gr.Row():
1456
+ with gr.Column(scale=10,min_width=450):
1457
+ image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
1458
+ example_image = gr.Image(type="pil", interactive=False, visible=False)
1459
  with gr.Row():
1460
+ name_label = gr.Button(value="Name: ")
1461
+ artist_label = gr.Button(value="Artist: ")
1462
+ year_label = gr.Button(value="Year: ")
1463
+ material_label = gr.Button(value="Style: ")
1464
+
1465
+
1466
+ # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
1467
+ # the tool column
1468
+ with gr.Column(scale=1,elem_id="tool_box",min_width=100):
1469
+ add_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=add_icon_path)
1470
+ minus_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
1471
+ clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button")
1472
+ clear_button_image = gr.Button(value="Change Image", interactive=True,elem_classes="tools_button")
1473
+ focus_d = gr.Button(value="D",interactive=True,elem_classes="function_button")
1474
+ focus_da = gr.Button(value="DA",interactive=True,elem_classes="function_button")
1475
+ focus_dai = gr.Button(value="DAI",interactive=True,elem_classes="function_button")
1476
+ focus_dda = gr.Button(value="DDA",interactive=True,elem_classes="function_button")
1477
+ recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button")
1478
+
1479
+ with gr.Row(visible=False):
1480
+ with gr.Column():
1481
  with gr.Row():
1482
+ # point_prompt = gr.Radio(
1483
+ # choices=["Positive", "Negative"],
1484
+ # value="Positive",
1485
+ # label="Point Prompt",
1486
+ # scale=5,
1487
+ # interactive=True)
1488
  click_mode = gr.Radio(
1489
  choices=["Continuous", "Single"],
1490
  value="Continuous",
1491
  label="Clicking Mode",
1492
  scale=5,
1493
  interactive=True)
1494
+
1495
+
1496
+ with gr.Tab("Trajectory (beta)", visible=False) as traj_tab:
 
 
 
 
 
1497
  # sketcher_input = ImageSketcher(type="pil", interactive=True, brush_radius=10,
1498
  # elem_id="image_sketcher")
1499
+ sketcher_input = gr.ImageEditor(type="pil", interactive=True
1500
+ )
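# Editor's note (not in the original commit): swapping ImageSketcher for
# gr.ImageEditor changes the component's value shape. In Gradio 4.x an
# ImageEditor with type="pil" is expected to yield a dict-like value with
# "background", "layers" and "composite" entries rather than a single PIL
# image, so any downstream handler (e.g. the commented-out
# submit_button_sketcher wiring further below) would presumably need to read
# the flattened sketch, roughly like this hypothetical helper:
#
#     def get_sketch_image(editor_value):
#         """Return the flattened sketch as a PIL image (assumption)."""
#         return editor_value["composite"] if isinstance(editor_value, dict) else editor_value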
  with gr.Row():
  name_label_traj = gr.Button(value="Name: ")
  artist_label_traj = gr.Button(value="Artist: ")

  with gr.Row():
  clear_button_sketcher = gr.Button(value="Clear Sketch", interactive=True)
  submit_button_sketcher = gr.Button(value="Submit", interactive=True)

  with gr.Column(visible=False,scale=4) as modules_need_gpt1:
+ with gr.Row(visible=False):
  sentiment = gr.Radio(
  choices=["Positive", "Natural", "Negative"],
  value="Natural",
  label="Sentiment",
  interactive=True,
  )
+
  factuality = gr.Radio(
  choices=["Factual", "Imagination"],
  value="Factual",

  value="No",
  label="Expert",
  interactive=True)
+
  with gr.Column(visible=True) as modules_not_need_gpt3:
  gr.Examples(
  examples=examples,

+ with gr.Column(scale=4):
  with gr.Column(visible=True) as module_key_input:
  openai_api_key = gr.Textbox(
  placeholder="Input openAI API key",

  with gr.Column(visible=False) as modules_not_need_gpt2:
  with gr.Blocks():
+ chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600,bubble_full_width=False)
  with gr.Column(visible=False) as modules_need_gpt3:
+ chat_input = gr.MultimodalTextbox(interactive=True, file_types=[".txt"], placeholder="Message EyeSee...", show_label=False)
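# Editor's note (not in the original commit): gr.MultimodalTextbox submits a
# dict-like value, typically {"text": ..., "files": [...]}, so the
# chat_input_callback wired further below is assumed to unpack the text and
# any attached .txt files itself. The Chatbot above is created with
# likeable=True, which pairs with the currently commented-out
# chatbot.like(print_like_dislike, ...) hook near the end of this diff; a
# minimal sketch of such a handler, under that assumption:
#
#     def _example_like_handler(like_data: gr.LikeData):
#         # like_data.index / like_data.liked identify the rated message
#         print(like_data.index, like_data.liked)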
  with gr.Row():
+ clear_button_text = gr.Button(value="Clear Chat", interactive=True)
+ export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
+ # submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
  # upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
  # downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
+

  # TTS interface hidden initially
  with gr.Column(visible=False) as tts_interface:

  # show_share_button=True,
  # show_download_button=True
  )
+
+ with gr.Row():
+ naritive = gr.Radio(
+ choices=["Third", "Artist","Item"],
+ value="Third",
+ label="Narrative",
+ scale=5,
+ interactive=True)
+ chat_log_file = gr.File(label="Download Chat Log",scale=5)

  # outputs=[result, seed],
  # api_name="run",
  # )
+ recommend_btn.click(
  fn=infer,
  inputs=[new_crop_save_path],
  outputs=[result]

  # this part is for 3d generate.
  ###############################################################################

+ # with gr.Row(variant="panel",visible=False) as d3_model:
+ # with gr.Column():
+ # with gr.Row():
+ # input_image = gr.Image(
+ # label="Input Image",
+ # image_mode="RGBA",
+ # sources="upload",
+ # #width=256,
+ # #height=256,
+ # type="pil",
+ # elem_id="content_image",
+ # )
+ # processed_image = gr.Image(
+ # label="Processed Image",
+ # image_mode="RGBA",
+ # #width=256,
+ # #height=256,
+ # type="pil",
+ # interactive=False
+ # )
+ # with gr.Row():
+ # with gr.Group():
+ # do_remove_background = gr.Checkbox(
+ # label="Remove Background", value=True
+ # )
+ # sample_seed = gr.Number(value=42, label="Seed Value", precision=0)

+ # sample_steps = gr.Slider(
+ # label="Sample Steps",
+ # minimum=30,
+ # maximum=75,
+ # value=75,
+ # step=5
+ # )

+ # with gr.Row():
+ # submit = gr.Button("Generate", elem_id="generate", variant="primary")

+ # with gr.Row(variant="panel"):
+ # gr.Examples(
+ # examples=[
+ # os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
+ # ],
+ # inputs=[input_image],
+ # label="Examples",
+ # cache_examples=False,
+ # examples_per_page=16
+ # )
+
+ # with gr.Column():

+ # with gr.Row():

+ # with gr.Column():
+ # mv_show_images = gr.Image(
+ # label="Generated Multi-views",
+ # type="pil",
+ # width=379,
+ # interactive=False
+ # )

+ # # with gr.Column():
+ # # output_video = gr.Video(
+ # # label="video", format="mp4",
+ # # width=379,
+ # # autoplay=True,
+ # # interactive=False
+ # # )

+ # with gr.Row():
+ # with gr.Tab("OBJ"):
+ # output_model_obj = gr.Model3D(
+ # label="Output Model (OBJ Format)",
+ # interactive=False,
+ # )
+ # gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
+ # with gr.Tab("GLB"):
+ # output_model_glb = gr.Model3D(
+ # label="Output Model (GLB Format)",
+ # interactive=False,
+ # )
+ # gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")

+ # mv_images = gr.State()

+ # chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])

+ # submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
+ # fn=generate_mvs,
+ # inputs=[processed_image, sample_steps, sample_seed],
+ # outputs=[mv_images, mv_show_images]

+ # ).success(
+ # fn=make3d,
+ # inputs=[mv_images],
+ # outputs=[output_model_obj, output_model_glb]
+ # )

  ###############################################################################
  # above part is for 3d generate.

+ # clear_button_sketcher.click(
+ # lambda x: (x),
+ # [origin_image],
+ # [sketcher_input],
+ # queue=False,
+ # show_progress=False
+ # )

  openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
+ modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
  enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
  modules_not_need_gpt,
+ modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
  # openai_api_key.submit(init_openai_api_key,
  # outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
  # modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])

  disable_chatGPT_button.click(init_wo_openai_api_key,
  outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
  modules_not_need_gpt,
+ modules_not_need_gpt2, tts_interface,module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])

  artist_label_base2.click(
  get_artistinfo,

  name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
  paragraph,artist])

+ # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
+ # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
+ # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
+ # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+ # paragraph,artist])

+ # image_input.upload(upload_callback, [image_input, state, visual_chatgpt,openai_api_key],
+ # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
+ # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
+ # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+ # paragraph,artist])

+ # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
+ # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
+ # image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
+ # name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+ # paragraph,artist])

  # image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
  # [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,

  chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
  [chatbot, state, aux_state,output_audio])
  chat_input.submit(lambda: "", None, chat_input)
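# Editor's note (not in the original commit): chat_input deliberately has two
# submit listeners. The first routes the message through chat_input_callback;
# the second, chat_input.submit(lambda: "", None, chat_input), simply clears
# the textbox after sending. A hypothetical equivalent that makes the ordering
# explicit would chain the clear step with .then():
#
#     chat_input.submit(
#         chat_input_callback,
#         [visual_chatgpt, chat_input, click_state, state, aux_state, language, auto_play],
#         [chatbot, state, aux_state, output_audio],
#     ).then(lambda: "", None, chat_input)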
+ # submit_button_text.click(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
+ # [chatbot, state, aux_state,output_audio])
+ # submit_button_text.click(lambda: "", None, chat_input)
  example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
  [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
  image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \

  image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
  ],
+ outputs=[chatbot, state, click_state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground],
  show_progress=False, queue=True
  )

+ focus_d.click(
  submit_caption,
  inputs=[
+ state,length, sentiment, factuality, language,
+ out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path
  ],
  outputs=[
+ chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
  ],
  show_progress=True,
  queue=True
  )
+
+ focus_da.click(
+ submit_caption,
+ inputs=[
+ state,length, sentiment, factuality, language,
+ out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,auto_play, paragraph,focus_da,openai_api_key,new_crop_save_path
+ ],
+ outputs=[
+ chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
+ ],
+ show_progress=True,
+ queue=True
+ )
+
+ focus_dai.click(
+ submit_caption,
+ inputs=[
+ state,length, sentiment, factuality, language,
+ out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
+ auto_play, paragraph,focus_dai,openai_api_key,new_crop_save_path
+ ],
+ outputs=[
+ chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
+ ],
+ show_progress=True,
+ queue=True
  )

+
+ focus_dda.click(
+ submit_caption,
+ inputs=[
+ state,length, sentiment, factuality, language,
+ out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
+ auto_play, paragraph,focus_dda,openai_api_key,new_crop_save_path
+ ],
+ outputs=[
+ chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
+ ],
+ show_progress=True,
+ queue=True
+ )
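# Editor's note (not in the original commit): the four focus buttons (D, DA,
# DAI, DDA) all route through the same submit_caption handler; the only
# difference is which button component is passed as the focus argument, so the
# handler can read the clicked button's value to pick the caption focus. A
# hypothetical, behaviour-preserving way to register the four listeners in one
# loop (assuming the same surrounding gr.Blocks scope and component names):
#
#     for focus_btn in (focus_d, focus_da, focus_dai, focus_dda):
#         focus_btn.click(
#             submit_caption,
#             inputs=[state, length, sentiment, factuality, language,
#                     out_state, click_index_state, input_mask_state,
#                     input_points_state, input_labels_state,
#                     auto_play, paragraph, focus_btn, openai_api_key,
#                     new_crop_save_path],
#             outputs=[chatbot, state, click_index_state, input_mask_state,
#                      input_points_state, input_labels_state, out_state,
#                      output_audio],
#             show_progress=True, queue=True,
#         )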
+
+ add_button.click(
+ toggle_icons_and_update_prompt,
+ inputs=[point_prompt],
+ outputs=[point_prompt,add_button,minus_button],
+ show_progress=True,
+ queue=True
+ )
+
+ minus_button.click(
+ toggle_icons_and_update_prompt,
+ inputs=[point_prompt],
+ outputs=[point_prompt,add_button,minus_button],
+ show_progress=True,
+ queue=True
+ )
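# Editor's note (not in the original commit): toggle_icons_and_update_prompt is
# defined elsewhere in app.py; from this wiring it is assumed to flip the
# point_prompt state between "Positive" and "Negative" (the point label used
# when clicking on the image) and to refresh the add/minus button icons
# accordingly. A rough, hypothetical sketch of such a function:
#
#     def _example_toggle_point_prompt(current_prompt):
#         new_prompt = "Negative" if current_prompt == "Positive" else "Positive"
#         # Return updates for [point_prompt, add_button, minus_button];
#         # the icon paths stand in for add_icon_path / minus_icon_path above.
#         return (new_prompt,
#                 gr.update(icon=add_icon_path),
#                 gr.update(icon=minus_icon_path))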
+
+ # submit_button_sketcher.click(
+ # inference_traject,
+ # inputs=[
+ # origin_image,sketcher_input, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
+ # original_size, input_size, text_refiner,focus_type_sketch,paragraph,openai_api_key,auto_play,Input_sketch
+ # ],
+ # outputs=[chatbot, state, sketcher_input,output_audio,new_crop_save_path],
+ # show_progress=False, queue=True
+ # )
+
  export_button.click(
  export_chat_log,
  inputs=[state,paragraph,like_res,dislike_res],

  iface = create_ui()
  iface.queue(api_open=False, max_size=10)
  # iface.queue(concurrency_count=5, api_open=False, max_size=10)
+ iface.launch(server_name="0.0.0.0",show_error=True)
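# Editor's note (not in the original commit): server_name="0.0.0.0" binds the
# app to all network interfaces, which is the usual setting for Hugging Face
# Spaces / Docker deployments. For local runs, a fixed port or a public share
# link could be requested with the standard Gradio launch arguments, e.g.:
#
#     iface.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)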