Spaces: Running
Niki Zhang committed: Update app.py
app.py CHANGED
@@ -28,7 +28,6 @@ import re
-import gradio as gr

@@ -38,6 +37,8 @@ import requests

@@ -46,9 +47,9 @@ print(f"Current LangChain version: {__version__}")
-import threading
-lock = threading.Lock()

@@ -94,220 +95,220 @@ from huggingface_hub import hf_hub_download
-def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
-def images_to_video(images, output_path, fps=30):
-###############################################################################
-# Configuration.
-###############################################################################
-import shutil
-def find_cuda():
-cuda_path = find_cuda()
-if cuda_path:
-else:
-config_path = 'configs/instant-nerf-base.yaml'
-config = OmegaConf.load(config_path)
-config_name = os.path.basename(config_path).replace('.yaml', '')
-model_config = config.model_config
-infer_config = config.infer_config
-IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
-device = torch.device('cuda')
-# load diffusion model
-print('Loading diffusion model ...')
-pipeline = DiffusionPipeline.from_pretrained(
-)
-pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
-)
-# load custom white-background UNet
-unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
-state_dict = torch.load(unet_ckpt_path, map_location='cpu')
-pipeline.unet.load_state_dict(state_dict, strict=True)
-pipeline = pipeline.to(device)
-# load reconstruction model
-print('Loading reconstruction model ...')
-model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model")
-model0 = instantiate_from_config(model_config)
-state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
-state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
-model0.load_state_dict(state_dict, strict=True)
-model0 = model0.to(device)
-print('Loading Finished!')
-def check_input_image(input_image):
-def preprocess(input_image, do_remove_background):
-# @spaces.GPU
-def generate_mvs(input_image, sample_steps, sample_seed):
-# @spaces.GPU
-def make3d(images):

@@ -471,10 +472,13 @@ examples = [

@@ -487,10 +491,10 @@ filtered_language_dict = {

@@ -616,17 +620,17 @@ def init_openai_api_key(api_key=""):
-return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
-return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
-return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]

@@ -666,15 +670,17 @@ def update_click_state(click_state, caption, click_mode):
-state, _, aux_state, _ = visual_chatgpt.run_text(
-if autoplay:
-audio=

@@ -722,9 +728,9 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
-parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and
-name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["

@@ -736,7 +742,7 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
-original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"

@@ -774,7 +780,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language

@@ -798,13 +805,11 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
-yield state, state, click_state, image_input_nobackground,
-async def submit_caption(state, text_refiner, length, sentiment, factuality, language,

@@ -846,6 +851,9 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan

@@ -858,16 +866,11 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
-except Exception as e:
-state = state + [(None, f"Error during TTS prediction: {str(e)}")]
-print(f"Error during TTS prediction: {str(e)}")
-return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None

@@ -1069,7 +1072,7 @@ async def inference_traject(origin_image,sketcher_image, enable_wiki, language,
-return state, state, image_input,audio_output,crop_save_path

@@ -1222,58 +1225,58 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
-def get_style():

@@ -1323,9 +1326,21 @@ def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):

@@ -1345,7 +1360,7 @@ def create_ui():
-css=

@@ -1370,6 +1385,7 @@ def create_ui():

@@ -1392,9 +1408,9 @@ def create_ui():
-minimum=
-maximum=
-value=

@@ -1416,8 +1432,7 @@ def create_ui():
-with gr.Row():

@@ -1426,7 +1441,7 @@ def create_ui():
-material_label_base = gr.Button(value="

@@ -1434,52 +1449,55 @@ def create_ui():
-material_label_base2 = gr.Button(value="
-image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
-example_image = gr.Image(type="pil", interactive=False, visible=False)
-# example_image_click = gr.Image(type="pil", interactive=False, visible=False)
-with gr.Row():
-name_label = gr.Button(value="Name: ")
-artist_label = gr.Button(value="Artist: ")
-year_label = gr.Button(value="Year: ")
-material_label = gr.Button(value="Material: ")
-with gr.Column():
-point_prompt = gr.Radio(
-with gr.Row():
-clear_button_click = gr.Button(value="Clear Clicks", interactive=True,scale=2)
-clear_button_image = gr.Button(value="Clear Image", interactive=True,scale=2)
-with gr.Tab("Trajectory (beta)") as traj_tab:
-sketcher_input = gr.ImageEditor(type="pil", interactive=True

@@ -1489,28 +1507,16 @@ def create_ui():
-with gr.Row():
-with gr.Row():
-focus_type_sketch = gr.Radio(
-choices=["CFV-D", "CFV-DA", "CFV-DAI","PFV-DDA"],
-value="CFV-D",
-label="Information Type",
-interactive=True)
-Input_sketch = gr.Radio(
-choices=["Trace+Seg", "Trace"],
-value="Trace",
-label="Trace Type",
-interactive=True)
-with gr.Row():

@@ -1531,6 +1537,8 @@ def create_ui():

@@ -1541,7 +1549,7 @@ def create_ui():
-with gr.Column(scale=

@@ -1563,20 +1571,16 @@ def create_ui():
-chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600)
-chat_input = gr.
-clear_button_text = gr.Button(value="Clear
-with gr.Row():
-export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
-with gr.Row():
-chat_log_file = gr.File(label="Download Chat Log")

@@ -1689,6 +1693,15 @@ def create_ui():

@@ -1727,7 +1740,7 @@ def create_ui():

@@ -1742,106 +1755,106 @@ def create_ui():
-with gr.Row(variant="panel",visible=False) as d3_model:
-mv_images = gr.State()
-chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
-submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
-).success(
-)

@@ -1868,13 +1881,13 @@ def create_ui():
-clear_button_sketcher.click(
-)

@@ -1882,11 +1895,11 @@ def create_ui():
-modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,
-modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,

@@ -1898,7 +1911,7 @@ def create_ui():
-modules_not_need_gpt2, tts_interface,module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box,

@@ -1995,23 +2008,23 @@ def create_ui():
-image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
-image_input.upload(upload_callback, [image_input, state, visual_chatgpt,openai_api_key],
-sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],

@@ -2022,9 +2035,9 @@ def create_ui():
 chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
 [chatbot, state, aux_state,output_audio])
 chat_input.submit(lambda: "", None, chat_input)
-submit_button_text.click(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
-submit_button_text.click(lambda: "", None, chat_input)
 example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
 [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
 image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \

@@ -2068,37 +2081,103 @@ def create_ui():
 image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
 out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
 ],
-outputs=[chatbot, state, click_state, image_input,
 show_progress=False, queue=True
 )
 submit_caption,
 inputs=[
-state,
-out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
-auto_play,paragraph,focus_type,openai_api_key,new_crop_save_path
 ],
 outputs=[
-chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,
-output_audio
 ],
 show_progress=True,
 queue=True
 )
 )
 export_button.click(
 export_chat_log,
 inputs=[state,paragraph,like_res,dislike_res],

@@ -2129,4 +2208,4 @@ if __name__ == '__main__':
 iface = create_ui()
 iface.queue(api_open=False, max_size=10)
 # iface.queue(concurrency_count=5, api_open=False, max_size=10)
-iface.launch(server_name="0.0.0.0")
 28   import edge_tts
 29   from langchain import __version__
 30   import torch
 31   from transformers import AutoProcessor, SiglipModel
 32   import faiss
 33   from huggingface_hub import hf_hub_download

 37   import spaces
 38   # Print the current version of LangChain
 39   print(f"Current LangChain version: {__version__}")
 40 +
 41 + print("testing testing")
 42   # import tts
 43
 44   ###############################################################################

 47
 48
 49   # import spaces #
 50 + # import threading
 51
 52 + # lock = threading.Lock()
 53   import os
 54   # import uuid
 55   # from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
95 |
|
96 |
|
97 |
|
98 |
+
# def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
|
99 |
+
# """
|
100 |
+
# Get the rendering camera parameters.
|
101 |
+
# """
|
102 |
+
# c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
|
103 |
+
# if is_flexicubes:
|
104 |
+
# cameras = torch.linalg.inv(c2ws)
|
105 |
+
# cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
|
106 |
+
# else:
|
107 |
+
# extrinsics = c2ws.flatten(-2)
|
108 |
+
# intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
|
109 |
+
# cameras = torch.cat([extrinsics, intrinsics], dim=-1)
|
110 |
+
# cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
|
111 |
+
# return cameras
|
112 |
+
|
113 |
+
|
114 |
+
# def images_to_video(images, output_path, fps=30):
|
115 |
+
# # images: (N, C, H, W)
|
116 |
+
# os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
117 |
+
# frames = []
|
118 |
+
# for i in range(images.shape[0]):
|
119 |
+
# frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255)
|
120 |
+
# assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
|
121 |
+
# f"Frame shape mismatch: {frame.shape} vs {images.shape}"
|
122 |
+
# assert frame.min() >= 0 and frame.max() <= 255, \
|
123 |
+
# f"Frame value out of range: {frame.min()} ~ {frame.max()}"
|
124 |
+
# frames.append(frame)
|
125 |
+
# imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')
|
126 |
+
|
127 |
+
|
128 |
+
# ###############################################################################
|
129 |
+
# # Configuration.
|
130 |
+
# ###############################################################################
|
131 |
+
|
132 |
+
# import shutil
|
133 |
+
|
134 |
+
# def find_cuda():
|
135 |
+
# # Check if CUDA_HOME or CUDA_PATH environment variables are set
|
136 |
+
# cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
|
137 |
+
|
138 |
+
# if cuda_home and os.path.exists(cuda_home):
|
139 |
+
# return cuda_home
|
140 |
+
|
141 |
+
# # Search for the nvcc executable in the system's PATH
|
142 |
+
# nvcc_path = shutil.which('nvcc')
|
143 |
+
|
144 |
+
# if nvcc_path:
|
145 |
+
# # Remove the 'bin/nvcc' part to get the CUDA installation path
|
146 |
+
# cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
|
147 |
+
# return cuda_path
|
148 |
+
|
149 |
+
# return None
|
150 |
+
|
151 |
+
# cuda_path = find_cuda()
|
152 |
+
|
153 |
+
# if cuda_path:
|
154 |
+
# print(f"CUDA installation found at: {cuda_path}")
|
155 |
+
# else:
|
156 |
+
# print("CUDA installation not found")
|
157 |
+
|
158 |
+
# config_path = 'configs/instant-nerf-base.yaml'
|
159 |
+
# config = OmegaConf.load(config_path)
|
160 |
+
# config_name = os.path.basename(config_path).replace('.yaml', '')
|
161 |
+
# model_config = config.model_config
|
162 |
+
# infer_config = config.infer_config
|
163 |
+
|
164 |
+
# IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
|
165 |
+
|
166 |
+
# device = torch.device('cuda')
|
167 |
+
|
168 |
+
# # load diffusion model
|
169 |
+
# print('Loading diffusion model ...')
|
170 |
+
# pipeline = DiffusionPipeline.from_pretrained(
|
171 |
+
# "sudo-ai/zero123plus-v1.2",
|
172 |
+
# custom_pipeline="zero123plus",
|
173 |
+
# torch_dtype=torch.float16,
|
174 |
+
# )
|
175 |
+
# pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
176 |
+
# pipeline.scheduler.config, timestep_spacing='trailing'
|
177 |
+
# )
|
178 |
+
|
179 |
+
# # load custom white-background UNet
|
180 |
+
# unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
|
181 |
+
# state_dict = torch.load(unet_ckpt_path, map_location='cpu')
|
182 |
+
# pipeline.unet.load_state_dict(state_dict, strict=True)
|
183 |
+
|
184 |
+
# pipeline = pipeline.to(device)
|
185 |
+
|
186 |
+
# # load reconstruction model
|
187 |
+
# print('Loading reconstruction model ...')
|
188 |
+
# model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model")
|
189 |
+
# model0 = instantiate_from_config(model_config)
|
190 |
+
# state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
|
191 |
+
# state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
|
192 |
+
# model0.load_state_dict(state_dict, strict=True)
|
193 |
+
|
194 |
+
# model0 = model0.to(device)
|
195 |
+
|
196 |
+
# print('Loading Finished!')
|
197 |
+
|
198 |
+
|
199 |
+
# def check_input_image(input_image):
|
200 |
+
# if input_image is None:
|
201 |
+
# raise gr.Error("No image uploaded!")
|
202 |
+
# image = None
|
203 |
+
# else:
|
204 |
+
# image = Image.open(input_image)
|
205 |
+
# return image
|
206 |
|
207 |
+
# def preprocess(input_image, do_remove_background):
|
208 |
|
209 |
+
# rembg_session = rembg.new_session() if do_remove_background else None
|
210 |
|
211 |
+
# if do_remove_background:
|
212 |
+
# input_image = remove_background(input_image, rembg_session)
|
213 |
+
# input_image = resize_foreground(input_image, 0.85)
|
214 |
|
215 |
+
# return input_image
|
216 |
|
217 |
|
218 |
+
# # @spaces.GPU
|
219 |
+
# def generate_mvs(input_image, sample_steps, sample_seed):
|
220 |
|
221 |
+
# seed_everything(sample_seed)
|
222 |
|
223 |
+
# # sampling
|
224 |
+
# z123_image = pipeline(
|
225 |
+
# input_image,
|
226 |
+
# num_inference_steps=sample_steps
|
227 |
+
# ).images[0]
|
228 |
|
229 |
+
# show_image = np.asarray(z123_image, dtype=np.uint8)
|
230 |
+
# show_image = torch.from_numpy(show_image) # (960, 640, 3)
|
231 |
+
# show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
|
232 |
+
# show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
|
233 |
+
# show_image = Image.fromarray(show_image.numpy())
|
234 |
|
235 |
+
# return z123_image, show_image
|
236 |
|
237 |
|
238 |
+
# # @spaces.GPU
|
239 |
+
# def make3d(images):
|
240 |
|
241 |
+
# global model0
|
242 |
+
# if IS_FLEXICUBES:
|
243 |
+
# model0.init_flexicubes_geometry(device)
|
244 |
+
# model0 = model0.eval()
|
245 |
|
246 |
+
# images = np.asarray(images, dtype=np.float32) / 255.0
|
247 |
+
# images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
|
248 |
+
# images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2) # (6, 3, 320, 320)
|
249 |
|
250 |
+
# input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
|
251 |
+
# render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
|
252 |
|
253 |
+
# images = images.unsqueeze(0).to(device)
|
254 |
+
# images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
|
255 |
|
256 |
+
# mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
|
257 |
+
# print(mesh_fpath)
|
258 |
+
# mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
|
259 |
+
# mesh_dirname = os.path.dirname(mesh_fpath)
|
260 |
+
# video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
|
261 |
+
# mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")
|
262 |
|
263 |
+
# with torch.no_grad():
|
264 |
+
# # get triplane
|
265 |
+
# planes = model0.forward_planes(images, input_cameras)
|
266 |
|
267 |
+
# # # get video
|
268 |
+
# # chunk_size = 20 if IS_FLEXICUBES else 1
|
269 |
+
# # render_size = 384
|
270 |
|
271 |
+
# # frames = []
|
272 |
+
# # for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
|
273 |
+
# # if IS_FLEXICUBES:
|
274 |
+
# # frame = model.forward_geometry(
|
275 |
+
# # planes,
|
276 |
+
# # render_cameras[:, i:i+chunk_size],
|
277 |
+
# # render_size=render_size,
|
278 |
+
# # )['img']
|
279 |
+
# # else:
|
280 |
+
# # frame = model.synthesizer(
|
281 |
+
# # planes,
|
282 |
+
# # cameras=render_cameras[:, i:i+chunk_size],
|
283 |
+
# # render_size=render_size,
|
284 |
+
# # )['images_rgb']
|
285 |
+
# # frames.append(frame)
|
286 |
+
# # frames = torch.cat(frames, dim=1)
|
287 |
+
|
288 |
+
# # images_to_video(
|
289 |
+
# # frames[0],
|
290 |
+
# # video_fpath,
|
291 |
+
# # fps=30,
|
292 |
+
# # )
|
293 |
+
|
294 |
+
# # print(f"Video saved to {video_fpath}")
|
295 |
+
|
296 |
+
# # get mesh
|
297 |
+
# mesh_out = model0.extract_mesh(
|
298 |
+
# planes,
|
299 |
+
# use_texture_map=False,
|
300 |
+
# **infer_config,
|
301 |
+
# )
|
302 |
+
|
303 |
+
# vertices, faces, vertex_colors = mesh_out
|
304 |
+
# vertices = vertices[:, [1, 2, 0]]
|
305 |
|
306 |
+
# save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
|
307 |
+
# save_obj(vertices, faces, vertex_colors, mesh_fpath)
|
308 |
|
309 |
+
# print(f"Mesh saved to {mesh_fpath}")
|
310 |
|
311 |
+
# return mesh_fpath, mesh_glb_fpath
|
312 |
|
313 |
|
314 |
###############################################################################
|
|
|
472 |
|
473 |
css = """
|
474 |
#warning {background-color: #FFCCCB}
|
475 |
+
.tools_button {
|
476 |
+
background: white;
|
477 |
+
border: none !important;
|
478 |
+
box-shadow: none !important;
|
479 |
+
}
|
480 |
+
#tool_box {max-width: 50px}
|
481 |
+
|
482 |
"""
|
483 |
filtered_language_dict = {
|
484 |
'English': 'en-US-JennyNeural',
|
|
|
491 |
}
|
492 |
|
493 |
focus_map = {
|
494 |
+
"D":0,
|
495 |
+
"DA":1,
|
496 |
+
"DAI":2,
|
497 |
+
"DDA":3
|
498 |
}
|
499 |
|
500 |
'''
|
|
|
620 |
global gpt_state
|
621 |
gpt_state=1
|
622 |
# return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
|
623 |
+
return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
|
624 |
else:
|
625 |
gpt_state=0
|
626 |
# return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
|
627 |
+
return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
|
628 |
|
629 |
def init_wo_openai_api_key():
|
630 |
global gpt_state
|
631 |
gpt_state=0
|
632 |
# return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]*3
|
633 |
+
return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]
|
634 |
|
635 |
def get_click_prompt(chat_input, click_state, click_mode):
|
636 |
inputs = json.loads(chat_input)
|
|
|
670 |
|
671 |
async def chat_input_callback(*args):
|
672 |
visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
|
673 |
+
message = chat_input["text"]
|
674 |
if visual_chatgpt is not None:
|
675 |
+
state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
|
676 |
last_text, last_response = state[-1]
|
677 |
print("last response",last_response)
|
678 |
+
if autoplay==False:
|
679 |
+
return state, state, aux_state, None
|
680 |
+
|
681 |
else:
|
682 |
+
audio = await texttospeech(last_response,language,autoplay)
|
683 |
+
return state, state, aux_state, audio
|
684 |
else:
|
685 |
response = "Text refiner is not initilzed, please input openai api key."
|
686 |
state = state + [(chat_input, response)]
|
|
|
728 |
visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
|
729 |
print("memory",visual_chatgpt.agent.memory)
|
730 |
# visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
|
731 |
+
parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\" }")
|
732 |
parsed_data = json.loads(parsed_data.replace("'", "\""))
|
733 |
+
name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"]
|
734 |
# artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
|
735 |
|
736 |
|
|
|
742 |
]
|
743 |
|
744 |
return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
|
745 |
+
original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist]
|
746 |
|
747 |
|
748 |
|
|
|
780 |
|
781 |
enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
|
782 |
out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
|
783 |
+
# state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
|
784 |
+
|
785 |
state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], prompt["input_label"]), None)]
|
786 |
update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
|
787 |
text = out['generated_captions']['raw_caption']
|
|
|
805 |
|
806 |
print("new crop save",new_crop_save_path)
|
807 |
|
808 |
+
yield state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
809 |
|
810 |
|
811 |
|
812 |
+
async def submit_caption(state,length, sentiment, factuality, language,
|
|
|
|
|
813 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
814 |
autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
815 |
print("state",state)
|
|
|
851 |
# refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
852 |
# input_points=input_points, input_labels=input_labels)
|
853 |
try:
|
854 |
+
if autoplay==False:
|
855 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
|
856 |
+
|
857 |
audio_output = await texttospeech(read_info, language, autoplay)
|
858 |
print("done")
|
859 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
|
|
866 |
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
867 |
|
868 |
else:
|
869 |
+
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
870 |
+
print(f"Error during TTS prediction: {str(e)}")
|
871 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
|
872 |
+
|
|
|
873 |
|
|
|
|
|
|
|
|
|
874 |
|
875 |
def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language):
|
876 |
|
|
|
1072 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
1073 |
print(f"Error during TTS prediction: {str(e)}")
|
1074 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
1075 |
+
return state, state, image_input,audio_output,crop_save_path
|
1076 |
|
1077 |
|
1078 |
else:
|
|
|
1225 |
# return state,dislike_res
|
1226 |
|
1227 |
|
1228 |
+
# def get_style():
|
1229 |
+
# current_version = version.parse(gr.__version__)
|
1230 |
+
# print(current_version)
|
1231 |
+
# if current_version <= version.parse('3.24.1'):
|
1232 |
+
# style = '''
|
1233 |
+
# #image_sketcher{min-height:500px}
|
1234 |
+
# #image_sketcher [data-testid="image"], #image_sketcher [data-testid="image"] > div{min-height: 500px}
|
1235 |
+
# #image_upload{min-height:500px}
|
1236 |
+
# #image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 500px}
|
1237 |
+
# .custom-language {
|
1238 |
+
# width: 20%;
|
1239 |
+
# }
|
1240 |
+
|
1241 |
+
# .custom-autoplay {
|
1242 |
+
# width: 40%;
|
1243 |
+
# }
|
1244 |
+
|
1245 |
+
# .custom-output {
|
1246 |
+
# width: 30%;
|
1247 |
+
# }
|
1248 |
+
|
1249 |
+
# '''
|
1250 |
+
# elif current_version <= version.parse('3.27'):
|
1251 |
+
# style = '''
|
1252 |
+
# #image_sketcher{min-height:500px}
|
1253 |
+
# #image_upload{min-height:500px}
|
1254 |
+
# .custom-language {
|
1255 |
+
# width: 20%;
|
1256 |
+
# }
|
1257 |
+
|
1258 |
+
# .custom-autoplay {
|
1259 |
+
# width: 40%;
|
1260 |
+
# }
|
1261 |
+
|
1262 |
+
# .custom-output {
|
1263 |
+
# width: 30%;
|
1264 |
+
# }
|
1265 |
+
# .custom-gallery {
|
1266 |
+
# display: flex;
|
1267 |
+
# flex-wrap: wrap;
|
1268 |
+
# justify-content: space-between;
|
1269 |
+
# }
|
1270 |
+
|
1271 |
+
# .custom-gallery img {
|
1272 |
+
# width: 48%;
|
1273 |
+
# margin-bottom: 10px;
|
1274 |
+
# }
|
1275 |
+
# '''
|
1276 |
+
# else:
|
1277 |
+
# style = None
|
1278 |
|
1279 |
+
# return style
|
1280 |
|
1281 |
# def handle_like_dislike(like_data, like_state, dislike_state):
|
1282 |
# if like_data.liked:
|
|
|
 1326  dislike_res.append(x.value)
 1327  state = state + [(None, f"Disliked Received 👎")]
 1328  return like_res,dislike_res,state
 1329 +
 1330 +
 1331 + def toggle_icons_and_update_prompt(point_prompt):
 1332 + new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
 1333 + new_add_icon = "assets/icons/plus-square-blue.png" if point_prompt == "Positive" else "assets/icons/plus-square.png"
 1334 + new_minus_icon = "assets/icons/minus-square.png" if point_prompt == "Positive" else "assets/icons/minus-square-blue.png"
 1335 + print(point_prompt)
 1336 + print(new_prompt)
 1337
 1338 + return new_prompt, gr.update(icon=new_add_icon), gr.update(icon=new_minus_icon)
 1339 +
 1340 + add_icon_path="assets/icons/plus-square-blue.png"
 1341 + minus_icon_path="assets/icons/minus-square.png"
 1342 +
 1343 + print("this is a print test")
 1344
 1345  def create_ui():
 1346  title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
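How this toggle gets bound to the new add/minus tool buttons is not shown in this hunk; a hedged sketch of one plausible wiring inside create_ui() follows (component names add_button, minus_button, and point_prompt are taken from the tool column and state added further down, and the binding itself is an assumption):

# Illustration only; the commit's actual event bindings may differ.
add_button.click(
    toggle_icons_and_update_prompt,
    inputs=[point_prompt],
    outputs=[point_prompt, add_button, minus_button],
)
minus_button.click(
    toggle_icons_and_update_prompt,
    inputs=[point_prompt],
    outputs=[point_prompt, add_button, minus_button],
)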
 1360  ]
 1361
 1362  with gr.Blocks(
 1363 + css=css,
 1364  theme=gr.themes.Base()
 1365  ) as iface:
 1366  state = gr.State([])
 1385  dislike_res=gr.State([])
 1386  gr.Markdown(title)
 1387  gr.Markdown(description)
 1388 + point_prompt = gr.State("Positive")
 1389  # with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
 1390  # with gr.Column(scale=0.5):
 1391  # # gr.Markdown("Left side content")
 1408  value="English", label="Language", interactive=True, elem_classes="custom-language"
 1409  )
 1410  length = gr.Slider(
 1411 + minimum=40,
 1412 + maximum=200,
 1413 + value=80,
 1414  step=1,
 1415  interactive=True,
 1416  label="Generated Caption Length",
 1432  # auto_play = gr.Checkbox(label="Check to autoplay audio", value=False,scale=0.4)
 1433  # output_audio = gr.HTML(label="Synthesised Audio",scale=0.6)
 1434
 1435 + with gr.Row():
 1436  with gr.Column(scale=6):
 1437  with gr.Column(visible=False) as modules_not_need_gpt:
 1438  with gr.Tab("Base(GPT Power)") as base_tab:

 1441  name_label_base = gr.Button(value="Name: ")
 1442  artist_label_base = gr.Button(value="Artist: ")
 1443  year_label_base = gr.Button(value="Year: ")
 1444 + material_label_base = gr.Button(value="Style: ")
 1445
 1446  with gr.Tab("Base2") as base_tab2:
 1447  image_input_base_2 = gr.Image(type="pil", interactive=True, elem_id="image_upload")

 1449  name_label_base2 = gr.Button(value="Name: ")
 1450  artist_label_base2 = gr.Button(value="Artist: ")
 1451  year_label_base2 = gr.Button(value="Year: ")
 1452 + material_label_base2 = gr.Button(value="Style: ")
 1453
 1454  with gr.Tab("Click") as click_tab:
 1455  with gr.Row():
 1456 + with gr.Column(scale=10,min_width=450):
 1457 + image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
 1458 + example_image = gr.Image(type="pil", interactive=False, visible=False)
 1459  with gr.Row():
 1460 + name_label = gr.Button(value="Name: ")
 1461 + artist_label = gr.Button(value="Artist: ")
 1462 + year_label = gr.Button(value="Year: ")
 1463 + material_label = gr.Button(value="Style: ")
 1464 +
 1465 +
 1466 + # example_image_click = gr.Image(type="pil", interactive=False, visible=False)
 1467 + # the tool column
 1468 + with gr.Column(scale=1,elem_id="tool_box",min_width=100):
 1469 + add_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=add_icon_path)
 1470 + minus_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
 1471 + clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button")
 1472 + clear_button_image = gr.Button(value="Change Image", interactive=True,elem_classes="tools_button")
 1473 + focus_d = gr.Button(value="D",interactive=True,elem_classes="function_button")
 1474 + focus_da = gr.Button(value="DA",interactive=True,elem_classes="function_button")
 1475 + focus_dai = gr.Button(value="DAI",interactive=True,elem_classes="function_button")
 1476 + focus_dda = gr.Button(value="DDA",interactive=True,elem_classes="function_button")
 1477 + recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button")
 1478 +
 1479 + with gr.Row(visible=False):
 1480 + with gr.Column():
 1481  with gr.Row():
 1482 + # point_prompt = gr.Radio(
 1483 + # choices=["Positive", "Negative"],
 1484 + # value="Positive",
 1485 + # label="Point Prompt",
 1486 + # scale=5,
 1487 + # interactive=True)
 1488  click_mode = gr.Radio(
 1489  choices=["Continuous", "Single"],
 1490  value="Continuous",
 1491  label="Clicking Mode",
 1492  scale=5,
 1493  interactive=True)
 1494 +
 1495 +
 1496 + with gr.Tab("Trajectory (beta)", visible=False) as traj_tab:
 1497  # sketcher_input = ImageSketcher(type="pil", interactive=True, brush_radius=10,
 1498  # elem_id="image_sketcher")
 1499 + sketcher_input = gr.ImageEditor(type="pil", interactive=True
 1500 + )
 1501  with gr.Row():
 1502  name_label_traj = gr.Button(value="Name: ")
 1503  artist_label_traj = gr.Button(value="Artist: ")

 1507  with gr.Row():
 1508  clear_button_sketcher = gr.Button(value="Clear Sketch", interactive=True)
 1509  submit_button_sketcher = gr.Button(value="Submit", interactive=True)
 1510
 1511  with gr.Column(visible=False,scale=4) as modules_need_gpt1:
 1512 + with gr.Row(visible=False):
 1513  sentiment = gr.Radio(
 1514  choices=["Positive", "Natural", "Negative"],
 1515  value="Natural",
 1516  label="Sentiment",
 1517  interactive=True,
 1518  )
 1519 +
 1520  factuality = gr.Radio(
 1521  choices=["Factual", "Imagination"],
 1522  value="Factual",

 1537  value="No",
 1538  label="Expert",
 1539  interactive=True)
 1540 +
 1541 +
 1542  with gr.Column(visible=True) as modules_not_need_gpt3:
 1543  gr.Examples(
 1544  examples=examples,

 1549
 1550
 1551
 1552 + with gr.Column(scale=4):
 1553  with gr.Column(visible=True) as module_key_input:
 1554  openai_api_key = gr.Textbox(
 1555  placeholder="Input openAI API key",

 1571
 1572  with gr.Column(visible=False) as modules_not_need_gpt2:
 1573  with gr.Blocks():
 1574 + chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600,bubble_full_width=False)
 1575  with gr.Column(visible=False) as modules_need_gpt3:
 1576 + chat_input = gr.MultimodalTextbox(interactive=True, file_types=[".txt"], placeholder="Message EyeSee...", show_label=False)
 1577  with gr.Row():
 1578 + clear_button_text = gr.Button(value="Clear Chat", interactive=True)
 1579 + export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
 1580 + # submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
 1581  # upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
 1582  # downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
 1583 +
 1584
 1585  # TTS interface hidden initially
 1586  with gr.Column(visible=False) as tts_interface:
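A gr.MultimodalTextbox submits a dict-like value with "text" and "files" keys, which is why chat_input_callback earlier in this diff reads chat_input["text"]; a minimal standalone sketch under that assumption (demo names are illustrative, not from the commit):

import gradio as gr

def read_text(payload):
    # MultimodalTextbox values arrive as {"text": ..., "files": [...]}.
    return payload["text"]

with gr.Blocks() as demo:
    box = gr.MultimodalTextbox(placeholder="Message EyeSee...", show_label=False)
    out = gr.Textbox(label="Echoed text")
    box.submit(read_text, inputs=[box], outputs=[out])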
 1693  # show_share_button=True,
 1694  # show_download_button=True
 1695  )
 1696 +
 1697 + with gr.Row():
 1698 + naritive = gr.Radio(
 1699 + choices=["Third", "Artist","Item"],
 1700 + value="Third",
 1701 + label="narritive",
 1702 + scale=5,
 1703 + interactive=True)
 1704 + chat_log_file = gr.File(label="Download Chat Log",scale=5)
 1705
 1706
 1707
 1740  # outputs=[result, seed],
 1741  # api_name="run",
 1742  # )
 1743 + recommend_btn.click(
 1744  fn=infer,
 1745  inputs=[new_crop_save_path],
 1746  outputs=[result]
1755 |
# this part is for 3d generate.
###############################################################################

+ # with gr.Row(variant="panel",visible=False) as d3_model:
+ #     with gr.Column():
+ #         with gr.Row():
+ #             input_image = gr.Image(
+ #                 label="Input Image",
+ #                 image_mode="RGBA",
+ #                 sources="upload",
+ #                 #width=256,
+ #                 #height=256,
+ #                 type="pil",
+ #                 elem_id="content_image",
+ #             )
+ #             processed_image = gr.Image(
+ #                 label="Processed Image",
+ #                 image_mode="RGBA",
+ #                 #width=256,
+ #                 #height=256,
+ #                 type="pil",
+ #                 interactive=False
+ #             )
+ #         with gr.Row():
+ #             with gr.Group():
+ #                 do_remove_background = gr.Checkbox(
+ #                     label="Remove Background", value=True
+ #                 )
+ #                 sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
+ #                 sample_steps = gr.Slider(
+ #                     label="Sample Steps",
+ #                     minimum=30,
+ #                     maximum=75,
+ #                     value=75,
+ #                     step=5
+ #                 )
+ #         with gr.Row():
+ #             submit = gr.Button("Generate", elem_id="generate", variant="primary")
+ #         with gr.Row(variant="panel"):
+ #             gr.Examples(
+ #                 examples=[
+ #                     os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
+ #                 ],
+ #                 inputs=[input_image],
+ #                 label="Examples",
+ #                 cache_examples=False,
+ #                 examples_per_page=16
+ #             )
+ #     with gr.Column():
+ #         with gr.Row():
+ #             with gr.Column():
+ #                 mv_show_images = gr.Image(
+ #                     label="Generated Multi-views",
+ #                     type="pil",
+ #                     width=379,
+ #                     interactive=False
+ #                 )
+ #             # with gr.Column():
+ #             #     output_video = gr.Video(
+ #             #         label="video", format="mp4",
+ #             #         width=379,
+ #             #         autoplay=True,
+ #             #         interactive=False
+ #             #     )
+ #         with gr.Row():
+ #             with gr.Tab("OBJ"):
+ #                 output_model_obj = gr.Model3D(
+ #                     label="Output Model (OBJ Format)",
+ #                     interactive=False,
+ #                 )
+ #                 gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
+ #             with gr.Tab("GLB"):
+ #                 output_model_glb = gr.Model3D(
+ #                     label="Output Model (GLB Format)",
+ #                     interactive=False,
+ #                 )
+ #                 gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")

+ # mv_images = gr.State()

+ # chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])

+ # submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
+ #     fn=generate_mvs,
+ #     inputs=[processed_image, sample_steps, sample_seed],
+ #     outputs=[mv_images, mv_show_images]
+ # ).success(
+ #     fn=make3d,
+ #     inputs=[mv_images],
+ #     outputs=[output_model_obj, output_model_glb]
+ # )

###############################################################################
# above part is for 3d generate.
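For reference, re-enabling the disabled panel above would also mean restoring the chained wiring at the end of that block. A minimal sketch of that chain, assuming the commented-out components (submit, processed_image, mv_show_images, output_model_obj, output_model_glb) and the check_input_image / generate_mvs / make3d helpers defined earlier in app.py:

    # Sketch only: mirrors the commented-out wiring above.
    mv_images = gr.State()

    submit.click(
        fn=check_input_image,              # first stage: confirm a cropped region was saved
        inputs=[new_crop_save_path],
        outputs=[processed_image],
    ).success(
        fn=generate_mvs,                   # second stage: diffusion model produces the multi-view grid
        inputs=[processed_image, sample_steps, sample_seed],
        outputs=[mv_images, mv_show_images],
    ).success(
        fn=make3d,                         # third stage: reconstruction model exports OBJ/GLB meshes
        inputs=[mv_images],
        outputs=[output_model_obj, output_model_glb],
    )

Each .success() stage runs only if the previous callback finishes without raising, so a missing crop or a failed multi-view generation stops the pipeline before the more expensive steps.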

+ # clear_button_sketcher.click(
+ #     lambda x: (x),
+ #     [origin_image],
+ #     [sketcher_input],
+ #     queue=False,
+ #     show_progress=False
+ # )

openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
        outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
+               modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, top_row])
enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
        outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
                modules_not_need_gpt,
+               modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, top_row])
# openai_api_key.submit(init_openai_api_key,
#         outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
#                 modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, d3_model, top_row])
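The key bindings above and the disable binding just below all rely on the same gating pattern: one callback inspects the key and returns one gr.update() per output slot, so whole groups of modules can be shown or hidden at once. A reduced, hypothetical sketch of that shape (the real init_openai_api_key / init_wo_openai_api_key in app.py drive many more outputs, including the text refiner and the Visual ChatGPT agent):

    # Hypothetical, simplified illustration; not the actual init_openai_api_key.
    def init_api_key_demo(api_key):
        valid = isinstance(api_key, str) and api_key.startswith("sk-")
        return (
            gr.update(visible=valid),       # group of modules that need GPT
            gr.update(visible=not valid),   # fallback modules
            gr.update(value="Key accepted." if valid else "Invalid key.", visible=True),  # notification box
        )

    # The same callback can serve both the textbox submit and the button click,
    # exactly as init_openai_api_key does above (component names here are placeholders):
    # openai_api_key.submit(init_api_key_demo, [openai_api_key], [gpt_group, no_gpt_group, notification_box])
    # enable_chatGPT_button.click(init_api_key_demo, [openai_api_key], [gpt_group, no_gpt_group, notification_box])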

disable_chatGPT_button.click(init_wo_openai_api_key,
        outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
                modules_not_need_gpt,
+               modules_not_need_gpt2, tts_interface, module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box, top_row])

artist_label_base2.click(
    get_artistinfo,

        name_label_base2, artist_label_base2, year_label_base2, material_label_base2, name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
        paragraph, artist])

+ # image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt, openai_api_key],
+ #         [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
+ #         image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, \
+ #         name_label_base2, artist_label_base2, year_label_base2, material_label_base2, name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+ #         paragraph, artist])

+ # image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
+ #         [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
+ #         image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, \
+ #         name_label_base2, artist_label_base2, year_label_base2, material_label_base2, name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+ #         paragraph, artist])

+ # sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
+ #         [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
+ #         image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, \
+ #         name_label_base2, artist_label_base2, year_label_base2, material_label_base2, name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
+ #         paragraph, artist])

# image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
#         [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,

chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state, language, auto_play],
        [chatbot, state, aux_state, output_audio])
chat_input.submit(lambda: "", None, chat_input)
+ # submit_button_text.click(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state, language, auto_play],
+ #         [chatbot, state, aux_state, output_audio])
+ # submit_button_text.click(lambda: "", None, chat_input)
example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
        [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input, image_input_base_2,
        image_embedding, original_size, input_size, name_label, artist_label, year_label, material_label, name_label_base, artist_label_base, year_label_base, material_label_base, \

        image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
        out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
    ],
+   outputs=[chatbot, state, click_state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, new_crop_save_path, image_input_nobackground],
    show_progress=False, queue=True
)

+ focus_d.click(
    submit_caption,
    inputs=[
+       state, length, sentiment, factuality, language,
+       out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph, focus_d, openai_api_key, new_crop_save_path
    ],
    outputs=[
+       chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, output_audio
    ],
    show_progress=True,
    queue=True
)

+ focus_da.click(
+     submit_caption,
+     inputs=[
+         state, length, sentiment, factuality, language,
+         out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph, focus_da, openai_api_key, new_crop_save_path
+     ],
+     outputs=[
+         chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, output_audio
+     ],
+     show_progress=True,
+     queue=True
+ )

+ focus_dai.click(
+     submit_caption,
+     inputs=[
+         state, length, sentiment, factuality, language,
+         out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
+         auto_play, paragraph, focus_dai, openai_api_key, new_crop_save_path
+     ],
+     outputs=[
+         chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, output_audio
+     ],
+     show_progress=True,
+     queue=True
)

+ focus_dda.click(
+     submit_caption,
+     inputs=[
+         state, length, sentiment, factuality, language,
+         out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
+         auto_play, paragraph, focus_dda, openai_api_key, new_crop_save_path
+     ],
+     outputs=[
+         chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, output_audio
+     ],
+     show_progress=True,
+     queue=True
+ )
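The four focus buttons above are wired identically; only the button forwarded to submit_caption changes. A refactoring sketch (not part of this commit) that registers the same handler in a loop, assuming the same components are in scope:

    # Sketch only: equivalent to the four bindings above.
    for focus_btn in (focus_d, focus_da, focus_dai, focus_dda):
        focus_btn.click(
            submit_caption,
            inputs=[
                state, length, sentiment, factuality, language,
                out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
                auto_play, paragraph, focus_btn, openai_api_key, new_crop_save_path,
            ],
            outputs=[
                chatbot, state, click_index_state, input_mask_state, input_points_state,
                input_labels_state, out_state, output_audio,
            ],
            show_progress=True,
            queue=True,
        )

Passing the button component itself in inputs is what lets submit_caption receive that button's value and pick the matching caption focus.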

+ add_button.click(
+     toggle_icons_and_update_prompt,
+     inputs=[point_prompt],
+     outputs=[point_prompt, add_button, minus_button],
+     show_progress=True,
+     queue=True
+ )

+ minus_button.click(
+     toggle_icons_and_update_prompt,
+     inputs=[point_prompt],
+     outputs=[point_prompt, add_button, minus_button],
+     show_progress=True,
+     queue=True
+ )
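add_button and minus_button share one callback that flips the point-prompt mode and swaps which of the two icons is active; its body is defined elsewhere in app.py. A hypothetical sketch of the shape implied by the inputs and outputs above (the logic and styling choices here are illustrative only, not the actual function):

    # Hypothetical illustration: one return value per output component
    # (point_prompt, add_button, minus_button), matching the bindings above.
    def toggle_icons_and_update_prompt_demo(point_prompt):
        to_negative = point_prompt == "Positive"
        new_prompt = "Negative" if to_negative else "Positive"
        return (
            new_prompt,                                                    # new point_prompt value
            gr.update(variant="secondary" if to_negative else "primary"),  # add_button appearance
            gr.update(variant="primary" if to_negative else "secondary"),  # minus_button appearance
        )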

+ # submit_button_sketcher.click(
+ #     inference_traject,
+ #     inputs=[
+ #         origin_image, sketcher_input, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
+ #         original_size, input_size, text_refiner, focus_type_sketch, paragraph, openai_api_key, auto_play, Input_sketch
+ #     ],
+ #     outputs=[chatbot, state, sketcher_input, output_audio, new_crop_save_path],
+ #     show_progress=False, queue=True
+ # )

export_button.click(
    export_chat_log,
    inputs=[state, paragraph, like_res, dislike_res],

iface = create_ui()
iface.queue(api_open=False, max_size=10)
# iface.queue(concurrency_count=5, api_open=False, max_size=10)
+ iface.launch(server_name="0.0.0.0", show_error=True)
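queue() is configured before launch(): it routes events through Gradio's request queue, with max_size=10 capping how many requests may wait and api_open=False keeping the queue endpoints out of the public API. A minimal standalone sketch with the same settings (the demo app and handler here are placeholders, not taken from app.py):

    # Placeholder app showing the same queue/launch configuration.
    import gradio as gr

    with gr.Blocks() as demo:
        box = gr.Textbox(label="Echo")
        out = gr.Textbox(label="Result")
        box.submit(lambda s: s, box, out)

    demo.queue(api_open=False, max_size=10)
    demo.launch(server_name="0.0.0.0", show_error=True)

server_name="0.0.0.0" binds to all interfaces so the Space's container can expose the app, and show_error=True surfaces callback exceptions in the UI instead of failing silently.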