lokijii commited on
Commit
bbf3291
·
verified ·
1 Parent(s): 3c68c17

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -0
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
3
+ import torch
4
+ from PIL import Image
5
+ import re
6
+ import requests
7
+ from io import BytesIO
8
+ import copy
9
+ import secrets
10
+ from pathlib import Path
11
+
12
# Load the Qwen-VL chat checkpoint. trust_remote_code is required because the
# model ships its own modeling/tokenization code on the Hub.
tokenizer = AutoTokenizer.from_pretrained("lokijii/qwen", trust_remote_code=True)
config = AutoConfig.from_pretrained("lokijii/qwen", trust_remote_code=True, torch_dtype=torch.float16)
#config.quantization_config["use_exllama"] = True
# Enable the ExLlama v2 kernels for the GPTQ-quantized weights.
config.quantization_config["disable_exllama"] = False
config.quantization_config["exllama_config"] = {"version": 2}
# BUG FIX: the original used AutoModelForCausalLM.from_config(config, ...),
# which builds a model with *randomly initialized* weights and never loads the
# checkpoint — every reply would be garbage. from_pretrained loads the weights
# while still honouring the tweaked quantization config.
model = AutoModelForCausalLM.from_pretrained(
    "lokijii/qwen", config=config, trust_remote_code=True, torch_dtype=torch.float16
)

# Regex for <box>...</box> grounding spans the model emits alongside <ref> tags.
BOX_TAG_PATTERN = r"<box>([\s\S]*?)</box>"
# Trailing punctuation stripped from a query before it is sent to the model.
PUNCTUATION = "!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏."
22
+ def _parse_text(text):
23
+ lines = text.split("\n")
24
+ lines = [line for line in lines if line != ""]
25
+ count = 0
26
+ for i, line in enumerate(lines):
27
+ if "```" in line:
28
+ count += 1
29
+ items = line.split("`")
30
+ if count % 2 == 1:
31
+ lines[i] = f'<pre><code class="language-{items[-1]}">'
32
+ else:
33
+ lines[i] = f"<br></code></pre>"
34
+ else:
35
+ if i > 0:
36
+ if count % 2 == 1:
37
+ line = line.replace("`", r"\`")
38
+ line = line.replace("<", "&lt;")
39
+ line = line.replace(">", "&gt;")
40
+ line = line.replace(" ", "&nbsp;")
41
+ line = line.replace("*", "&ast;")
42
+ line = line.replace("_", "&lowbar;")
43
+ line = line.replace("-", "&#45;")
44
+ line = line.replace(".", "&#46;")
45
+ line = line.replace("!", "&#33;")
46
+ line = line.replace("(", "&#40;")
47
+ line = line.replace(")", "&#41;")
48
+ line = line.replace("$", "&#36;")
49
+ lines[i] = "<br>" + line
50
+ text = "".join(lines)
51
+ return text
52
+
53
def predict(_chatbot, task_history):
    """Run one chat turn: send the latest query (with history) to the model and
    write the response into both the display history and the task history.

    _chatbot holds display-formatted (query, answer) pairs; task_history holds
    the raw pairs sent to the model. Image uploads appear as tuple/list entries
    whose first element is the file path. Returns the updated _chatbot list;
    task_history is mutated in place.
    """
    chat_query = _chatbot[-1][0]
    query = task_history[-1][0]
    # Deep-copy so the folding below cannot mutate the live task_history.
    history_cp = copy.deepcopy(task_history)
    full_response = ""

    # Fold consecutive image uploads into the next text query using the
    # "Picture N: <img>path</img>" convention expected by Qwen-VL's chat API.
    history_filter = []
    pic_idx = 1
    pre = ""
    for i, (q, a) in enumerate(history_cp):
        if isinstance(q, (tuple, list)):
            q = f'Picture {pic_idx}: <img>{q[0]}</img>'
            pre += q + '\n'
            pic_idx += 1
        else:
            pre += q
            history_filter.append((pre, a))
            pre = ""
    # Last folded pair is the new message; everything before it is context.
    history, message = history_filter[:-1], history_filter[-1][0]
    response, history = model.chat(tokenizer, message, history=history)
    # If the reply contains <box> groundings, the remote tokenizer code can
    # render them onto the latest picture; returns None when there is nothing
    # to draw.
    image = tokenizer.draw_bbox_on_latest_picture(response, history)
    if image is not None:
        # Save the annotated image under a random /tmp path so Gradio can serve it.
        temp_dir = secrets.token_hex(20)
        temp_dir = Path("/tmp") / temp_dir
        temp_dir.mkdir(exist_ok=True, parents=True)
        name = f"tmp{secrets.token_hex(5)}.jpg"
        filename = temp_dir / name
        image.save(str(filename))
        _chatbot[-1] = (_parse_text(chat_query), (str(filename),))
        # Strip <ref>/<box> markup from the text shown next to the image.
        chat_response = response.replace("<ref>", "")
        chat_response = chat_response.replace(r"</ref>", "")
        chat_response = re.sub(BOX_TAG_PATTERN, "", chat_response)
        if chat_response != "":
            # Show the remaining text as a separate model-only message.
            _chatbot.append((None, chat_response))
    else:
        _chatbot[-1] = (_parse_text(chat_query), response)
    full_response = _parse_text(response)
    task_history[-1] = (query, full_response)
    return _chatbot
92
+
93
def add_text(history, task_history, text):
    """Append a user text message to both histories; third return value clears
    the input textbox.

    A single trailing punctuation mark (not doubled) is stripped from the copy
    stored in task_history; the display copy keeps the text verbatim.
    """
    if len(text) >= 2 and text[-1] in PUNCTUATION and text[-2] not in PUNCTUATION:
        task_text = text[:-1]
    else:
        task_text = text
    new_history = history + [(_parse_text(text), None)]
    new_task_history = task_history + [(task_text, None)]
    return new_history, new_task_history, ""
100
+
101
def add_file(history, task_history, file):
    """Record an uploaded image in both histories as a ``(path,)`` tuple entry."""
    entry = ((file.name,), None)
    return history + [entry], task_history + [entry]
105
+
106
def reset_user_input():
    """Return a Gradio update that blanks the query textbox."""
    return gr.update(value="")
108
+
109
def reset_state(task_history):
    """Wipe the conversation: empty task_history in place, blank the chatbot."""
    del task_history[:]
    return []
112
+
113
def regenerate(_chatbot, task_history):
    """Re-run the model on the most recent query.

    Clears the last answer from task_history and from the chatbot display,
    then calls predict() again. No-op when there is no history or the last
    turn has no answer yet (e.g. an image upload awaiting a question).

    Fix vs. original: leftover debug ``print`` statements dumping the full
    histories to stdout on every click have been removed.
    """
    if not task_history:
        return _chatbot
    query, answer = task_history[-1]
    if answer is None:
        return _chatbot
    # Drop the stored answer so predict() generates a fresh one.
    task_history[-1] = (query, None)
    last_item = _chatbot.pop(-1)
    if last_item[0] is None:
        # Popped row was a standalone model message (text accompanying a drawn
        # bounding-box image); blank the answer of the preceding user row.
        _chatbot[-1] = (_chatbot[-1][0], None)
    else:
        _chatbot.append((last_item[0], None))
    return predict(_chatbot, task_history)
129
+
130
css = '''
.gradio-container{max-width:800px !important}
'''

with gr.Blocks(css=css) as demo:
    gr.Markdown("# Qwen-VL-Chat Bot")
    gr.Markdown("## Qwen-VL: A Multimodal Large Vision Language Model by Alibaba Cloud **Space by [@Artificialguybr](https://twitter.com/artificialguybr). Test the [QwenLLM-14B](https://huggingface.co/spaces/artificialguybr/qwen-14b-chat-demo) here for free!</center>")
    chatbot = gr.Chatbot(label='Qwen-VL-Chat', elem_classes="control-height", height=520)
    query = gr.Textbox(lines=2, label='Input')
    # Raw conversation state sent to the model; the display copy lives in `chatbot`.
    task_history = gr.State([])

    with gr.Row():
        addfile_btn = gr.UploadButton("📁 Upload", file_types=["image"])
        submit_btn = gr.Button("🚀 Submit")
        regen_btn = gr.Button("🤔️ Regenerate")
        empty_bin = gr.Button("🧹 Clear History")

    gr.Markdown("### Key Features:\n- **Strong Performance**: Surpasses existing LVLMs on multiple English benchmarks including Zero-shot Captioning and VQA.\n- **Multi-lingual Support**: Supports English, Chinese, and multi-lingual conversation.\n- **High Resolution**: Utilizes 448*448 resolution for fine-grained recognition and understanding.")
    # BUG FIX: add_text returns THREE values (history, task_history, "") but the
    # original listed only two outputs, so the return count did not match the
    # output components. `query` is now the third output — the empty string
    # clears the textbox, which also makes the separate
    # `submit_btn.click(reset_user_input, ...)` handler redundant.
    submit_btn.click(add_text, [chatbot, task_history, query], [chatbot, task_history, query]).then(
        predict, [chatbot, task_history], [chatbot], show_progress=True
    )
    empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)
    regen_btn.click(regenerate, [chatbot, task_history], [chatbot], show_progress=True)
    addfile_btn.upload(add_file, [chatbot, task_history, addfile_btn], [chatbot, task_history], show_progress=True)

demo.launch()