s3nh committed on
Commit
4b9815b
1 Parent(s): 64d9e7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +398 -1
app.py CHANGED
@@ -1,3 +1,400 @@
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- gr.Interface.load("models/s3nh/chinese-alpaca-2-7b-GGML").launch()
 
1
+ import os
2
+ import platform
3
+ import random
4
+ import time
5
+ from dataclasses import asdict, dataclass
6
+ from pathlib import Path
7
+
8
  import gradio as gr
9
+ import psutil
10
+ from about_time import about_time
11
+ from ctransformers import AutoModelForCausalLM
12
+ from dl_hf_model import dl_hf_model
13
+ from loguru import logger
14
+
15
+
16
# Link to the quantized GGML weights this app downloads and serves.
# NOTE(review): the original carried two different size remarks for this same
# file (4.05G and 2.87G); neither is repeated here -- confirm the real size.
URL = "https://huggingface.co/s3nh/chinese-alpaca-2-7b-GGML/blob/main/chinese-alpaca-2-7b.ggmlv3.q5_1.bin"

# Host check kept from the original: true on the "golay"/"okteto" machines or
# on Kaggle.  The trailing `or 1` forces it on everywhere, so it is
# informational only at the moment.
_ = (
    "golay" in platform.node()
    or "okteto" in platform.node()
    or Path("/kaggle").exists()
    or 1  # run 7b in hf
)

# `url` is the name the download step reads.  It used to be assigned only
# inside `if _:` (with a literal identical to URL), which would have left it
# undefined if the flag were ever false; bind it unconditionally instead.
url = URL
28
+
29
+
30
# Prompt wrapper applied to every question before it is sent to the model.
# The original assigned nine templates in a row; each assignment overwrote the
# previous one, so only this last one was ever in effect.  Several of the
# discarded variants used placeholders (`{user_prompt}`, `{prompt}`) that the
# `.format(question=...)` call in this file does not supply.
prompt_template = """SYSTEM:
USER: {question}
ASSISTANT: """

# Role label ("<text before the first colon>:") of the second-to-last
# non-empty template line -- "USER:" for the template above.
stop_string = [
    line.split(":")[0] + ":"
    for line in prompt_template.splitlines()
    if line.strip()
][-2]
89
+
90
logger.debug(f"{stop_string=} not used")

# Leave one physical core free, but never go below one worker.
# psutil.cpu_count() returns None when the count cannot be determined; the
# original subtracted 1 from it directly, which raises TypeError in that case.
_ = psutil.cpu_count(logical=False)
cpu_count: int = max(int(_) - 1, 1) if _ else 1
logger.debug(f"{cpu_count=}")
95
+
96
# Placeholder so the name exists even before the weights are loaded.
LLM = None

# Fetch the model file; without it the app cannot do anything useful, so any
# failure is logged and the process exits with status 1.
try:
    # dl_hf_model's result is unpacked into a location and a size; the size is
    # logged below with a "G" suffix.
    model_loc, file_size = dl_hf_model(url)
except Exception as exc_:
    logger.error(exc_)
    raise SystemExit(1) from exc_

# Load the downloaded file through ctransformers as a llama-type model.
LLM = AutoModelForCausalLM.from_pretrained(
    model_loc,
    model_type="llama",
)

logger.info(f"done load llm {model_loc=} {file_size=}G")
110
+
111
# Make local timestamps use Shanghai time.
os.environ["TZ"] = "Asia/Shanghai"
try:
    time.tzset()
except AttributeError:
    # time.tzset only exists on Unix; on Windows the attribute is missing.
    # The original also logged this warning after a *successful* tzset() call,
    # which made the message misleading on Unix hosts.
    logger.warning("Windows, cant run time.tzset()")
118
+
119
+
120
@dataclass
class GenerationConfig:
    """Generation options; `generate` expands them with `asdict` into keyword arguments of the model call."""

    # Sampling controls (names match the keyword arguments passed to the model).
    temperature: float = 0.7
    top_k: int = 50
    top_p: float = 0.9
    repetition_penalty: float = 1.0
    # Upper bound on generated tokens per request.
    max_new_tokens: int = 512
    seed: int = 42
    # The chat and API handlers in this file both construct configs with
    # reset=True.
    reset: bool = False
    # Per `generate`'s docstring, a generator is returned when this is true.
    stream: bool = True
    # NOTE: `threads` (cpu_count) and `stop` ([stop_string]) fields were left
    # disabled by the author and are therefore not forwarded to the model.
132
+
133
+
134
def generate(
    question: str,
    llm=LLM,
    config: "GenerationConfig | None" = None,
):
    """Run model inference, will return a Generator if streaming is true.

    Args:
        question: Text substituted for `{question}` in `prompt_template`.
        llm: Callable model; defaults to the module-level `LLM` as it was
            bound when this function was defined.
        config: Generation options.  When omitted, a fresh
            `GenerationConfig()` is created per call (the original shared one
            mutable default instance across all calls).

    Returns:
        Whatever `llm(prompt, **options)` returns.
    """
    if config is None:
        config = GenerationConfig()

    prompt = prompt_template.format(question=question)

    return llm(
        prompt,
        **asdict(config),
    )


logger.debug(f"{asdict(GenerationConfig())=}")
151
+
152
+
153
def user(user_message, history):
    """Append the user's message as a new, unanswered turn.

    `history` is modified in place: a `[user_message, None]` pair is appended.

    Returns:
        `(user_message, history)` -- the message is echoed back unchanged.
    """
    history.append([user_message, None])
    return user_message, history
156
+
157
+
158
def user1(user_message, history):
    """Append the user's message as a new, unanswered turn and clear the input.

    `history` is modified in place: a `[user_message, None]` pair is appended.

    Returns:
        `("", history)` -- the empty string replaces the message text.
    """
    history.append([user_message, None])
    return "", history
161
+
162
def bot_(history, delay: float = 0.02):
    """Stand-in bot that types out a canned reply without calling the model.

    Args:
        history: Chat history; the last entry's reply slot is filled in place.
        delay: Seconds to sleep after each character (previously a hard-coded
            0.02).

    Yields:
        `history` after each character of "<user message>: <reply>" is added,
        then once more with the reply slot replaced by the canned reply alone.
    """
    user_message = history[-1][0]
    resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
    bot_message = user_message + ": " + resp
    history[-1][1] = ""
    for character in bot_message:
        history[-1][1] += character
        time.sleep(delay)
        yield history

    history[-1][1] = resp
    yield history
174
+
175
+
176
def bot(history):
    """Stream the model's answer to the latest user message into `history`.

    Args:
        history: Chat history; the last entry's reply slot is updated in place.

    Yields:
        `history` after every chunk received from `generate` (reply shown with
        a "(N.NNs) " time-to-first-chunk prefix), then once more with the
        prefix removed and a timing summary appended.
    """
    user_message = history[-1][0]
    text = ""  # answer accumulated so far

    logger.debug(f"{user_message=}")

    with about_time() as atime:
        first_chunk = True
        prefix = ""
        then = time.time()

        logger.debug("about to generate")

        config = GenerationConfig(reset=True)
        for elm in generate(user_message, config=config):
            if first_chunk:
                logger.debug("in the loop")
                # Latency until the first chunk arrives.
                prefix = f"({time.time() - then:.2f}s) "
                first_chunk = False
                print(prefix, end="", flush=True)
                logger.debug(f"{prefix=}")
            print(elm, end="", flush=True)

            # Extend the running text instead of re-joining every chunk.
            text += elm
            history[-1][1] = prefix + text
            yield history

    # The per-character rate is undefined when nothing was generated; the
    # original divided by the length unconditionally (ZeroDivisionError).
    summary = f"(time elapsed: {atime.duration_human}"
    if text:
        summary += f", {atime.duration / len(text):.2f}s/char"
    summary += ")"

    history[-1][1] = text + f"\n{summary}"
    yield history
210
+
211
+
212
def predict_api(prompt):
    """Answer a single prompt without streaming (handler for the "api" endpoint).

    Args:
        prompt: Question text passed to `generate`.

    Returns:
        The result of `generate`, or the string `"exc=<repr of the error>"`
        if anything raised; the error is logged rather than propagated.
    """
    logger.debug(f"{prompt=}")
    try:
        config = GenerationConfig(
            temperature=0.2,
            top_k=10,
            top_p=0.9,
            repetition_penalty=1.0,
            max_new_tokens=512,  # adjust as needed
            seed=42,
            reset=True,
            stream=False,
        )

        response = generate(
            prompt,
            config=config,
        )

        logger.debug(f"api: {response=}")
    except Exception as exc:
        logger.error(exc)
        response = f"{exc=}"
    return response
237
+
238
+
239
# Extra CSS handed to gr.Blocks; the `xsmall` and `disclaimer` classes are
# referenced through `elem_classes` in the UI definition.
css = """
    .importantButton {
        background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
        border: none !important;
    }
    .importantButton:hover {
        background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
        border: none !important;
    }
    .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
    .xsmall {font-size: x-small;}
"""
# Sample passage; not referenced anywhere in the visible part of this file.
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
# One single-item row per example, as consumed by gr.Examples(inputs=[msg]).
examples_list = [
    ["Send an email requesting that people use language models responsibly."],
    ["Write a shouting match between Julius Caesar and Napoleon"],
    ["Write a theory to explain why cat never existed"],
    ["write a story about a grain of sand as it watches millions of years go by"],
    ["What are 3 popular chess openings?"],
    ["write a conversation between the sun and pluto"],
    # Fixed the run-together "andhere’s" in this prompt.
    ["Did you know that Yann LeCun dropped a rap album last year? We listened to it and here’s what we thought:"],
]
261
+
262
logger.info("start block")

# Chat UI.  Layout: collapsible info panel, chat window, message box with
# Submit/Stop/Clear buttons, a hidden "advanced options" row, example prompts,
# a disclaimer, and a hidden panel that backs the "api" endpoint.
with gr.Blocks(
    title=f"{Path(model_loc).name}",
    theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
    css=css,
) as block:
    with gr.Accordion("🎈 Info", open=False):
        # The closing heading tag was `</h4>` in the original, which did not
        # match the opening `<h5>`.
        # NOTE(review): whitespace inside this literal could not be recovered
        # from the source; plain newlines are used between the lines.
        gr.Markdown(
            f"<h5><center>{Path(model_loc).name}</center></h5>\n"
            "Most examples are meant for another model.\n"
            "You probably should try to test\n"
            "some related prompts.",
            elem_classes="xsmall",
        )

    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        with gr.Column(scale=5):
            msg = gr.Textbox(
                label="Chat Message Box",
                placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
                show_label=False,
                lines=6,
                max_lines=30,
                show_copy_button=True,
            )
        with gr.Column(scale=1, min_width=50):
            with gr.Row():
                submit = gr.Button("Submit", elem_classes="xsmall")
                stop = gr.Button("Stop", visible=True)
                clear = gr.Button("Clear History", visible=True)

    # Hidden row: these widgets are created but not wired to any handler in
    # the visible code.
    with gr.Row(visible=False):
        with gr.Accordion("Advanced Options:", open=False):
            with gr.Row():
                with gr.Column(scale=2):
                    system = gr.Textbox(
                        label="System Prompt",
                        value=prompt_template,
                        show_label=False,
                        container=False,
                    )
                with gr.Column():
                    with gr.Row():
                        change = gr.Button("Change System Prompt")
                        reset = gr.Button("Reset System Prompt")

    with gr.Accordion("Example Inputs", open=True):
        examples = gr.Examples(
            examples=examples_list,
            inputs=[msg],
            examples_per_page=40,
        )

    with gr.Accordion("Disclaimer", open=True):
        model_name = Path(model_loc).name
        # Fixes relative to the original text: the segment containing the
        # first model-name placeholder lacked the `f` prefix (so a literal
        # "{_}" was displayed), "PURPOSE" and "WITHOUT" ran together, and
        # "AGGRESIVE" was misspelled.
        gr.Markdown(
            "Disclaimer: I AM NOT RESPONSIBLE FOR ANY PROMPT PROVIDED BY USER AND PROMPT RETURNED FROM THE MODEL. THIS APP SHOULD BE USED FOR EDUCATIONAL PURPOSE "
            f"WITHOUT ANY OFFENSIVE, AGGRESSIVE INTENTS. {model_name} can produce factually incorrect output, and should not be relied on to produce "
            f"factually accurate information. {model_name} was trained on various public datasets; while great efforts "
            "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
            "biased, or otherwise offensive outputs.",
            elem_classes=["disclaimer"],
        )

    # Submitting from the textbox: record the turn (text is kept in the box),
    # then stream the answer into the chat window.
    msg_submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=True,
        show_progress="full",
    ).then(bot, chatbot, chatbot, queue=True)
    # Clicking Submit: same flow, but `user1` clears the textbox.
    submit_click_event = submit.click(
        fn=user1,  # clear msg
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=True,
        show_progress="full",
    ).then(bot, chatbot, chatbot, queue=True)
    # Stop cancels whichever of the two generation chains is running.
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[msg_submit_event, submit_click_event],
        queue=False,
    )
    # Returning None to the chatbot empties the conversation.
    clear.click(lambda: None, None, chatbot, queue=False)

    # Invisible widgets whose only purpose is to expose `predict_api` under
    # the endpoint name "api".
    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
        input_text = gr.Text()
        api_btn = gr.Button("Go", variant="primary")
        out_text = gr.Text()

    api_btn.click(
        predict_api,
        input_text,
        out_text,
        api_name="api",
    )
378
+
379
+ # block.load(update_buff, [], buff, every=1)
380
+ # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
381
+
382
+ # concurrency_count=5, max_size=20
383
+ # max_size=36, concurrency_count=14
384
+ # CPU cpu_count=2 16G, model 7G
385
+ # CPU UPGRADE cpu_count=8 32G, model 7G
386
+
387
# NOTE: deriving the concurrency from total RAM divided by the model file size
# (or from the physical core count: 32G budget at >= 8 cores, 16G otherwise)
# was tried and, per the author's remark, does not work.  That experiment used
# to sit here as commented-out code inside a throwaway string assigned to `_`;
# a fixed value is used instead.
concurrency_count = 1
logger.info(f"{concurrency_count=}")

# NOTE(review): `concurrency_count` is accepted by the Gradio 3.x `queue()`;
# confirm the pinned Gradio version before upgrading.
block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)