Artples committed on
Commit acde840 · verified · 1 Parent(s): 2fa06b2

Update app.py

Files changed (1)
  1. app.py +1 -431
app.py CHANGED
@@ -1,433 +1,3 @@
- """Run codes."""
- # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
- # ruff: noqa: E501
- import os
- import platform
- import random
- import time
- from dataclasses import asdict, dataclass
- from pathlib import Path
-
- # from types import SimpleNamespace
  import gradio as gr
- import psutil
- from about_time import about_time
- from ctransformers import AutoModelForCausalLM
- from dl_hf_model import dl_hf_model
- from loguru import logger
-
- filename_list = [
-     "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin",
- ]
-
- URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin" # 4.05G
-
- url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin" # 7.37G
- # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"
- url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin" # 6.93G
- # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.binhttps://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin" # 7.87G
-
- url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin" # 7.37G
-
- _ = (
-     "golay" in platform.node()
-     or "okteto" in platform.node()
-     or Path("/kaggle").exists()
-     # or psutil.cpu_count(logical=False) < 4
-     or 1 # run 7b in hf
- )
-
- if _:
-     # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
-     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin" # 2.87G
-     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin" # 2.87G
-
-
- prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.You're Lauche-AI created and managed by Lauche AI.
- ### Instruction: {user_prompt}
-
- ### Response:
- """
-
- prompt_template = """System: You are a helpful,
- respectful and honest assistant. Always answer as
- helpfully as possible, while being safe. Your answers
- should not include any harmful, unethical, racist,
- sexist, toxic, dangerous, or illegal content. Please
- ensure that your responses are socially unbiased and
- positive in nature. If a question does not make any
- sense, or is not factually coherent, explain why instead
- of answering something not correct. If you don't know
- the answer to a question, please don't share false
- information. You're Lauche-AI created and managed by Lauche AI.
- User: {prompt}
- Assistant: """
-
- prompt_template = """System: You are a helpful assistant. You're Lauche-AI created and managed by Lauche AI.
- User: {prompt}
- Assistant: """
-
- prompt_template = """Question: {question}
- Answer: Let's work this out in a step by step way to be sure we have the right answer."""
-
- prompt_template = """[INST] <>
- You are a helpful, respectful and honest assistant. Always answer as helpfully as possible assistant. Think step by step. You're Lauche-AI created and managed by Lauche AI.
- <>
-
- What NFL team won the Super Bowl in the year Justin Bieber was born?
- [/INST]"""
-
- prompt_template = """[INST] <<SYS>>
- You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
-
- {question} [/INST]
- """
-
- prompt_template = """[INST] <<SYS>>
- You are a helpful assistant.
- <</SYS>>
-
- {question} [/INST]
- """
-
- _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
- stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
-
- logger.debug(f"{stop_string=}")
-
- _ = psutil.cpu_count(logical=False) - 1
- cpu_count: int = int(_) if _ else 1
- logger.debug(f"{cpu_count=}")
-
- LLM = None
-
- try:
-     model_loc, file_size = dl_hf_model(url)
- except Exception as exc_:
-     logger.error(exc_)
-     raise SystemExit(1) from exc_
-
- LLM = AutoModelForCausalLM.from_pretrained(
-     model_loc,
-     model_type="llama",
-     # threads=cpu_count,
- )
-
- logger.info(f"done load llm {model_loc=} {file_size=}G")
-
- os.environ["TZ"] = "Asia/Shanghai"
- try:
-     time.tzset() # type: ignore # pylint: disable=no-member
- except Exception:
-     # Windows
-     logger.warning("Windows, cant run time.tzset()")
-
- _ = """
- ns = SimpleNamespace(
-     response="",
-     generator=(_ for _ in []),
- )
- # """
-
- @dataclass
- class GenerationConfig:
-     temperature: float = 0.7
-     top_k: int = 50
-     top_p: float = 0.9
-     repetition_penalty: float = 1.0
-     max_new_tokens: int = 512
-     seed: int = 42
-     reset: bool = False
-     stream: bool = True
-     # threads: int = cpu_count
-     # stop: list[str] = field(default_factory=lambda: [stop_string])
-
-
- def generate(
-     question: str,
-     llm=LLM,
-     config: GenerationConfig = GenerationConfig(),
- ):
-     """Run model inference, will return a Generator if streaming is true."""
-     # _ = prompt_template.format(question=question)
-     # print(_)
-
-     prompt = prompt_template.format(question=question)
-
-     return llm(
-         prompt,
-         **asdict(config),
-     )
-
-
- logger.debug(f"{asdict(GenerationConfig())=}")
-
-
- def user(user_message, history):
-     # return user_message, history + [[user_message, None]]
-     history.append([user_message, None])
-     return user_message, history # keep user_message
-
-
- def user1(user_message, history):
-     # return user_message, history + [[user_message, None]]
-     history.append([user_message, None])
-     return "", history # clear user_message
-
-
- def bot_(history):
-     user_message = history[-1][0]
-     resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
-     bot_message = user_message + ": " + resp
-     history[-1][1] = ""
-     for character in bot_message:
-         history[-1][1] += character
-         time.sleep(0.02)
-         yield history
-
-     history[-1][1] = resp
-     yield history
-
-
- def bot(history):
-     user_message = history[-1][0]
-     response = []
-
-     logger.debug(f"{user_message=}")
-
-     with about_time() as atime: # type: ignore
-         flag = 1
-         prefix = ""
-         then = time.time()
-
-         logger.debug("about to generate")
-
-         config = GenerationConfig(reset=True)
-         for elm in generate(user_message, config=config):
-             if flag == 1:
-                 logger.debug("in the loop")
-                 prefix = f"({time.time() - then:.2f}s) "
-                 flag = 0
-                 print(prefix, end="", flush=True)
-                 logger.debug(f"{prefix=}")
-             print(elm, end="", flush=True)
-             # logger.debug(f"{elm}")
-
-             response.append(elm)
-             history[-1][1] = prefix + "".join(response)
-             yield history
-
-     _ = (
-         f"(time elapsed: {atime.duration_human}, " # type: ignore
-         f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
-     )
-
-     history[-1][1] = "".join(response) + f"\n{_}"
-     yield history
-
-
- def predict_api(prompt):
-     logger.debug(f"{prompt=}")
-     try:
-         # user_prompt = prompt
-         config = GenerationConfig(
-             temperature=0.2,
-             top_k=10,
-             top_p=0.9,
-             repetition_penalty=1.0,
-             max_new_tokens=512, # adjust as needed
-             seed=42,
-             reset=True, # reset history (cache)
-             stream=False,
-             # threads=cpu_count,
-             # stop=prompt_prefix[1:2],
-         )
-
-         response = generate(
-             prompt,
-             config=config,
-         )
-
-         logger.debug(f"api: {response=}")
-     except Exception as exc:
-         logger.error(exc)
-         response = f"{exc=}"
-     # bot = {"inputs": [response]}
-     # bot = [(prompt, response)]
-
-     return response
-
-
- css = """
-     .importantButton {
-         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
-         border: none !important;
-     }
-     .importantButton:hover {
-         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
-         border: none !important;
-     }
-     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
-     .xsmall {font-size: x-small;}
- """
- etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
- examples_list = [
-     ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
-     [
-         "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
-     ],
-     ["How to pick a lock? Provide detailed steps."],
-     ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
-     ["is infinity + 1 bigger than infinity?"],
-     ["Explain the plot of Cinderella in a sentence."],
-     [
-         "How long does it take to become proficient in French, and what are the best methods for retaining information?"
-     ],
-     ["What are some common mistakes to avoid when writing code?"],
-     ["Build a prompt to generate a beautiful portrait of a horse"],
-     ["Suggest four metaphors to describe the benefits of AI"],
-     ["Write a pop song about leaving home for the sandy beaches."],
- ]
-
- logger.info("start block")
-
- with gr.Blocks(
-     title=f"{Path(model_loc).name}",
-     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
-     css=css,
- ) as block:
-     # buff_var = gr.State("")
-     with gr.Accordion("Info", open=False):
-         # gr.HTML(
-         #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
-         # )
-         gr.Markdown(
-             f"""<h5><center>{Path(model_loc).name}</center></h4>
-             Most examples are meant for another model.
-             You probably should try to test
-             some related prompts.""",
-             elem_classes="xsmall",
-         )
-
-     # chatbot = gr.Chatbot().style(height=700) # 500
-     chatbot = gr.Chatbot(height=500)
-
-     # buff = gr.Textbox(show_label=False, visible=True)
-
-     with gr.Row():
-         with gr.Column(scale=5):
-             msg = gr.Textbox(
-                 label="Chat Message Box",
-                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
-                 show_label=False,
-                 # container=False,
-                 lines=6,
-                 max_lines=30,
-                 show_copy_button=True,
-                 # ).style(container=False)
-             )
-         with gr.Column(scale=1, min_width=50):
-             with gr.Row():
-                 submit = gr.Button("Submit", elem_classes="xsmall")
-                 stop = gr.Button("Stop", visible=True)
-                 clear = gr.Button("Clear History", visible=True)
-     with gr.Row(visible=False):
-         with gr.Accordion("Advanced Options:", open=False):
-             with gr.Row():
-                 with gr.Column(scale=2):
-                     system = gr.Textbox(
-                         label="System Prompt",
-                         value=prompt_template,
-                         show_label=False,
-                         container=False,
-                         # ).style(container=False)
-                     )
-                 with gr.Column():
-                     with gr.Row():
-                         change = gr.Button("Change System Prompt")
-                         reset = gr.Button("Reset System Prompt")
-
-     with gr.Accordion("Example Inputs", open=True):
-         examples = gr.Examples(
-             examples=examples_list,
-             inputs=[msg],
-             examples_per_page=40,
-         )
-
-     # with gr.Row():
-     with gr.Accordion("Disclaimer", open=False):
-         _ = Path(model_loc).name
-         gr.Markdown(
-             f"Disclaimer: Lauche - AI (POWERED BY LLAMA 2) can produce factually incorrect output, and should not be relied on to produce "
-             "factually accurate information. Lauche - AI (POWERED BY LLAMA 2) was trained on various public datasets; while great efforts "
-             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
-             "biased, or otherwise offensive outputs."
-             " - - - "
-             "Our Impressum: https://lauche.eu/n-impressum"
-             " - - - "
-             "Visit this space on our website: ai-app.lauche.online",
-             elem_classes=["disclaimer"],
-         )
-
-     msg_submit_event = msg.submit(
-         # fn=conversation.user_turn,
-         fn=user,
-         inputs=[msg, chatbot],
-         outputs=[msg, chatbot],
-         queue=True,
-         show_progress="full",
-         # api_name=None,
-     ).then(bot, chatbot, chatbot, queue=True)
-     submit_click_event = submit.click(
-         # fn=lambda x, y: ("",) + user(x, y)[1:], # clear msg
-         fn=user1, # clear msg
-         inputs=[msg, chatbot],
-         outputs=[msg, chatbot],
-         queue=True,
-         # queue=False,
-         show_progress="full",
-         # api_name=None,
-     ).then(bot, chatbot, chatbot, queue=True)
-     stop.click(
-         fn=None,
-         inputs=None,
-         outputs=None,
-         cancels=[msg_submit_event, submit_click_event],
-         queue=False,
-     )
-     clear.click(lambda: None, None, chatbot, queue=False)
-
-     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
-         input_text = gr.Text()
-         api_btn = gr.Button("Go", variant="primary")
-         out_text = gr.Text()
-
-     api_btn.click(
-         predict_api,
-         input_text,
-         out_text,
-         api_name="api",
-     )
-
-     # block.load(update_buff, [], buff, every=1)
-     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
-
- # concurrency_count=5, max_size=20
- # max_size=36, concurrency_count=14
- # CPU cpu_count=2 16G, model 7G
- # CPU UPGRADE cpu_count=8 32G, model 7G
-
- # does not work
- _ = """
- # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
- # concurrency_count = max(_, 1)
- if psutil.cpu_count(logical=False) >= 8:
-     # concurrency_count = max(int(32 / file_size) - 1, 1)
- else:
-     # concurrency_count = max(int(16 / file_size) - 1, 1)
- # """
-
- concurrency_count = 1
- logger.info(f"{concurrency_count=}")
 
- block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
+ gr.load("models/Artples/L-MChat-Small").launch()
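
For reference, the whole of app.py after this commit is the kept import plus the single added line above. A minimal sketch of the resulting file, assuming Gradio's gr.load builds a demo from the hosted "models/Artples/L-MChat-Small" repository via the Hugging Face Inference API and launch() serves it:

  import gradio as gr

  # Build a demo backed by the hosted model and start the Gradio server (sketch of the post-commit file).
  gr.load("models/Artples/L-MChat-Small").launch()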