Hilda Cran May, s3nh committed on
Commit d45219f · 0 Parent(s)

Duplicate from s3nh/s3nh-chinese-alpaca-2-7b-GGML

Co-authored-by: s3nh <[email protected]>

Files changed (4)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +397 -0
  4. requirements.txt +8 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: S3nh Chinese Alpaca 2 7b GGML
+ emoji: 📊
+ colorFrom: indigo
+ colorTo: green
+ sdk: gradio
+ sdk_version: 3.39.0
+ app_file: app.py
+ pinned: false
+ license: openrail
+ duplicated_from: s3nh/s3nh-chinese-alpaca-2-7b-GGML
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,397 @@
+ import os
+ import platform
+ import random
+ import time
+ from dataclasses import asdict, dataclass
+ from pathlib import Path
+
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+ from ctransformers import AutoModelForCausalLM
+ from dl_hf_model import dl_hf_model
+ from loguru import logger
+
+
+ URL = "https://huggingface.co/s3nh/chinese-alpaca-2-7b-GGML/blob/main/chinese-alpaca-2-7b.ggmlv3.q5_1.bin"  # 4.05G
+
+ _ = (
+     "golay" in platform.node()
+     or "okteto" in platform.node()
+     or Path("/kaggle").exists()
+     # or psutil.cpu_count(logical=False) < 4
+     or 1  # run 7b in hf
+ )
+
+ if _:
+     url = "https://huggingface.co/s3nh/chinese-alpaca-2-7b-GGML/blob/main/chinese-alpaca-2-7b.ggmlv3.q5_1.bin"  # 2.87G
+
+
+ prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+ ### Instruction: {user_prompt}
+ ### Response:
+ """
+
+ prompt_template = """System: You are a helpful,
+ respectful and honest assistant. Always answer as
+ helpfully as possible, while being safe. Your answers
+ should not include any harmful, unethical, racist,
+ sexist, toxic, dangerous, or illegal content. Please
+ ensure that your responses are socially unbiased and
+ positive in nature. If a question does not make any
+ sense, or is not factually coherent, explain why instead
+ of answering something not correct. If you don't know
+ the answer to a question, please don't share false
+ information.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """System: You are a helpful assistant.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """Question: {question}
+ Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+ prompt_template = """[INST] <>
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible assistant. Think step by step.
+ <>
+ What NFL team won the Super Bowl in the year Justin Bieber was born?
+ [/INST]"""
+
+ prompt_template = """[INST] <<SYS>>
+ You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+ {question} [/INST]
+ """
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant.
+ <</SYS>>
+ {question} [/INST]
+ """
+
+ prompt_template = """### HUMAN:
+ {question}
+ ### RESPONSE:"""
+
+
+ prompt_template = """<|prompt|>:{question}</s>
+ <|answer|>:"""
+
+
+ prompt_template = """SYSTEM:
+ USER: {question}
+ ASSISTANT: """
+
+
+ _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+ stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+ logger.debug(f"{stop_string=} not used")
+
+ _ = psutil.cpu_count(logical=False) - 1
+ cpu_count: int = int(_) if _ else 1
+ logger.debug(f"{cpu_count=}")
+
+ LLM = None
+
+ try:
+     model_loc, file_size = dl_hf_model(url)
+ except Exception as exc_:
+     logger.error(exc_)
+     raise SystemExit(1) from exc_
+
+ LLM = AutoModelForCausalLM.from_pretrained(
+     model_loc,
+     model_type="llama",
+ )
+
+ logger.info(f"done load llm {model_loc=} {file_size=}G")
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset()
+ except Exception:
+     # time.tzset() is not available on Windows
+     logger.warning("Windows, cant run time.tzset()")
+
+
+ @dataclass
+ class GenerationConfig:
+     temperature: float = 0.7
+     top_k: int = 50
+     top_p: float = 0.9
+     repetition_penalty: float = 1.0
+     max_new_tokens: int = 512
+     seed: int = 42
+     reset: bool = False
+     stream: bool = True
+     # threads: int = cpu_count
+     # stop: list[str] = field(default_factory=lambda: [stop_string])
+
+
+ def generate(
+     question: str,
+     llm=LLM,
+     config: GenerationConfig = GenerationConfig(),
+ ):
+     """Run model inference, will return a Generator if streaming is true."""
+     prompt = prompt_template.format(question=question)
+
+     return llm(
+         prompt,
+         **asdict(config),
+     )
+
+
+ logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+ def user(user_message, history):
+     history.append([user_message, None])
+     return user_message, history
+
+
+ def user1(user_message, history):
+     history.append([user_message, None])
+     return "", history
+
+
+ def bot_(history):
+     user_message = history[-1][0]
+     resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+     bot_message = user_message + ": " + resp
+     history[-1][1] = ""
+     for character in bot_message:
+         history[-1][1] += character
+         time.sleep(0.02)
+         yield history
+
+     history[-1][1] = resp
+     yield history
+
+
+ def bot(history):
+     user_message = history[-1][0]
+     response = []
+
+     logger.debug(f"{user_message=}")
+
+     with about_time() as atime:
+         flag = 1
+         prefix = ""
+         then = time.time()
+
+         logger.debug("about to generate")
+
+         config = GenerationConfig(reset=True)
+         for elm in generate(user_message, config=config):
+             if flag == 1:
+                 logger.debug("in the loop")
+                 prefix = f"({time.time() - then:.2f}s) "
+                 flag = 0
+                 print(prefix, end="", flush=True)
+                 logger.debug(f"{prefix=}")
+             print(elm, end="", flush=True)
+
+             response.append(elm)
+             history[-1][1] = prefix + "".join(response)
+             yield history
+
+     _ = (
+         f"(time elapsed: {atime.duration_human}, "
+         f"{atime.duration/len(''.join(response)):.2f}s/char)"
+     )
+
+     history[-1][1] = "".join(response) + f"\n{_}"
+     yield history
+
+
+ def predict_api(prompt):
+     logger.debug(f"{prompt=}")
+     try:
+         # user_prompt = prompt
+         config = GenerationConfig(
+             temperature=0.2,
+             top_k=10,
+             top_p=0.9,
+             repetition_penalty=1.0,
+             max_new_tokens=512,  # adjust as needed
+             seed=42,
+             reset=True,
+             stream=False,
+         )
+
+         response = generate(
+             prompt,
+             config=config,
+         )
+
+         logger.debug(f"api: {response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     return response
+
+
+ css = """
+     .importantButton {
+         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+         border: none !important;
+     }
+     .importantButton:hover {
+         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+         border: none !important;
+     }
+     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+     .xsmall {font-size: x-small;}
+ """
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples_list = [
+     ["> 你能不能详细介绍一下怎么做披萨? 制作披萨的步骤大致如下:"],
+     ["你推荐我买最新款的iPhone吗?"],
+     ["你是一个资深导游,你能介绍一下中国的首都吗?"],
+     ["你好,我们聊聊音乐吧"],
+ ]
+
+ logger.info("start block")
+
+ with gr.Blocks(
+     title=f"{Path(model_loc).name}",
+     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     # buff_var = gr.State("")
+     with gr.Accordion("🎈 Info", open=False):
+         # gr.HTML(
+         #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+         # )
+         gr.Markdown(
+             f"""<h5><center>{Path(model_loc).name}</center></h5>
+             Most examples are meant for another model.
+             You probably should try to test
+             some related prompts.""",
+             elem_classes="xsmall",
+         )
+
+     # chatbot = gr.Chatbot().style(height=700)  # 500
+     chatbot = gr.Chatbot(height=500)
+
+     # buff = gr.Textbox(show_label=False, visible=True)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                 show_label=False,
+                 # container=False,
+                 lines=6,
+                 max_lines=30,
+                 show_copy_button=True,
+                 # ).style(container=False)
+             )
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=True)
+                 clear = gr.Button("Clear History", visible=True)
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=prompt_template,
+                         show_label=False,
+                         container=False,
+                         # ).style(container=False)
+                     )
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples_list,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     # with gr.Row():
+     with gr.Accordion("Disclaimer", open=True):
+         _ = Path(model_loc).name
+         gr.Markdown(
+             "Disclaimer: I AM NOT RESPONSIBLE FOR ANY PROMPT PROVIDED BY USER AND PROMPT RETURNED FROM THE MODEL. THIS APP SHOULD BE USED FOR EDUCATIONAL PURPOSE "
+             f"WITHOUT ANY OFFENSIVE, AGGRESSIVE INTENTS. {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+
+     msg_submit_event = msg.submit(
+         # fn=conversation.user_turn,
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     submit_click_event = submit.click(
+         # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+         fn=user1,  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         # queue=False,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[msg_submit_event, submit_click_event],
+         queue=False,
+     )
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         api_name="api",
+     )
+
+     # block.load(update_buff, [], buff, every=1)
+     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ # CPU cpu_count=2 16G, model 7G
+ # CPU UPGRADE cpu_count=8 32G, model 7G
+
+ # does not work
+ _ = """
+ # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+ # concurrency_count = max(_, 1)
+ if psutil.cpu_count(logical=False) >= 8:
+     # concurrency_count = max(int(32 / file_size) - 1, 1)
+ else:
+     # concurrency_count = max(int(16 / file_size) - 1, 1)
+ # """
+
+ concurrency_count = 1
+ logger.info(f"{concurrency_count=}")
+
+ block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ ctransformers  # ==0.2.10 0.2.13
+ transformers  # ==4.30.2
+ # huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
+ dl-hf-model
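
Because app.py registers predict_api under api_name="api", the deployed Space can also be queried programmatically. Below is a minimal sketch (not part of this commit) using the gradio_client package; the Space id is a placeholder taken from duplicated_from and should be replaced with the actual name of this Space.

# Minimal sketch (assumption): call the "api" endpoint exposed by
# api_btn.click(..., api_name="api") via gradio_client.
from gradio_client import Client

client = Client("s3nh/s3nh-chinese-alpaca-2-7b-GGML")  # placeholder Space id
result = client.predict("你好,请简单介绍一下你自己", api_name="/api")
print(result)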