ffreemt
committed on
Commit
·
bc41479
1
Parent(s):
1774daa
Update memory=None, prelude
Browse files
app.py
CHANGED
@@ -166,6 +166,8 @@ if "forindo" in platform.node().lower():
|
|
166 |
url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin" # 29.7G
|
167 |
else:
|
168 |
url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
|
|
|
|
|
169 |
logger.debug(f"{url=}")
|
170 |
try:
|
171 |
model_loc, file_size = dl_hf_model(url)
|
@@ -209,7 +211,7 @@ memory = ConversationBufferWindowMemory(
|
|
209 |
conversation = ConversationChain(
|
210 |
llm=LLM,
|
211 |
prompt=prompt,
|
212 |
-
memory=memory,
|
213 |
verbose=True,
|
214 |
)
|
215 |
logger.debug(f"{conversation.prompt.template=}") # type: ignore
|
@@ -221,6 +223,7 @@ config.stop = stop
|
|
221 |
config.threads = cpu_count
|
222 |
|
223 |
try:
|
|
|
224 |
LLM_api = CTransformers(
|
225 |
model=model_loc,
|
226 |
model_type="llama",
|
@@ -282,11 +285,13 @@ def bot(history):
|
|
282 |
flag = 1
|
283 |
then = time.time()
|
284 |
prefix = "" # to please pyright
|
|
|
285 |
with about_time() as atime: # type: ignore
|
286 |
while True:
|
287 |
if deq:
|
288 |
if flag:
|
289 |
-
|
|
|
290 |
flag = 0
|
291 |
_ = deq.popleft()
|
292 |
if _ is sig_end:
|
@@ -299,7 +304,7 @@ def bot(history):
|
|
299 |
time.sleep(0.01)
|
300 |
_ = (
|
301 |
f"(time elapsed: {atime.duration_human}, " # type: ignore
|
302 |
-
f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
|
303 |
)
|
304 |
|
305 |
history[-1][1] = "".join(response) + f"\n{_}"
|
@@ -343,8 +348,8 @@ css = """
|
|
343 |
"""
|
344 |
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
|
345 |
examples_list = [
|
346 |
-
["Hello I am mike."],
|
347 |
-
["What's my name?"],
|
348 |
["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
|
349 |
[
|
350 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
@@ -388,10 +393,16 @@ examples_list = [
|
|
388 |
|
389 |
logger.info("start block")
|
390 |
|
|
|
|
|
|
|
|
|
|
|
391 |
with gr.Blocks(
|
392 |
title=f"{Path(model_loc).name}",
|
393 |
theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
|
394 |
css=css,
|
|
|
395 |
) as block:
|
396 |
# buff_var = gr.State("")
|
397 |
with gr.Accordion("🎈 Info", open=False):
|
@@ -399,13 +410,14 @@ with gr.Blocks(
|
|
399 |
# """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
|
400 |
# )
|
401 |
gr.Markdown(
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
409 |
elem_classes="xsmall",
|
410 |
)
|
411 |
|
|
|
166 |
url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin" # 29.7G
|
167 |
else:
|
168 |
url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
|
169 |
+
|
170 |
+
# url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
|
171 |
logger.debug(f"{url=}")
|
172 |
try:
|
173 |
model_loc, file_size = dl_hf_model(url)
|
|
|
211 |
conversation = ConversationChain(
|
212 |
llm=LLM,
|
213 |
prompt=prompt,
|
214 |
+
# memory=memory, # default memory=None
|
215 |
verbose=True,
|
216 |
)
|
217 |
logger.debug(f"{conversation.prompt.template=}") # type: ignore
|
|
|
223 |
config.threads = cpu_count
|
224 |
|
225 |
try:
|
226 |
+
# raise Exception # disable api
|
227 |
LLM_api = CTransformers(
|
228 |
model=model_loc,
|
229 |
model_type="llama",
|
|
|
285 |
flag = 1
|
286 |
then = time.time()
|
287 |
prefix = "" # to please pyright
|
288 |
+
prelude = 0.0
|
289 |
with about_time() as atime: # type: ignore
|
290 |
while True:
|
291 |
if deq:
|
292 |
if flag:
|
293 |
+
prelude = time.time() - then
|
294 |
+
prefix = f"({prelude:.2f}s) "
|
295 |
flag = 0
|
296 |
_ = deq.popleft()
|
297 |
if _ is sig_end:
|
|
|
304 |
time.sleep(0.01)
|
305 |
_ = (
|
306 |
f"(time elapsed: {atime.duration_human}, " # type: ignore
|
307 |
+
f"{(atime.duration - prelude)/len(''.join(response)):.2f}s/char)" # type: ignore
|
308 |
)
|
309 |
|
310 |
history[-1][1] = "".join(response) + f"\n{_}"
|
|
|
348 |
"""
|
349 |
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
|
350 |
examples_list = [
|
351 |
+
# ["Hello I am mike."],
|
352 |
+
# ["What's my name?"],
|
353 |
["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
|
354 |
[
|
355 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
|
|
393 |
|
394 |
logger.info("start block")
|
395 |
|
396 |
+
|
397 |
+
port = 7860
|
398 |
+
if "forindo" in platform.node():
|
399 |
+
port = 7861
|
400 |
+
|
401 |
with gr.Blocks(
|
402 |
title=f"{Path(model_loc).name}",
|
403 |
theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
|
404 |
css=css,
|
405 |
+
port=port,
|
406 |
) as block:
|
407 |
# buff_var = gr.State("")
|
408 |
with gr.Accordion("🎈 Info", open=False):
|
|
|
410 |
# """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
|
411 |
# )
|
412 |
gr.Markdown(
|
413 |
+
(
|
414 |
+
f"""<h5><center>{Path(model_loc).name}</center></h4>"""
|
415 |
+
# The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
|
416 |
+
# It typically takes about xxx seconds for the first response to appear.
|
417 |
+
"Most examples are meant for another model. "
|
418 |
+
"You probably should try to test "
|
419 |
+
"some related prompts. "
|
420 |
+
),
|
421 |
elem_classes="xsmall",
|
422 |
)
|
423 |
|