ffreemt committed on
Commit
bc41479
·
1 Parent(s): 1774daa

Update memory=None, prelude

Browse files
Files changed (1) hide show
  1. app.py +24 -12
app.py CHANGED
@@ -166,6 +166,8 @@ if "forindo" in platform.node().lower():
166
  url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin" # 29.7G
167
  else:
168
  url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
 
 
169
  logger.debug(f"{url=}")
170
  try:
171
  model_loc, file_size = dl_hf_model(url)
@@ -209,7 +211,7 @@ memory = ConversationBufferWindowMemory(
209
  conversation = ConversationChain(
210
  llm=LLM,
211
  prompt=prompt,
212
- memory=memory,
213
  verbose=True,
214
  )
215
  logger.debug(f"{conversation.prompt.template=}") # type: ignore
@@ -221,6 +223,7 @@ config.stop = stop
221
  config.threads = cpu_count
222
 
223
  try:
 
224
  LLM_api = CTransformers(
225
  model=model_loc,
226
  model_type="llama",
@@ -282,11 +285,13 @@ def bot(history):
282
  flag = 1
283
  then = time.time()
284
  prefix = "" # to please pyright
 
285
  with about_time() as atime: # type: ignore
286
  while True:
287
  if deq:
288
  if flag:
289
- prefix = f"({time.time() - then:.2f}s) "
 
290
  flag = 0
291
  _ = deq.popleft()
292
  if _ is sig_end:
@@ -299,7 +304,7 @@ def bot(history):
299
  time.sleep(0.01)
300
  _ = (
301
  f"(time elapsed: {atime.duration_human}, " # type: ignore
302
- f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
303
  )
304
 
305
  history[-1][1] = "".join(response) + f"\n{_}"
@@ -343,8 +348,8 @@ css = """
343
  """
344
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
345
  examples_list = [
346
- ["Hello I am mike."],
347
- ["What's my name?"],
348
  ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
349
  [
350
  "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
@@ -388,10 +393,16 @@ examples_list = [
388
 
389
  logger.info("start block")
390
 
 
 
 
 
 
391
  with gr.Blocks(
392
  title=f"{Path(model_loc).name}",
393
  theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
394
  css=css,
 
395
  ) as block:
396
  # buff_var = gr.State("")
397
  with gr.Accordion("🎈 Info", open=False):
@@ -399,13 +410,14 @@ with gr.Blocks(
399
  # """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
400
  # )
401
  gr.Markdown(
402
- f"""<h5><center>{Path(model_loc).name}</center></h4>
403
- The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
404
- It typically takes about xxx seconds for the first response to appear.
405
-
406
- Most examples are meant for another model.
407
- You probably should try to test
408
- some related prompts.""",
 
409
  elem_classes="xsmall",
410
  )
411
 
 
166
  url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin" # 29.7G
167
  else:
168
  url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
169
+
170
+ # url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
171
  logger.debug(f"{url=}")
172
  try:
173
  model_loc, file_size = dl_hf_model(url)
 
211
  conversation = ConversationChain(
212
  llm=LLM,
213
  prompt=prompt,
214
+ # memory=memory, # default memory=None
215
  verbose=True,
216
  )
217
  logger.debug(f"{conversation.prompt.template=}") # type: ignore
 
223
  config.threads = cpu_count
224
 
225
  try:
226
+ # raise Exception # disable api
227
  LLM_api = CTransformers(
228
  model=model_loc,
229
  model_type="llama",
 
285
  flag = 1
286
  then = time.time()
287
  prefix = "" # to please pyright
288
+ prelude = 0.0
289
  with about_time() as atime: # type: ignore
290
  while True:
291
  if deq:
292
  if flag:
293
+ prelude = time.time() - then
294
+ prefix = f"({prelude:.2f}s) "
295
  flag = 0
296
  _ = deq.popleft()
297
  if _ is sig_end:
 
304
  time.sleep(0.01)
305
  _ = (
306
  f"(time elapsed: {atime.duration_human}, " # type: ignore
307
+ f"{(atime.duration - prelude)/len(''.join(response)):.2f}s/char)" # type: ignore
308
  )
309
 
310
  history[-1][1] = "".join(response) + f"\n{_}"
 
348
  """
349
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
350
  examples_list = [
351
+ # ["Hello I am mike."],
352
+ # ["What's my name?"],
353
  ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
354
  [
355
  "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
 
393
 
394
  logger.info("start block")
395
 
396
+
397
+ port = 7860
398
+ if "forindo" in platform.node():
399
+ port = 7861
400
+
401
  with gr.Blocks(
402
  title=f"{Path(model_loc).name}",
403
  theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
404
  css=css,
405
+ port=port,
406
  ) as block:
407
  # buff_var = gr.State("")
408
  with gr.Accordion("🎈 Info", open=False):
 
410
  # """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
411
  # )
412
  gr.Markdown(
413
+ (
414
+ f"""<h5><center>{Path(model_loc).name}</center></h4>"""
415
+ # The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
416
+ # It typically takes about xxx seconds for the first response to appear.
417
+ "Most examples are meant for another model. "
418
+ "You probably should try to test "
419
+ "some related prompts. "
420
+ ),
421
  elem_classes="xsmall",
422
  )
423