zac committed on
Commit
8833e69
·
1 Parent(s): f968745

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -6,17 +6,14 @@ from llama_cpp import Llama
6
  from huggingface_hub import hf_hub_download #load from huggingfaces
7
 
8
 
9
- llm = Llama(model_path= hf_hub_download(repo_id="TheBloke/airoboros-l2-13b-gpt4-m2.0-GGML", filename="airoboros-l2-13b-gpt4-m2.0.ggmlv3.q6_K.bin"), n_ctx=2048) # download model from Hugging Face Hub; n_ctx=2048 for high context length
10
 
11
  history = []
12
 
13
  def generate_text(input_text, history):
14
  print("history ",history)
15
  print("input ", input_text)
16
- full_conversation = f"{full_conversation[-1]} Q: {input_text} \n A:"
17
- print("full convo", full_conversation)
18
-
19
- output = llm(full_conversation, max_tokens=1024, stop=["Q:", "\n"], echo=True)
20
  response = output['choices'][0]['text']
21
  return response
22
 
 
6
  from huggingface_hub import hf_hub_download #load from huggingfaces
7
 
8
 
9
+ llm = Llama(model_path= hf_hub_download(repo_id="TheBloke/Vigogne-2-7B-Chat-GGML", filename="vigogne-2-7b-chat.ggmlv3.q4_1.bin"), n_ctx=2048) # download model from Hugging Face Hub; n_ctx=2048 for high context length
10
 
11
  history = []
12
 
13
  def generate_text(input_text, history):
14
  print("history ",history)
15
  print("input ", input_text)
16
+ output = llm(input_text, max_tokens=1024, stop=["Q:", "\n"], echo=True)
 
 
 
17
  response = output['choices'][0]['text']
18
  return response
19