Update app.py
app.py CHANGED
@@ -34,8 +34,8 @@ data.add_faiss_index("embeddings", custom_index=index)
 #question = "How can I reverse Diabetes?"
 
 SYS_PROMPT = """You are an assistant for answering questions.
-You are given the extracted parts of
-If you
+You are given the extracted parts of a document, a question, and a history of questions and answers. Provide a conversational answer.
+If you do not know the answer, just say "I do not know." Do not make up an answer. Don't repeat the SYS_PROMPT."""
 # Provides context of how to answer the question
 
 #llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf
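The tightened SYS_PROMPT is meant to be paired with the FAISS-retrieved passages and the running question/answer history. A minimal sketch of how such a prompt is typically assembled into the messages list; the build_messages helper and its arguments are illustrative, not the app's actual code:

def build_messages(question, retrieved_docs, history):
    # Join the FAISS-retrieved passages into the "extracted parts of a
    # document" that the system prompt refers to.
    context = "\n".join(retrieved_docs)
    # Flatten prior (question, answer) turns into plain-text history.
    past = "\n".join(f"Q: {q}\nA: {a}" for q, a in history)
    user_turn = (
        f"Context:\n{context}\n\n"
        f"History:\n{past}\n\n"
        f"Question: {question}"
    )
    return [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": user_turn},
    ]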
@@ -47,7 +47,6 @@ model = Llama(
     n_gpu_layers = 0,
     temperature=0.75,
     n_ctx = 4096,
-    max_tokens=500,
     top_p=0.95 #,
     # eos_tokens=terminators
     # callback_manager=callback_manager,
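Dropping max_tokens=500 here matches how llama-cpp-python splits its settings: the Llama() constructor takes model-loading options such as n_ctx and n_gpu_layers, while the generation cap is a per-call argument to create_chat_completion. A minimal sketch of that split, with a placeholder model path:

from llama_cpp import Llama

model = Llama(
    model_path="tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf",  # placeholder path
    n_ctx=4096,      # context window, fixed at load time
    n_gpu_layers=0,  # CPU-only inference
)

resp = model.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=600,   # the generation cap belongs here, per call
    temperature=0.75,
    top_p=0.95,
)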
@@ -118,9 +117,10 @@ def talk(prompt, history):
     # the chat template structure should be based on text generation model format
 
     # indicates the end of a sequence
-    stream = model.create_chat_completion(messages = messages, max_tokens=
+    stream = model.create_chat_completion(messages = messages, max_tokens=600, stop=["</s>"], stream=False)
     # print(f"{stream}")
     print("check 7")
+    global historylog
     print(historylog)
     print(stream['choices'][0]['message']['content'])
     return(stream['choices'][0]['message']['content'])
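With stream=False the call returns one completed OpenAI-style response dict, which is the shape the following print and return lines index into, rather than an iterator of chunks. A short sketch of both access patterns, illustrative rather than part of the commit:

# Non-streaming: one finished dict.
resp = model.create_chat_completion(
    messages=messages, max_tokens=600, stop=["</s>"], stream=False
)
answer = resp["choices"][0]["message"]["content"]

# Streaming (stream=True) would instead yield chunks whose incremental
# text lives in chunk["choices"][0]["delta"], accumulated while iterating.

One caveat on the added global historylog: Python requires a global declaration to appear before the first reference to that name anywhere in the function (otherwise it is a SyntaxError), so this placement is only valid if historylog is not read earlier in talk.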