Roger Condori committed: change default model and added limits for demo

conversadocs/bones.py (+7 −4)
@@ -100,7 +100,7 @@ class DocChat(param.Parameterized):
         super(DocChat, self).__init__( **params)
         self.loaded_file = ["demo_docs/demo.txt"]
         self.db = load_db(self.loaded_file)
-        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.…
+        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
         self.qa = q_a(self.db, "stuff", self.k_value, self.llm)

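The replaced default call (its tail is truncated in the diff view) is swapped for the 2-bit q2_K build of Llama-2-7B-Chat with pinned, conservative generation settings; note that the trailing k=3 is the retriever's chunk count, separate from the sampler's top_k=50. As a minimal sketch of what those parameters do, assuming a GGML-era llama-cpp-python backend (the diff never shows change_llm()'s internals, so the loader below is illustrative, not the repo's code):

from llama_cpp import Llama  # requires a GGML-era llama-cpp-python release

# Hypothetical stand-in for whatever change_llm() builds internally.
llm = Llama(model_path="llama-2-7b-chat.ggmlv3.q2_K.bin")  # smallest 2-bit quantization

out = llm(
    "Q: What does the demo document describe? A:",
    max_tokens=256,      # cap on generated tokens; keeps CPU latency bounded
    temperature=0.2,     # near-greedy sampling for factual answers
    top_p=0.95,          # nucleus-sampling cutoff
    top_k=50,            # the sampler's top_k, not the retriever's k=3
    repeat_penalty=1.2,  # discourages the model from looping
)
print(out["choices"][0]["text"])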
@@ -133,7 +133,7 @@ class DocChat(param.Parameterized):
             result = self.qa({"question": query, "chat_history": self.chat_history})
         except:
             print("Error not get response from model, reloaded default llama-2 7B config")
-            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.…
+            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
             self.qa = q_a(self.db, "stuff", k_max, self.llm)
             result = self.qa({"question": query, "chat_history": self.chat_history})

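This hunk is the recovery path: if the active model throws during a query, the chain is rebuilt on the known-good default and the question is retried once with k_max chunks. A sketch of that pattern, with hypothetical names (ask and reload_default are not in the repo):

def ask(qa, query, chat_history, reload_default):
    """Run a query; on failure, rebuild on the default model and retry once."""
    try:
        return qa({"question": query, "chat_history": chat_history})
    except Exception:
        # Narrower than the diff's bare `except:`, which would also swallow
        # KeyboardInterrupt and SystemExit.
        qa = reload_default()
        return qa({"question": query, "chat_history": chat_history})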
@@ -145,6 +145,9 @@ class DocChat(param.Parameterized):

     def summarize(self, chunk_size=2000, chunk_overlap=100):
         # load docs
+        if "SET_LIMIT" == os.getenv("DEMO"):
+            return "Since the space only uses the CPU, the summarization function cannot be used."
+
         documents = []
         for file in self.loaded_file:
             ext = "." + file.rsplit(".", 1)[-1]
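The added guard is the "limits for demo" half of the commit: when the DEMO environment variable is set to SET_LIMIT, the CPU-heavy summarization path returns early with an explanatory message. The Yoda-style comparison is equivalent to the usual operand order; where the variable is actually set (Space settings, Dockerfile) is not visible in this diff, so the line simulating it below is an assumption:

import os

os.environ["DEMO"] = "SET_LIMIT"  # assumption: simulate the hosted demo configuration

def summarize():
    if os.getenv("DEMO") == "SET_LIMIT":  # same test as `"SET_LIMIT" == os.getenv("DEMO")`
        return "Since the space only uses the CPU, the summarization function cannot be used."
    return "full summarization would run here"

print(summarize())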
@@ -196,7 +199,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [GPU INFERENCE]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.…
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded Reloaded default llama-2 7B config"
         else:
             try:
@@ -222,7 +225,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [CPU INFERENCE SLOW]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.…
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded default llama-2 7B config"

     def default_falcon_model(self, HF_TOKEN):