eliujl committed
Commit 32944e5
Parent(s): 7f1213b
Improved the support for using a local model
Allow a local model to be downloaded if it does not already exist; otherwise, use the existing local model.
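The essence of the change is a download-if-missing check around a local GGUF file. Below is a minimal sketch of that pattern, assuming the huggingface_hub package and an illustrative repo and filename (the actual candidates the app uses are listed in model_tuples in the app.py diff below); it is a sketch, not the app's exact code.

import os
from huggingface_hub import hf_hub_download

# Illustrative values; app.py builds these from its model_tuples list.
local_llm_path = "./models/"
repo_id = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
filename = "mistral-7b-instruct-v0.1.Q8_0.gguf"

model_path = os.path.join(local_llm_path, filename)
if not os.path.exists(model_path):
    # Fetch the GGUF file into the local directory instead of the HF cache;
    # hf_hub_download returns the final local path of the file.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model",
                                 local_dir=local_llm_path, local_dir_use_symlinks=False)
else:
    # Reuse the model that is already on disk.
    print("Using existing local model at", model_path)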
- app.py +45 -14
- requirements.txt +1 -0
app.py
CHANGED
@@ -121,9 +121,7 @@ def setup_docsearch(use_pinecone, pinecone_index_name, embeddings, chroma_collec
         index_info = index_client.describe_index_stats()
         # namespace_name = ''
         # if index_info is not None:
-        # print(index_info)
         # print(index_info['namespaces'][namespace_name]['vector_count'])
-        # print(index_info['total_vector_count'])
         # else:
         # print("Index information is not available.")
         # n_texts = index_info['namespaces'][namespace_name]['vector_count']
@@ -146,18 +144,38 @@ def get_response(query, chat_history, CRqa):
     result = CRqa({"question": query, "chat_history": chat_history})
     return result['answer'], result['source_documents']
 
+
 @st.cache_resource()
-def use_local_llm(r_llm):
+def use_local_llm(r_llm, local_llm_path):
     from langchain.llms import LlamaCpp
     from langchain.callbacks.manager import CallbackManager
     from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+    from huggingface_hub import hf_hub_download
+    model_tuples = [
+        ("TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
+        ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+        ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+        ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+        ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    ]
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     if r_llm == gpt_local_mistral:
-
+        model_name, model_file, model_type, model_link = model_tuples[0]
+    else:
+        model_name, model_file, model_type, model_link = model_tuples[3]
+    model_path = os.path.join( local_llm_path, model_name, model_file )
+    model_path = os.path.normpath( model_path )
+    if not os.path.exists(model_path):
+        print("model not existing at ", model_path, "\n")
+        model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
+                                     #cache_dir=local_llm_path,
+                                     local_dir=local_llm_path, local_dir_use_symlinks=False)
+        print("\n model downloaded at path=",model_path)
     else:
-
+        print("model existing at ", model_path)
+
     llm = LlamaCpp(
-        model_path=
+        model_path=model_path,
         temperature=0.0,
         n_batch=300,
         n_ctx=4000,
@@ -196,7 +214,7 @@ def setup_prompt():
     )
     return prompt
 
-def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
+def setup_em_llm(OPENAI_API_KEY, temperature, r_llm, local_llm_path):
     if (r_llm == gpt3p5 or r_llm == gpt4) and OPENAI_API_KEY:
         # Set up OpenAI embeddings
         embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
@@ -204,11 +222,14 @@ def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
         # Set the temperature to be 0 if you do not want it to make up things
         llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
                          openai_api_key=OPENAI_API_KEY)
-
+    elif r_llm == gpt_local_mistral or r_llm == gpt_local_llama:
         #em_model_name = 'hkunlp/instructor-xl'
         em_model_name='sentence-transformers/all-mpnet-base-v2'
         embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
-        llm = use_local_llm(r_llm)
+        llm = use_local_llm(r_llm, local_llm_path)
+    else:
+        embeddings = []
+        llm = []
     return embeddings, llm
 
 
@@ -236,11 +257,13 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     reply = ''
     source = ''
     LLMs = [gpt3p5, gpt4, gpt_local_llama, gpt_local_mistral]
+    local_llm_path = './models/'
+    user_llm_path = ''
     # Get user input of whether to use Pinecone or not
     col1, col2, col3 = st.columns([1, 1, 1])
     # create the radio buttons and text input fields
     with col1:
-        r_llm = st.multiselect('LLM:', LLMs, gpt3p5)
+        r_llm = st.multiselect(label='LLM:', options=LLMs, default=gpt3p5, max_selections=1)
         if not r_llm:
             r_llm = gpt3p5
         else:
@@ -268,7 +291,6 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
             st.write('Local GPT model (and local embedding model) is selected. Online vector store is selected.')
         else:
             st.write('Local GPT model (and local embedding model) and local vector store are selected. All info remains local.')
-        embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm)
     with col3:
         if use_pinecone == True:
             PINECONE_API_KEY = st.text_input(
@@ -282,8 +304,17 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
             chroma_collection_name = st.text_input(
                 '''Chroma collection name of 3-63 characters:''')
             persist_directory = "./vectorstore"
-
-
+        if use_openai == False:
+            user_llm_path = st.text_input(
+                "Path for local model (TO BE DOWNLOADED IF NOT EXISTING), type 'default' to use default path:",
+                placeholder="default")
+            if 'default' in user_llm_path:
+                user_llm_path = local_llm_path
+
+    if ( (pinecone_index_name or chroma_collection_name)
+         and ( (use_openai and OPENAI_API_KEY) or (not use_openai and user_llm_path) ) ):
+        embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm, user_llm_path)
+    #if ( pinecone_index_name or chroma_collection_name ) and embeddings and llm:
     session_name = pinecone_index_name + chroma_collection_name
     if r_ingest.lower() == 'yes':
         files = st.file_uploader(
@@ -367,7 +398,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
         all_chat_history_str = '\n'.join(
            [f'{x[0]}: {x[1]}' for x in all_chats])
         st.title(':blue[All chat records]')
-        st.text_area('', value=all_chat_history_str, height=250, label_visibility='collapsed')
+        st.text_area('Chat records in ascending order:', value=all_chat_history_str, height=250, label_visibility='collapsed')
 if __name__ == '__main__':
     main(pinecone_index_name, chroma_collection_name, persist_directory,
          docsearch_ready, directory_name)
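Once the GGUF file is on disk, use_local_llm hands it to LlamaCpp with a streaming stdout callback. The following is a condensed sketch of that loading step with an illustrative model path; the LlamaCpp arguments beyond n_ctx are not visible in the diff, so passing callback_manager and verbose here is an assumption.

from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Stream generated tokens to stdout as they arrive.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path="./models/mistral-7b-instruct-v0.1.Q8_0.gguf",  # illustrative local path
    temperature=0.0,   # deterministic output, as in the diff
    n_batch=300,
    n_ctx=4000,
    callback_manager=callback_manager,  # assumption: passed so streaming output works
    verbose=True,
)

# Illustrative one-off call; the app instead wraps llm in a conversational retrieval chain.
print(llm("Summarize what a GGUF model file is in one sentence."))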
requirements.txt
CHANGED
@@ -11,4 +11,5 @@ pymupdf
 tabulate
 sentence-transformers
 llama-cpp-python
+huggingface-hub
 altair<5