eliujl committed on
Commit
32944e5
·
1 Parent(s): 7f1213b

Improved support for using a local model

Browse files

Download the local model if it does not already exist at the given path; otherwise, use the existing local copy.

Files changed (2) hide show
  1. app.py +45 -14
  2. requirements.txt +1 -0
app.py CHANGED
@@ -121,9 +121,7 @@ def setup_docsearch(use_pinecone, pinecone_index_name, embeddings, chroma_collec
121
  index_info = index_client.describe_index_stats()
122
  # namespace_name = ''
123
  # if index_info is not None:
124
- # print(index_info)
125
  # print(index_info['namespaces'][namespace_name]['vector_count'])
126
- # print(index_info['total_vector_count'])
127
  # else:
128
  # print("Index information is not available.")
129
  # n_texts = index_info['namespaces'][namespace_name]['vector_count']
@@ -146,18 +144,38 @@ def get_response(query, chat_history, CRqa):
146
  result = CRqa({"question": query, "chat_history": chat_history})
147
  return result['answer'], result['source_documents']
148
 
 
149
  @st.cache_resource()
150
- def use_local_llm(r_llm):
151
  from langchain.llms import LlamaCpp
152
  from langchain.callbacks.manager import CallbackManager
153
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
 
 
 
 
 
 
 
154
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
155
  if r_llm == gpt_local_mistral:
156
- gpt_local = 'openhermes-2-mistral-7b.Q8_0.gguf'
 
 
 
 
 
 
 
 
 
 
157
  else:
158
- gpt_local = 'llama-2-13b-chat.Q8_0.gguf'
 
159
  llm = LlamaCpp(
160
- model_path='~//models//'+gpt_local,
161
  temperature=0.0,
162
  n_batch=300,
163
  n_ctx=4000,
@@ -196,7 +214,7 @@ def setup_prompt():
196
  )
197
  return prompt
198
 
199
- def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
200
  if (r_llm == gpt3p5 or r_llm == gpt4) and OPENAI_API_KEY:
201
  # Set up OpenAI embeddings
202
  embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
@@ -204,11 +222,14 @@ def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
204
  # Set the temperature to be 0 if you do not want it to make up things
205
  llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
206
  openai_api_key=OPENAI_API_KEY)
207
- else:
208
  #em_model_name = 'hkunlp/instructor-xl'
209
  em_model_name='sentence-transformers/all-mpnet-base-v2'
210
  embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
211
- llm = use_local_llm(r_llm)
 
 
 
212
  return embeddings, llm
213
 
214
 
@@ -236,11 +257,13 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
236
  reply = ''
237
  source = ''
238
  LLMs = [gpt3p5, gpt4, gpt_local_llama, gpt_local_mistral]
 
 
239
  # Get user input of whether to use Pinecone or not
240
  col1, col2, col3 = st.columns([1, 1, 1])
241
  # create the radio buttons and text input fields
242
  with col1:
243
- r_llm = st.multiselect('LLM:', LLMs, gpt3p5)
244
  if not r_llm:
245
  r_llm = gpt3p5
246
  else:
@@ -268,7 +291,6 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
268
  st.write('Local GPT model (and local embedding model) is selected. Online vector store is selected.')
269
  else:
270
  st.write('Local GPT model (and local embedding model) and local vector store are selected. All info remains local.')
271
- embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm)
272
  with col3:
273
  if use_pinecone == True:
274
  PINECONE_API_KEY = st.text_input(
@@ -282,8 +304,17 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
282
  chroma_collection_name = st.text_input(
283
  '''Chroma collection name of 3-63 characters:''')
284
  persist_directory = "./vectorstore"
285
-
286
- if pinecone_index_name or chroma_collection_name:
 
 
 
 
 
 
 
 
 
287
  session_name = pinecone_index_name + chroma_collection_name
288
  if r_ingest.lower() == 'yes':
289
  files = st.file_uploader(
@@ -367,7 +398,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
367
  all_chat_history_str = '\n'.join(
368
  [f'{x[0]}: {x[1]}' for x in all_chats])
369
  st.title(':blue[All chat records]')
370
- st.text_area('', value=all_chat_history_str, height=250, label_visibility='collapsed')
371
  if __name__ == '__main__':
372
  main(pinecone_index_name, chroma_collection_name, persist_directory,
373
  docsearch_ready, directory_name)
 
121
  index_info = index_client.describe_index_stats()
122
  # namespace_name = ''
123
  # if index_info is not None:
 
124
  # print(index_info['namespaces'][namespace_name]['vector_count'])
 
125
  # else:
126
  # print("Index information is not available.")
127
  # n_texts = index_info['namespaces'][namespace_name]['vector_count']
 
144
  result = CRqa({"question": query, "chat_history": chat_history})
145
  return result['answer'], result['source_documents']
146
 
147
+
148
  @st.cache_resource()
149
+ def use_local_llm(r_llm, local_llm_path):
150
  from langchain.llms import LlamaCpp
151
  from langchain.callbacks.manager import CallbackManager
152
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
153
+ from huggingface_hub import hf_hub_download
154
+ model_tuples = [
155
+ ("TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
156
+ ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
157
+ ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
158
+ ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
159
+ ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
160
+ ]
161
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
162
  if r_llm == gpt_local_mistral:
163
+ model_name, model_file, model_type, model_link = model_tuples[0]
164
+ else:
165
+ model_name, model_file, model_type, model_link = model_tuples[3]
166
+ model_path = os.path.join( local_llm_path, model_name, model_file )
167
+ model_path = os.path.normpath( model_path )
168
+ if not os.path.exists(model_path):
169
+ print("model not existing at ", model_path, "\n")
170
+ model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
171
+ #cache_dir=local_llm_path,
172
+ local_dir=local_llm_path, local_dir_use_symlinks=False)
173
+ print("\n model downloaded at path=",model_path)
174
  else:
175
+ print("model existing at ", model_path)
176
+
177
  llm = LlamaCpp(
178
+ model_path=model_path,
179
  temperature=0.0,
180
  n_batch=300,
181
  n_ctx=4000,
 
214
  )
215
  return prompt
216
 
217
+ def setup_em_llm(OPENAI_API_KEY, temperature, r_llm, local_llm_path):
218
  if (r_llm == gpt3p5 or r_llm == gpt4) and OPENAI_API_KEY:
219
  # Set up OpenAI embeddings
220
  embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
 
222
  # Set the temperature to be 0 if you do not want it to make up things
223
  llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
224
  openai_api_key=OPENAI_API_KEY)
225
+ elif r_llm == gpt_local_mistral or r_llm == gpt_local_llama:
226
  #em_model_name = 'hkunlp/instructor-xl'
227
  em_model_name='sentence-transformers/all-mpnet-base-v2'
228
  embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
229
+ llm = use_local_llm(r_llm, local_llm_path)
230
+ else:
231
+ embeddings = []
232
+ llm = []
233
  return embeddings, llm
234
 
235
 
 
257
  reply = ''
258
  source = ''
259
  LLMs = [gpt3p5, gpt4, gpt_local_llama, gpt_local_mistral]
260
+ local_llm_path = './models/'
261
+ user_llm_path = ''
262
  # Get user input of whether to use Pinecone or not
263
  col1, col2, col3 = st.columns([1, 1, 1])
264
  # create the radio buttons and text input fields
265
  with col1:
266
+ r_llm = st.multiselect(label='LLM:', options=LLMs, default=gpt3p5, max_selections=1)
267
  if not r_llm:
268
  r_llm = gpt3p5
269
  else:
 
291
  st.write('Local GPT model (and local embedding model) is selected. Online vector store is selected.')
292
  else:
293
  st.write('Local GPT model (and local embedding model) and local vector store are selected. All info remains local.')
 
294
  with col3:
295
  if use_pinecone == True:
296
  PINECONE_API_KEY = st.text_input(
 
304
  chroma_collection_name = st.text_input(
305
  '''Chroma collection name of 3-63 characters:''')
306
  persist_directory = "./vectorstore"
307
+ if use_openai == False:
308
+ user_llm_path = st.text_input(
309
+ "Path for local model (TO BE DOWNLOADED IF NOT EXISTING), type 'default' to use default path:",
310
+ placeholder="default")
311
+ if 'default' in user_llm_path:
312
+ user_llm_path = local_llm_path
313
+
314
+ if ( (pinecone_index_name or chroma_collection_name)
315
+ and ( (use_openai and OPENAI_API_KEY) or (not use_openai and user_llm_path) ) ):
316
+ embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm, user_llm_path)
317
+ #if ( pinecone_index_name or chroma_collection_name ) and embeddings and llm:
318
  session_name = pinecone_index_name + chroma_collection_name
319
  if r_ingest.lower() == 'yes':
320
  files = st.file_uploader(
 
398
  all_chat_history_str = '\n'.join(
399
  [f'{x[0]}: {x[1]}' for x in all_chats])
400
  st.title(':blue[All chat records]')
401
+ st.text_area('Chat records in ascending order:', value=all_chat_history_str, height=250, label_visibility='collapsed')
402
  if __name__ == '__main__':
403
  main(pinecone_index_name, chroma_collection_name, persist_directory,
404
  docsearch_ready, directory_name)
requirements.txt CHANGED
@@ -11,4 +11,5 @@ pymupdf
11
  tabulate
12
  sentence-transformers
13
  llama-cpp-python
 
14
  altair<5
 
11
  tabulate
12
  sentence-transformers
13
  llama-cpp-python
14
+ huggingface-hub
15
  altair<5