eliujl committed on
Commit
6c41a5e
·
1 Parent(s): 32944e5

Updated app.py

Browse files

Put local LLM models in a list

Files changed (1) hide show
  1. app.py +13 -23
app.py CHANGED
@@ -24,8 +24,14 @@ PINECONE_API_KEY = ''
24
  PINECONE_API_ENV = ''
25
  gpt3p5 = 'gpt-3.5-turbo-1106'
26
  gpt4 = 'gpt-4-1106-preview'
27
- gpt_local_mistral = 'mistral_7b'
28
- gpt_local_llama = 'llama_13b'
 
 
 
 
 
 
29
  langchain.verbose = False
30
 
31
 
@@ -151,18 +157,9 @@ def use_local_llm(r_llm, local_llm_path):
151
  from langchain.callbacks.manager import CallbackManager
152
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
153
  from huggingface_hub import hf_hub_download
154
- model_tuples = [
155
- ("TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
156
- ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
157
- ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
158
- ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
159
- ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
160
- ]
161
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
162
- if r_llm == gpt_local_mistral:
163
- model_name, model_file, model_type, model_link = model_tuples[0]
164
- else:
165
- model_name, model_file, model_type, model_link = model_tuples[3]
166
  model_path = os.path.join( local_llm_path, model_name, model_file )
167
  model_path = os.path.normpath( model_path )
168
  if not os.path.exists(model_path):
@@ -222,14 +219,11 @@ def setup_em_llm(OPENAI_API_KEY, temperature, r_llm, local_llm_path):
222
  # Set the temperature to be 0 if you do not want it to make up things
223
  llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
224
  openai_api_key=OPENAI_API_KEY)
225
- elif r_llm == gpt_local_mistral or r_llm == gpt_local_llama:
226
  #em_model_name = 'hkunlp/instructor-xl'
227
  em_model_name='sentence-transformers/all-mpnet-base-v2'
228
  embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
229
  llm = use_local_llm(r_llm, local_llm_path)
230
- else:
231
- embeddings = []
232
- llm = []
233
  return embeddings, llm
234
 
235
 
@@ -256,18 +250,14 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
256
  latest_chats = []
257
  reply = ''
258
  source = ''
259
- LLMs = [gpt3p5, gpt4, gpt_local_llama, gpt_local_mistral]
260
  local_llm_path = './models/'
261
  user_llm_path = ''
262
  # Get user input of whether to use Pinecone or not
263
  col1, col2, col3 = st.columns([1, 1, 1])
264
  # create the radio buttons and text input fields
265
  with col1:
266
- r_llm = st.multiselect(label='LLM:', options=LLMs, default=gpt3p5, max_selections=1)
267
- if not r_llm:
268
- r_llm = gpt3p5
269
- else:
270
- r_llm = r_llm[0]
271
  if r_llm == gpt3p5 or r_llm == gpt4:
272
  use_openai = True
273
  else:
 
24
  PINECONE_API_ENV = ''
25
  gpt3p5 = 'gpt-3.5-turbo-1106'
26
  gpt4 = 'gpt-4-1106-preview'
27
+ local_model_tuples = [
28
+ (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
29
+ (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
30
+ (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
31
+ (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
32
+ (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
33
+ ]
34
+ local_model_names = [t[1] for t in local_model_tuples]
35
  langchain.verbose = False
36
 
37
 
 
157
  from langchain.callbacks.manager import CallbackManager
158
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
159
  from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
160
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
161
+ entry = local_model_names.index(r_llm)
162
+ model_id, local_model_name, model_name, model_file, model_type, model_link = local_model_tuples[entry]
 
 
163
  model_path = os.path.join( local_llm_path, model_name, model_file )
164
  model_path = os.path.normpath( model_path )
165
  if not os.path.exists(model_path):
 
219
  # Set the temperature to be 0 if you do not want it to make up things
220
  llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
221
  openai_api_key=OPENAI_API_KEY)
222
+ else:
223
  #em_model_name = 'hkunlp/instructor-xl'
224
  em_model_name='sentence-transformers/all-mpnet-base-v2'
225
  embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
226
  llm = use_local_llm(r_llm, local_llm_path)
 
 
 
227
  return embeddings, llm
228
 
229
 
 
250
  latest_chats = []
251
  reply = ''
252
  source = ''
253
+ LLMs = [gpt3p5, gpt4] + local_model_names
254
  local_llm_path = './models/'
255
  user_llm_path = ''
256
  # Get user input of whether to use Pinecone or not
257
  col1, col2, col3 = st.columns([1, 1, 1])
258
  # create the radio buttons and text input fields
259
  with col1:
260
+ r_llm = st.radio(label='LLM:', options=LLMs)
 
 
 
 
261
  if r_llm == gpt3p5 or r_llm == gpt4:
262
  use_openai = True
263
  else: