eliujl committed
Commit 6c41a5e · 1 Parent(s): 32944e5

Updated app.py
Put local llm models in a list
app.py CHANGED

@@ -24,8 +24,14 @@ PINECONE_API_KEY = ''
 PINECONE_API_ENV = ''
 gpt3p5 = 'gpt-3.5-turbo-1106'
 gpt4 = 'gpt-4-1106-preview'
-
-
+local_model_tuples = [
+    (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
+    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+    (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    ]
+local_model_names = [t[1] for t in local_model_tuples]
 langchain.verbose = False


@@ -151,18 +157,9 @@ def use_local_llm(r_llm, local_llm_path):
     from langchain.callbacks.manager import CallbackManager
     from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
     from huggingface_hub import hf_hub_download
-    model_tuples = [
-        ("TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
-        ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
-        ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
-        ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
-        ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
-        ]
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
-
-    else:
-        model_name, model_file, model_type, model_link = model_tuples[3]
+    entry = local_model_names.index(r_llm)
+    model_id, local_model_name, model_name, model_file, model_type, model_link = local_model_tuples[entry]
     model_path = os.path.join( local_llm_path, model_name, model_file )
     model_path = os.path.normpath( model_path )
     if not os.path.exists(model_path):
@@ -222,14 +219,11 @@ def setup_em_llm(OPENAI_API_KEY, temperature, r_llm, local_llm_path):
         # Set the temperature to be 0 if you do not want it to make up things
         llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
                          openai_api_key=OPENAI_API_KEY)
-
+    else:
         #em_model_name = 'hkunlp/instructor-xl'
         em_model_name='sentence-transformers/all-mpnet-base-v2'
         embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
         llm = use_local_llm(r_llm, local_llm_path)
-    else:
-        embeddings = []
-        llm = []
     return embeddings, llm


@@ -256,18 +250,14 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     latest_chats = []
     reply = ''
     source = ''
-    LLMs = [gpt3p5, gpt4]
+    LLMs = [gpt3p5, gpt4] + local_model_names
     local_llm_path = './models/'
     user_llm_path = ''
     # Get user input of whether to use Pinecone or not
    col1, col2, col3 = st.columns([1, 1, 1])
     # create the radio buttons and text input fields
     with col1:
-        r_llm = st.
-        if not r_llm:
-            r_llm = gpt3p5
-        else:
-            r_llm = r_llm[0]
+        r_llm = st.radio(label='LLM:', options=LLMs)
     if r_llm == gpt3p5 or r_llm == gpt4:
         use_openai = True
     else:
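
For context on the change: the commit replaces the model_tuples list that was hard-coded inside use_local_llm with a module-level local_model_tuples registry whose second field is a short display name, so main() can offer the local models in the same st.radio widget as the OpenAI ones (st.radio returns the selected option directly, which is why the old empty-check and r_llm[0] handling could be dropped). Below is a minimal sketch of how a selected name resolves to a model file under this layout; the resolve_model_path helper and the hf_hub_download call are illustrative assumptions, not lines from app.py, whose diff only shows the os.path.exists check.

    import os
    from huggingface_hub import hf_hub_download

    # Registry of local GGUF models in the shape introduced by this commit:
    # (id, short_name, repo_id, filename, model_type, link); abridged to two entries here.
    local_model_tuples = [
        (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF",
         "openhermes-2-mistral-7b.Q8_0.gguf", "mistral",
         "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
        (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF",
         "llama-2-13b-chat.Q4_K_M.gguf", "llama",
         "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
    ]
    local_model_names = [t[1] for t in local_model_tuples]

    def resolve_model_path(r_llm, local_llm_path='./models/'):
        # Same lookup as the new use_local_llm code: short name -> registry entry.
        entry = local_model_names.index(r_llm)
        _, _, model_name, model_file, _, _ = local_model_tuples[entry]
        model_path = os.path.normpath(os.path.join(local_llm_path, model_name, model_file))
        if not os.path.exists(model_path):
            # Assumed download step: app.py imports hf_hub_download, but the actual
            # call falls outside this diff's context lines.
            hf_hub_download(repo_id=model_name, filename=model_file,
                            local_dir=os.path.join(local_llm_path, model_name))
        return model_path

    print(resolve_model_path('llama_13b_small'))
    # models/TheBloke/Llama-2-13B-chat-GGUF/llama-2-13b-chat.Q4_K_M.gguf (on Linux/macOS)

Selecting one of the local names in the radio button would then resolve to a file under ./models/ and fetch it on first use, while gpt3p5 and gpt4 keep going through ChatOpenAI in setup_em_llm.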