mohcineelharras committed
Commit • 6131df7
1 Parent(s): f35f223
works
app.py
CHANGED
@@ -7,7 +7,6 @@ import logging
 import sys
 from llama_index.callbacks import CallbackManager, LlamaDebugHandler
 from llama_index.llms import LlamaCPP
-from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
 from llama_index.embeddings import InstructorEmbedding
 from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
 from tqdm.notebook import tqdm
@@ -101,14 +100,33 @@ def load_emb_uploaded_document(filename):
     # You may want to add a check to prevent execution during initialization.
     if 'init' in st.session_state:
         embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
-        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm,
+        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size=500)
         documents = SimpleDirectoryReader(input_files=[filename]).load_data()
         index = VectorStoreIndex.from_documents(
             documents, service_context=service_context, show_progress=True)
         return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
     return None
 
+# --------------------------------cache Embedding model-----------------------------------
 
+@st.cache_resource
+def load_emb_model():
+    if not os.path.exists("data"):
+        st.error("Data directory does not exist. Please upload the data.")
+        os.makedirs("data")
+        return None #
+    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
+                                           #model_name="hkunlp/instructor-base"
+                                           )
+    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, chunk_size=500,
+                                                   llm=llm)
+    documents = SimpleDirectoryReader("data").load_data()
+    print(f"Number of documents: {len(documents)}")
+    index = VectorStoreIndex.from_documents(
+        documents, service_context=service_context, show_progress=True)
+    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
+
+# --------------------------------cache Embedding model-----------------------------------
 
 # LLM
 @st.cache_resource
@@ -122,33 +140,30 @@ def load_llm_model():
         model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
         temperature=0.0,
         max_new_tokens=100,
-        context_window=
+        context_window=4096,
         generate_kwargs={},
         model_kwargs={"n_gpu_layers": 20},
-        messages_to_prompt=messages_to_prompt,
-        completion_to_prompt=completion_to_prompt,
         verbose=True,
     )
     return llm
 
-#
+# ------------------------------------session state----------------------------------------
+
+if 'memory' not in st.session_state:
+    st.session_state.memory = ""
+
+# LLM Model Loading
+if 'llm_model' not in st.session_state:
+    st.session_state.llm_model = load_llm_model()
+# Use the models from session state
+llm = st.session_state.llm_model
+
+# Embedding Model Loading
+if 'emb_model' not in st.session_state:
+    st.session_state.emb_model = load_emb_model()
+# Use the models from session state
+query_engine = st.session_state.emb_model
 
-@st.cache_resource
-def load_emb_model():
-    if not os.path.exists("data"):
-        st.error("Data directory does not exist. Please upload the data.")
-        os.makedirs("data")
-        return None #
-    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
-                                           #model_name="hkunlp/instructor-base"
-                                           )
-    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst,
-                                                   llm=llm)
-    documents = SimpleDirectoryReader("data").load_data()
-    print(f"Number of documents: {len(documents)}")
-    index = VectorStoreIndex.from_documents(
-        documents, service_context=service_context, show_progress=True)
-    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
 
 # ------------------------------------layout----------------------------------------
 
@@ -157,6 +172,7 @@ with st.sidebar:
     st.title("🤗 Llama Index 🦙")
     if st.button('Clear Memory'):
         del st.session_state["memory"]
+        st.session_state.memory = ""
     st.write("Local LLM API server in this demo is useles, we are loading local model using llama_index integration of llama cpp")
     st.write("🚀 This app allows you to chat with local LLM using api server or loaded in cache")
     st.subheader("💻 System Requirements: ")
@@ -166,20 +182,6 @@ with st.sidebar:
     st.subheader("Developer Information:")
     st.write("This app is developed and maintained by **@mohcineelharras**")
 
-if 'memory' not in st.session_state:
-    st.session_state.memory = ""
-# LLM Model Loading
-if 'llm_model' not in st.session_state:
-    st.session_state.llm_model = load_llm_model()
-
-# Embedding Model Loading
-if 'emb_model' not in st.session_state:
-    st.session_state.emb_model = load_emb_model()
-
-# Use the models from session state
-llm = st.session_state.llm_model
-query_engine = st.session_state.emb_model
-
 # Define your app's tabs
 tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])
 
@@ -189,7 +191,7 @@ with tab1:
     st.title("💬 LLM only")
     prompt = st.text_input(
         "Ask your question here",
-        placeholder="
+        placeholder="How do miners contribute to the security of the blockchain ?",
     )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
@@ -208,7 +210,7 @@ with tab2:
     st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
    prompt = st.text_input(
         "Ask your question here",
-        placeholder="
+        placeholder="Who is Mohcine ?",
     )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
@@ -265,6 +267,7 @@ with tab3:
         response = query_engine.query(contextual_prompt)
         text_response = response.response
         st.write("### Answer")
+        st.markdown(text_response)
         st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
         with open("short_memory.txt", 'w') as file:
             file.write(st.session_state.memory)
@@ -280,6 +283,11 @@ with tab3:
 #st.write()
 #print("Is File uploaded : ",uploaded_file==True, "Is question asked : ", question==True, "Is question asked : ", api_server_info==True)
 
+st.subheader('⚠️ Warning: To avoid lags')
+st.markdown("Please consider **delete input prompt** and **clear memory** with the button on sidebar, each time you switch to another tab")
+st.markdown("If you've got a GPU locally, the execution could be a **lot faster** (approximately 5 seconds on my local machine).")
+
+
 st.markdown("""
 <div style="text-align: center; margin-top: 20px;">
     <a href="https://github.com/mohcineelharras/llama-index-docs" target="_blank" style="margin: 10px; display: inline-block;">