mohcineelharras committed
Commit 6131df7 • 1 Parent(s): f35f223
Files changed (1):
  1. app.py +46 -38

app.py CHANGED
@@ -7,7 +7,6 @@ import logging
 import sys
 from llama_index.callbacks import CallbackManager, LlamaDebugHandler
 from llama_index.llms import LlamaCPP
-from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
 from llama_index.embeddings import InstructorEmbedding
 from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
 from tqdm.notebook import tqdm
@@ -101,14 +100,33 @@ def load_emb_uploaded_document(filename):
     # You may want to add a check to prevent execution during initialization.
     if 'init' in st.session_state:
         embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
-        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size_limit=500)
+        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size=500)
         documents = SimpleDirectoryReader(input_files=[filename]).load_data()
         index = VectorStoreIndex.from_documents(
             documents, service_context=service_context, show_progress=True)
         return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
     return None
 
+# --------------------------------cache Embedding model-----------------------------------
 
+@st.cache_resource
+def load_emb_model():
+    if not os.path.exists("data"):
+        st.error("Data directory does not exist. Please upload the data.")
+        os.makedirs("data")
+        return None #
+    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
+        #model_name="hkunlp/instructor-base"
+    )
+    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, chunk_size=500,
+                                                   llm=llm)
+    documents = SimpleDirectoryReader("data").load_data()
+    print(f"Number of documents: {len(documents)}")
+    index = VectorStoreIndex.from_documents(
+        documents, service_context=service_context, show_progress=True)
+    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
+
+# --------------------------------cache Embedding model-----------------------------------
 
 # LLM
 @st.cache_resource
@@ -122,33 +140,30 @@ def load_llm_model():
         model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
         temperature=0.0,
         max_new_tokens=100,
-        context_window=2048,
+        context_window=4096,
         generate_kwargs={},
         model_kwargs={"n_gpu_layers": 20},
-        messages_to_prompt=messages_to_prompt,
-        completion_to_prompt=completion_to_prompt,
         verbose=True,
     )
     return llm
 
-# --------------------------------cache Embedding model-----------------------------------
+# ------------------------------------session state----------------------------------------
+
+if 'memory' not in st.session_state:
+    st.session_state.memory = ""
+
+# LLM Model Loading
+if 'llm_model' not in st.session_state:
+    st.session_state.llm_model = load_llm_model()
+# Use the models from session state
+llm = st.session_state.llm_model
+
+# Embedding Model Loading
+if 'emb_model' not in st.session_state:
+    st.session_state.emb_model = load_emb_model()
+# Use the models from session state
+query_engine = st.session_state.emb_model
 
-@st.cache_resource
-def load_emb_model():
-    if not os.path.exists("data"):
-        st.error("Data directory does not exist. Please upload the data.")
-        os.makedirs("data")
-        return None #
-    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
-        #model_name="hkunlp/instructor-base"
-    )
-    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst,
-                                                   llm=llm)
-    documents = SimpleDirectoryReader("data").load_data()
-    print(f"Number of documents: {len(documents)}")
-    index = VectorStoreIndex.from_documents(
-        documents, service_context=service_context, show_progress=True)
-    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
 
 # ------------------------------------layout----------------------------------------
 
@@ -157,6 +172,7 @@ with st.sidebar:
     st.title("🤖 Llama Index 📚")
     if st.button('Clear Memory'):
         del st.session_state["memory"]
+        st.session_state.memory = ""
     st.write("Local LLM API server in this demo is useles, we are loading local model using llama_index integration of llama cpp")
     st.write("🚀 This app allows you to chat with local LLM using api server or loaded in cache")
     st.subheader("💻 System Requirements: ")
@@ -166,20 +182,6 @@ with st.sidebar:
     st.subheader("Developer Information:")
     st.write("This app is developed and maintained by **@mohcineelharras**")
 
-if 'memory' not in st.session_state:
-    st.session_state.memory = ""
-# LLM Model Loading
-if 'llm_model' not in st.session_state:
-    st.session_state.llm_model = load_llm_model()
-
-# Embedding Model Loading
-if 'emb_model' not in st.session_state:
-    st.session_state.emb_model = load_emb_model()
-
-# Use the models from session state
-llm = st.session_state.llm_model
-query_engine = st.session_state.emb_model
-
 # Define your app's tabs
 tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])
 
@@ -189,7 +191,7 @@ with tab1:
     st.title("💬 LLM only")
     prompt = st.text_input(
         "Ask your question here",
-        placeholder="Who is Mohcine",
+        placeholder="How do miners contribute to the security of the blockchain ?",
     )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
@@ -208,7 +210,7 @@ with tab2:
     st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
     prompt = st.text_input(
         "Ask your question here",
-        placeholder="How does the blockchain work ?",
+        placeholder="Who is Mohcine ?",
    )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
@@ -265,6 +267,7 @@ with tab3:
         response = query_engine.query(contextual_prompt)
         text_response = response.response
         st.write("### Answer")
+        st.markdown(text_response)
         st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
         with open("short_memory.txt", 'w') as file:
             file.write(st.session_state.memory)
@@ -280,6 +283,11 @@ with tab3:
 #st.write()
 #print("Is File uploaded : ",uploaded_file==True, "Is question asked : ", question==True, "Is question asked : ", api_server_info==True)
 
+st.subheader('⚠️ Warning: To avoid lags')
+st.markdown("Please consider **delete input prompt** and **clear memory** with the button on sidebar, each time you switch to another tab")
+st.markdown("If you've got a GPU locally, the execution could be a **lot faster** (approximately 5 seconds on my local machine).")
+
+
 st.markdown("""
 <div style="text-align: center; margin-top: 20px;">
 <a href="https://github.com/mohcineelharras/llama-index-docs" target="_blank" style="margin: 10px; display: inline-block;">
 