mohcineelharras committed
Commit • 6131df7
1 Parent(s): f35f223
works
app.py
CHANGED
@@ -7,7 +7,6 @@ import logging
 import sys
 from llama_index.callbacks import CallbackManager, LlamaDebugHandler
 from llama_index.llms import LlamaCPP
-from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
 from llama_index.embeddings import InstructorEmbedding
 from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
 from tqdm.notebook import tqdm
@@ -101,14 +100,33 @@ def load_emb_uploaded_document(filename):
     # You may want to add a check to prevent execution during initialization.
     if 'init' in st.session_state:
         embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
-        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm,
+        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size=500)
         documents = SimpleDirectoryReader(input_files=[filename]).load_data()
         index = VectorStoreIndex.from_documents(
             documents, service_context=service_context, show_progress=True)
         return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
     return None
 
+# --------------------------------cache Embedding model-----------------------------------
 
+@st.cache_resource
+def load_emb_model():
+    if not os.path.exists("data"):
+        st.error("Data directory does not exist. Please upload the data.")
+        os.makedirs("data")
+        return None #
+    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
+                                           #model_name="hkunlp/instructor-base"
+                                           )
+    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, chunk_size=500,
+                                                   llm=llm)
+    documents = SimpleDirectoryReader("data").load_data()
+    print(f"Number of documents: {len(documents)}")
+    index = VectorStoreIndex.from_documents(
+        documents, service_context=service_context, show_progress=True)
+    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
+
+# --------------------------------cache Embedding model-----------------------------------
 
 # LLM
 @st.cache_resource
@@ -122,33 +140,30 @@ def load_llm_model():
         model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
         temperature=0.0,
         max_new_tokens=100,
-        context_window=
+        context_window=4096,
         generate_kwargs={},
         model_kwargs={"n_gpu_layers": 20},
-        messages_to_prompt=messages_to_prompt,
-        completion_to_prompt=completion_to_prompt,
         verbose=True,
     )
     return llm
 
-#
+# ------------------------------------session state----------------------------------------
+
+if 'memory' not in st.session_state:
+    st.session_state.memory = ""
+
+# LLM Model Loading
+if 'llm_model' not in st.session_state:
+    st.session_state.llm_model = load_llm_model()
+# Use the models from session state
+llm = st.session_state.llm_model
+
+# Embedding Model Loading
+if 'emb_model' not in st.session_state:
+    st.session_state.emb_model = load_emb_model()
+# Use the models from session state
+query_engine = st.session_state.emb_model
 
-@st.cache_resource
-def load_emb_model():
-    if not os.path.exists("data"):
-        st.error("Data directory does not exist. Please upload the data.")
-        os.makedirs("data")
-        return None #
-    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
-                                           #model_name="hkunlp/instructor-base"
-                                           )
-    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst,
-                                                   llm=llm)
-    documents = SimpleDirectoryReader("data").load_data()
-    print(f"Number of documents: {len(documents)}")
-    index = VectorStoreIndex.from_documents(
-        documents, service_context=service_context, show_progress=True)
-    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
 
 # ------------------------------------layout----------------------------------------
 
@@ -157,6 +172,7 @@ with st.sidebar:
     st.title("🤗 Llama Index 🦙")
     if st.button('Clear Memory'):
         del st.session_state["memory"]
+        st.session_state.memory = ""
     st.write("Local LLM API server in this demo is useles, we are loading local model using llama_index integration of llama cpp")
     st.write("🚀 This app allows you to chat with local LLM using api server or loaded in cache")
     st.subheader("💻 System Requirements: ")
@@ -166,20 +182,6 @@ with st.sidebar:
     st.subheader("Developer Information:")
     st.write("This app is developed and maintained by **@mohcineelharras**")
 
-if 'memory' not in st.session_state:
-    st.session_state.memory = ""
-# LLM Model Loading
-if 'llm_model' not in st.session_state:
-    st.session_state.llm_model = load_llm_model()
-
-# Embedding Model Loading
-if 'emb_model' not in st.session_state:
-    st.session_state.emb_model = load_emb_model()
-
-# Use the models from session state
-llm = st.session_state.llm_model
-query_engine = st.session_state.emb_model
-
 # Define your app's tabs
 tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])
 
@@ -189,7 +191,7 @@ with tab1:
     st.title("💬 LLM only")
     prompt = st.text_input(
         "Ask your question here",
-        placeholder="
+        placeholder="How do miners contribute to the security of the blockchain ?",
     )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
@@ -208,7 +210,7 @@ with tab2:
     st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
    prompt = st.text_input(
         "Ask your question here",
-        placeholder="
+        placeholder="Who is Mohcine ?",
     )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
@@ -265,6 +267,7 @@ with tab3:
         response = query_engine.query(contextual_prompt)
         text_response = response.response
         st.write("### Answer")
+        st.markdown(text_response)
         st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
         with open("short_memory.txt", 'w') as file:
             file.write(st.session_state.memory)
@@ -280,6 +283,11 @@ with tab3:
 #st.write()
 #print("Is File uploaded : ",uploaded_file==True, "Is question asked : ", question==True, "Is question asked : ", api_server_info==True)
 
+st.subheader('⚠️ Warning: To avoid lags')
+st.markdown("Please consider **delete input prompt** and **clear memory** with the button on sidebar, each time you switch to another tab")
+st.markdown("If you've got a GPU locally, the execution could be a **lot faster** (approximately 5 seconds on my local machine).")
+
+
 st.markdown("""
 <div style="text-align: center; margin-top: 20px;">
     <a href="https://github.com/mohcineelharras/llama-index-docs" target="_blank" style="margin: 10px; display: inline-block;">