mohcineelharras committed
Commit 8df831b • 1 Parent(s): c89b497

Upload app.py

Files changed (1)
  1. app.py +64 -60
app.py CHANGED
@@ -80,6 +80,7 @@ template = (
     "If a question is asked about content not in the documents or context, respond with 'I do not have that information.' "
     "Always respond in the same language as the question was asked. Be concise.\n"
     "Respond to the best of your ability. Try to respond in markdown.\"\n"
+    "If the user prompt is in French, YOU MUST ANSWER IN FRENCH. Otherwise, speak English\"\n"
     "context\n"
     "{context}\n"
     "user\n"
@@ -90,30 +91,32 @@ template = (
 
 # --------------------------------cache LLM-----------------------------------
 
-logging.basicConfig(stream=sys.stdout, level=logging.INFO)
-logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-llama_debug = LlamaDebugHandler(print_trace_on_end=True)
-callback_manager = CallbackManager([llama_debug])
-
+# LLM
 @st.cache_resource
-#One doc embedding
-def load_emb_uploaded_document(filename):
-    # You may want to add a check to prevent execution during initialization.
-    if 'init' in st.session_state:
-        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
-        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size=500)
-        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
-        index = VectorStoreIndex.from_documents(
-            documents, service_context=service_context, show_progress=True)
-        return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
-    return None
+def load_llm_model():
+    if not os.path.exists("models"):
+        os.makedirs("models")
+        return None #
+    llm = LlamaCPP(
+        #model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
+        model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
+        temperature=0.0,
+        max_new_tokens=100,
+        context_window=4096,
+        generate_kwargs={},
+        model_kwargs={"n_gpu_layers": 20},
+        verbose=True,
+    )
+    return llm
+
+llm = load_llm_model()
+
 
 # --------------------------------cache Embedding model-----------------------------------
 
 @st.cache_resource
 def load_emb_model():
     if not os.path.exists("data"):
-        st.error("Data directory does not exist. Please upload the data.")
         os.makedirs("data")
         return None #
     embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
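This hunk swaps what lives at this spot: the debug-handler setup and the uploaded-document loader move lower in the file, and `load_llm_model()` is defined here instead and called once at module level. The point of `@st.cache_resource` is that Streamlit reruns the whole script on every widget interaction; the decorator memoizes the returned object, so the GGUF model is loaded only once per process. A minimal sketch of that behavior, with a sleep standing in for the expensive `LlamaCPP(...)` call:

```python
import time
import streamlit as st

@st.cache_resource
def load_expensive_model():
    # Stand-in for LlamaCPP(model_path=..., ...); cache_resource is intended
    # for unserializable singletons such as model handles and DB connections.
    time.sleep(5)  # simulate a slow model load
    return {"status": "loaded"}

model = load_expensive_model()  # slow on the first run, instant on every rerun
st.write(model["status"])
```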
@@ -126,44 +129,44 @@ def load_emb_model():
     index = VectorStoreIndex.from_documents(
         documents, service_context=service_context, show_progress=True)
     return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
+query_engine = load_emb_model()
 
-# --------------------------------cache Embedding model-----------------------------------
+# --------------------------------cache embd one doc-----------------------------------
+
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+llama_debug = LlamaDebugHandler(print_trace_on_end=True)
+callback_manager = CallbackManager([llama_debug])
 
-# LLM
 @st.cache_resource
-def load_llm_model():
-    if not os.path.exists("models"):
-        st.error("models directory does not exist. Please download and copy paste a model in folder models.")
-        os.makedirs("models")
-        return None #
-    llm = LlamaCPP(
-        #model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
-        model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
-        temperature=0.0,
-        max_new_tokens=100,
-        context_window=4096,
-        generate_kwargs={},
-        model_kwargs={"n_gpu_layers": 20},
-        verbose=True,
-    )
-    return llm
+#One doc embedding
+def load_emb_uploaded_document(filename):
+    # You may want to add a check to prevent execution during initialization.
+    if 'init' in st.session_state:
+        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
+        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size=500)
+        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
+        index = VectorStoreIndex.from_documents(
+            documents, service_context=service_context, show_progress=True)
+        return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
+    return None
 
 # ------------------------------------session state----------------------------------------
 
 if 'memory' not in st.session_state:
     st.session_state.memory = ""
 
-# LLM Model Loading
-if 'llm_model' not in st.session_state:
-    st.session_state.llm_model = load_llm_model()
-# Use the models from session state
-llm = st.session_state.llm_model
-
-# Embedding Model Loading
-if 'emb_model' not in st.session_state:
-    st.session_state.emb_model = load_emb_model()
-# Use the models from session state
-query_engine = st.session_state.emb_model
+# # LLM Model Loading
+# if 'llm_model' not in st.session_state:
+#     st.session_state.llm_model = load_llm_model()
+# # Use the models from session state
+# llm = st.session_state.llm_model
+
+# # Embedding Model Loading
+# if 'emb_model' not in st.session_state:
+#     st.session_state.emb_model = load_emb_model()
+# # Use the models from session state
+# query_engine = st.session_state.emb_model
 
 # ------------------------------------layout----------------------------------------
 
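This hunk is the other half of the reorganization: the query engine and the debug logging move to module level, `load_emb_uploaded_document` lands here, and the old session-state bookkeeping survives only as commented-out code. The two caching patterns differ in scope; a sketch of the contrast (the `load_model` stub is illustrative, not app.py code):

```python
import streamlit as st

@st.cache_resource
def load_model():
    return object()  # stub for an expensive model or index

# Old pattern (now commented out in app.py): cache by hand in session_state,
# which is scoped to one browser session and only survives that session's reruns.
if "model" not in st.session_state:
    st.session_state.model = load_model()
model_per_session = st.session_state.model

# New pattern: call the cached function directly; st.cache_resource holds one
# shared instance for all sessions, so the model is loaded once per process.
model_shared = load_model()
```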
@@ -189,7 +192,7 @@ tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single
 
 with tab1:
     st.title("💬 LLM only")
-    prompt = st.text_input(
+    prompt = st.text_area(
         "Ask your question here",
         placeholder="How do miners contribute to the security of the blockchain ?",
     )
@@ -208,7 +211,7 @@ with tab1:
 with tab2:
     st.title("💬 LLM RAG QA with database")
     st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
-    prompt = st.text_input(
+    prompt = st.text_area(
         "Ask your question here",
         placeholder="Who is Mohcine ?",
     )
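Both tabs make the same swap: `st.text_input` (single line) becomes `st.text_area` (multi-line), which suits longer RAG prompts. A minimal sketch; the `height` argument is an optional extra for illustration, not something this commit sets:

```python
import streamlit as st

# st.text_area returns the (possibly multi-line) string, or "" until edited.
prompt = st.text_area(
    "Ask your question here",
    placeholder="Who is Mohcine ?",
    height=120,  # assumption: optional; app.py relies on the default height
)
if prompt:
    st.write("You asked:", prompt)
```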
@@ -234,13 +237,12 @@ with tab2:
 
 with tab3:
     st.title("📝 One single document Q&A with Llama Index using local open llms")
-    if st.button('Reinitialize Query Engine', key='reinit_engine'):
-        del st.session_state["emb_model_upload_doc"]
-        st.session_state.emb_model_upload_doc = ""
-        st.write("Query engine reinitialized.")
+    # if st.button('Reinitialize Query Engine', key='reinit_engine'):
+    #     del query_engine_upload_doc
+    #     st.write("Query engine reinitialized.")
 
     uploaded_file = st.file_uploader("Upload an File", type=("txt", "csv", "md","pdf"))
-    question = st.text_input(
+    question = st.text_area(
         "Ask something about the files",
         placeholder="Can you give me a short summary?",
         disabled=not uploaded_file,
@@ -251,22 +253,23 @@ with tab3:
 
     if uploaded_file:
         if not os.path.exists("draft_docs"):
-            st.error("draft_docs directory does not exist. Please download and copy paste a model in folder models.")
             os.makedirs("draft_docs")
         with open("draft_docs/"+uploaded_file.name, "wb") as f:
             text = uploaded_file.read()
             f.write(text)
         text = uploaded_file.read()
         # Embedding Model Loading
-        if 'emb_model_upload_doc' not in st.session_state:
-            st.session_state.emb_model_upload_doc = load_emb_uploaded_document("draft_docs/"+uploaded_file.name)
-        # Use the models from session state
-        query_engine_upload_doc = st.session_state.emb_model_upload_doc
         # if load_emb_uploaded_document:
         #     load_emb_uploaded_document.clear()
         #load_emb_uploaded_document.clear()
+        query_engine_upload_doc = load_emb_uploaded_document("draft_docs/"+uploaded_file.name)
         st.write("File ",uploaded_file.name, "was loaded successfully")
-
+    else:
+        try:
+            del query_engine_upload_doc
+        except:
+            pass
+
     if uploaded_file and question and api_server_info:
         contextual_prompt = st.session_state.memory + "\n" + question
         response = query_engine_upload_doc.query(contextual_prompt)
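The new side simplifies tab3: the uploaded file is persisted to `draft_docs/` so it can be indexed from disk, the engine comes straight from the cached loader, and a new `else` branch drops the engine reference when no file is present. A condensed sketch of that flow; the loader stub is a stand-in for app.py's cached builder, and it folds the file's two `read()` calls into one, since a second read of a consumed upload stream returns empty bytes:

```python
import os
import streamlit as st

def load_emb_uploaded_document(path):
    return object()  # stand-in for app.py's cached index/query-engine builder

uploaded_file = st.file_uploader("Upload an File", type=("txt", "csv", "md", "pdf"))

if uploaded_file:
    os.makedirs("draft_docs", exist_ok=True)  # replaces the exists() check
    path = os.path.join("draft_docs", uploaded_file.name)
    with open(path, "wb") as f:
        f.write(uploaded_file.read())  # persist once; the stream is now consumed
    query_engine_upload_doc = load_emb_uploaded_document(path)
else:
    query_engine_upload_doc = None  # arguably cleaner than del + bare except
```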
@@ -288,7 +291,8 @@ with tab3:
     #st.write()
     #print("Is File uploaded : ",uploaded_file==True, "Is question asked : ", question==True, "Is question asked : ", api_server_info==True)
 
-    st.subheader('⚠️ Warning: To avoid lags')
+    st.subheader('⚠️ Warning: To avoid lags read carefully the steps below')
+    st.markdown("**ONE EXECUTION COULD TAKE UP TO 2 or 3 minutes because of hardware (0.9 token/second)**")
     st.markdown("Please consider **delete input prompt** and **clear memory** with the button on sidebar, each time you switch to another tab")
     st.markdown("If you've got a GPU locally, the execution could be a **lot faster** (approximately 5 seconds on my local machine).")