Spaces:

vishwask
/

rag

Sleeping

App Files Files Community

vishwask committed on Feb 28, 2024

Commit

279b0d5

verified ·

1 Parent(s): 3886089

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -11

app.py CHANGED Viewed

@@ -79,7 +79,7 @@ def load_db():
 # Initialize langchain LLM chain
-def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     llm = HuggingFaceHub(repo_id=llm_model, model_kwargs={"temperature":
                                                           temperature, "max_new_tokens":
                                                           max_tokens, "top_k": top_k,
@@ -111,7 +111,6 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap):
     # Create list of documents (when valid)
     list_file_path = [x.name for x in list_file_obj if x is not None]
     # Create collection_name for vector database
-    progress(0.1, desc="Creating collection name...")
     collection_name = Path(list_file_path[0]).stem
     # Fix potential issues from naming convention
     ## Remove space
@@ -125,23 +124,20 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap):
         collection_name[-1] = 'Z'
     # print('list_file_path: ', list_file_path)
     print('Collection name: ', collection_name)
-    progress(0.25, desc="Loading document...")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
     # Create or load vector database
-    progress(0.5, desc="Generating vector database...")
     # global vector_db
     vector_db = create_db(doc_splits, collection_name)
-    progress(0.9, desc="Done!")
     return vector_db, collection_name
-def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     # print("llm_option",llm_option)
     llm_name = list_llm[llm_option]
     print("llm_name: ",llm_name)
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
-    return qa_chain, "Complete!"
 def format_chat_history(message, chat_history):
@@ -175,7 +171,7 @@ def conversation(qa_chain, message, history):
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]
     # return gr.update(value=""), new_history, response_sources[0], response_sources[1]
-    return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
 def upload_file(file_obj):
@@ -194,11 +190,10 @@ def demo():
         qa_chain = gr.State()
         collection_name = gr.State()
-        document = gr.Files(value = ['/home/user/app/pdfs/Annual-Report-2022-2023-English_1.pdf'],visible=False,
-                                height=100, file_count="multiple", file_types=["pdf"], label="Upload your PDF documents (single or multiple)")
         chatbot = gr.Chatbot(height=300)
         db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database", visible=False)
-        with gr.Accordion("Advanced - Document references", open=False):
             with gr.Row():
                 doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
                 source1_page = gr.Number(label="Page", scale=1)

 # Initialize langchain LLM chain
+def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     llm = HuggingFaceHub(repo_id=llm_model, model_kwargs={"temperature":
                                                           temperature, "max_new_tokens":
                                                           max_tokens, "top_k": top_k,
     # Create list of documents (when valid)
     list_file_path = [x.name for x in list_file_obj if x is not None]
     # Create collection_name for vector database
     collection_name = Path(list_file_path[0]).stem
     # Fix potential issues from naming convention
     ## Remove space
         collection_name[-1] = 'Z'
     # print('list_file_path: ', list_file_path)
     print('Collection name: ', collection_name)
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
     # Create or load vector database
     # global vector_db
     vector_db = create_db(doc_splits, collection_name)
     return vector_db, collection_name
+def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
     # print("llm_option",llm_option)
     llm_name = list_llm[llm_option]
     print("llm_name: ",llm_name)
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
+    return qa_chain
 def format_chat_history(message, chat_history):
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]
     # return gr.update(value=""), new_history, response_sources[0], response_sources[1]
+    return qa_chain, new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
 def upload_file(file_obj):
         qa_chain = gr.State()
         collection_name = gr.State()
+        document = gr.Files(value = os.listdir('/home/user/app/pdfs/'),visible=False,height=100, file_count="multiple", file_types=["pdf"])
         chatbot = gr.Chatbot(height=300)
         db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database", visible=False)
+        with gr.Accordion("Document references", open=False):
             with gr.Row():
                 doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
                 source1_page = gr.Number(label="Page", scale=1)