raj999 committed on
Commit 27a6371 · verified · 1 Parent(s): b3ae10a

Update app.py

Files changed (1)
  1. app.py +26 -40
app.py CHANGED
@@ -5,53 +5,48 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.llms import HuggingFaceHub
 from langchain.chains import ConversationalRetrievalChain
+from unstructured.documents import from_pdf
+import camelot
+from pathlib import Path
 
 # Load the HuggingFace language model and embeddings
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-# Initialize the embeddings model for document retrieval
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
-# Initialize vector_store and retriever as None initially
 vector_store = None
 retriever = None
 
+def extract_text_from_pdf(filepath):
+    # Use unstructured to read text from the PDF
+    documents = from_pdf(filepath)
+    return "\n".join([doc.text for doc in documents])
+
+def extract_tables_from_pdf(filepath):
+    # Use camelot to read tables from the PDF
+    tables = camelot.read_pdf(filepath, pages='1-end')
+    return [table.df.to_string(index=False) for table in tables]
+
 def update_documents(text_input):
     global vector_store, retriever
-    # Split the input text into individual documents based on newlines or other delimiters
     documents = text_input.split("\n")
-
-    # Update the FAISS vector store with new documents
     vector_store = FAISS.from_texts(documents, embeddings)
-
-    # Set the retriever to use the new vector store
     retriever = vector_store.as_retriever()
     return f"{len(documents)} documents successfully added to the vector store."
 
-# Set up ConversationalRetrievalChain
 rag_chain = None
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def respond(message, history, system_message, max_tokens, temperature, top_p):
     global rag_chain, retriever
 
     if retriever is None:
         return "Please upload or enter documents before asking a question."
 
-    # Create the chain if it hasn't been initialized
     if rag_chain is None:
         rag_chain = ConversationalRetrievalChain.from_llm(
             HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta"),
             retriever=retriever
         )
 
-    # Combine history with the user message
     conversation_history = [{"role": "system", "content": system_message}]
 
     for val in history:
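
As a point of comparison, the two ingestion helpers added in this hunk can also be written against unstructured's documented partition_pdf entry point; the sketch below is illustrative only (it assumes unstructured installed with its pdf extra plus camelot-py, and the file path is hypothetical), not the code in this commit.

# Sketch: PDF ingestion helpers (assumptions: "unstructured[pdf]" and
# "camelot-py" are installed; the sample path is illustrative).
from unstructured.partition.pdf import partition_pdf
import camelot

def extract_text(filepath):
    # partition_pdf returns a list of Element objects; each exposes .text
    elements = partition_pdf(filename=filepath)
    return "\n".join(el.text for el in elements if el.text)

def extract_tables(filepath):
    # camelot returns a TableList; each table wraps a pandas DataFrame in .df
    tables = camelot.read_pdf(filepath, pages="all")
    return [t.df.to_string(index=False) for t in tables]

# Example (hypothetical file):
# print(extract_text("sample.pdf")[:300])
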
@@ -62,35 +57,29 @@ def respond(
 
     conversation_history.append({"role": "user", "content": message})
 
-    # Retrieve documents and generate response
     response = rag_chain({"question": message, "chat_history": history})
-
-    # Return the model's response
     return response['answer']
 
 def upload_file(filepath):
-    name = Path(filepath).name
-    return [gr.UploadButton(visible=False), gr.DownloadButton(label=f"Download {name}", value=filepath, visible=True)]
-
-def download_file():
-    return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]
+    text = extract_text_from_pdf(filepath)
+    tables = extract_tables_from_pdf(filepath)
+
+    # Update documents in the vector store
+    update_documents(text)
+
+    return [gr.UploadButton(visible=False), gr.DownloadButton(label=f"Download {Path(filepath).name}", value=filepath, visible=True), f"{len(tables)} tables extracted."]
 
 # Gradio interface setup
 demo = gr.Blocks()
 
 with demo:
     with gr.Row():
-        # upload_button = gr.Button("Upload Documents")
-        with gr.Row():
-            u = gr.UploadButton("Upload a file", file_count="single")
-            d = gr.DownloadButton("Download the file", visible=False)
-
-            u.upload(upload_file, u, [u, d])
-            d.click(download_file, None, [u, d])
-
+        u = gr.UploadButton("Upload a file", file_count="single")
+        d = gr.DownloadButton("Download the file", visible=False)
+
+        u.upload(upload_file, u, [u, d, "status"])
 
     with gr.Row():
-        # Chat interface for the RAG system
         chat = gr.ChatInterface(
             respond,
             additional_inputs=[
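
Gradio routes event outputs to component objects, so a textual status message normally gets its own component in the layout; below is a minimal sketch of that wiring (the handle_upload handler and the Status Textbox are illustrative names, not part of this commit).

# Sketch: UploadButton wiring with an explicit status component
# (assumption: Gradio 4.x with gr.UploadButton and gr.DownloadButton).
import gradio as gr
from pathlib import Path

def handle_upload(filepath):
    name = Path(filepath).name
    # One return value per output component, in order
    return (
        gr.UploadButton(visible=False),
        gr.DownloadButton(label=f"Download {name}", value=filepath, visible=True),
        f"Received {name}",
    )

with gr.Blocks() as demo:
    with gr.Row():
        u = gr.UploadButton("Upload a file", file_count="single")
        d = gr.DownloadButton("Download the file", visible=False)
        status = gr.Textbox(label="Status", interactive=False)
    u.upload(handle_upload, u, [u, d, status])

if __name__ == "__main__":
    demo.launch()
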
@@ -101,8 +90,5 @@ with demo:
             ],
         )
 
-    # Bind button to update the document vector store
-    # upload_button.click(update_documents, inputs=[doc_input], outputs=gr.Textbox(label="Status"))
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
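
The retrieval path itself follows a common LangChain pattern: build a FAISS index from raw texts, expose it as a retriever, and wrap it in a ConversationalRetrievalChain. A minimal end-to-end sketch using the same classes as app.py (the sample texts and question are illustrative, and a valid HUGGINGFACEHUB_API_TOKEN is assumed to be set in the environment):

# Sketch: FAISS + ConversationalRetrievalChain, as used in app.py
# (assumptions: legacy langchain imports as in this commit, and a
# HUGGINGFACEHUB_API_TOKEN available in the environment).
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains import ConversationalRetrievalChain

texts = ["Paris is the capital of France.", "The Seine flows through Paris."]  # illustrative
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = FAISS.from_texts(texts, embeddings)
retriever = vector_store.as_retriever()

chain = ConversationalRetrievalChain.from_llm(
    HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta"),
    retriever=retriever,
)

# chat_history is a list of (question, answer) tuples for this chain
result = chain({"question": "What is the capital of France?", "chat_history": []})
print(result["answer"])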
 