rag-tool

Running

Chris4K committed on Nov 23, 2023

Commit

018fb30

1 Parent(s): 11501be

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,40 @@
-from transformers.tools.base import launch_gradio_demo
-from vector_store_retriever import VectorStoreRetrievalTool
-launch_gradio_demo(VectorStoreRetrievalTool)

+import gradio as gr
+from langchain.document_loaders import DirectoryLoader, PyPDFLoader
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.vectorstores import Chroma
+# Embedding model shared by indexing and querying. `embed_instruction` is the
+# instruction used when embedding documents; `query_instruction` is the one
+# used when embedding a search query.
+hf = HuggingFaceInstructEmbeddings(
+    model_name="hkunlp/instructor-large",
+    embed_instruction="Represent the document for retrieval: ",
+    query_instruction="Represent the query for retrieval: ",
+)
+# Load every PDF directly inside ./new_papers/. PyPDFLoader handles exactly
+# one file and accepts no `glob` argument, so the directory scan is delegated
+# to DirectoryLoader, which creates one PyPDFLoader per matching file.
+loader = DirectoryLoader('./new_papers/', glob="*.pdf", loader_cls=PyPDFLoader)
+documents = loader.load()
+# Embed the loaded documents with `hf` and store them in a Chroma collection.
+db = Chroma.from_documents(documents, hf, collection_name="my-collection")
+class PDFRetrievalTool:
+    """Callable that returns the indexed PDF text most similar to a query.
+
+    An instance is used as the function behind the Gradio interface: it is
+    called with the textbox contents and returns the text to display.
+    """
+
+    def __init__(self):
+        # k=1: fetch only the single closest document per query.
+        self.retriever = db.as_retriever(search_kwargs={"k": 1})
+
+    def __call__(self, query):
+        """Return the text of the documents retrieved for ``query``.
+
+        Args:
+            query: Free-text search string.
+
+        Returns:
+            The ``page_content`` of each retrieved document, joined by a
+            blank line. An empty string when ``query`` is empty or blank,
+            or when nothing is retrieved.
+        """
+        # Blank input has nothing to search for; skip the embedding and
+        # vector-store lookup entirely.
+        if not query or not query.strip():
+            return ""
+        # A vector-store retriever exposes get_relevant_documents(), which
+        # returns a list of Document objects. It has no run() method and
+        # does not return a {'result': ...} mapping.
+        matches = self.retriever.get_relevant_documents(query)
+        return "\n\n".join(match.page_content for match in matches)
+# Expose the retrieval tool through a text-in / text-out Gradio interface.
+# live=True is passed so the interface runs in Gradio's live mode.
+tool = gr.Interface(
+    fn=PDFRetrievalTool(),
+    title="PDF Retrieval Tool",
+    description="This tool indexes PDF documents and retrieves relevant answers based on a given query.",
+    inputs=gr.Textbox(),
+    outputs=gr.Textbox(),
+    live=True,
+)
+# Start serving the interface.
+tool.launch()