"""Gradio demo: index a PDF in a Chroma vector store and retrieve passages.

Running this script loads one PDF, embeds its documents, stores them in an
in-process Chroma collection, and serves a text-in / text-out web UI that
returns the stored passage most similar to the user's query.
"""

import gradio as gr
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Embedding model used both for indexing the documents and for embedding
# queries; the two instruction strings are prepended to the respective inputs.
# NOTE(review): "gpt2" is not an instruction-tuned embedding model; this class
# is normally used with an INSTRUCTOR checkpoint -- confirm the intended model.
hf = HuggingFaceInstructEmbeddings(
    model_name="gpt2",
    embed_instruction="Represent the document for retrieval: ",
    query_instruction="Represent the query for retrieval: ",
)

# Load and process the PDF file.
# PyPDFLoader takes a single file path; indexing a whole directory of PDFs
# would need a directory-based loader instead.
loader = PyPDFLoader("./new_papers/ReACT.pdf")
documents = loader.load()

# Create a Chroma vector store from the PDF documents.
db = Chroma.from_documents(documents, hf, collection_name="my-collection")


class PDFRetrievalTool:
    """Callable that returns the indexed passage(s) most similar to a query."""

    def __init__(self):
        # k=1: only the single best-matching document is retrieved per query.
        self.retriever = db.as_retriever(search_kwargs={"k": 1})

    def __call__(self, query: str) -> str:
        """Return the text of the best-matching document(s) for *query*.

        Args:
            query: Free-text search string typed by the user.

        Returns:
            The page content of the retrieved documents joined by blank
            lines; an empty string for a blank query; a short notice when
            the retriever finds nothing.
        """
        # The interface runs in live mode, so this is invoked while the user
        # is still typing -- including with an empty textbox.
        if not query or not query.strip():
            return ""

        # A retriever returns a list of Document objects (not a chain-style
        # {"result": ...} mapping), so extract the text from each one.
        matches = self.retriever.get_relevant_documents(query)
        if not matches:
            return "No matching passages found."
        return "\n\n".join(doc.page_content for doc in matches)


# Create the Gradio interface using the PDFRetrievalTool.
tool = gr.Interface(
    PDFRetrievalTool(),
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    live=True,
    title="PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query.",
)

# Launch the Gradio interface.
tool.launch()