|
import gradio as gr
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
|
|
|
|
|
# Embedding model configuration: the "hkunlp/instructor-large" model, with one
# instruction string for documents being indexed and another for queries.
_instructor_settings = {
    "model_name": "hkunlp/instructor-large",
    "embed_instruction": "Represent the document for retrieval: ",
    "query_instruction": "Represent the query for retrieval: ",
}
hf = HuggingFaceInstructEmbeddings(**_instructor_settings)
|
|
|
|
|
# PyPDFLoader reads a single PDF file and takes no ``glob`` argument, so the
# directory scan is delegated to DirectoryLoader, which applies PyPDFLoader to
# every file under ./new_papers/ that matches the pattern.
loader = DirectoryLoader(
    './new_papers/',
    glob="./*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()
|
|
|
|
|
# Embed the loaded documents with ``hf`` and index them in a Chroma collection
# named "my-collection".
db = Chroma.from_documents(
    documents=documents,
    embedding=hf,
    collection_name="my-collection",
)
|
|
|
class PDFRetrievalTool:
    """Callable that returns the indexed PDF text most similar to a query.

    Instances are plain callables (``tool(query) -> str``), so they can be
    handed directly to a UI framework as the handler function.
    """

    def __init__(self, retriever=None):
        """Store the retriever used to answer queries.

        Args:
            retriever: Optional object exposing
                ``get_relevant_documents(query)``. When omitted, a retriever
                over the module-level ``db`` vector store is created that
                returns only the single closest match (``k=1``).
        """
        if retriever is None:
            retriever = db.as_retriever(search_kwargs={"k": 1})
        self.retriever = retriever

    def __call__(self, query):
        """Return the text of the passage(s) retrieved for ``query``.

        Args:
            query: Free-text search string.

        Returns:
            The ``page_content`` of every retrieved document, separated by
            blank lines; an empty string when the query is blank or nothing
            is retrieved.
        """
        # Nothing to search for: skip the embedding/lookup round trip.
        if not query or not query.strip():
            return ""

        # A retriever yields a list of documents (not a dict with a 'result'
        # key, and it has no ``run`` method), so the text is pulled from each
        # document's ``page_content``.
        matches = self.retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in matches)
|
|
|
|
|
# Handler invoked by the interface: one PDFRetrievalTool instance, created
# once and reused for every request.
_answer_query = PDFRetrievalTool()

# Single text box in, single text box out.
tool = gr.Interface(
    fn=_answer_query,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    title="PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query.",
    live=True,
)

# Start serving the interface.
tool.launch()
|
|