File size: 1,941 Bytes
018fb30 037c950 018fb30 f7493dd 037c950 c52adb8 037c950 008f20f 037c950 018fb30 c7297e1 f514bc9 cbed288 c7297e1 f514bc9 c7297e1 18cb8f3 403222a 18cb8f3 037c950 403222a cbed288 018fb30 403222a 018fb30 037c950 018fb30 037c950 018fb30 037c950 018fb30 037c950 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import gradio as gr
import os
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
# Embeddings are computed remotely through the Hugging Face Inference API.
# The access token is read from the "HF" environment variable; a KeyError
# here means that variable is not set.
inference_api_key = os.environ['HF']
api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=inference_api_key,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# Load the PDF and dump the loaded documents to stdout for inspection.
loader = PyPDFLoader("new_papers/ReACT.pdf")
documents = loader.load()
print("-----------")
print(documents)
print("-----------")

# Split the loaded documents into chunks (chunk_size=1000, no overlap),
# then embed each chunk and store it in a Chroma collection.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
vdocuments = text_splitter.split_documents(documents)
api_db = Chroma.from_documents(
    vdocuments,
    api_hf_embeddings,
    collection_name="api-collection",
)

# Startup sanity check: run one similarity search against the store that was
# just built (`api_db`) and print the best-matching chunk.
# NOTE(review): the query text looks carried over from an unrelated example;
# it only demonstrates that search runs end to end.
query = "What did the president say about Ketanji Brown Jackson"
docs = api_db.similarity_search(query)
if docs:  # an empty result must not abort startup with an IndexError
    print(docs[0].page_content)
class PDFRetrievalTool:
    """Callable adapter exposing a document retriever as a text-in/text-out function.

    The wrapped object must provide ``get_relevant_documents(query)`` returning
    a sequence of objects with a ``page_content`` attribute (the LangChain
    retriever interface, e.g. the result of ``vectorstore.as_retriever()``).
    """

    def __init__(self, retriever):
        """Store the retriever that will answer queries."""
        self.retriever = retriever

    def __call__(self, query):
        """Return the text of the documents most relevant to *query*.

        Args:
            query: Free-text search string.

        Returns:
            The ``page_content`` of every retrieved document, separated by a
            blank line. An empty string when *query* is empty/whitespace or
            when nothing is retrieved.
        """
        # Skip the embedding round-trip for blank input; a live UI can invoke
        # the callback before the user has typed anything.
        if not query or not query.strip():
            return ""
        # Retrievers return a list of documents, not a dict with a 'result'
        # key, so the text is taken from each document's page_content.
        matches = self.retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in matches)
# Wire the API-embedding vector store into a Gradio text-in/text-out UI.
# The retriever is configured to return only the single best match (k=1).
retrieval_fn = PDFRetrievalTool(api_db.as_retriever(search_kwargs=dict(k=1)))

api_tool = gr.Interface(
    fn=retrieval_fn,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    title="API PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HF Inference API Embeddings).",
    live=True,  # re-run the query as the input changes, without a submit click
)

# Start the web server for the interface.
api_tool.launch()
|