rag-tool

Running

App Files Files Community

rag-tool / app.py

Chris4K

Update app.py

f7493dd over 1 year ago

raw

history blame

1.94 kB

	import gradio as gr
	import os
	from langchain.vectorstores import Chroma
	from langchain.document_loaders import PyPDFLoader
	from langchain.text_splitter import CharacterTextSplitter

	from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings

	# Embeddings are computed through the Hugging Face Inference API.
	# The access token is read from the "HF" environment variable; if the
	# variable is not set, the lookup below raises KeyError.
	inference_api_key = os.environ["HF"]
	api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
	    model_name="sentence-transformers/all-MiniLM-l6-v2",
	    api_key=inference_api_key,
	)

	# Load the source PDF into LangChain documents.
	loader = PyPDFLoader("new_papers/ReACT.pdf")
	documents = loader.load()

	# Startup diagnostic: dump the loaded documents between separator lines.
	print("-----------")
	print(documents)
	print("-----------")

	# Cut the loaded documents into chunks (chunk_size=1000, no overlap), then
	# embed every chunk and store it in the "api-collection" Chroma collection.
	text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
	vdocuments = text_splitter.split_documents(documents)

	api_db = Chroma.from_documents(
	    documents=vdocuments,
	    embedding=api_hf_embeddings,
	    collection_name="api-collection",
	)

	# Startup smoke test: run one similarity search against the vector store.
	# NOTE(review): this query looks unrelated to the indexed PDF
	# ("new_papers/ReACT.pdf") -- confirm it is the intended sample question.
	query = "What did the president say about Ketanji Brown Jackson"
	# The store built above is named ``api_db``; no ``db`` exists in this script.
	docs = api_db.similarity_search(query)
	if docs:
	    print(docs[0].page_content)
	else:
	    # Avoid an IndexError when the search returns no documents.
	    print("No documents matched the similarity search query.")


	class PDFRetrievalTool:
	    """Callable that maps a text query to the text of the retrieved documents.

	    The wrapped object must behave like a LangChain retriever: it exposes
	    ``invoke(query)`` or ``get_relevant_documents(query)`` and returns an
	    iterable of objects that carry a ``page_content`` string.
	    """

	    def __init__(self, retriever):
	        """Keep a reference to the retriever queried on every call."""
	        self.retriever = retriever

	    def __call__(self, query):
	        """Return the retrieved passages for ``query`` as one string.

	        Args:
	            query: The user's question or search text.

	        Returns:
	            The ``page_content`` of every retrieved document, separated by
	            blank lines. An empty string is returned when ``query`` is
	            empty/whitespace-only or when nothing is retrieved.
	        """
	        # Nothing to search for: skip the retriever call entirely.
	        if not query or not query.strip():
	            return ""

	        # A retriever returns documents directly; it has no chain-style
	        # ``run`` method and its output has no ``'result'`` key.
	        if hasattr(self.retriever, "invoke"):
	            documents = self.retriever.invoke(query)
	        else:
	            # Fallback for retriever objects that lack ``invoke``.
	            documents = self.retriever.get_relevant_documents(query)

	        return "\n\n".join(doc.page_content for doc in documents)

	# Wrap the vector store's retriever (top-1 match per query) in the callable
	# tool and expose it through a text-in/text-out Gradio interface.
	pdf_lookup = PDFRetrievalTool(api_db.as_retriever(search_kwargs={"k": 1}))
	api_tool = gr.Interface(
	    fn=pdf_lookup,
	    inputs=gr.Textbox(),
	    outputs=gr.Textbox(),
	    title="API PDF Retrieval Tool",
	    description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HF Inference API Embeddings).",
	    live=True,
	)

	# Start serving the interface.
	api_tool.launch()