rag-tool

Sleeping

App Files Files Community

rag-tool / app.py

Chris4K

Update app.py

6976271 verified about 1 year ago

raw

history blame

1.64 kB

	import os
	import gradio as gr
	from dotenv import load_dotenv
	from langchain.vectorstores.faiss import FAISS
	from langchain.embeddings import HuggingFaceBgeEmbeddings
	from langchain.document_loaders import PyPDFLoader
	from langchain.text_splitter import CharacterTextSplitter

	# Load environment variables from a local .env file, if one is present
	load_dotenv()

	# Load the PDF; the loader returns a list of Document objects
	loader = PyPDFLoader("./new_papers/ALiBi.pdf")
	documents = loader.load()

	# Split the loaded documents into small, non-overlapping chunks
	text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
	vdocuments = text_splitter.split_documents(documents)

	# Extract the raw text from the Document objects.
	# LangChain Documents keep their text in `page_content`; they have no
	# `text` attribute, so reading `doc.text` raised AttributeError at start-up.
	docs_text = [doc.page_content for doc in vdocuments]

	# Embedding model configuration (runs on CPU)
	model = "BAAI/bge-base-en-v1.5"
	encode_kwargs = {"normalize_embeddings": True}  # set True to compute cosine similarity
	embeddings = HuggingFaceBgeEmbeddings(
	    model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
	)

	# Embed the chunk texts and build the FAISS vector store queried by the tool
	api_db = FAISS.from_texts(texts=docs_text, embedding=embeddings)

	# Retrieval entry point used by the Gradio interface
	def pdf_retrieval(query):
	    """Search the FAISS index for chunks similar to *query*.

	    Args:
	        query: The text to search for.

	    Returns:
	        The result of ``api_db.similarity_search(query)``, unmodified.
	    """
	    return api_db.similarity_search(query)

	# Wire pdf_retrieval into a Gradio UI: one text box for the query,
	# one text box for the retrieved result.
	api_tool = gr.Interface(
	    title="API PDF Retrieval Tool",
	    description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HuggingFaceBgeEmbeddings).",
	    fn=pdf_retrieval,
	    inputs=[gr.Textbox()],
	    outputs=gr.Textbox(),
	    # NOTE(review): live mode presumably re-runs the function as the input
	    # changes, i.e. one embedding call per edit — confirm this is intended.
	    live=True,
	)

	# Start serving the interface
	api_tool.launch()