Spaces:

vineethn
/

qna

Sleeping

qna / app.py

vineeth N

Update app.py

2f98d89 verified 9 months ago

4.33 kB

	import os
	from typing import List
	from dotenv import load_dotenv
	import chainlit as cl
	from langchain_community.embeddings import OpenAIEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import FAISS
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.chains import RetrievalQA
	from langchain_openai import ChatOpenAI
	from langchain_openai import OpenAIEmbeddings

	# Load environment variables from a local .env file, if one is present.
	load_dotenv()

	# Read the OpenAI API key from the environment. os.getenv() takes the *name*
	# of the variable, never the secret itself.
	# NOTE(review): a literal key was previously written on this line; treat that
	# key as compromised and rotate it.
	openai_api_key = os.getenv("OPENAI_API_KEY")

	# Embedding model used both to build the FAISS index and to embed queries.
	embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")

	# Module-level vector store; stays None until process_pdfs() has built or
	# loaded an index.
	vector_store = None

	# Maps each processed PDF's file name to its full path on disk.
	pdf_files = {}

	# Location of the persisted FAISS index (directory and the index file in it).
	FAISS_INDEX_PATH = "faiss_index"
	FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")

	def process_pdfs(directory: str) -> None:
	    """Index every PDF in ``directory`` into the module-level FAISS store.

	    Each PDF is loaded with ``PyPDFLoader``, split into overlapping chunks and
	    embedded. If a persisted index exists under ``FAISS_INDEX_PATH`` it is
	    loaded and only chunks from sources it does not already contain are
	    appended, so repeated calls do not duplicate (or re-embed) documents.
	    Otherwise a new index is built. The resulting index is saved back to
	    ``FAISS_INDEX_PATH``.

	    When there are no PDFs and no persisted index, ``vector_store`` is left
	    unchanged and nothing is written.

	    Args:
	        directory: Directory to scan (non-recursively) for ``.pdf`` files.

	    Raises:
	        OSError: If ``directory`` cannot be listed.
	    """
	    global vector_store, pdf_files
	    documents = []

	    # Sorted so the indexing order does not depend on the filesystem.
	    for filename in sorted(os.listdir(directory)):
	        # Case-insensitive so that "REPORT.PDF" is picked up as well.
	        if not filename.lower().endswith(".pdf"):
	            continue
	        file_path = os.path.join(directory, filename)
	        documents.extend(PyPDFLoader(file_path).load())
	        pdf_files[filename] = file_path

	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    texts = text_splitter.split_documents(documents)

	    store = None
	    if os.path.exists(FAISS_INDEX_FILE):
	        try:
	            # SECURITY: load_local unpickles the stored docstore. This is only
	            # safe because the index directory is written by this application;
	            # never point it at an index obtained from an untrusted source.
	            store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
	            # NOTE(review): relies on the docstore exposing its documents via
	            # ``_dict`` (the in-memory docstore); if it does not, every chunk is
	            # appended as before.
	            indexed_docs = getattr(store.docstore, "_dict", {}).values()
	            indexed_sources = {doc.metadata.get("source") for doc in indexed_docs}
	            new_texts = [
	                chunk for chunk in texts
	                if chunk.metadata.get("source") not in indexed_sources
	            ]
	            if new_texts:
	                store.add_documents(new_texts)
	        except Exception as e:
	            # Best effort: an unreadable or incompatible index is rebuilt below.
	            print(f"Error loading FAISS index: {e}")
	            store = None

	    # Building an index from an empty list fails, so only build when there is
	    # something to embed.
	    if store is None and texts:
	        store = FAISS.from_documents(texts, embeddings)

	    if store is None:
	        return

	    vector_store = store

	    # Save the updated vector store
	    os.makedirs(FAISS_INDEX_PATH, exist_ok=True)
	    vector_store.save_local(FAISS_INDEX_PATH)

	@cl.on_chat_start
	async def start():
	    """Initialize the chat session by indexing the configured PDF directory.

	    The directory is taken from the ``PDF_DIRECTORY`` environment variable and
	    falls back to the original hard-coded location when it is not set. A
	    missing directory is reported to the user instead of raising from the
	    handler.
	    """
	    await cl.Message(content="Welcome! Processing PDFs...").send()

	    # Allow deployments to point at their own PDF folder without editing code.
	    pdf_directory = os.getenv("PDF_DIRECTORY", r"C:\Users\sumes\OneDrive\Documents\pdf_docs")

	    if not os.path.isdir(pdf_directory):
	        await cl.Message(content=f"Error: PDF directory not found: {pdf_directory}").send()
	        return

	    # process_pdfs is a plain synchronous call, so the handler waits here until
	    # indexing has finished.
	    process_pdfs(pdf_directory)

	    if vector_store is None:
	        # Nothing was indexed and no saved index was loaded.
	        await cl.Message(content="No PDFs were found to index, so there is nothing to query yet.").send()
	        return

	    await cl.Message(content="PDFs processed. You can now ask questions!").send()

	@cl.on_message
	async def main(message: cl.Message):
	    """Answer a user message from the indexed PDFs and list the sources used.

	    The three most similar chunks are retrieved from the vector store and
	    passed to a "stuff" RetrievalQA chain. The answer is sent first. Each
	    source PDF known to ``pdf_files`` then gets its own message with the file
	    attached, and any remaining sources are listed together in one message.

	    Args:
	        message: The incoming chat message; its ``content`` is used as the query.
	    """
	    if vector_store is None:
	        await cl.Message(content="Error: Vector store not initialized.").send()
	        return

	    query = message.content

	    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

	    # Initialize the OpenAI language model
	    llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)

	    qa_chain = RetrievalQA.from_chain_type(
	        llm=llm,
	        chain_type="stuff",
	        retriever=retriever,
	        return_source_documents=True,
	    )

	    # Use the async entry point so the event loop is not blocked while the
	    # model is called; "query" is RetrievalQA's input key.
	    result = await qa_chain.ainvoke({"query": query})
	    answer = result["result"]
	    source_docs = result["source_documents"]

	    await cl.Message(content=answer).send()

	    # Split the sources into PDFs we can attach and everything else. Dicts keep
	    # first-seen order and drop duplicates.
	    attachable = {}
	    other_sources = {}
	    for doc in source_docs:
	        source = doc.metadata.get("source")
	        if not source:
	            # A document without source metadata has nothing to cite.
	            continue
	        file_name = os.path.basename(source)
	        if file_name in pdf_files:
	            attachable.setdefault(file_name, pdf_files[file_name])
	        else:
	            other_sources.setdefault(source, None)

	    for file_name, file_path in attachable.items():
	        elements = [
	            cl.Text(name=file_name, content=f"Source: {file_name}"),
	            cl.File(name=file_name, path=file_path, display="inline"),
	        ]
	        await cl.Message(content=f"Source: {file_name}", elements=elements).send()

	    if other_sources:
	        sources_message = "Other Sources:\n" + "\n".join(f"- {source}" for source in other_sources)
	        await cl.Message(content=sources_message).send()

	# Script entry point: only runs when this file is executed directly.
	# NOTE(review): chainlit apps are normally launched with `chainlit run app.py`;
	# confirm that `cl.run` exists in the installed chainlit version.
	if __name__ == "__main__":
	cl.run()