import streamlit as st
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from dotenv import load_dotenv
import PyPDF2
import os
import io
# Set page configuration
st.set_page_config(layout="centered")
st.markdown("<h1 style='font-size:24px;'>PDF ChatBot by Ali &amp; Arooj</h1>", unsafe_allow_html=True)

# Load environment variables from .env file
load_dotenv()

# Retrieve API key from environment variable
google_api_key = os.getenv("GOOGLE_API_KEY")

# Check if the API key is available
if google_api_key is None:
    st.warning("API key not found. Please set the GOOGLE_API_KEY environment variable.")
    st.stop()

uploaded_file = st.file_uploader("Upload your PDF or DOCX file here", type=["pdf", "docx"])
# Prompt template
prompt_template = """
Answer the question in as much detail as possible from the provided context.
Make sure to include all relevant details. If the answer is not in the
provided context, just say "answer is not available in the context";
don't provide a wrong answer.

Context:\n{context}\n
Question:\n{question}\n
Answer:
"""
# Additional prompt suggestions
prompt_template += """
--------------------------------------------------
Prompt Suggestions:
1. Summarize the primary theme of the context.
2. Elaborate on the crucial concepts highlighted in the context.
...
20. Cite case studies or examples that demonstrate the concepts discussed in the context.
"""
# Function to process PDF and DOCX files
def process_files(uploaded_file):
    if uploaded_file is not None:
        st.text("File Uploaded Successfully!")

        # Check file type and process accordingly
        if uploaded_file.type == "application/pdf":
            # PDF processing: read every page and join the extracted text
            pdf_data = uploaded_file.read()
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
            pdf_pages = pdf_reader.pages
            context = "\n\n".join(page.extract_text() or "" for page in pdf_pages)

            # Split the text into chunks and build a FAISS retriever over Google embeddings
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
            texts = text_splitter.split_text(context)
            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=google_api_key)
            vector_index = FAISS.from_texts(texts, embeddings).as_retriever()
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            # DOCX processing is not implemented yet; stop here instead of failing later
            st.warning("DOCX support is not implemented yet. Please upload a PDF.")
            st.stop()
        else:
            st.warning("Unsupported file format. Please upload PDF or DOCX.")
            st.stop()
        user_question = st.text_input("Ask Anything from PDF:", "")

        if st.button("Get Answer"):
            if user_question:
                with st.spinner("Processing..."):
                    # Retrieve the most relevant chunks and run the "stuff" QA chain
                    docs = vector_index.get_relevant_documents(user_question)
                    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
                    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, google_api_key=google_api_key)
                    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
                    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
                    st.subheader("Answer:")
                    st.write(response["output_text"])
            else:
                st.warning("Please ask a question first.")
# Main function
def main():
    process_files(uploaded_file)

if __name__ == "__main__":
    main()
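
# Usage (a minimal sketch; the package names below are the usual PyPI
# distributions and may differ in your environment):
#   pip install streamlit langchain langchain-community langchain-google-genai python-dotenv PyPDF2 faiss-cpu
#   echo "GOOGLE_API_KEY=<your-key>" > .env
#   streamlit run app.py   # assuming this file is saved as app.py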