Chandranshu Jain committed on
Commit
a3286b2
·
verified ·
1 Parent(s): 23ac240

Update app3.py

Browse files
Files changed (1) hide show
  1. app3.py +33 -46
app3.py CHANGED
@@ -1,13 +1,12 @@
1
  import streamlit as st
2
  from PyPDF2 import PdfReader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
 
4
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
- import google.generativeai as genai
6
- from langchain.vectorstores import FAISS
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
  from langchain.chains.question_answering import load_qa_chain
9
  from langchain.prompts import PromptTemplate
10
- import os
11
 
12
  st.set_page_config(page_title="Document Genie", layout="wide")
13
 
@@ -20,35 +19,12 @@ This chatbot is built using the Retrieval-Augmented Generation (RAG) framework,
20
 
21
  Follow these simple steps to interact with the chatbot:
22
 
23
- 1. **Enter Your API Key**: You'll need a Google API key for the chatbot to access Google's Generative AI models. Obtain your API key https://makersuite.google.com/app/apikey.
24
-
25
- 2. **Upload Your Documents**: The system accepts multiple PDF files at once, analyzing the content to provide comprehensive insights.
26
 
27
- 3. **Ask a Question**: After processing the documents, ask any question related to the content of your uploaded documents for a precise answer.
28
  """)
29
 
30
-
31
-
32
- # This is the first API key input; no need to repeat it in the main function.
33
- api_key = st.text_input("Enter your Google API Key:", type="password", key="api_key_input")
34
-
35
- def get_pdf_text(pdf_docs):
36
- text = ""
37
- for pdf in pdf_docs:
38
- pdf_reader = PdfReader(pdf)
39
- for page in pdf_reader.pages:
40
- text += page.extract_text()
41
- return text
42
-
43
- def get_text_chunks(text):
44
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
45
- chunks = text_splitter.split_text(text)
46
- return chunks
47
-
48
- def get_vector_store(text_chunks, api_key):
49
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
50
- vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
51
- vector_store.save_local("faiss_index")
52
 
53
  def get_conversational_chain():
54
  prompt_template = """
@@ -64,31 +40,42 @@ def get_conversational_chain():
64
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
65
  return chain
66
 
67
- def user_input(user_question, api_key):
68
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
69
- new_db = FAISS.load_local("faiss_index", embeddings)
70
- docs = new_db.similarity_search(user_question)
71
- chain = get_conversational_chain()
72
- response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
73
- st.write("Reply: ", response["output_text"])
74
 
75
- def main():
76
- st.header("AI clone chatbot💁")
 
 
 
 
77
 
78
- user_question = st.text_input("Ask a Question from the PDF Files", key="user_question")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- if user_question and api_key: # Ensure API key and user question are provided
81
- user_input(user_question, api_key)
82
 
83
  with st.sidebar:
84
  st.title("Menu:")
85
  pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True, key="pdf_uploader")
86
- if st.button("Submit & Process", key="process_button") and api_key: # Check if API key is provided before processing
87
  with st.spinner("Processing..."):
88
- raw_text = get_pdf_text(pdf_docs)
89
- text_chunks = get_text_chunks(raw_text)
90
- get_vector_store(text_chunks, api_key)
91
  st.success("Done")
 
 
92
 
93
  if __name__ == "__main__":
94
  main()
 
1
  import streamlit as st
2
  from PyPDF2 import PdfReader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ import os
5
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ from langchain_community.vectorstores import Chroma
 
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
  from langchain.chains.question_answering import load_qa_chain
9
  from langchain.prompts import PromptTemplate
 
10
 
11
  st.set_page_config(page_title="Document Genie", layout="wide")
12
 
 
19
 
20
  Follow these simple steps to interact with the chatbot:
21
 
22
+ 1. **Upload Your Documents**: The system accepts multiple PDF files at once, analyzing the content to provide comprehensive insights.
 
 
23
 
24
+ 2. **Ask a Question**: After processing the documents, ask any question related to the content of your uploaded documents for a precise answer.
25
  """)
26
 
27
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def get_conversational_chain():
30
  prompt_template = """
 
40
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
41
  return chain
42
 
 
 
 
 
 
 
 
43
 
44
def get_pdf(pdf_docs, query):
    """Answer ``query`` using the text of the uploaded PDF files.

    The text of every page is extracted and concatenated, split into
    overlapping chunks, embedded into a Chroma vector store, and the chunks
    most similar to ``query`` are handed to the question-answering chain.

    Args:
        pdf_docs: Iterable of PDFs (anything ``PdfReader`` accepts, e.g. the
            objects returned by ``st.file_uploader``).
        query: The user's question.

    Returns:
        A mapping whose ``"output_text"`` entry holds the answer; the caller
        reads that key.
    """
    # Join once instead of repeated ``+=``; ``or ""`` tolerates a page for
    # which the extractor yields nothing (e.g. image-only pages).
    text = "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
        separators=["\n\n", "\n", " ", ".", ","],
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Nothing to index: return the same shape the chain produces so the
        # caller's ``response["output_text"]`` lookup keeps working.
        return {"output_text": "No readable text was found in the uploaded PDF files."}

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # ``split_text`` returns plain strings, so the store must be built with
    # ``from_texts``; ``from_documents`` expects Document objects.
    vector = Chroma.from_texts(chunks, embedding=embeddings)
    docs = vector.similarity_search(query)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": query},
        return_only_outputs=True,
    )
    return response
64
+
65
def main():
    """Render the app: question box in the main area, upload and trigger in the sidebar.

    When the button is clicked with both a question and at least one uploaded
    PDF, the documents are processed via ``get_pdf`` and the answer is shown.
    """
    st.header("Chat with your pdf💁")

    query = st.text_input("Ask a Question from the PDF Files", key="query")

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
            key="pdf_uploader",
        )
        # Render the button unconditionally: testing ``query`` first would
        # short-circuit and hide the button until a question is typed, even
        # though the uploader label tells the user to click it.
        if st.button("Submit & Process", key="process_button"):
            if not query:
                st.warning("Please enter a question first.")
            elif not pdf_docs:
                # With accept_multiple_files=True an empty upload is an empty list.
                st.warning("Please upload at least one PDF file.")
            else:
                with st.spinner("Processing..."):
                    response = get_pdf(pdf_docs, query)
                # NOTE(review): the original nesting of the two calls below is
                # not recoverable from the scraped text; they are kept in the
                # sidebar, after the spinner finishes — confirm placement.
                st.success("Done")
                st.write("Reply: ", response["output_text"])


if __name__ == "__main__":
    main()