vineeth N commited on
Commit
4f4392b
·
verified ·
1 Parent(s): aae26f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -61
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
 
2
  from typing import List
3
  from dotenv import load_dotenv
4
- import chainlit as cl
5
  from langchain_community.embeddings import OpenAIEmbeddings
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
@@ -14,10 +14,10 @@ from langchain_openai import OpenAIEmbeddings
14
  load_dotenv()
15
 
16
  # Initialize OpenAI API key
17
- openai_api_key = os.getenv('sk-[REDACTED: an apparent live OpenAI API key was committed here and passed as the env-var *name*; the key should be revoked and loaded via os.getenv("OPENAI_API_KEY")]')
18
 
19
  # Initialize embedding model using OpenAI
20
- embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key,model="text-embedding-3-small")
21
 
22
  # Initialize vector store
23
  vector_store = None
@@ -29,6 +29,7 @@ pdf_files = {}
29
  FAISS_INDEX_PATH = "faiss_index"
30
  FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
31
 
 
32
  def process_pdfs(directory: str) -> None:
33
  """Process all PDFs in the given directory and add them to the vector store."""
34
  global vector_store, pdf_files
@@ -49,7 +50,7 @@ def process_pdfs(directory: str) -> None:
49
  vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
50
  vector_store.add_documents(texts)
51
  except Exception as e:
52
- print(f"Error loading FAISS index: {e}")
53
  vector_store = FAISS.from_documents(texts, embeddings)
54
  else:
55
  vector_store = FAISS.from_documents(texts, embeddings)
@@ -59,60 +60,64 @@ def process_pdfs(directory: str) -> None:
59
  os.makedirs(FAISS_INDEX_PATH)
60
  vector_store.save_local(FAISS_INDEX_PATH)
61
 
62
- @cl.on_chat_start
63
- async def start():
64
- """Initialize the chat session."""
65
- await cl.Message(content="Welcome! Processing PDFs...").send()
66
-
67
- # Process PDFs (replace with your PDF directory)
68
- process_pdfs(r"C:\Users\sumes\OneDrive\Documents\pdf_docs")
69
-
70
- await cl.Message(content="PDFs processed. You can now ask questions!").send()
71
-
72
- @cl.on_message
73
- async def main(message: cl.Message):
74
- """Handle user messages and generate responses."""
75
- if vector_store is None:
76
- await cl.Message(content="Error: Vector store not initialized.").send()
77
- return
78
-
79
- query = message.content
80
-
81
- retriever = vector_store.as_retriever(search_kwargs={"k": 3})
82
-
83
- # Initialize the OpenAI language model
84
- llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)
85
-
86
- qa_chain = RetrievalQA.from_chain_type(
87
- llm=llm,
88
- chain_type="stuff",
89
- retriever=retriever,
90
- return_source_documents=True
91
- )
92
-
93
- result = qa_chain(query)
94
- answer = result['result']
95
- source_docs = result['source_documents']
96
-
97
- await cl.Message(content=answer).send()
98
-
99
- if source_docs:
100
- unique_sources = set()
101
- for doc in source_docs:
102
- file_name = os.path.basename(doc.metadata['source'])
103
- if file_name in pdf_files and file_name not in unique_sources:
104
- unique_sources.add(file_name)
105
- file_path = pdf_files[file_name]
106
- elements = [
107
- cl.Text(name=file_name, content=f"Source: {file_name}"),
108
- cl.File(name=file_name, path=file_path, display="inline")
109
- ]
110
- await cl.Message(content=f"Source: {file_name}", elements=elements).send()
111
-
112
- other_sources = [doc.metadata['source'] for doc in source_docs if os.path.basename(doc.metadata['source']) not in pdf_files]
113
- unique_other_sources = set(other_sources)
114
- if unique_other_sources:
115
- sources_message = "Other Sources:\n" + "\n".join(f"- {source}" for source in unique_other_sources)
116
- await cl.Message(content=sources_message).send()
117
-
118
-
 
 
 
 
 
1
  import os
2
+ import streamlit as st
3
  from typing import List
4
  from dotenv import load_dotenv
 
5
  from langchain_community.embeddings import OpenAIEmbeddings
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
 
14
  load_dotenv()
15
 
16
  # Initialize OpenAI API key
17
+ openai_api_key = os.getenv('OPENAI_API_KEY')
18
 
19
  # Initialize embedding model using OpenAI
20
+ embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")
21
 
22
  # Initialize vector store
23
  vector_store = None
 
29
  FAISS_INDEX_PATH = "faiss_index"
30
  FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
31
 
32
+ @st.cache_resource
33
  def process_pdfs(directory: str) -> None:
34
  """Process all PDFs in the given directory and add them to the vector store."""
35
  global vector_store, pdf_files
 
50
  vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
51
  vector_store.add_documents(texts)
52
  except Exception as e:
53
+ st.error(f"Error loading FAISS index: {e}")
54
  vector_store = FAISS.from_documents(texts, embeddings)
55
  else:
56
  vector_store = FAISS.from_documents(texts, embeddings)
 
60
  os.makedirs(FAISS_INDEX_PATH)
61
  vector_store.save_local(FAISS_INDEX_PATH)
62
 
63
def main():
    """Streamlit entry point: index the PDF directory, then answer user questions over it.

    Reads the module-level globals `vector_store`, `pdf_files`, `openai_api_key`,
    `st`, `ChatOpenAI`, and `RetrievalQA` set up elsewhere in this file.
    """
    st.title("PDF Question Answering System")

    # Directory of PDFs to index. Overridable via PDF_DIRECTORY so the path
    # is not hard-coded to one machine; the literal default matches the
    # original placeholder behavior.
    pdf_directory = os.getenv("PDF_DIRECTORY", "/path/to/your/pdf/directory")
    # NOTE(review): process_pdfs is decorated with @st.cache_resource but
    # communicates its result through the module-level `vector_store` global.
    # On a Streamlit rerun the global is reset to None while the cached call
    # is skipped — confirm the store actually survives reruns; having
    # process_pdfs *return* the store would be safer.
    process_pdfs(pdf_directory)

    st.success("PDFs processed. You can now ask questions!")

    # Guard clauses: bail out early when there is nothing to answer.
    user_question = st.text_input("Ask a question about the PDFs:")
    if not user_question:
        return
    if vector_store is None:
        st.error("Error: Vector store not initialized.")
        return

    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    # temperature=0 for deterministic, retrieval-grounded answers.
    llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # .invoke() replaces the deprecated Chain.__call__ usage (qa_chain(question)).
    result = qa_chain.invoke({"query": user_question})
    answer = result["result"]
    source_docs = result["source_documents"]

    st.write("Answer:", answer)

    if source_docs:
        st.subheader("Sources:")
        shown = set()  # de-duplicate: one download button per source file
        for doc in source_docs:
            file_name = os.path.basename(doc.metadata["source"])
            if file_name in pdf_files and file_name not in shown:
                shown.add(file_name)
                file_path = pdf_files[file_name]
                st.write(f"Source: {file_name}")
                # Context manager guarantees the handle is closed even if
                # st.download_button raises.
                with open(file_path, "rb") as file:
                    st.download_button(
                        label=f"Download {file_name}",
                        data=file,
                        file_name=file_name,
                        mime="application/pdf",
                    )

        # Sources that were retrieved but are not tracked in pdf_files.
        unique_other_sources = {
            doc.metadata["source"]
            for doc in source_docs
            if os.path.basename(doc.metadata["source"]) not in pdf_files
        }
        if unique_other_sources:
            st.subheader("Other Sources:")
            for source in unique_other_sources:
                st.write(f"- {source}")

if __name__ == "__main__":
    main()