AjiNiktech committed on
Commit
b3b3691
·
verified ·
1 Parent(s): fe58b7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -49
app.py CHANGED
@@ -26,7 +26,6 @@ with st.sidebar:
26
 
27
  # Main app logic
28
  if "OPENAI_API_KEY" in os.environ:
29
- # with st.sidebar:
30
  st.header('Multiple File Upload')
31
  uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
32
 
@@ -58,72 +57,80 @@ if "OPENAI_API_KEY" in os.environ:
58
 
59
  # Process uploaded files
60
  if uploaded_files:
61
- all_documents = []
62
- for file in uploaded_files:
63
- all_documents.extend(load_file(file))
 
 
64
 
65
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
66
- all_splits = text_splitter.split_documents(all_documents)
67
 
68
- # Initialize components
69
- @st.cache_resource
70
- def initialize_components(_all_splits):
71
- dotenv.load_dotenv()
72
- chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
73
- embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
74
- vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
75
- retriever = vectorstore.as_retriever(k=4)
76
 
77
- SYSTEM_TEMPLATE = """
78
- You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
 
 
 
 
 
79
 
80
- 1. Process and structure multiple documents in various formats, including:
81
- .txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
 
82
 
83
- 2. Extract and organize information from these unstructured documents into a coherent, searchable format.
 
 
 
84
 
85
- 3. Retrieve relevant information from the processed documents based on user queries.
 
86
 
87
- 4. Act as a chatbot, engaging in conversations about the content of the documents.
 
88
 
89
- 5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
90
 
91
- 6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
92
 
93
- 7. When answering, cite the specific document or section where the information was found, if possible.
94
 
95
- 8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
96
 
97
- 9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
98
 
99
- Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.
100
 
101
- <context>
102
- {context}
103
- </context>
104
 
105
- Chat History:
106
- {chat_history}
107
- """
108
 
109
- question_answering_prompt = ChatPromptTemplate.from_messages(
110
- [
111
- (
112
- "system",
113
- SYSTEM_TEMPLATE,
114
- ),
115
- MessagesPlaceholder(variable_name="chat_history"),
116
- MessagesPlaceholder(variable_name="messages"),
117
- ]
118
- )
119
 
120
- document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
 
 
121
 
122
- return retriever, document_chain
 
 
123
 
124
- # Load components
125
- with st.spinner("Initializing Assistant..."):
126
- retriever, document_chain = initialize_components(all_splits)
 
 
 
 
 
 
 
 
 
127
 
128
  # Initialize memory for each session
129
  if "memory" not in st.session_state:
@@ -176,7 +183,7 @@ if "OPENAI_API_KEY" in os.environ:
176
  st.session_state.memory.save_context({"input": prompt}, {"output": full_response})
177
 
178
  else:
179
- st.warning("Please upload files to continue.")
180
 
181
  else:
182
  st.warning("Please enter your OpenAI API Key in the sidebar to start the chatbot.")
 
26
 
27
  # Main app logic
28
  if "OPENAI_API_KEY" in os.environ:
 
29
  st.header('Multiple File Upload')
30
  uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
31
 
 
57
 
58
  # Process uploaded files
59
  if uploaded_files:
60
+ if st.button("Process Documents"):
61
+ with st.spinner("Processing documents..."):
62
+ all_documents = []
63
+ for file in uploaded_files:
64
+ all_documents.extend(load_file(file))
65
 
66
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
67
+ all_splits = text_splitter.split_documents(all_documents)
68
 
69
+ # Store processed documents in session state
70
+ st.session_state.processed_documents = all_splits
71
+ st.success("Documents processed successfully!")
 
 
 
 
 
72
 
73
+ # Initialize components
74
+ @st.cache_resource
75
+ def initialize_components():
76
+ dotenv.load_dotenv()
77
+ chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
78
+ embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
79
+ return chat, embeddings
80
 
81
+ # Load components
82
+ with st.spinner("Initializing Assistant..."):
83
+ chat, embeddings = initialize_components()
84
 
85
+ # Create vectorstore and retriever only if documents are processed
86
+ if 'processed_documents' in st.session_state:
87
+ vectorstore = Chroma.from_documents(documents=st.session_state.processed_documents, embedding=embeddings)
88
+ retriever = vectorstore.as_retriever(k=4)
89
 
90
+ SYSTEM_TEMPLATE = """
91
+ You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
92
 
93
+ 1. Process and structure multiple documents in various formats, including:
94
+ .txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
95
 
96
+ 2. Extract and organize information from these unstructured documents into a coherent, searchable format.
97
 
98
+ 3. Retrieve relevant information from the processed documents based on user queries.
99
 
100
+ 4. Act as a chatbot, engaging in conversations about the content of the documents.
101
 
102
+ 5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
103
 
104
+ 6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
105
 
106
+ 7. When answering, cite the specific document or section where the information was found, if possible.
107
 
108
+ 8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
 
 
109
 
110
+ 9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
 
 
111
 
112
+ Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.
 
 
 
 
 
 
 
 
 
113
 
114
+ <context>
115
+ {context}
116
+ </context>
117
 
118
+ Chat History:
119
+ {chat_history}
120
+ """
121
 
122
+ question_answering_prompt = ChatPromptTemplate.from_messages(
123
+ [
124
+ (
125
+ "system",
126
+ SYSTEM_TEMPLATE,
127
+ ),
128
+ MessagesPlaceholder(variable_name="chat_history"),
129
+ MessagesPlaceholder(variable_name="messages"),
130
+ ]
131
+ )
132
+
133
+ document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
134
 
135
  # Initialize memory for each session
136
  if "memory" not in st.session_state:
 
183
  st.session_state.memory.save_context({"input": prompt}, {"output": full_response})
184
 
185
  else:
186
+ st.warning("Please upload and process documents to start chatting.")
187
 
188
  else:
189
  st.warning("Please enter your OpenAI API Key in the sidebar to start the chatbot.")