AjiNiktech committed on
Commit
fe58b7c
·
verified ·
1 Parent(s): 3a30a49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -26
app.py CHANGED
@@ -3,7 +3,7 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
3
  import os
4
  import dotenv
5
  from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
6
- from langchain_text_splitters import TokenTextSplitter
7
  from langchain_chroma import Chroma
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
9
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
@@ -26,6 +26,7 @@ with st.sidebar:
26
 
27
  # Main app logic
28
  if "OPENAI_API_KEY" in os.environ:
 
29
  st.header('Multiple File Upload')
30
  uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
31
 
@@ -55,29 +56,23 @@ if "OPENAI_API_KEY" in os.environ:
55
  os.unlink(temp_file_path)
56
  return documents
57
 
58
- @st.cache_data
59
- def process_documents(uploaded_files):
60
  all_documents = []
61
  for file in uploaded_files:
62
  all_documents.extend(load_file(file))
63
-
64
- text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=200)
65
- all_splits = text_splitter.split_documents(all_documents)
66
- return all_splits
67
 
68
- # Process uploaded files
69
- if uploaded_files:
70
- with st.spinner("Processing documents..."):
71
- all_splits = process_documents(uploaded_files)
72
 
73
  # Initialize components
74
  @st.cache_resource
75
  def initialize_components(_all_splits):
76
  dotenv.load_dotenv()
77
- chat = ChatOpenAI(model="gpt-3.5-turbo-instruct", temperature=0.2)
78
- embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
79
  vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
80
- retriever = vectorstore.as_retriever(search_type="mmr", k=3)
81
 
82
  SYSTEM_TEMPLATE = """
83
  You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
@@ -159,19 +154,19 @@ if "OPENAI_API_KEY" in os.environ:
159
  # Retrieve relevant documents
160
  docs = retriever.get_relevant_documents(prompt)
161
 
162
- # Get recent chat history
163
- MAX_HISTORY = 5
164
- recent_history = st.session_state.memory.load_memory_variables({})["chat_history"][-MAX_HISTORY:]
165
-
166
  # Generate response
167
- full_response = ""
168
- for chunk in document_chain.stream({
169
- "context": docs,
170
- "chat_history": recent_history,
171
- "messages": [HumanMessage(content=prompt)],
172
- }):
173
- full_response += chunk
174
- message_placeholder.markdown(full_response + "▌")
 
 
 
 
175
  message_placeholder.markdown(full_response)
176
 
177
  # Add assistant response to chat history
 
3
  import os
4
  import dotenv
5
  from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_chroma import Chroma
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
9
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 
26
 
27
  # Main app logic
28
  if "OPENAI_API_KEY" in os.environ:
29
+ # with st.sidebar:
30
  st.header('Multiple File Upload')
31
  uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
32
 
 
56
  os.unlink(temp_file_path)
57
  return documents
58
 
59
+ # Process uploaded files
60
+ if uploaded_files:
61
  all_documents = []
62
  for file in uploaded_files:
63
  all_documents.extend(load_file(file))
 
 
 
 
64
 
65
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
66
+ all_splits = text_splitter.split_documents(all_documents)
 
 
67
 
68
  # Initialize components
69
  @st.cache_resource
70
  def initialize_components(_all_splits):
71
  dotenv.load_dotenv()
72
+ chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
73
+ embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
74
  vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
75
+ retriever = vectorstore.as_retriever(k=4)
76
 
77
  SYSTEM_TEMPLATE = """
78
  You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
 
154
  # Retrieve relevant documents
155
  docs = retriever.get_relevant_documents(prompt)
156
 
 
 
 
 
157
  # Generate response
158
+ response = document_chain.invoke(
159
+ {
160
+ "context": docs,
161
+ "chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
162
+ "messages": [
163
+ HumanMessage(content=prompt)
164
+ ],
165
+ }
166
+ )
167
+
168
+ # The response is already a string, so we can use it directly
169
+ full_response = response
170
  message_placeholder.markdown(full_response)
171
 
172
  # Add assistant response to chat history