Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
|
3 |
import os
|
4 |
import dotenv
|
5 |
from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
|
6 |
-
from langchain_text_splitters import TokenTextSplitter
|
7 |
from langchain_chroma import Chroma
|
8 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
9 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
@@ -26,6 +26,7 @@ with st.sidebar:
|
|
26 |
|
27 |
# Main app logic
|
28 |
if "OPENAI_API_KEY" in os.environ:
|
|
|
29 |
st.header('Multiple File Upload')
|
30 |
uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
|
31 |
|
@@ -55,29 +56,23 @@ if "OPENAI_API_KEY" in os.environ:
|
|
55 |
os.unlink(temp_file_path)
|
56 |
return documents
|
57 |
|
58 |
-
|
59 |
-
|
60 |
all_documents = []
|
61 |
for file in uploaded_files:
|
62 |
all_documents.extend(load_file(file))
|
63 |
-
|
64 |
-
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=200)
|
65 |
-
all_splits = text_splitter.split_documents(all_documents)
|
66 |
-
return all_splits
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
with st.spinner("Processing documents..."):
|
71 |
-
all_splits = process_documents(uploaded_files)
|
72 |
|
73 |
# Initialize components
|
74 |
@st.cache_resource
|
75 |
def initialize_components(_all_splits):
|
76 |
dotenv.load_dotenv()
|
77 |
-
chat = ChatOpenAI(model="gpt-3.5-turbo-
|
78 |
-
embeddings = OpenAIEmbeddings(model="text-embedding-
|
79 |
vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
|
80 |
-
retriever = vectorstore.as_retriever(
|
81 |
|
82 |
SYSTEM_TEMPLATE = """
|
83 |
You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
|
@@ -159,19 +154,19 @@ if "OPENAI_API_KEY" in os.environ:
|
|
159 |
# Retrieve relevant documents
|
160 |
docs = retriever.get_relevant_documents(prompt)
|
161 |
|
162 |
-
# Get recent chat history
|
163 |
-
MAX_HISTORY = 5
|
164 |
-
recent_history = st.session_state.memory.load_memory_variables({})["chat_history"][-MAX_HISTORY:]
|
165 |
-
|
166 |
# Generate response
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
|
|
175 |
message_placeholder.markdown(full_response)
|
176 |
|
177 |
# Add assistant response to chat history
|
|
|
3 |
import os
|
4 |
import dotenv
|
5 |
from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
|
6 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
7 |
from langchain_chroma import Chroma
|
8 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
9 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
|
26 |
|
27 |
# Main app logic
|
28 |
if "OPENAI_API_KEY" in os.environ:
|
29 |
+
# with st.sidebar:
|
30 |
st.header('Multiple File Upload')
|
31 |
uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
|
32 |
|
|
|
56 |
os.unlink(temp_file_path)
|
57 |
return documents
|
58 |
|
59 |
+
# Process uploaded files
|
60 |
+
if uploaded_files:
|
61 |
all_documents = []
|
62 |
for file in uploaded_files:
|
63 |
all_documents.extend(load_file(file))
|
|
|
|
|
|
|
|
|
64 |
|
65 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
66 |
+
all_splits = text_splitter.split_documents(all_documents)
|
|
|
|
|
67 |
|
68 |
# Initialize components
|
69 |
@st.cache_resource
|
70 |
def initialize_components(_all_splits):
|
71 |
dotenv.load_dotenv()
|
72 |
+
chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
|
73 |
+
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
|
74 |
vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
|
75 |
+
retriever = vectorstore.as_retriever(k=4)
|
76 |
|
77 |
SYSTEM_TEMPLATE = """
|
78 |
You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
|
|
|
154 |
# Retrieve relevant documents
|
155 |
docs = retriever.get_relevant_documents(prompt)
|
156 |
|
|
|
|
|
|
|
|
|
157 |
# Generate response
|
158 |
+
response = document_chain.invoke(
|
159 |
+
{
|
160 |
+
"context": docs,
|
161 |
+
"chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
|
162 |
+
"messages": [
|
163 |
+
HumanMessage(content=prompt)
|
164 |
+
],
|
165 |
+
}
|
166 |
+
)
|
167 |
+
|
168 |
+
# The response is already a string, so we can use it directly
|
169 |
+
full_response = response
|
170 |
message_placeholder.markdown(full_response)
|
171 |
|
172 |
# Add assistant response to chat history
|