Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,23 +4,35 @@ from langchain_community.vectorstores import FAISS
|
|
4 |
from langchain_community.document_loaders import PyPDFLoader
|
5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
-
from langchain_huggingface import HuggingFaceEndpoint
|
8 |
from langchain.chains import ConversationalRetrievalChain
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
-
import
|
11 |
|
12 |
api_token = os.getenv("HF_TOKEN")
|
13 |
list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
|
14 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
15 |
|
16 |
-
def load_doc(
|
17 |
try:
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
pages = []
|
20 |
for loader in loaders:
|
21 |
pages.extend(loader.load())
|
|
|
22 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
|
23 |
doc_splits = text_splitter.split_documents(pages)
|
|
|
|
|
|
|
|
|
24 |
return doc_splits
|
25 |
except Exception as e:
|
26 |
st.error(f"Error loading document: {e}")
|
@@ -64,10 +76,9 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
|
|
64 |
st.error(f"Error initializing LLM chain: {e}")
|
65 |
return None
|
66 |
|
67 |
-
def initialize_database(
|
68 |
try:
|
69 |
-
|
70 |
-
doc_splits = load_doc(list_file_path)
|
71 |
if not doc_splits:
|
72 |
return None, "Failed to load documents."
|
73 |
vector_db = create_db(doc_splits)
|
@@ -127,6 +138,7 @@ def main():
|
|
127 |
with st.spinner("Creating vector database..."):
|
128 |
vector_db, db_message = initialize_database(uploaded_files)
|
129 |
st.success(db_message)
|
|
|
130 |
|
131 |
if 'vector_db' not in st.session_state:
|
132 |
st.session_state['vector_db'] = None
|
|
|
4 |
from langchain_community.document_loaders import PyPDFLoader
|
5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
+
from langchain_huggingface import HuggingFaceEndpoint # Updated import
|
8 |
from langchain.chains import ConversationalRetrievalChain
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
+
import tempfile
|
11 |
|
12 |
api_token = os.getenv("HF_TOKEN")
|
13 |
list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
|
14 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
15 |
|
16 |
+
def load_doc(uploaded_files):
    """Load uploaded PDF files and split them into text chunks.

    Each uploaded file (a Streamlit file-like upload object) is written to a
    temporary ``.pdf`` on disk because PyPDFLoader requires a real file path.
    The pages of every PDF are loaded, concatenated, and split with a
    RecursiveCharacterTextSplitter (chunk_size=1024, chunk_overlap=64).

    Returns:
        The list of document splits, or None if anything failed (the error
        is reported to the UI via ``st.error``).
    """
    temp_paths = []
    try:
        # Persist each upload to disk first; PyPDFLoader cannot read
        # an in-memory file object.
        for uploaded_file in uploaded_files:
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
            temp_file.write(uploaded_file.read())
            temp_file.close()
            temp_paths.append(temp_file.name)

        loaders = [PyPDFLoader(path) for path in temp_paths]
        pages = []
        for loader in loaders:
            pages.extend(loader.load())

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
        return text_splitter.split_documents(pages)
    except Exception as e:
        st.error(f"Error loading document: {e}")
    finally:
        # Always clean up the temp files, even when loading or splitting
        # raised — the original version leaked them on any exception.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort cleanup; never mask the real error
|
|
|
76 |
st.error(f"Error initializing LLM chain: {e}")
|
77 |
return None
|
78 |
|
79 |
+
def initialize_database(uploaded_files):
|
80 |
try:
|
81 |
+
doc_splits = load_doc(uploaded_files)
|
|
|
82 |
if not doc_splits:
|
83 |
return None, "Failed to load documents."
|
84 |
vector_db = create_db(doc_splits)
|
|
|
138 |
with st.spinner("Creating vector database..."):
|
139 |
vector_db, db_message = initialize_database(uploaded_files)
|
140 |
st.success(db_message)
|
141 |
+
st.session_state['vector_db'] = vector_db
|
142 |
|
143 |
if 'vector_db' not in st.session_state:
|
144 |
st.session_state['vector_db'] = None
|