Sharal committed on
Commit
0438285
·
verified ·
1 Parent(s): 48aee67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -4,23 +4,35 @@ from langchain_community.vectorstores import FAISS
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
- from langchain_huggingface import HuggingFaceEndpoint
8
  from langchain.chains import ConversationalRetrievalChain
9
  from langchain.memory import ConversationBufferMemory
10
- import torch
11
 
12
  api_token = os.getenv("HF_TOKEN")
13
  list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
14
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
15
 
16
- def load_doc(list_file_path):
17
  try:
18
- loaders = [PyPDFLoader(x) for x in list_file_path]
 
 
 
 
 
 
 
19
  pages = []
20
  for loader in loaders:
21
  pages.extend(loader.load())
 
22
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
23
  doc_splits = text_splitter.split_documents(pages)
 
 
 
 
24
  return doc_splits
25
  except Exception as e:
26
  st.error(f"Error loading document: {e}")
@@ -64,10 +76,9 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
64
  st.error(f"Error initializing LLM chain: {e}")
65
  return None
66
 
67
- def initialize_database(list_file_obj):
68
  try:
69
- list_file_path = [x.name for x in list_file_obj if x is not None]
70
- doc_splits = load_doc(list_file_path)
71
  if not doc_splits:
72
  return None, "Failed to load documents."
73
  vector_db = create_db(doc_splits)
@@ -127,6 +138,7 @@ def main():
127
  with st.spinner("Creating vector database..."):
128
  vector_db, db_message = initialize_database(uploaded_files)
129
  st.success(db_message)
 
130
 
131
  if 'vector_db' not in st.session_state:
132
  st.session_state['vector_db'] = None
 
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_huggingface import HuggingFaceEndpoint # Updated import
8
  from langchain.chains import ConversationalRetrievalChain
9
  from langchain.memory import ConversationBufferMemory
10
+ import tempfile
11
 
12
  api_token = os.getenv("HF_TOKEN")
13
  list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
14
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
15
 
16
+ def load_doc(uploaded_files):
17
  try:
18
+ temp_files = []
19
+ for uploaded_file in uploaded_files:
20
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
21
+ temp_file.write(uploaded_file.read())
22
+ temp_file.close()
23
+ temp_files.append(temp_file.name)
24
+
25
+ loaders = [PyPDFLoader(x) for x in temp_files]
26
  pages = []
27
  for loader in loaders:
28
  pages.extend(loader.load())
29
+
30
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
31
  doc_splits = text_splitter.split_documents(pages)
32
+
33
+ for temp_file in temp_files:
34
+ os.remove(temp_file) # Clean up temporary files
35
+
36
  return doc_splits
37
  except Exception as e:
38
  st.error(f"Error loading document: {e}")
 
76
  st.error(f"Error initializing LLM chain: {e}")
77
  return None
78
 
79
+ def initialize_database(uploaded_files):
80
  try:
81
+ doc_splits = load_doc(uploaded_files)
 
82
  if not doc_splits:
83
  return None, "Failed to load documents."
84
  vector_db = create_db(doc_splits)
 
138
  with st.spinner("Creating vector database..."):
139
  vector_db, db_message = initialize_database(uploaded_files)
140
  st.success(db_message)
141
+ st.session_state['vector_db'] = vector_db
142
 
143
  if 'vector_db' not in st.session_state:
144
  st.session_state['vector_db'] = None