vishwask committed on
Commit
c4e61a2
·
verified ·
1 Parent(s): fd3b58c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -12,6 +12,8 @@ from langchain.memory import ConversationBufferMemory
12
  from langchain.llms import HuggingFaceHub
13
  from langchain.memory import ConversationBufferWindowMemory
14
  from langchain_community.document_loaders import TextLoader
 
 
15
 
16
  from pathlib import Path
17
  import chromadb
@@ -80,7 +82,7 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
80
  # Processing for one document only
81
  # loader = PyPDFLoader(file_path)
82
  # pages = loader.load()
83
- loaders = [PyPDFLoader(x) for x in list_file_path]
84
  pages = []
85
  for loader in loaders:
86
  pages.extend(loader.load())
@@ -227,16 +229,16 @@ def demo():
227
  vector_db = gr.State()
228
  qa_chain = gr.State()
229
  collection_name = gr.State()
230
- pdf_directory = '/home/user/app/pdfs/'
231
 
232
  def process_pdfs():
233
  # List all PDF files in the directory
234
- pdf_files = [os.path.join(pdf_directory, file) for file in os.listdir(pdf_directory) if file.endswith(".pdf")]
235
  return pdf_files
236
 
237
  # Create a dictionary with the necessary information
238
  pdf_dict = {"value": process_pdfs, "height": 100, "file_count": "multiple",
239
- "visible": False, "file_types": ["pdf"], "interactive": True,
240
  "label": "Uploaded PDF documents"}
241
 
242
  # Create a gr.Files component with the dictionary
 
12
  from langchain.llms import HuggingFaceHub
13
  from langchain.memory import ConversationBufferWindowMemory
14
  from langchain_community.document_loaders import TextLoader
15
+ from langchain_community.document_loaders import DirectoryLoader
16
+ from langchain_community.document_loaders import UnstructuredHTMLLoader
17
 
18
  from pathlib import Path
19
  import chromadb
 
82
  # Processing for one document only
83
  # loader = PyPDFLoader(file_path)
84
  # pages = loader.load()
85
+ loaders = [UnstructuredHTMLLoader(x) for x in list_file_path]
86
  pages = []
87
  for loader in loaders:
88
  pages.extend(loader.load())
 
229
  vector_db = gr.State()
230
  qa_chain = gr.State()
231
  collection_name = gr.State()
232
+ pdf_directory = '/home/user/app/htmls/'
233
 
234
  def process_pdfs():
235
  # List all PDF files in the directory
236
+ pdf_files = [os.path.join(pdf_directory, file) for file in os.listdir(pdf_directory) if file.endswith(".html")]
237
  return pdf_files
238
 
239
  # Create a dictionary with the necessary information
240
  pdf_dict = {"value": process_pdfs, "height": 100, "file_count": "multiple",
241
+ "visible": False, "file_types": ["html"], "interactive": True,
242
  "label": "Uploaded PDF documents"}
243
 
244
  # Create a gr.Files component with the dictionary