Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,8 @@ from langchain.memory import ConversationBufferMemory
|
|
12 |
from langchain.llms import HuggingFaceHub
|
13 |
from langchain.memory import ConversationBufferWindowMemory
|
14 |
from langchain_community.document_loaders import TextLoader
|
|
|
|
|
15 |
|
16 |
from pathlib import Path
|
17 |
import chromadb
|
@@ -80,7 +82,7 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
|
|
80 |
# Processing for one document only
|
81 |
# loader = PyPDFLoader(file_path)
|
82 |
# pages = loader.load()
|
83 |
-
loaders = [
|
84 |
pages = []
|
85 |
for loader in loaders:
|
86 |
pages.extend(loader.load())
|
@@ -227,16 +229,16 @@ def demo():
|
|
227 |
vector_db = gr.State()
|
228 |
qa_chain = gr.State()
|
229 |
collection_name = gr.State()
|
230 |
-
pdf_directory = '/home/user/app/
|
231 |
|
232 |
def process_pdfs():
|
233 |
# List all PDF files in the directory
|
234 |
-
pdf_files = [os.path.join(pdf_directory, file) for file in os.listdir(pdf_directory) if file.endswith(".
|
235 |
return pdf_files
|
236 |
|
237 |
# Create a dictionary with the necessary information
|
238 |
pdf_dict = {"value": process_pdfs, "height": 100, "file_count": "multiple",
|
239 |
-
"visible": False, "file_types": ["
|
240 |
"label": "Uploaded PDF documents"}
|
241 |
|
242 |
# Create a gr.Files component with the dictionary
|
|
|
12 |
from langchain.llms import HuggingFaceHub
|
13 |
from langchain.memory import ConversationBufferWindowMemory
|
14 |
from langchain_community.document_loaders import TextLoader
|
15 |
+
from langchain_community.document_loaders import DirectoryLoader
|
16 |
+
from langchain_community.document_loaders import UnstructuredHTMLLoader
|
17 |
|
18 |
from pathlib import Path
|
19 |
import chromadb
|
|
|
82 |
# Processing for one document only
|
83 |
# loader = PyPDFLoader(file_path)
|
84 |
# pages = loader.load()
|
85 |
+
loaders = [UnstructuredHTMLLoader(x) for x in list_file_path]
|
86 |
pages = []
|
87 |
for loader in loaders:
|
88 |
pages.extend(loader.load())
|
|
|
229 |
vector_db = gr.State()
|
230 |
qa_chain = gr.State()
|
231 |
collection_name = gr.State()
|
232 |
+
pdf_directory = '/home/user/app/htmls/'
|
233 |
|
234 |
def process_pdfs():
|
235 |
# List all PDF files in the directory
|
236 |
+
pdf_files = [os.path.join(pdf_directory, file) for file in os.listdir(pdf_directory) if file.endswith(".html")]
|
237 |
return pdf_files
|
238 |
|
239 |
# Create a dictionary with the necessary information
|
240 |
pdf_dict = {"value": process_pdfs, "height": 100, "file_count": "multiple",
|
241 |
+
"visible": False, "file_types": ["html"], "interactive": True,
|
242 |
"label": "Uploaded PDF documents"}
|
243 |
|
244 |
# Create a gr.Files component with the dictionary
|