Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import pandas as pd
|
|
10 |
import io
|
11 |
import time
|
12 |
|
13 |
-
from
|
14 |
from typing import List, Dict, Tuple
|
15 |
from langchain_openai import AzureChatOpenAI,AzureOpenAIEmbeddings
|
16 |
from langchain.text_splitter import CharacterTextSplitter
|
@@ -27,7 +27,7 @@ class PDFExtract:
|
|
27 |
List[str]: Extracted text from the PDFs.
|
28 |
"""
|
29 |
docs = []
|
30 |
-
loaders = [
|
31 |
for loader in loaders:
|
32 |
docs.extend(loader.load())
|
33 |
return docs
|
|
|
10 |
import io
|
11 |
import time
|
12 |
|
13 |
+
from langchain_unstructured import UnstructuredLoader
|
14 |
from typing import List, Dict, Tuple
|
15 |
from langchain_openai import AzureChatOpenAI,AzureOpenAIEmbeddings
|
16 |
from langchain.text_splitter import CharacterTextSplitter
|
|
|
27 |
List[str]: Extracted text from the PDFs.
|
28 |
"""
|
29 |
docs = []
|
30 |
+
loaders = [UnstructuredLoader(file_obj, strategy="fast") for file_obj in file_paths]
|
31 |
for loader in loaders:
|
32 |
docs.extend(loader.load())
|
33 |
return docs
|