Shreyas094 committed on
Commit
495c1d2
1 Parent(s): 63d903a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -11,6 +11,7 @@ from langchain_community.vectorstores import FAISS
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from llama_parse import LlamaParse
 
14
 
15
  # Environment variables and configurations
16
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -25,7 +26,7 @@ llama_parser = LlamaParse(
25
  language="en",
26
  )
27
 
28
- def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[dict]:
29
  """Loads and splits the document into pages."""
30
  if parser == "pypdf":
31
  loader = PyPDFLoader(file.name)
@@ -33,7 +34,7 @@ def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[dict]
33
  elif parser == "llamaparse":
34
  try:
35
  documents = llama_parser.load_data(file.name)
36
- return [{"page_content": doc.text, "metadata": {"source": file.name}} for doc in documents]
37
  except Exception as e:
38
  print(f"Error using Llama Parse: {str(e)}")
39
  print("Falling back to PyPDF parser")
 
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from llama_parse import LlamaParse
14
+ from langchain_core.documents import Document
15
 
16
  # Environment variables and configurations
17
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
26
  language="en",
27
  )
28
 
29
+ def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[Document]:
30
  """Loads and splits the document into pages."""
31
  if parser == "pypdf":
32
  loader = PyPDFLoader(file.name)
 
34
  elif parser == "llamaparse":
35
  try:
36
  documents = llama_parser.load_data(file.name)
37
+ return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
38
  except Exception as e:
39
  print(f"Error using Llama Parse: {str(e)}")
40
  print("Falling back to PyPDF parser")