Muzammil6376 committed on
Commit
b6b04c5
·
verified ·
1 Parent(s): 8439939

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -60,7 +60,9 @@ def process_pdf(pdf_file):
60
  # read full text
61
  reader = PdfReader(pdf_file.name)
62
  pages = [p.extract_text() or "" for p in reader.pages]
63
- pdf_text = "\n\n".join(pages)
 
 
64
 
65
  # extract elements with images via unstructured
66
  try:
@@ -89,12 +91,11 @@ def process_pdf(pdf_file):
89
  # combine text chunks and image captions
90
  docs = chunks + captions
91
 
92
- # embed and index
93
  vectors = embeddings.embed_documents(docs)
94
- # FAISS.from_embeddings expects list of (text, embedding) pairs
95
  pairs = list(zip(docs, vectors))
96
  index = FAISS.from_embeddings(pairs)
97
- retriever = index.as_retriever(search_kwargs={"k":2})(search_kwargs={"k":2})
98
 
99
  status = f"✅ Indexed — {len(chunks)} text chunks + {len(captions)} captions"
100
  return os.path.basename(pdf_file.name), status, gr.update(interactive=True)
 
60
  # read full text
61
  reader = PdfReader(pdf_file.name)
62
  pages = [p.extract_text() or "" for p in reader.pages]
63
+ pdf_text = "
64
+
65
+ ".join(pages)
66
 
67
  # extract elements with images via unstructured
68
  try:
 
91
  # combine text chunks and image captions
92
  docs = chunks + captions
93
 
94
+ # embed and index
95
  vectors = embeddings.embed_documents(docs)
 
96
  pairs = list(zip(docs, vectors))
97
  index = FAISS.from_embeddings(pairs)
98
+ retriever = index.as_retriever(search_kwargs={"k": 2})
99
 
100
  status = f"✅ Indexed — {len(chunks)} text chunks + {len(captions)} captions"
101
  return os.path.basename(pdf_file.name), status, gr.update(interactive=True)