Spaces:

Muzammil6376
/

Multimodal

Running

Muzammil6376 committed 20 days ago

Commit

b6b04c5

verified ·

1 Parent(s): 8439939

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -60,7 +60,9 @@ def process_pdf(pdf_file):
     # read full text
     reader = PdfReader(pdf_file.name)
     pages = [p.extract_text() or "" for p in reader.pages]
-    pdf_text = "\n\n".join(pages)
     # extract elements with images via unstructured
     try:
@@ -89,12 +91,11 @@ def process_pdf(pdf_file):
     # combine text chunks and image captions
     docs = chunks + captions
-        # embed and index
     vectors = embeddings.embed_documents(docs)
-    # FAISS.from_embeddings expects list of (text, embedding) pairs
     pairs = list(zip(docs, vectors))
     index = FAISS.from_embeddings(pairs)
-    retriever = index.as_retriever(search_kwargs={"k":2})(search_kwargs={"k":2})
     status = f"✅ Indexed — {len(chunks)} text chunks + {len(captions)} captions"
     return os.path.basename(pdf_file.name), status, gr.update(interactive=True)

     # read full text
     reader = PdfReader(pdf_file.name)
     pages = [p.extract_text() or "" for p in reader.pages]
+    pdf_text = "
+".join(pages)
     # extract elements with images via unstructured
     try:
     # combine text chunks and image captions
     docs = chunks + captions
+    # embed and index
     vectors = embeddings.embed_documents(docs)
     pairs = list(zip(docs, vectors))
     index = FAISS.from_embeddings(pairs)
+    retriever = index.as_retriever(search_kwargs={"k": 2})
     status = f"✅ Indexed — {len(chunks)} text chunks + {len(captions)} captions"
     return os.path.basename(pdf_file.name), status, gr.update(interactive=True)