Shreyas094 committed on
Commit
83a84c1
·
verified ·
1 Parent(s): 0a69d83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -11
app.py CHANGED
@@ -8,6 +8,8 @@ from typing import List
8
  from pydantic import BaseModel, Field
9
  from tempfile import NamedTemporaryFile
10
  from langchain_community.vectorstores import FAISS
 
 
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from llama_parse import LlamaParse
@@ -460,25 +462,32 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
463
- retriever = database.as_retriever(search_kwargs={"k": 20})
464
- logging.info(f"Retrieving relevant documents for query: {query}")
465
- relevant_docs = retriever.get_relevant_documents(query)
466
- logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
467
-
468
- # Filter relevant_docs based on selected documents
469
- filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
470
- logging.info(f"Number of filtered documents: {len(filtered_docs)}")
471
 
 
 
472
  if not filtered_docs:
473
- logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
474
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
475
  return
476
 
477
- for doc in filtered_docs:
 
 
 
 
 
 
 
 
478
  logging.info(f"Document source: {doc.metadata['source']}")
479
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
480
 
481
- context_str = "\n".join([doc.page_content for doc in filtered_docs])
482
  logging.info(f"Total context length: {len(context_str)}")
483
 
484
  if model == "@cf/meta/llama-3.1-8b-instruct":
 
8
  from pydantic import BaseModel, Field
9
  from tempfile import NamedTemporaryFile
10
  from langchain_community.vectorstores import FAISS
11
+ from langchain_core.vectorstores import VectorStore
12
+ from langchain_core.documents import Document
13
  from langchain_community.document_loaders import PyPDFLoader
14
  from langchain_community.embeddings import HuggingFaceEmbeddings
15
  from llama_parse import LlamaParse
 
462
  yield "No documents available. Please upload PDF documents to answer questions."
463
  return
464
 
465
+ # Pre-filter the documents
466
+ filtered_docs = []
467
+ for doc_id, doc in database.docstore._dict.items():
468
+ if isinstance(doc, Document) and doc.metadata.get("source") in selected_docs:
469
+ filtered_docs.append(doc)
 
 
 
470
 
471
+ logging.info(f"Number of documents after pre-filtering: {len(filtered_docs)}")
472
+
473
  if not filtered_docs:
474
+ logging.warning(f"No documents found for the selected sources: {selected_docs}")
475
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
476
  return
477
 
478
+ # Create a new FAISS index with only the selected documents
479
+ filtered_db = FAISS.from_documents(filtered_docs, embed)
480
+
481
+ retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
482
+ logging.info(f"Retrieving relevant documents for query: {query}")
483
+ relevant_docs = retriever.get_relevant_documents(query)
484
+ logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
485
+
486
+ for doc in relevant_docs:
487
  logging.info(f"Document source: {doc.metadata['source']}")
488
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
489
 
490
+ context_str = "\n".join([doc.page_content for doc in relevant_docs])
491
  logging.info(f"Total context length: {len(context_str)}")
492
 
493
  if model == "@cf/meta/llama-3.1-8b-instruct":