saritha committed on
Commit
b98540b
·
verified ·
1 Parent(s): 95e2001

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -45,30 +45,42 @@ async def initialize(file_path, question):
45
  # Identify key sentences or phrases
46
  key_phrases = answer.split(". ") # Split answer into sentences for more precise matching
47
 
48
- relevant_pages = set()
 
49
  for i, page in enumerate(pages):
50
  for phrase in key_phrases:
51
  if phrase.lower() in page.page_content.lower():
52
- relevant_pages.add(i+1) # Add page number if phrase is found
53
-
54
- if relevant_pages:
55
- page_numbers = ', '.join(str(p) for p in sorted(relevant_pages))
56
- source_str = f"Relevant pages: {page_numbers}"
 
 
 
 
 
 
 
 
 
 
 
 
57
  else:
58
- source_str = "Relevant pages: Not found in specific page"
59
 
60
  # Create a clickable link for the document
61
- file_name = os.path.basename(file_path)
62
- source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
63
 
64
- return f"Answer: {answer}\n{source_str}\n[Document: {source_link}]"
65
  else:
66
  return "Error: Unable to process the document. Please ensure the PDF file is valid."
67
 
68
  # Define Gradio Interface
69
  input_file = gr.File(label="Upload PDF File")
70
  input_question = gr.Textbox(label="Ask about the document")
71
- output_text = gr.Textbox(label="Answer and Relevant Pages")
72
 
73
  async def pdf_qa(file, question):
74
  if file is None:
@@ -79,3 +91,4 @@ async def pdf_qa(file, question):
79
 
80
  # Create Gradio Interface with share=True to enable a public link
81
  gr.Interface(fn=pdf_qa, inputs=[input_file, input_question], outputs=output_text, title="PDF Question Answering System", description="Upload a PDF file and ask questions about the content.").launch(share=True)
 
 
45
  # Identify key sentences or phrases
46
  key_phrases = answer.split(". ") # Split answer into sentences for more precise matching
47
 
48
+ # Score each page based on the presence of key phrases
49
+ page_scores = [0] * len(pages)
50
  for i, page in enumerate(pages):
51
  for phrase in key_phrases:
52
  if phrase.lower() in page.page_content.lower():
53
+ page_scores[i] += 1
54
+
55
+ # Determine the maximum score and get top pages
56
+ max_score = max(page_scores)
57
+ top_pages = [i+1 for i, score in enumerate(page_scores) if score == max_score]
58
+
59
+ # Limit to the top 2 pages
60
+ num_top_pages = 2
61
+ top_pages = sorted(top_pages)[:num_top_pages]
62
+
63
+ # Generate links for each top page
64
+ file_name = os.path.basename(file_path)
65
+ page_links = [f"[Page {p}](file://{os.path.abspath(file_path)}#page={p})" for p in top_pages]
66
+ page_links_str = ', '.join(page_links)
67
+
68
+ if top_pages:
69
+ source_str = f"Top relevant page(s): {page_links_str}"
70
  else:
71
+ source_str = "Top relevant page(s): Not found in specific page"
72
 
73
  # Create a clickable link for the document
74
+ source_link = f"[Document: {file_name}](file://{os.path.abspath(file_path)})"
 
75
 
76
+ return f"Answer: {answer}\n{source_str}\n{source_link}"
77
  else:
78
  return "Error: Unable to process the document. Please ensure the PDF file is valid."
79
 
80
  # Define Gradio Interface
81
  input_file = gr.File(label="Upload PDF File")
82
  input_question = gr.Textbox(label="Ask about the document")
83
+ output_text = gr.Textbox(label="Answer and Top Pages")
84
 
85
  async def pdf_qa(file, question):
86
  if file is None:
 
91
 
92
  # Create Gradio Interface with share=True to enable a public link
93
  gr.Interface(fn=pdf_qa, inputs=[input_file, input_question], outputs=output_text, title="PDF Question Answering System", description="Upload a PDF file and ask questions about the content.").launch(share=True)
94
+ the content.").launch(share=True)