saritha committed on
Commit
001d160
·
verified ·
1 Parent(s): 4ebdc50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -33,7 +33,7 @@ async def initialize(file_path, question):
33
 
34
  # Extract content from each page and store along with page number
35
  page_contexts = [f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages)]
36
- context = "\n".join(page_contexts) # Using the first 30 pages for context
37
 
38
  # Load the question-answering chain
39
  stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
@@ -52,17 +52,20 @@ async def initialize(file_path, question):
52
  if phrase.lower() in page.page_content.lower():
53
  page_scores[i] += 1
54
 
55
- # Determine the maximum score
56
  max_score = max(page_scores)
 
57
 
58
- # Collect the page numbers with the maximum score
59
- relevant_pages = [i+1 for i, score in enumerate(page_scores) if score == max_score]
 
 
60
 
61
- if relevant_pages:
62
- page_numbers = ', '.join(str(p) for p in sorted(relevant_pages))
63
- source_str = f"Relevant page(s): {page_numbers}"
64
  else:
65
- source_str = "Relevant page(s): Not found in specific page"
66
 
67
  # Create a clickable link for the document
68
  file_name = os.path.basename(file_path)
@@ -75,7 +78,7 @@ async def initialize(file_path, question):
75
  # Define Gradio Interface
76
  input_file = gr.File(label="Upload PDF File")
77
  input_question = gr.Textbox(label="Ask about the document")
78
- output_text = gr.Textbox(label="Answer and Relevant Pages")
79
 
80
  async def pdf_qa(file, question):
81
  if file is None:
 
33
 
34
  # Extract content from each page and store along with page number
35
  page_contexts = [f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages)]
36
+ context = "\n".join(page_contexts[:30]) # Using the first 30 pages for context
37
 
38
  # Load the question-answering chain
39
  stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
 
52
  if phrase.lower() in page.page_content.lower():
53
  page_scores[i] += 1
54
 
55
+ # Determine the maximum score and get top pages
56
  max_score = max(page_scores)
57
+ top_pages = [i+1 for i, score in enumerate(page_scores) if score == max_score]
58
 
59
+ # Optionally, limit the number of top pages to display
60
+ num_top_pages = 5 # Adjust this value based on your needs
61
+ if len(top_pages) > num_top_pages:
62
+ top_pages = sorted(top_pages)[:num_top_pages]
63
 
64
+ if top_pages:
65
+ page_numbers = ', '.join(str(p) for p in sorted(top_pages))
66
+ source_str = f"Top relevant page(s): {page_numbers}"
67
  else:
68
+ source_str = "Top relevant page(s): Not found in specific page"
69
 
70
  # Create a clickable link for the document
71
  file_name = os.path.basename(file_path)
 
78
  # Define Gradio Interface
79
  input_file = gr.File(label="Upload PDF File")
80
  input_question = gr.Textbox(label="Ask about the document")
81
+ output_text = gr.Textbox(label="Answer and Top Pages")
82
 
83
  async def pdf_qa(file, question):
84
  if file is None: