Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -45,30 +45,42 @@ async def initialize(file_path, question):
|
|
45 |
# Identify key sentences or phrases
|
46 |
key_phrases = answer.split(". ") # Split answer into sentences for more precise matching
|
47 |
|
48 |
-
|
|
|
49 |
for i, page in enumerate(pages):
|
50 |
for phrase in key_phrases:
|
51 |
if phrase.lower() in page.page_content.lower():
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
else:
|
58 |
-
source_str = "
|
59 |
|
60 |
# Create a clickable link for the document
|
61 |
-
|
62 |
-
source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
|
63 |
|
64 |
-
return f"Answer: {answer}\n{source_str}\n
|
65 |
else:
|
66 |
return "Error: Unable to process the document. Please ensure the PDF file is valid."
|
67 |
|
68 |
# Define Gradio Interface
|
69 |
input_file = gr.File(label="Upload PDF File")
|
70 |
input_question = gr.Textbox(label="Ask about the document")
|
71 |
-
output_text = gr.Textbox(label="Answer and
|
72 |
|
73 |
async def pdf_qa(file, question):
|
74 |
if file is None:
|
@@ -79,3 +91,4 @@ async def pdf_qa(file, question):
|
|
79 |
|
80 |
# Create Gradio Interface with share=True to enable a public link
|
81 |
gr.Interface(fn=pdf_qa, inputs=[input_file, input_question], outputs=output_text, title="PDF Question Answering System", description="Upload a PDF file and ask questions about the content.").launch(share=True)
|
|
|
|
45 |
# Identify key sentences or phrases
|
46 |
key_phrases = answer.split(". ") # Split answer into sentences for more precise matching
|
47 |
|
48 |
+
# Score each page based on the presence of key phrases
|
49 |
+
page_scores = [0] * len(pages)
|
50 |
for i, page in enumerate(pages):
|
51 |
for phrase in key_phrases:
|
52 |
if phrase.lower() in page.page_content.lower():
|
53 |
+
page_scores[i] += 1
|
54 |
+
|
55 |
+
# Determine the maximum score and get top pages
|
56 |
+
max_score = max(page_scores)
|
57 |
+
top_pages = [i+1 for i, score in enumerate(page_scores) if score == max_score]
|
58 |
+
|
59 |
+
# Limit to the top 2 pages
|
60 |
+
num_top_pages = 2
|
61 |
+
top_pages = sorted(top_pages)[:num_top_pages]
|
62 |
+
|
63 |
+
# Generate links for each top page
|
64 |
+
file_name = os.path.basename(file_path)
|
65 |
+
page_links = [f"[Page {p}](file://{os.path.abspath(file_path)}#page={p})" for p in top_pages]
|
66 |
+
page_links_str = ', '.join(page_links)
|
67 |
+
|
68 |
+
if top_pages:
|
69 |
+
source_str = f"Top relevant page(s): {page_links_str}"
|
70 |
else:
|
71 |
+
source_str = "Top relevant page(s): Not found in specific page"
|
72 |
|
73 |
# Create a clickable link for the document
|
74 |
+
source_link = f"[Document: {file_name}](file://{os.path.abspath(file_path)})"
|
|
|
75 |
|
76 |
+
return f"Answer: {answer}\n{source_str}\n{source_link}"
|
77 |
else:
|
78 |
return "Error: Unable to process the document. Please ensure the PDF file is valid."
|
79 |
|
80 |
# Define Gradio Interface
|
81 |
input_file = gr.File(label="Upload PDF File")
|
82 |
input_question = gr.Textbox(label="Ask about the document")
|
83 |
+
output_text = gr.Textbox(label="Answer and Top Pages")
|
84 |
|
85 |
async def pdf_qa(file, question):
|
86 |
if file is None:
|
|
|
91 |
|
92 |
# Create Gradio Interface with share=True to enable a public link
|
93 |
gr.Interface(fn=pdf_qa, inputs=[input_file, input_question], outputs=output_text, title="PDF Question Answering System", description="Upload a PDF file and ask questions about the content.").launch(share=True)
|
94 |
+
the content.").launch(share=True)
|