Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ async def initialize(file_path, question):
|
|
32 |
pages = pdf_loader.load_and_split()
|
33 |
|
34 |
# Extract content from each page and store along with page number
|
35 |
-
page_contexts = [
|
36 |
context = "\n".join(page_contexts[:30]) # Using the first 30 pages for context
|
37 |
|
38 |
# Load the question-answering chain
|
@@ -52,17 +52,13 @@ async def initialize(file_path, question):
|
|
52 |
if phrase.lower() in page.page_content.lower():
|
53 |
page_scores[i] += 1
|
54 |
|
55 |
-
# Determine the
|
56 |
-
|
57 |
-
top_pages = [i+1 for i, score in
|
58 |
-
|
59 |
-
# Limit to the top 2 pages
|
60 |
-
num_top_pages = 2
|
61 |
-
top_pages = sorted(top_pages)[:num_top_pages]
|
62 |
|
63 |
# Generate links for each top page
|
64 |
file_name = os.path.basename(file_path)
|
65 |
-
# Use a general link with instructions for manual navigation if automatic links are not supported
|
66 |
page_links = [f"[Page {p}](file://{os.path.abspath(file_path)})" for p in top_pages]
|
67 |
page_links_str = ', '.join(page_links)
|
68 |
|
|
|
32 |
pages = pdf_loader.load_and_split()
|
33 |
|
34 |
# Extract the text content of each page (the enumerate index is not stored with it)
|
35 |
+
page_contexts = [page.page_content for i, page in enumerate(pages)]
|
36 |
context = "\n".join(page_contexts[:30]) # Using the first 30 pages for context
|
37 |
|
38 |
# Load the question-answering chain
|
|
|
52 |
if phrase.lower() in page.page_content.lower():
|
53 |
page_scores[i] += 1
|
54 |
|
55 |
+
# Determine the top pages based on highest scores
|
56 |
+
top_pages_with_scores = sorted(enumerate(page_scores), key=lambda x: x[1], reverse=True)
|
57 |
+
top_pages = [i + 1 for i, score in top_pages_with_scores if score > 0][:2] # Get top 2 pages
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# Generate links for each top page
|
60 |
file_name = os.path.basename(file_path)
|
61 |
+
# Use a general link format: each "Page N" label points at the file itself (no per-page anchor), so the reader navigates to the page manually if automatic links are not supported
|
62 |
page_links = [f"[Page {p}](file://{os.path.abspath(file_path)})" for p in top_pages]
|
63 |
page_links_str = ', '.join(page_links)
|
64 |
|