Update app.py
Browse files
app.py
CHANGED
|
@@ -1,16 +1,12 @@
|
|
| 1 |
#!/usr/bin/env python
|
| 2 |
|
| 3 |
import datetime
|
| 4 |
-
import operator
|
| 5 |
import pandas as pd
|
| 6 |
import tqdm.auto
|
| 7 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 8 |
from huggingface_hub import HfApi
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
-
from gradio_calendar import Calendar
|
| 12 |
-
import datasets
|
| 13 |
-
import requests
|
| 14 |
|
| 15 |
from datetime import timezone # Ensure timezone is imported
|
| 16 |
|
|
@@ -95,7 +91,7 @@ class Prettifier:
|
|
| 95 |
|
| 96 |
class PaperList:
|
| 97 |
"""
|
| 98 |
-
Manages the list of papers
|
| 99 |
"""
|
| 100 |
COLUMN_INFO = [
|
| 101 |
["arxiv_id", "str"], # Added arxiv_id
|
|
@@ -121,27 +117,11 @@ class PaperList:
|
|
| 121 |
def column_datatype(self):
|
| 122 |
return [col[1] for col in self.COLUMN_INFO]
|
| 123 |
|
| 124 |
-
def
|
| 125 |
-
self,
|
| 126 |
-
title_search_query: str,
|
| 127 |
-
max_num_to_retrieve: int = 1000, # Set a high default to include all if not specified
|
| 128 |
-
) -> pd.DataFrame:
|
| 129 |
"""
|
| 130 |
-
|
| 131 |
"""
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
# Filter by title if search query is provided
|
| 135 |
-
if title_search_query:
|
| 136 |
-
df = df[df["title"].str.contains(title_search_query, case=False, na=False)]
|
| 137 |
-
|
| 138 |
-
# Limit the number of papers to retrieve if max_num_to_retrieve is set
|
| 139 |
-
if max_num_to_retrieve:
|
| 140 |
-
df = df.head(max_num_to_retrieve)
|
| 141 |
-
|
| 142 |
-
# Prettify the DataFrame
|
| 143 |
-
df_prettified = self._prettifier(df).loc[:, self.column_names]
|
| 144 |
-
return df_prettified
|
| 145 |
|
| 146 |
|
| 147 |
# --- Sorting and Pagination Management ---
|
|
@@ -154,7 +134,6 @@ class PaperManager:
|
|
| 154 |
self.paper_list = paper_list
|
| 155 |
self.papers_per_page = papers_per_page
|
| 156 |
self.sort_method = "hot" # Default sort method
|
| 157 |
-
self.current_search_query = "" # Initialize with no search query
|
| 158 |
self.top_time_frame = "all time" # Default time frame for "Top" sorting
|
| 159 |
self.sort_papers()
|
| 160 |
# 'current_page' and 'total_pages' are set in 'sort_papers()'
|
|
@@ -181,14 +160,10 @@ class PaperManager:
|
|
| 181 |
|
| 182 |
def sort_papers(self):
|
| 183 |
"""
|
| 184 |
-
Sorts the papers based on the current sort method
|
| 185 |
"""
|
| 186 |
df = self.paper_list.df_raw.copy()
|
| 187 |
|
| 188 |
-
# Apply search filter if a search query exists
|
| 189 |
-
if self.current_search_query:
|
| 190 |
-
df = df[df["title"].str.contains(self.current_search_query, case=False, na=False)]
|
| 191 |
-
|
| 192 |
if self.sort_method == "hot":
|
| 193 |
if not df.empty:
|
| 194 |
df = df.drop(columns=['score'], errors='ignore') # Remove existing 'score' column if present
|
|
@@ -242,15 +217,6 @@ class PaperManager:
|
|
| 242 |
self.sort_papers()
|
| 243 |
return True # Assume success
|
| 244 |
|
| 245 |
-
def set_search_query(self, query: str):
|
| 246 |
-
"""
|
| 247 |
-
Sets the current search query and re-sorts the papers.
|
| 248 |
-
"""
|
| 249 |
-
print(f"Setting search query to: {query}")
|
| 250 |
-
self.current_search_query = query
|
| 251 |
-
self.sort_papers()
|
| 252 |
-
return True # Assume success
|
| 253 |
-
|
| 254 |
def get_current_page_papers(self) -> str:
|
| 255 |
"""
|
| 256 |
Retrieves the HTML string of the current page's papers.
|
|
@@ -399,22 +365,6 @@ def refresh_papers_ui() -> str:
|
|
| 399 |
return paper_manager.refresh()
|
| 400 |
|
| 401 |
|
| 402 |
-
def search_papers_ui(query: str) -> str:
|
| 403 |
-
"""
|
| 404 |
-
Searches for papers based on the title search query.
|
| 405 |
-
"""
|
| 406 |
-
paper_manager.set_search_query(query)
|
| 407 |
-
return paper_manager.get_current_page_papers()
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
def clear_search_ui() -> str:
|
| 411 |
-
"""
|
| 412 |
-
Clears the current search query and refreshes the paper list.
|
| 413 |
-
"""
|
| 414 |
-
paper_manager.set_search_query("")
|
| 415 |
-
return paper_manager.get_current_page_papers()
|
| 416 |
-
|
| 417 |
-
|
| 418 |
# --- CSS Styling ---
|
| 419 |
|
| 420 |
css = """
|
|
@@ -592,16 +542,6 @@ with demo:
|
|
| 592 |
</tr>
|
| 593 |
</table>
|
| 594 |
""")
|
| 595 |
-
# Search Bar and Clear Search Button
|
| 596 |
-
with gr.Row():
|
| 597 |
-
search_box = gr.Textbox(
|
| 598 |
-
label="Search Papers by Title",
|
| 599 |
-
placeholder="Enter keywords to search...",
|
| 600 |
-
lines=1,
|
| 601 |
-
interactive=True
|
| 602 |
-
)
|
| 603 |
-
search_button = gr.Button("Search")
|
| 604 |
-
clear_search_button = gr.Button("Clear Search")
|
| 605 |
# Sort Options and Time Frame (conditionally visible)
|
| 606 |
with gr.Row():
|
| 607 |
sort_radio = gr.Radio(
|
|
@@ -667,20 +607,13 @@ with demo:
|
|
| 667 |
outputs=[paper_list]
|
| 668 |
)
|
| 669 |
|
| 670 |
-
#
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
)
|
| 676 |
-
|
| 677 |
-
# Clear search functionality
|
| 678 |
-
clear_search_button.click(
|
| 679 |
-
fn=clear_search_ui,
|
| 680 |
-
inputs=None,
|
| 681 |
-
outputs=[paper_list]
|
| 682 |
-
)
|
| 683 |
-
|
| 684 |
|
| 685 |
|
| 686 |
# --- Launch the App ---
|
|
|
|
| 1 |
#!/usr/bin/env python
|
| 2 |
|
| 3 |
import datetime
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import tqdm.auto
|
| 6 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 7 |
from huggingface_hub import HfApi
|
| 8 |
|
| 9 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
from datetime import timezone # Ensure timezone is imported
|
| 12 |
|
|
|
|
| 91 |
|
| 92 |
class PaperList:
|
| 93 |
"""
|
| 94 |
+
Manages the list of papers.
|
| 95 |
"""
|
| 96 |
COLUMN_INFO = [
|
| 97 |
["arxiv_id", "str"], # Added arxiv_id
|
|
|
|
| 117 |
def column_datatype(self):
|
| 118 |
return [col[1] for col in self.COLUMN_INFO]
|
| 119 |
|
| 120 |
+
def get_prettified_df(self) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
"""
|
| 122 |
+
Returns the prettified DataFrame.
|
| 123 |
"""
|
| 124 |
+
return self.df_prettified
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
|
| 127 |
# --- Sorting and Pagination Management ---
|
|
|
|
| 134 |
self.paper_list = paper_list
|
| 135 |
self.papers_per_page = papers_per_page
|
| 136 |
self.sort_method = "hot" # Default sort method
|
|
|
|
| 137 |
self.top_time_frame = "all time" # Default time frame for "Top" sorting
|
| 138 |
self.sort_papers()
|
| 139 |
# 'current_page' and 'total_pages' are set in 'sort_papers()'
|
|
|
|
| 160 |
|
| 161 |
def sort_papers(self):
|
| 162 |
"""
|
| 163 |
+
Sorts the papers based on the current sort method.
|
| 164 |
"""
|
| 165 |
df = self.paper_list.df_raw.copy()
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
if self.sort_method == "hot":
|
| 168 |
if not df.empty:
|
| 169 |
df = df.drop(columns=['score'], errors='ignore') # Remove existing 'score' column if present
|
|
|
|
| 217 |
self.sort_papers()
|
| 218 |
return True # Assume success
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
def get_current_page_papers(self) -> str:
|
| 221 |
"""
|
| 222 |
Retrieves the HTML string of the current page's papers.
|
|
|
|
| 365 |
return paper_manager.refresh()
|
| 366 |
|
| 367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
# --- CSS Styling ---
|
| 369 |
|
| 370 |
css = """
|
|
|
|
| 542 |
</tr>
|
| 543 |
</table>
|
| 544 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
# Sort Options and Time Frame (conditionally visible)
|
| 546 |
with gr.Row():
|
| 547 |
sort_radio = gr.Radio(
|
|
|
|
| 607 |
outputs=[paper_list]
|
| 608 |
)
|
| 609 |
|
| 610 |
+
# Footer
|
| 611 |
+
gr.Markdown("""
|
| 612 |
+
Related useful Spaces:
|
| 613 |
+
- [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
|
| 614 |
+
- [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
|
| 615 |
+
- [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
|
| 616 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
|
| 619 |
# --- Launch the App ---
|