IAMJB committed on
Commit
a0359a1
·
1 Parent(s): f710cf8
Files changed (5) hide show
  1. app.py +25 -9
  2. df/PaperCentral.py +18 -1
  3. paper_chat_tab.py +281 -0
  4. requirements.txt +14 -0
  5. style.css +63 -1
app.py CHANGED
@@ -10,6 +10,8 @@ import json
10
  import requests
11
 
12
  from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
 
 
13
  from zoneinfo import ZoneInfo # Available in Python 3.9 and later
14
 
15
  # Initialize the PaperCentral class instance
@@ -60,6 +62,9 @@ with gr.Blocks(css_paths="style.css") as demo:
60
  with gr.Column(scale=1):
61
  with gr.Accordion(label="⭐Release notes", open=False):
62
  gr.Markdown("""
 
 
 
63
  - **October 24, 2024** – CoRL 2024 proceedings added.
64
  - **October 20, 2024** – You can now add or edit papers.
65
  - **October 19, 2024** – Papers with github now have github stars.
@@ -182,6 +187,12 @@ with gr.Blocks(css_paths="style.css") as demo:
182
  author_resource_leaderboard_tab()
183
 
184
 
 
 
 
 
 
 
185
  # Define function to move to the next day
186
  def go_to_next_day(
187
  date: Union[str, datetime],
@@ -468,13 +479,14 @@ with gr.Blocks(css_paths="style.css") as demo:
468
  date_range = gr.update(value=None)
469
  conferences = gr.update(value=[])
470
  hf_options = gr.update(value=[])
471
- leaderboard_tab = gr.Tabs()
 
472
 
473
  if request:
474
- print("Request headers dictionary:", dict(request.headers))
475
- print("IP address:", request.client.host)
476
- print("Query parameters:", dict(request.query_params))
477
- print("Session hash:", request.session_hash)
478
 
479
  if 'date' in request.query_params:
480
  calendar = gr.update(value=request.query_params['date'])
@@ -502,9 +514,13 @@ with gr.Blocks(css_paths="style.css") as demo:
502
  if "tab" in request.query_params:
503
  tab = request.query_params['tab']
504
  if tab == "tab-leaderboards":
505
- leaderboard_tab = gr.Tabs(selected="tab-leaderboards")
 
 
 
 
506
 
507
- return calendar, date_range, conferences, hf_options, leaderboard_tab,
508
 
509
 
510
  demo.load(
@@ -514,7 +530,7 @@ with gr.Blocks(css_paths="style.css") as demo:
514
  api_name="update_data",
515
  ).then(
516
  fn=echo,
517
- outputs=[calendar, date_range_radio, conference_options, hf_options, tabs],
518
  api_name=False,
519
  ).then(
520
  # New then to handle LoginButton and HTML components
@@ -529,7 +545,7 @@ def main():
529
  """
530
  Launches the Gradio app.
531
  """
532
- demo.launch()
533
 
534
 
535
  # Run the main function when the script is executed
 
10
  import requests
11
 
12
  from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
13
+ from paper_chat_tab import paper_chat_tab
14
+
15
  from zoneinfo import ZoneInfo # Available in Python 3.9 and later
16
 
17
  # Initialize the PaperCentral class instance
 
62
  with gr.Column(scale=1):
63
  with gr.Accordion(label="⭐Release notes", open=False):
64
  gr.Markdown("""
65
+ - **November 21, 2024** – Neurips D&B 2024 proceedings added.
66
+ - **November 20, 2024** – Neurips 2024 proceedings added.
67
+ - **November 15, 2024** – EMNLP 2024 proceedings added.
68
  - **October 24, 2024** – CoRL 2024 proceedings added.
69
  - **October 20, 2024** – You can now add or edit papers.
70
  - **October 19, 2024** – Papers with github now have github stars.
 
187
  author_resource_leaderboard_tab()
188
 
189
 
190
+ with gr.Tab("Chat With Paper", id="tab-chat-with-paper"):
191
+ gr.Markdown("## Chat with Paper")
192
+ arxiv_id = gr.State(value=None)
193
+ paper_chat_tab(arxiv_id)
194
+
195
+
196
  # Define function to move to the next day
197
  def go_to_next_day(
198
  date: Union[str, datetime],
 
479
  date_range = gr.update(value=None)
480
  conferences = gr.update(value=[])
481
  hf_options = gr.update(value=[])
482
+ selected_tab = gr.Tabs()
483
+ paper_id = gr.update(value=None)
484
 
485
  if request:
486
+ # print("Request headers dictionary:", dict(request.headers))
487
+ # print("IP address:", request.client.host)
488
+ # print("Query parameters:", dict(request.query_params))
489
+ # print("Session hash:", request.session_hash)
490
 
491
  if 'date' in request.query_params:
492
  calendar = gr.update(value=request.query_params['date'])
 
514
  if "tab" in request.query_params:
515
  tab = request.query_params['tab']
516
  if tab == "tab-leaderboards":
517
+ selected_tab = gr.Tabs(selected="tab-leaderboards")
518
+ elif tab == "tab-chat-with-paper":
519
+ selected_tab = gr.Tabs(selected="tab-chat-with-paper")
520
+ if "paper_id" in request.query_params:
521
+ paper_id = request.query_params['paper_id']
522
 
523
+ return calendar, date_range, conferences, hf_options, selected_tab, paper_id
524
 
525
 
526
  demo.load(
 
530
  api_name="update_data",
531
  ).then(
532
  fn=echo,
533
+ outputs=[calendar, date_range_radio, conference_options, hf_options, tabs, arxiv_id],
534
  api_name=False,
535
  ).then(
536
  # New then to handle LoginButton and HTML components
 
545
  """
546
  Launches the Gradio app.
547
  """
548
+ demo.launch(ssr_mode=False)
549
 
550
 
551
  # Run the main function when the script is executed
df/PaperCentral.py CHANGED
@@ -15,7 +15,7 @@ import gradio as gr
15
  from utils import load_and_process
16
  import numpy as np
17
  from datetime import datetime, timedelta
18
-
19
 
20
  class PaperCentral:
21
  """
@@ -53,6 +53,7 @@ class PaperCentral:
53
  ]
54
 
55
  COLUMNS_ORDER_PAPER_PAGE: List[str] = [
 
56
  'date',
57
  'arxiv_id',
58
  'paper_page',
@@ -90,6 +91,7 @@ class PaperCentral:
90
  'authors': 'str',
91
  'github_stars': 'number',
92
  'project_page': 'markdown',
 
93
  }
94
 
95
  # Mapping for renaming columns for display purposes
@@ -101,6 +103,7 @@ class PaperCentral:
101
  'github_stars': 'GitHub⭐',
102
  'num_comments': '💬',
103
  'upvotes': '👍',
 
104
  }
105
 
106
  def __init__(self):
@@ -475,6 +478,20 @@ class PaperCentral:
475
  )
476
  filtered_df = filtered_df[conference_filter]
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  # Prettify the DataFrame
479
  filtered_df = self.prettify(filtered_df)
480
 
 
15
  from utils import load_and_process
16
  import numpy as np
17
  from datetime import datetime, timedelta
18
+ import re
19
 
20
  class PaperCentral:
21
  """
 
53
  ]
54
 
55
  COLUMNS_ORDER_PAPER_PAGE: List[str] = [
56
+ 'chat_with_paper',
57
  'date',
58
  'arxiv_id',
59
  'paper_page',
 
91
  'authors': 'str',
92
  'github_stars': 'number',
93
  'project_page': 'markdown',
94
+ 'chat_with_paper': 'markdown',
95
  }
96
 
97
  # Mapping for renaming columns for display purposes
 
103
  'github_stars': 'GitHub⭐',
104
  'num_comments': '💬',
105
  'upvotes': '👍',
106
+ 'chat_with_paper': 'Chat',
107
  }
108
 
109
  def __init__(self):
 
478
  )
479
  filtered_df = filtered_df[conference_filter]
480
 
481
+ if any(conf in ["NeurIPS2024 D&B", "NeurIPS2024"] for conf in conference_options):
482
def create_chat_link(row):
    """Return an HTML 'Chat with paper' link for a proceedings row.

    Extracts the OpenReview id from the ``id=`` query parameter of the
    row's ``proceedings`` URL.

    Args:
        row: a DataFrame row (mapping) with a ``proceedings`` field.

    Returns:
        The anchor-tag HTML string, or "" when the proceedings URL is
        missing, not a string, or has no ``id=`` parameter.
    """
    proceedings = row["proceedings"]
    # Guard: rows without a proceedings link carry NaN/None here, and
    # re.search() raises TypeError on non-string input.
    if not isinstance(proceedings, str):
        return ""
    neurips_id = re.search(r'id=([^&]+)', proceedings)
    if neurips_id:
        neurips_id = neurips_id.group(1)
        return f'<a href="/?tab=tab-chat-with-paper&paper_id={neurips_id}" id="custom_button" target="_blank" rel="noopener noreferrer" aria-disabled="false">✨ Chat with paper</a>'
    else:
        return ""
489
+
490
+ # Add the "chat_with_paper" column
491
+ filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_link, axis=1)
492
+ if 'chat_with_paper' not in columns_to_show:
493
+ columns_to_show.append('chat_with_paper')
494
+
495
  # Prettify the DataFrame
496
  filtered_df = self.prettify(filtered_df)
497
 
paper_chat_tab.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PyPDF2 import PdfReader
3
+ from bs4 import BeautifulSoup
4
+
5
+ import requests
6
+ from io import BytesIO
7
+ from transformers import AutoTokenizer
8
+
9
+ import os
10
+ from openai import OpenAI
11
+
12
+ # Cache for tokenizers to avoid reloading
13
+ tokenizer_cache = {}
14
+
15
+
16
# Function to fetch paper information from OpenReview
def fetch_paper_info_neurips(paper_id):
    """Scrape the title and author list of an OpenReview submission.

    Args:
        paper_id: OpenReview forum id (the ``id=`` query parameter).

    Returns:
        A Markdown preamble string with a linked title and the authors,
        or None when the forum page cannot be fetched.
    """
    url = f"https://openreview.net/forum?id={paper_id}"
    # Timeout so a stalled request cannot hang the UI callback.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # Bug fix: the original returned a (None, None) tuple here while the
        # success path returns a single string; the caller binds one value.
        return None

    html_content = response.content
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract title
    title_tag = soup.find('h2', class_='citation_title')
    title = title_tag.get_text(strip=True) if title_tag else 'Title not found'

    # Extract authors
    authors = []
    author_div = soup.find('div', class_='forum-authors')
    if author_div:
        author_tags = author_div.find_all('a')
        authors = [tag.get_text(strip=True) for tag in author_tags]
    author_list = ', '.join(authors) if authors else 'Authors not found'

    # Extract abstract ('string=' replaces the deprecated bs4 'text=' kwarg)
    abstract_div = soup.find('strong', string='Abstract:')
    if abstract_div:
        abstract_paragraph = abstract_div.find_next_sibling('div')
        abstract = abstract_paragraph.get_text(strip=True) if abstract_paragraph else 'Abstract not found'
    else:
        abstract = 'Abstract not found'

    # Construct preamble in Markdown.  The abstract is parsed but currently
    # omitted from the displayed preamble (kept for the commented variant):
    # preamble = f"**[{title}](https://openreview.net/forum?id={paper_id})**\n\n{author_list}\n\n**Abstract:**\n{abstract}"
    preamble = f"**[{title}](https://openreview.net/forum?id={paper_id})**\n\n{author_list}\n\n"

    return preamble
51
+
52
+
53
def fetch_paper_content(paper_id):
    """Download an OpenReview PDF and extract its full text.

    Args:
        paper_id: OpenReview forum id.

    Returns:
        The concatenated text of every page, or None on any failure
        (network error, bad HTTP status, unparsable PDF).
    """
    try:
        # Construct the URL
        url = f"https://openreview.net/pdf?id={paper_id}"

        # Fetch the PDF; timeout so a stalled download cannot hang the app.
        response = requests.get(url, timeout=60)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Read the PDF content
        pdf_content = BytesIO(response.content)
        reader = PdfReader(pdf_content)

        # Extract text from the PDF.  extract_text() may return None for a
        # page (the original `text += ...` would then raise TypeError), and
        # join() is linear where repeated += is quadratic over many pages.
        text = "".join(page.extract_text() or "" for page in reader.pages)

        return text  # Return full text; truncation will be handled later

    except Exception as e:
        # Best-effort by design: the caller treats None as "no paper content".
        print(f"An error occurred: {e}")
        return None
76
+
77
+
78
def paper_chat_tab(paper_id):
    """Build the "Chat with Paper" tab.

    Creates the paper-info header, an optional SambaNova token input, a model
    dropdown, and one hidden/visible chat column per model.  Changing either
    the paper id or the model re-fetches the paper info and content.

    Args:
        paper_id: gr.State holding the current OpenReview paper id; the tab
            reacts to its `.change` event.

    Returns:
        The gr.Blocks container holding the tab's components.
    """
    with gr.Blocks() as demo:
        with gr.Column():
            # Markdown component that displays the paper title and authors
            content = gr.Markdown(value="")

            # Preamble message to hint the user
            gr.Markdown("**Note:** Providing your own sambanova token can help you avoid rate limits.")

            # Optional user-supplied SambaNova API token (masked input)
            hf_token_input = gr.Textbox(
                label="Enter your sambanova token (optional)",
                type="password",
                placeholder="Enter your sambanova token to avoid rate limits"
            )

            # SambaNova model ids offered in the dropdown
            models = [
                "Meta-Llama-3.1-8B-Instruct",
                "Meta-Llama-3.1-70B-Instruct",
                "Meta-Llama-3.1-405B-Instruct",
            ]

            # Largest model is pre-selected
            default_model = models[-1]

            # Dropdown for selecting the model
            model_dropdown = gr.Dropdown(
                label="Select Model",
                choices=models,
                value=default_model
            )

            # State to store the paper's extracted full text
            paper_content = gr.State()

            # Create a column per model; only the selected model's column is
            # visible.  Each column hosts its own chat interface so switching
            # models keeps separate chat histories.
            columns = []
            for model_name in models:
                column = gr.Column(visible=(model_name == default_model))
                with column:
                    chatbot = create_chat_interface(model_name, paper_content, hf_token_input)
                columns.append(column)
            gr.HTML(
                '<img src="https://venturebeat.com/wp-content/uploads/2020/02/SambaNovaLogo_H_F.jpg" width="100px" />')
            gr.Markdown("**Note:** This model is supported by SambaNova.")

            # Toggle column visibility so only the selected model's chat shows
            def update_columns(selected_model):
                visibility = []
                for model_name in models:
                    is_visible = model_name == selected_model
                    visibility.append(gr.update(visible=is_visible))
                return visibility

            model_dropdown.change(
                fn=update_columns,
                inputs=model_dropdown,
                outputs=columns,
                api_name=False,
                queue=False,
            )

            # Refresh the header Markdown and paper_content state.
            # NOTE(review): selected_model is accepted but unused here —
            # presumably kept so the same fn serves both change events.
            def update_paper_info(paper_id, selected_model):
                preamble = fetch_paper_info_neurips(paper_id)
                text = fetch_paper_content(paper_id)
                if text is None:
                    return preamble, None

                return preamble, text

            # Re-fetch when the paper id changes
            paper_id.change(
                fn=update_paper_info,
                inputs=[paper_id, model_dropdown],
                outputs=[content, paper_content]
            )

            # Also re-fetch when the model changes
            model_dropdown.change(
                fn=update_paper_info,
                inputs=[paper_id, model_dropdown],
                outputs=[content, paper_content],
                queue=False,
            )
    return demo
162
+
163
+
164
def create_chat_interface(model_name, paper_content, hf_token_input):
    """Build a streaming gr.ChatInterface backed by the SambaNova API.

    Args:
        model_name: SambaNova model id used for completion requests.
        paper_content: gr.State holding the paper's extracted full text,
            injected as a system-message context for every turn.
        hf_token_input: gr.Textbox with an optional user-supplied API token.

    Returns:
        The configured gr.ChatInterface.
    """
    # Load tokenizer and cache it (module-level tokenizer_cache)
    if model_name not in tokenizer_cache:
        # Load the tokenizer from Hugging Face.
        # NOTE(review): a small Llama tokenizer stands in for every model, so
        # token counts are approximations for the larger models.
        # tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
        tokenizer_cache[model_name] = tokenizer
    else:
        tokenizer = tokenizer_cache[model_name]

    max_total_tokens = 50000  # Maximum tokens allowed (context + history + new message)

    # Define the function to handle one chat turn (streaming generator)
    def get_fn(message, history, paper_content_value, hf_token_value):
        # Include the paper content as context
        if paper_content_value:
            context = f"The following is the content of the paper:\n{paper_content_value}\n\n"
        else:
            context = ""

        # Tokenize the context
        context_tokens = tokenizer.encode(context)
        context_token_length = len(context_tokens)

        # Prepare the messages without context, tracking per-message token
        # counts so the oldest turns can be dropped if the budget overflows.
        messages = []
        message_tokens_list = []
        total_tokens = context_token_length  # Start with context tokens

        for user_msg, assistant_msg in history:
            # Tokenize user message
            user_tokens = tokenizer.encode(user_msg)
            messages.append({"role": "user", "content": user_msg})
            message_tokens_list.append(len(user_tokens))
            total_tokens += len(user_tokens)

            # Tokenize assistant message
            if assistant_msg:
                assistant_tokens = tokenizer.encode(assistant_msg)
                messages.append({"role": "assistant", "content": assistant_msg})
                message_tokens_list.append(len(assistant_tokens))
                total_tokens += len(assistant_tokens)

        # Tokenize the new user message
        message_tokens = tokenizer.encode(message)
        messages.append({"role": "user", "content": message})
        message_tokens_list.append(len(message_tokens))
        total_tokens += len(message_tokens)

        # Check if total tokens exceed the maximum allowed tokens
        if total_tokens > max_total_tokens:
            # Attempt to truncate the context first (paper text is the most
            # expendable part; chat history is kept as long as possible)
            available_tokens = max_total_tokens - (total_tokens - context_token_length)
            if available_tokens > 0:
                # Truncate the context to fit the available tokens
                truncated_context_tokens = context_tokens[:available_tokens]
                context = tokenizer.decode(truncated_context_tokens)
                context_token_length = available_tokens
                total_tokens = total_tokens - len(context_tokens) + context_token_length
            else:
                # Not enough space for context; remove it
                context = ""
                total_tokens -= context_token_length
                context_token_length = 0

            # If total tokens still exceed the limit, truncate the message history
            while total_tokens > max_total_tokens and len(messages) > 1:
                # Remove the oldest message
                removed_message = messages.pop(0)
                removed_tokens = message_tokens_list.pop(0)
                total_tokens -= removed_tokens

        # Rebuild the final messages list including the (possibly truncated) context
        final_messages = []
        if context:
            final_messages.append({"role": "system", "content": context})
        final_messages.extend(messages)

        # Use the user-supplied token if provided, else the env fallback
        api_key = hf_token_value or os.environ.get("SAMBANOVA_API_KEY")
        if not api_key:
            raise ValueError("API token is not provided.")

        # Initialize the OpenAI client against the SambaNova endpoint
        client = OpenAI(
            base_url="https://api.sambanova.ai/v1/",
            api_key=api_key,
        )

        try:
            # Create the chat completion (streamed)
            completion = client.chat.completions.create(
                model=model_name,
                messages=final_messages,
                stream=True,
            )
            response_text = ""
            for chunk in completion:
                delta = chunk.choices[0].delta.content or ""
                response_text += delta
                # Yield the accumulated text so the UI shows a growing reply
                yield response_text
        except Exception as e:
            # Surface API errors in the chat window instead of crashing the app
            error_message = f"Error: {str(e)}"
            yield error_message

    # Create the ChatInterface
    chat_interface = gr.ChatInterface(
        fn=get_fn,
        chatbot=gr.Chatbot(
            label="Chatbot",
            scale=1,
            height=400,
            autoscroll=True
        ),
        additional_inputs=[paper_content, hf_token_input],
        # examples=["What are the main findings of this paper?", "Explain the methodology used in this research."]
    )
    return chat_interface
requirements.txt CHANGED
@@ -2,3 +2,17 @@ gradio==5.6.0
2
  gradio_calendar
3
  datasets
4
  scholarly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  gradio_calendar
3
  datasets
4
  scholarly
5
+ arxiv
6
+ PyPDF2
7
+ transformers
8
+ beautifulsoup4
9
+ # Set the primary index URL to PyTorch's CPU wheels
10
+ --index-url https://download.pytorch.org/whl/cpu
11
+
12
+ # Ensure PyPI is still accessible for other packages
13
+ --extra-index-url https://pypi.org/simple
14
+
15
+ # List all your packages
16
+ torch
17
+ torchvision
18
+ torchaudio
style.css CHANGED
@@ -57,4 +57,66 @@ body a:hover {
57
  height: 1.38rem;
58
  overflow: hidden;
59
  border-radius: 9999px;
60
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  height: 1.38rem;
58
  overflow: hidden;
59
  border-radius: 9999px;
60
+ }
61
+
62
+
63
+ /* CSS Variables for Button Styling */
64
+ :root {
65
+ /* Border and Padding */
66
+ --button-border-width: 0px;
67
+ --button-small-padding: 8px 12px; /* Example values */
68
+ --button-small-radius: 4px; /* Example values */
69
+
70
+ /* Colors */
71
+ --button-secondary-border-color: #e5e7eb; /* Example neutral-200 */
72
+ --button-secondary-background-fill: #f3f4f6; /* Example neutral-200 */
73
+ --button-secondary-background-fill-hover: #d1d5db; /* Example neutral-300 */
74
+ --button-secondary-text-color: #000000;
75
+ --button-secondary-text-color-hover: #000000;
76
+
77
+ /* Typography */
78
+ --button-small-text-size: 14px; /* Example text-sm */
79
+ --button-small-text-weight: 400;
80
+
81
+ /* Shadows and Transitions */
82
+ --button-secondary-shadow: none;
83
+ --button-secondary-shadow-hover: none;
84
+ --button-secondary-shadow-active: none;
85
+ --button-transition: all 0.2s ease;
86
+ }
87
+
88
+ /* Custom Button Styles */
89
+ #custom_button {
90
+ display: inline-flex;
91
+ align-items: center;
92
+ justify-content: center;
93
+ border: var(--button-border-width) solid var(--button-secondary-border-color);
94
+ background: var(--button-secondary-background-fill);
95
+ color: var(--button-secondary-text-color);
96
+ padding: var(--button-small-padding);
97
+ border-radius: var(--button-small-radius);
98
+ font-size: var(--button-small-text-size);
99
+ font-weight: var(--button-small-text-weight);
100
+ text-decoration: none;
101
+ box-shadow: var(--button-secondary-shadow);
102
+ transition: var(--button-transition);
103
+ }
104
+
105
+ #custom_button:hover {
106
+ background: var(--button-secondary-background-fill-hover);
107
+ border-color: var(--button-secondary-border-color-hover);
108
+ color: var(--button-secondary-text-color-hover);
109
+ box-shadow: var(--button-secondary-shadow-hover);
110
+ }
111
+
112
+ #custom_button:active {
113
+ box-shadow: var(--button-secondary-shadow-active);
114
+ }
115
+
116
+ /* Icon Styling */
117
+ #custom_button .button-icon {
118
+ margin-right: 8px; /* Adjust spacing between icon and text as needed */
119
+ width: 20px; /* Adjust icon size as needed */
120
+ height: 20px; /* Adjust icon size as needed */
121
+ }
122
+