pdx97 committed on
Commit 41b209f · verified · 1 Parent(s): 79b8e3b

Update app.py

Files changed (1)
  1. app.py +161 -255
app.py CHANGED
@@ -126,310 +126,216 @@
  # # return [{"error": f"Error fetching research papers: {str(e)}"}]


- # """------Applied TF-IDF for better semantic search------"""
- # import feedparser
- # import urllib.parse
- # import yaml
- # from tools.final_answer import FinalAnswerTool
- # import numpy as np
- # from sklearn.feature_extraction.text import TfidfVectorizer
- # from sklearn.metrics.pairwise import cosine_similarity
- # import gradio as gr
- # from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
- # import nltk
-
- # import datetime
- # import requests
- # import pytz
- # from tools.final_answer import FinalAnswerTool
-
- # from Gradio_UI import GradioUI
-
- # nltk.download("stopwords")
- # from nltk.corpus import stopwords
-
- # @tool  # ✅ Register the function properly as a SmolAgents tool
- # def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
- #     """Fetches and ranks arXiv papers using TF-IDF and Cosine Similarity.
-
- #     Args:
- #         keywords: List of keywords for search.
- #         num_results: Number of results to return.
-
- #     Returns:
- #         List of the most relevant papers based on TF-IDF ranking.
- #     """
- #     try:
- #         print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")
-
- #         # Use a general keyword search
- #         query = "+AND+".join([f"all:{kw}" for kw in keywords])
- #         query_encoded = urllib.parse.quote(query)
- #         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=50&sortBy=submittedDate&sortOrder=descending"
-
- #         print(f"DEBUG: Query URL - {url}")
-
- #         feed = feedparser.parse(url)
- #         papers = []
-
- #         # Extract papers from arXiv
- #         for entry in feed.entries:
- #             papers.append({
- #                 "title": entry.title,
- #                 "authors": ", ".join(author.name for author in entry.authors),
- #                 "year": entry.published[:4],
- #                 "abstract": entry.summary,
- #                 "link": entry.link
- #             })
-
- #         if not papers:
- #             return [{"error": "No results found. Try different keywords."}]
-
- #         # Prepare TF-IDF Vectorization
- #         corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
- #         vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))  # Remove stopwords
- #         tfidf_matrix = vectorizer.fit_transform(corpus)
-
- #         # Transform Query into TF-IDF Vector
- #         query_str = " ".join(keywords)
- #         query_vec = vectorizer.transform([query_str])
-
- #         # Compute Cosine Similarity
- #         similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
-
- #         # Sort papers based on similarity score
- #         ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
-
- #         # Return the most relevant papers
- #         return [paper[0] for paper in ranked_papers[:num_results]]
-
- #     except Exception as e:
- #         print(f"ERROR: {str(e)}")
- #         return [{"error": f"Error fetching research papers: {str(e)}"}]
- # @tool
- # def get_current_time_in_timezone(timezone: str) -> str:
- #     """A tool that fetches the current local time in a specified timezone.
- #     Args:
- #         timezone: A string representing a valid timezone (e.g., 'America/New_York').
- #     """
- #     try:
- #         # Create timezone object
- #         tz = pytz.timezone(timezone)
- #         # Get current time in that timezone
- #         local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
- #         return f"The current local time in {timezone} is: {local_time}"
- #     except Exception as e:
- #         return f"Error fetching time for timezone '{timezone}': {str(e)}"
-
-
- # final_answer = FinalAnswerTool()
-
-
- # # AI Model
- # model = HfApiModel(
- #     max_tokens=2096,
- #     temperature=0.5,
- #     model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
- #     custom_role_conversions=None,
- # )
-
- # # Import tool from Hub
- # image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
-
-
- # # Load prompt templates
- # with open("prompts.yaml", 'r') as stream:
- #     prompt_templates = yaml.safe_load(stream)
-
- # # Create the AI Agent
- # agent = CodeAgent(
- #     model=model,
- #     tools=[final_answer,fetch_latest_arxiv_papers],  # Add your tools here
- #     max_steps=6,
- #     verbosity_level=1,
- #     grammar=None,
- #     planning_interval=None,
- #     name="ScholarAgent",
- #     description="An AI agent that fetches the latest research papers from arXiv based on user-defined keywords and filters.",
- #     prompt_templates=prompt_templates
- # )
-
- # # Define Gradio Search Function
- # # def search_papers(user_input):
- # #     keywords = [kw.strip() for kw in user_input.split(",") if kw.strip()]  # Ensure valid keywords
- # #     print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input
-
- # #     if not keywords:
- # #         print("DEBUG: No valid keywords provided.")
- # #         return "Error: Please enter at least one valid keyword."
-
- # #     results = fetch_latest_arxiv_papers(keywords, num_results=3)  # Fetch 3 results
- # #     print(f"DEBUG: Results received - {results}")  # Debug function output
-
- # #     if isinstance(results, list) and results and isinstance(results[0], dict):
- # #         # Format output with better readability and clarity
- # #         formatted_results = "\n\n".join([
- # #             f"---\n\n"
- # #             f"📌 **Title:**\n{paper['title']}\n\n"
- # #             f"👨‍🔬 **Authors:**\n{paper['authors']}\n\n"
- # #             f"📅 **Year:** {paper['year']}\n\n"
- # #             f"📖 **Abstract:**\n{paper['abstract'][:500]}... *(truncated for readability)*\n\n"
- # #             f"[🔗 Read Full Paper]({paper['link']})\n\n"
- # #             for paper in results
- # #         ])
- # #         return formatted_results
-
- # #     print("DEBUG: No results found.")
- # #     return "No results found. Try different keywords."

- # # Search Papers
- # def search_papers(user_input):
- #     keywords = [kw.strip() for kw in user_input.split(",") if kw.strip()]  # Ensure valid keywords
- #     print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input
-
- #     if not keywords:
- #         print("DEBUG: No valid keywords provided.")
- #         return "Error: Please enter at least one valid keyword."
-
- #     results = fetch_latest_arxiv_papers(keywords, num_results=3)  # Fetch 3 results
- #     print(f"DEBUG: Results received - {results}")  # Debug function output

- #     # Check if the API returned an error
- #     if isinstance(results, list) and len(results) > 0 and "error" in results[0]:
- #         return results[0]["error"]  # Return the error message directly

- #     # Format results only if valid papers exist
- #     if isinstance(results, list) and results and isinstance(results[0], dict):
- #         formatted_results = "\n\n".join([
- #             f"---\n\n"
- #             f"📌 **Title:** {paper['title']}\n\n"
- #             f"👨‍🔬 **Authors:** {paper['authors']}\n\n"
- #             f"📅 **Year:** {paper['year']}\n\n"
- #             f"📖 **Abstract:** {paper['abstract'][:500]}... *(truncated for readability)*\n\n"
- #             f"[🔗 Read Full Paper]({paper['link']})\n\n"
- #             for paper in results
- #         ])
- #         return formatted_results

- #     print("DEBUG: No results found.")
- #     return "No results found. Try different keywords."

- # # Launch Gradio UI with CodeAgent
- # GradioUI(agent).launch()

- # # # Create Gradio UI
- # # with gr.Blocks() as demo:
- # #     gr.Markdown("# ScholarAgent")
- # #     keyword_input = gr.Textbox(label="Enter keywords (comma-separated)", placeholder="e.g., deep learning, reinforcement learning")
- # #     output_display = gr.Markdown()
- # #     search_button = gr.Button("Search")
-
- # #     search_button.click(search_papers, inputs=[keyword_input], outputs=[output_display])
-
- # #     print("DEBUG: Gradio UI is running. Waiting for user input...")

- # # # Launch Gradio App
- # # demo.launch()

- import os
- import datetime
- import requests
- import pytz
- import yaml
- from smolagents import CodeAgent, HfApiModel, load_tool, tool
- from tools.final_answer import FinalAnswerTool
- from Gradio_UI import GradioUI
-
- # Step 1: Set Hugging Face API Token
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingface_api_token"

- # Step 2: Define ScholarAgent's Paper Search Functionality
- @tool
- def fetch_arxiv_papers(query: str) -> str:
-     """Fetches the top 3 most recent research papers from ArXiv based on a keyword search.
-
-     Args:
-         query: A string containing keywords or a full sentence describing the research topic.
-
-     Returns:
-         A formatted string with the top 3 recent papers, including title, authors, and ArXiv links.
-     """
-     base_url = "http://export.arxiv.org/api/query"
-     params = {
-         "search_query": query,
-         "start": 0,
-         "max_results": 3,
-         "sortBy": "submittedDate",
-         "sortOrder": "descending",
-     }
-
-     try:
-         response = requests.get(base_url, params=params)
-         if response.status_code == 200:
-             papers = response.text.split("<entry>")
-             results = []
-             for paper in papers[1:4]:  # Extract top 3 papers
-                 title = paper.split("<title>")[1].split("</title>")[0].strip()
-                 authors = paper.split("<author><name>")[1].split("</name>")[0].strip()
-                 link = paper.split("<id>")[1].split("</id>")[0].strip()
-                 results.append(f"- **{title}**\n  - 📖 Authors: {authors}\n  - 🔗 [Read here]({link})\n")
-             return "\n".join(results) if results else "No relevant papers found."
-         else:
-             return "Error: Unable to retrieve papers from ArXiv."
-     except Exception as e:
-         return f"API Error: {str(e)}"
-
- # Step 3: Add a Timezone Utility Tool
  @tool
  def get_current_time_in_timezone(timezone: str) -> str:
-     """Fetches the current local time in a specified timezone.
-
      Args:
          timezone: A string representing a valid timezone (e.g., 'America/New_York').
-
-     Returns:
-         A formatted string with the current time.
      """
      try:
          tz = pytz.timezone(timezone)
          local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
          return f"The current local time in {timezone} is: {local_time}"
      except Exception as e:
          return f"Error fetching time for timezone '{timezone}': {str(e)}"

- # Step 4: Define Final Answer Tool (Required)
  final_answer = FinalAnswerTool()

- # Step 5: Configure Hugging Face Model with API Token
  model = HfApiModel(
      max_tokens=2096,
      temperature=0.5,
-     model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # Default model
      custom_role_conversions=None,
-
  )

- # Step 6: Load Additional Tools
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

- # Step 7: Load Prompt Templates
  with open("prompts.yaml", 'r') as stream:
      prompt_templates = yaml.safe_load(stream)

- # Step 8: Define ScholarAgent (AI Agent)
  agent = CodeAgent(
      model=model,
-     tools=[final_answer, fetch_arxiv_papers, get_current_time_in_timezone],  # ScholarAgent tools
      max_steps=6,
      verbosity_level=1,
      grammar=None,
      planning_interval=None,
      name="ScholarAgent",
-     description="An AI-powered research assistant that fetches top research papers from ArXiv.",
      prompt_templates=prompt_templates
  )

- # Step 9: Launch Gradio UI with CodeAgent
- GradioUI(agent).launch()
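
Review note on the removed `fetch_arxiv_papers` above: it pulled fields out of arXiv's Atom response by splitting the raw XML text on `<entry>`, `<title>`, and `<author><name>`, so any entry with a missing tag, or any change in arXiv's serialization, raises an `IndexError`. The reinstated tool below reads the same feed through `feedparser` instead. A minimal sketch of that safer pattern (the query string is an illustrative example):

```python
# Minimal sketch: parse the arXiv Atom feed with feedparser rather than
# splitting raw XML. The query is an illustrative example.
import urllib.parse

import feedparser

query_encoded = urllib.parse.quote("all:deep learning")
url = (
    "http://export.arxiv.org/api/query"
    f"?search_query={query_encoded}&start=0&max_results=3"
)

feed = feedparser.parse(url)
for entry in feed.entries:
    # Missing fields yield defaults instead of raising IndexError.
    authors = ", ".join(author.name for author in entry.get("authors", []))
    print(f"{entry.title} ({entry.published[:4]}) by {authors}")
    print(f"  {entry.link}")
```

`feedparser` is already imported by the reinstated tool, so this pattern adds no new dependency.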
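The added side below re-enables the TF-IDF ranking. To sanity-check that ranking logic in isolation, here is a self-contained sketch: the corpus and query are invented toy data, and scikit-learn's built-in English stopword list stands in for the NLTK list the tool downloads.

```python
# Toy check of the TF-IDF + cosine-similarity ranking used by
# fetch_latest_arxiv_papers. Corpus and query are invented examples.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

corpus = [
    "Deep reinforcement learning for robotic control",
    "A survey of convolutional neural networks for vision",
    "Sample-efficient reinforcement learning from human feedback",
]
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(corpus)   # one row per paper

query_vec = vectorizer.transform(["reinforcement learning"])
scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

# Highest score first, mirroring ranked_papers in the tool.
for score, text in sorted(zip(scores, corpus), reverse=True):
    print(f"{score:.3f}  {text}")
```

Because the query is projected into the vocabulary fitted on the corpus, query terms that appear in no title or abstract simply contribute nothing to the score.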
  # # return [{"error": f"Error fetching research papers: {str(e)}"}]


+ """------Applied TF-IDF for better semantic search------"""
+ import feedparser
+ import urllib.parse
+ import yaml
+ from tools.final_answer import FinalAnswerTool
+ import numpy as np
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import gradio as gr
+ from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
+ import nltk
+
+ import datetime
+ import requests
+ import pytz
+ from tools.final_answer import FinalAnswerTool
+
+ from Gradio_UI import GradioUI
+
+ nltk.download("stopwords")
+ from nltk.corpus import stopwords
+
+ @tool  # Register the function properly as a SmolAgents tool
+ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
+     """Fetches and ranks arXiv papers using TF-IDF and Cosine Similarity.
+
+     Args:
+         keywords: List of keywords for search.
+         num_results: Number of results to return.
+
+     Returns:
+         List of the most relevant papers based on TF-IDF ranking.
+     """
+     try:
+         print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")
+
+         # Use a general keyword search
+         query = "+AND+".join([f"all:{kw}" for kw in keywords])
+         query_encoded = urllib.parse.quote(query)
+         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=50&sortBy=submittedDate&sortOrder=descending"
+
+         print(f"DEBUG: Query URL - {url}")
+
+         feed = feedparser.parse(url)
+         papers = []
+
+         # Extract papers from arXiv
+         for entry in feed.entries:
+             papers.append({
+                 "title": entry.title,
+                 "authors": ", ".join(author.name for author in entry.authors),
+                 "year": entry.published[:4],
+                 "abstract": entry.summary,
+                 "link": entry.link
+             })
+
+         if not papers:
+             return [{"error": "No results found. Try different keywords."}]
+
+         # Prepare TF-IDF Vectorization
+         corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
+         vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))  # Remove stopwords
+         tfidf_matrix = vectorizer.fit_transform(corpus)
+
+         # Transform Query into TF-IDF Vector
+         query_str = " ".join(keywords)
+         query_vec = vectorizer.transform([query_str])
+
+         # Compute Cosine Similarity
+         similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
+
+         # Sort papers based on similarity score
+         ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
+
+         # Return the most relevant papers
+         return [paper[0] for paper in ranked_papers[:num_results]]
+
+     except Exception as e:
+         print(f"ERROR: {str(e)}")
+         return [{"error": f"Error fetching research papers: {str(e)}"}]
  @tool
  def get_current_time_in_timezone(timezone: str) -> str:
+     """A tool that fetches the current local time in a specified timezone.
      Args:
          timezone: A string representing a valid timezone (e.g., 'America/New_York').
      """
      try:
+         # Create timezone object
          tz = pytz.timezone(timezone)
+         # Get current time in that timezone
          local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
          return f"The current local time in {timezone} is: {local_time}"
      except Exception as e:
          return f"Error fetching time for timezone '{timezone}': {str(e)}"

+
  final_answer = FinalAnswerTool()

+
+ # AI Model
  model = HfApiModel(
      max_tokens=2096,
      temperature=0.5,
+     model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
      custom_role_conversions=None,
  )

+ # Import tool from Hub
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

+
+ # Load prompt templates
  with open("prompts.yaml", 'r') as stream:
      prompt_templates = yaml.safe_load(stream)

+ # Create the AI Agent
  agent = CodeAgent(
      model=model,
+     tools=[final_answer,fetch_latest_arxiv_papers],  # Add your tools here
      max_steps=6,
      verbosity_level=1,
      grammar=None,
      planning_interval=None,
      name="ScholarAgent",
+     description="An AI agent that fetches the latest research papers from arXiv based on user-defined keywords and filters.",
      prompt_templates=prompt_templates
  )

+
+
+ # # Search Papers
+ # def search_papers(user_input):
+ #     keywords = [kw.strip() for kw in user_input.split(",") if kw.strip()]  # Ensure valid keywords
+ #     print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input
+
+ #     if not keywords:
+ #         print("DEBUG: No valid keywords provided.")
+ #         return "Error: Please enter at least one valid keyword."
+
+ #     results = fetch_latest_arxiv_papers(keywords, num_results=3)  # Fetch 3 results
+ #     print(f"DEBUG: Results received - {results}")  # Debug function output
+
+ #     # Check if the API returned an error
+ #     if isinstance(results, list) and len(results) > 0 and "error" in results[0]:
+ #         return results[0]["error"]  # Return the error message directly
+
+ #     # Format results only if valid papers exist
+ #     if isinstance(results, list) and results and isinstance(results[0], dict):
+ #         formatted_results = "\n\n".join([
+ #             f"---\n\n"
+ #             f"📌 **Title:** {paper['title']}\n\n"
+ #             f"👨‍🔬 **Authors:** {paper['authors']}\n\n"
+ #             f"📅 **Year:** {paper['year']}\n\n"
+ #             f"📖 **Abstract:** {paper['abstract'][:500]}... *(truncated for readability)*\n\n"
+ #             f"[🔗 Read Full Paper]({paper['link']})\n\n"
+ #             for paper in results
+ #         ])
+ #         return formatted_results
+
+ #     print("DEBUG: No results found.")
+ #     return "No results found. Try different keywords."
+
+ def search_papers(user_input):
+     keywords = [kw.strip() for kw in user_input.split(",") if kw.strip()]  # Ensure valid keywords
+     print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input
+
+     if not keywords:
+         print("DEBUG: No valid keywords provided.")
+         return "Error: Please enter at least one valid keyword."
+
+     # Use CodeAgent to process the request
+     response = agent.run(user_input)  # Now it actually uses CodeAgent
+     print(f"DEBUG: Agent Response - {response}")  # Debug response from the agent
+
+     # Check if response is valid
+     if isinstance(response, list) and len(response) > 0 and "error" in response[0]:
+         return response[0]["error"]  # Return the error message directly
+
+     # Format results only if valid papers exist
+     if isinstance(response, list) and response and isinstance(response[0], dict):
+         formatted_results = "\n\n".join([
+             f"---\n\n"
+             f"📌 **Title:** {paper['title']}\n\n"
+             f"👨‍🔬 **Authors:** {paper['authors']}\n\n"
+             f"📅 **Year:** {paper['year']}\n\n"
+             f"📖 **Abstract:** {paper['abstract'][:500]}... *(truncated for readability)*\n\n"
+             f"[🔗 Read Full Paper]({paper['link']})\n\n"
+             for paper in response
+         ])
+         return formatted_results
+
+     print("DEBUG: No results found.")
+     return "No results found. Try different keywords."
+
+
+ # # Launch Gradio UI with CodeAgent
+ # GradioUI(agent).launch()
+
+
+ # Create Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# ScholarAgent")
+     keyword_input = gr.Textbox(label="Enter keywords (comma-separated)", placeholder="e.g., deep learning, reinforcement learning")
+     output_display = gr.Markdown()
+     search_button = gr.Button("Search")
+
+     search_button.click(search_papers, inputs=[keyword_input], outputs=[output_display])
+
+     print("DEBUG: Gradio UI is running. Waiting for user input...")
+
+ # Launch Gradio App
+ demo.launch()
341