pdx97 committed on
Commit
b81895b
·
verified ·
1 Parent(s): 6dda0fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -338,17 +338,16 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
338
  List of the most relevant papers based on TF-IDF ranking.
339
  """
340
  try:
341
- # ✅ Correct URL encoding for spaces and special characters
342
  query = "+AND+".join([f"all:{kw}" for kw in keywords])
343
- query_encoded = urllib.parse.quote_plus(query) # ✅ FIXED: Correct encoding
344
-
345
  url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
346
- print(f"DEBUG: Query URL - {url}") # ✅ Debugging
347
 
348
  feed = feedparser.parse(url)
349
- print(f"DEBUG: API Response - {feed.entries}")
350
-
351
  papers = []
 
352
  for entry in feed.entries:
353
  papers.append({
354
  "title": entry.title,
@@ -362,31 +361,32 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
362
  print("DEBUG: No results from ArXiv API")
363
  return [{"error": "No results found. Try different keywords."}]
364
 
365
- # ✅ Debug Corpus before TF-IDF
366
  corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
367
- print(f"DEBUG: Corpus - {corpus}")
368
-
369
  vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=2000)
370
  tfidf_matrix = vectorizer.fit_transform(corpus)
371
- print(f"DEBUG: TF-IDF Matrix Shape - {tfidf_matrix.shape}")
372
 
373
  query_vec = vectorizer.transform([" ".join(keywords)])
374
- print(f"DEBUG: Query Vector Shape - {query_vec.shape}")
375
-
376
  similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
377
- print(f"DEBUG: Similarity Scores - {similarity_scores}")
378
 
379
- # ✅ Rank papers by similarity score
380
  ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
381
-
 
 
 
 
 
 
 
382
  return [paper[0] for paper in ranked_papers[:num_results]]
383
-
384
  except Exception as e:
385
  print(f"ERROR: {str(e)}")
386
  return [{"error": f"Error fetching research papers: {str(e)}"}]
387
 
388
 
389
 
 
390
  @tool
391
  def get_citation_count(paper_title: str) -> int:
392
  """
 
338
  List of the most relevant papers based on TF-IDF ranking.
339
  """
340
  try:
341
+ # ✅ Encode query properly
342
  query = "+AND+".join([f"all:{kw}" for kw in keywords])
343
+ query_encoded = urllib.parse.quote_plus(query)
344
+
345
  url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
346
+ print(f"DEBUG: Query URL - {url}")
347
 
348
  feed = feedparser.parse(url)
 
 
349
  papers = []
350
+
351
  for entry in feed.entries:
352
  papers.append({
353
  "title": entry.title,
 
361
  print("DEBUG: No results from ArXiv API")
362
  return [{"error": "No results found. Try different keywords."}]
363
 
364
+ # ✅ TF-IDF Vectorization
365
  corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
 
 
366
  vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=2000)
367
  tfidf_matrix = vectorizer.fit_transform(corpus)
 
368
 
369
  query_vec = vectorizer.transform([" ".join(keywords)])
 
 
370
  similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
 
371
 
 
372
  ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
373
+
374
+ # ✅ Apply GPT Summarization with Fallback
375
+ for paper, _ in ranked_papers:
376
+ try:
377
+ paper["summary"] = summarizer(paper["abstract"], max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
378
+ except:
379
+ paper["summary"] = paper["abstract"][:300] + "..." # ✅ Fallback: First 300 characters of abstract
380
+
381
  return [paper[0] for paper in ranked_papers[:num_results]]
382
+
383
  except Exception as e:
384
  print(f"ERROR: {str(e)}")
385
  return [{"error": f"Error fetching research papers: {str(e)}"}]
386
 
387
 
388
 
389
+
390
  @tool
391
  def get_citation_count(paper_title: str) -> int:
392
  """