Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -338,17 +338,16 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
|
|
338 |
List of the most relevant papers based on TF-IDF ranking.
|
339 |
"""
|
340 |
try:
|
341 |
-
# ✅
|
342 |
query = "+AND+".join([f"all:{kw}" for kw in keywords])
|
343 |
-
query_encoded = urllib.parse.quote_plus(query)
|
344 |
-
|
345 |
url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
|
346 |
-
print(f"DEBUG: Query URL - {url}")
|
347 |
|
348 |
feed = feedparser.parse(url)
|
349 |
-
print(f"DEBUG: API Response - {feed.entries}")
|
350 |
-
|
351 |
papers = []
|
|
|
352 |
for entry in feed.entries:
|
353 |
papers.append({
|
354 |
"title": entry.title,
|
@@ -362,31 +361,32 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
|
|
362 |
print("DEBUG: No results from ArXiv API")
|
363 |
return [{"error": "No results found. Try different keywords."}]
|
364 |
|
365 |
-
# ✅
|
366 |
corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
|
367 |
-
print(f"DEBUG: Corpus - {corpus}")
|
368 |
-
|
369 |
vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=2000)
|
370 |
tfidf_matrix = vectorizer.fit_transform(corpus)
|
371 |
-
print(f"DEBUG: TF-IDF Matrix Shape - {tfidf_matrix.shape}")
|
372 |
|
373 |
query_vec = vectorizer.transform([" ".join(keywords)])
|
374 |
-
print(f"DEBUG: Query Vector Shape - {query_vec.shape}")
|
375 |
-
|
376 |
similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
|
377 |
-
print(f"DEBUG: Similarity Scores - {similarity_scores}")
|
378 |
|
379 |
-
# ✅ Rank papers by similarity score
|
380 |
ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
|
381 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
return [paper[0] for paper in ranked_papers[:num_results]]
|
383 |
-
|
384 |
except Exception as e:
|
385 |
print(f"ERROR: {str(e)}")
|
386 |
return [{"error": f"Error fetching research papers: {str(e)}"}]
|
387 |
|
388 |
|
389 |
|
|
|
390 |
@tool
|
391 |
def get_citation_count(paper_title: str) -> int:
|
392 |
"""
|
|
|
338 |
List of the most relevant papers based on TF-IDF ranking.
|
339 |
"""
|
340 |
try:
|
341 |
+
# ✅ Encode query properly
|
342 |
query = "+AND+".join([f"all:{kw}" for kw in keywords])
|
343 |
+
query_encoded = urllib.parse.quote_plus(query)
|
344 |
+
|
345 |
url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
|
346 |
+
print(f"DEBUG: Query URL - {url}")
|
347 |
|
348 |
feed = feedparser.parse(url)
|
|
|
|
|
349 |
papers = []
|
350 |
+
|
351 |
for entry in feed.entries:
|
352 |
papers.append({
|
353 |
"title": entry.title,
|
|
|
361 |
print("DEBUG: No results from ArXiv API")
|
362 |
return [{"error": "No results found. Try different keywords."}]
|
363 |
|
364 |
+
# ✅ TF-IDF Vectorization
|
365 |
corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
|
|
|
|
|
366 |
vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=2000)
|
367 |
tfidf_matrix = vectorizer.fit_transform(corpus)
|
|
|
368 |
|
369 |
query_vec = vectorizer.transform([" ".join(keywords)])
|
|
|
|
|
370 |
similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
|
|
|
371 |
|
|
|
372 |
ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
|
373 |
+
|
374 |
+
# ✅ Apply GPT Summarization with Fallback
|
375 |
+
for paper, _ in ranked_papers:
|
376 |
+
try:
|
377 |
+
paper["summary"] = summarizer(paper["abstract"], max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
|
378 |
+
except:
|
379 |
+
paper["summary"] = paper["abstract"][:300] + "..." # ✅ Fallback: First 300 characters of abstract
|
380 |
+
|
381 |
return [paper[0] for paper in ranked_papers[:num_results]]
|
382 |
+
|
383 |
except Exception as e:
|
384 |
print(f"ERROR: {str(e)}")
|
385 |
return [{"error": f"Error fetching research papers: {str(e)}"}]
|
386 |
|
387 |
|
388 |
|
389 |
+
|
390 |
@tool
|
391 |
def get_citation_count(paper_title: str) -> int:
|
392 |
"""
|