ScholarAgent

Running

App Files Files Community

pdx97 committed on Mar 13

Commit

b81895b

verified ·

1 Parent(s): 6dda0fd

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -338,17 +338,16 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
         List of the most relevant papers based on TF-IDF ranking.
     """
     try:
-        # ✅ Correct URL encoding for spaces and special characters
         query = "+AND+".join([f"all:{kw}" for kw in keywords])
-        query_encoded = urllib.parse.quote_plus(query)  # ✅ FIXED: Correct encoding
         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
-        print(f"DEBUG: Query URL - {url}")  # ✅ Debugging
         feed = feedparser.parse(url)
-        print(f"DEBUG: API Response - {feed.entries}")
         papers = []
         for entry in feed.entries:
             papers.append({
                 "title": entry.title,
@@ -362,31 +361,32 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
             print("DEBUG: No results from ArXiv API")
             return [{"error": "No results found. Try different keywords."}]
-        # ✅ Debug Corpus before TF-IDF
         corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
-        print(f"DEBUG: Corpus - {corpus}")
         vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=2000)
         tfidf_matrix = vectorizer.fit_transform(corpus)
-        print(f"DEBUG: TF-IDF Matrix Shape - {tfidf_matrix.shape}")
         query_vec = vectorizer.transform([" ".join(keywords)])
-        print(f"DEBUG: Query Vector Shape - {query_vec.shape}")
         similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
-        print(f"DEBUG: Similarity Scores - {similarity_scores}")
-        # ✅ Rank papers by similarity score
         ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
         return [paper[0] for paper in ranked_papers[:num_results]]
     except Exception as e:
         print(f"ERROR: {str(e)}")
         return [{"error": f"Error fetching research papers: {str(e)}"}]
 @tool
 def get_citation_count(paper_title: str) -> int:
     """

         List of the most relevant papers based on TF-IDF ranking.
     """
     try:
+        # ✅ Encode query properly
         query = "+AND+".join([f"all:{kw}" for kw in keywords])
+        query_encoded = urllib.parse.quote_plus(query)
         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
+        print(f"DEBUG: Query URL - {url}")
         feed = feedparser.parse(url)
         papers = []
         for entry in feed.entries:
             papers.append({
                 "title": entry.title,
             print("DEBUG: No results from ArXiv API")
             return [{"error": "No results found. Try different keywords."}]
+        # ✅ TF-IDF Vectorization
         corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
         vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=2000)
         tfidf_matrix = vectorizer.fit_transform(corpus)
         query_vec = vectorizer.transform([" ".join(keywords)])
         similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
         ranked_papers = sorted(zip(papers, similarity_scores), key=lambda x: x[1], reverse=True)
+        # ✅ Apply GPT Summarization with Fallback
+        for paper, _ in ranked_papers:
+            try:
+                paper["summary"] = summarizer(paper["abstract"], max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
+            except:
+                paper["summary"] = paper["abstract"][:300] + "..."  # ✅ Fallback: First 300 characters of abstract
         return [paper[0] for paper in ranked_papers[:num_results]]
     except Exception as e:
         print(f"ERROR: {str(e)}")
         return [{"error": f"Error fetching research papers: {str(e)}"}]
 @tool
 def get_citation_count(paper_title: str) -> int:
     """