mgbam committed on
Commit
834ac1a
·
verified ·
1 Parent(s): c36d3e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -23
app.py CHANGED
@@ -5,7 +5,7 @@ from pyvis.network import Network
5
  import tempfile
6
  import openai
7
  import requests
8
- import feedparser
9
  import pandas as pd
10
  from io import StringIO
11
  import asyncio
@@ -33,7 +33,7 @@ generator = load_text_generator()
33
  # ---------------------------
34
  def generate_ideas_with_hf(prompt):
35
  # Use Hugging Face's text-generation pipeline.
36
- # Instead of using max_length, we use max_new_tokens so that new tokens are generated.
37
  results = generator(prompt, max_new_tokens=50, num_return_sequences=1)
38
  idea_text = results[0]['generated_text']
39
  return idea_text
@@ -68,33 +68,49 @@ def generate_ideas_with_openai(prompt, api_key):
68
  return output_text
69
 
70
  # ---------------------------
71
- # arXiv API Integration
72
  # ---------------------------
73
  def fetch_arxiv_results(query, max_results=5):
74
  """
75
- Queries arXiv's free API to fetch relevant papers.
76
  """
77
  base_url = "http://export.arxiv.org/api/query?"
78
  search_query = "search_query=all:" + query
79
  start = "0"
80
- max_results = str(max_results)
81
- query_url = f"{base_url}{search_query}&start={start}&max_results={max_results}"
 
82
  response = requests.get(query_url)
 
83
  if response.status_code == 200:
84
- feed = feedparser.parse(response.content)
85
- results = []
86
- for entry in feed.entries:
87
- title = entry.title
88
- summary = entry.summary
89
- published = entry.published
90
- link = entry.link
91
- authors = ", ".join(author.name for author in entry.authors)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  results.append({
93
  "title": title,
94
- "authors": authors,
95
- "published": published,
96
  "summary": summary,
97
- "link": link
 
 
98
  })
99
  return results
100
  else:
@@ -140,13 +156,13 @@ paper_abstract = st.text_area("Enter the research paper abstract:", height=200)
140
  if st.button("Generate Ideas"):
141
  if paper_abstract.strip():
142
  st.subheader("Summarized Abstract")
143
- # Summarize the abstract to capture its key points
144
  summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
145
  summary_text = summary[0]['summary_text']
146
  st.write(summary_text)
147
 
148
  st.subheader("Generated Research Ideas")
149
- # Build a combined prompt with the abstract and its summary
150
  prompt = (
151
  f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
152
  f"Paper Abstract:\n{paper_abstract}\n\n"
@@ -188,22 +204,20 @@ if st.button("Generate Knowledge Graph"):
188
  cited_list = [c.strip() for c in cited.split(';') if c.strip()]
189
  data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
190
  if data:
191
- # Build a directed graph using NetworkX
192
  G = nx.DiGraph()
193
  for paper in data:
194
- # Ensure each node has a 'title' key, using the node id as fallback.
195
  G.add_node(paper["paper_id"], title=paper.get("title", str(paper["paper_id"])))
196
  for cited in paper["cited"]:
197
  G.add_edge(paper["paper_id"], cited)
198
 
199
  st.subheader("Knowledge Graph")
200
- # Create an interactive visualization using Pyvis
201
  net = Network(height="500px", width="100%", directed=True)
202
  for node, node_data in G.nodes(data=True):
203
  net.add_node(node, label=node_data.get("title", str(node)))
204
  for source, target in G.edges():
205
  net.add_edge(source, target)
206
- # Save the interactive visualization to an HTML file and embed it in Streamlit
207
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
208
  net.write_html(temp_file.name)
209
  with open(temp_file.name, 'r', encoding='utf-8') as f:
 
5
  import tempfile
6
  import openai
7
  import requests
8
+ import xml.etree.ElementTree as ET
9
  import pandas as pd
10
  from io import StringIO
11
  import asyncio
 
33
  # ---------------------------
34
  def generate_ideas_with_hf(prompt):
35
  # Use Hugging Face's text-generation pipeline.
36
+ # We use max_new_tokens so that new tokens are generated beyond the prompt.
37
  results = generator(prompt, max_new_tokens=50, num_return_sequences=1)
38
  idea_text = results[0]['generated_text']
39
  return idea_text
 
68
  return output_text
69
 
70
  # ---------------------------
71
+ # arXiv API Integration using xml.etree.ElementTree
72
  # ---------------------------
73
  def fetch_arxiv_results(query, max_results=5):
74
  """
75
+ Queries arXiv's free API to fetch relevant papers using XML parsing.
76
  """
77
  base_url = "http://export.arxiv.org/api/query?"
78
  search_query = "search_query=all:" + query
79
  start = "0"
80
+ max_results_str = str(max_results)
81
+ query_url = f"{base_url}{search_query}&start={start}&max_results={max_results_str}"
82
+
83
  response = requests.get(query_url)
84
+ results = []
85
  if response.status_code == 200:
86
+ root = ET.fromstring(response.content)
87
+ ns = {"atom": "http://www.w3.org/2005/Atom"}
88
+ for entry in root.findall("atom:entry", ns):
89
+ title_elem = entry.find("atom:title", ns)
90
+ title = title_elem.text.strip() if title_elem is not None else ""
91
+
92
+ summary_elem = entry.find("atom:summary", ns)
93
+ summary = summary_elem.text.strip() if summary_elem is not None else ""
94
+
95
+ published_elem = entry.find("atom:published", ns)
96
+ published = published_elem.text.strip() if published_elem is not None else ""
97
+
98
+ link_elem = entry.find("atom:id", ns)
99
+ link = link_elem.text.strip() if link_elem is not None else ""
100
+
101
+ authors = []
102
+ for author in entry.findall("atom:author", ns):
103
+ name_elem = author.find("atom:name", ns)
104
+ if name_elem is not None:
105
+ authors.append(name_elem.text.strip())
106
+ authors_str = ", ".join(authors)
107
+
108
  results.append({
109
  "title": title,
 
 
110
  "summary": summary,
111
+ "published": published,
112
+ "link": link,
113
+ "authors": authors_str
114
  })
115
  return results
116
  else:
 
156
  if st.button("Generate Ideas"):
157
  if paper_abstract.strip():
158
  st.subheader("Summarized Abstract")
159
+ # Summarize the abstract to capture its key points.
160
  summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
161
  summary_text = summary[0]['summary_text']
162
  st.write(summary_text)
163
 
164
  st.subheader("Generated Research Ideas")
165
+ # Build a combined prompt with the abstract and its summary.
166
  prompt = (
167
  f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
168
  f"Paper Abstract:\n{paper_abstract}\n\n"
 
204
  cited_list = [c.strip() for c in cited.split(';') if c.strip()]
205
  data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
206
  if data:
207
+ # Build a directed graph using NetworkX.
208
  G = nx.DiGraph()
209
  for paper in data:
 
210
  G.add_node(paper["paper_id"], title=paper.get("title", str(paper["paper_id"])))
211
  for cited in paper["cited"]:
212
  G.add_edge(paper["paper_id"], cited)
213
 
214
  st.subheader("Knowledge Graph")
215
+ # Create an interactive visualization using Pyvis.
216
  net = Network(height="500px", width="100%", directed=True)
217
  for node, node_data in G.nodes(data=True):
218
  net.add_node(node, label=node_data.get("title", str(node)))
219
  for source, target in G.edges():
220
  net.add_edge(source, target)
 
221
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
222
  net.write_html(temp_file.name)
223
  with open(temp_file.name, 'r', encoding='utf-8') as f: