mgbam committed on
Commit
834ac1a
·
verified ·
1 Parent(s): c36d3e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -23
app.py CHANGED
@@ -5,7 +5,7 @@ from pyvis.network import Network
5
  import tempfile
6
  import openai
7
  import requests
8
- import feedparser
9
  import pandas as pd
10
  from io import StringIO
11
  import asyncio
@@ -33,7 +33,7 @@ generator = load_text_generator()
33
  # ---------------------------
34
  def generate_ideas_with_hf(prompt):
35
  # Use Hugging Face's text-generation pipeline.
36
- # Instead of using max_length, we use max_new_tokens so that new tokens are generated.
37
  results = generator(prompt, max_new_tokens=50, num_return_sequences=1)
38
  idea_text = results[0]['generated_text']
39
  return idea_text
@@ -68,33 +68,49 @@ def generate_ideas_with_openai(prompt, api_key):
68
  return output_text
69
 
70
  # ---------------------------
71
- # arXiv API Integration
72
  # ---------------------------
73
  def fetch_arxiv_results(query, max_results=5):
74
  """
75
- Queries arXiv's free API to fetch relevant papers.
76
  """
77
  base_url = "http://export.arxiv.org/api/query?"
78
  search_query = "search_query=all:" + query
79
  start = "0"
80
- max_results = str(max_results)
81
- query_url = f"{base_url}{search_query}&start={start}&max_results={max_results}"
 
82
  response = requests.get(query_url)
 
83
  if response.status_code == 200:
84
- feed = feedparser.parse(response.content)
85
- results = []
86
- for entry in feed.entries:
87
- title = entry.title
88
- summary = entry.summary
89
- published = entry.published
90
- link = entry.link
91
- authors = ", ".join(author.name for author in entry.authors)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  results.append({
93
  "title": title,
94
- "authors": authors,
95
- "published": published,
96
  "summary": summary,
97
- "link": link
 
 
98
  })
99
  return results
100
  else:
@@ -140,13 +156,13 @@ paper_abstract = st.text_area("Enter the research paper abstract:", height=200)
140
  if st.button("Generate Ideas"):
141
  if paper_abstract.strip():
142
  st.subheader("Summarized Abstract")
143
- # Summarize the abstract to capture its key points
144
  summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
145
  summary_text = summary[0]['summary_text']
146
  st.write(summary_text)
147
 
148
  st.subheader("Generated Research Ideas")
149
- # Build a combined prompt with the abstract and its summary
150
  prompt = (
151
  f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
152
  f"Paper Abstract:\n{paper_abstract}\n\n"
@@ -188,22 +204,20 @@ if st.button("Generate Knowledge Graph"):
188
  cited_list = [c.strip() for c in cited.split(';') if c.strip()]
189
  data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
190
  if data:
191
- # Build a directed graph using NetworkX
192
  G = nx.DiGraph()
193
  for paper in data:
194
- # Ensure each node has a 'title' key, using the node id as fallback.
195
  G.add_node(paper["paper_id"], title=paper.get("title", str(paper["paper_id"])))
196
  for cited in paper["cited"]:
197
  G.add_edge(paper["paper_id"], cited)
198
 
199
  st.subheader("Knowledge Graph")
200
- # Create an interactive visualization using Pyvis
201
  net = Network(height="500px", width="100%", directed=True)
202
  for node, node_data in G.nodes(data=True):
203
  net.add_node(node, label=node_data.get("title", str(node)))
204
  for source, target in G.edges():
205
  net.add_edge(source, target)
206
- # Save the interactive visualization to an HTML file and embed it in Streamlit
207
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
208
  net.write_html(temp_file.name)
209
  with open(temp_file.name, 'r', encoding='utf-8') as f:
 
5
  import tempfile
6
  import openai
7
  import requests
8
+ import xml.etree.ElementTree as ET
9
  import pandas as pd
10
  from io import StringIO
11
  import asyncio
 
33
  # ---------------------------
34
  def generate_ideas_with_hf(prompt):
35
  # Use Hugging Face's text-generation pipeline.
36
+ # We use max_new_tokens so that new tokens are generated beyond the prompt.
37
  results = generator(prompt, max_new_tokens=50, num_return_sequences=1)
38
  idea_text = results[0]['generated_text']
39
  return idea_text
 
68
  return output_text
69
 
70
  # ---------------------------
71
+ # arXiv API Integration using xml.etree.ElementTree
72
  # ---------------------------
73
  def fetch_arxiv_results(query, max_results=5):
74
  """
75
+ Queries arXiv's free API to fetch relevant papers using XML parsing.
76
  """
77
  base_url = "http://export.arxiv.org/api/query?"
78
  search_query = "search_query=all:" + query
79
  start = "0"
80
+ max_results_str = str(max_results)
81
+ query_url = f"{base_url}{search_query}&start={start}&max_results={max_results_str}"
82
+
83
  response = requests.get(query_url)
84
+ results = []
85
  if response.status_code == 200:
86
+ root = ET.fromstring(response.content)
87
+ ns = {"atom": "http://www.w3.org/2005/Atom"}
88
+ for entry in root.findall("atom:entry", ns):
89
+ title_elem = entry.find("atom:title", ns)
90
+ title = title_elem.text.strip() if title_elem is not None else ""
91
+
92
+ summary_elem = entry.find("atom:summary", ns)
93
+ summary = summary_elem.text.strip() if summary_elem is not None else ""
94
+
95
+ published_elem = entry.find("atom:published", ns)
96
+ published = published_elem.text.strip() if published_elem is not None else ""
97
+
98
+ link_elem = entry.find("atom:id", ns)
99
+ link = link_elem.text.strip() if link_elem is not None else ""
100
+
101
+ authors = []
102
+ for author in entry.findall("atom:author", ns):
103
+ name_elem = author.find("atom:name", ns)
104
+ if name_elem is not None:
105
+ authors.append(name_elem.text.strip())
106
+ authors_str = ", ".join(authors)
107
+
108
  results.append({
109
  "title": title,
 
 
110
  "summary": summary,
111
+ "published": published,
112
+ "link": link,
113
+ "authors": authors_str
114
  })
115
  return results
116
  else:
 
156
  if st.button("Generate Ideas"):
157
  if paper_abstract.strip():
158
  st.subheader("Summarized Abstract")
159
+ # Summarize the abstract to capture its key points.
160
  summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
161
  summary_text = summary[0]['summary_text']
162
  st.write(summary_text)
163
 
164
  st.subheader("Generated Research Ideas")
165
+ # Build a combined prompt with the abstract and its summary.
166
  prompt = (
167
  f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
168
  f"Paper Abstract:\n{paper_abstract}\n\n"
 
204
  cited_list = [c.strip() for c in cited.split(';') if c.strip()]
205
  data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
206
  if data:
207
+ # Build a directed graph using NetworkX.
208
  G = nx.DiGraph()
209
  for paper in data:
 
210
  G.add_node(paper["paper_id"], title=paper.get("title", str(paper["paper_id"])))
211
  for cited in paper["cited"]:
212
  G.add_edge(paper["paper_id"], cited)
213
 
214
  st.subheader("Knowledge Graph")
215
+ # Create an interactive visualization using Pyvis.
216
  net = Network(height="500px", width="100%", directed=True)
217
  for node, node_data in G.nodes(data=True):
218
  net.add_node(node, label=node_data.get("title", str(node)))
219
  for source, target in G.edges():
220
  net.add_edge(source, target)
 
221
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
222
  net.write_html(temp_file.name)
223
  with open(temp_file.name, 'r', encoding='utf-8') as f: