mgbam committed
Commit 3dbb4eb · verified · 1 Parent(s): 10eff7e

Update app.py
Files changed (1): app.py +133 -174
app.py CHANGED
@@ -1,190 +1,149 @@
- import os
- import requests
- import feedparser
- import networkx as nx
- import gradio as gr
  from transformers import pipeline
  import openai

- # --------------------------
- # 1. arXiv API Integration
- # --------------------------
- def fetch_arxiv_papers(search_query="Artificial Intelligence", max_results=5):
-     """
-     Fetch paper metadata from the arXiv API using the legacy endpoint.
-     By using the arXiv APIs, you are agreeing to arXiv's Terms of Use.
-
-     Returns:
-         List of dictionaries with keys: id, title, summary, published, authors.
-     """
-     # arXiv API endpoint
-     base_url = "http://export.arxiv.org/api/query?"
-     # Construct query parameters: see the arXiv API docs for details.
-     query = f"search_query=all:{search_query}&start=0&max_results={max_results}"
-     url = base_url + query
-     response = requests.get(url)
-     # Parse the Atom feed using feedparser
-     feed = feedparser.parse(response.text)
-     papers = []
-     for entry in feed.entries:
-         paper = {
-             "id": entry.id,
-             "title": entry.title.strip().replace("\n", " "),
-             "summary": entry.summary.strip().replace("\n", " "),
-             "published": entry.published,
-             "authors": ", ".join(author.name for author in entry.authors)
-         }
-         papers.append(paper)
-     return papers
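A note on the removed fetcher: arXiv's API guidelines ask clients to pace their requests (the documentation suggests roughly one call every three seconds), which this code never does. A minimal sketch of a politer batch fetch, reusing fetch_arxiv_papers as defined above (fetch_many and pause_s are illustrative names, not part of the commit):

    import time

    def fetch_many(queries, pause_s=3.0):
        # Pause between calls per arXiv's rate-limit guidance (pause_s is an assumption).
        results = {}
        for q in queries:
            results[q] = fetch_arxiv_papers(search_query=q, max_results=5)
            time.sleep(pause_s)
        return results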
-
- # --------------------------
- # 2. Build a Simple Knowledge Graph
- # --------------------------
- def build_knowledge_graph(papers):
-     """
-     Create a directed knowledge graph from a list of papers.
-     Here, a simple simulation links papers in publication order.
-     In a real-world scenario, edges might be derived from citation relationships.
-
-     Each node holds paper metadata; edges are added sequentially for demonstration.
-     """
-     G = nx.DiGraph()
-     for i, paper in enumerate(papers):
-         # Use a short identifier like 'P1', 'P2', etc.
-         node_id = f"P{i+1}"
-         G.add_node(node_id, title=paper["title"], summary=paper["summary"],
-                    published=paper["published"], authors=paper["authors"])
-
-     # Simulate citation relationships: for demo purposes, link each paper to the next one.
-     # The context is a simple statement; in practice, this could be extracted citation context.
-     for i in range(len(papers) - 1):
-         source = f"P{i+1}"
-         target = f"P{i+2}"
-         context = f"Paper '{papers[i]['title']}' builds on the ideas in '{papers[i+1]['title']}'."
-         G.add_edge(source, target, context=context)
-     return G
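Since the edges here are simulated rather than mined from real references, a quick smoke test makes the linkage direction easy to verify. A short usage sketch of the two removed functions above:

    papers = fetch_arxiv_papers(search_query="graph neural networks", max_results=3)
    G = build_knowledge_graph(papers)
    for u, v, data in G.edges(data=True):
        print(f"{u} -> {v}: {data['context']}")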
-
- # --------------------------
- # 3. Semantic Summarization on Citation Contexts
- # --------------------------
- # Initialize the Hugging Face summarizer (using an open-source model)
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
- def summarize_context(text):
-     """
-     Given a text (e.g. simulated citation context), return a semantic summary.
-     """
-     if not text.strip():
-         return "No context available."
-     summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
-     return summary[0]["summary_text"]
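facebook/bart-large-cnn has a fixed input window (on the order of 1024 tokens), so long contexts should be clipped rather than passed through raw. A hedged variant of summarize_context, assuming the pipeline's standard truncation flag (the _truncated name is illustrative):

    def summarize_context_truncated(text):
        # truncation=True clips inputs that exceed the model's window.
        if not text.strip():
            return "No context available."
        summary = summarizer(text, max_length=50, min_length=25, do_sample=False, truncation=True)
        return summary[0]["summary_text"]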
-
- def enrich_graph_with_summaries(G):
-     """
-     For each edge in the graph, compute a semantic summary of the citation context.
-     Store the result as an edge attribute.
-     """
-     for u, v, data in G.edges(data=True):
-         context_text = data.get("context", "")
-         data["semantic_summary"] = summarize_context(context_text)
-     return G
-
- # --------------------------
- # 4. Generate Graph Summary Text
- # --------------------------
- def generate_graph_summary(G):
-     """
-     Generate a text summary of the knowledge graph. For each edge, the summary includes:
-     "Paper 'source_title' cites 'target_title': <semantic summary>"
-     """
-     summary_lines = []
-     for u, v, data in G.edges(data=True):
-         source_title = G.nodes[u]["title"]
-         target_title = G.nodes[v]["title"]
-         sem_summary = data.get("semantic_summary", "No summary available.")
-         line = f"Paper '{source_title}' cites '{target_title}': {sem_summary}"
-         summary_lines.append(line)
-     return "\n".join(summary_lines)

- # --------------------------
- # 5. Research Idea Generation using OpenAI
- # --------------------------
- # Set your OpenAI API key from the environment (ensure OPENAI_API_KEY is set)
- openai.api_key = os.getenv("OPENAI_API_KEY")

- def generate_research_ideas(graph_summary_text):
-     """
-     Generate innovative research ideas using OpenAI's GPT model.
-     The prompt includes the semantic graph summary.
-     """
-     prompt = f"""
- Based on the following summary of research literature and their semantic relationships, propose innovative research ideas in the field of Artificial Intelligence:
-
- {graph_summary_text}
-
- Research Ideas:
- """
      response = openai.ChatCompletion.create(
          model="gpt-3.5-turbo",
          messages=[
-             {"role": "system", "content": "You are an expert AI researcher."},
              {"role": "user", "content": prompt}
          ],
-         max_tokens=200,
-         temperature=0.7,
-         n=1,
      )
-     ideas = response.choices[0].message.content.strip()
-     return ideas
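openai.ChatCompletion is the pre-1.0 openai SDK interface and raises an error on openai>=1.0, where the same call goes through a client object. A sketch of the equivalent under the newer SDK, kept close to the removed code (the _v1 name is illustrative):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    def generate_research_ideas_v1(graph_summary_text):
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert AI researcher."},
                {"role": "user", "content": f"Propose innovative AI research ideas based on:\n\n{graph_summary_text}"},
            ],
            max_tokens=200,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()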
-
- # --------------------------
- # 6. Main Pipeline (Tie Everything Together)
- # --------------------------
- def process_arxiv_and_generate(search_query):
-     """
-     Main function called via the Gradio interface.
-     1. Fetches papers from arXiv (complying with the arXiv API Terms of Use).
-     2. Builds and enriches a simulated knowledge graph.
-     3. Generates a graph summary.
-     4. Produces innovative research ideas using OpenAI's API.
-     """
-     # Step 1: Fetch papers from arXiv (respecting the API terms)
-     papers = fetch_arxiv_papers(search_query=search_query, max_results=5)
-     if not papers:
-         return "No papers were retrieved from arXiv. Please try a different query.", ""
-
-     # Step 2: Build the knowledge graph from the retrieved papers
-     G = build_knowledge_graph(papers)
-     # Step 3: Enrich the graph by summarizing the (simulated) citation contexts
-     G = enrich_graph_with_summaries(G)
-     # Step 4: Generate a text summary of the graph
-     graph_summary = generate_graph_summary(G)
-     # Step 5: Generate research ideas using OpenAI's API
-     research_ideas = generate_research_ideas(graph_summary)
-
-     # Return the graph summary along with the generated ideas.
-     return graph_summary, research_ideas

- # --------------------------
- # 7. Gradio Interface for Hugging Face Space
- # --------------------------
- demo = gr.Interface(
-     fn=process_arxiv_and_generate,
-     inputs=gr.components.Textbox(lines=1, label="Search Query for arXiv (e.g., 'Artificial Intelligence')", default="Artificial Intelligence"),
-     outputs=[
-         gr.outputs.Textbox(label="Knowledge Graph Summary"),
-         gr.outputs.Textbox(label="Generated Research Ideas")
-     ],
-     title="Graph of AI Ideas: Leveraging Knowledge Graphs, arXiv Metadata & LLMs",
-     description=(
-         "This Hugging Face Space application retrieves recent arXiv e-prints based on your search query "
-         "and builds a simple knowledge graph (using simulated citation relationships) from the paper metadata. "
-         "A Hugging Face summarization model enriches these simulated citation contexts, and the graph summary "
-         "is then fed to OpenAI's GPT model to generate innovative AI research ideas.\n\n"
-         "By using this application, you agree to the arXiv API Terms of Use. Please review the arXiv API documentation "
-         "for guidelines on rate limits, attribution, and usage."
-     ),
-     allow_flagging="never",
  )

- # Launch the Gradio interface (Hugging Face Spaces automatically runs this file)
- demo.launch()
+ import streamlit as st
  from transformers import pipeline
+ import networkx as nx
+ from pyvis.network import Network
+ import tempfile
  import openai

+ # ---------------------------
+ # Model Loading & Caching
+ # ---------------------------
+ @st.cache_resource(show_spinner=False)
+ def load_summarizer():
+     # Load a summarization pipeline from Hugging Face (using facebook/bart-large-cnn)
+     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+     return summarizer
+
+ @st.cache_resource(show_spinner=False)
+ def load_text_generator():
+     # For a quick demo, we use a smaller text generation model (e.g., GPT-2)
+     generator = pipeline("text-generation", model="gpt2")
+     return generator
+
+ summarizer = load_summarizer()
+ generator = load_text_generator()
+
+ # ---------------------------
+ # OpenAI Based Idea Generation (Streaming)
+ # ---------------------------
+ def generate_ideas_with_openai(prompt, api_key):
+     openai.api_key = api_key
+     output_text = ""
+     # Create a chat completion request for streaming output
      response = openai.ChatCompletion.create(
          model="gpt-3.5-turbo",
          messages=[
+             {"role": "system", "content": "You are an expert AI research assistant who generates innovative research ideas."},
              {"role": "user", "content": prompt}
          ],
+         stream=True,
      )
+     st_text = st.empty()  # Placeholder for streaming output
+     for chunk in response:
+         if 'choices' in chunk and len(chunk['choices']) > 0:
+             delta = chunk['choices'][0]['delta']
+             if 'content' in delta:
+                 text_piece = delta['content']
+                 output_text += text_piece
+                 st_text.text(output_text)
+     return output_text
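The dict-style chunk access above matches the pre-1.0 openai SDK; under openai>=1.0, stream chunks are typed objects rather than dicts. A sketch of the same streaming loop against the newer client (the _v1 name is illustrative):

    from openai import OpenAI

    def generate_ideas_with_openai_v1(prompt, api_key):
        client = OpenAI(api_key=api_key)
        output_text = ""
        st_text = st.empty()
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        for chunk in stream:
            piece = chunk.choices[0].delta.content
            if piece:  # delta.content is None on role/stop chunks
                output_text += piece
                st_text.text(output_text)
        return output_text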
+
+ def generate_ideas_with_hf(prompt):
+     # Use a Hugging Face text-generation pipeline for demo purposes.
+     # (This may be less creative compared to GPT-3.5.)
+     results = generator(prompt, max_length=150, num_return_sequences=1)
+     idea_text = results[0]['generated_text']
+     return idea_text
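One caveat on the GPT-2 fallback: max_length counts the prompt tokens as well as the generated ones, so the long abstract-plus-summary prompt built later can leave little or no room for new text. max_new_tokens budgets only the continuation; a sketch (the _safe name is illustrative):

    def generate_ideas_with_hf_safe(prompt):
        # max_new_tokens caps the continuation length
        # independently of how long the prompt already is.
        results = generator(prompt, max_new_tokens=150, num_return_sequences=1)
        return results[0]["generated_text"]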
+
+ # ---------------------------
+ # Streamlit App Layout
+ # ---------------------------
+ st.title("Graph of AI Ideas Application")
+
+ st.sidebar.header("Configuration")
+ generation_mode = st.sidebar.selectbox("Select Idea Generation Mode",
+                                        ["Hugging Face Open Source", "OpenAI GPT-3.5 (Streaming)"])
+ openai_api_key = st.sidebar.text_input("OpenAI API Key (for GPT-3.5 Streaming)", type="password")
+
+ # --- Section 1: Research Paper Input and Idea Generation ---
+ st.header("Research Paper Input")
+ paper_abstract = st.text_area("Enter the research paper abstract:", height=200)
+
+ if st.button("Generate Ideas"):
+     if paper_abstract.strip():
+         st.subheader("Summarized Abstract")
+         # Summarize the paper abstract to capture essential points
+         summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
+         summary_text = summary[0]['summary_text']
+         st.write(summary_text)
+
+         st.subheader("Generated Research Ideas")
+         # Build a prompt that combines the abstract and its summary
+         prompt = (
+             f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
+             f"Paper Abstract:\n{paper_abstract}\n\n"
+             f"Summary:\n{summary_text}\n\n"
+             f"Research Ideas:"
+         )
+         if generation_mode == "OpenAI GPT-3.5 (Streaming)":
+             if not openai_api_key.strip():
+                 st.error("Please provide your OpenAI API Key in the sidebar.")
+             else:
+                 with st.spinner("Generating ideas using OpenAI GPT-3.5..."):
+                     ideas = generate_ideas_with_openai(prompt, openai_api_key)
+                     st.write(ideas)
+         else:
+             with st.spinner("Generating ideas using Hugging Face open source model..."):
+                 ideas = generate_ideas_with_hf(prompt)
+                 st.write(ideas)
+     else:
+         st.error("Please enter a research paper abstract.")
+
+ # --- Section 2: Knowledge Graph Visualization ---
+ st.header("Knowledge Graph Visualization")
+ st.markdown(
+     "Simulate a knowledge graph by entering paper details and their citation relationships. "
+     "Enter details in CSV format: **PaperID,Title,CitedPaperIDs** (CitedPaperIDs separated by ';'). "
+     "Example:\n\n`1,Paper A,2;3`\n`2,Paper B,`\n`3,Paper C,2`"
  )
+ papers_csv = st.text_area("Enter paper details in CSV format:", height=150)
+
+ if st.button("Generate Knowledge Graph"):
+     if papers_csv.strip():
+         import pandas as pd
+         from io import StringIO
+
+         # Process the CSV text input line by line
+         data = []
+         for line in papers_csv.splitlines():
+             parts = line.split(',')
+             if len(parts) >= 3:
+                 paper_id = parts[0].strip()
+                 title = parts[1].strip()
+                 cited = parts[2].strip()
+                 cited_list = [c.strip() for c in cited.split(';') if c.strip()]
+                 data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
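An aside before the graph is built: pandas and StringIO are imported above but never used; the manual split already handles the simple three-column rows. A hedged sketch of an equivalent parse that would actually use them (the column names are assumptions about the input format described in the markdown):

    df = pd.read_csv(StringIO(papers_csv), names=["paper_id", "title", "cited"], dtype=str).fillna("")
    data = [
        {
            "paper_id": row.paper_id.strip(),
            "title": row.title.strip(),
            "cited": [c.strip() for c in row.cited.split(";") if c.strip()],
        }
        for row in df.itertuples()
    ]

The committed code resumes below with the graph construction.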
+
+         if data:
+             # Build a directed graph from the parsed rows
+             G = nx.DiGraph()
+             for paper in data:
+                 G.add_node(paper["paper_id"], title=paper["title"])
+                 for cited in paper["cited"]:
+                     G.add_edge(paper["paper_id"], cited)
+
+             st.subheader("Knowledge Graph")
+             # Create an interactive visualization using Pyvis
+             net = Network(height="500px", width="100%", directed=True)
+             for node, node_data in G.nodes(data=True):
+                 # Fall back to the node ID for cited papers that have no row of their own
+                 net.add_node(node, label=node_data.get("title", node))
+             for source, target in G.edges():
+                 net.add_edge(source, target)
+             # Write the network to a temp file and display it as HTML in Streamlit
+             temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
+             net.write_html(temp_file.name)
+             with open(temp_file.name, 'r', encoding='utf-8') as f:
+                 html_content = f.read()
+             st.components.v1.html(html_content, height=500)
+     else:
+         st.error("Please enter paper details for the knowledge graph.")
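A closing note on the rendering step: the temp-file round trip works, but delete=False leaves stray .html files behind, and st.components.v1 is conventionally pulled in via an explicit import. Newer pyvis releases expose generate_html(), which returns the page as a string directly; a sketch, assuming that method is available in the installed version:

    import streamlit.components.v1 as components

    # Render the Pyvis network to an HTML string without touching disk.
    html_content = net.generate_html()
    components.html(html_content, height=500)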