mgbam committed · verified
Commit d01c5cc · 1 Parent(s): fd6d271

Create app.py

Files changed (1):
  app.py (+190, -0)
app.py ADDED
import os
import requests
import feedparser
import networkx as nx
import gradio as gr
from transformers import pipeline
import openai

# --------------------------
# 1. arXiv API Integration
# --------------------------
def fetch_arxiv_papers(search_query="Artificial Intelligence", max_results=5):
    """
    Fetch paper metadata from the arXiv API using the legacy endpoint.
    By using the arXiv APIs, you are agreeing to arXiv's Terms of Use.

    Returns:
        List of dictionaries with keys: id, title, summary, published, authors.
    """
    # arXiv API endpoint
    base_url = "http://export.arxiv.org/api/query?"
    # Construct query parameters: see the arXiv API docs for details.
    query = f"search_query=all:{search_query}&start=0&max_results={max_results}"
    url = base_url + query
    response = requests.get(url, timeout=30)  # timeout so a slow response cannot hang the Space
    # Parse the Atom feed using feedparser
    feed = feedparser.parse(response.text)
    papers = []
    for entry in feed.entries:
        paper = {
            "id": entry.id,
            "title": entry.title.strip().replace("\n", " "),
            "summary": entry.summary.strip().replace("\n", " "),
            "published": entry.published,
            "authors": ", ".join(author.name for author in entry.authors),
        }
        papers.append(paper)
    return papers

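# A hedged sketch of the shape of one returned entry, assuming a typical arXiv
# Atom response (all field values below are illustrative placeholders):
#
#   {
#       "id": "http://arxiv.org/abs/<id>",
#       "title": "Some Paper Title",
#       "summary": "One-paragraph abstract ...",
#       "published": "2024-01-01T00:00:00Z",
#       "authors": "A. Author, B. Author",
#   }
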
# --------------------------
# 2. Build a Simple Knowledge Graph
# --------------------------
def build_knowledge_graph(papers):
    """
    Create a directed knowledge graph from a list of papers.
    Here, a simple simulation links papers in publication order.
    In a real-world scenario, edges might be derived from citation relationships.

    Each node holds paper metadata; edges are added sequentially for demonstration.
    """
    G = nx.DiGraph()
    for i, paper in enumerate(papers):
        # Use a short identifier like 'P1', 'P2', etc.
        node_id = f"P{i+1}"
        G.add_node(
            node_id,
            title=paper["title"],
            summary=paper["summary"],
            published=paper["published"],
            authors=paper["authors"],
        )

    # Simulate citation relationships: for demo purposes, link each paper to the next one.
    # The context is a simple statement; in practice, this could be extracted citation context.
    for i in range(len(papers) - 1):
        source = f"P{i+1}"
        target = f"P{i+2}"
        context = f"Paper '{papers[i]['title']}' builds on the ideas in '{papers[i+1]['title']}'."
        G.add_edge(source, target, context=context)
    return G

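# A minimal sketch of the resulting topology, assuming `papers` has three
# entries: nodes P1, P2, P3 connected in a chain.
#
#   G = build_knowledge_graph(papers)
#   list(G.edges())  # -> [('P1', 'P2'), ('P2', 'P3')]
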
# --------------------------
# 3. Semantic Summarization of Citation Contexts
# --------------------------
# Initialize the Hugging Face summarizer (an open-source model)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_context(text):
    """
    Given a text (e.g., a simulated citation context), return a semantic summary.
    """
    if not text.strip():
        return "No context available."
    summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
    return summary[0]["summary_text"]

def enrich_graph_with_summaries(G):
    """
    For each edge in the graph, compute a semantic summary of the citation context
    and store the result as an edge attribute.
    """
    for u, v, data in G.edges(data=True):
        context_text = data.get("context", "")
        data["semantic_summary"] = summarize_context(context_text)
    return G

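# Example call (a sketch; the exact wording depends on the BART model):
#
#   summarize_context("Paper 'A' builds on the ideas in 'B'.")
#   # -> a short model-generated paraphrase of the context sentence
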
# --------------------------
# 4. Generate Graph Summary Text
# --------------------------
def generate_graph_summary(G):
    """
    Generate a text summary of the knowledge graph. For each edge, the summary includes:
    "Paper '<source_title>' cites '<target_title>': <semantic summary>"
    """
    summary_lines = []
    for u, v, data in G.edges(data=True):
        source_title = G.nodes[u]["title"]
        target_title = G.nodes[v]["title"]
        sem_summary = data.get("semantic_summary", "No summary available.")
        line = f"Paper '{source_title}' cites '{target_title}': {sem_summary}"
        summary_lines.append(line)
    return "\n".join(summary_lines)

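# Each line of the returned summary follows this pattern (illustrative titles):
#
#   Paper 'Graph Methods for X' cites 'Y: A Survey': <semantic summary>
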
# --------------------------
# 5. Research Idea Generation using OpenAI
# --------------------------
# Set your OpenAI API key from the environment (ensure OPENAI_API_KEY is set)
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_research_ideas(graph_summary_text):
    """
    Generate innovative research ideas using OpenAI's GPT model.
    The prompt includes the semantic graph summary.

    Note: this uses the legacy ChatCompletion interface, which requires the
    pre-1.0 openai SDK (e.g., openai==0.28); the 1.x SDK removed this call.
    """
    prompt = f"""
Based on the following summary of research literature and their semantic relationships, propose innovative research ideas in the field of Artificial Intelligence:

{graph_summary_text}

Research Ideas:
"""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an expert AI researcher."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=200,
        temperature=0.7,
        n=1,
    )
    ideas = response.choices[0].message.content.strip()
    return ideas

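# If the Space were pinned to the 1.x openai SDK instead, a roughly equivalent
# call (a sketch, not used by this app) would be:
#
#   from openai import OpenAI
#   client = OpenAI()  # reads OPENAI_API_KEY from the environment
#   response = client.chat.completions.create(
#       model="gpt-3.5-turbo",
#       messages=[{"role": "user", "content": prompt}],
#       max_tokens=200,
#       temperature=0.7,
#   )
#   ideas = response.choices[0].message.content.strip()
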
# --------------------------
# 6. Main Pipeline (Tie Everything Together)
# --------------------------
def process_arxiv_and_generate(search_query):
    """
    Main function called via the Gradio interface.
    1. Fetches papers from arXiv (in compliance with the arXiv API Terms of Use).
    2. Builds and enriches a simulated knowledge graph.
    3. Generates a graph summary.
    4. Produces innovative research ideas using OpenAI's API.
    """
    # Step 1: Fetch papers from arXiv (using their API and respecting their terms)
    papers = fetch_arxiv_papers(search_query=search_query, max_results=5)
    if not papers:
        return "No papers were retrieved from arXiv. Please try a different query.", ""

    # Step 2: Build the knowledge graph from the retrieved papers
    G = build_knowledge_graph(papers)
    # Step 3: Enrich the graph by summarizing the (simulated) citation contexts
    G = enrich_graph_with_summaries(G)
    # Step 4: Generate a text summary of the graph
    graph_summary = generate_graph_summary(G)
    # Step 5: Generate research ideas using OpenAI's API
    research_ideas = generate_research_ideas(graph_summary)

    # Return the graph summary along with the generated ideas.
    return graph_summary, research_ideas

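# The pipeline returns a (graph_summary, research_ideas) pair of strings, which
# Gradio maps onto the two output textboxes defined below, e.g.:
#
#   summary, ideas = process_arxiv_and_generate("Artificial Intelligence")
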
# --------------------------
# 7. Gradio Interface for Hugging Face Space
# --------------------------
demo = gr.Interface(
    fn=process_arxiv_and_generate,
    # Gradio 3+ takes the initial text via `value=`; the old `default=` keyword
    # and the `gr.outputs` namespace are deprecated/removed in current releases.
    inputs=gr.Textbox(lines=1, label="Search Query for arXiv (e.g., 'Artificial Intelligence')", value="Artificial Intelligence"),
    outputs=[
        gr.Textbox(label="Knowledge Graph Summary"),
        gr.Textbox(label="Generated Research Ideas"),
    ],
    title="Graph of AI Ideas: Leveraging Knowledge Graphs, arXiv Metadata & LLMs",
    description=(
        "This Hugging Face Space application retrieves recent arXiv e-prints based on your search query "
        "and builds a simple knowledge graph (using simulated citation relationships) from the paper metadata. "
        "A Hugging Face summarization model enriches these simulated citation contexts, and the graph summary "
        "is then fed to OpenAI's GPT model to generate innovative AI research ideas.\n\n"
        "By using this application, you agree to the arXiv API Terms of Use. Please review the arXiv API documentation "
        "for guidelines on rate limits, attribution, and usage."
    ),
    allow_flagging="never",
)

# Launch the Gradio interface (Hugging Face Spaces automatically runs this file)
demo.launch()
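# To try the app locally (a sketch, assuming these dependency choices; torch is
# needed by the transformers summarization pipeline):
#
#   pip install gradio requests feedparser networkx transformers torch "openai<1.0"
#   OPENAI_API_KEY=<your-key> python app.py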