Spaces:

mgbam
/

Researcher

Sleeping

App Files Community

mgbam committed on Apr 8

Commit

d01c5cc

verified ·

1 Parent(s): fd6d271

Create app.py

Browse files

Files changed (1) hide show

app.py +190 -0

app.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import os
+import requests
+import feedparser
+import networkx as nx
+import gradio as gr
+from transformers import pipeline
+import openai
+# --------------------------
+# 1. arXiv API Integration
+# --------------------------
+def fetch_arxiv_papers(search_query="Artificial Intelligence", max_results=5):
+    """
+    Fetch paper metadata from the arXiv API using the legacy endpoint.
+    By using the arXiv APIs, you are agreeing to arXiv's Terms of Use.
+    Returns:
+        List of dictionaries with keys: id, title, summary, published, authors.
+    """
+    # arXiv API endpoint
+    base_url = "http://export.arxiv.org/api/query?"
+    # Construct query parameters: see arXiv API docs for details.
+    query = f"search_query=all:{search_query}&start=0&max_results={max_results}"
+    url = base_url + query
+    response = requests.get(url)
+    # Parse the Atom feed using feedparser
+    feed = feedparser.parse(response.text)
+    papers = []
+    for entry in feed.entries:
+        paper = {
+            "id": entry.id,
+            "title": entry.title.strip().replace("\n", " "),
+            "summary": entry.summary.strip().replace("\n", " "),
+            "published": entry.published,
+            "authors": ", ".join(author.name for author in entry.authors)
+        }
+        papers.append(paper)
+    return papers
+# --------------------------
+# 2. Build a Simple Knowledge Graph
+# --------------------------
+def build_knowledge_graph(papers):
+    """
+    Create a directed knowledge graph from a list of papers.
+    Here, a simple simulation links papers in publication order.
+    In a real-world scenario, edges might be derived from citation relationships.
+    Each node holds paper metadata; edges are added sequentially for demonstration.
+    """
+    G = nx.DiGraph()
+    for i, paper in enumerate(papers):
+        # Use a short identifier like 'P1', 'P2', etc.
+        node_id = f"P{i+1}"
+        G.add_node(node_id, title=paper["title"], summary=paper["summary"], published=paper["published"], authors=paper["authors"])
+    # Simulate citation relationships: for demo purposes, link each paper to the next one.
+    # The context is a simple statement; in practice, this could be extracted citation context.
+    for i in range(len(papers) - 1):
+        source = f"P{i+1}"
+        target = f"P{i+2}"
+        context = f"Paper '{papers[i]['title']}' builds on the ideas in '{papers[i+1]['title']}'."
+        G.add_edge(source, target, context=context)
+    return G
+# --------------------------
+# 3. Semantic Summarization on Citation Contexts
+# --------------------------
+# Initialize the Hugging Face summarizer (using an open-source model)
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+def summarize_context(text):
+    """
+    Given a text (e.g. simulated citation context), return a semantic summary.
+    """
+    if not text.strip():
+        return "No context available."
+    summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
+    return summary[0]["summary_text"]
+def enrich_graph_with_summaries(G):
+    """
+    For each edge in the graph, compute a semantic summary of the citation context.
+    Store the result as an edge attribute.
+    """
+    for u, v, data in G.edges(data=True):
+        context_text = data.get("context", "")
+        data["semantic_summary"] = summarize_context(context_text)
+    return G
+# --------------------------
+# 4. Generate Graph Summary Text
+# --------------------------
+def generate_graph_summary(G):
+    """
+    Generate a text summary of the knowledge graph. For each edge, the summary will include:
+    "Paper 'source_title' cites 'target_title': <semantic summary>"
+    """
+    summary_lines = []
+    for u, v, data in G.edges(data=True):
+        source_title = G.nodes[u]["title"]
+        target_title = G.nodes[v]["title"]
+        sem_summary = data.get("semantic_summary", "No summary available.")
+        line = f"Paper '{source_title}' cites '{target_title}': {sem_summary}"
+        summary_lines.append(line)
+    return "\n".join(summary_lines)
+# --------------------------
+# 5. Research Idea Generation using OpenAI
+# --------------------------
+# Set your OpenAI API key from the environment (ensure OPENAI_API_KEY is set)
+openai.api_key = os.getenv("OPENAI_API_KEY")
+def generate_research_ideas(graph_summary_text):
+    """
+    Generate innovative research ideas using OpenAI's GPT model.
+    The prompt includes the semantic graph summary.
+    """
+    prompt = f"""
+Based on the following summary of research literature and their semantic relationships, propose innovative research ideas in the field of Artificial Intelligence:
+{graph_summary_text}
+Research Ideas:
+"""
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are an expert AI researcher."},
+            {"role": "user", "content": prompt}
+        ],
+        max_tokens=200,
+        temperature=0.7,
+        n=1,
+    )
+    ideas = response.choices[0].message.content.strip()
+    return ideas
+# --------------------------
+# 6. Main Pipeline (Tie Everything Together)
+# --------------------------
+def process_arxiv_and_generate(search_query):
+    """
+    Main function called via the Gradio interface.
+    1. Fetches papers from arXiv (ensuring compliance with arXiv API Terms of Use).
+    2. Builds and enriches a simulated knowledge graph.
+    3. Generates a graph summary.
+    4. Produces innovative research ideas using OpenAI's API.
+    """
+    # Step 1: Fetch papers from arXiv (by using their API and respecting their terms)
+    papers = fetch_arxiv_papers(search_query=search_query, max_results=5)
+    if not papers:
+        return "No papers were retrieved from arXiv. Please try a different query.", ""
+    # Step 2: Build the knowledge graph from the retrieved papers
+    G = build_knowledge_graph(papers)
+    # Step 3: Enrich the graph by summarizing the (simulated) citation contexts
+    G = enrich_graph_with_summaries(G)
+    # Step 4: Generate a text summary of the graph
+    graph_summary = generate_graph_summary(G)
+    # Step 5: Generate research ideas using OpenAI's API
+    research_ideas = generate_research_ideas(graph_summary)
+    # Build a result text that shows the graph summary along with the generated ideas.
+    return graph_summary, research_ideas
+# --------------------------
+# 7. Gradio Interface for Hugging Face Space
+# --------------------------
+demo = gr.Interface(
+    fn=process_arxiv_and_generate,
+    inputs=gr.components.Textbox(lines=1, label="Search Query for arXiv (e.g., 'Artificial Intelligence')", default="Artificial Intelligence"),
+    outputs=[
+        gr.outputs.Textbox(label="Knowledge Graph Summary"),
+        gr.outputs.Textbox(label="Generated Research Ideas")
+    ],
+    title="Graph of AI Ideas: Leveraging Knowledge Graphs, arXiv Metadata & LLMs",
+    description=(
+        "This Hugging Face Space application retrieves recent arXiv e-prints based on your search query "
+        "and builds a simple knowledge graph (using simulated citation relationships) from the paper metadata. "
+        "A Hugging Face summarization model enriches these simulated citation contexts, and the graph summary "
+        "is then fed to OpenAI's GPT model to generate innovative AI research ideas.\n\n"
+        "By using this application, you agree to the arXiv API Terms of Use. Please review the arXiv API documentation "
+        "for guidelines on rate limits, attribution, and usage."
+    ),
+    allow_flagging="never",
+)
+# Launch the Gradio interface (Hugging Face Spaces automatically runs this file)
+demo.launch()