mgbam committed
Commit 3dbb4eb · verified · 1 Parent(s): 10eff7e

Update app.py
Files changed (1): app.py +133 -174
app.py CHANGED
@@ -1,190 +1,149 @@
- import os
- import requests
- import feedparser
- import networkx as nx
- import gradio as gr
  from transformers import pipeline
  import openai

- # --------------------------
- # 1. arXiv API Integration
- # --------------------------
- def fetch_arxiv_papers(search_query="Artificial Intelligence", max_results=5):
-     """
-     Fetch paper metadata from the arXiv API using the legacy endpoint.
-     By using the arXiv APIs, you are agreeing to arXiv's Terms of Use.
-
-     Returns:
-         List of dictionaries with keys: id, title, summary, published, authors.
-     """
-     # arXiv API endpoint
-     base_url = "http://export.arxiv.org/api/query?"
-     # Construct query parameters: see the arXiv API docs for details.
-     query = f"search_query=all:{search_query}&start=0&max_results={max_results}"
-     url = base_url + query
-     response = requests.get(url)
-     # Parse the Atom feed using feedparser
-     feed = feedparser.parse(response.text)
-     papers = []
-     for entry in feed.entries:
-         paper = {
-             "id": entry.id,
-             "title": entry.title.strip().replace("\n", " "),
-             "summary": entry.summary.strip().replace("\n", " "),
-             "published": entry.published,
-             "authors": ", ".join(author.name for author in entry.authors)
-         }
-         papers.append(paper)
-     return papers
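A note on the removed fetcher: arXiv's API guidelines ask clients to pace their requests (the documentation suggests roughly one call every three seconds), which this code never does. A minimal sketch of a politer batch fetch, reusing fetch_arxiv_papers as defined above (fetch_many and pause_s are illustrative names, not part of the commit):

    import time

    def fetch_many(queries, pause_s=3.0):
        # Pause between calls per arXiv's rate-limit guidance (pause_s is an assumption).
        results = {}
        for q in queries:
            results[q] = fetch_arxiv_papers(search_query=q, max_results=5)
            time.sleep(pause_s)
        return results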
-
- # --------------------------
- # 2. Build a Simple Knowledge Graph
- # --------------------------
- def build_knowledge_graph(papers):
-     """
-     Create a directed knowledge graph from a list of papers.
-     Here, a simple simulation links papers in publication order.
-     In a real-world scenario, edges might be derived from citation relationships.
-
-     Each node holds paper metadata; edges are added sequentially for demonstration.
-     """
-     G = nx.DiGraph()
-     for i, paper in enumerate(papers):
-         # Use a short identifier like 'P1', 'P2', etc.
-         node_id = f"P{i+1}"
-         G.add_node(node_id, title=paper["title"], summary=paper["summary"],
-                    published=paper["published"], authors=paper["authors"])
-
-     # Simulate citation relationships: for demo purposes, link each paper to the next one.
-     # The context is a simple statement; in practice, this could be extracted citation context.
-     for i in range(len(papers) - 1):
-         source = f"P{i+1}"
-         target = f"P{i+2}"
-         context = f"Paper '{papers[i]['title']}' builds on the ideas in '{papers[i+1]['title']}'."
-         G.add_edge(source, target, context=context)
-     return G
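Since the edges here are simulated rather than mined from real references, a quick smoke test makes the linkage direction easy to verify. A short usage sketch of the two removed functions above:

    papers = fetch_arxiv_papers(search_query="graph neural networks", max_results=3)
    G = build_knowledge_graph(papers)
    for u, v, data in G.edges(data=True):
        print(f"{u} -> {v}: {data['context']}")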
-
- # --------------------------
- # 3. Semantic Summarization on Citation Contexts
- # --------------------------
- # Initialize the Hugging Face summarizer (using an open-source model)
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
- def summarize_context(text):
-     """
-     Given a text (e.g. simulated citation context), return a semantic summary.
-     """
-     if not text.strip():
-         return "No context available."
-     summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
-     return summary[0]["summary_text"]
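facebook/bart-large-cnn has a fixed input window (on the order of 1024 tokens), so long contexts should be clipped rather than passed through raw. A hedged variant of summarize_context, assuming the pipeline's standard truncation flag (the _truncated name is illustrative):

    def summarize_context_truncated(text):
        # truncation=True clips inputs that exceed the model's window.
        if not text.strip():
            return "No context available."
        summary = summarizer(text, max_length=50, min_length=25, do_sample=False, truncation=True)
        return summary[0]["summary_text"]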
-
- def enrich_graph_with_summaries(G):
-     """
-     For each edge in the graph, compute a semantic summary of the citation context.
-     Store the result as an edge attribute.
-     """
-     for u, v, data in G.edges(data=True):
-         context_text = data.get("context", "")
-         data["semantic_summary"] = summarize_context(context_text)
-     return G
-
- # --------------------------
- # 4. Generate Graph Summary Text
- # --------------------------
- def generate_graph_summary(G):
-     """
-     Generate a text summary of the knowledge graph. For each edge, the summary includes:
-     "Paper 'source_title' cites 'target_title': <semantic summary>"
-     """
-     summary_lines = []
-     for u, v, data in G.edges(data=True):
-         source_title = G.nodes[u]["title"]
-         target_title = G.nodes[v]["title"]
-         sem_summary = data.get("semantic_summary", "No summary available.")
-         line = f"Paper '{source_title}' cites '{target_title}': {sem_summary}"
-         summary_lines.append(line)
-     return "\n".join(summary_lines)

- # --------------------------
- # 5. Research Idea Generation using OpenAI
- # --------------------------
- # Set your OpenAI API key from the environment (ensure OPENAI_API_KEY is set)
- openai.api_key = os.getenv("OPENAI_API_KEY")

- def generate_research_ideas(graph_summary_text):
-     """
-     Generate innovative research ideas using OpenAI's GPT model.
-     The prompt includes the semantic graph summary.
-     """
-     prompt = f"""
- Based on the following summary of research literature and their semantic relationships, propose innovative research ideas in the field of Artificial Intelligence:
-
- {graph_summary_text}
-
- Research Ideas:
- """
      response = openai.ChatCompletion.create(
          model="gpt-3.5-turbo",
          messages=[
-             {"role": "system", "content": "You are an expert AI researcher."},
              {"role": "user", "content": prompt}
          ],
-         max_tokens=200,
-         temperature=0.7,
-         n=1,
      )
-     ideas = response.choices[0].message.content.strip()
-     return ideas
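openai.ChatCompletion is the pre-1.0 openai SDK interface and raises an error on openai>=1.0, where the same call goes through a client object. A sketch of the equivalent under the newer SDK, kept close to the removed code (the _v1 name is illustrative):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    def generate_research_ideas_v1(graph_summary_text):
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert AI researcher."},
                {"role": "user", "content": f"Propose innovative AI research ideas based on:\n\n{graph_summary_text}"},
            ],
            max_tokens=200,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()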
-
- # --------------------------
- # 6. Main Pipeline (Tie Everything Together)
- # --------------------------
- def process_arxiv_and_generate(search_query):
-     """
-     Main function called via the Gradio interface.
-     1. Fetches papers from arXiv (complying with the arXiv API Terms of Use).
-     2. Builds and enriches a simulated knowledge graph.
-     3. Generates a graph summary.
-     4. Produces innovative research ideas using OpenAI's API.
-     """
-     # Step 1: Fetch papers from arXiv (respecting the API terms)
-     papers = fetch_arxiv_papers(search_query=search_query, max_results=5)
-     if not papers:
-         return "No papers were retrieved from arXiv. Please try a different query.", ""
-
-     # Step 2: Build the knowledge graph from the retrieved papers
-     G = build_knowledge_graph(papers)
-     # Step 3: Enrich the graph by summarizing the (simulated) citation contexts
-     G = enrich_graph_with_summaries(G)
-     # Step 4: Generate a text summary of the graph
-     graph_summary = generate_graph_summary(G)
-     # Step 5: Generate research ideas using OpenAI's API
-     research_ideas = generate_research_ideas(graph_summary)
-
-     # Return the graph summary along with the generated ideas.
-     return graph_summary, research_ideas

- # --------------------------
- # 7. Gradio Interface for Hugging Face Space
- # --------------------------
- demo = gr.Interface(
-     fn=process_arxiv_and_generate,
-     inputs=gr.components.Textbox(lines=1, label="Search Query for arXiv (e.g., 'Artificial Intelligence')", default="Artificial Intelligence"),
-     outputs=[
-         gr.outputs.Textbox(label="Knowledge Graph Summary"),
-         gr.outputs.Textbox(label="Generated Research Ideas")
-     ],
-     title="Graph of AI Ideas: Leveraging Knowledge Graphs, arXiv Metadata & LLMs",
-     description=(
-         "This Hugging Face Space application retrieves recent arXiv e-prints based on your search query "
-         "and builds a simple knowledge graph (using simulated citation relationships) from the paper metadata. "
-         "A Hugging Face summarization model enriches these simulated citation contexts, and the graph summary "
-         "is then fed to OpenAI's GPT model to generate innovative AI research ideas.\n\n"
-         "By using this application, you agree to the arXiv API Terms of Use. Please review the arXiv API documentation "
-         "for guidelines on rate limits, attribution, and usage."
-     ),
-     allow_flagging="never",
  )

- # Launch the Gradio interface (Hugging Face Spaces automatically runs this file)
- demo.launch()
+ import streamlit as st
  from transformers import pipeline
+ import networkx as nx
+ from pyvis.network import Network
+ import tempfile
  import openai

+ # ---------------------------
+ # Model Loading & Caching
+ # ---------------------------
+ @st.cache_resource(show_spinner=False)
+ def load_summarizer():
+     # Load a summarization pipeline from Hugging Face (using facebook/bart-large-cnn)
+     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+     return summarizer
+
+ @st.cache_resource(show_spinner=False)
+ def load_text_generator():
+     # For a quick demo, we use a smaller text generation model (e.g., GPT-2)
+     generator = pipeline("text-generation", model="gpt2")
+     return generator
+
+ summarizer = load_summarizer()
+ generator = load_text_generator()
+
+ # ---------------------------
+ # OpenAI Based Idea Generation (Streaming)
+ # ---------------------------
+ def generate_ideas_with_openai(prompt, api_key):
+     openai.api_key = api_key
+     output_text = ""
+     # Create a chat completion request for streaming output
      response = openai.ChatCompletion.create(
          model="gpt-3.5-turbo",
          messages=[
+             {"role": "system", "content": "You are an expert AI research assistant who generates innovative research ideas."},
              {"role": "user", "content": prompt}
          ],
+         stream=True,
      )
+     st_text = st.empty()  # Placeholder for streaming output
+     for chunk in response:
+         if 'choices' in chunk and len(chunk['choices']) > 0:
+             delta = chunk['choices'][0]['delta']
+             if 'content' in delta:
+                 text_piece = delta['content']
+                 output_text += text_piece
+                 st_text.text(output_text)
+     return output_text
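The dict-style chunk access above matches the pre-1.0 openai SDK; under openai>=1.0, stream chunks are typed objects rather than dicts. A sketch of the same streaming loop against the newer client (the _v1 name is illustrative):

    from openai import OpenAI

    def generate_ideas_with_openai_v1(prompt, api_key):
        client = OpenAI(api_key=api_key)
        output_text = ""
        st_text = st.empty()
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        for chunk in stream:
            piece = chunk.choices[0].delta.content
            if piece:  # delta.content is None on role/stop chunks
                output_text += piece
                st_text.text(output_text)
        return output_text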
+
+ def generate_ideas_with_hf(prompt):
+     # Use a Hugging Face text-generation pipeline for demo purposes.
+     # (This may be less creative compared to GPT-3.5.)
+     results = generator(prompt, max_length=150, num_return_sequences=1)
+     idea_text = results[0]['generated_text']
+     return idea_text
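One caveat on the GPT-2 fallback: max_length counts the prompt tokens as well as the generated ones, so the long abstract-plus-summary prompt built later can leave little or no room for new text. max_new_tokens budgets only the continuation; a sketch (the _safe name is illustrative):

    def generate_ideas_with_hf_safe(prompt):
        # max_new_tokens caps the continuation length
        # independently of how long the prompt already is.
        results = generator(prompt, max_new_tokens=150, num_return_sequences=1)
        return results[0]["generated_text"]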
+
+ # ---------------------------
+ # Streamlit App Layout
+ # ---------------------------
+ st.title("Graph of AI Ideas Application")
+
+ st.sidebar.header("Configuration")
+ generation_mode = st.sidebar.selectbox("Select Idea Generation Mode",
+                                        ["Hugging Face Open Source", "OpenAI GPT-3.5 (Streaming)"])
+ openai_api_key = st.sidebar.text_input("OpenAI API Key (for GPT-3.5 Streaming)", type="password")
+
+ # --- Section 1: Research Paper Input and Idea Generation ---
+ st.header("Research Paper Input")
+ paper_abstract = st.text_area("Enter the research paper abstract:", height=200)
+
+ if st.button("Generate Ideas"):
+     if paper_abstract.strip():
+         st.subheader("Summarized Abstract")
+         # Summarize the paper abstract to capture essential points
+         summary = summarizer(paper_abstract, max_length=100, min_length=30, do_sample=False)
+         summary_text = summary[0]['summary_text']
+         st.write(summary_text)
+
+         st.subheader("Generated Research Ideas")
+         # Build a prompt that combines the abstract and its summary
+         prompt = (
+             f"Based on the following research paper abstract, generate innovative and promising research ideas for future work.\n\n"
+             f"Paper Abstract:\n{paper_abstract}\n\n"
+             f"Summary:\n{summary_text}\n\n"
+             f"Research Ideas:"
+         )
+         if generation_mode == "OpenAI GPT-3.5 (Streaming)":
+             if not openai_api_key.strip():
+                 st.error("Please provide your OpenAI API Key in the sidebar.")
+             else:
+                 with st.spinner("Generating ideas using OpenAI GPT-3.5..."):
+                     ideas = generate_ideas_with_openai(prompt, openai_api_key)
+                     st.write(ideas)
+         else:
+             with st.spinner("Generating ideas using Hugging Face open source model..."):
+                 ideas = generate_ideas_with_hf(prompt)
+                 st.write(ideas)
+     else:
+         st.error("Please enter a research paper abstract.")
+
+ # --- Section 2: Knowledge Graph Visualization ---
+ st.header("Knowledge Graph Visualization")
+ st.markdown(
+     "Simulate a knowledge graph by entering paper details and their citation relationships. "
+     "Enter details in CSV format: **PaperID,Title,CitedPaperIDs** (CitedPaperIDs separated by ';'). "
+     "Example:\n\n`1,Paper A,2;3`\n`2,Paper B,`\n`3,Paper C,2`"
  )
+ papers_csv = st.text_area("Enter paper details in CSV format:", height=150)
+
+ if st.button("Generate Knowledge Graph"):
+     if papers_csv.strip():
+         import pandas as pd
+         from io import StringIO
+
+         # Process the CSV text input line by line
+         data = []
+         for line in papers_csv.splitlines():
+             parts = line.split(',')
+             if len(parts) >= 3:
+                 paper_id = parts[0].strip()
+                 title = parts[1].strip()
+                 cited = parts[2].strip()
+                 cited_list = [c.strip() for c in cited.split(';') if c.strip()]
+                 data.append({"paper_id": paper_id, "title": title, "cited": cited_list})
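An aside before the graph is built: pandas and StringIO are imported above but never used; the manual split already handles the simple three-column rows. A hedged sketch of an equivalent parse that would actually use them (the column names are assumptions about the input format described in the markdown):

    df = pd.read_csv(StringIO(papers_csv), names=["paper_id", "title", "cited"], dtype=str).fillna("")
    data = [
        {
            "paper_id": row.paper_id.strip(),
            "title": row.title.strip(),
            "cited": [c.strip() for c in row.cited.split(";") if c.strip()],
        }
        for row in df.itertuples()
    ]

The committed code resumes below with the graph construction.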
+
+         if data:
+             # Build a directed graph from the parsed rows
+             G = nx.DiGraph()
+             for paper in data:
+                 G.add_node(paper["paper_id"], title=paper["title"])
+                 for cited in paper["cited"]:
+                     G.add_edge(paper["paper_id"], cited)
+
+             st.subheader("Knowledge Graph")
+             # Create an interactive visualization using Pyvis
+             net = Network(height="500px", width="100%", directed=True)
+             for node, node_data in G.nodes(data=True):
+                 # Fall back to the node ID for cited papers that have no row of their own
+                 net.add_node(node, label=node_data.get("title", node))
+             for source, target in G.edges():
+                 net.add_edge(source, target)
+             # Write the network to a temp file and display it as HTML in Streamlit
+             temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
+             net.write_html(temp_file.name)
+             with open(temp_file.name, 'r', encoding='utf-8') as f:
+                 html_content = f.read()
+             st.components.v1.html(html_content, height=500)
+     else:
+         st.error("Please enter paper details for the knowledge graph.")
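A closing note on the rendering step: the temp-file round trip works, but delete=False leaves stray .html files behind, and st.components.v1 is conventionally pulled in via an explicit import. Newer pyvis releases expose generate_html(), which returns the page as a string directly; a sketch, assuming that method is available in the installed version:

    import streamlit.components.v1 as components

    # Render the Pyvis network to an HTML string without touching disk.
    html_content = net.generate_html()
    components.html(html_content, height=500)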