Spaces:

Ani14
/

AutoReasearcher

Running

App Files Files Community

Ani14 committed on Apr 21

Commit

a77e234

verified ·

1 Parent(s): 113325b

Update app.py

Browse files

Files changed (1) hide show

app.py +200 -72

app.py CHANGED Viewed

@@ -1,78 +1,206 @@
 import os
 import streamlit as st
-import asyncio
-import nest_asyncio
-from gpt_researcher import GPTResearcher
 from dotenv import load_dotenv
-# Enable async for Streamlit
-nest_asyncio.apply()
 load_dotenv()
-# Set your Tavily API key
-os.environ["TAVILY_API_KEY"] = "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"
-# App UI setup
-st.set_page_config(page_title="🧠 Super Deep Research Agent", layout="wide")
-st.title("📚 GPT-Powered Super Deep Research Assistant")
-# Sidebar UI
-with st.sidebar:
-    st.header("🔍 Research Setup")
-    query = st.text_input("📌 Research Topic", "Is AI a threat to creative jobs?")
-    report_type = st.selectbox("📄 Report Type", ["research_report", "summary", "detailed_report"])
-    tone = st.selectbox("🗣️ Tone", ["objective", "persuasive", "informative"])
-    source_type = st.selectbox("🌐 Source Scope", ["web", "arxiv", "semantic-scholar", "hybrid"])
-    output_format = st.selectbox("📝 Output Format", ["markdown", "text"])
-    start = st.button("🚀 Start Research")
-# Async agent runner
-async def run_research(query, report_type, source, tone, fmt):
-    agent = GPTResearcher(
-        query=query,
-        report_type=report_type,
-        report_source=source,
-        report_format=fmt,
-        tone=tone
-    )
-    await agent.conduct_research()
-    report = await agent.write_report()
-    context = agent.get_research_context()
-    sources = agent.get_research_sources()
-    images = agent.get_research_images()
-    return report, context, sources, images
-# Run on click
-if start and query:
-    st.info("⏳ Running research agent...")
-    # Spinner with placeholder log
-    with st.spinner("Thinking..."):
-        # Optional: log collector using mutable container (if future logging is needed)
-        logs = []
-        # Run agent
-        report, context, sources, images = asyncio.run(
-            run_research(query, report_type, source_type, tone, output_format)
-        )
-    st.success("✅ Research Completed!")
-    # Display report
-    st.subheader("📄 Final Report")
-    st.markdown(report, unsafe_allow_html=True)
-    # Display sources
-    if sources:
-        st.subheader("📚 Sources")
-        for s in sources:
-            st.markdown(f"- [{s.get('title', 'Untitled')}]({s.get('url', '#')})")
-    # Display images
-    if images:
-        st.subheader("🖼️ Relevant Images")
-        for img in images:
-            st.image(img, use_column_width=True)
-    # Download report
-    st.download_button("💾 Download Markdown", report, file_name="deep_research.md", mime="text/markdown")

 import os
 import streamlit as st
+import requests
+import feedparser
+import datetime
+import base64
+import tempfile
 from dotenv import load_dotenv
+from duckduckgo_search import DDGS
+from fuzzywuzzy import fuzz
 load_dotenv()
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
+# --- LLM Call ---
+def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
+    """Send a chat-completion request to OpenRouter and return the reply text.
+
+    Args:
+        messages: List of ``{"role": ..., "content": ...}`` dicts sent as the
+            ``messages`` field of the request.
+        model: OpenRouter model identifier.
+        max_tokens: Value sent as ``max_tokens``.
+        temperature: Value sent as ``temperature``.
+
+    Returns:
+        The ``content`` of the first choice in the response.
+
+    Raises:
+        RuntimeError: If the API key is missing, the API reports an error, or
+            the response does not contain a completion.
+        requests.exceptions.RequestException: On network failure or timeout.
+    """
+    if not OPENROUTER_API_KEY:
+        # Fail early with a clear message instead of sending "Bearer None".
+        raise RuntimeError("OPENROUTER_API_KEY is not set")
+    url = "https://openrouter.ai/api/v1/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "Content-Type": "application/json",
+        "X-Title": "Autonomous Research Assistant"
+    }
+    data = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature
+    }
+    # Without a timeout a stalled connection would block the app indefinitely.
+    response = requests.post(url, headers=headers, json=data, timeout=120)
+    try:
+        result = response.json()
+    except ValueError:
+        # Error pages (e.g. from a proxy) are not always JSON.
+        result = {}
+    error = result.get("error") if isinstance(result, dict) else None
+    if response.status_code != 200 or error:
+        message = error.get("message") if isinstance(error, dict) else None
+        raise RuntimeError(message or f"LLM API error (HTTP {response.status_code})")
+    try:
+        return result["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError) as exc:
+        raise RuntimeError("LLM API returned no completion") from exc
+# --- Search Helpers ---
+def get_arxiv_papers(query, max_results=3):
+    """Query the arXiv export API and return matching papers.
+
+    Args:
+        query: Free-text search string; it is URL-encoded before use.
+        max_results: Value sent as ``max_results`` in the query string.
+
+    Returns:
+        A list of dicts with keys ``title``, ``summary`` and ``url``. ``url``
+        is the entry's PDF link when one is listed, otherwise the entry's
+        ``link`` field, otherwise an empty string.
+    """
+    from urllib.parse import quote_plus
+    url = f"https://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
+    feed = feedparser.parse(url)
+    papers = []
+    for entry in feed.entries:
+        # Use .get() throughout: entries are dict-like and a missing field
+        # would otherwise raise instead of falling back to the default.
+        pdf_url = next(
+            (link.get("href", "") for link in entry.get("links", [])
+             if link.get("type") == "application/pdf"),
+            "",
+        )
+        # Collapse whitespace so a wrapped title does not break the
+        # single-line markdown link built from it later.
+        title = " ".join((entry.get("title") or "").split())
+        papers.append({
+            "title": title or "Untitled",
+            "summary": (entry.get("summary") or "No summary available").replace("\n", " ").strip(),
+            "url": pdf_url or entry.get("link", ""),
+        })
+    return papers
+def get_semantic_scholar_papers(query, max_results=3):
+    """Search the Semantic Scholar Graph API and return matching papers.
+
+    Args:
+        query: Free-text search string.
+        max_results: Value sent as the ``limit`` parameter.
+
+    Returns:
+        A list of dicts with keys ``title``, ``summary`` and ``url``. The list
+        is empty when the response carries no ``data`` (for example an error
+        payload) or is not valid JSON.
+
+    Raises:
+        requests.exceptions.RequestException: On network failure or timeout.
+    """
+    url = "https://api.semanticscholar.org/graph/v1/paper/search"
+    params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
+    response = requests.get(url, params=params, timeout=30)
+    try:
+        payload = response.json()
+    except ValueError:
+        # Keep the original best-effort behavior: no usable data, no papers.
+        return []
+    # "data" may be absent or null; both mean "no papers".
+    papers = (payload.get("data") if isinstance(payload, dict) else None) or []
+    return [{
+        "title": p.get("title") or "Untitled",
+        "summary": (p.get("abstract") or "No abstract available").strip(),
+        "url": p.get("url") or ""
+    } for p in papers]
+def search_duckduckgo(query, max_results=3):
+    """Run a DuckDuckGo text search and return the hits.
+
+    Args:
+        query: Search string.
+        max_results: Passed to ``DDGS.text`` as ``max_results``.
+
+    Returns:
+        A list of dicts with keys ``title``, ``snippet`` and ``url``. Note that
+        the text field is called ``snippet`` here, not ``summary``.
+    """
+    with DDGS() as ddgs:
+        results = ddgs.text(query, max_results=max_results) or []
+        # .get() so a result missing one field yields a default instead of
+        # aborting the whole search with KeyError.
+        return [{
+            "title": r.get("title") or "Untitled",
+            "snippet": r.get("body") or "",
+            "url": r.get("href") or ""
+        } for r in results]
+def get_image_urls(query, max_images=3):
+    """Run a DuckDuckGo image search and return the image URLs.
+
+    Args:
+        query: Search string.
+        max_images: Passed to ``DDGS.images`` as ``max_results``.
+
+    Returns:
+        A list of the ``image`` values of the results; results without an
+        ``image`` value are skipped.
+    """
+    with DDGS() as ddgs:
+        results = ddgs.images(query, max_results=max_images) or []
+        return [img["image"] for img in results if img.get("image")]
+def generate_apa_citation(title, url, source=""):
+    """Build a one-line APA-style citation string.
+
+    The year is always the current calendar year at call time, not the
+    publication year of the cited work.
+
+    Args:
+        title: Title of the cited work.
+        url: Link to the cited work.
+        source: ``"arxiv"``, ``"semantic"`` or ``"web"`` selects the venue
+            label; any other value produces a citation without a venue.
+
+    Returns:
+        The formatted citation.
+    """
+    venue_labels = {
+        "arxiv": "*arXiv*",
+        "semantic": "*Semantic Scholar*",
+        "web": "*Web Source*",
+    }
+    current_year = datetime.datetime.now().year
+    venue = venue_labels.get(source)
+    if venue is None:
+        return f"{title}. ({current_year}). {url}"
+    return f"{title}. ({current_year}). {venue}. {url}"
+# --- Main Agent ---
+def autonomous_research_agent(topic):
+    """Gather sources on *topic*, ask the LLM to synthesize them, and return the report.
+
+    Collects arXiv papers, Semantic Scholar papers, DuckDuckGo web hits and
+    image URLs, builds a prompt from the first three, and appends the source
+    lists and APA-style citations to the LLM's answer.
+
+    Args:
+        topic: The research topic entered by the user.
+
+    Returns:
+        A ``(response, images)`` tuple: the markdown report and the list of
+        image URLs.
+
+    Raises:
+        Any exception raised by the search helpers or ``call_llm``.
+    """
+    arxiv = get_arxiv_papers(topic)
+    scholar = get_semantic_scholar_papers(topic)
+    web = search_duckduckgo(topic)
+    images = get_image_urls(topic)
+
+    def to_md_and_citations(papers, source):
+        """Render *papers* as a markdown list and collect their citations."""
+        md_parts, citations = [], []
+        for p in papers:
+            # Web results carry their text under "snippet", papers under
+            # "summary"; indexing p['summary'] raised KeyError for web hits.
+            text = p.get("summary") or p.get("snippet") or ""
+            excerpt = text[:300] + ("..." if len(text) > 300 else "")
+            md_parts.append(f"- [{p['title']}]({p['url']})\n> {excerpt}\n\n")
+            citations.append(generate_apa_citation(p['title'], p['url'], source))
+        return "".join(md_parts), citations
+
+    arxiv_md, arxiv_cite = to_md_and_citations(arxiv, "arxiv")
+    scholar_md, scholar_cite = to_md_and_citations(scholar, "semantic")
+    web_md, web_cite = to_md_and_citations(web, "web")
+    prompt = f"""
+# Research Topic: {topic}
+## ArXiv:
+{arxiv_md}
+## Semantic Scholar:
+{scholar_md}
+## Web Insights:
+{web_md}
+Now synthesize this information into:
+1. A research gap
+2. A novel research direction
+3. A full markdown-formatted research article (continuous, no section labels, academic tone)
+"""
+    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)
+    # Append sources and citations
+    response += "\n\n---\n### Sources Cited\n"
+    if arxiv_md:
+        response += "**ArXiv:**\n" + arxiv_md
+    if scholar_md:
+        response += "**Semantic Scholar:**\n" + scholar_md
+    if web_md:
+        response += "**Web:**\n" + web_md
+    all_citations = arxiv_cite + scholar_cite + web_cite
+    response += "\n---\n### 📚 APA Citations\n" + "\n".join(f"- {c}" for c in all_citations)
+    return response, images
+# --- Export Helper ---
+def export_file(content, export_format):
+    """Convert the report text into a downloadable payload.
+
+    Args:
+        content: The report text.
+        export_format: ``"Markdown"``, ``"LaTeX"`` or ``"PDF"``.
+
+    Returns:
+        A ``(data, filename)`` tuple. ``data`` is a ``str`` for Markdown and
+        LaTeX and ``bytes`` for PDF. Returns ``(None, None)`` for an unknown
+        format or when the ``fpdf`` package is not installed. For LaTeX the
+        content is inserted verbatim between the document delimiters.
+    """
+    filename_base = f"research_output_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
+    if export_format == "Markdown":
+        return content, f"{filename_base}.md"
+    elif export_format == "LaTeX":
+        tex = f"\\documentclass{{article}}\n\\begin{{document}}\n{content}\n\\end{{document}}"
+        return tex, f"{filename_base}.tex"
+    elif export_format == "PDF":
+        try:
+            from fpdf import FPDF
+        except ImportError:
+            st.error("Install fpdf with: `pip install fpdf`")
+            return None, None
+        pdf = FPDF()
+        pdf.add_page()
+        pdf.set_auto_page_break(auto=True, margin=15)
+        pdf.set_font("Arial", size=12)
+        for line in content.split('\n'):
+            # The built-in Arial font only covers Latin-1; replace anything
+            # else (e.g. the emoji in the report headings) so that writing
+            # the PDF does not fail with an encoding error.
+            printable = line.encode("latin-1", "replace").decode("latin-1")
+            pdf.multi_cell(0, 10, printable)
+        # A private temporary directory avoids the name race of
+        # tempfile.mktemp and is removed again once the bytes are read.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            path = os.path.join(tmp_dir, "export.pdf")
+            pdf.output(path)
+            with open(path, "rb") as f:
+                return f.read(), f"{filename_base}.pdf"
+    return None, None
+# --- Streamlit UI ---
+st.set_page_config("Autonomous Research Assistant", layout="wide")
+st.title("🤖 Autonomous AI Research Assistant")
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# The latest report is kept in session state so that it (and the export
+# controls) survive the script rerun triggered by every widget interaction.
+if "last_result" not in st.session_state:
+    st.session_state.last_result = None
+topic = st.text_input("Enter a research topic:")
+if st.button("Run Research Agent") and topic:
+    with st.spinner("Gathering sources & thinking..."):
+        try:
+            response, images = autonomous_research_agent(topic)
+        except Exception as e:
+            st.error(f"Research failed: {e}")
+        else:
+            st.session_state.chat_history.append({"role": "user", "content": topic})
+            st.session_state.chat_history.append({"role": "assistant", "content": response})
+            st.session_state.last_result = {"response": response, "images": images}
+# Render outside the button branch: a button nested inside another button's
+# branch can never fire, because clicking it reruns the script with the outer
+# button reporting False.
+last_result = st.session_state.last_result
+if last_result:
+    if last_result["images"]:
+        st.subheader("🖼️ Relevant Images")
+        st.image(last_result["images"], width=300)
+    st.markdown(last_result["response"])
+    # Export options (only show after generation)
+    export_format = st.selectbox("📤 Export Format", ["Markdown", "LaTeX", "PDF"])
+    try:
+        file_data, filename = export_file(last_result["response"], export_format)
+    except Exception as e:
+        st.error(f"Export failed: {e}")
+    else:
+        if file_data:
+            st.download_button(f"📥 Download {filename}", data=file_data, file_name=filename)
+# --- Follow-up Chat ---
+# Lets the user continue the conversation: the stored history plus the new
+# question is sent to the LLM, and both turns are recorded on success.
+st.divider()
+st.subheader("💬 Follow-up Q&A")
+followup = st.text_input("Ask a follow-up question:")
+ask_clicked = st.button("Ask")
+if ask_clicked and followup:
+    question = {"role": "user", "content": followup}
+    try:
+        # Build a new list so the history is only extended once the call succeeds.
+        answer = call_llm([*st.session_state.chat_history, question], max_tokens=1500)
+        st.session_state.chat_history.extend(
+            [question, {"role": "assistant", "content": answer}]
+        )
+        st.markdown(answer)
+    except Exception as e:
+        st.error(f"Follow-up error: {e}")