Ani14 committed on
Commit
a77e234
·
verified ·
1 Parent(s): 113325b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -72
app.py CHANGED
@@ -1,78 +1,206 @@
1
  import os
2
  import streamlit as st
3
- import asyncio
4
- import nest_asyncio
5
- from gpt_researcher import GPTResearcher
 
 
6
  from dotenv import load_dotenv
 
 
7
 
8
- # Enable async for Streamlit
9
- nest_asyncio.apply()
10
  load_dotenv()
 
11
 
12
- # Set your Tavily API key
13
- os.environ["TAVILY_API_KEY"] = "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"
14
-
15
- # App UI setup
16
- st.set_page_config(page_title="🧠 Super Deep Research Agent", layout="wide")
17
- st.title("📚 GPT-Powered Super Deep Research Assistant")
18
-
19
- # Sidebar UI
20
- with st.sidebar:
21
- st.header("🔍 Research Setup")
22
- query = st.text_input("📌 Research Topic", "Is AI a threat to creative jobs?")
23
- report_type = st.selectbox("📄 Report Type", ["research_report", "summary", "detailed_report"])
24
- tone = st.selectbox("🗣️ Tone", ["objective", "persuasive", "informative"])
25
- source_type = st.selectbox("🌐 Source Scope", ["web", "arxiv", "semantic-scholar", "hybrid"])
26
- output_format = st.selectbox("📝 Output Format", ["markdown", "text"])
27
- start = st.button("🚀 Start Research")
28
-
29
- # Async agent runner
30
- async def run_research(query, report_type, source, tone, fmt):
31
- agent = GPTResearcher(
32
- query=query,
33
- report_type=report_type,
34
- report_source=source,
35
- report_format=fmt,
36
- tone=tone
37
- )
38
- await agent.conduct_research()
39
- report = await agent.write_report()
40
- context = agent.get_research_context()
41
- sources = agent.get_research_sources()
42
- images = agent.get_research_images()
43
- return report, context, sources, images
44
-
45
- # Run on click
46
- if start and query:
47
- st.info("⏳ Running research agent...")
48
-
49
- # Spinner with placeholder log
50
- with st.spinner("Thinking..."):
51
- # Optional: log collector using mutable container (if future logging is needed)
52
- logs = []
53
-
54
- # Run agent
55
- report, context, sources, images = asyncio.run(
56
- run_research(query, report_type, source_type, tone, output_format)
57
- )
58
-
59
- st.success("✅ Research Completed!")
60
-
61
- # Display report
62
- st.subheader("📄 Final Report")
63
- st.markdown(report, unsafe_allow_html=True)
64
-
65
- # Display sources
66
- if sources:
67
- st.subheader("📚 Sources")
68
- for s in sources:
69
- st.markdown(f"- [{s.get('title', 'Untitled')}]({s.get('url', '#')})")
70
-
71
- # Display images
72
- if images:
73
- st.subheader("🖼️ Relevant Images")
74
- for img in images:
75
- st.image(img, use_column_width=True)
76
-
77
- # Download report
78
- st.download_button("💾 Download Markdown", report, file_name="deep_research.md", mime="text/markdown")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import streamlit as st
3
+ import requests
4
+ import feedparser
5
+ import datetime
6
+ import base64
7
+ import tempfile
8
  from dotenv import load_dotenv
9
+ from duckduckgo_search import DDGS
10
+ from fuzzywuzzy import fuzz
11
 
 
 
12
  load_dotenv()
13
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
14
 
15
+ # --- LLM Call ---
16
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, sent as-is.
        model: OpenRouter model identifier.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` string of the first returned choice.

    Raises:
        RuntimeError: If the API key is not configured, the API answers with a
            non-200 status, or the response carries no usable choice.
        requests.RequestException: On transport failures, including timeouts.
    """
    if not OPENROUTER_API_KEY:
        # Fail early with a clear message instead of sending "Bearer None".
        raise RuntimeError("OPENROUTER_API_KEY is not set")

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "X-Title": "Autonomous Research Assistant",
    }
    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.post(url, headers=headers, json=data, timeout=120)

    # Error pages are not always JSON; decode defensively so the status check
    # below can still report a meaningful RuntimeError.
    try:
        result = response.json()
    except ValueError:
        result = {}
    if not isinstance(result, dict):
        result = {}

    error = result.get("error")
    if response.status_code != 200:
        # The error field may be a dict with a message or a bare string.
        message = error.get("message") if isinstance(error, dict) else error
        raise RuntimeError(message or "LLM API error")

    choices = result.get("choices")
    if not choices:
        # A 200 body without choices would otherwise surface as a bare
        # KeyError/IndexError with no context.
        message = error.get("message") if isinstance(error, dict) else error
        raise RuntimeError(message or "LLM API returned no choices")
    return choices[0]["message"]["content"]
34
+
35
+ # --- Search Helpers ---
36
def get_arxiv_papers(query, max_results=3):
    """Query the arXiv Atom API and return matching papers.

    Args:
        query: Free-text search string; URL-encoded before use.
        max_results: Maximum number of entries requested from the API.

    Returns:
        A list of dicts with the keys ``title``, ``summary`` and ``url``.
        ``url`` is the PDF link when the entry has one, otherwise the entry's
        main link, otherwise an empty string.
    """
    from urllib.parse import quote_plus

    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
    feed = feedparser.parse(url)

    papers = []
    for entry in feed.entries:
        # Use dict-style .get() lookups: attribute access raises
        # AttributeError as soon as an entry lacks one of the fields.
        pdf_url = next(
            (
                link.get("href", "")
                for link in entry.get("links", [])
                if link.get("type") == "application/pdf"
            ),
            "",
        )
        papers.append({
            "title": entry.get("title") or "Untitled",
            "summary": (entry.get("summary") or "No summary available").replace("\n", " ").strip(),
            # Fall back to the entry's main link so the rendered markdown
            # link is not empty when no PDF link is present.
            "url": pdf_url or entry.get("link") or "",
        })
    return papers
45
+
46
def get_semantic_scholar_papers(query, max_results=3):
    """Search the Semantic Scholar Graph API and return matching papers.

    Args:
        query: Free-text search string.
        max_results: Value sent as the API's ``limit`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``summary`` and ``url``.
        The list is empty when the response carries no ``data`` entries.

    Raises:
        requests.RequestException: On transport failures, including timeouts.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
    # Bound the wait so a stalled connection cannot hang the whole run.
    response = requests.get(url, params=params, timeout=30)
    # `or []` also covers an explicit JSON null, which .get()'s default
    # argument would let through.
    papers = response.json().get("data") or []
    return [{
        "title": p.get("title") or "Untitled",
        "summary": (p.get("abstract") or "No abstract available").strip(),
        "url": p.get("url") or "",
    } for p in papers]
56
+
57
def search_duckduckgo(query, max_results=3):
    """Run a DuckDuckGo text search and return normalized results.

    Args:
        query: Search string.
        max_results: Maximum number of results requested.

    Returns:
        A list of dicts with the keys ``title``, ``snippet``, ``summary`` and
        ``url``. ``summary`` duplicates ``snippet`` so that web results have
        the same shape as the arXiv and Semantic Scholar results.
    """
    with DDGS() as ddgs:
        results = []
        for r in ddgs.text(query, max_results=max_results) or []:
            # .get() tolerates results that lack one of the fields.
            snippet = r.get("body") or ""
            results.append({
                "title": r.get("title") or "Untitled",
                "snippet": snippet,
                "summary": snippet,
                "url": r.get("href") or "",
            })
        return results
64
+
65
def get_image_urls(query, max_images=3):
    """Return image URLs from a DuckDuckGo image search.

    Args:
        query: Search string.
        max_images: Maximum number of image results requested.

    Returns:
        A list of non-empty image URL strings; hits without an ``image``
        value are skipped.
    """
    with DDGS() as ddgs:
        hits = ddgs.images(query, max_results=max_images) or []
        # Skip hits without a usable URL rather than raising KeyError or
        # handing an empty string to the image widget.
        return [hit["image"] for hit in hits if hit.get("image")]
68
+
69
def generate_apa_citation(title, url, source=""):
    """Build a simple APA-like citation line for a source.

    The current year is used as the publication year because no publication
    date is passed in.

    Args:
        title: Title of the work; a falsy value becomes ``"Untitled"``.
        url: Link to the work; omitted from the citation when empty.
        source: One of ``"arxiv"``, ``"semantic"`` or ``"web"`` (matched
            case-insensitively); any other value produces no venue label.

    Returns:
        A string of the form ``"<title>. (<year>). *<venue>*. <url>"``.
    """
    venue_labels = {
        "arxiv": "*arXiv*",
        "semantic": "*Semantic Scholar*",
        "web": "*Web Source*",
    }
    year = datetime.datetime.now().year

    clean_title = (title or "").strip()
    if clean_title.endswith("."):
        # Drop one trailing period so the citation does not read "Title..".
        clean_title = clean_title[:-1]
    clean_title = clean_title or "Untitled"

    parts = [f"{clean_title}.", f"({year})."]
    venue = venue_labels.get((source or "").strip().lower())
    if venue:
        parts.append(f"{venue}.")
    clean_url = (url or "").strip()
    if clean_url:
        # Appending only when present avoids a dangling trailing space.
        parts.append(clean_url)
    return " ".join(parts)
78
+
79
+ # --- Main Agent ---
80
def autonomous_research_agent(topic):
    """Gather sources for ``topic`` and have the LLM synthesize an article.

    Collects results from arXiv, Semantic Scholar and DuckDuckGo (text and
    images), builds a prompt from them, calls the LLM, and appends a source
    list plus APA-style citations to the generated text.

    Args:
        topic: Research topic used as the query for every source.

    Returns:
        A ``(response, images)`` tuple: the markdown report string and a list
        of image URLs.
    """
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)

    def to_md_and_citations(papers, source):
        """Render results as a markdown bullet list plus their citations."""
        entries, citations = [], []
        for p in papers:
            title = p.get("title") or "Untitled"
            link = p.get("url") or ""
            # Web results carry their text under "snippet" rather than
            # "summary"; indexing p['summary'] directly raised KeyError.
            text = p.get("summary") or p.get("snippet") or ""
            # Only mark the excerpt as truncated when it actually was.
            excerpt = text[:300] + ("..." if len(text) > 300 else "")
            entries.append(f"- [{title}]({link})\n> {excerpt}\n\n")
            citations.append(generate_apa_citation(title, link, source))
        return "".join(entries), citations

    arxiv_md, arxiv_cite = to_md_and_citations(arxiv, "arxiv")
    scholar_md, scholar_cite = to_md_and_citations(scholar, "semantic")
    web_md, web_cite = to_md_and_citations(web, "web")

    prompt = f"""
# Research Topic: {topic}

## ArXiv:
{arxiv_md}

## Semantic Scholar:
{scholar_md}

## Web Insights:
{web_md}

Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""
    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)

    # Append sources and citations
    sections = [response, "\n\n---\n### Sources Cited\n"]
    if arxiv_md:
        sections.append("**ArXiv:**\n" + arxiv_md)
    if scholar_md:
        sections.append("**Semantic Scholar:**\n" + scholar_md)
    if web_md:
        sections.append("**Web:**\n" + web_md)

    all_citations = arxiv_cite + scholar_cite + web_cite
    sections.append("\n---\n### 📚 APA Citations\n" + "\n".join(f"- {c}" for c in all_citations))

    return "".join(sections), images
129
+
130
+ # --- Export Helper ---
131
def export_file(content, export_format):
    """Convert report text into a downloadable payload.

    Args:
        content: Report text (markdown) to export.
        export_format: ``"Markdown"``, ``"LaTeX"`` or ``"PDF"``.

    Returns:
        A ``(data, filename)`` tuple. ``data`` is a ``str`` for Markdown and
        LaTeX and ``bytes`` for PDF. ``(None, None)`` is returned for an
        unknown format or when the ``fpdf`` package is not installed.
    """
    filename_base = f"research_output_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"

    if export_format == "Markdown":
        return content, f"{filename_base}.md"

    if export_format == "LaTeX":
        # Escape LaTeX special characters in a single pass so that '%', '#',
        # '_' and friends (common in markdown and URLs) cannot break the
        # document or silently comment out the rest of a line.
        latex_escapes = str.maketrans({
            "\\": r"\textbackslash{}",
            "&": r"\&",
            "%": r"\%",
            "$": r"\$",
            "#": r"\#",
            "_": r"\_",
            "{": r"\{",
            "}": r"\}",
            "~": r"\textasciitilde{}",
            "^": r"\textasciicircum{}",
        })
        body = content.translate(latex_escapes)
        tex = f"\\documentclass{{article}}\n\\begin{{document}}\n{body}\n\\end{{document}}"
        return tex, f"{filename_base}.tex"

    if export_format == "PDF":
        try:
            from fpdf import FPDF
        except ImportError:
            st.error("Install fpdf with: `pip install fpdf`")
            return None, None
        pdf = FPDF()
        pdf.add_page()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.set_font("Arial", size=12)
        for line in content.split('\n'):
            # The built-in Arial font only covers Latin-1; replace anything
            # else (e.g. emoji) with '?' instead of failing the export.
            printable = line.encode("latin-1", "replace").decode("latin-1")
            # NOTE(review): resetting x keeps every line at the left margin;
            # newer fpdf2 releases appear to leave the cursor at the right
            # edge after multi_cell. Confirm against the installed version.
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(0, 10, printable)
        # mkstemp creates the file atomically, unlike the deprecated
        # tempfile.mktemp, which only returns a guessable name.
        handle, path = tempfile.mkstemp(suffix=".pdf")
        os.close(handle)
        try:
            pdf.output(path)
            with open(path, "rb") as f:
                return f.read(), f"{filename_base}.pdf"
        finally:
            # Do not leave one temporary PDF behind per export.
            os.remove(path)

    return None, None
155
+
156
+ # --- Streamlit UI ---
157
st.set_page_config("Autonomous Research Assistant", layout="wide")
st.title("🤖 Autonomous AI Research Assistant")

if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

topic = st.text_input("Enter a research topic:")
if st.button("Run Research Agent") and topic:
    with st.spinner("Gathering sources & thinking..."):
        try:
            response, images = autonomous_research_agent(topic)
        except Exception as e:
            st.error(f"Research failed: {e}")
        else:
            st.session_state.chat_history.append({"role": "user", "content": topic})
            st.session_state.chat_history.append({"role": "assistant", "content": response})
            # Keep the result in session state: every widget interaction
            # reruns the script, and st.button is only True on the run
            # triggered by its own click, so anything rendered solely inside
            # the button branch disappears on the next rerun.
            st.session_state.last_response = response
            st.session_state.last_images = images

# Render the most recent result outside the button branch so it survives
# reruns caused by the export selector or the follow-up widgets.
last_response = st.session_state.get("last_response")
if last_response:
    last_images = st.session_state.get("last_images")
    if last_images:
        st.subheader("🖼️ Relevant Images")
        st.image(last_images, width=300)

    st.markdown(last_response)

    # Export options (only shown after generation). A download button
    # replaces the former nested st.button, whose branch could never run
    # because its click reset the outer button to False.
    export_format = st.selectbox("📤 Export Format", ["Markdown", "LaTeX", "PDF"])
    try:
        file_data, filename = export_file(last_response, export_format)
    except Exception as e:
        st.error(f"Export failed: {e}")
    else:
        if file_data:
            st.download_button(
                "📥 Download export",
                data=file_data,
                file_name=filename,
                mime="application/octet-stream",
                key="download_export",
            )

# --- Follow-up Chat ---
st.divider()
st.subheader("💬 Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")
if st.button("Ask") and followup:
    try:
        chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
        answer = call_llm(chat, max_tokens=1500)
        # Record the exchange only after the call succeeded so a failed
        # request does not leave an unanswered question in the history.
        st.session_state.chat_history.append({"role": "user", "content": followup})
        st.session_state.chat_history.append({"role": "assistant", "content": answer})
        st.markdown(answer)
    except Exception as e:
        st.error(f"Follow-up error: {e}")