Ani14 committed
Commit 3e7ca50 · verified · 1 Parent(s): 7d54951

Update app.py

Files changed (1)
  1. app.py +31 -144
app.py CHANGED
@@ -1,149 +1,36 @@

- import os
  import streamlit as st
- import requests
- import datetime
- import openai
- import feedparser
- from dotenv import load_dotenv
- from tavily import TavilyClient
- from PyPDF2 import PdfReader
- import faiss
- import numpy as np

- # --- Load API Keys ---
- load_dotenv()
- openai.api_key = os.getenv("OPENAI_API_KEY")
- TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "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")
- tavily = TavilyClient(api_key=TAVILY_API_KEY)

- # --- Streamlit Config ---
- st.set_page_config(page_title="GPT Researcher Agent", layout="wide")
- st.title("📚 GPT-Powered Research Assistant")
-
- # --- Helper: APA Citation ---
- def generate_apa_citation(title, url, source):
-     year = datetime.datetime.now().year
-     label = {
-         "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web*"
-     }.get(source, "*Web*")
-     return f"{title}. ({year}). {label}. {url}"
-
- # --- Search Tools ---
- def tavily_search(query):
-     results = tavily.search(query, search_depth="advanced", max_results=5)
-     return results.get("results", [])
-
- def arxiv_search(query):
-     from urllib.parse import quote_plus
-     url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=3"
-     feed = feedparser.parse(url)
-     return [{
-         "title": e.title,
-         "summary": e.summary.replace("\n", " ").strip(),
-         "url": next((l.href for l in e.links if l.type == "application/pdf"), "")
-     } for e in feed.entries]
-
- # --- Document Embedding ---
- def embed_document(file):
-     doc_text = ""
-     if file.name.endswith(".pdf"):
-         reader = PdfReader(file)
-         for page in reader.pages:
-             text = page.extract_text()
-             if text:
-                 doc_text += text
-     else:
-         doc_text = file.read().decode("utf-8")
-
-     chunks = [doc_text[i:i+1000] for i in range(0, len(doc_text), 1000)]
-     embeddings = openai.Embedding.create(input=chunks, model="text-embedding-ada-002")
-     vectors = [np.array(rec["embedding"], dtype=np.float32) for rec in embeddings["data"]]
-
-     dim = len(vectors[0])
-     index = faiss.IndexFlatL2(dim)
-     index.add(np.vstack(vectors))
-
-     return chunks, index
-
- # --- Streaming GPT Call ---
- def stream_response(messages):
-     response = openai.ChatCompletion.create(
-         model="gpt-4",
-         messages=messages,
-         max_tokens=3000,
-         stream=True
-     )
-     collected = ""
-     placeholder = st.empty()
-     for chunk in response:
-         delta = chunk["choices"][0].get("delta", {})
-         if "content" in delta:
-             token = delta["content"]
-             collected += token
-             placeholder.markdown(collected + "▌")
-     placeholder.markdown(collected)
-     return collected
-
- # --- Sidebar Input ---
  with st.sidebar:
-     topic = st.text_input("🔍 Research Topic", "AI in Sustainable Agriculture")
-     report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed", "Academic Paper"])
-     tone = st.selectbox("🎯 Tone", ["Objective", "Scientific", "Persuasive"])
-     sources = st.selectbox("🌐 Sources", ["Web", "Documents", "Both"])
-     uploaded_file = st.file_uploader("📎 Upload Document (PDF/TXT)", type=["pdf", "txt"])
-     start_button = st.button("🚀 Run Research")
-
- # --- Main Agent Execution ---
- if start_button and topic:
-     st.subheader("🧠 Agent Log")
-     with st.container():
-         st.markdown("<div style='max-height:300px; overflow-y:auto; background:#222; padding:10px; border-radius:10px;'>", unsafe_allow_html=True)
-         st.markdown("🧭 Starting research task...")
-         st.markdown(f"🔎 Topic: **{topic}** | Tone: _{tone}_ | Type: _{report_type}_")
-         st.markdown("</div>", unsafe_allow_html=True)
-
-     citations = []
-     context = ""
-
-     if sources in ["Web", "Both"]:
-         st.info("🌐 Searching web sources via Tavily...")
-         web_results = tavily_search(topic)
-         for r in web_results:
-             context += f"{r.get('content','')}\n"
-             citations.append(generate_apa_citation(r.get("title", "Untitled"), r.get("url", "#"), "web"))
-
-     if sources in ["Documents", "Both"] and uploaded_file:
-         st.info("📄 Embedding and retrieving from uploaded document...")
-         chunks, index = embed_document(uploaded_file)
-         q_embed = openai.Embedding.create(input=[topic], model="text-embedding-ada-002")
-         q_vector = np.array(q_embed["data"][0]["embedding"], dtype=np.float32).reshape(1, -1)
-         D, I = index.search(q_vector, k=3)
-         for idx in I[0]:
-             context += chunks[idx] + "\n"
-         citations.append(generate_apa_citation(uploaded_file.name, "Uploaded", "local"))
-
-     st.info("✍️ Generating final research report...")
-     messages = [
-         {"role": "system", "content": f"You are a research assistant. Write a {report_type.lower()} in a {tone.lower()} tone, citing sources."},
-         {"role": "user", "content": f"Topic: {topic}\n\nContext:\n{context}\n\nWrite a complete report in academic markdown format."}
-     ]
-
-     final_output = stream_response(messages)
-
-     # --- Show Output and Citations ---
-     st.subheader("📄 Final Report")
-     st.markdown(final_output, unsafe_allow_html=True)
-
-     st.subheader("📚 References")
-     for cite in citations:
-         st.markdown(f"- {cite}")
-
-     st.download_button("💾 Download Markdown", final_output, file_name="report.md", mime="text/markdown")
 

  import streamlit as st
+ from gpt_researcher.agent import GPTResearcher

+ st.set_page_config(page_title="GPT Researcher UI", layout="wide")
+ st.title("🤖 GPT Researcher — Streamlit UI")

+ # --- Sidebar inputs ---
  with st.sidebar:
+     st.header("🧠 Research Configuration")
+     topic = st.text_input("💡 Research Topic", "AI in climate change")
+     report_type = st.selectbox("📄 Report Type", ["summary", "detailed", "academic"])
+     report_format = st.selectbox("📜 Format", ["markdown", "text"])
+     report_source = st.selectbox("🌐 Sources", ["web", "arxiv", "semantic-scholar", "hybrid"])
+     tone = st.selectbox("🎯 Tone", ["objective", "persuasive", "informative"])
+     start = st.button("🚀 Start Research")
+
+ # --- Run GPTResearcher ---
+ if start and topic:
+     st.markdown("### ⏳ Running Autonomous Research Agent...")
+     with st.spinner("Gathering knowledge, synthesizing insights..."):
+         agent = GPTResearcher(
+             query=topic,
+             report_type=report_type,
+             report_format=report_format,
+             report_source=report_source,
+             tone=tone
+         )
+         output = agent.run()
+
+     st.success("✅ Research Complete!")
+
+     st.markdown("### 📄 Final Report")
+     st.markdown(output, unsafe_allow_html=True)
+
+     st.download_button("💾 Download Markdown", output, file_name="report.md", mime="text/markdown")
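
One caveat on the added code: it drives the agent through a synchronous agent.run() call, while the upstream gpt-researcher README documents an asynchronous flow (conduct_research() followed by write_report(), with the class imported as from gpt_researcher import GPTResearcher). If the installed version does not expose run(), a minimal sketch of the equivalent wiring could look like the following; the helper name run_researcher is ours, and extra constructor kwargs such as report_source or tone depend on the installed version.

import asyncio
from gpt_researcher import GPTResearcher

async def run_researcher(query: str, report_type: str) -> str:
    # Assumed async flow from the upstream README: gather sources, then write the report.
    researcher = GPTResearcher(query=query, report_type=report_type)
    await researcher.conduct_research()
    return await researcher.write_report()

# Hypothetical wiring inside the Streamlit button handler:
# output = asyncio.run(run_researcher(topic, report_type))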