Ani14 committed on
Commit
65c3858
·
verified ·
1 Parent(s): a77e234

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -78
app.py CHANGED
@@ -3,16 +3,14 @@ import streamlit as st
3
  import requests
4
  import feedparser
5
  import datetime
6
- import base64
7
- import tempfile
8
  from dotenv import load_dotenv
9
  from duckduckgo_search import DDGS
10
- from fuzzywuzzy import fuzz
11
 
12
  load_dotenv()
13
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
14
 
15
- # --- LLM Call ---
16
  def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
17
  url = "https://openrouter.ai/api/v1/chat/completions"
18
  headers = {
@@ -26,13 +24,34 @@ def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2
26
  "max_tokens": max_tokens,
27
  "temperature": temperature
28
  }
29
- response = requests.post(url, headers=headers, json=data)
30
- result = response.json()
 
 
 
31
  if response.status_code != 200:
32
  raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
 
 
33
  return result["choices"][0]["message"]["content"]
34
 
35
- # --- Search Helpers ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def get_arxiv_papers(query, max_results=3):
37
  from urllib.parse import quote_plus
38
  url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
@@ -67,32 +86,37 @@ def get_image_urls(query, max_images=3):
67
  return [img["image"] for img in ddgs.images(query, max_results=max_images)]
68
 
69
  def generate_apa_citation(title, url, source=""):
70
- year = datetime.datetime.now().year
71
  if source == "arxiv":
72
- return f"{title}. ({year}). *arXiv*. {url}"
73
  elif source == "semantic":
74
- return f"{title}. ({year}). *Semantic Scholar*. {url}"
75
  elif source == "web":
76
- return f"{title}. ({year}). *Web Source*. {url}"
77
- return f"{title}. ({year}). {url}"
 
78
 
79
- # --- Main Agent ---
80
  def autonomous_research_agent(topic):
81
  arxiv = get_arxiv_papers(topic)
82
  scholar = get_semantic_scholar_papers(topic)
83
  web = search_duckduckgo(topic)
84
  images = get_image_urls(topic)
85
 
86
- def to_md_and_citations(papers, source):
87
- md, citations = "", []
88
- for p in papers:
89
- md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
90
- citations.append(generate_apa_citation(p['title'], p['url'], source))
91
- return md, citations
92
 
93
- arxiv_md, arxiv_cite = to_md_and_citations(arxiv, "arxiv")
94
- scholar_md, scholar_cite = to_md_and_citations(scholar, "semantic")
95
- web_md, web_cite = to_md_and_citations(web, "web")
 
 
 
 
 
 
96
 
97
  prompt = f"""
98
  # Research Topic: {topic}
@@ -113,7 +137,7 @@ Now synthesize this information into:
113
  """
114
  response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)
115
 
116
- # Append sources and citations
117
  response += "\n\n---\n### Sources Cited\n"
118
  if arxiv_md:
119
  response += "**ArXiv:**\n" + arxiv_md
@@ -122,37 +146,14 @@ Now synthesize this information into:
122
  if web_md:
123
  response += "**Web:**\n" + web_md
124
 
125
- all_citations = arxiv_cite + scholar_cite + web_cite
126
- response += "\n---\n### 📚 APA Citations\n" + "\n".join(f"- {c}" for c in all_citations)
 
 
 
127
 
128
  return response, images
129
 
130
- # --- Export Helper ---
131
- def export_file(content, export_format):
132
- filename_base = f"research_output_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
133
- if export_format == "Markdown":
134
- return content, f"{filename_base}.md"
135
- elif export_format == "LaTeX":
136
- tex = f"\\documentclass{{article}}\n\\begin{{document}}\n{content}\n\\end{{document}}"
137
- return tex, f"{filename_base}.tex"
138
- elif export_format == "PDF":
139
- try:
140
- from fpdf import FPDF
141
- except ImportError:
142
- st.error("Install fpdf with: `pip install fpdf`")
143
- return None, None
144
- pdf = FPDF()
145
- pdf.add_page()
146
- pdf.set_auto_page_break(auto=True, margin=15)
147
- pdf.set_font("Arial", size=12)
148
- for line in content.split('\n'):
149
- pdf.multi_cell(0, 10, line)
150
- path = tempfile.mktemp(suffix=".pdf")
151
- pdf.output(path)
152
- with open(path, "rb") as f:
153
- return f.read(), f"{filename_base}.pdf"
154
- return None, None
155
-
156
  # --- Streamlit UI ---
157
  st.set_page_config("Autonomous Research Assistant", layout="wide")
158
  st.title("🤖 Autonomous AI Research Assistant")
@@ -161,46 +162,45 @@ if "chat_history" not in st.session_state:
161
  st.session_state.chat_history = []
162
 
163
  topic = st.text_input("Enter a research topic:")
164
- if st.button("Run Research Agent") and topic:
165
  with st.spinner("Gathering sources & thinking..."):
166
  try:
167
  response, images = autonomous_research_agent(topic)
168
- st.session_state.chat_history.append({"role": "user", "content": topic})
169
- st.session_state.chat_history.append({"role": "assistant", "content": response})
170
 
 
171
  if images:
172
  st.subheader("🖼️ Relevant Images")
173
  st.image(images, width=300)
174
 
 
 
 
175
  st.markdown(response)
176
 
177
- # Export options (only show after generation)
178
- export_format = st.selectbox("📤 Export Format", ["Markdown", "LaTeX", "PDF"])
179
- if st.button("Download Export"):
180
- try:
181
- file_data, filename = export_file(response, export_format)
182
- if file_data:
183
- if isinstance(file_data, str):
184
- b64 = base64.b64encode(file_data.encode()).decode()
185
- else:
186
- b64 = base64.b64encode(file_data).decode()
187
- href = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">📥 Download {filename}</a>'
188
- st.markdown(href, unsafe_allow_html=True)
189
- except Exception as e:
190
- st.error(f"Export failed: {e}")
191
  except Exception as e:
192
- st.error(f"Research failed: {e}")
193
 
194
  # --- Follow-up Chat ---
195
  st.divider()
196
  st.subheader("💬 Follow-up Q&A")
197
  followup = st.text_input("Ask a follow-up question:")
198
- if st.button("Ask") and followup:
199
- try:
200
- chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
201
- answer = call_llm(chat, max_tokens=1500)
202
- st.session_state.chat_history.append({"role": "user", "content": followup})
203
- st.session_state.chat_history.append({"role": "assistant", "content": answer})
204
- st.markdown(answer)
205
- except Exception as e:
206
- st.error(f"Follow-up error: {e}")
 
 
3
  import requests
4
  import feedparser
5
  import datetime
6
+ from fuzzywuzzy import fuzz
 
7
  from dotenv import load_dotenv
8
  from duckduckgo_search import DDGS
 
9
 
10
  load_dotenv()
11
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
12
 
13
+ # --- Call OpenRouter LLM ---
14
  def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
15
  url = "https://openrouter.ai/api/v1/chat/completions"
16
  headers = {
 
24
  "max_tokens": max_tokens,
25
  "temperature": temperature
26
  }
27
+ try:
28
+ response = requests.post(url, headers=headers, json=data)
29
+ result = response.json()
30
+ except Exception as e:
31
+ raise RuntimeError(f"Failed to connect or parse response: {e}")
32
  if response.status_code != 200:
33
  raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
34
+ if "choices" not in result:
35
+ raise RuntimeError(f"Invalid response: {result}")
36
  return result["choices"][0]["message"]["content"]
37
 
38
+ # --- Plagiarism Check ---
39
def check_plagiarism(text, query, threshold=70):
    """Flag web search snippets that closely resemble ``text``.

    Calls ``search_duckduckgo(query, max_results=5)`` and scores each returned
    snippet against ``text`` with ``fuzz.token_set_ratio``.

    Args:
        text: Generated content to check. Empty or ``None`` yields no hits
            and skips the web search entirely.
        query: Search query used to fetch comparison snippets.
        threshold: Minimum similarity score (inclusive) for a result to be
            reported.

    Returns:
        A list of dicts with ``title``, ``url``, ``snippet`` and
        ``similarity`` keys, one per matching result, in search order.
    """
    # Nothing to compare: avoid a pointless network round trip.
    if not text:
        return []

    plagiarized_snippets = []
    for result in search_duckduckgo(query, max_results=5):
        snippet = result.get("snippet", "")
        # A blank snippet carries no content to overlap with.
        if not snippet:
            continue
        # NOTE(review): token_set_ratio is set-based, so a short snippet whose
        # words all occur somewhere in a long ``text`` can score very high --
        # confirm the threshold is meaningful for long inputs.
        similarity = fuzz.token_set_ratio(text, snippet)
        if similarity >= threshold:
            plagiarized_snippets.append({
                # Read optional fields the same tolerant way as ``snippet``.
                "title": result.get("title", ""),
                "url": result.get("url", ""),
                "snippet": snippet,
                "similarity": similarity,
            })
    return plagiarized_snippets
53
+
54
+ # --- Source Utilities ---
55
  def get_arxiv_papers(query, max_results=3):
56
  from urllib.parse import quote_plus
57
  url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
 
86
  return [img["image"] for img in ddgs.images(query, max_results=max_images)]
87
 
88
  def generate_apa_citation(title, url, source=""):
89
+ current_year = datetime.datetime.now().year
90
  if source == "arxiv":
91
+ return f"{title}. ({current_year}). *arXiv*. {url}"
92
  elif source == "semantic":
93
+ return f"{title}. ({current_year}). *Semantic Scholar*. {url}"
94
  elif source == "web":
95
+ return f"{title}. ({current_year}). *Web Source*. {url}"
96
+ else:
97
+ return f"{title}. ({current_year}). {url}"
98
 
99
+ # --- Research Agent ---
100
  def autonomous_research_agent(topic):
101
  arxiv = get_arxiv_papers(topic)
102
  scholar = get_semantic_scholar_papers(topic)
103
  web = search_duckduckgo(topic)
104
  images = get_image_urls(topic)
105
 
106
+ arxiv_md, arxiv_citations = "", []
107
+ for p in arxiv:
108
+ arxiv_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
109
+ arxiv_citations.append(generate_apa_citation(p["title"], p["url"], source="arxiv"))
 
 
110
 
111
+ scholar_md, scholar_citations = "", []
112
+ for p in scholar:
113
+ scholar_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
114
+ scholar_citations.append(generate_apa_citation(p["title"], p["url"], source="semantic"))
115
+
116
+ web_md, web_citations = "", []
117
+ for w in web:
118
+ web_md += f"- [{w['title']}]({w['url']})\n> {w['snippet']}\n\n"
119
+ web_citations.append(generate_apa_citation(w["title"], w["url"], source="web"))
120
 
121
  prompt = f"""
122
  # Research Topic: {topic}
 
137
  """
138
  response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)
139
 
140
+ # Append Sources
141
  response += "\n\n---\n### Sources Cited\n"
142
  if arxiv_md:
143
  response += "**ArXiv:**\n" + arxiv_md
 
146
  if web_md:
147
  response += "**Web:**\n" + web_md
148
 
149
+ # APA Citations Section
150
+ all_citations = arxiv_citations + scholar_citations + web_citations
151
+ response += "\n---\n### 📚 APA Citations\n"
152
+ for cite in all_citations:
153
+ response += f"- {cite}\n"
154
 
155
  return response, images
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  # --- Streamlit UI ---
158
  st.set_page_config("Autonomous Research Assistant", layout="wide")
159
  st.title("🤖 Autonomous AI Research Assistant")
 
162
  st.session_state.chat_history = []
163
 
164
  topic = st.text_input("Enter a research topic:")
165
# Run the agent only when the button was clicked AND a non-blank topic was
# entered; otherwise every search backend and the LLM would be queried with
# an empty string.
if st.button("Run Research Agent") and topic.strip():
    with st.spinner("Gathering sources & thinking..."):
        try:
            response, images = autonomous_research_agent(topic)

            # Display images
            if images:
                st.subheader("🖼️ Relevant Images")
                st.image(images, width=300)

            # Record the exchange so follow-up questions have context, then
            # display the markdown response.
            st.session_state.chat_history.append({"role": "user", "content": topic})
            st.session_state.chat_history.append({"role": "assistant", "content": response})
            st.markdown(response)

            # Check for plagiarism
            plagiarism_hits = check_plagiarism(response, topic)
            if plagiarism_hits:
                st.warning("⚠️ Potential overlap with existing web content detected.")
                st.subheader("🕵️ Plagiarism Check Results")
                for hit in plagiarism_hits:
                    st.markdown(f"**{hit['title']}** - [{hit['url']}]({hit['url']})")
                    st.markdown(f"> _Similarity: {hit['similarity']}%_\n\n{hit['snippet']}")
            else:
                st.success("✅ No significant overlaps found. Content appears original.")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"Failed: {e}")
192
 
193
  # --- Follow-up Chat ---
194
  st.divider()
195
  st.subheader("💬 Follow-up Q&A")
196
  followup = st.text_input("Ask a follow-up question:")
197
# Answer a follow-up question using the accumulated conversation as context.
if st.button("Ask") and followup:
    question = {"role": "user", "content": followup}
    try:
        # Send the history plus the new question without mutating the stored
        # history until the call has succeeded.
        answer = call_llm([*st.session_state.chat_history, question], max_tokens=1500)
        st.session_state.chat_history.extend(
            [question, {"role": "assistant", "content": answer}]
        )
        st.markdown(answer)
    except Exception as e:
        st.error(f"Follow-up error: {e}")