Ani14 committed · Commit afadb32 · verified · Parent(s): 952c032

Update app.py

Files changed (1): app.py (+24, -10)
app.py CHANGED
@@ -21,7 +21,7 @@ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
 tavily = TavilyClient(api_key=TAVILY_API_KEY)
 
 # --- Helper Functions ---
-def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3500, temperature=0.7):
+def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=20000, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
@@ -86,9 +86,10 @@ def get_sources(topic, domains=None):
             "url": r["url"],
             "snippet": r.get("content", ""),
             "image_url": image_url,
-            "source": "web"
+            "source": "web",
+            "year": extract_year_from_text(r.get("content", ""))
         })
 
     return results
 
 def get_arxiv_papers(query):
@@ -99,24 +100,32 @@ def get_arxiv_papers(query):
         "title": e.title,
         "summary": e.summary.replace("\n", " ").strip(),
         "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
-        "source": "arxiv"
+        "source": "arxiv",
+        "year": int(e.published[:4]) if 'published' in e else 9999
     } for e in feed.entries]
 
 def get_semantic_papers(query):
     try:
         url = "https://api.semanticscholar.org/graph/v1/paper/search"
-        params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
+        params = {"query": query, "limit": 5, "fields": "title,abstract,url,year"}
         response = requests.get(url, params=params)
         papers = response.json().get("data", [])
         return [{
             "title": p.get("title"),
             "summary": p.get("abstract", "No abstract available"),
             "url": p.get("url"),
-            "source": "semantic"
+            "source": "semantic",
+            "year": p.get("year", 9999)
         } for p in papers]
     except:
         return []
 
+def extract_year_from_text(text):
+    import re
+    years = re.findall(r"\b(?:19|20)\d{2}\b", text)
+    return int(years[0]) if years else 9999
+
+
 def check_plagiarism(text, topic):
     hits = []
     for r in get_sources(topic):
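The new `extract_year_from_text` helper grabs the first plausible four-digit year from a page snippet. One detail to watch: the group must be non-capturing (`(?:19|20)`), because `re.findall` returns captured groups when the pattern contains one, so a capturing `(19|20)` would yield just "19" or "20" instead of the full match. A quick check of the behavior:

```python
import re

def extract_year_from_text(text):
    # Non-capturing group: with a capturing (19|20), re.findall would
    # return "19"/"20" rather than the full four-digit year.
    years = re.findall(r"\b(?:19|20)\d{2}\b", text)
    return int(years[0]) if years else 9999  # 9999 = "year unknown" sentinel

assert extract_year_from_text("Published March 2021, revised 2023.") == 2021
assert extract_year_from_text("No date in this snippet.") == 9999
```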
@@ -125,8 +134,8 @@ def check_plagiarism(text, topic):
             hits.append(r)
     return hits
 
-def generate_apa_citation(title, url, source):
-    year = datetime.datetime.now().year
+def generate_apa_citation(title, url, source, year=None):
+    year = year or datetime.datetime.now().year
     label = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}.get(source, "*Web*")
     return f"{title}. ({year}). {label}. {url}"
 
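With the new optional `year` parameter, fetchers that know a publication year pass it through, and the old stamp-it-with-the-current-year behavior remains the fallback. For example:

```python
import datetime

def generate_apa_citation(title, url, source, year=None):
    # Use the source's own year when available; otherwise fall back to now.
    year = year or datetime.datetime.now().year
    label = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*",
             "web": "*Web Source*"}.get(source, "*Web*")
    return f"{title}. ({year}). {label}. {url}"

print(generate_apa_citation("Attention Is All You Need",
                            "https://arxiv.org/abs/1706.03762",
                            "arxiv", 2017))
# Attention Is All You Need. (2017). *arXiv*. https://arxiv.org/abs/1706.03762
```

One subtlety: the 9999 "unknown" sentinel used by the fetchers is truthy, so it passes through `year or ...` unchanged and would appear verbatim in a citation; mapping 9999 back to None before calling would restore the current-year fallback.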
 
@@ -187,6 +196,9 @@ def generate_download_button(file, label, mime_type):
     </a>
     """
 
+def sort_sources_chronologically(sources):
+    return sorted(sources, key=lambda s: s.get("year", 9999))
+
 # --- Streamlit UI ---
 st.set_page_config("Deep Research Assistant", layout="centered")
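Because every fetcher defaults unknown years to 9999, the sort pushes undated material to the end while keeping dated sources oldest-first. A quick demonstration with hypothetical records:

```python
def sort_sources_chronologically(sources):
    # Records without a year (or with the 9999 sentinel) sort last.
    return sorted(sources, key=lambda s: s.get("year", 9999))

records = [
    {"title": "B", "year": 2024},
    {"title": "A", "year": 2017},
    {"title": "C"},                # no year key at all
    {"title": "D", "year": 9999},  # year unknown
]
print([r["title"] for r in sort_sources_chronologically(records)])
# ['A', 'B', 'C', 'D']  (sorted() is stable, so C keeps its place before D)
```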
 
@@ -237,6 +249,7 @@ if research_button and topic:
         raise ValueError("❌ No sources found.")
 
     merged = merge_duplicates(all_sources)
+    merged = sort_sources_chronologically(merged)
 
     # 🔹 Image previews
     st.subheader("🖼 Source Previews")
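Ordering matters in this pipeline: duplicates are collapsed first, then the survivors are sorted. `merge_duplicates` is not shown in this diff, so how it treats the new `year` field is an assumption; a merge that keys on URL and keeps the earliest known year per group, roughly as sketched below, would preserve the chronology:

```python
def merge_duplicates_sketch(sources):
    # Hypothetical sketch only; the real merge_duplicates is not part of
    # this diff. Groups records by URL and keeps the earliest known year.
    by_url = {}
    for s in sources:
        kept = by_url.setdefault(s["url"], s)
        if s.get("year", 9999) < kept.get("year", 9999):
            kept["year"] = s["year"]
    return list(by_url.values())
```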
@@ -251,12 +264,13 @@ if research_button and topic:
         st.info("ℹ️ No image previews available.")
 
     # 🔹 Generate report
-    citations = [generate_apa_citation(m['title'], m['url'], m['source']) for m in merged]
+    citations = [generate_apa_citation(m['title'], m['url'], m['source'], m.get('year')) for m in merged]
     combined_text = "\n\n".join([
-        f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
+        f"- [{m['title']}]({m['url']}) ({m.get('year', 'n.d.')})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
         for m in merged
     ])
 
+
     prompt = f"""
     You are an expert research assistant.
 
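One caveat in the new bullet format: `m.get('year', 'n.d.')` only falls back to "n.d." when the key is missing entirely, but all three fetchers now always set `year`, using 9999 when nothing was found, so undated sources will render as "(9999)". A small helper along these lines (hypothetical, not in the commit) would restore the APA-style "n.d.":

```python
def display_year(source):
    # Hypothetical helper: render the 9999 "unknown" sentinel (or a missing
    # key) as APA-style "n.d." instead of a literal 9999.
    year = source.get("year", 9999)
    return "n.d." if year == 9999 else str(year)

assert display_year({"year": 2021}) == "2021"
assert display_year({"year": 9999}) == "n.d."
assert display_year({}) == "n.d."
```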
 
 