Ani14 committed on
Commit
eb35130
·
verified ·
1 Parent(s): 4dfa219

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -12
app.py CHANGED
@@ -159,6 +159,26 @@ def merge_duplicates(entries):
159
  return unique
160
 
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  class PDF(FPDF):
163
  def __init__(self):
164
  super().__init__()
@@ -240,24 +260,24 @@ if research_button and topic:
240
 with st.status("🔍 Gathering and analyzing sources...") as status:
241
  def fetch_all_sources():
242
  sources = []
 
 
243
  if source_type in ["Web Only", "Hybrid"]:
244
- sources += get_sources(topic, custom_domains)
 
 
 
 
 
 
 
 
245
  if source_type in ["Academic Only", "Hybrid"]:
246
  sources += get_arxiv_papers(topic)
247
  sources += get_semantic_papers(topic)
 
248
  return sources
249
 
250
- all_sources, retries = [], 0
251
- while retries < 3:
252
- all_sources = fetch_all_sources()
253
- if all_sources:
254
- break
255
- retries += 1
256
- time.sleep(2)
257
-
258
- if not all_sources:
259
- raise ValueError("❌ No sources found.")
260
-
261
  merged = merge_duplicates(all_sources)
262
  merged = sort_sources_chronologically(merged)
263
 
 
159
  return unique
160
 
161
 
162
def load_web_url_content(url):
    """Fetch a single web page's metadata via the Tavily search API.

    The URL itself is used as the search query with ``max_results=1``, so the
    top hit is treated as the page's content record.

    Parameters
    ----------
    url : str
        The URL to look up.

    Returns
    -------
    dict | None
        A source record with ``title``, ``url``, ``snippet``, ``image_url``,
        ``source`` ("web") and ``year`` keys, or ``None`` when nothing was
        found or the request failed.
    """
    try:
        response = tavily.search(query=url, search_depth="advanced", max_results=1)
        # Don't assume the "results" key exists — a malformed response should
        # fall through to the "not found" path, not be misreported as an error.
        results = response.get("results") or []
        if results:
            result = results[0]
            # Compute once: snippet text is reused for year extraction.
            content = result.get("content", "")
            return {
                "title": result.get("title", "Untitled Web Page"),
                "url": result.get("url", url),
                "snippet": content,
                "image_url": result.get("image_url"),
                "source": "web",
                "year": extract_year_from_text(content),
            }
        return None
    except Exception as e:
        # Best-effort: one failed lookup must not abort the whole research run.
        print(f"Error loading URL: {url} — {e}")
        return None
179
+
180
+
181
+
182
  class PDF(FPDF):
183
  def __init__(self):
184
  super().__init__()
 
260
 with st.status("🔍 Gathering and analyzing sources...") as status:
261
  def fetch_all_sources():
262
  sources = []
263
+
264
+ # Handle Web/Hybrid sources
265
  if source_type in ["Web Only", "Hybrid"]:
266
+ # Check if custom_domains is a full URL
267
+ if custom_domains.strip().startswith("http"):
268
+ custom_source = load_web_url_content(custom_domains.strip())
269
+ if custom_source:
270
+ sources.append(custom_source)
271
+ else:
272
+ sources += get_sources(topic, custom_domains)
273
+
274
+ # Handle Academic sources
275
  if source_type in ["Academic Only", "Hybrid"]:
276
  sources += get_arxiv_papers(topic)
277
  sources += get_semantic_papers(topic)
278
+
279
  return sources
280
 
 
 
 
 
 
 
 
 
 
 
 
281
  merged = merge_duplicates(all_sources)
282
  merged = sort_sources_chronologically(merged)
283