Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -159,6 +159,26 @@ def merge_duplicates(entries):
|
|
159 |
return unique
|
160 |
|
161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
class PDF(FPDF):
|
163 |
def __init__(self):
|
164 |
super().__init__()
|
@@ -240,24 +260,24 @@ if research_button and topic:
|
|
240 |
with st.status("π Gathering and analyzing sources...") as status:
|
241 |
def fetch_all_sources():
|
242 |
sources = []
|
|
|
|
|
243 |
if source_type in ["Web Only", "Hybrid"]:
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
if source_type in ["Academic Only", "Hybrid"]:
|
246 |
sources += get_arxiv_papers(topic)
|
247 |
sources += get_semantic_papers(topic)
|
|
|
248 |
return sources
|
249 |
|
250 |
-
all_sources, retries = [], 0
|
251 |
-
while retries < 3:
|
252 |
-
all_sources = fetch_all_sources()
|
253 |
-
if all_sources:
|
254 |
-
break
|
255 |
-
retries += 1
|
256 |
-
time.sleep(2)
|
257 |
-
|
258 |
-
if not all_sources:
|
259 |
-
raise ValueError("β No sources found.")
|
260 |
-
|
261 |
merged = merge_duplicates(all_sources)
|
262 |
merged = sort_sources_chronologically(merged)
|
263 |
|
|
|
159 |
return unique
|
160 |
|
161 |
|
162 |
+
def load_web_url_content(url):
    """Fetch metadata for a single web page via the Tavily search API.

    Runs a Tavily search using the URL itself as the query and takes the
    top hit.  NOTE(review): this relies on Tavily resolving the URL back
    to its own page rather than doing a true content extraction — confirm
    this is the intended way to load a specific URL.

    Args:
        url: The web address to load.

    Returns:
        A source dict with keys "title", "url", "snippet", "image_url",
        "source" and "year", or None when nothing was found or the
        request failed.
    """
    try:
        response = tavily.search(query=url, search_depth="advanced", max_results=1)
        # Guard against a missing/empty "results" key instead of assuming it
        # exists (the original raised KeyError on unexpected responses).
        results = response.get("results") if isinstance(response, dict) else None
        if results:
            result = results[0]
            content = result.get("content", "")
            return {
                "title": result.get("title", "Untitled Web Page"),
                "url": result.get("url", url),
                "snippet": content,
                "image_url": result.get("image_url"),
                "source": "web",
                "year": extract_year_from_text(content),
            }
        return None
    except Exception as e:
        # Best-effort: a failed fetch must not abort the whole research run.
        print(f"Error loading URL: {url} β {e}")
        return None
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
class PDF(FPDF):
|
183 |
def __init__(self):
|
184 |
super().__init__()
|
|
|
260 |
with st.status("π Gathering and analyzing sources...") as status:
|
261 |
def fetch_all_sources():
    """Collect candidate sources for the current topic per *source_type*.

    Web/Hybrid: when the custom-domains field holds a full URL, load that
    page directly; otherwise run a domain-filtered web search.
    Academic/Hybrid: add arXiv and Semantic Scholar papers.

    Returns:
        A list of source dicts gathered from the selected providers.
    """
    collected = []

    # Handle Web/Hybrid sources
    if source_type in ("Web Only", "Hybrid"):
        candidate = custom_domains.strip()
        # Check if custom_domains is a full URL
        if candidate.startswith("http"):
            direct = load_web_url_content(candidate)
            if direct:
                collected.append(direct)
        else:
            collected += get_sources(topic, custom_domains)

    # Handle Academic sources
    if source_type in ("Academic Only", "Hybrid"):
        collected += get_arxiv_papers(topic)
        collected += get_semantic_papers(topic)

    return collected
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
merged = merge_duplicates(all_sources)
|
282 |
merged = sort_sources_chronologically(merged)
|
283 |
|