Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -159,6 +159,26 @@ def merge_duplicates(entries):
|
|
159 |
return unique
|
160 |
|
161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
class PDF(FPDF):
|
163 |
def __init__(self):
|
164 |
super().__init__()
|
@@ -240,24 +260,24 @@ if research_button and topic:
|
|
240 |
with st.status("π Gathering and analyzing sources...") as status:
|
241 |
def fetch_all_sources():
|
242 |
sources = []
|
|
|
|
|
243 |
if source_type in ["Web Only", "Hybrid"]:
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
if source_type in ["Academic Only", "Hybrid"]:
|
246 |
sources += get_arxiv_papers(topic)
|
247 |
sources += get_semantic_papers(topic)
|
|
|
248 |
return sources
|
249 |
|
250 |
-
all_sources, retries = [], 0
|
251 |
-
while retries < 3:
|
252 |
-
all_sources = fetch_all_sources()
|
253 |
-
if all_sources:
|
254 |
-
break
|
255 |
-
retries += 1
|
256 |
-
time.sleep(2)
|
257 |
-
|
258 |
-
if not all_sources:
|
259 |
-
raise ValueError("β No sources found.")
|
260 |
-
|
261 |
merged = merge_duplicates(all_sources)
|
262 |
merged = sort_sources_chronologically(merged)
|
263 |
|
|
|
159 |
return unique
|
160 |
|
161 |
|
162 |
+
def load_web_url_content(url):
    """Fetch metadata for a single web page via the Tavily search API.

    Runs a Tavily search using the URL itself as the query and takes the
    top hit.  NOTE(review): this relies on Tavily resolving the URL back
    to its own page rather than doing a true content extraction — confirm
    this is the intended way to load a specific URL.

    Args:
        url: The web address to load.

    Returns:
        A source dict with keys "title", "url", "snippet", "image_url",
        "source" and "year", or None when nothing was found or the
        request failed.
    """
    try:
        response = tavily.search(query=url, search_depth="advanced", max_results=1)
        # Guard against a missing/empty "results" key instead of assuming it
        # exists (the original raised KeyError on unexpected responses).
        results = response.get("results") if isinstance(response, dict) else None
        if results:
            result = results[0]
            content = result.get("content", "")
            return {
                "title": result.get("title", "Untitled Web Page"),
                "url": result.get("url", url),
                "snippet": content,
                "image_url": result.get("image_url"),
                "source": "web",
                "year": extract_year_from_text(content),
            }
        return None
    except Exception as e:
        # Best-effort: a failed fetch must not abort the whole research run.
        print(f"Error loading URL: {url} β {e}")
        return None
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
class PDF(FPDF):
|
183 |
def __init__(self):
|
184 |
super().__init__()
|
|
|
260 |
with st.status("π Gathering and analyzing sources...") as status:
|
261 |
def fetch_all_sources():
    """Collect candidate sources for the current topic per *source_type*.

    Web/Hybrid: when the custom-domains field holds a full URL, load that
    page directly; otherwise run a domain-filtered web search.
    Academic/Hybrid: add arXiv and Semantic Scholar papers.

    Returns:
        A list of source dicts gathered from the selected providers.
    """
    collected = []

    # Handle Web/Hybrid sources
    if source_type in ("Web Only", "Hybrid"):
        candidate = custom_domains.strip()
        # Check if custom_domains is a full URL
        if candidate.startswith("http"):
            direct = load_web_url_content(candidate)
            if direct:
                collected.append(direct)
        else:
            collected += get_sources(topic, custom_domains)

    # Handle Academic sources
    if source_type in ("Academic Only", "Hybrid"):
        collected += get_arxiv_papers(topic)
        collected += get_semantic_papers(topic)

    return collected
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
merged = merge_duplicates(all_sources)
|
282 |
merged = sort_sources_chronologically(merged)
|
283 |
|