Spaces:
Running
Running
Update rss_feed.py
Browse files
- modules/rss_feed.py +23 -2
modules/rss_feed.py
CHANGED
@@ -7,6 +7,10 @@ import requests
|
|
7 |
from datetime import datetime, timedelta
|
8 |
from urllib.parse import urlparse
|
9 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
|
|
|
|
|
10 |
|
11 |
logger = logging.getLogger("misinformation_detector")
|
12 |
|
@@ -335,8 +339,8 @@ def retrieve_evidence_from_rss(claim, max_results=10, category_feeds=None):
|
|
335 |
other_feeds = [feed for feed in feeds_to_use if feed not in fact_check_feeds]
|
336 |
|
337 |
# Take all fact-checking feeds plus a random selection of others
|
338 |
-
import random
|
339 |
selected_feeds = fact_check_feeds + random.sample(other_feeds, min(max(0, 10 - len(fact_check_feeds)), len(other_feeds)))
|
|
|
340 |
else:
|
341 |
selected_feeds = feeds_to_use
|
342 |
|
@@ -403,8 +407,25 @@ def retrieve_evidence_from_rss(claim, max_results=10, category_feeds=None):
|
|
403 |
logger.info(f"Retrieved {len(top_entries)} relevant RSS items from {len(feeds)} feeds in {time.time() - start_time:.2f}s")
|
404 |
|
405 |
# Return just the text portion
|
406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
407 |
|
408 |
except Exception as e:
|
409 |
logger.error(f"Error in RSS retrieval: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
410 |
return []
|
|
|
7 |
from datetime import datetime, timedelta
|
8 |
from urllib.parse import urlparse
|
9 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
10 |
+
import random
|
11 |
+
# Import the performance tracker
|
12 |
+
from utils.performance import PerformanceTracker
|
13 |
+
performance_tracker = PerformanceTracker()
|
14 |
|
15 |
logger = logging.getLogger("misinformation_detector")
|
16 |
|
|
|
339 |
other_feeds = [feed for feed in feeds_to_use if feed not in fact_check_feeds]
|
340 |
|
341 |
# Take all fact-checking feeds plus a random selection of others
|
|
|
342 |
selected_feeds = fact_check_feeds + random.sample(other_feeds, min(max(0, 10 - len(fact_check_feeds)), len(other_feeds)))
|
343 |
+
|
344 |
else:
|
345 |
selected_feeds = feeds_to_use
|
346 |
|
|
|
407 |
logger.info(f"Retrieved {len(top_entries)} relevant RSS items from {len(feeds)} feeds in {time.time() - start_time:.2f}s")
|
408 |
|
409 |
# Return just the text portion
|
410 |
+
rss_results = [entry["text"] for entry in top_entries]
|
411 |
+
|
412 |
+
# Log evidence retrieval performance
|
413 |
+
success = bool(rss_results)
|
414 |
+
source_count = {"rss": len(rss_results)}
|
415 |
+
try:
|
416 |
+
performance_tracker.log_evidence_retrieval(success, source_count)
|
417 |
+
except Exception as e:
|
418 |
+
logger.error(f"Error logging RSS evidence retrieval: {e}")
|
419 |
+
|
420 |
+
return rss_results
|
421 |
|
422 |
except Exception as e:
|
423 |
logger.error(f"Error in RSS retrieval: {str(e)}")
|
424 |
+
|
425 |
+
# Log failed evidence retrieval
|
426 |
+
try:
|
427 |
+
performance_tracker.log_evidence_retrieval(False, {"rss": 0})
|
428 |
+
except Exception as log_error:
|
429 |
+
logger.error(f"Error logging failed RSS evidence retrieval: {log_error}")
|
430 |
+
|
431 |
return []
|