Spaces:
Running
Running
Update evidence_retrieval.py
Browse files- modules/evidence_retrieval.py +38 -13
modules/evidence_retrieval.py
CHANGED
@@ -8,11 +8,9 @@ combining evidence to support fact-checking operations.
|
|
8 |
|
9 |
import logging
|
10 |
import time
|
11 |
-
import re
|
12 |
-
import random
|
13 |
import requests
|
14 |
-
import json
|
15 |
import ssl
|
|
|
16 |
from urllib.parse import urlencode
|
17 |
from bs4 import BeautifulSoup
|
18 |
from SPARQLWrapper import SPARQLWrapper, JSON
|
@@ -24,6 +22,7 @@ from utils.models import get_nlp_model
|
|
24 |
from modules.claim_extraction import shorten_claim_for_evidence
|
25 |
from modules.rss_feed import retrieve_evidence_from_rss
|
26 |
from config import NEWS_API_KEY, FACTCHECK_API_KEY
|
|
|
27 |
# Import the performance tracker
|
28 |
from utils.performance import PerformanceTracker
|
29 |
performance_tracker = PerformanceTracker()
|
@@ -342,11 +341,7 @@ def retrieve_evidence_from_wikidata(claim):
|
|
342 |
sparql.addCustomHttpHeader("User-Agent", "MisinformationDetectionResearchBot/1.0")
|
343 |
|
344 |
# Fix SSL issues by disabling SSL verification for this specific request
|
345 |
-
try:
|
346 |
-
# Create a context where we don't verify SSL certs
|
347 |
-
import ssl
|
348 |
-
import urllib.request
|
349 |
-
|
350 |
# Create a context that doesn't verify certificates
|
351 |
ssl_context = ssl._create_unverified_context()
|
352 |
|
@@ -401,10 +396,26 @@ def retrieve_evidence_from_wikidata(claim):
|
|
401 |
wikidata_evidence.append(evidence_text)
|
402 |
|
403 |
logger.info(f"Retrieved {len(wikidata_evidence)} Wikidata entities")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
return wikidata_evidence
|
405 |
|
406 |
except Exception as e:
|
407 |
logger.error(f"Error retrieving from Wikidata: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
408 |
return []
|
409 |
|
410 |
@api_error_handler("openalex")
|
@@ -478,10 +489,26 @@ def retrieve_evidence_from_openalex(claim):
|
|
478 |
logger.error(f"Unexpected error in OpenAlex request: {str(e)}")
|
479 |
|
480 |
logger.info(f"Retrieved {len(scholarly_evidence)} scholarly papers from OpenAlex")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
return scholarly_evidence
|
482 |
|
483 |
except Exception as e:
|
484 |
logger.error(f"Fatal error in OpenAlex retrieval: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
485 |
return []
|
486 |
|
487 |
@api_error_handler("factcheck")
|
@@ -702,8 +729,9 @@ def retrieve_news_articles(claim, requires_recent=False):
|
|
702 |
news_texts = [item["text"] for item in news_results]
|
703 |
|
704 |
# Log evidence retrieval
|
|
|
|
|
705 |
try:
|
706 |
-
success = bool(news_texts)
|
707 |
performance_tracker.log_evidence_retrieval(success, source_count)
|
708 |
except Exception as log_error:
|
709 |
logger.error(f"Error logging evidence retrieval: {log_error}")
|
@@ -736,12 +764,9 @@ def retrieve_combined_evidence(claim):
|
|
736 |
logger.info(f"Starting evidence retrieval for: {claim}")
|
737 |
start_time = time.time()
|
738 |
|
739 |
-
# Use the category detector to identify the claim category
|
740 |
-
from modules.category_detection import get_category_specific_rss_feeds, get_fallback_category, detect_claim_category
|
741 |
-
|
742 |
# Extract key claim components for relevance matching
|
743 |
claim_components = extract_claim_components(claim)
|
744 |
-
logger.info(f"Extracted claim components: entities={claim_components
|
745 |
|
746 |
# Determine if claim has temporal attributes
|
747 |
requires_recent_evidence = bool(claim_components.get("temporal_words", []))
|
|
|
8 |
|
9 |
import logging
|
10 |
import time
|
|
|
|
|
11 |
import requests
|
|
|
12 |
import ssl
|
13 |
+
import urllib.request
|
14 |
from urllib.parse import urlencode
|
15 |
from bs4 import BeautifulSoup
|
16 |
from SPARQLWrapper import SPARQLWrapper, JSON
|
|
|
22 |
from modules.claim_extraction import shorten_claim_for_evidence
|
23 |
from modules.rss_feed import retrieve_evidence_from_rss
|
24 |
from config import NEWS_API_KEY, FACTCHECK_API_KEY
|
25 |
+
from modules.category_detection import get_category_specific_rss_feeds, get_fallback_category, detect_claim_category
|
26 |
# Import the performance tracker
|
27 |
from utils.performance import PerformanceTracker
|
28 |
performance_tracker = PerformanceTracker()
|
|
|
341 |
sparql.addCustomHttpHeader("User-Agent", "MisinformationDetectionResearchBot/1.0")
|
342 |
|
343 |
# Fix SSL issues by disabling SSL verification for this specific request
|
344 |
+
try:
|
|
|
|
|
|
|
|
|
345 |
# Create a context that doesn't verify certificates
|
346 |
ssl_context = ssl._create_unverified_context()
|
347 |
|
|
|
396 |
wikidata_evidence.append(evidence_text)
|
397 |
|
398 |
logger.info(f"Retrieved {len(wikidata_evidence)} Wikidata entities")
|
399 |
+
|
400 |
+
# Log evidence retrieval performance
|
401 |
+
success = bool(wikidata_evidence)
|
402 |
+
source_count = {"wikidata": len(wikidata_evidence)}
|
403 |
+
try:
|
404 |
+
performance_tracker.log_evidence_retrieval(success, source_count)
|
405 |
+
except Exception as e:
|
406 |
+
logger.error(f"Error logging Wikidata evidence retrieval: {e}")
|
407 |
+
|
408 |
return wikidata_evidence
|
409 |
|
410 |
except Exception as e:
|
411 |
logger.error(f"Error retrieving from Wikidata: {str(e)}")
|
412 |
+
|
413 |
+
# Log failed evidence retrieval
|
414 |
+
try:
|
415 |
+
performance_tracker.log_evidence_retrieval(False, {"wikidata": 0})
|
416 |
+
except Exception as log_error:
|
417 |
+
logger.error(f"Error logging failed Wikidata evidence retrieval: {log_error}")
|
418 |
+
|
419 |
return []
|
420 |
|
421 |
@api_error_handler("openalex")
|
|
|
489 |
logger.error(f"Unexpected error in OpenAlex request: {str(e)}")
|
490 |
|
491 |
logger.info(f"Retrieved {len(scholarly_evidence)} scholarly papers from OpenAlex")
|
492 |
+
|
493 |
+
# Log evidence retrieval performance
|
494 |
+
success = bool(scholarly_evidence)
|
495 |
+
source_count = {"openalex": len(scholarly_evidence)}
|
496 |
+
try:
|
497 |
+
performance_tracker.log_evidence_retrieval(success, source_count)
|
498 |
+
except Exception as e:
|
499 |
+
logger.error(f"Error logging OpenAlex evidence retrieval: {e}")
|
500 |
+
|
501 |
return scholarly_evidence
|
502 |
|
503 |
except Exception as e:
|
504 |
logger.error(f"Fatal error in OpenAlex retrieval: {str(e)}")
|
505 |
+
|
506 |
+
# Log failed evidence retrieval
|
507 |
+
try:
|
508 |
+
performance_tracker.log_evidence_retrieval(False, {"openalex": 0})
|
509 |
+
except Exception as log_error:
|
510 |
+
logger.error(f"Error logging failed OpenAlex evidence retrieval: {log_error}")
|
511 |
+
|
512 |
return []
|
513 |
|
514 |
@api_error_handler("factcheck")
|
|
|
729 |
news_texts = [item["text"] for item in news_results]
|
730 |
|
731 |
# Log evidence retrieval
|
732 |
+
success = bool(news_texts)
|
733 |
+
source_count = {"news": len(news_texts)}
|
734 |
try:
|
|
|
735 |
performance_tracker.log_evidence_retrieval(success, source_count)
|
736 |
except Exception as log_error:
|
737 |
logger.error(f"Error logging evidence retrieval: {log_error}")
|
|
|
764 |
logger.info(f"Starting evidence retrieval for: {claim}")
|
765 |
start_time = time.time()
|
766 |
|
|
|
|
|
|
|
767 |
# Extract key claim components for relevance matching
|
768 |
claim_components = extract_claim_components(claim)
|
769 |
+
logger.info(f"Extracted claim components: entities={claim_components.get('entities', [])}, verbs={claim_components.get('verbs', [])}")
|
770 |
|
771 |
# Determine if claim has temporal attributes
|
772 |
requires_recent_evidence = bool(claim_components.get("temporal_words", []))
|