ankanghosh committed
Commit b89c41e · verified · 1 Parent(s): 28eb311

Update evidence_retrieval.py

Files changed (1)
  1. modules/evidence_retrieval.py +38 -13
modules/evidence_retrieval.py CHANGED
@@ -8,11 +8,9 @@ combining evidence to support fact-checking operations.
 
 import logging
 import time
-import re
-import random
 import requests
-import json
 import ssl
+import urllib.request
 from urllib.parse import urlencode
 from bs4 import BeautifulSoup
 from SPARQLWrapper import SPARQLWrapper, JSON
@@ -24,6 +22,7 @@ from utils.models import get_nlp_model
 from modules.claim_extraction import shorten_claim_for_evidence
 from modules.rss_feed import retrieve_evidence_from_rss
 from config import NEWS_API_KEY, FACTCHECK_API_KEY
+from modules.category_detection import get_category_specific_rss_feeds, get_fallback_category, detect_claim_category
 # Import the performance tracker
 from utils.performance import PerformanceTracker
 performance_tracker = PerformanceTracker()
@@ -342,11 +341,7 @@ def retrieve_evidence_from_wikidata(claim):
         sparql.addCustomHttpHeader("User-Agent", "MisinformationDetectionResearchBot/1.0")
 
         # Fix SSL issues by disabling SSL verification for this specific request
-        try:
-            # Create a context where we don't verify SSL certs
-            import ssl
-            import urllib.request
-
+        try:
             # Create a context that doesn't verify certificates
             ssl_context = ssl._create_unverified_context()
 
@@ -401,10 +396,26 @@ def retrieve_evidence_from_wikidata(claim):
                 wikidata_evidence.append(evidence_text)
 
         logger.info(f"Retrieved {len(wikidata_evidence)} Wikidata entities")
+
+        # Log evidence retrieval performance
+        success = bool(wikidata_evidence)
+        source_count = {"wikidata": len(wikidata_evidence)}
+        try:
+            performance_tracker.log_evidence_retrieval(success, source_count)
+        except Exception as e:
+            logger.error(f"Error logging Wikidata evidence retrieval: {e}")
+
         return wikidata_evidence
 
     except Exception as e:
         logger.error(f"Error retrieving from Wikidata: {str(e)}")
+
+        # Log failed evidence retrieval
+        try:
+            performance_tracker.log_evidence_retrieval(False, {"wikidata": 0})
+        except Exception as log_error:
+            logger.error(f"Error logging failed Wikidata evidence retrieval: {log_error}")
+
         return []
 
 @api_error_handler("openalex")
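Both Wikidata hunks sit inside retrieve_evidence_from_wikidata, which builds an unverified SSL context to work around certificate errors on the SPARQL endpoint. How that context is wired into the actual request is not visible in the hunks; below is a minimal sketch of the usual urllib.request pattern, which would also explain why import urllib.request moved to module level. The opener installation is an assumption for illustration, not code from the file.

    import ssl
    import urllib.request

    # Same call as in the hunk: a context that skips certificate verification.
    ssl_context = ssl._create_unverified_context()

    # Install a global opener built on that context so libraries that route
    # their HTTP calls through urllib.request (SPARQLWrapper does) also skip
    # certificate verification.
    https_handler = urllib.request.HTTPSHandler(context=ssl_context)
    opener = urllib.request.build_opener(https_handler)
    urllib.request.install_opener(opener)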
@@ -478,10 +489,26 @@ def retrieve_evidence_from_openalex(claim):
             logger.error(f"Unexpected error in OpenAlex request: {str(e)}")
 
         logger.info(f"Retrieved {len(scholarly_evidence)} scholarly papers from OpenAlex")
+
+        # Log evidence retrieval performance
+        success = bool(scholarly_evidence)
+        source_count = {"openalex": len(scholarly_evidence)}
+        try:
+            performance_tracker.log_evidence_retrieval(success, source_count)
+        except Exception as e:
+            logger.error(f"Error logging OpenAlex evidence retrieval: {e}")
+
         return scholarly_evidence
 
     except Exception as e:
         logger.error(f"Fatal error in OpenAlex retrieval: {str(e)}")
+
+        # Log failed evidence retrieval
+        try:
+            performance_tracker.log_evidence_retrieval(False, {"openalex": 0})
+        except Exception as log_error:
+            logger.error(f"Error logging failed OpenAlex evidence retrieval: {log_error}")
+
         return []
 
 @api_error_handler("factcheck")
@@ -702,8 +729,9 @@ def retrieve_news_articles(claim, requires_recent=False):
         news_texts = [item["text"] for item in news_results]
 
         # Log evidence retrieval
+        success = bool(news_texts)
+        source_count = {"news": len(news_texts)}
         try:
-            success = bool(news_texts)
             performance_tracker.log_evidence_retrieval(success, source_count)
         except Exception as log_error:
             logger.error(f"Error logging evidence retrieval: {log_error}")
@@ -736,12 +764,9 @@ def retrieve_combined_evidence(claim):
     logger.info(f"Starting evidence retrieval for: {claim}")
     start_time = time.time()
 
-    # Use the category detector to identify the claim category
-    from modules.category_detection import get_category_specific_rss_feeds, get_fallback_category, detect_claim_category
-
     # Extract key claim components for relevance matching
     claim_components = extract_claim_components(claim)
-    logger.info(f"Extracted claim components: entities={claim_components['entities']}, verbs={claim_components['verbs']}")
+    logger.info(f"Extracted claim components: entities={claim_components.get('entities', [])}, verbs={claim_components.get('verbs', [])}")
 
     # Determine if claim has temporal attributes
    requires_recent_evidence = bool(claim_components.get("temporal_words", []))
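The last hunk also swaps direct indexing for dict.get() with a default when logging the extracted components, so a missing key no longer raises. A small illustration, with made-up component values:

    # Hypothetical extracted components; "verbs" is deliberately absent.
    claim_components = {"entities": ["NASA"], "temporal_words": ["yesterday"]}

    entities = claim_components.get("entities", [])  # ["NASA"]
    verbs = claim_components.get("verbs", [])        # [] instead of a KeyError
    print(f"Extracted claim components: entities={entities}, verbs={verbs}")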
 
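Most of the 38 added lines repeat one pattern: compute a success flag and a per-source count dict, then pass both to the performance tracker inside its own try/except so a logging failure can never abort evidence retrieval. A condensed sketch of that pattern, assuming only the log_evidence_retrieval(success, source_count) call visible in the diff; the log_retrieval helper and the sample evidence list are hypothetical.

    import logging

    from utils.performance import PerformanceTracker

    logger = logging.getLogger(__name__)
    performance_tracker = PerformanceTracker()

    def log_retrieval(source_name, evidence_items):
        """Record whether a source returned evidence and how much (hypothetical helper)."""
        success = bool(evidence_items)
        source_count = {source_name: len(evidence_items)}
        try:
            performance_tracker.log_evidence_retrieval(success, source_count)
        except Exception as log_error:
            # Tracking failures are reported but never propagated into retrieval.
            logger.error(f"Error logging {source_name} evidence retrieval: {log_error}")

    # For example, after a Wikidata query:
    log_retrieval("wikidata", ["Entity: Sun | Description: star at the center of the Solar System"])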