wozwize committed on
Commit
5c3b4a6
·
1 Parent(s): 884bb19

updating backend to support either AI-powered or traditional scoring and to return flagged phrases; updating Supabase table calls to incorporate the new analysis_mode column

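For reviewers, a minimal sketch of how a client could exercise the new toggle once this commit is deployed. The base URL is a placeholder, and the field names follow the updated ArticleRequest / AnalysisResponse models in this diff:

    import requests  # hypothetical client script, not part of this commit

    API_URL = "http://localhost:8000/analyze"  # placeholder; use your deployment's base URL

    payload = {
        "url": "https://example.com/some-article",  # article to analyze
        "use_ai": False,  # False -> traditional scoring, True (default) -> AI-powered scoring
    }

    resp = requests.post(API_URL, json=payload, timeout=120)
    resp.raise_for_status()
    result = resp.json()

    # analysis_mode is the new field that is also persisted to Supabase
    print(result["analysis_mode"])                        # 'ai' or 'traditional'
    print(result["media_score"]["media_unmasked_score"])  # overall credibility score
    print(result["media_score"]["rating"])                # human-readable rating

Results are cached per (url, analysis_mode) pair, so the same article can hold one cached analysis for each mode.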
app/routers/analyze.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import APIRouter, HTTPException
2
  from pydantic import BaseModel, HttpUrl
3
- from typing import Dict, Any, List
4
  import logging
5
  import os
6
  from supabase import AsyncClient
@@ -20,7 +20,6 @@ logger = logging.getLogger(__name__)
20
  # Initialize router and dependencies
21
  router = APIRouter(tags=["analysis"])
22
  scraper = ArticleScraper()
23
- scorer = MediaScorer()
24
 
25
  # Get Supabase credentials
26
  SUPABASE_URL = os.getenv("SUPABASE_URL")
@@ -32,8 +31,12 @@ if not SUPABASE_URL or not SUPABASE_KEY:
32
 
33
  supabase = AsyncClient(SUPABASE_URL, SUPABASE_KEY)
34
 
 
 
 
35
  class ArticleRequest(BaseModel):
36
  url: HttpUrl
 
37
 
38
  class MediaScoreDetails(BaseModel):
39
  headline_analysis: Dict[str, Any]
@@ -54,6 +57,7 @@ class AnalysisResponse(BaseModel):
54
  bias_score: float
55
  bias_percentage: float
56
  media_score: MediaScore
 
57
 
58
  @router.post("/analyze", response_model=AnalysisResponse)
59
  async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
@@ -61,7 +65,7 @@ async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
61
  Analyze an article for bias, sentiment, and credibility.
62
 
63
  Args:
64
- request: ArticleRequest containing the URL to analyze
65
 
66
  Returns:
67
  AnalysisResponse with complete analysis results
@@ -70,16 +74,27 @@ async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
70
  HTTPException: If scraping or analysis fails
71
  """
72
  try:
73
- logger.info(f"Analyzing article: {request.url}")
74
-
75
- # Check if the article has already been analyzed
76
- existing_article = await supabase.table('article_analysis').select('*').eq('url', str(request.url)).execute()
77
 
78
- if existing_article.data and len(existing_article.data) > 0:
79
- logger.info("Article already analyzed. Returning cached data.")
80
- # Return the existing analysis result if it exists
81
- cached_data = existing_article.data[0]
82
- return AnalysisResponse.parse_obj(cached_data)
83
 
84
  # Scrape article
85
  article = scraper.scrape_article(str(request.url))
@@ -89,6 +104,9 @@ async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
89
  detail="Failed to scrape article content"
90
  )
91
 
 
 
 
92
  # Analyze content
93
  analysis = scorer.calculate_media_score(
94
  article["headline"],
@@ -108,6 +126,7 @@ async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
108
  "bias": str(analysis['details']['bias_analysis']['bias']),
109
  "bias_score": float(analysis['details']['bias_analysis']['bias_score']),
110
  "bias_percentage": float(analysis['details']['bias_analysis']['bias_percentage']),
 
111
  "media_score": {
112
  "media_unmasked_score": float(analysis['media_unmasked_score']),
113
  "rating": str(analysis['rating']),
@@ -135,17 +154,26 @@ async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
135
  }
136
  }
137
 
138
- # Save the new analysis to Supabase
139
- await supabase.table('article_analysis').upsert({
140
- 'url': str(request.url),
141
- 'headline': response_dict['headline'],
142
- 'content': response_dict['content'],
143
- 'sentiment': response_dict['sentiment'],
144
- 'bias': response_dict['bias'],
145
- 'bias_score': response_dict['bias_score'],
146
- 'bias_percentage': response_dict['bias_percentage'],
147
- 'media_score': response_dict['media_score']
148
- }).execute()
149
 
150
  # Return the response
151
  return AnalysisResponse.parse_obj(response_dict)
 
1
  from fastapi import APIRouter, HTTPException
2
  from pydantic import BaseModel, HttpUrl
3
+ from typing import Dict, Any, List, Literal
4
  import logging
5
  import os
6
  from supabase import AsyncClient
 
20
  # Initialize router and dependencies
21
  router = APIRouter(tags=["analysis"])
22
  scraper = ArticleScraper()
 
23
 
24
  # Get Supabase credentials
25
  SUPABASE_URL = os.getenv("SUPABASE_URL")
 
31
 
32
  supabase = AsyncClient(SUPABASE_URL, SUPABASE_KEY)
33
 
34
+ # Define analysis mode type
35
+ AnalysisMode = Literal['ai', 'traditional']
36
+
37
  class ArticleRequest(BaseModel):
38
  url: HttpUrl
39
+ use_ai: bool = True # Default to AI-powered analysis
40
 
41
  class MediaScoreDetails(BaseModel):
42
  headline_analysis: Dict[str, Any]
 
57
  bias_score: float
58
  bias_percentage: float
59
  media_score: MediaScore
60
+ analysis_mode: AnalysisMode
61
 
62
  @router.post("/analyze", response_model=AnalysisResponse)
63
  async def analyze_article(request: ArticleRequest) -> AnalysisResponse:
 
65
  Analyze an article for bias, sentiment, and credibility.
66
 
67
  Args:
68
+ request: ArticleRequest containing the URL to analyze and analysis preferences
69
 
70
  Returns:
71
  AnalysisResponse with complete analysis results
 
74
  HTTPException: If scraping or analysis fails
75
  """
76
  try:
77
+ # Determine analysis mode
78
+ analysis_mode: AnalysisMode = 'ai' if request.use_ai else 'traditional'
79
+ logger.info(f"Analyzing article: {request.url} (Analysis Mode: {analysis_mode})")
 
80
 
81
+ # Check cache with both URL and analysis mode
82
+ try:
83
+ cached_result = await supabase.table('article_analysis') \
84
+ .select('*') \
85
+ .eq('url', str(request.url)) \
86
+ .eq('analysis_mode', analysis_mode) \
87
+ .limit(1) \
88
+ .single() \
89
+ .execute()
90
+
91
+ if cached_result and cached_result.data:
92
+ logger.info(f"Found cached analysis for URL with {analysis_mode} mode")
93
+ return AnalysisResponse.parse_obj(cached_result.data)
94
+
95
+ except Exception as cache_error:
96
+ logger.warning(f"Cache lookup failed: {str(cache_error)}")
97
+ # Continue with analysis if cache lookup fails
98
 
99
  # Scrape article
100
  article = scraper.scrape_article(str(request.url))
 
104
  detail="Failed to scrape article content"
105
  )
106
 
107
+ # Initialize scorer with specified analysis preference
108
+ scorer = MediaScorer(use_ai=request.use_ai)
109
+
110
  # Analyze content
111
  analysis = scorer.calculate_media_score(
112
  article["headline"],
 
126
  "bias": str(analysis['details']['bias_analysis']['bias']),
127
  "bias_score": float(analysis['details']['bias_analysis']['bias_score']),
128
  "bias_percentage": float(analysis['details']['bias_analysis']['bias_percentage']),
129
+ "analysis_mode": analysis_mode,
130
  "media_score": {
131
  "media_unmasked_score": float(analysis['media_unmasked_score']),
132
  "rating": str(analysis['rating']),
 
154
  }
155
  }
156
 
157
+ # Save to Supabase with analysis mode
158
+ try:
159
+ await supabase.table('article_analysis').upsert({
160
+ 'url': str(request.url),
161
+ 'headline': response_dict['headline'],
162
+ 'content': response_dict['content'],
163
+ 'sentiment': response_dict['sentiment'],
164
+ 'bias': response_dict['bias'],
165
+ 'bias_score': response_dict['bias_score'],
166
+ 'bias_percentage': response_dict['bias_percentage'],
167
+ 'media_score': response_dict['media_score'],
168
+ 'analysis_mode': analysis_mode, # Store the analysis mode
169
+ 'created_at': 'now()' # Use server timestamp
170
+ }, on_conflict='url,analysis_mode').execute() # Specify composite unique constraint
171
+
172
+ logger.info(f"Saved analysis to database with mode: {analysis_mode}")
173
+
174
+ except Exception as db_error:
175
+ logger.error(f"Failed to save to database: {str(db_error)}")
176
+ # Continue since we can still return the analysis even if saving fails
177
 
178
  # Return the response
179
  return AnalysisResponse.parse_obj(response_dict)
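Since the scorer is now constructed per request with the analysis preference, here is a minimal sketch of driving MediaScorer directly in both modes (import path assumed from the file layout in this repo; headline and content would normally come from the scraper):

    from mediaunmasked.analyzers.scoring import MediaScorer  # assumed import path

    headline = "Example headline"
    content = "Example article body..."

    # Traditional mode: keyword/heuristic analyzers only, no transformer models are loaded
    traditional = MediaScorer(use_ai=False).calculate_media_score(headline, content)

    # AI mode (the default): transformer pipelines, falling back to the traditional path if they fail to load
    ai_based = MediaScorer(use_ai=True).calculate_media_score(headline, content)

    for result in (traditional, ai_based):
        print(result["analysis_mode"], result["media_unmasked_score"], result["rating"])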
mediaunmasked/analyzers/bias_analyzer.py CHANGED
@@ -1,14 +1,72 @@
1
  import logging
2
  import os
3
  from typing import Dict, Any, List
 
 
4
 
5
  logger = logging.getLogger(__name__)
6
 
7
  class BiasAnalyzer:
8
- def __init__(self):
9
  self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
10
  self.left_keywords = self._load_keywords('left_bias_words.txt')
11
  self.right_keywords = self._load_keywords('right_bias_words.txt')
12
 
13
  def _load_keywords(self, filename: str) -> List[str]:
14
  """Load keywords from file."""
@@ -20,63 +78,146 @@ class BiasAnalyzer:
20
  logger.error(f"Error loading {filename}: {str(e)}")
21
  return []
22
 
23
- def analyze(self, text: str) -> Dict[str, Any]:
24
- """Detect bias using keyword analysis."""
25
  try:
26
- text_lower = text.lower()
27
 
28
- flagged_phrases = []
 
29
 
30
- # Count matches and collect flagged phrases
31
- left_count = sum(1 for word in self.left_keywords if word in text_lower)
32
- flagged_phrases.extend([word for word in self.left_keywords if word in text_lower])
33
- right_count = sum(1 for word in self.right_keywords if word in text_lower)
34
- flagged_phrases.extend([word for word in self.right_keywords if word in text_lower])
35
-
36
- total_words = left_count + right_count
37
- if total_words == 0:
38
- return {
39
- "bias": "Neutral",
40
- "bias_score": 0.0, # True neutral
41
- "bias_percentage": 0, # Neutral percentage
42
- "flagged_phrases": []
43
- }
44
 
45
- # New bias score formula (-1.0 left, 0.0 neutral, 1.0 right)
46
- bias_score = (right_count - left_count) / total_words
47
 
48
- # Convert bias_score to percentage (-100% to +100%)
49
- bias_percentage = bias_score * 100
50
- logger.info(f"Bias score: {bias_score:.2f}, Bias percentage: {bias_percentage:.1f}%")
51
 
52
  # Determine bias label
53
- if bias_score < -0.8:
54
  bias = "Strongly Left"
55
- elif bias_score < -0.5:
56
  bias = "Moderately Left"
57
- elif bias_score < -0.2:
58
  bias = "Leaning Left"
59
- elif bias_score > 0.8:
60
  bias = "Strongly Right"
61
- elif bias_score > 0.5:
62
  bias = "Moderately Right"
63
- elif bias_score > 0.2:
64
  bias = "Leaning Right"
65
  else:
66
  bias = "Neutral"
67
 
 
 
 
68
  return {
69
  "bias": bias,
70
- "bias_score": round(bias_score, 2), # Keep 2 decimal places
71
- "bias_percentage": abs(round(bias_percentage, 1)),
72
- "flagged_phrases": flagged_phrases
73
  }
74
 
75
  except Exception as e:
76
- logger.error(f"Error in bias analysis: {str(e)}")
77
- return {
78
- "bias": "Error",
79
- "bias_score": 0.0,
80
- "bias_percentage": 0,
81
- "flagged_phrases": []
82
- }
 
1
  import logging
2
  import os
3
  from typing import Dict, Any, List
4
+ from transformers import pipeline
5
+ import numpy as np
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
  class BiasAnalyzer:
10
+ def __init__(self, use_ai: bool = True):
11
+ """
12
+ Initialize bias analyzer with both LLM and traditional approaches.
13
+
14
+ Args:
15
+ use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
16
+ """
17
+ self.use_ai = use_ai
18
+ self.llm_available = False
19
+
20
+ # Load traditional keywords
21
  self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
22
  self.left_keywords = self._load_keywords('left_bias_words.txt')
23
  self.right_keywords = self._load_keywords('right_bias_words.txt')
24
+
25
+ if use_ai:
26
+ try:
27
+ # Initialize LLM pipeline for zero-shot classification
28
+ self.classifier = pipeline(
29
+ "zero-shot-classification",
30
+ model="facebook/bart-large-mnli",
31
+ device=-1 # Use CPU, change to specific GPU index if available
32
+ )
33
+ self.llm_available = True
34
+ logger.info("LLM pipeline initialized successfully for bias analysis")
35
+ except Exception as e:
36
+ logger.warning(f"Failed to initialize LLM pipeline: {str(e)}")
37
+ self.llm_available = False
38
+ else:
39
+ logger.info("Initializing bias analyzer in traditional mode")
40
+
41
+ def analyze(self, text: str) -> Dict[str, Any]:
42
+ """
43
+ Analyze bias using LLM with fallback to traditional method.
44
+
45
+ Args:
46
+ text: The text to analyze
47
+
48
+ Returns:
49
+ Dict containing bias analysis results
50
+ """
51
+ try:
52
+ # Try LLM analysis if enabled and available
53
+ if self.use_ai and self.llm_available:
54
+ llm_result = self._analyze_with_llm(text)
55
+ if llm_result:
56
+ return llm_result
57
+
58
+ # Use traditional analysis
59
+ logger.info("Using traditional bias analysis")
60
+ return self._analyze_traditional(text)
61
+
62
+ except Exception as e:
63
+ logger.error(f"Error in bias analysis: {str(e)}")
64
+ return {
65
+ "bias": "Error",
66
+ "bias_score": 0.0,
67
+ "bias_percentage": 0,
68
+ "flagged_phrases": []
69
+ }
70
 
71
  def _load_keywords(self, filename: str) -> List[str]:
72
  """Load keywords from file."""
 
78
  logger.error(f"Error loading {filename}: {str(e)}")
79
  return []
80
 
81
+ def _analyze_traditional(self, text: str) -> Dict[str, Any]:
82
+ """Traditional keyword-based bias analysis."""
83
+ text_lower = text.lower()
84
+
85
+ # Count matches and collect flagged phrases
86
+ left_matches = [word for word in self.left_keywords if word in text_lower]
87
+ right_matches = [word for word in self.right_keywords if word in text_lower]
88
+
89
+ left_count = len(left_matches)
90
+ right_count = len(right_matches)
91
+ total_count = left_count + right_count
92
+
93
+ if total_count == 0:
94
+ return {
95
+ "bias": "Neutral",
96
+ "bias_score": 0.0,
97
+ "bias_percentage": 0,
98
+ "flagged_phrases": []
99
+ }
100
+
101
+ # Calculate bias score (-1 to 1)
102
+ bias_score = (right_count - left_count) / total_count
103
+
104
+ # Calculate bias percentage
105
+ bias_percentage = abs(bias_score * 100)
106
+
107
+ # Determine bias label
108
+ if bias_score < -0.6:
109
+ bias = "Strongly Left"
110
+ elif bias_score < -0.3:
111
+ bias = "Moderately Left"
112
+ elif bias_score < -0.1:
113
+ bias = "Leaning Left"
114
+ elif bias_score > 0.6:
115
+ bias = "Strongly Right"
116
+ elif bias_score > 0.3:
117
+ bias = "Moderately Right"
118
+ elif bias_score > 0.1:
119
+ bias = "Leaning Right"
120
+ else:
121
+ bias = "Neutral"
122
+
123
+ return {
124
+ "bias": bias,
125
+ "bias_score": round(bias_score, 2),
126
+ "bias_percentage": round(bias_percentage, 1),
127
+ "flagged_phrases": list(set(left_matches + right_matches))[:5] # Limit to top 5 unique phrases
128
+ }
129
+
130
+ def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
131
+ """Analyze bias using LLM zero-shot classification."""
132
  try:
133
+ # Define bias categories to check against
134
+ bias_categories = [
135
+ "left-wing bias",
136
+ "right-wing bias",
137
+ "neutral/balanced perspective"
138
+ ]
139
 
140
+ # Split text into manageable chunks (2000 chars each)
141
+ chunks = [text[i:i+2000] for i in range(0, len(text), 2000)]
142
 
143
+ # Analyze each chunk
144
+ chunk_scores = []
145
+ flagged_phrases = []
146
 
147
+ for chunk in chunks:
148
+ # Perform zero-shot classification
149
+ result = self.classifier(
150
+ chunk,
151
+ bias_categories,
152
+ multi_label=True
153
+ )
154
+
155
+ chunk_scores.append({
156
+ label: score
157
+ for label, score in zip(result['labels'], result['scores'])
158
+ })
159
+
160
+ # Identify strongly biased phrases
161
+ sentences = chunk.split('.')
162
+ for sentence in sentences:
163
+ if len(sentence.strip()) > 10: # Ignore very short sentences
164
+ sentence_result = self.classifier(
165
+ sentence.strip(),
166
+ bias_categories,
167
+ multi_label=False
168
+ )
169
+ max_score = max(sentence_result['scores'])
170
+ if max_score > 0.8 and sentence_result['labels'][0] != "neutral/balanced perspective":
171
+ flagged_phrases.append(sentence.strip())
172
+
173
+ # Aggregate scores across chunks
174
+ aggregated_scores = {
175
+ category: np.mean([
176
+ scores[category]
177
+ for scores in chunk_scores
178
+ ])
179
+ for category in bias_categories
180
+ }
181
+
182
+ # Calculate bias metrics
183
+ left_score = aggregated_scores["left-wing bias"]
184
+ right_score = aggregated_scores["right-wing bias"]
185
+ neutral_score = aggregated_scores["neutral/balanced perspective"]
186
 
187
+ # Calculate bias score (-1 to 1, where negative is left and positive is right)
188
+ bias_score = (right_score - left_score) / max(right_score + left_score, 0.0001)
 
189
 
190
  # Determine bias label
191
+ if bias_score < -0.6:
192
  bias = "Strongly Left"
193
+ elif bias_score < -0.3:
194
  bias = "Moderately Left"
195
+ elif bias_score < -0.1:
196
  bias = "Leaning Left"
197
+ elif bias_score > 0.6:
198
  bias = "Strongly Right"
199
+ elif bias_score > 0.3:
200
  bias = "Moderately Right"
201
+ elif bias_score > 0.1:
202
  bias = "Leaning Right"
203
  else:
204
  bias = "Neutral"
205
 
206
+ # Calculate bias percentage (0-100)
207
+ bias_percentage = min(100, abs(bias_score * 100))
208
+
209
  return {
210
  "bias": bias,
211
+ "bias_score": round(bias_score, 2),
212
+ "bias_percentage": round(bias_percentage, 1),
213
+ "flagged_phrases": list(set(flagged_phrases))[:5], # Limit to top 5 unique phrases
214
+ "detailed_scores": {
215
+ "left_bias": round(left_score * 100, 1),
216
+ "right_bias": round(right_score * 100, 1),
217
+ "neutral": round(neutral_score * 100, 1)
218
+ }
219
  }
220
 
221
  except Exception as e:
222
+ logger.error(f"LLM analysis failed: {str(e)}")
223
+ return None
mediaunmasked/analyzers/evidence_analyzer.py CHANGED
@@ -1,10 +1,40 @@
1
  import logging
2
  from typing import Dict, Any, List
 
 
 
 
3
 
4
  logger = logging.getLogger(__name__)
5
 
6
  class EvidenceAnalyzer:
7
- def __init__(self):
8
  self.citation_markers = [
9
  "according to",
10
  "said",
@@ -29,25 +59,187 @@ class EvidenceAnalyzer:
29
  "allegedly"
30
  ]
31
 
32
- def analyze(self, text: str) -> Dict[str, Any]:
33
- """Check for evidence-based reporting."""
34
  try:
35
  text_lower = text.lower()
36
 
37
- citation_count = sum(1 for marker in self.citation_markers if marker in text_lower)
38
  vague_count = sum(1 for marker in self.vague_markers if marker in text_lower)
39
 
 
 
40
  base_score = min(citation_count * 20, 100)
41
  penalty = vague_count * 10
42
 
43
  evidence_score = max(0, base_score - penalty)
44
 
45
  return {
46
- "evidence_based_score": evidence_score
 
47
  }
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  except Exception as e:
50
  logger.error(f"Error in evidence analysis: {str(e)}")
51
  return {
52
- "evidence_based_score": 0
 
53
  }
 
1
  import logging
2
  from typing import Dict, Any, List
3
+ from transformers import pipeline
4
+ import numpy as np
5
+ import nltk
6
+ from nltk.tokenize import sent_tokenize
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
  class EvidenceAnalyzer:
11
+ def __init__(self, use_ai: bool = True):
12
+ """
13
+ Initialize evidence analyzer with LLM and traditional approaches.
14
+
15
+ Args:
16
+ use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
17
+ """
18
+ self.use_ai = use_ai
19
+ self.llm_available = False
20
+
21
+ if use_ai:
22
+ try:
23
+ # Zero-shot classifier for evidence analysis
24
+ self.classifier = pipeline(
25
+ "zero-shot-classification",
26
+ model="facebook/bart-large-mnli",
27
+ device=-1
28
+ )
29
+ self.llm_available = True
30
+ logger.info("LLM pipeline initialized successfully for evidence analysis")
31
+ except Exception as e:
32
+ logger.warning(f"Failed to initialize LLM pipeline: {str(e)}")
33
+ self.llm_available = False
34
+ else:
35
+ logger.info("Initializing evidence analyzer in traditional mode")
36
+
37
+ # Traditional markers for fallback
38
  self.citation_markers = [
39
  "according to",
40
  "said",
 
59
  "allegedly"
60
  ]
61
 
62
+ def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
63
+ """Analyze evidence using LLM."""
64
+ try:
65
+ # Clean the text of formatting markers
66
+ cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
67
+ cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
68
+ if not line.startswith('[') and not line.startswith('More on'))
69
+
70
+ # Download NLTK data if needed
71
+ try:
72
+ nltk.data.find('tokenizers/punkt')
73
+ except LookupError:
74
+ nltk.download('punkt')
75
+
76
+ # Split text into chunks
77
+ chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
78
+
79
+ # Categories for evidence classification
80
+ evidence_categories = [
81
+ "factual statement with source",
82
+ "verifiable claim",
83
+ "expert opinion",
84
+ "data-backed claim",
85
+ "unsubstantiated claim",
86
+ "opinion statement"
87
+ ]
88
+
89
+ chunk_scores = []
90
+ flagged_phrases = []
91
+
92
+ for chunk in chunks:
93
+ # Analyze each sentence in the chunk
94
+ sentences = sent_tokenize(chunk)
95
+
96
+ for sentence in sentences:
97
+ if len(sentence.strip()) > 10:
98
+ # Classify the type of evidence
99
+ result = self.classifier(
100
+ sentence.strip(),
101
+ evidence_categories,
102
+ multi_label=True
103
+ )
104
+
105
+ # Calculate evidence score for the sentence
106
+ evidence_scores = {
107
+ label: score
108
+ for label, score in zip(result['labels'], result['scores'])
109
+ }
110
+
111
+ # Strong evidence indicators
112
+ strong_evidence = sum([
113
+ evidence_scores.get("factual statement with source", 0),
114
+ evidence_scores.get("data-backed claim", 0),
115
+ evidence_scores.get("expert opinion", 0)
116
+ ]) / 3 # Average the strong evidence scores
117
+
118
+ # Weak or no evidence indicators
119
+ weak_evidence = sum([
120
+ evidence_scores.get("unsubstantiated claim", 0),
121
+ evidence_scores.get("opinion statement", 0)
122
+ ]) / 2 # Average the weak evidence scores
123
+
124
+ # Store scores for overall calculation
125
+ chunk_scores.append({
126
+ 'strong_evidence': strong_evidence,
127
+ 'weak_evidence': weak_evidence
128
+ })
129
+
130
+ # Flag high-quality evidence
131
+ if strong_evidence > 0.7 and not any(
132
+ marker in sentence.lower()
133
+ for marker in ['more on this story', 'click here', 'read more']
134
+ ):
135
+ flagged_phrases.append({
136
+ 'text': sentence.strip(),
137
+ 'type': 'strong_evidence',
138
+ 'score': strong_evidence
139
+ })
140
+
141
+ # Calculate overall evidence score
142
+ if chunk_scores:
143
+ avg_strong = np.mean([s['strong_evidence'] for s in chunk_scores])
144
+ avg_weak = np.mean([s['weak_evidence'] for s in chunk_scores])
145
+
146
+ # Evidence score formula:
147
+ # - Reward strong evidence (70% weight)
148
+ # - Penalize weak/unsubstantiated claims (30% weight)
149
+ # - Ensure score is between 0 and 100
150
+ evidence_score = min(100, (
151
+ (avg_strong * 0.7) +
152
+ ((1 - avg_weak) * 0.3)
153
+ ) * 100)
154
+ else:
155
+ evidence_score = 0
156
+
157
+ # Sort and select top evidence phrases
158
+ sorted_phrases = sorted(
159
+ flagged_phrases,
160
+ key=lambda x: x['score'],
161
+ reverse=True
162
+ )
163
+ # Filter out formatting text and duplicates
164
+ unique_phrases = []
165
+ seen = set()
166
+ for phrase in sorted_phrases:
167
+ clean_text = phrase['text'].strip()
168
+ if clean_text not in seen and not any(
169
+ marker in clean_text.lower()
170
+ for marker in ['more on this story', 'click here', 'read more']
171
+ ):
172
+ unique_phrases.append(clean_text)
173
+ seen.add(clean_text)
174
+ if len(unique_phrases) >= 5:
175
+ break
176
+
177
+ return {
178
+ "evidence_based_score": round(evidence_score, 1),
179
+ "flagged_phrases": unique_phrases
180
+ }
181
+
182
+ except Exception as e:
183
+ logger.error(f"LLM analysis failed: {str(e)}")
184
+ return None
185
+
186
+ def _analyze_traditional(self, text: str) -> Dict[str, Any]:
187
+ """Traditional evidence analysis as fallback."""
188
  try:
189
  text_lower = text.lower()
190
 
191
+ # Find citations and evidence
192
+ evidence_phrases = []
193
+ for marker in self.citation_markers:
194
+ index = text_lower.find(marker)
195
+ while index != -1:
196
+ # Get the sentence containing the marker
197
+ start = max(0, text_lower.rfind('.', 0, index) + 1)
198
+ end = text_lower.find('.', index)
199
+ if end == -1:
200
+ end = len(text_lower)
201
+
202
+ evidence_phrases.append(text[start:end].strip())
203
+ index = text_lower.find(marker, end)
204
+
205
+ # Count vague references
206
  vague_count = sum(1 for marker in self.vague_markers if marker in text_lower)
207
 
208
+ # Calculate score
209
+ citation_count = len(evidence_phrases)
210
  base_score = min(citation_count * 20, 100)
211
  penalty = vague_count * 10
212
 
213
  evidence_score = max(0, base_score - penalty)
214
 
215
  return {
216
+ "evidence_based_score": evidence_score,
217
+ "flagged_phrases": list(set(evidence_phrases))[:5] # Limit to top 5 unique phrases
218
  }
219
 
220
+ except Exception as e:
221
+ logger.error(f"Traditional analysis failed: {str(e)}")
222
+ return {
223
+ "evidence_based_score": 0,
224
+ "flagged_phrases": []
225
+ }
226
+
227
+ def analyze(self, text: str) -> Dict[str, Any]:
228
+ """Analyze evidence using LLM with fallback to traditional method."""
229
+ try:
230
+ # Try LLM analysis if enabled and available
231
+ if self.use_ai and self.llm_available:
232
+ llm_result = self._analyze_with_llm(text)
233
+ if llm_result:
234
+ return llm_result
235
+
236
+ # Use traditional analysis
237
+ logger.info("Using traditional evidence analysis")
238
+ return self._analyze_traditional(text)
239
+
240
  except Exception as e:
241
  logger.error(f"Error in evidence analysis: {str(e)}")
242
  return {
243
+ "evidence_based_score": 0,
244
+ "flagged_phrases": []
245
  }
mediaunmasked/analyzers/headline_analyzer.py CHANGED
@@ -1,7 +1,6 @@
1
  import logging
2
  from typing import Dict, Any, List
3
- from transformers import pipeline
4
- from transformers import AutoTokenizer
5
  import numpy as np
6
  import nltk
7
  from nltk.tokenize import sent_tokenize
@@ -9,12 +8,38 @@ from nltk.tokenize import sent_tokenize
9
  logger = logging.getLogger(__name__)
10
 
11
  class HeadlineAnalyzer:
12
- def __init__(self):
13
- """Initialize the NLI model for contradiction detection."""
14
- self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
15
- self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
16
- self.max_length = 512
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def _split_content(self, headline: str, content: str) -> List[str]:
19
  """Split content into sections that fit within token limit."""
20
  content_words = content.split()
@@ -23,7 +48,7 @@ class HeadlineAnalyzer:
23
 
24
  # Account for headline and [SEP] token in the max length
25
  headline_tokens = len(self.tokenizer.encode(headline))
26
- sep_tokens = len(self.tokenizer.encode("[SEP]")) - 2 # -2 because encode adds special tokens
27
  max_content_tokens = self.max_length - headline_tokens - sep_tokens
28
 
29
  # Process words into sections
@@ -33,7 +58,6 @@ class HeadlineAnalyzer:
33
  # Check if current section is approaching token limit
34
  current_text = " ".join(current_section)
35
  if len(self.tokenizer.encode(current_text)) >= max_content_tokens:
36
- # Remove last word (it might make us go over limit)
37
  current_section.pop()
38
  sections.append(" ".join(current_section))
39
 
@@ -42,141 +66,226 @@ class HeadlineAnalyzer:
42
  current_section = current_section[overlap_start:]
43
  current_section.append(word)
44
 
45
- # Add any remaining content as the last section
46
  if current_section:
47
  sections.append(" ".join(current_section))
48
 
49
- logger.info(f"""Content Splitting:
50
- - Original content length: {len(content_words)} words
51
- - Split into {len(sections)} sections
52
- - Headline uses {headline_tokens} tokens
53
- - Available tokens per section: {max_content_tokens}
54
- """)
55
  return sections
56
 
57
- def _analyze_section(self, headline: str, section: str) -> Dict[str, float]:
58
- """Analyze a single section of content."""
59
- # Use a more robust method for sentence splitting
60
- nltk.download('punkt')
61
- sentences = sent_tokenize(section)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- flagged_phrases = []
64
- for sentence in sentences:
65
- input_text = f"{headline} [SEP] {sentence}"
66
- result = self.nli_pipeline(input_text, top_k=None)
67
- scores = {item['label']: item['score'] for item in result}
68
-
69
- # Log the model output for debugging
70
- logger.info(f"Sentence: {sentence}")
71
- logger.info(f"Scores: {scores}")
72
-
73
- # Set the threshold for contradiction to anything higher than 0.1
74
- if scores.get('CONTRADICTION', 0) > 0.1: # Threshold set to > 0.1
75
- flagged_phrases.append(sentence)
76
-
77
- # Adjust the headline_vs_content_score based on contradictions
78
- contradiction_penalty = len(flagged_phrases) * 0.1 # Example penalty per contradiction
79
- adjusted_score = max(0, scores.get('ENTAILMENT', 0) - contradiction_penalty)
80
 
81
- logger.info("\nSection Analysis:")
82
- logger.info("-"*30)
83
- logger.info(f"Section preview: {section[:100]}...")
84
- for label, score in scores.items():
85
- logger.info(f"Label: {label:<12} Score: {score:.3f}")
86
 
87
- return {"scores": scores, "flagged_phrases": flagged_phrases, "adjusted_score": adjusted_score}
 
 
 
 
 
88
 
89
  def analyze(self, headline: str, content: str) -> Dict[str, Any]:
90
- """Analyze how well the headline matches the content using an AI model."""
91
  try:
92
  logger.info("\n" + "="*50)
93
  logger.info("HEADLINE ANALYSIS STARTED")
94
  logger.info("="*50)
95
 
96
- # Handle empty inputs
97
  if not headline.strip() or not content.strip():
98
  logger.warning("Empty headline or content provided")
99
  return {
100
  "headline_vs_content_score": 0,
101
- "entailment_score": 0,
102
- "contradiction_score": 0,
103
- "contradictory_phrases": []
104
  }
105
 
106
- # Split content if too long
107
- content_tokens = len(self.tokenizer.encode(content))
108
- if content_tokens > self.max_length:
109
- logger.warning(f"""
110
- Content Length Warning:
111
- - Total tokens: {content_tokens}
112
- - Max allowed: {self.max_length}
113
- - Splitting into sections...
114
- """)
115
  sections = self._split_content(headline, content)
 
116
 
117
  # Analyze each section
118
- section_scores = []
119
- for i, section in enumerate(sections, 1):
120
- logger.info(f"\nAnalyzing section {i}/{len(sections)}")
121
- scores = self._analyze_section(headline, section)
122
- section_scores.append(scores)
 
 
 
 
 
 
 
123
 
124
- # Aggregate scores across sections
125
- # Use max contradiction (if any section strongly contradicts, that's important)
126
- # Use mean entailment (overall support across sections)
127
- # Use mean neutral (general neutral tone across sections)
128
- entailment_score = np.mean([s.get('ENTAILMENT', 0) for s in section_scores])
129
- contradiction_score = np.max([s.get('CONTRADICTION', 0) for s in section_scores])
130
- neutral_score = np.mean([s.get('NEUTRAL', 0) for s in section_scores])
131
 
132
- logger.info("\nAggregated Scores Across Sections:")
133
- logger.info("-"*30)
134
- logger.info(f"Mean Entailment: {entailment_score:.3f}")
135
- logger.info(f"Max Contradiction: {contradiction_score:.3f}")
136
- logger.info(f"Mean Neutral: {neutral_score:.3f}")
137
  else:
138
- # Single section analysis
139
- scores = self._analyze_section(headline, content)
140
- entailment_score = scores.get('ENTAILMENT', 0)
141
- contradiction_score = scores.get('CONTRADICTION', 0)
142
- neutral_score = scores.get('NEUTRAL', 0)
143
-
144
- # Compute final consistency score
145
- final_score = (
146
- (entailment_score * 0.6) + # Base score from entailment
147
- (neutral_score * 0.3) + # Neutral is acceptable
148
- ((1 - contradiction_score) * 0.1) # Small penalty for contradiction
149
- ) * 100
150
-
151
- # Log final results
152
- logger.info("\nFinal Analysis Results:")
153
- logger.info("-"*30)
154
- logger.info(f"Headline: {headline}")
155
- logger.info(f"Content Length: {content_tokens} tokens")
156
- logger.info("\nFinal Scores:")
157
- logger.info(f"{'Entailment:':<15} {entailment_score:.3f}")
158
- logger.info(f"{'Neutral:':<15} {neutral_score:.3f}")
159
- logger.info(f"{'Contradiction:':<15} {contradiction_score:.3f}")
160
- logger.info(f"\nFinal Score: {final_score:.1f}%")
161
- logger.info("="*50 + "\n")
162
-
163
- return {
164
- "headline_vs_content_score": round(final_score, 1),
165
- "entailment_score": round(entailment_score, 2),
166
- "contradiction_score": round(contradiction_score, 2),
167
- "contradictory_phrases": scores.get('flagged_phrases', [])
168
- }
169
 
170
  except Exception as e:
171
- logger.error("\nHEADLINE ANALYSIS ERROR")
172
- logger.error("-"*30)
173
- logger.error(f"Error Type: {type(e).__name__}")
174
- logger.error(f"Error Message: {str(e)}")
175
- logger.error("Stack Trace:", exc_info=True)
176
- logger.error("="*50 + "\n")
177
  return {
178
  "headline_vs_content_score": 0,
179
- "entailment_score": 0,
180
- "contradiction_score": 0,
181
- "contradictory_phrases": []
182
  }
 
1
  import logging
2
  from typing import Dict, Any, List
3
+ from transformers import pipeline, AutoTokenizer
 
4
  import numpy as np
5
  import nltk
6
  from nltk.tokenize import sent_tokenize
 
8
  logger = logging.getLogger(__name__)
9
 
10
  class HeadlineAnalyzer:
11
+ def __init__(self, use_ai: bool = True):
12
+ """
13
+ Initialize the analyzers for headline analysis.
 
 
14
 
15
+ Args:
16
+ use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
17
+ """
18
+ self.use_ai = use_ai
19
+ self.llm_available = False
20
+
21
+ if use_ai:
22
+ try:
23
+ # NLI model for contradiction/entailment
24
+ self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
25
+
26
+ # Zero-shot classifier for clickbait and sensationalism
27
+ self.zero_shot = pipeline(
28
+ "zero-shot-classification",
29
+ model="facebook/bart-large-mnli",
30
+ device=-1
31
+ )
32
+
33
+ self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
34
+ self.max_length = 512
35
+ self.llm_available = True
36
+ logger.info("LLM pipelines initialized successfully for headline analysis")
37
+ except Exception as e:
38
+ logger.warning(f"Failed to initialize LLM pipelines: {str(e)}")
39
+ self.llm_available = False
40
+ else:
41
+ logger.info("Initializing headline analyzer in traditional mode")
42
+
43
  def _split_content(self, headline: str, content: str) -> List[str]:
44
  """Split content into sections that fit within token limit."""
45
  content_words = content.split()
 
48
 
49
  # Account for headline and [SEP] token in the max length
50
  headline_tokens = len(self.tokenizer.encode(headline))
51
+ sep_tokens = len(self.tokenizer.encode("[SEP]")) - 2
52
  max_content_tokens = self.max_length - headline_tokens - sep_tokens
53
 
54
  # Process words into sections
 
58
  # Check if current section is approaching token limit
59
  current_text = " ".join(current_section)
60
  if len(self.tokenizer.encode(current_text)) >= max_content_tokens:
 
61
  current_section.pop()
62
  sections.append(" ".join(current_section))
63
 
 
66
  current_section = current_section[overlap_start:]
67
  current_section.append(word)
68
 
69
+ # Add any remaining content
70
  if current_section:
71
  sections.append(" ".join(current_section))
72
 
 
 
 
 
 
 
73
  return sections
74
 
75
+ def _analyze_section(self, headline: str, section: str) -> Dict[str, Any]:
76
+ """Analyze a single section for headline accuracy and sensationalism."""
77
+ try:
78
+ # Download NLTK data if needed
79
+ try:
80
+ nltk.data.find('tokenizers/punkt')
81
+ except LookupError:
82
+ nltk.download('punkt')
83
+
84
+ sentences = sent_tokenize(section)
85
+
86
+ # Analyze headline against content for contradiction/entailment
87
+ nli_scores = []
88
+ flagged_phrases = []
89
+
90
+ # Categories for sensationalism check
91
+ sensationalism_categories = [
92
+ "clickbait",
93
+ "sensationalized",
94
+ "misleading",
95
+ "factual reporting",
96
+ "accurate headline"
97
+ ]
98
+
99
+ # Check headline for sensationalism
100
+ sensationalism_result = self.zero_shot(
101
+ headline,
102
+ sensationalism_categories,
103
+ multi_label=True
104
+ )
105
+
106
+ sensationalism_scores = {
107
+ label: score
108
+ for label, score in zip(sensationalism_result['labels'], sensationalism_result['scores'])
109
+ }
110
+
111
+ # Analyze each sentence for contradiction/support
112
+ for sentence in sentences:
113
+ if len(sentence.strip()) > 10:
114
+ # Check for contradiction/entailment
115
+ input_text = f"{headline} [SEP] {sentence}"
116
+ nli_result = self.nli_pipeline(input_text, top_k=None)
117
+ scores = {item['label']: item['score'] for item in nli_result}
118
+ nli_scores.append(scores)
119
+
120
+ # Flag contradictory or highly sensationalized content
121
+ if scores.get('CONTRADICTION', 0) > 0.4:
122
+ flagged_phrases.append({
123
+ 'text': sentence.strip(),
124
+ 'type': 'contradiction',
125
+ 'score': scores['CONTRADICTION']
126
+ })
127
+
128
+ # Calculate aggregate scores
129
+ avg_scores = {
130
+ label: np.mean([score[label] for score in nli_scores])
131
+ for label in ['ENTAILMENT', 'CONTRADICTION', 'NEUTRAL']
132
+ }
133
+
134
+ # Calculate headline accuracy score
135
+ accuracy_components = {
136
+ 'entailment': avg_scores['ENTAILMENT'] * 0.4,
137
+ 'non_contradiction': (1 - avg_scores['CONTRADICTION']) * 0.3,
138
+ 'non_sensational': (
139
+ sensationalism_scores.get('factual reporting', 0) +
140
+ sensationalism_scores.get('accurate headline', 0)
141
+ ) * 0.15,
142
+ 'non_clickbait': (
143
+ 1 - sensationalism_scores.get('clickbait', 0) -
144
+ sensationalism_scores.get('sensationalized', 0)
145
+ ) * 0.15
146
+ }
147
+
148
+ accuracy_score = sum(accuracy_components.values()) * 100
149
+
150
+ # Sort and limit flagged phrases
151
+ sorted_phrases = sorted(
152
+ flagged_phrases,
153
+ key=lambda x: x['score'],
154
+ reverse=True
155
+ )
156
+ top_phrases = [phrase['text'] for phrase in sorted_phrases[:5]]
157
+
158
+ return {
159
+ "accuracy_score": accuracy_score,
160
+ "flagged_phrases": top_phrases,
161
+ "detailed_scores": {
162
+ "nli": avg_scores,
163
+ "sensationalism": sensationalism_scores
164
+ }
165
+ }
166
+
167
+ except Exception as e:
168
+ logger.error(f"Section analysis failed: {str(e)}")
169
+ return {
170
+ "accuracy_score": 0,
171
+ "flagged_phrases": [],
172
+ "detailed_scores": {}
173
+ }
174
 
175
+ def _analyze_traditional(self, headline: str, content: str) -> Dict[str, Any]:
176
+ """Traditional headline analysis method."""
177
+ try:
178
+ # Download NLTK data if needed
179
+ try:
180
+ nltk.data.find('tokenizers/punkt')
181
+ except LookupError:
182
+ nltk.download('punkt')
 
 
 
 
 
 
 
 
 
183
 
184
+ # Basic metrics
185
+ headline_words = set(headline.lower().split())
186
+ content_words = set(content.lower().split())
187
+
188
+ # Calculate word overlap
189
+ overlap_words = headline_words.intersection(content_words)
190
+ overlap_score = len(overlap_words) / len(headline_words) if headline_words else 0
191
+
192
+ # Check for clickbait patterns
193
+ clickbait_patterns = [
194
+ "you won't believe",
195
+ "shocking",
196
+ "mind blowing",
197
+ "amazing",
198
+ "incredible",
199
+ "unbelievable",
200
+ "must see",
201
+ "click here",
202
+ "find out",
203
+ "what happens next"
204
+ ]
205
+
206
+ clickbait_count = sum(1 for pattern in clickbait_patterns if pattern in headline.lower())
207
+ clickbait_penalty = clickbait_count * 10 # 10% penalty per clickbait phrase
208
+
209
+ # Calculate final score (0-100)
210
+ base_score = overlap_score * 100
211
+ final_score = max(0, min(100, base_score - clickbait_penalty))
212
+
213
+ # Find potentially misleading phrases
214
+ flagged_phrases = []
215
+ sentences = sent_tokenize(content)
216
+
217
+ for sentence in sentences:
218
+ # Flag sentences that directly contradict headline words
219
+ sentence_words = set(sentence.lower().split())
220
+ if len(headline_words.intersection(sentence_words)) > 2:
221
+ flagged_phrases.append(sentence.strip())
222
+
223
+ # Flag sentences with clickbait patterns
224
+ if any(pattern in sentence.lower() for pattern in clickbait_patterns):
225
+ flagged_phrases.append(sentence.strip())
226
+
227
+ return {
228
+ "headline_vs_content_score": round(final_score, 1),
229
+ "flagged_phrases": list(set(flagged_phrases))[:5] # Limit to top 5 unique phrases
230
+ }
231
 
232
+ except Exception as e:
233
+ logger.error(f"Traditional analysis failed: {str(e)}")
234
+ return {
235
+ "headline_vs_content_score": 0,
236
+ "flagged_phrases": []
237
+ }
238
 
239
  def analyze(self, headline: str, content: str) -> Dict[str, Any]:
240
+ """Analyze how well the headline matches the content."""
241
  try:
242
  logger.info("\n" + "="*50)
243
  logger.info("HEADLINE ANALYSIS STARTED")
244
  logger.info("="*50)
245
 
 
246
  if not headline.strip() or not content.strip():
247
  logger.warning("Empty headline or content provided")
248
  return {
249
  "headline_vs_content_score": 0,
250
+ "flagged_phrases": []
 
 
251
  }
252
 
253
+ # Use LLM analysis if available and enabled
254
+ if self.use_ai and self.llm_available:
255
+ logger.info("Using LLM analysis for headline")
256
+ # Split content if needed
 
 
 
 
 
257
  sections = self._split_content(headline, content)
258
+ section_results = []
259
 
260
  # Analyze each section
261
+ for section in sections:
262
+ result = self._analyze_section(headline, section)
263
+ section_results.append(result)
264
+
265
+ # Aggregate results across sections
266
+ accuracy_scores = [r['accuracy_score'] for r in section_results]
267
+ final_score = np.mean(accuracy_scores)
268
+
269
+ # Combine flagged phrases from all sections
270
+ all_phrases = []
271
+ for result in section_results:
272
+ all_phrases.extend(result['flagged_phrases'])
273
 
274
+ # Remove duplicates and limit to top 5
275
+ unique_phrases = list(dict.fromkeys(all_phrases))[:5]
 
 
 
 
 
276
 
277
+ return {
278
+ "headline_vs_content_score": round(final_score, 1),
279
+ "flagged_phrases": unique_phrases
280
+ }
 
281
  else:
282
+ # Use traditional analysis
283
+ logger.info("Using traditional headline analysis")
284
+ return self._analyze_traditional(headline, content)
285
 
286
  except Exception as e:
287
+ logger.error(f"Headline analysis failed: {str(e)}")
 
 
 
 
 
288
  return {
289
  "headline_vs_content_score": 0,
290
+ "flagged_phrases": []
 
 
291
  }
mediaunmasked/analyzers/scoring.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Any
2
  import logging
3
 
4
  from .headline_analyzer import HeadlineAnalyzer
@@ -8,17 +8,34 @@ from .evidence_analyzer import EvidenceAnalyzer
8
 
9
  logger = logging.getLogger(__name__)
10
 
 
 
 
11
  class MediaScorer:
12
- def __init__(self):
13
- """Initialize the MediaScorer with required analyzers."""
14
- self.headline_analyzer = HeadlineAnalyzer()
15
- self.sentiment_analyzer = SentimentAnalyzer()
16
- self.bias_analyzer = BiasAnalyzer()
17
- self.evidence_analyzer = EvidenceAnalyzer()
18
 
19
  def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
20
  """Calculate final media credibility score."""
21
  try:
 
 
22
  headline_analysis = self.headline_analyzer.analyze(headline, content)
23
  sentiment_analysis = self.sentiment_analyzer.analyze(content)
24
  bias_analysis = self.bias_analyzer.analyze(content)
@@ -74,6 +91,7 @@ class MediaScorer:
74
  result = {
75
  "media_unmasked_score": round(final_score, 1),
76
  "rating": rating,
 
77
  "details": {
78
  "headline_analysis": {
79
  "headline_vs_content_score": headline_analysis["headline_vs_content_score"],
@@ -107,6 +125,7 @@ class MediaScorer:
107
  return {
108
  "media_unmasked_score": 0,
109
  "rating": "Error",
 
110
  "details": {
111
  "headline_analysis": {"headline_vs_content_score": 0, "flagged_phrases": []},
112
  "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
 
1
+ from typing import Dict, Any, Literal
2
  import logging
3
 
4
  from .headline_analyzer import HeadlineAnalyzer
 
8
 
9
  logger = logging.getLogger(__name__)
10
 
11
+ # Define analysis mode type
12
+ AnalysisMode = Literal['ai', 'traditional']
13
+
14
  class MediaScorer:
15
+ def __init__(self, use_ai: bool = True):
16
+ """
17
+ Initialize the MediaScorer with required analyzers.
18
+
19
+ Args:
20
+ use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
21
+ """
22
+ self.use_ai = use_ai
23
+ self.analysis_mode: AnalysisMode = 'ai' if use_ai else 'traditional'
24
+ logger.info(f"Initializing MediaScorer with {self.analysis_mode} analysis")
25
+
26
+ # Initialize analyzers with analysis mode preference
27
+ self.headline_analyzer = HeadlineAnalyzer(use_ai=use_ai)
28
+ self.sentiment_analyzer = SentimentAnalyzer(use_ai=use_ai)
29
+ self.bias_analyzer = BiasAnalyzer(use_ai=use_ai)
30
+ self.evidence_analyzer = EvidenceAnalyzer(use_ai=use_ai)
31
+
32
+ logger.info(f"All analyzers initialized in {self.analysis_mode} mode")
33
 
34
  def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
35
  """Calculate final media credibility score."""
36
  try:
37
+ logger.info(f"Calculating media score using {self.analysis_mode} analysis")
38
+
39
  headline_analysis = self.headline_analyzer.analyze(headline, content)
40
  sentiment_analysis = self.sentiment_analyzer.analyze(content)
41
  bias_analysis = self.bias_analyzer.analyze(content)
 
91
  result = {
92
  "media_unmasked_score": round(final_score, 1),
93
  "rating": rating,
94
+ "analysis_mode": self.analysis_mode,
95
  "details": {
96
  "headline_analysis": {
97
  "headline_vs_content_score": headline_analysis["headline_vs_content_score"],
 
125
  return {
126
  "media_unmasked_score": 0,
127
  "rating": "Error",
128
+ "analysis_mode": self.analysis_mode,
129
  "details": {
130
  "headline_analysis": {"headline_vs_content_score": 0, "flagged_phrases": []},
131
  "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
mediaunmasked/analyzers/sentiment_analyzer.py CHANGED
@@ -1,11 +1,23 @@
1
  import logging
2
  from typing import Dict, Any, List
3
  from textblob import TextBlob
 
 
4
 
5
  logger = logging.getLogger(__name__)
6
 
7
  class SentimentAnalyzer:
8
- def __init__(self):
 
 
 
 
 
 
 
 
 
 
9
  self.manipulative_patterns = [
10
  "experts say",
11
  "sources claim",
@@ -17,10 +29,267 @@ class SentimentAnalyzer:
17
  "without doubt",
18
  "certainly"
19
  ]
20
 
21
  def analyze(self, text: str) -> Dict[str, Any]:
22
- """Analyze sentiment using TextBlob."""
23
  try:
 
 
 
 
 
 
 
 
24
  blob = TextBlob(text)
25
  sentiment_score = blob.sentiment.polarity
26
 
@@ -34,13 +303,10 @@ class SentimentAnalyzer:
34
  else:
35
  sentiment = "Neutral"
36
 
37
- if manipulation_score > 50:
38
- sentiment = "Manipulative"
39
-
40
  return {
41
  "sentiment": sentiment,
42
  "manipulation_score": min(manipulation_score, 100),
43
- "flagged_phrases": manipulative_phrases
44
  }
45
 
46
  except Exception as e:
 
1
  import logging
2
  from typing import Dict, Any, List
3
  from textblob import TextBlob
4
+ from transformers import pipeline
5
+ import numpy as np
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
  class SentimentAnalyzer:
10
+ def __init__(self, use_ai: bool = True):
11
+ """
12
+ Initialize sentiment analyzer with both traditional and LLM-based approaches.
13
+
14
+ Args:
15
+ use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
16
+ """
17
+ self.use_ai = use_ai
18
+ self.llm_available = False
19
+
20
+ # Traditional manipulation patterns
21
  self.manipulative_patterns = [
22
  "experts say",
23
  "sources claim",
 
29
  "without doubt",
30
  "certainly"
31
  ]
32
+
33
+ if use_ai:
34
+ try:
35
+ # Initialize LLM pipelines
36
+ self.sentiment_pipeline = pipeline(
37
+ "text-classification",
38
+ model="SamLowe/roberta-base-go_emotions",
39
+ top_k=None
40
+ )
41
+ self.toxicity_pipeline = pipeline(
42
+ "text-classification",
43
+ model="martin-ha/toxic-comment-model",
44
+ top_k=None
45
+ )
46
+ self.manipulation_pipeline = pipeline(
47
+ "zero-shot-classification",
48
+ model="facebook/bart-large-mnli",
49
+ device=-1
50
+ )
51
+ self.llm_available = True
52
+ logger.info("LLM pipelines initialized successfully")
53
+ except Exception as e:
54
+ logger.warning(f"Failed to initialize LLM pipelines: {str(e)}")
55
+ self.llm_available = False
56
+ else:
57
+ logger.info("Initializing sentiment analyzer in traditional mode")
58
+
59
+ def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
60
+ """Perform sentiment analysis using LLM models."""
61
+ try:
62
+ logger.info("Starting LLM sentiment analysis")
63
+
64
+ # Clean the text of formatting markers
65
+ cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
66
+ cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
67
+ if not line.startswith('[') and not line.startswith('More on'))
68
+
69
+ logger.info("Text cleaned and prepared for analysis")
70
+
71
+ # Split text into chunks of 512 tokens (approximate)
72
+ chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
73
+ logger.info(f"Text split into {len(chunks)} chunks for processing")
74
+
75
+ # Initialize aggregation variables
76
+ sentiment_scores = []
77
+ toxicity_scores = []
78
+ manipulation_scores = []
79
+ flagged_phrases = []
80
+
81
+ manipulation_categories = [
82
+ "emotional manipulation",
83
+ "fear mongering",
84
+ "propaganda",
85
+ "factual reporting",
86
+ "balanced perspective"
87
+ ]
88
+
89
+ # Process each chunk
90
+ for i, chunk in enumerate(chunks, 1):
91
+ logger.info(f"Processing chunk {i}/{len(chunks)}")
92
+
93
+ try:
94
+ # Get emotion scores with detailed logging
95
+ logger.debug(f"Analyzing emotions for chunk {i}")
96
+ emotions = self.sentiment_pipeline(chunk)
97
+ logger.debug(f"Raw emotion response: {emotions}")
98
+
99
+ # Handle different response formats
100
+ if isinstance(emotions, list):
101
+ # Multiple results format
102
+ for emotion in emotions:
103
+ if isinstance(emotion, dict) and 'label' in emotion and 'score' in emotion:
104
+ sentiment_scores.append(emotion)
105
+ elif isinstance(emotions, dict) and 'label' in emotions and 'score' in emotions:
106
+ # Single result format
107
+ sentiment_scores.append(emotions)
108
+ logger.debug(f"Processed emotion scores: {sentiment_scores}")
109
+
110
+ # Get toxicity scores
111
+ logger.debug(f"Analyzing toxicity for chunk {i}")
112
+ toxicity = self.toxicity_pipeline(chunk)
113
+ if isinstance(toxicity, list):
114
+ toxicity_scores.extend(toxicity)
115
+ else:
116
+ toxicity_scores.append(toxicity)
117
+ logger.debug(f"Processed toxicity scores: {toxicity_scores}")
118
+
119
+ # Get manipulation scores
120
+ logger.debug(f"Analyzing manipulation for chunk {i}")
121
+ manipulation = self.manipulation_pipeline(
122
+ chunk,
123
+ manipulation_categories,
124
+ multi_label=True
125
+ )
126
+
127
+ if isinstance(manipulation, dict) and 'labels' in manipulation and 'scores' in manipulation:
128
+ manipulation_scores.append({
129
+ label: score
130
+ for label, score in zip(manipulation['labels'], manipulation['scores'])
131
+ })
132
+ logger.debug(f"Processed manipulation scores: {manipulation_scores}")
133
+
134
+ # Analyze sentences for manipulation
135
+ sentences = chunk.split('.')
136
+ for sentence in sentences:
137
+ if len(sentence.strip()) > 10:
138
+ sent_result = self.manipulation_pipeline(
139
+ sentence.strip(),
140
+ manipulation_categories,
141
+ multi_label=False
142
+ )
143
+ if (sent_result['labels'][0] in ["emotional manipulation", "fear mongering", "propaganda"]
144
+ and sent_result['scores'][0] > 0.7):
145
+ flagged_phrases.append({
146
+ 'text': sentence.strip(),
147
+ 'type': sent_result['labels'][0],
148
+ 'score': sent_result['scores'][0]
149
+ })
150
+
151
+ except Exception as chunk_error:
152
+ logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
153
+ continue
154
+
155
+ logger.info("All chunks processed, aggregating scores")
156
+
157
+ # Aggregate scores with error handling
158
+ def aggregate_scores(scores_list, score_type: str):
159
+ try:
160
+ all_scores = {}
161
+ for scores in scores_list:
162
+ if isinstance(scores, dict):
163
+ if 'label' in scores and 'score' in scores:
164
+ label = scores['label']
165
+ score = scores['score']
166
+ else:
167
+ # Handle direct label-score mapping
168
+ for label, score in scores.items():
169
+ if label not in all_scores:
170
+ all_scores[label] = []
171
+ if isinstance(score, (int, float)):
172
+ all_scores[label].append(score)
173
+ continue
174
+ else:
175
+ logger.warning(f"Unexpected score format in {score_type}: {scores}")
176
+ continue
177
+
178
+ if isinstance(label, (str, bytes)):
179
+ if label not in all_scores:
180
+ all_scores[label] = []
181
+ if isinstance(score, (int, float)):
182
+ all_scores[label].append(score)
183
+
184
+ return {k: np.mean(v) for k, v in all_scores.items() if v}
185
+ except Exception as agg_error:
186
+ logger.error(f"Error aggregating {score_type} scores: {str(agg_error)}")
187
+ return {}
188
+
189
+ emotion_scores = aggregate_scores(sentiment_scores, "emotion")
190
+ toxicity_scores = aggregate_scores(toxicity_scores, "toxicity")
191
+ logger.debug(f"Aggregated emotion scores: {emotion_scores}")
192
+ logger.debug(f"Aggregated toxicity scores: {toxicity_scores}")
193
+
194
+ # Aggregate manipulation scores
195
+ manipulation_agg = {
196
+ category: np.mean([
197
+ scores.get(category, 0)
198
+ for scores in manipulation_scores
199
+ ])
200
+ for category in manipulation_categories
201
+ }
202
+ logger.debug(f"Aggregated manipulation scores: {manipulation_agg}")
203
+
204
+ # Calculate manipulation score based on multiple factors
205
+ manipulation_indicators = {
206
+ 'emotional manipulation': 0.4,
207
+ 'fear mongering': 0.3,
208
+ 'propaganda': 0.3,
209
+ 'toxic': 0.2,
210
+ 'severe_toxic': 0.3,
211
+ 'threat': 0.2
212
+ }
213
+
214
+ # Combine toxicity and manipulation scores
215
+ combined_scores = {**toxicity_scores, **manipulation_agg}
216
+ manipulation_score = min(100, sum(
217
+ combined_scores.get(k, 0) * weight
218
+ for k, weight in manipulation_indicators.items()
219
+ ) * 100)
220
+
221
+ logger.info(f"Final manipulation score: {manipulation_score}")
222
+
223
+ # Determine overall sentiment
224
+ positive_emotions = ['admiration', 'joy', 'amusement', 'approval']
225
+ negative_emotions = ['disgust', 'anger', 'disappointment', 'fear']
226
+ neutral_emotions = ['neutral', 'confusion', 'realization']
227
+
228
+ pos_score = sum(emotion_scores.get(emotion, 0) for emotion in positive_emotions)
229
+ neg_score = sum(emotion_scores.get(emotion, 0) for emotion in negative_emotions)
230
+ neu_score = sum(emotion_scores.get(emotion, 0) for emotion in neutral_emotions)
231
+
232
+ logger.debug(f"Sentiment scores - Positive: {pos_score}, Negative: {neg_score}, Neutral: {neu_score}")
233
+
234
+ # Determine sentiment based on highest score
235
+ max_score = max(pos_score, neg_score, neu_score)
236
+ if max_score == pos_score and pos_score > 0.3:
237
+ sentiment = "Positive"
238
+ elif max_score == neg_score and neg_score > 0.3:
239
+ sentiment = "Negative"
240
+ else:
241
+ sentiment = "Neutral"
242
+
243
+ logger.info(f"Final sentiment determination: {sentiment}")
244
+
245
+ # Sort and limit flagged phrases by manipulation score
246
+ sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
247
+ unique_phrases = []
248
+ seen = set()
249
+ for phrase in sorted_phrases:
250
+ clean_text = phrase['text'].strip()
251
+ if clean_text not in seen:
252
+ unique_phrases.append(clean_text)
253
+ seen.add(clean_text)
254
+ if len(unique_phrases) >= 5:
255
+ break
256
+
257
+ logger.info("LLM analysis completed successfully")
258
+
259
+ return {
260
+ "sentiment": sentiment,
261
+ "manipulation_score": manipulation_score,
262
+ "flagged_phrases": unique_phrases,
263
+ "detailed_scores": {
264
+ "emotions": emotion_scores,
265
+ "manipulation": manipulation_agg,
266
+ "toxicity": toxicity_scores
267
+ }
268
+ }
269
+
270
+ except Exception as e:
271
+ logger.error(f"LLM analysis failed: {str(e)}", exc_info=True)
272
+ return None
273
 
274
  def analyze(self, text: str) -> Dict[str, Any]:
275
+ """
276
+ Analyze sentiment using LLM with fallback to traditional methods.
277
+
278
+ Args:
279
+ text: The text to analyze
280
+
281
+ Returns:
282
+ Dict containing sentiment analysis results
283
+ """
284
  try:
285
+ # Try LLM analysis if enabled and available
286
+ if self.use_ai and self.llm_available:
287
+ llm_result = self._analyze_with_llm(text)
288
+ if llm_result:
289
+ return llm_result
290
+
291
+ # Use traditional analysis
292
+ logger.info("Using traditional sentiment analysis")
293
  blob = TextBlob(text)
294
  sentiment_score = blob.sentiment.polarity
295
 
303
  else:
304
  sentiment = "Neutral"
305
 
306
  return {
307
  "sentiment": sentiment,
308
  "manipulation_score": min(manipulation_score, 100),
309
+ "flagged_phrases": manipulative_phrases[:5] # Limit to top 5 phrases
310
  }
311
 
312
  except Exception as e:
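For reference, the chunk aggregation in the hunk above reduces to a per-label average followed by a weighted, capped sum. The snippet below is a minimal, self-contained sketch of that calculation, not the committed code: the chunk values and the three label names are invented illustrative inputs, and only numpy is required.

import numpy as np

# Hypothetical per-chunk outputs from the manipulation classifier (illustrative values only).
chunk_scores = [
    {"emotional manipulation": 0.62, "fear mongering": 0.18, "propaganda": 0.25},
    {"emotional manipulation": 0.48, "fear mongering": 0.31, "propaganda": 0.19},
]

# Average each label across chunks, as aggregate_scores() does for emotion/toxicity labels.
labels = {label for chunk in chunk_scores for label in chunk}
aggregated = {
    label: float(np.mean([chunk.get(label, 0.0) for chunk in chunk_scores]))
    for label in labels
}

# Weighted sum capped at 100, mirroring the manipulation_indicators weighting above.
weights = {"emotional manipulation": 0.4, "fear mongering": 0.3, "propaganda": 0.3}
manipulation_score = min(100, sum(aggregated.get(k, 0.0) * w for k, w in weights.items()) * 100)
print(round(manipulation_score, 1))  # ~36.0 for these made-up inputs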
package-lock.json CHANGED
@@ -6,6 +6,9 @@
6
  "": {
7
  "dependencies": {
8
  "supabase": "^2.12.1"
9
+ },
10
+ "devDependencies": {
11
+ "@types/react": "^19.0.10"
12
  }
13
  },
14
  "node_modules/@isaacs/cliui": {
@@ -47,6 +50,16 @@
50
  "node": ">=14"
51
  }
52
  },
53
+ "node_modules/@types/react": {
54
+ "version": "19.0.10",
55
+ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.0.10.tgz",
56
+ "integrity": "sha512-JuRQ9KXLEjaUNjTWpzuR231Z2WpIwczOkBEIvbHNCzQefFIT0L8IqE6NV6ULLyC1SI/i234JnDoMkfg+RjQj2g==",
57
+ "dev": true,
58
+ "license": "MIT",
59
+ "dependencies": {
60
+ "csstype": "^3.0.2"
61
+ }
62
+ },
63
  "node_modules/agent-base": {
64
  "version": "7.1.3",
65
  "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz",
@@ -161,6 +174,13 @@
174
  "node": ">= 8"
175
  }
176
  },
177
+ "node_modules/csstype": {
178
+ "version": "3.1.3",
179
+ "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
180
+ "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
181
+ "dev": true,
182
+ "license": "MIT"
183
+ },
184
  "node_modules/data-uri-to-buffer": {
185
  "version": "4.0.1",
186
  "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
package.json CHANGED
@@ -1,5 +1,8 @@
1
  {
2
  "dependencies": {
3
  "supabase": "^2.12.1"
4
+ },
5
+ "devDependencies": {
6
+ "@types/react": "^19.0.10"
7
  }
8
  }
tests/test_LLM_comparisons.py ADDED
@@ -0,0 +1,199 @@
1
+ from transformers import pipeline, AutoTokenizer
2
+ import unittest
3
+ from mediaunmasked.scrapers.article_scraper import ArticleScraper
4
+ from tabulate import tabulate
5
+ import torch
6
+ from typing import List
7
+ import logging
8
+ import transformers
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+ class MediaUnmaskLLMTester(unittest.TestCase):
14
+ transformers.logging.set_verbosity_error()
15
+ def setUp(self):
16
+ """Set up LLMs and scrape article."""
17
+ self.models = {
18
+ # Upgraded Evidence-Based Models
19
+ "RoBERTa-MNLI": {"model": "roberta-large-mnli", "max_length": 512}, # Corrected to standard MNLI model
20
+ "DeBERTa-Fact": {"model": "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli", "max_length": 512},
21
+ "T5-Large": {"model": "google/t5-v1_1-large", "max_length": 512},
22
+ "SciBERT": {"model": "allenai/scibert_scivocab_uncased", "max_length": 512},
23
+ "BART-FEVER": {"model": "facebook/bart-large", "max_length": 1024}, # Note: Needs FEVER fine-tuning
24
+ "MultiQA-MiniLM": {"model": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "max_length": 512},
25
+
26
+ # Existing Models for Benchmarking
27
+ "BART-MNLI": {"model": "facebook/bart-large-mnli", "max_length": 1024},
28
+ "RoBERTa-Bias": {"model": "cardiffnlp/twitter-roberta-base-hate", "max_length": 512},
29
+ "DistilBERT-Sentiment": {"model": "distilbert-base-uncased-finetuned-sst-2-english", "max_length": 512},
30
+ "GPT2-Generation": {"model": "gpt2", "max_length": 1024},
31
+ }
32
+
33
+ self.device = 0 if torch.cuda.is_available() else -1
34
+ self.scraper = ArticleScraper()
35
+ self.article_url = "https://www.snopes.com/fact-check/trump-super-bowl-cost-taxpayers/"
36
+ self.article_data = self.scraper.scrape_article(self.article_url) or {}
37
+
38
+ self.results = {
39
+ "headline": self.article_data.get("headline", "No headline"),
40
+ "content": self.article_data.get("content", "No content available"),
41
+ "scores": {}
42
+ }
43
+
44
+ self.tokenizers = {name: AutoTokenizer.from_pretrained(model["model"]) for name, model in self.models.items()}
45
+
46
+ def _split_content(self, model_name: str, content: str) -> List[str]:
47
+ """Split content into sections within model token limits, ensuring valid output."""
48
+ tokenizer = self.tokenizers[model_name]
49
+ max_length = self.models[model_name]["max_length"]
50
+
51
+ if not content or not content.strip():
52
+ return ["No valid content"]
53
+
54
+ encoded = tokenizer.encode_plus(content, add_special_tokens=True, truncation=True, max_length=max_length)
55
+ decoded = tokenizer.decode(encoded["input_ids"], skip_special_tokens=True)
56
+
57
+ return [decoded] if decoded.strip() else ["No valid content"]
58
+
59
+ def _get_flagged_phrases(self, model_pipeline, sections, threshold=0.6, top_k=5):
60
+ """Extract top-scoring flagged phrases while handling None values safely."""
61
+ if not sections or not isinstance(sections, list):
62
+ return [("None", "N/A")]
63
+
64
+ flagged_phrases = []
65
+
66
+ for section in sections:
67
+ if not section or not isinstance(section, str) or not section.strip(): # Ensure section is a valid string
68
+ continue
69
+
70
+ sentences = [s.strip() for s in section.split(". ") if s.strip()]
71
+ for sentence in sentences:
72
+ if not sentence or not isinstance(sentence, str): # Double-check before running the model
73
+ continue
74
+
75
+ try:
76
+ preds = model_pipeline(sentence)
77
+ if preds and isinstance(preds, list):
78
+ top_pred = max(preds, key=lambda x: x["score"])
79
+ if top_pred["score"] >= threshold:
80
+ short_phrase = " ".join(sentence.split()[:10]) # Shorten for readability
81
+ flagged_phrases.append((short_phrase, top_pred["score"], top_pred["label"]))
82
+ except Exception as e:
83
+ logger.error(f"Error analyzing sentence: {e}")
84
+ continue
85
+
86
+ flagged_phrases.sort(key=lambda x: x[1], reverse=True)
87
+ return [(phrase, label) for phrase, _, label in flagged_phrases[:top_k]] or [("None", "N/A")]
88
+
89
+ def test_headline_vs_content(self):
90
+ """Check headline-content alignment."""
91
+ headline = self.results["headline"]
92
+ content = self.results["content"]
93
+
94
+ for model_name in self.models:
95
+ with self.subTest(model=model_name):
96
+ analyzer = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
97
+ sections = self._split_content(model_name, content)
98
+
99
+ headline_score = max(analyzer(headline), key=lambda x: x["score"])["score"]
100
+ content_scores = [max(analyzer(section), key=lambda x: x["score"])["score"] for section in sections]
101
+ avg_content_score = sum(content_scores) / len(content_scores)
102
+ consistency_score = abs(headline_score - avg_content_score)
103
+
104
+ flagged_phrases = self._get_flagged_phrases(analyzer, sections)
105
+ self.results["scores"].setdefault("headline_vs_content", {})[model_name] = {
106
+ "score": consistency_score,
107
+ "flagged_phrases": flagged_phrases
108
+ }
109
+ self.assertIsNotNone(consistency_score)
110
+
111
+ def test_evidence_based(self):
112
+ """Test evidence-based content."""
113
+ content = self.results["content"]
114
+
115
+ for model_name in self.models:
116
+ if any(keyword in model_name.lower() for keyword in ["mnli", "fact", "fever", "qa"]):
117
+ with self.subTest(model=model_name):
118
+ classifier = pipeline("zero-shot-classification", model=self.models[model_name]["model"], device=self.device)
119
+ sections = self._split_content(model_name, content)
120
+
121
+ results = [classifier(section, candidate_labels=["evidence-based", "opinion", "misleading"]) for section in sections]
122
+ avg_score = sum(r["scores"][r["labels"].index("evidence-based")] for r in results) / len(results)
123
+
124
+ flagged_phrases = self._get_flagged_phrases(classifier, sections)
125
+ self.results["scores"].setdefault("evidence_based", {})[model_name] = {
126
+ "score": avg_score,
127
+ "flagged_phrases": flagged_phrases
128
+ }
129
+ self.assertIsNotNone(avg_score)
130
+
131
+ def test_manipulative_language(self):
132
+ """Detect manipulative language."""
133
+ content = self.results["content"]
134
+
135
+ for model_name in self.models:
136
+ if "sentiment" in model_name.lower() or "emotion" in model_name.lower() or "gpt" in model_name.lower():
137
+ with self.subTest(model=model_name):
138
+ detector = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
139
+ sections = self._split_content(model_name, content)
140
+
141
+ results = [max(detector(section), key=lambda x: x["score"]) for section in sections]
142
+ avg_score = sum(r["score"] for r in results) / len(results)
143
+
144
+ flagged_phrases = self._get_flagged_phrases(detector, sections)
145
+ self.results["scores"].setdefault("manipulative_language", {})[model_name] = {
146
+ "score": avg_score,
147
+ "flagged_phrases": flagged_phrases
148
+ }
149
+ self.assertIsNotNone(avg_score)
150
+
151
+ def test_bias_detection(self):
152
+ """Detect bias."""
153
+ content = self.results["content"]
154
+
155
+ for model_name in self.models:
156
+ if "bias" in model_name.lower() or "toxic" in model_name.lower() or "roberta" in model_name.lower():
157
+ with self.subTest(model=model_name):
158
+ detector = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
159
+ sections = self._split_content(model_name, content)
160
+
161
+ results = [max(detector(section), key=lambda x: x["score"]) for section in sections]
162
+ avg_score = sum(r["score"] for r in results) / len(results)
163
+
164
+ flagged_phrases = self._get_flagged_phrases(detector, sections)
165
+ self.results["scores"].setdefault("bias_detection", {})[model_name] = {
166
+ "score": avg_score,
167
+ "flagged_phrases": flagged_phrases
168
+ }
169
+ self.assertIsNotNone(avg_score)
170
+
171
+ def tearDown(self):
172
+ """Print top 2 models per test with clearer formatting."""
173
+ print("\n=== Top Model Recommendations ===")
174
+
175
+ for test_type, model_results in self.results["scores"].items():
176
+ print(f"\nTop 2 Models for {test_type}:")
177
+
178
+ sorted_results = sorted(
179
+ model_results.items(),
180
+ key=lambda x: x[1]["score"],
181
+ reverse=(test_type != "headline_vs_content")
182
+ )
183
+
184
+ top_2 = sorted_results[:2]
185
+ table = [
186
+ [
187
+ model,
188
+ f"{res['score']:.6f}",
189
+ ", ".join(f"{phrase} ({label})" for phrase, label in res["flagged_phrases"])
190
+ ]
191
+ for model, res in top_2
192
+ ]
193
+
194
+ print(tabulate(table, headers=["Model", "Score", "Flagged Phrases"], tablefmt="grid"))
195
+ criteria = "Lowest consistency score (better alignment)" if test_type == "headline_vs_content" else "Highest detection score"
196
+ print(f"Criteria: {criteria}")
197
+
198
+ if __name__ == "__main__":
199
+ unittest.main()
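Running python tests/test_LLM_comparisons.py executes the full suite via unittest.main(). For a quick sanity check of one benchmarked checkpoint outside the unittest harness, a minimal sketch is shown below; the input sentence is invented, and only the DeBERTa MNLI model already listed in the table above is assumed.

from transformers import pipeline

classifier = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
)
result = classifier(
    "The program was funded entirely by private donations.",  # invented example sentence
    candidate_labels=["evidence-based", "opinion", "misleading"],
)
# The pipeline returns labels and scores sorted from highest to lowest confidence.
print(result["labels"][0], round(result["scores"][0], 3))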