Updating the backend to implement either AI or traditional scoring and return flagged phrases; updating the Supabase table calls to incorporate the new analysis_mode column.
Files changed:
- app/routers/analyze.py (+51 −23)
- mediaunmasked/analyzers/bias_analyzer.py (+181 −40)
- mediaunmasked/analyzers/evidence_analyzer.py (+198 −6)
- mediaunmasked/analyzers/headline_analyzer.py (+224 −115)
- mediaunmasked/analyzers/scoring.py (+26 −7)
- mediaunmasked/analyzers/sentiment_analyzer.py (+272 −6)
- package-lock.json (+20 −0)
- package.json (+3 −0)
- tests/test_LLM_comparisons.py (+199 −0)
app/routers/analyze.py
CHANGED
@@ -1,6 +1,6 @@
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, HttpUrl
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Literal
 import logging
 import os
 from supabase import AsyncClient

@@ -20,7 +20,6 @@
 # Initialize router and dependencies
 router = APIRouter(tags=["analysis"])
 scraper = ArticleScraper()
-scorer = MediaScorer()

 # Get Supabase credentials
 SUPABASE_URL = os.getenv("SUPABASE_URL")

@@ -32,8 +31,12 @@
 supabase = AsyncClient(SUPABASE_URL, SUPABASE_KEY)

+# Define analysis mode type
+AnalysisMode = Literal['ai', 'traditional']
+
 class ArticleRequest(BaseModel):
     url: HttpUrl
+    use_ai: bool = True  # Default to AI-powered analysis

 class MediaScoreDetails(BaseModel):
     headline_analysis: Dict[str, Any]

@@ -54,6 +57,7 @@
     bias_score: float
     bias_percentage: float
     media_score: MediaScore
+    analysis_mode: AnalysisMode

 @router.post("/analyze", response_model=AnalysisResponse)
 async def analyze_article(request: ArticleRequest) -> AnalysisResponse:

@@ -61,7 +65,7 @@
     Analyze an article for bias, sentiment, and credibility.

     Args:
-        request: ArticleRequest containing the URL to analyze
+        request: ArticleRequest containing the URL to analyze and analysis preferences

     Returns:
         AnalysisResponse with complete analysis results

@@ -70,16 +74,27 @@
         HTTPException: If scraping or analysis fails
     """
     try:
-        existing_article = await supabase.table('article_analysis').select('*').eq('url', str(request.url)).execute()
+        # Determine analysis mode
+        analysis_mode: AnalysisMode = 'ai' if request.use_ai else 'traditional'
+        logger.info(f"Analyzing article: {request.url} (Analysis Mode: {analysis_mode})")
+
+        # Check cache with both URL and analysis mode
+        try:
+            cached_result = await supabase.table('article_analysis') \
+                .select('*') \
+                .eq('url', str(request.url)) \
+                .eq('analysis_mode', analysis_mode) \
+                .limit(1) \
+                .single() \
+                .execute()
+
+            if cached_result and cached_result.data:
+                logger.info(f"Found cached analysis for URL with {analysis_mode} mode")
+                return AnalysisResponse.parse_obj(cached_result.data)
+
+        except Exception as cache_error:
+            logger.warning(f"Cache lookup failed: {str(cache_error)}")
+            # Continue with analysis if cache lookup fails

         # Scrape article
         article = scraper.scrape_article(str(request.url))

@@ -89,6 +104,9 @@
                 detail="Failed to scrape article content"
             )

+        # Initialize scorer with specified analysis preference
+        scorer = MediaScorer(use_ai=request.use_ai)
+
         # Analyze content
         analysis = scorer.calculate_media_score(
             article["headline"],

@@ -108,6 +126,7 @@
             "bias": str(analysis['details']['bias_analysis']['bias']),
             "bias_score": float(analysis['details']['bias_analysis']['bias_score']),
             "bias_percentage": float(analysis['details']['bias_analysis']['bias_percentage']),
+            "analysis_mode": analysis_mode,
             "media_score": {
                 "media_unmasked_score": float(analysis['media_unmasked_score']),
                 "rating": str(analysis['rating']),

@@ -135,17 +154,26 @@
             }
         }

-        # Save …
+        # Save to Supabase with analysis mode
+        try:
+            await supabase.table('article_analysis').upsert({
+                'url': str(request.url),
+                'headline': response_dict['headline'],
+                'content': response_dict['content'],
+                'sentiment': response_dict['sentiment'],
+                'bias': response_dict['bias'],
+                'bias_score': response_dict['bias_score'],
+                'bias_percentage': response_dict['bias_percentage'],
+                'media_score': response_dict['media_score'],
+                'analysis_mode': analysis_mode,  # Store the analysis mode
+                'created_at': 'now()'  # Use server timestamp
+            }, on_conflict='url,analysis_mode').execute()  # Specify composite unique constraint
+
+            logger.info(f"Saved analysis to database with mode: {analysis_mode}")
+
+        except Exception as db_error:
+            logger.error(f"Failed to save to database: {str(db_error)}")
+            # Continue since we can still return the analysis even if saving fails

         # Return the response
         return AnalysisResponse.parse_obj(response_dict)
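For reference, a minimal client-side sketch of the new request shape. This is not part of the commit: the base URL and the use of the requests library are assumptions for illustration only.

import requests

# Hypothetical local deployment URL; adjust to wherever the API is hosted.
BASE_URL = "http://localhost:8000"

# AI-powered analysis (default): use_ai may be omitted since it defaults to True.
ai_response = requests.post(f"{BASE_URL}/analyze", json={
    "url": "https://example.com/some-article",
    "use_ai": True,
})

# Traditional keyword/heuristic analysis of the same article is cached separately,
# because the cache key is now the (url, analysis_mode) pair.
traditional_response = requests.post(f"{BASE_URL}/analyze", json={
    "url": "https://example.com/some-article",
    "use_ai": False,
})

print(ai_response.json()["analysis_mode"])           # expected: "ai"
print(traditional_response.json()["analysis_mode"])  # expected: "traditional"
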
mediaunmasked/analyzers/bias_analyzer.py
CHANGED
@@ -1,14 +1,72 @@
 import logging
 import os
 from typing import Dict, Any, List
+from transformers import pipeline
+import numpy as np

 logger = logging.getLogger(__name__)

 class BiasAnalyzer:
-    def __init__(self):
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize bias analyzer with both LLM and traditional approaches.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        # Load traditional keywords
         self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
         self.left_keywords = self._load_keywords('left_bias_words.txt')
         self.right_keywords = self._load_keywords('right_bias_words.txt')
+
+        if use_ai:
+            try:
+                # Initialize LLM pipeline for zero-shot classification
+                self.classifier = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1  # Use CPU, change to specific GPU index if available
+                )
+                self.llm_available = True
+                logger.info("LLM pipeline initialized successfully for bias analysis")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipeline: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing bias analyzer in traditional mode")
+
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Analyze bias using LLM with fallback to traditional method."""
+        try:
+            # Try LLM analysis if enabled and available
+            if self.use_ai and self.llm_available:
+                llm_result = self._analyze_with_llm(text)
+                if llm_result:
+                    return llm_result
+
+            # Use traditional analysis
+            logger.info("Using traditional bias analysis")
+            return self._analyze_traditional(text)
+
+        except Exception as e:
+            logger.error(f"Error in bias analysis: {str(e)}")
+            return {
+                "bias": "Error",
+                "bias_score": 0.0,
+                "bias_percentage": 0,
+                "flagged_phrases": []
+            }

     def _load_keywords(self, filename: str) -> List[str]:
         """Load keywords from file."""

@@ -20,63 +78,146 @@
             logger.error(f"Error loading {filename}: {str(e)}")
             return []

-    (the old keyword-only analyze() method is removed; its counting, scoring and
-    labelling logic moves into _analyze_traditional below)
+    def _analyze_traditional(self, text: str) -> Dict[str, Any]:
+        """Traditional keyword-based bias analysis."""
+        text_lower = text.lower()
+
+        # Count matches and collect flagged phrases
+        left_matches = [word for word in self.left_keywords if word in text_lower]
+        right_matches = [word for word in self.right_keywords if word in text_lower]
+
+        left_count = len(left_matches)
+        right_count = len(right_matches)
+        total_count = left_count + right_count
+
+        if total_count == 0:
+            return {
+                "bias": "Neutral",
+                "bias_score": 0.0,
+                "bias_percentage": 0,
+                "flagged_phrases": []
+            }
+
+        # Calculate bias score (-1 to 1) and bias percentage
+        bias_score = (right_count - left_count) / total_count
+        bias_percentage = abs(bias_score * 100)
+
+        # Determine bias label
+        if bias_score < -0.6:
+            bias = "Strongly Left"
+        elif bias_score < -0.3:
+            bias = "Moderately Left"
+        elif bias_score < -0.1:
+            bias = "Leaning Left"
+        elif bias_score > 0.6:
+            bias = "Strongly Right"
+        elif bias_score > 0.3:
+            bias = "Moderately Right"
+        elif bias_score > 0.1:
+            bias = "Leaning Right"
+        else:
+            bias = "Neutral"
+
+        return {
+            "bias": bias,
+            "bias_score": round(bias_score, 2),
+            "bias_percentage": round(bias_percentage, 1),
+            "flagged_phrases": list(set(left_matches + right_matches))[:5]  # Limit to top 5 unique phrases
+        }
+
+    def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
+        """Analyze bias using LLM zero-shot classification."""
+        try:
+            # Define bias categories to check against
+            bias_categories = [
+                "left-wing bias",
+                "right-wing bias",
+                "neutral/balanced perspective"
+            ]
+
+            # Split text into manageable chunks (2000 chars each)
+            chunks = [text[i:i+2000] for i in range(0, len(text), 2000)]
+
+            # Analyze each chunk
+            chunk_scores = []
+            flagged_phrases = []
+
+            for chunk in chunks:
+                # Perform zero-shot classification
+                result = self.classifier(chunk, bias_categories, multi_label=True)
+                chunk_scores.append({
+                    label: score
+                    for label, score in zip(result['labels'], result['scores'])
+                })
+
+                # Identify strongly biased phrases
+                sentences = chunk.split('.')
+                for sentence in sentences:
+                    if len(sentence.strip()) > 10:  # Ignore very short sentences
+                        sentence_result = self.classifier(
+                            sentence.strip(),
+                            bias_categories,
+                            multi_label=False
+                        )
+                        max_score = max(sentence_result['scores'])
+                        if max_score > 0.8 and sentence_result['labels'][0] != "neutral/balanced perspective":
+                            flagged_phrases.append(sentence.strip())
+
+            # Aggregate scores across chunks
+            aggregated_scores = {
+                category: np.mean([scores[category] for scores in chunk_scores])
+                for category in bias_categories
+            }
+
+            # Calculate bias metrics
+            left_score = aggregated_scores["left-wing bias"]
+            right_score = aggregated_scores["right-wing bias"]
+            neutral_score = aggregated_scores["neutral/balanced perspective"]
+
+            # Calculate bias score (-1 to 1, where negative is left and positive is right)
+            bias_score = (right_score - left_score) / max(right_score + left_score, 0.0001)
+
+            # Determine bias label (same thresholds as the traditional path)
+            if bias_score < -0.6:
+                bias = "Strongly Left"
+            elif bias_score < -0.3:
+                bias = "Moderately Left"
+            elif bias_score < -0.1:
+                bias = "Leaning Left"
+            elif bias_score > 0.6:
+                bias = "Strongly Right"
+            elif bias_score > 0.3:
+                bias = "Moderately Right"
+            elif bias_score > 0.1:
+                bias = "Leaning Right"
+            else:
+                bias = "Neutral"
+
+            # Calculate bias percentage (0-100)
+            bias_percentage = min(100, abs(bias_score * 100))
+
+            return {
+                "bias": bias,
+                "bias_score": round(bias_score, 2),
+                "bias_percentage": round(bias_percentage, 1),
+                "flagged_phrases": list(set(flagged_phrases))[:5],  # Limit to top 5 unique phrases
+                "detailed_scores": {
+                    "left_bias": round(left_score * 100, 1),
+                    "right_bias": round(right_score * 100, 1),
+                    "neutral": round(neutral_score * 100, 1)
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"LLM analysis failed: {str(e)}")
+            return None
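As a standalone illustration of the zero-shot step the new AI path relies on, the sketch below classifies one sentence against the three bias categories. It assumes the transformers package is installed and the facebook/bart-large-mnli weights can be downloaded; the sample sentence is invented.

from transformers import pipeline

# Same pipeline configuration as the new BiasAnalyzer AI path (CPU, device=-1).
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)

bias_categories = ["left-wing bias", "right-wing bias", "neutral/balanced perspective"]
sentence = "The reckless new policy is yet another government power grab."

result = classifier(sentence, bias_categories, multi_label=False)
for label, score in zip(result["labels"], result["scores"]):
    print(f"{label}: {score:.3f}")
# A sentence whose top non-neutral score exceeds 0.8 would be flagged by the analyzer.
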
mediaunmasked/analyzers/evidence_analyzer.py
CHANGED
@@ -1,10 +1,40 @@
 import logging
 from typing import Dict, Any, List
+from transformers import pipeline
+import numpy as np
+import nltk
+from nltk.tokenize import sent_tokenize

 logger = logging.getLogger(__name__)

 class EvidenceAnalyzer:
-    def __init__(self):
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize evidence analyzer with LLM and traditional approaches.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        if use_ai:
+            try:
+                # Zero-shot classifier for evidence analysis
+                self.classifier = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1
+                )
+                self.llm_available = True
+                logger.info("LLM pipeline initialized successfully for evidence analysis")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipeline: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing evidence analyzer in traditional mode")
+
+        # Traditional markers for fallback
         self.citation_markers = [
             "according to",
             "said",

@@ -29,25 +59,187 @@
             "allegedly"
         ]

+    def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
+        """Analyze evidence using LLM."""
+        try:
+            # Clean the text of formatting markers
+            cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
+            cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
+                                     if not line.startswith('[') and not line.startswith('More on'))
+
+            # Download NLTK data if needed
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                nltk.download('punkt')
+
+            # Split text into chunks
+            chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
+
+            # Categories for evidence classification
+            evidence_categories = [
+                "factual statement with source",
+                "verifiable claim",
+                "expert opinion",
+                "data-backed claim",
+                "unsubstantiated claim",
+                "opinion statement"
+            ]
+
+            chunk_scores = []
+            flagged_phrases = []
+
+            for chunk in chunks:
+                # Analyze each sentence in the chunk
+                sentences = sent_tokenize(chunk)
+
+                for sentence in sentences:
+                    if len(sentence.strip()) > 10:
+                        # Classify the type of evidence
+                        result = self.classifier(sentence.strip(), evidence_categories, multi_label=True)
+                        evidence_scores = {
+                            label: score
+                            for label, score in zip(result['labels'], result['scores'])
+                        }
+
+                        # Strong evidence indicators
+                        strong_evidence = sum([
+                            evidence_scores.get("factual statement with source", 0),
+                            evidence_scores.get("data-backed claim", 0),
+                            evidence_scores.get("expert opinion", 0)
+                        ]) / 3  # Average the strong evidence scores
+
+                        # Weak or no evidence indicators
+                        weak_evidence = sum([
+                            evidence_scores.get("unsubstantiated claim", 0),
+                            evidence_scores.get("opinion statement", 0)
+                        ]) / 2  # Average the weak evidence scores
+
+                        # Store scores for overall calculation
+                        chunk_scores.append({
+                            'strong_evidence': strong_evidence,
+                            'weak_evidence': weak_evidence
+                        })
+
+                        # Flag high-quality evidence
+                        if strong_evidence > 0.7 and not any(
+                            marker in sentence.lower()
+                            for marker in ['more on this story', 'click here', 'read more']
+                        ):
+                            flagged_phrases.append({
+                                'text': sentence.strip(),
+                                'type': 'strong_evidence',
+                                'score': strong_evidence
+                            })
+
+            # Calculate overall evidence score
+            if chunk_scores:
+                avg_strong = np.mean([s['strong_evidence'] for s in chunk_scores])
+                avg_weak = np.mean([s['weak_evidence'] for s in chunk_scores])
+
+                # Evidence score formula:
+                # - Reward strong evidence (70% weight)
+                # - Penalize weak/unsubstantiated claims (30% weight)
+                # - Ensure score is between 0 and 100
+                evidence_score = min(100, (
+                    (avg_strong * 0.7) +
+                    ((1 - avg_weak) * 0.3)
+                ) * 100)
+            else:
+                evidence_score = 0
+
+            # Sort and select top evidence phrases
+            sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
+            # Filter out formatting text and duplicates
+            unique_phrases = []
+            seen = set()
+            for phrase in sorted_phrases:
+                clean_text = phrase['text'].strip()
+                if clean_text not in seen and not any(
+                    marker in clean_text.lower()
+                    for marker in ['more on this story', 'click here', 'read more']
+                ):
+                    unique_phrases.append(clean_text)
+                    seen.add(clean_text)
+                if len(unique_phrases) >= 5:
+                    break
+
+            return {
+                "evidence_based_score": round(evidence_score, 1),
+                "flagged_phrases": unique_phrases
+            }
+
+        except Exception as e:
+            logger.error(f"LLM analysis failed: {str(e)}")
+            return None
+
+    def _analyze_traditional(self, text: str) -> Dict[str, Any]:
+        """Traditional evidence analysis as fallback."""
         try:
             text_lower = text.lower()

+            # Find citations and evidence
+            evidence_phrases = []
+            for marker in self.citation_markers:
+                index = text_lower.find(marker)
+                while index != -1:
+                    # Get the sentence containing the marker
+                    start = max(0, text_lower.rfind('.', 0, index) + 1)
+                    end = text_lower.find('.', index)
+                    if end == -1:
+                        end = len(text_lower)
+
+                    evidence_phrases.append(text[start:end].strip())
+                    index = text_lower.find(marker, end)
+
+            # Count vague references
             vague_count = sum(1 for marker in self.vague_markers if marker in text_lower)

+            # Calculate score
+            citation_count = len(evidence_phrases)
             base_score = min(citation_count * 20, 100)
             penalty = vague_count * 10

             evidence_score = max(0, base_score - penalty)

             return {
-                "evidence_based_score": evidence_score
+                "evidence_based_score": evidence_score,
+                "flagged_phrases": list(set(evidence_phrases))[:5]  # Limit to top 5 unique phrases
             }

+        except Exception as e:
+            logger.error(f"Traditional analysis failed: {str(e)}")
+            return {
+                "evidence_based_score": 0,
+                "flagged_phrases": []
+            }
+
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Analyze evidence using LLM with fallback to traditional method."""
+        try:
+            # Try LLM analysis if enabled and available
+            if self.use_ai and self.llm_available:
+                llm_result = self._analyze_with_llm(text)
+                if llm_result:
+                    return llm_result
+
+            # Use traditional analysis
+            logger.info("Using traditional evidence analysis")
+            return self._analyze_traditional(text)
+
         except Exception as e:
             logger.error(f"Error in evidence analysis: {str(e)}")
             return {
-                "evidence_based_score": 0
+                "evidence_based_score": 0,
+                "flagged_phrases": []
             }
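To make the traditional fallback's arithmetic concrete, here is a self-contained sketch of the citation/vague-marker scoring: +20 points per citation marker occurrence (capped at 100), minus 10 per vague marker. The marker lists and sample text are illustrative, not the analyzer's full resource lists.

# Standalone sketch of the traditional evidence scoring formula.
citation_markers = ["according to", "said", "reported"]
vague_markers = ["some say", "many believe", "allegedly"]

text = (
    "According to the ministry, exports rose 4% last quarter. "
    "Some say the figures are inflated. "
    "The finance minister said the trend should continue."
)
text_lower = text.lower()

citation_count = sum(1 for m in citation_markers if m in text_lower)
vague_count = sum(1 for m in vague_markers if m in text_lower)

base_score = min(citation_count * 20, 100)   # 2 citation markers -> 40
penalty = vague_count * 10                   # 1 vague marker -> 10
evidence_score = max(0, base_score - penalty)
print(evidence_score)                        # 30
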
mediaunmasked/analyzers/headline_analyzer.py
CHANGED
@@ -1,7 +1,6 @@
 import logging
 from typing import Dict, Any, List
-from transformers import pipeline
-from transformers import AutoTokenizer
+from transformers import pipeline, AutoTokenizer
 import numpy as np
 import nltk
 from nltk.tokenize import sent_tokenize

@@ -9,12 +8,38 @@
 logger = logging.getLogger(__name__)

 class HeadlineAnalyzer:
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
-        self.max_length = 512
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize the analyzers for headline analysis.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        if use_ai:
+            try:
+                # NLI model for contradiction/entailment
+                self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
+
+                # Zero-shot classifier for clickbait and sensationalism
+                self.zero_shot = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1
+                )
+
+                self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
+                self.max_length = 512
+                self.llm_available = True
+                logger.info("LLM pipelines initialized successfully for headline analysis")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipelines: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing headline analyzer in traditional mode")

     def _split_content(self, headline: str, content: str) -> List[str]:
         """Split content into sections that fit within token limit."""
         (body unchanged apart from dropping an inline comment and the verbose
         content-splitting log message)
         return sections

@@ -42,141 +66,226 @@
-    (the previous _analyze_section — single NLI pass with a 0.1 contradiction
-    threshold and a 0.1-per-contradiction penalty — is removed)
+    def _analyze_section(self, headline: str, section: str) -> Dict[str, Any]:
+        """Analyze a single section for headline accuracy and sensationalism."""
+        try:
+            # Download NLTK data if needed
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                nltk.download('punkt')
+
+            sentences = sent_tokenize(section)
+
+            # Analyze headline against content for contradiction/entailment
+            nli_scores = []
+            flagged_phrases = []
+
+            # Categories for sensationalism check
+            sensationalism_categories = [
+                "clickbait",
+                "sensationalized",
+                "misleading",
+                "factual reporting",
+                "accurate headline"
+            ]
+
+            # Check headline for sensationalism
+            sensationalism_result = self.zero_shot(headline, sensationalism_categories, multi_label=True)
+            sensationalism_scores = {
+                label: score
+                for label, score in zip(sensationalism_result['labels'], sensationalism_result['scores'])
+            }
+
+            # Analyze each sentence for contradiction/support
+            for sentence in sentences:
+                if len(sentence.strip()) > 10:
+                    # Check for contradiction/entailment
+                    input_text = f"{headline} [SEP] {sentence}"
+                    nli_result = self.nli_pipeline(input_text, top_k=None)
+                    scores = {item['label']: item['score'] for item in nli_result}
+                    nli_scores.append(scores)
+
+                    # Flag contradictory or highly sensationalized content
+                    if scores.get('CONTRADICTION', 0) > 0.4:
+                        flagged_phrases.append({
+                            'text': sentence.strip(),
+                            'type': 'contradiction',
+                            'score': scores['CONTRADICTION']
+                        })
+
+            # Calculate aggregate scores
+            avg_scores = {
+                label: np.mean([score[label] for score in nli_scores])
+                for label in ['ENTAILMENT', 'CONTRADICTION', 'NEUTRAL']
+            }
+
+            # Calculate headline accuracy score
+            accuracy_components = {
+                'entailment': avg_scores['ENTAILMENT'] * 0.4,
+                'non_contradiction': (1 - avg_scores['CONTRADICTION']) * 0.3,
+                'non_sensational': (
+                    sensationalism_scores.get('factual reporting', 0) +
+                    sensationalism_scores.get('accurate headline', 0)
+                ) * 0.15,
+                'non_clickbait': (
+                    1 - sensationalism_scores.get('clickbait', 0) -
+                    sensationalism_scores.get('sensationalized', 0)
+                ) * 0.15
+            }
+
+            accuracy_score = sum(accuracy_components.values()) * 100
+
+            # Sort and limit flagged phrases
+            sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
+            top_phrases = [phrase['text'] for phrase in sorted_phrases[:5]]
+
+            return {
+                "accuracy_score": accuracy_score,
+                "flagged_phrases": top_phrases,
+                "detailed_scores": {
+                    "nli": avg_scores,
+                    "sensationalism": sensationalism_scores
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"Section analysis failed: {str(e)}")
+            return {
+                "accuracy_score": 0,
+                "flagged_phrases": [],
+                "detailed_scores": {}
+            }
+
+    def _analyze_traditional(self, headline: str, content: str) -> Dict[str, Any]:
+        """Traditional headline analysis method."""
+        try:
+            # Download NLTK data if needed
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                nltk.download('punkt')
+
+            # Basic metrics
+            headline_words = set(headline.lower().split())
+            content_words = set(content.lower().split())
+
+            # Calculate word overlap
+            overlap_words = headline_words.intersection(content_words)
+            overlap_score = len(overlap_words) / len(headline_words) if headline_words else 0
+
+            # Check for clickbait patterns
+            clickbait_patterns = [
+                "you won't believe", "shocking", "mind blowing", "amazing", "incredible",
+                "unbelievable", "must see", "click here", "find out", "what happens next"
+            ]
+
+            clickbait_count = sum(1 for pattern in clickbait_patterns if pattern in headline.lower())
+            clickbait_penalty = clickbait_count * 10  # 10% penalty per clickbait phrase
+
+            # Calculate final score (0-100)
+            base_score = overlap_score * 100
+            final_score = max(0, min(100, base_score - clickbait_penalty))
+
+            # Find potentially misleading phrases
+            flagged_phrases = []
+            sentences = sent_tokenize(content)
+
+            for sentence in sentences:
+                # Flag sentences that directly contradict headline words
+                sentence_words = set(sentence.lower().split())
+                if len(headline_words.intersection(sentence_words)) > 2:
+                    flagged_phrases.append(sentence.strip())
+
+                # Flag sentences with clickbait patterns
+                if any(pattern in sentence.lower() for pattern in clickbait_patterns):
+                    flagged_phrases.append(sentence.strip())
+
+            return {
+                "headline_vs_content_score": round(final_score, 1),
+                "flagged_phrases": list(set(flagged_phrases))[:5]  # Limit to top 5 unique phrases
+            }
+
+        except Exception as e:
+            logger.error(f"Traditional analysis failed: {str(e)}")
+            return {
+                "headline_vs_content_score": 0,
+                "flagged_phrases": []
+            }

     def analyze(self, headline: str, content: str) -> Dict[str, Any]:
-        """Analyze how well the headline matches the content …"""
+        """Analyze how well the headline matches the content."""
         try:
             logger.info("\n" + "="*50)
             logger.info("HEADLINE ANALYSIS STARTED")
             logger.info("="*50)

             if not headline.strip() or not content.strip():
                 logger.warning("Empty headline or content provided")
                 return {
                     "headline_vs_content_score": 0,
-                    "entailment_score": 0,
-                    "contradiction_score": 0,
-                    "contradictory_phrases": []
+                    "flagged_phrases": []
                 }

-            (the previous content-length warning and single-path NLI scoring —
-            mean entailment, max contradiction, mean neutral combined with
-            0.6/0.3/0.1 weights into a final consistency score — are removed)
+            # Use LLM analysis if available and enabled
+            if self.use_ai and self.llm_available:
+                logger.info("Using LLM analysis for headline")
+                # Split content if needed
                 sections = self._split_content(headline, content)
+                section_results = []

                 # Analyze each section
+                for section in sections:
+                    result = self._analyze_section(headline, section)
+                    section_results.append(result)
+
+                # Aggregate results across sections
+                accuracy_scores = [r['accuracy_score'] for r in section_results]
+                final_score = np.mean(accuracy_scores)
+
+                # Combine flagged phrases from all sections
+                all_phrases = []
+                for result in section_results:
+                    all_phrases.extend(result['flagged_phrases'])
+
+                # Remove duplicates and limit to top 5
+                unique_phrases = list(dict.fromkeys(all_phrases))[:5]
+
+                return {
+                    "headline_vs_content_score": round(final_score, 1),
+                    "flagged_phrases": unique_phrases
+                }
             else:
+                # Use traditional analysis
+                logger.info("Using traditional headline analysis")
+                return self._analyze_traditional(headline, content)

         except Exception as e:
-            logger.error(f"Error Type: {type(e).__name__}")
-            logger.error(f"Error Message: {str(e)}")
-            logger.error("Stack Trace:", exc_info=True)
+            logger.error(f"Headline analysis failed: {str(e)}")
             return {
                 "headline_vs_content_score": 0,
-                "entailment_score": 0,
-                "contradiction_score": 0,
-                "contradictory_phrases": []
+                "flagged_phrases": []
             }
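A small standalone sketch of the per-sentence NLI check that the new _analyze_section performs. It assumes the roberta-large-mnli weights can be downloaded; the headline and sentence are invented for illustration.

from transformers import pipeline

# Same pipeline the analyzer builds in AI mode.
nli = pipeline("text-classification", model="roberta-large-mnli")

headline = "City bans all cars from downtown"
sentence = "Officials said private cars will still be allowed on most downtown streets."

# The analyzer concatenates headline and sentence with [SEP] and reads all class scores.
scores = {item["label"]: item["score"] for item in nli(f"{headline} [SEP] {sentence}", top_k=None)}
print(scores)  # e.g. {'CONTRADICTION': ..., 'NEUTRAL': ..., 'ENTAILMENT': ...}

# A CONTRADICTION score above 0.4 would flag this sentence in the new analyzer.
if scores.get("CONTRADICTION", 0) > 0.4:
    print("flagged as contradicting the headline")
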
mediaunmasked/analyzers/scoring.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict, Any
+from typing import Dict, Any, Literal
 import logging

 from .headline_analyzer import HeadlineAnalyzer

@@ -8,17 +8,34 @@
 logger = logging.getLogger(__name__)

+# Define analysis mode type
+AnalysisMode = Literal['ai', 'traditional']
+
 class MediaScorer:
-    def __init__(self):
-        (analyzers previously constructed without an analysis-mode preference)
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize the MediaScorer with required analyzers.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.analysis_mode: AnalysisMode = 'ai' if use_ai else 'traditional'
+        logger.info(f"Initializing MediaScorer with {self.analysis_mode} analysis")
+
+        # Initialize analyzers with analysis mode preference
+        self.headline_analyzer = HeadlineAnalyzer(use_ai=use_ai)
+        self.sentiment_analyzer = SentimentAnalyzer(use_ai=use_ai)
+        self.bias_analyzer = BiasAnalyzer(use_ai=use_ai)
+        self.evidence_analyzer = EvidenceAnalyzer(use_ai=use_ai)
+
+        logger.info(f"All analyzers initialized in {self.analysis_mode} mode")

     def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
         """Calculate final media credibility score."""
         try:
+            logger.info(f"Calculating media score using {self.analysis_mode} analysis")
+
             headline_analysis = self.headline_analyzer.analyze(headline, content)
             sentiment_analysis = self.sentiment_analyzer.analyze(content)
             bias_analysis = self.bias_analyzer.analyze(content)

@@ -74,6 +91,7 @@
             result = {
                 "media_unmasked_score": round(final_score, 1),
                 "rating": rating,
+                "analysis_mode": self.analysis_mode,
                 "details": {
                     "headline_analysis": {
                         "headline_vs_content_score": headline_analysis["headline_vs_content_score"],

@@ -107,6 +125,7 @@
             return {
                 "media_unmasked_score": 0,
                 "rating": "Error",
+                "analysis_mode": self.analysis_mode,
                 "details": {
                     "headline_analysis": {"headline_vs_content_score": 0, "flagged_phrases": []},
                     "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
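A rough usage sketch of the new constructor flag, assuming the repository's package is importable as mediaunmasked and ignoring model download time. The headline and content strings are placeholders, not data from the repository.

from mediaunmasked.analyzers.scoring import MediaScorer

headline = "Government unveils sweeping new climate plan"
content = "According to the environment ministry, the plan sets binding targets for 2030..."

ai_scorer = MediaScorer(use_ai=True)            # LLM-backed analyzers (falls back if models fail to load)
traditional_scorer = MediaScorer(use_ai=False)  # keyword/heuristic analyzers only

for scorer in (ai_scorer, traditional_scorer):
    result = scorer.calculate_media_score(headline, content)
    print(result["analysis_mode"], result["media_unmasked_score"], result["rating"])
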
mediaunmasked/analyzers/sentiment_analyzer.py
CHANGED
@@ -1,11 +1,23 @@
 import logging
 from typing import Dict, Any, List
 from textblob import TextBlob
+from transformers import pipeline
+import numpy as np

 logger = logging.getLogger(__name__)

 class SentimentAnalyzer:
-    def __init__(self):
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize sentiment analyzer with both traditional and LLM-based approaches.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        # Traditional manipulation patterns
         self.manipulative_patterns = [
             "experts say",
             "sources claim",

@@ -34,13 +303,10 @@
             else:
                 sentiment = "Neutral"

-            if manipulation_score > 50:
-                sentiment = "Manipulative"
-
             return {
                 "sentiment": sentiment,
                 "manipulation_score": min(manipulation_score, 100),
-                "flagged_phrases": manipulative_phrases
             }

         except Exception as e:

@@ -17,10 +29,267 @@
             "without doubt",
             "certainly"
         ]
+
+        if use_ai:
+            try:
+                # Initialize LLM pipelines
+                self.sentiment_pipeline = pipeline(
+                    "text-classification",
+                    model="SamLowe/roberta-base-go_emotions",
+                    top_k=None
+                )
+                self.toxicity_pipeline = pipeline(
+                    "text-classification",
+                    model="martin-ha/toxic-comment-model",
+                    top_k=None
+                )
+                self.manipulation_pipeline = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1
+                )
+                self.llm_available = True
+                logger.info("LLM pipelines initialized successfully")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipelines: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing sentiment analyzer in traditional mode")
+
+    def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
+        """Perform sentiment analysis using LLM models."""
+        try:
+            logger.info("Starting LLM sentiment analysis")
+
+            # Clean the text of formatting markers
+            cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
+            cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
+                                     if not line.startswith('[') and not line.startswith('More on'))
+
+            logger.info("Text cleaned and prepared for analysis")
+
+            # Split text into chunks of 512 tokens (approximate)
+            chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
+            logger.info(f"Text split into {len(chunks)} chunks for processing")
+
+            # Initialize aggregation variables
+            sentiment_scores = []
+            toxicity_scores = []
+            manipulation_scores = []
+            flagged_phrases = []
+
+            manipulation_categories = [
+                "emotional manipulation",
+                "fear mongering",
+                "propaganda",
+                "factual reporting",
+                "balanced perspective"
+            ]
+
+            # Process each chunk
+            for i, chunk in enumerate(chunks, 1):
+                logger.info(f"Processing chunk {i}/{len(chunks)}")
+
+                try:
+                    # Get emotion scores with detailed logging
+                    logger.debug(f"Analyzing emotions for chunk {i}")
+                    emotions = self.sentiment_pipeline(chunk)
+                    logger.debug(f"Raw emotion response: {emotions}")
+
+                    # Handle different response formats
+                    if isinstance(emotions, list):
+                        # Multiple results format
+                        for emotion in emotions:
+                            if isinstance(emotion, dict) and 'label' in emotion and 'score' in emotion:
+                                sentiment_scores.append(emotion)
+                    elif isinstance(emotions, dict) and 'label' in emotions and 'score' in emotions:
+                        # Single result format
+                        sentiment_scores.append(emotions)
+                    logger.debug(f"Processed emotion scores: {sentiment_scores}")
+
+                    # Get toxicity scores
+                    logger.debug(f"Analyzing toxicity for chunk {i}")
+                    toxicity = self.toxicity_pipeline(chunk)
+                    if isinstance(toxicity, list):
+                        toxicity_scores.extend(toxicity)
+                    else:
+                        toxicity_scores.append(toxicity)
+                    logger.debug(f"Processed toxicity scores: {toxicity_scores}")
+
+                    # Get manipulation scores
+                    logger.debug(f"Analyzing manipulation for chunk {i}")
+                    manipulation = self.manipulation_pipeline(
+                        chunk,
+                        manipulation_categories,
+                        multi_label=True
+                    )
+
+                    if isinstance(manipulation, dict) and 'labels' in manipulation and 'scores' in manipulation:
+                        manipulation_scores.append({
+                            label: score
+                            for label, score in zip(manipulation['labels'], manipulation['scores'])
+                        })
+                    logger.debug(f"Processed manipulation scores: {manipulation_scores}")
+
+                    # Analyze sentences for manipulation
+                    sentences = chunk.split('.')
+                    for sentence in sentences:
+                        if len(sentence.strip()) > 10:
+                            sent_result = self.manipulation_pipeline(
+                                sentence.strip(),
+                                manipulation_categories,
+                                multi_label=False
+                            )
+                            if (sent_result['labels'][0] in ["emotional manipulation", "fear mongering", "propaganda"]
+                                    and sent_result['scores'][0] > 0.7):
+                                flagged_phrases.append({
+                                    'text': sentence.strip(),
+                                    'type': sent_result['labels'][0],
+                                    'score': sent_result['scores'][0]
+                                })
+
+                except Exception as chunk_error:
+                    logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
+                    continue
+
+            logger.info("All chunks processed, aggregating scores")
+
+            # Aggregate scores with error handling
+            def aggregate_scores(scores_list, score_type: str):
+                try:
+                    all_scores = {}
+                    for scores in scores_list:
+                        if isinstance(scores, dict):
+                            if 'label' in scores and 'score' in scores:
+                                label = scores['label']
+                                score = scores['score']
+                            else:
+                                # Handle direct label-score mapping
+                                for label, score in scores.items():
+                                    if label not in all_scores:
+                                        all_scores[label] = []
+                                    if isinstance(score, (int, float)):
+                                        all_scores[label].append(score)
+                                continue
+                        else:
+                            logger.warning(f"Unexpected score format in {score_type}: {scores}")
+                            continue
+
+                        if isinstance(label, (str, bytes)):
+                            if label not in all_scores:
+                                all_scores[label] = []
+                            if isinstance(score, (int, float)):
+                                all_scores[label].append(score)
+
+                    return {k: np.mean(v) for k, v in all_scores.items() if v}
+                except Exception as agg_error:
+                    logger.error(f"Error aggregating {score_type} scores: {str(agg_error)}")
+                    return {}
+
+            emotion_scores = aggregate_scores(sentiment_scores, "emotion")
+            toxicity_scores = aggregate_scores(toxicity_scores, "toxicity")
+            logger.debug(f"Aggregated emotion scores: {emotion_scores}")
+            logger.debug(f"Aggregated toxicity scores: {toxicity_scores}")
+
+            # Aggregate manipulation scores
+            manipulation_agg = {
+                category: np.mean([scores.get(category, 0) for scores in manipulation_scores])
+                for category in manipulation_categories
+            }
+            logger.debug(f"Aggregated manipulation scores: {manipulation_agg}")
+
+            # Calculate manipulation score based on multiple factors
+            manipulation_indicators = {
+                'emotional manipulation': 0.4,
+                'fear mongering': 0.3,
+                'propaganda': 0.3,
+                'toxic': 0.2,
+                'severe_toxic': 0.3,
+                'threat': 0.2
+            }
+
+            # Combine toxicity and manipulation scores
+            combined_scores = {**toxicity_scores, **manipulation_agg}
+            manipulation_score = min(100, sum(
+                combined_scores.get(k, 0) * weight
+                for k, weight in manipulation_indicators.items()
+            ) * 100)
+
+            logger.info(f"Final manipulation score: {manipulation_score}")
+
+            # Determine overall sentiment
+            positive_emotions = ['admiration', 'joy', 'amusement', 'approval']
+            negative_emotions = ['disgust', 'anger', 'disappointment', 'fear']
+            neutral_emotions = ['neutral', 'confusion', 'realization']
+
+            pos_score = sum(emotion_scores.get(emotion, 0) for emotion in positive_emotions)
+            neg_score = sum(emotion_scores.get(emotion, 0) for emotion in negative_emotions)
+            neu_score = sum(emotion_scores.get(emotion, 0) for emotion in neutral_emotions)
+
+            logger.debug(f"Sentiment scores - Positive: {pos_score}, Negative: {neg_score}, Neutral: {neu_score}")
logger.debug(f"Sentiment scores - Positive: {pos_score}, Negative: {neg_score}, Neutral: {neu_score}")
|
233 |
+
|
234 |
+
# Determine sentiment based on highest score
|
235 |
+
max_score = max(pos_score, neg_score, neu_score)
|
236 |
+
if max_score == pos_score and pos_score > 0.3:
|
237 |
+
sentiment = "Positive"
|
238 |
+
elif max_score == neg_score and neg_score > 0.3:
|
239 |
+
sentiment = "Negative"
|
240 |
+
else:
|
241 |
+
sentiment = "Neutral"
|
242 |
+
|
243 |
+
logger.info(f"Final sentiment determination: {sentiment}")
|
244 |
+
|
245 |
+
# Sort and limit flagged phrases by manipulation score
|
246 |
+
sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
|
247 |
+
unique_phrases = []
|
248 |
+
seen = set()
|
249 |
+
for phrase in sorted_phrases:
|
250 |
+
clean_text = phrase['text'].strip()
|
251 |
+
if clean_text not in seen:
|
252 |
+
unique_phrases.append(clean_text)
|
253 |
+
seen.add(clean_text)
|
254 |
+
if len(unique_phrases) >= 5:
|
255 |
+
break
|
256 |
+
|
257 |
+
logger.info("LLM analysis completed successfully")
|
258 |
+
|
259 |
+
return {
|
260 |
+
"sentiment": sentiment,
|
261 |
+
"manipulation_score": manipulation_score,
|
262 |
+
"flagged_phrases": unique_phrases,
|
263 |
+
"detailed_scores": {
|
264 |
+
"emotions": emotion_scores,
|
265 |
+
"manipulation": manipulation_agg,
|
266 |
+
"toxicity": toxicity_scores
|
267 |
+
}
|
268 |
+
}
|
269 |
+
|
270 |
+
except Exception as e:
|
271 |
+
logger.error(f"LLM analysis failed: {str(e)}", exc_info=True)
|
272 |
+
return None
|
273 |
|

    def analyze(self, text: str) -> Dict[str, Any]:
        """
        Analyze sentiment using LLM with fallback to traditional methods.

        Args:
            text: The text to analyze

        Returns:
            Dict containing sentiment analysis results
        """
        try:
            # Try LLM analysis if enabled and available
            if self.use_ai and self.llm_available:
                llm_result = self._analyze_with_llm(text)
                if llm_result:
                    return llm_result

            # Use traditional analysis
            logger.info("Using traditional sentiment analysis")
            blob = TextBlob(text)
            sentiment_score = blob.sentiment.polarity

            # ... (unchanged traditional-analysis lines collapsed in the diff) ...
            else:
                sentiment = "Neutral"

            return {
                "sentiment": sentiment,
                "manipulation_score": min(manipulation_score, 100),
                "flagged_phrases": manipulative_phrases[:5]  # Limit to top 5 phrases
            }

        except Exception as e:
package-lock.json
CHANGED
@@ -6,6 +6,9 @@
    "": {
      "dependencies": {
        "supabase": "^2.12.1"
      },
      "devDependencies": {
        "@types/react": "^19.0.10"
      }
    },
    "node_modules/@isaacs/cliui": {
@@ -47,6 +50,16 @@
        "node": ">=14"
      }
    },
    "node_modules/@types/react": {
      "version": "19.0.10",
      "resolved": "https://registry.npmjs.org/@types/react/-/react-19.0.10.tgz",
      "integrity": "sha512-JuRQ9KXLEjaUNjTWpzuR231Z2WpIwczOkBEIvbHNCzQefFIT0L8IqE6NV6ULLyC1SI/i234JnDoMkfg+RjQj2g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "csstype": "^3.0.2"
      }
    },
    "node_modules/agent-base": {
      "version": "7.1.3",
      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz",
@@ -161,6 +174,13 @@
        "node": ">= 8"
      }
    },
    "node_modules/csstype": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
      "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/data-uri-to-buffer": {
      "version": "4.0.1",
      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
package.json
CHANGED
@@ -1,5 +1,8 @@
{
  "dependencies": {
    "supabase": "^2.12.1"
  },
  "devDependencies": {
    "@types/react": "^19.0.10"
  }
}
tests/test_LLM_comparisons.py
ADDED
@@ -0,0 +1,199 @@
from transformers import pipeline, AutoTokenizer
import unittest
from mediaunmasked.scrapers.article_scraper import ArticleScraper
from tabulate import tabulate
import torch
from typing import List
import logging
import transformers

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class MediaUnmaskLLMTester(unittest.TestCase):
    transformers.logging.set_verbosity_error()

    def setUp(self):
        """Set up LLMs and scrape article."""
        self.models = {
            # Upgraded Evidence-Based Models
            "RoBERTa-MNLI": {"model": "roberta-large-mnli", "max_length": 512},  # Corrected to standard MNLI model
            "DeBERTa-Fact": {"model": "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli", "max_length": 512},
            "T5-Large": {"model": "google/t5-v1_1-large", "max_length": 512},
            "SciBERT": {"model": "allenai/scibert_scivocab_uncased", "max_length": 512},
            "BART-FEVER": {"model": "facebook/bart-large", "max_length": 1024},  # Note: needs FEVER fine-tuning
            "MultiQA-MiniLM": {"model": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "max_length": 512},

            # Existing Models for Benchmarking
            "BART-MNLI": {"model": "facebook/bart-large-mnli", "max_length": 1024},
            "RoBERTa-Bias": {"model": "cardiffnlp/twitter-roberta-base-hate", "max_length": 512},
            "DistilBERT-Sentiment": {"model": "distilbert-base-uncased-finetuned-sst-2-english", "max_length": 512},
            "GPT2-Generation": {"model": "gpt2", "max_length": 1024},
        }

        self.device = 0 if torch.cuda.is_available() else -1
        self.scraper = ArticleScraper()
        self.article_url = "https://www.snopes.com/fact-check/trump-super-bowl-cost-taxpayers/"
        self.article_data = self.scraper.scrape_article(self.article_url) or {}

        self.results = {
            "headline": self.article_data.get("headline", "No headline"),
            "content": self.article_data.get("content", "No content available"),
            "scores": {}
        }

        self.tokenizers = {name: AutoTokenizer.from_pretrained(model["model"]) for name, model in self.models.items()}

    def _split_content(self, model_name: str, content: str) -> List[str]:
        """Split content into sections within model token limits, ensuring valid output."""
        tokenizer = self.tokenizers[model_name]
        max_length = self.models[model_name]["max_length"]

        if not content or not content.strip():
            return ["No valid content"]

        encoded = tokenizer.encode_plus(content, add_special_tokens=True, truncation=True, max_length=max_length)
        decoded = tokenizer.decode(encoded["input_ids"], skip_special_tokens=True)

        return [decoded] if decoded.strip() else ["No valid content"]

    def _get_flagged_phrases(self, model_pipeline, sections, threshold=0.6, top_k=5):
        """Extract top-scoring flagged phrases while handling None values safely."""
        if not sections or not isinstance(sections, list):
            return [("None", "N/A")]

        flagged_phrases = []

        for section in sections:
            if not section or not isinstance(section, str) or not section.strip():  # Ensure section is a valid string
                continue

            sentences = [s.strip() for s in section.split(". ") if s.strip()]
            for sentence in sentences:
                if not sentence or not isinstance(sentence, str):  # Double-check before running the model
                    continue

                try:
                    preds = model_pipeline(sentence)
                    if preds and isinstance(preds, list):
                        top_pred = max(preds, key=lambda x: x["score"])
                        if top_pred["score"] >= threshold:
                            short_phrase = " ".join(sentence.split()[:10])  # Shorten for readability
                            flagged_phrases.append((short_phrase, top_pred["score"], top_pred["label"]))
                except Exception as e:
                    logger.error(f"Error analyzing sentence: {e}")
                    continue

        flagged_phrases.sort(key=lambda x: x[1], reverse=True)
        return [(phrase, label) for phrase, _, label in flagged_phrases[:top_k]] or [("None", "N/A")]

    def test_headline_vs_content(self):
        """Check headline-content alignment."""
        headline = self.results["headline"]
        content = self.results["content"]

        for model_name in self.models:
            with self.subTest(model=model_name):
                analyzer = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
                sections = self._split_content(model_name, content)

                headline_score = max(analyzer(headline), key=lambda x: x["score"])["score"]
                content_scores = [max(analyzer(section), key=lambda x: x["score"])["score"] for section in sections]
                avg_content_score = sum(content_scores) / len(content_scores)
                consistency_score = abs(headline_score - avg_content_score)

                flagged_phrases = self._get_flagged_phrases(analyzer, sections)
                self.results["scores"].setdefault("headline_vs_content", {})[model_name] = {
                    "score": consistency_score,
                    "flagged_phrases": flagged_phrases
                }
                self.assertIsNotNone(consistency_score)

    def test_evidence_based(self):
        """Test evidence-based content."""
        content = self.results["content"]

        for model_name in self.models:
            if any(keyword in model_name.lower() for keyword in ["mnli", "fact", "fever", "qa"]):
                with self.subTest(model=model_name):
                    classifier = pipeline("zero-shot-classification", model=self.models[model_name]["model"], device=self.device)
                    sections = self._split_content(model_name, content)

                    results = [classifier(section, candidate_labels=["evidence-based", "opinion", "misleading"]) for section in sections]
                    avg_score = sum(r["scores"][r["labels"].index("evidence-based")] for r in results) / len(results)

                    flagged_phrases = self._get_flagged_phrases(classifier, sections)
                    self.results["scores"].setdefault("evidence_based", {})[model_name] = {
                        "score": avg_score,
                        "flagged_phrases": flagged_phrases
                    }
                    self.assertIsNotNone(avg_score)

    def test_manipulative_language(self):
        """Detect manipulative language."""
        content = self.results["content"]

        for model_name in self.models:
            if "sentiment" in model_name.lower() or "emotion" in model_name.lower() or "gpt" in model_name.lower():
                with self.subTest(model=model_name):
                    detector = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
                    sections = self._split_content(model_name, content)

                    results = [max(detector(section), key=lambda x: x["score"]) for section in sections]
                    avg_score = sum(r["score"] for r in results) / len(results)

                    flagged_phrases = self._get_flagged_phrases(detector, sections)
                    self.results["scores"].setdefault("manipulative_language", {})[model_name] = {
                        "score": avg_score,
                        "flagged_phrases": flagged_phrases
                    }
                    self.assertIsNotNone(avg_score)

    def test_bias_detection(self):
        """Detect bias."""
        content = self.results["content"]

        for model_name in self.models:
            if "bias" in model_name.lower() or "toxic" in model_name.lower() or "roberta" in model_name.lower():
                with self.subTest(model=model_name):
                    detector = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
                    sections = self._split_content(model_name, content)

                    results = [max(detector(section), key=lambda x: x["score"]) for section in sections]
                    avg_score = sum(r["score"] for r in results) / len(results)

                    flagged_phrases = self._get_flagged_phrases(detector, sections)
                    self.results["scores"].setdefault("bias_detection", {})[model_name] = {
                        "score": avg_score,
                        "flagged_phrases": flagged_phrases
                    }
                    self.assertIsNotNone(avg_score)

    def tearDown(self):
        """Print top 2 models per test with clearer formatting."""
        print("\n=== Top Model Recommendations ===")

        for test_type, model_results in self.results["scores"].items():
            print(f"\nTop 2 Models for {test_type}:")

            sorted_results = sorted(
                model_results.items(),
                key=lambda x: x[1]["score"],
                reverse=(test_type != "headline_vs_content")
            )

            top_2 = sorted_results[:2]
            table = [
                [
                    model,
                    f"{res['score']:.6f}",
                    ", ".join(f"{phrase} ({label})" for phrase, label in res["flagged_phrases"])
                ]
                for model, res in top_2
            ]

            print(tabulate(table, headers=["Model", "Score", "Flagged Phrases"], tablefmt="grid"))
            criteria = "Lowest consistency score (better alignment)" if test_type == "headline_vs_content" else "Highest detection score"
            print(f"Criteria: {criteria}")

if __name__ == "__main__":
    unittest.main()
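
Because this suite downloads several large checkpoints (RoBERTa-large, DeBERTa-v3-large, BART-large) and scrapes a live URL, it is usually run on its own rather than with the rest of the tests. A minimal runner sketch, assuming the file sits at tests/test_LLM_comparisons.py under the project root:

# run_llm_comparisons.py - convenience runner for just this comparison suite
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover("tests", pattern="test_LLM_comparisons.py")
    unittest.TextTestRunner(verbosity=2).run(suite)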