updating logging
mediaunmasked/analyzers/bias_analyzer.py
CHANGED
@@ -140,27 +140,42 @@ class BiasAnalyzer:
     def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
         """Analyze bias using LLM zero-shot classification with batch processing."""
         try:
+            logger.info("\n" + "="*50)
+            logger.info("BIAS ANALYSIS STARTED")
+            logger.info("="*50)
+
             # Define bias categories
             bias_categories = [
                 "left-wing bias",
                 "right-wing bias",
                 "neutral/balanced perspective"
             ]
+            logger.info("Using categories for analysis:")
+            for cat in bias_categories:
+                logger.info(f"  - {cat}")

             # Clean and prepare text
+            logger.info("\nCleaning and preparing text...")
             cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
             cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
                                      if not line.startswith('[') and not line.startswith('More on'))
+            logger.info(f"Text prepared - Length: {len(cleaned_text)} characters")

             # Split into larger chunks (4000 chars) for fewer API calls
             chunks = [cleaned_text[i:i+4000] for i in range(0, len(cleaned_text), 4000)]
+            logger.info(f"Split text into {len(chunks)} chunks for processing")

             # Process chunks in batches
             chunk_scores = []
             flagged_phrases = []

-            for chunk in chunks:
+            for i, chunk in enumerate(chunks, 1):
+                logger.info(f"\n{'-'*30}")
+                logger.info(f"Processing chunk {i}/{len(chunks)}")
+                logger.info(f"Chunk length: {len(chunk)} characters")
+
                 # Analyze chunk as a whole first
+                logger.info("Analyzing chunk for overall bias...")
                 chunk_result = self.classifier(
                     chunk,
                     bias_categories,
@@ -172,16 +187,24 @@
                     for label, score in zip(chunk_result['labels'], chunk_result['scores'])
                 })

+                logger.info("Chunk bias scores:")
+                for label, score in chunk_scores[-1].items():
+                    logger.info(f"  - {label}: {score:.3f}")
+
                 # Only analyze individual sentences if chunk shows strong bias
                 max_chunk_score = max(chunk_result['scores'])
                 if max_chunk_score > 0.6:
+                    logger.info(f"Strong bias detected (score: {max_chunk_score:.3f}), analyzing individual sentences...")
                     sentences = sent_tokenize(chunk)
+                    logger.info(f"Found {len(sentences)} sentences to analyze")
+
                     # Filter sentences for analysis (longer, potentially more meaningful ones)
                     relevant_sentences = [s.strip() for s in sentences if len(s.strip()) > 20]
+                    logger.info(f"Filtered to {len(relevant_sentences)} relevant sentences")

                     # Process sentences in batches of 8
-                    for
-                    batch = relevant_sentences[
+                    for j in range(0, len(relevant_sentences), 8):
+                        batch = relevant_sentences[j:j+8]
                         try:
                             batch_results = self.classifier(
                                 batch,
@@ -196,6 +219,8 @@
                             for sentence, result in zip(batch, batch_results):
                                 max_score = max(result['scores'])
                                 if max_score > 0.8 and result['labels'][0] != "neutral/balanced perspective":
+                                    logger.info(f"Found biased sentence (score: {max_score:.3f}, type: {result['labels'][0]}):")
+                                    logger.info(f"  \"{sentence}\"")
                                     flagged_phrases.append({
                                         "text": sentence,
                                         "type": result['labels'][0],
@@ -208,6 +233,7 @@
                         continue

             # Aggregate scores across chunks
+            logger.info("\nAggregating scores across all chunks...")
             aggregated_scores = {
                 category: np.mean([
                     scores[category]
@@ -216,6 +242,10 @@
                 for category in bias_categories
             }

+            logger.info("\nFinal aggregated scores:")
+            for category, score in aggregated_scores.items():
+                logger.info(f"  - {category}: {score:.3f}")
+
             # Calculate bias metrics
             left_score = aggregated_scores["left-wing bias"]
             right_score = aggregated_scores["right-wing bias"]
@@ -223,6 +253,7 @@

             # Calculate bias score (-1 to 1)
             bias_score = (right_score - left_score) / max(right_score + left_score, 0.0001)
+            logger.info(f"\nRaw bias score: {bias_score:.3f}")

             # Determine bias label
             if bias_score < -0.6:
@@ -240,8 +271,11 @@
             else:
                 bias = "Neutral"

+            logger.info(f"Determined bias label: {bias}")
+
             # Calculate bias percentage (0-100)
             bias_percentage = min(100, abs(bias_score * 100))
+            logger.info(f"Bias percentage: {bias_percentage:.1f}%")

             # Sort and limit flagged phrases
             sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
@@ -255,6 +289,10 @@
                 if len(unique_phrases) >= 5:
                     break

+            logger.info(f"\nFlagged {len(unique_phrases)} unique biased phrases")
+
+            logger.info("\nBias analysis completed successfully")
+
             return {
                 "bias": bias,
                 "bias_score": round(bias_score, 2),
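Note: the pattern the new logging wraps is the same in each analyzer: split the article into fixed-size chunks, run a zero-shot classifier per chunk, and record per-label scores. Below is a minimal standalone sketch of that loop; the function name, the chunk_size default, and the injected classifier callable are illustrative rather than the repository's API, and it assumes the classifier returns a Hugging Face zero-shot-style dict with 'labels' and 'scores', as the calls to self.classifier in this diff suggest.

import logging
from typing import Any, Callable, Dict, List

logger = logging.getLogger(__name__)

def classify_chunks(text: str,
                    classifier: Callable[..., Dict[str, Any]],
                    categories: List[str],
                    chunk_size: int = 4000) -> List[Dict[str, float]]:
    """Split text into fixed-size chunks, classify each, and log progress."""
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    logger.info("Split text into %d chunks for processing", len(chunks))

    chunk_scores: List[Dict[str, float]] = []
    for i, chunk in enumerate(chunks, 1):
        logger.info("Processing chunk %d/%d (%d characters)", i, len(chunks), len(chunk))
        result = classifier(chunk, categories)
        scores = dict(zip(result["labels"], result["scores"]))
        for label, score in scores.items():
            logger.info("  - %s: %.3f", label, score)
        chunk_scores.append(scores)
    return chunk_scores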
mediaunmasked/analyzers/evidence_analyzer.py
CHANGED
@@ -71,19 +71,27 @@ class EvidenceAnalyzer:
     def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
         """Analyze evidence using LLM."""
         try:
+            logger.info("\n" + "="*50)
+            logger.info("EVIDENCE ANALYSIS STARTED")
+            logger.info("="*50)
+
             # Clean the text of formatting markers
+            logger.info("Cleaning and preparing text...")
             cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
             cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
                                      if not line.startswith('[') and not line.startswith('More on'))
+            logger.info(f"Text prepared - Length: {len(cleaned_text)} characters")

             # Download NLTK data if needed
             try:
                 nltk.data.find('tokenizers/punkt')
             except LookupError:
+                logger.info("Downloading required NLTK data...")
                 nltk.download('punkt')

             # Split text into chunks
             chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
+            logger.info(f"Split text into {len(chunks)} chunks for processing")

             # Categories for evidence classification
             evidence_categories = [
@@ -95,15 +103,28 @@
                 "opinion statement"
             ]

+            logger.info("\nUsing evidence categories:")
+            for cat in evidence_categories:
+                logger.info(f"  - {cat}")
+
             chunk_scores = []
             flagged_phrases = []

-            for chunk in chunks:
+            for i, chunk in enumerate(chunks, 1):
+                logger.info(f"\n{'-'*30}")
+                logger.info(f"Processing chunk {i}/{len(chunks)}")
+                logger.info(f"Chunk length: {len(chunk)} characters")
+
                 # Analyze each sentence in the chunk
                 sentences = sent_tokenize(chunk)
+                logger.info(f"Found {len(sentences)} sentences to analyze")
+
+                sentence_count = 0
+                strong_evidence_count = 0

                 for sentence in sentences:
                     if len(sentence.strip()) > 10:
+                        sentence_count += 1
                         # Classify the type of evidence
                         result = self.classifier(
                             sentence.strip(),
@@ -141,17 +162,28 @@
                             marker in sentence.lower()
                             for marker in ['more on this story', 'click here', 'read more']
                         ):
+                            strong_evidence_count += 1
+                            logger.info(f"Found strong evidence (score: {strong_evidence:.3f}):")
+                            logger.info(f"  \"{sentence.strip()}\"")
                             flagged_phrases.append({
                                 'text': sentence.strip(),
                                 'type': 'strong_evidence',
                                 'score': strong_evidence
                             })
+
+                logger.info(f"Processed {sentence_count} sentences in chunk {i}")
+                logger.info(f"Found {strong_evidence_count} sentences with strong evidence")

             # Calculate overall evidence score
+            logger.info("\nCalculating final evidence scores...")
             if chunk_scores:
                 avg_strong = np.mean([s['strong_evidence'] for s in chunk_scores])
                 avg_weak = np.mean([s['weak_evidence'] for s in chunk_scores])

+                logger.info("Average evidence scores:")
+                logger.info(f"  - Strong evidence: {avg_strong:.3f}")
+                logger.info(f"  - Weak evidence: {avg_weak:.3f}")
+
                 # Evidence score formula:
                 # - Reward strong evidence (70% weight)
                 # - Penalize weak/unsubstantiated claims (30% weight)
@@ -162,6 +194,9 @@
                 ) * 100)
             else:
                 evidence_score = 0
+                logger.warning("No scores available, defaulting to 0")
+
+            logger.info(f"Final evidence score: {evidence_score:.1f}")

             # Sort and select top evidence phrases
             sorted_phrases = sorted(
@@ -169,6 +204,7 @@
                 key=lambda x: x['score'],
                 reverse=True
             )
+
             # Filter out formatting text and duplicates
             unique_phrases = []
             seen = set()
@@ -183,6 +219,10 @@
                 if len(unique_phrases) >= 5:
                     break

+            logger.info(f"\nFlagged {len(unique_phrases)} unique evidence-based phrases")
+
+            logger.info("\nEvidence analysis completed successfully")
+
             return {
                 "evidence_based_score": round(evidence_score, 1),
                 "flagged_phrases": unique_phrases
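Note: the comments just above the `) * 100)` context line describe a 70/30 weighting between strong and weak evidence, but the full expression sits outside the visible hunks. The sketch below is an assumption of how such a weighting could be computed and clamped to 0-100, not the repository's formula.

import numpy as np

def evidence_score(chunk_scores: list) -> float:
    """Combine averaged strong/weak evidence scores into a 0-100 score (hypothetical)."""
    if not chunk_scores:
        return 0.0
    avg_strong = float(np.mean([s["strong_evidence"] for s in chunk_scores]))
    avg_weak = float(np.mean([s["weak_evidence"] for s in chunk_scores]))
    # Reward strong evidence (70% weight), penalize weak claims (30% weight), clamp to 0-100.
    return max(0.0, min(100.0, (avg_strong * 0.7 - avg_weak * 0.3) * 100))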
mediaunmasked/analyzers/headline_analyzer.py
CHANGED
@@ -82,6 +82,12 @@ class HeadlineAnalyzer:
     def _analyze_section(self, headline: str, section: str) -> Dict[str, Any]:
         """Analyze a single section for headline accuracy and sensationalism."""
         try:
+            logger.info("\n" + "-"*30)
+            logger.info("ANALYZING SECTION")
+            logger.info("-"*30)
+            logger.info(f"Headline: {headline}")
+            logger.info(f"Section length: {len(section)} characters")
+
             # Download NLTK data if needed
             try:
                 nltk.data.find('tokenizers/punkt')
@@ -89,10 +95,12 @@
                 nltk.download('punkt')

             sentences = sent_tokenize(section)
+            logger.info(f"Found {len(sentences)} sentences in section")
+
             if not sentences:
                 logger.warning("No sentences found in section")
                 return {
-                    "accuracy_score": 50.0,
+                    "accuracy_score": 50.0,
                     "flagged_phrases": [],
                     "detailed_scores": {
                         "nli": {"ENTAILMENT": 0.0, "CONTRADICTION": 0.0, "NEUTRAL": 1.0},
@@ -109,7 +117,7 @@
                 "accurate headline"
             ]

-
+            logger.info("Checking headline for sensationalism...")
             sensationalism_result = self.zero_shot(
                 headline,
                 sensationalism_categories,
@@ -120,14 +128,16 @@
                 label: score
                 for label, score in zip(sensationalism_result['labels'], sensationalism_result['scores'])
             }
+            logger.info(f"Sensationalism scores: {sensationalism_scores}")

             # Filter relevant sentences (longer than 20 chars)
             relevant_sentences = [s.strip() for s in sentences if len(s.strip()) > 20]
+            logger.info(f"Found {len(relevant_sentences)} relevant sentences after filtering")

             if not relevant_sentences:
                 logger.warning("No relevant sentences found in section")
                 return {
-                    "accuracy_score": 50.0,
+                    "accuracy_score": 50.0,
                     "flagged_phrases": [],
                     "detailed_scores": {
                         "nli": {"ENTAILMENT": 0.0, "CONTRADICTION": 0.0, "NEUTRAL": 1.0},
@@ -140,6 +150,7 @@
             flagged_phrases = []
             batch_size = 8

+            logger.info("Processing sentences for contradictions...")
             for i in range(0, len(relevant_sentences), batch_size):
                 batch = relevant_sentences[i:i+batch_size]
                 batch_inputs = [f"{headline} [SEP] {sentence}" for sentence in batch]
@@ -154,14 +165,25 @@
                         scores = {item['label']: item['score'] for item in result}
                         nli_scores.append(scores)

-                        # Flag contradictory content
-                        if scores.get('CONTRADICTION', 0) > 0.
+                        # Flag contradictory content with lower threshold
+                        if scores.get('CONTRADICTION', 0) > 0.3:  # Lowered threshold
+                            logger.info(f"Found contradictory sentence (score: {scores['CONTRADICTION']:.2f}): {sentence}")
                             flagged_phrases.append({
                                 'text': sentence,
                                 'type': 'Contradiction',
                                 'score': scores['CONTRADICTION'],
                                 'highlight': f"[CONTRADICTION] (Score: {round(scores['CONTRADICTION'] * 100, 1)}%) \"{sentence}\""
                             })
+
+                        # Flag highly sensationalized content
+                        if sensationalism_scores.get('sensationalized', 0) > 0.6 or sensationalism_scores.get('clickbait', 0) > 0.6:
+                            logger.info(f"Found sensationalized content: {sentence}")
+                            flagged_phrases.append({
+                                'text': sentence,
+                                'type': 'Sensationalized',
+                                'score': max(sensationalism_scores.get('sensationalized', 0), sensationalism_scores.get('clickbait', 0)),
+                                'highlight': f"[SENSATIONALIZED] \"{sentence}\""
+                            })

                 except Exception as batch_error:
                     logger.warning(f"Batch processing error: {str(batch_error)}")
@@ -180,6 +202,7 @@
                     ]))
                     for label in ['ENTAILMENT', 'CONTRADICTION', 'NEUTRAL']
                 }
+                logger.info(f"Average NLI scores: {avg_scores}")
             except Exception as agg_error:
                 logger.error(f"Error aggregating NLI scores: {str(agg_error)}")
                 avg_scores = {"ENTAILMENT": 0.0, "CONTRADICTION": 0.0, "NEUTRAL": 1.0}
@@ -199,6 +222,7 @@
                     ) * 0.15
                 }

+                logger.info(f"Accuracy components: {accuracy_components}")
                 accuracy_score = sum(accuracy_components.values()) * 100

                 # Validate final score
@@ -207,6 +231,7 @@
                     accuracy_score = 50.0
                 else:
                     accuracy_score = float(accuracy_score)
+                logger.info(f"Final accuracy score: {accuracy_score:.1f}")

             except Exception as score_error:
                 logger.error(f"Error calculating accuracy score: {str(score_error)}")
@@ -228,6 +253,8 @@
                 if len(unique_phrases) >= 5:
                     break

+            logger.info(f"Final number of flagged phrases: {len(unique_phrases)}")
+
             return {
                 "accuracy_score": accuracy_score,
                 "flagged_phrases": unique_phrases,
@@ -240,7 +267,7 @@
         except Exception as e:
             logger.error(f"Section analysis failed: {str(e)}")
             return {
-                "accuracy_score": 50.0,
+                "accuracy_score": 50.0,
                 "flagged_phrases": [],
                 "detailed_scores": {
                     "nli": {"ENTAILMENT": 0.0, "CONTRADICTION": 0.0, "NEUTRAL": 1.0},
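Note: this file pairs each body sentence with the headline ("{headline} [SEP] {sentence}") and now flags contradictions above the lowered 0.3 threshold. A self-contained sketch of that check follows; the nli callable is an assumption standing in for the NLI pipeline the class constructs elsewhere (not shown in this diff) and is assumed to return, per input, a list of {'label', 'score'} dicts covering ENTAILMENT/NEUTRAL/CONTRADICTION.

import logging
from typing import Any, Callable, Dict, List

logger = logging.getLogger(__name__)

def flag_contradictions(headline: str,
                        sentences: List[str],
                        nli: Callable,
                        batch_size: int = 8) -> List[Dict[str, Any]]:
    """Flag sentences whose NLI contradiction score against the headline exceeds 0.3."""
    flagged = []
    for i in range(0, len(sentences), batch_size):
        batch = sentences[i:i + batch_size]
        # Pair each sentence with the headline, mirroring the "[SEP]" inputs in the diff.
        batch_inputs = [f"{headline} [SEP] {sentence}" for sentence in batch]
        results = nli(batch_inputs)  # assumed: one list of {'label', 'score'} dicts per input
        for sentence, result in zip(batch, results):
            scores = {item["label"]: item["score"] for item in result}
            if scores.get("CONTRADICTION", 0) > 0.3:  # lowered threshold in this commit
                logger.info("Contradiction (%.2f): %s", scores["CONTRADICTION"], sentence)
                flagged.append({"text": sentence, "score": scores["CONTRADICTION"]})
    return flagged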
mediaunmasked/analyzers/scoring.py
CHANGED
@@ -115,24 +115,47 @@ class MediaScorer:
     def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
         """Calculate final media credibility score."""
         try:
-            logger.info(
+            logger.info("\n" + "="*50)
+            logger.info("MEDIA SCORE CALCULATION STARTED")
+            logger.info("="*50)
+            logger.info(f"Analysis Mode: {self.analysis_mode}")

+            # Headline Analysis
+            logger.info("\n" + "-"*30)
+            logger.info("HEADLINE ANALYSIS")
+            logger.info("-"*30)
             headline_analysis = self.headline_analyzer.analyze(headline, content)
+            logger.info(f"Headline Score: {headline_analysis.get('headline_vs_content_score', 0)}")
+            logger.info(f"Flagged Phrases: {headline_analysis.get('flagged_phrases', [])}")
+
+            # Sentiment Analysis
+            logger.info("\n" + "-"*30)
+            logger.info("SENTIMENT ANALYSIS")
+            logger.info("-"*30)
             sentiment_analysis = self.sentiment_analyzer.analyze(content)
-
-
+            logger.info(f"Sentiment: {sentiment_analysis.get('sentiment', 'Unknown')}")
+            logger.info(f"Manipulation Score: {sentiment_analysis.get('manipulation_score', 0)}")
+            logger.info(f"Flagged Phrases: {sentiment_analysis.get('flagged_phrases', [])}")

-            #
-            logger.info("\n
-            logger.info(
-            logger.info(
-
-
-                Label: {bias_analysis
-                Score: {bias_analysis
-                Percentage: {bias_analysis
+            # Bias Analysis
+            logger.info("\n" + "-"*30)
+            logger.info("BIAS ANALYSIS")
+            logger.info("-"*30)
+            bias_analysis = self.bias_analyzer.analyze(content)
+            logger.info(f"""Bias Results:
+                Label: {bias_analysis.get('bias', 'Unknown')}
+                Score: {bias_analysis.get('bias_score', 0)}
+                Percentage: {bias_analysis.get('bias_percentage', 0)}%
+                Flagged Phrases: {bias_analysis.get('flagged_phrases', [])}
             """)
-
+
+            # Evidence Analysis
+            logger.info("\n" + "-"*30)
+            logger.info("EVIDENCE ANALYSIS")
+            logger.info("-"*30)
+            evidence_analysis = self.evidence_analyzer.analyze(content)
+            logger.info(f"Evidence Score: {evidence_analysis.get('evidence_based_score', 0)}")
+            logger.info(f"Flagged Phrases: {evidence_analysis.get('flagged_phrases', [])}")

             # Calculate component scores with NaN handling
             # For headline: 20% contradiction = 20% score (don't invert)
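Note: the same banner pattern ("=" or "-" rules around an uppercase title) is repeated across scoring.py and the individual analyzers in this commit. A small helper like the hypothetical sketch below could express it once; this is an illustrative suggestion, not code from the repository.

import logging

logger = logging.getLogger(__name__)

def log_banner(title: str, char: str = "=", width: int = 50) -> None:
    """Log a visual section banner around a title line."""
    logger.info("\n" + char * width)
    logger.info(title)
    logger.info(char * width)

# Usage mirroring the diff:
#   log_banner("MEDIA SCORE CALCULATION STARTED")          # "=" rule, width 50
#   log_banner("HEADLINE ANALYSIS", char="-", width=30)    # "-" rule, width 30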
mediaunmasked/analyzers/sentiment_analyzer.py
CHANGED
@@ -85,18 +85,21 @@ class SentimentAnalyzer:
     def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
         """Perform sentiment analysis using LLM models."""
         try:
-            logger.info("
+            logger.info("\n" + "="*50)
+            logger.info("SENTIMENT ANALYSIS STARTED")
+            logger.info("="*50)

             # Clean the text of formatting markers
+            logger.info("Cleaning and preparing text...")
             cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
             cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
                                      if not line.startswith('[') and not line.startswith('More on'))

-            logger.info("Text
+            logger.info(f"Text prepared - Length: {len(cleaned_text)} characters")

             # Split text into chunks of 512 tokens (approximate)
             chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
-            logger.info(f"
+            logger.info(f"Split text into {len(chunks)} chunks for processing")

             # Initialize aggregation variables
             sentiment_scores = []
@@ -114,40 +117,42 @@

             # Process each chunk
             for i, chunk in enumerate(chunks, 1):
+                logger.info(f"\n{'-'*30}")
                 logger.info(f"Processing chunk {i}/{len(chunks)}")
+                logger.info(f"Chunk length: {len(chunk)} characters")

                 try:
-                    # Get emotion scores
-                    logger.
+                    # Get emotion scores
+                    logger.info("Analyzing emotions...")
                     emotions = self.sentiment_pipeline(chunk)
                     logger.debug(f"Raw emotion response: {emotions}")

                     # Handle different response formats
                     if isinstance(emotions, list):
-                        # Multiple results format
                         for emotion in emotions:
                             if isinstance(emotion, dict) and 'label' in emotion and 'score' in emotion:
                                 sentiment_scores.append(emotion)
+                                logger.info(f"Detected emotion: {emotion['label']} (score: {emotion['score']:.3f})")
                     elif isinstance(emotions, dict) and 'label' in emotions and 'score' in emotions:
-                        # Single result format
                         sentiment_scores.append(emotions)
-
+                        logger.info(f"Detected emotion: {emotions['label']} (score: {emotions['score']:.3f})")

                     # Get toxicity scores if available
                     if self.toxicity_available:
-                        logger.
+                        logger.info("Analyzing toxicity...")
                         try:
                             toxicity = self.toxicity_pipeline(chunk)
                             if isinstance(toxicity, list):
                                 toxicity_scores.extend(toxicity)
                             else:
                                 toxicity_scores.append(toxicity)
+                            logger.info(f"Toxicity analysis complete for chunk {i}")
+                            logger.debug(f"Toxicity scores: {toxicity_scores[-1]}")
                         except Exception as tox_error:
                             logger.warning(f"Toxicity analysis failed for chunk {i}: {str(tox_error)}")

                     # Get manipulation scores
-                    logger.
+                    logger.info("Analyzing manipulation patterns...")
                     manipulation = self.zero_shot(
                         chunk,
                         manipulation_categories,
@@ -155,13 +160,17 @@
                     )

                     if isinstance(manipulation, dict) and 'labels' in manipulation and 'scores' in manipulation:
-
+                        chunk_scores = {
                             label: score
                             for label, score in zip(manipulation['labels'], manipulation['scores'])
-                        }
-
+                        }
+                        manipulation_scores.append(chunk_scores)
+                        logger.info("Manipulation scores for chunk:")
+                        for label, score in chunk_scores.items():
+                            logger.info(f"  - {label}: {score:.3f}")

                     # Analyze sentences for manipulation
+                    logger.info("Analyzing individual sentences for manipulation...")
                     sentences = chunk.split('.')
                     for sentence in sentences:
                         if len(sentence.strip()) > 10:
@@ -172,6 +181,7 @@
                             )
                             if (sent_result['labels'][0] in ["emotional manipulation", "fear mongering", "propaganda"]
                                 and sent_result['scores'][0] > 0.7):
+                                logger.info(f"Found manipulative content (score: {sent_result['scores'][0]:.3f}): {sentence.strip()}")
                                 flagged_phrases.append({
                                     'text': sentence.strip(),
                                     'type': sent_result['labels'][0],
@@ -182,7 +192,7 @@
                     logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
                     continue

-            logger.info("
+            logger.info("\nAggregating final scores...")

             # Aggregate scores with error handling
             def aggregate_scores(scores_list, score_type: str):
@@ -222,8 +232,15 @@

             emotion_scores = aggregate_scores(sentiment_scores, "emotion")
             toxicity_scores = aggregate_scores(toxicity_scores, "toxicity") if self.toxicity_available else {}
-
-            logger.
+
+            logger.info("\nFinal emotion scores:")
+            for emotion, score in emotion_scores.items():
+                logger.info(f"  - {emotion}: {score:.3f}")
+
+            if toxicity_scores:
+                logger.info("\nFinal toxicity scores:")
+                for category, score in toxicity_scores.items():
+                    logger.info(f"  - {category}: {score:.3f}")

             # Aggregate manipulation scores
             manipulation_agg = {
@@ -232,9 +249,12 @@
                     for scores in manipulation_scores
                 ]))
                 for category in manipulation_categories
-                if manipulation_scores
+                if manipulation_scores
             }
-
+
+            logger.info("\nFinal manipulation scores:")
+            for category, score in manipulation_agg.items():
+                logger.info(f"  - {category}: {score:.3f}")

             # Calculate manipulation score based on multiple factors
             manipulation_indicators = {
@@ -263,7 +283,7 @@
                 # Fallback to traditional analysis if no scores available
                 manipulation_score = len(self._detect_manipulative_phrases(text)) * 10

-            logger.info(f"
+            logger.info(f"\nFinal manipulation score: {manipulation_score:.1f}")

             # Determine overall sentiment
             positive_emotions = ['admiration', 'joy', 'amusement', 'approval']
@@ -274,7 +294,10 @@
             neg_score = sum(emotion_scores.get(emotion, 0) for emotion in negative_emotions)
             neu_score = sum(emotion_scores.get(emotion, 0) for emotion in neutral_emotions)

-            logger.
+            logger.info(f"\nSentiment component scores:")
+            logger.info(f"  - Positive: {pos_score:.3f}")
+            logger.info(f"  - Negative: {neg_score:.3f}")
+            logger.info(f"  - Neutral: {neu_score:.3f}")

             # Determine sentiment based on highest score
             max_score = max(pos_score, neg_score, neu_score)
@@ -285,7 +308,7 @@
             else:
                 sentiment = "Neutral"

-            logger.info(f"
+            logger.info(f"\nFinal sentiment determination: {sentiment}")

             # Sort and limit flagged phrases by manipulation score
             sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
@@ -299,7 +322,9 @@
                 if len(unique_phrases) >= 5:
                     break

-            logger.info("
+            logger.info(f"\nFlagged {len(unique_phrases)} unique manipulative phrases")
+
+            logger.info("\nSentiment analysis completed successfully")

             return {
                 "sentiment": sentiment,