Spaces:
Running
Running
initial commit of media-unmasked-api to huggingface
Browse files- .gitignore +5 -0
- Dockerfile +21 -0
- app/main.py +22 -0
- app/routers/analyze.py +18 -0
- app/routers/health.py +7 -0
- create_structure.sh +3 -0
- directory_structure.txt +0 -0
- mediaunmasked/__init__.py +5 -0
- mediaunmasked/analyzers/__init__.py +1 -0
- mediaunmasked/analyzers/bias_analyzer.py +75 -0
- mediaunmasked/analyzers/evidence_analyzer.py +53 -0
- mediaunmasked/analyzers/headline_analyzer.py +164 -0
- mediaunmasked/analyzers/scoring.py +101 -0
- mediaunmasked/analyzers/sentiment_analyzer.py +65 -0
- mediaunmasked/resources/left_bias_words.txt +187 -0
- mediaunmasked/resources/manipulative_patterns.txt +178 -0
- mediaunmasked/resources/right_bias_words.txt +233 -0
- mediaunmasked/schemas/requests.py +5 -0
- mediaunmasked/schemas/responses.py +6 -0
- mediaunmasked/scrapers/__init__.py +3 -0
- mediaunmasked/scrapers/article_scraper.py +139 -0
- mediaunmasked/services/analyzer_service.py +9 -0
- mediaunmasked/utils/__init__.py +1 -0
- mediaunmasked/utils/logging_config.py +10 -0
- mediaunmasked/web/__init__.py +1 -0
- mediaunmasked/web/app.py +82 -0
- requirements.txt +14 -0
- setup.py +15 -0
- start.sh +10 -0
- tests/unit/test_headline_analyzer.py +24 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ignore Python compiled files
|
2 |
+
__pycache__/
|
3 |
+
*.pyc
|
4 |
+
*.pyo
|
5 |
+
*.pyd
|
Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use Python Slim Image
|
2 |
+
FROM python:3.10-slim
|
3 |
+
|
4 |
+
# Set working directory inside container
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Copy dependencies
|
8 |
+
COPY requirements.txt .
|
9 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
10 |
+
|
11 |
+
# Copy all files
|
12 |
+
COPY . .
|
13 |
+
|
14 |
+
# Set PYTHONPATH (simplified)
|
15 |
+
ENV PYTHONPATH=/app
|
16 |
+
|
17 |
+
# Expose port 7860, the port Hugging Face Spaces expects by default (uvicorn is started on it below)
|
18 |
+
EXPOSE 7860
|
19 |
+
|
20 |
+
# Start FastAPI
|
21 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/main.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.middleware.cors import CORSMiddleware # ✅ Import this
|
3 |
+
from app.routers import analyze, health
|
4 |
+
|
5 |
+
app = FastAPI(title="MediaUnmasked API")

# Enable CORS so browser clients (including the Swagger UI) can call the API.
# Credentials stay disabled: a wildcard origin must not be combined with
# credentialed requests. List explicit origins in allow_origins before
# switching allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins (or specify ["http://localhost:7860"])
    allow_credentials=False,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)

# Include routers
app.include_router(analyze.router, prefix="/api")
app.include_router(health.router, prefix="/health")


@app.get("/")
async def root():
    """Return a static banner confirming that the API process is serving requests."""
    return {"message": "MediaUnmasked API is running!"}
|
app/routers/analyze.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, HTTPException
|
2 |
+
from mediaunmasked.schemas.requests import AnalyzeRequest
|
3 |
+
from mediaunmasked.schemas.responses import AnalyzeResponse
|
4 |
+
from mediaunmasked.services.analyzer_service import AnalyzerService
|
5 |
+
|
6 |
+
router = APIRouter(tags=["analysis"])
|
7 |
+
|
8 |
+
@router.post("/analyze", response_model=AnalyzeResponse)
async def analyze_content(request: AnalyzeRequest):
    """Run the analysis service on a headline/content pair.

    Args:
        request: Payload whose ``headline`` and ``content`` fields are
            forwarded to the analyzer service.

    Returns:
        The value produced by ``AnalyzerService.analyze_content``; FastAPI
        serializes it through the ``AnalyzeResponse`` response model.

    Raises:
        HTTPException: With status 500 and the error text as detail when the
            service raises an unexpected error. An ``HTTPException`` raised
            while handling the request is passed through unchanged.
    """
    try:
        analyzer_service = AnalyzerService()
        return await analyzer_service.analyze_content(
            headline=request.headline,
            content=request.content,
        )
    except HTTPException:
        # Keep the status code chosen deeper in the stack instead of
        # flattening it into a generic 500.
        raise
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
app/routers/health.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
|
3 |
+
router = APIRouter()
|
4 |
+
|
5 |
+
@router.get("/")
async def health_check():
    """Liveness probe: unconditionally report the service as healthy."""
    status_payload = {"status": "healthy"}
    return status_payload
|
create_structure.sh
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# Create the project's directory skeleton.
#
# The {a,b,c} brace expansion below is a bash feature; a plain POSIX sh would
# create one directory literally named "{services,models,schemas,config}",
# so the script pins bash explicitly and aborts on the first failing command.
set -euo pipefail

mkdir -p api/routers
mkdir -p src/mediaunmasked/{services,models,schemas,config}
mkdir -p tests/{unit,integration}
|
directory_structure.txt
ADDED
Binary file (4.85 kB). View file
|
|
mediaunmasked/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
MediaUnmasked - AI-powered media watchdog for analyzing bias and fact-checking.
|
3 |
+
"""
|
4 |
+
|
5 |
+
__version__ = "0.1.0"
|
mediaunmasked/analyzers/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Empty file to make the directory a Python package
|
mediaunmasked/analyzers/bias_analyzer.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import os
|
3 |
+
from typing import Dict, Any, List
|
4 |
+
|
5 |
+
logger = logging.getLogger(__name__)
|
6 |
+
|
7 |
+
class BiasAnalyzer:
    """Estimate political lean by counting left/right keyword hits in a text.

    The two keyword lists are read once, at construction time, from
    ``left_bias_words.txt`` and ``right_bias_words.txt`` inside the
    ``resources`` directory that sits next to this package.
    """

    def __init__(self):
        self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
        self.left_keywords = self._load_keywords('left_bias_words.txt')
        self.right_keywords = self._load_keywords('right_bias_words.txt')

    def _load_keywords(self, filename: str) -> List[str]:
        """Load keywords from a file in ``self.resources_dir``.

        Blank lines and lines whose first non-blank character is ``#`` are
        skipped. Entries are lower-cased and de-duplicated (first occurrence
        wins) so a phrase listed twice in the file is not counted twice by
        ``analyze``.

        Args:
            filename: Name of the keyword file, relative to the resources dir.

        Returns:
            The keywords in file order, or an empty list if the file could
            not be read (the error is logged, not raised).
        """
        try:
            filepath = os.path.join(self.resources_dir, filename)
            with open(filepath, 'r', encoding='utf-8') as f:
                entries = (line.strip() for line in f)
                keywords = [
                    entry.lower()
                    for entry in entries
                    if entry and not entry.startswith('#')
                ]
            # dict.fromkeys() removes repeats while preserving order.
            return list(dict.fromkeys(keywords))
        except Exception as e:
            logger.error(f"Error loading {filename}: {str(e)}")
            return []

    def analyze(self, text: str) -> Dict[str, Any]:
        """Detect bias using keyword analysis.

        Each keyword contributes at most one hit, however often it occurs.
        NOTE: matching is plain substring containment on the lower-cased
        text, so a keyword also matches when embedded in a longer word.

        Args:
            text: The article text to inspect.

        Returns:
            A dict with ``bias`` (label), ``bias_score`` (-1.0 fully left,
            0.0 neutral, 1.0 fully right) and ``bias_percentage`` (the
            magnitude of the score as 0-100, sign dropped). On an unexpected
            error the label is ``"Error"`` and both numbers are zero.
        """
        try:
            text_lower = text.lower()

            # Count matches
            left_count = sum(1 for word in self.left_keywords if word in text_lower)
            right_count = sum(1 for word in self.right_keywords if word in text_lower)

            total_words = left_count + right_count
            if total_words == 0:
                return {
                    "bias": "Neutral",
                    "bias_score": 0.0,  # True neutral
                    "bias_percentage": 0  # Neutral percentage
                }

            # Bias score formula (-1.0 left, 0.0 neutral, 1.0 right)
            bias_score = (right_count - left_count) / total_words

            # Signed percentage (-100% to +100%); only its magnitude is returned.
            bias_percentage = bias_score * 100
            logger.info(f"Bias score: {bias_score:.2f}, Bias percentage: {bias_percentage:.1f}%")

            # Determine bias label
            if bias_score < -0.8:
                bias = "Strongly Left"
            elif bias_score < -0.5:
                bias = "Moderately Left"
            elif bias_score < -0.2:
                bias = "Leaning Left"
            elif bias_score > 0.8:
                bias = "Strongly Right"
            elif bias_score > 0.5:
                bias = "Moderately Right"
            elif bias_score > 0.2:
                bias = "Leaning Right"
            else:
                bias = "Neutral"

            return {
                "bias": bias,
                "bias_score": round(bias_score, 2),  # Keep 2 decimal places
                "bias_percentage": abs(round(bias_percentage, 1))
            }

        except Exception as e:
            logger.error(f"Error in bias analysis: {str(e)}")
            return {
                "bias": "Error",
                "bias_score": 0.0,
                "bias_percentage": 0
            }
|
mediaunmasked/analyzers/evidence_analyzer.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, Any, List
|
3 |
+
|
4 |
+
logger = logging.getLogger(__name__)
|
5 |
+
|
6 |
+
class EvidenceAnalyzer:
    """Score how evidence-based a text reads from citation and vagueness cues."""

    def __init__(self):
        # Phrases that signal attributed or sourced reporting.
        self.citation_markers = [
            "according to",
            "said",
            "reported",
            "stated",
            "shows",
            "found",
            "study",
            "research",
            "data",
            "evidence"
        ]

        # Phrases that signal unattributed or hedged claims.
        self.vague_markers = [
            "some say",
            "many believe",
            "people think",
            "experts claim",
            "sources say",
            "it is believed",
            "reportedly",
            "allegedly"
        ]

    @staticmethod
    def _count_markers(text_lower: str, markers: List[str]) -> int:
        """Count how many distinct markers occur as whole words/phrases.

        Whole-word matching keeps a vague marker such as "reportedly" from
        also being credited as the citation marker "reported", and stops
        "found" from matching inside "foundation".
        """
        import re  # local import keeps this block self-contained

        return sum(
            1
            for marker in markers
            if re.search(rf"(?<!\w){re.escape(marker)}(?!\w)", text_lower)
        )

    def analyze(self, text: str) -> Dict[str, Any]:
        """Check for evidence-based reporting.

        Each distinct citation marker present adds 20 points (capped at 100)
        and each distinct vague marker present subtracts 10; the result is
        floored at 0.

        Args:
            text: The article text to inspect.

        Returns:
            ``{"evidence_based_score": <0-100>}``; the score is 0 if an
            unexpected error occurs (the error is logged).
        """
        try:
            text_lower = text.lower()

            citation_count = self._count_markers(text_lower, self.citation_markers)
            vague_count = self._count_markers(text_lower, self.vague_markers)

            base_score = min(citation_count * 20, 100)
            penalty = vague_count * 10

            evidence_score = max(0, base_score - penalty)

            return {
                "evidence_based_score": evidence_score
            }

        except Exception as e:
            logger.error(f"Error in evidence analysis: {str(e)}")
            return {
                "evidence_based_score": 0
            }
|
mediaunmasked/analyzers/headline_analyzer.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, Any, List
|
3 |
+
from transformers import pipeline
|
4 |
+
from transformers import AutoTokenizer
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
logger = logging.getLogger(__name__)
|
8 |
+
|
9 |
+
class HeadlineAnalyzer:
    """Score how well a headline is supported by the article body via an NLI model."""

    def __init__(self):
        """Initialize the NLI model for contradiction detection."""
        self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
        self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
        self.max_length = 512

    def _split_content(self, headline: str, content: str) -> List[str]:
        """Split content into sections that fit within the token limit.

        Words are accumulated until the encoded section reaches the budget
        left over after the headline; each new section starts with the last
        20% of the previous one for context. Empty sections are never emitted.

        Args:
            headline: Headline that will be paired with every section.
            content: Full article text.

        Returns:
            The list of section strings, in reading order.
        """
        content_words = content.split()
        sections = []
        current_section = []

        # Budget left for the section once the headline is accounted for.
        # Floored at 1 so an over-long headline degrades to tiny sections
        # instead of a zero or negative budget.
        headline_tokens = len(self.tokenizer.encode(headline))
        max_content_tokens = max(1, self.max_length - headline_tokens)

        # Process words into sections
        # NOTE: the whole section is re-encoded for every word added.
        for word in content_words:
            current_section.append(word)

            # Check if current section is approaching token limit
            current_text = " ".join(current_section)
            if len(self.tokenizer.encode(current_text)) >= max_content_tokens:
                # Remove last word (it might make us go over limit)
                current_section.pop()
                if current_section:
                    sections.append(" ".join(current_section))

                # Start new section with 20% overlap for context
                overlap_start = max(0, len(current_section) - int(len(current_section) * 0.2))
                current_section = current_section[overlap_start:]
                current_section.append(word)

        # Add any remaining content as the last section
        if current_section:
            sections.append(" ".join(current_section))

        logger.info(f"""Content Splitting:
            - Original content length: {len(content_words)} words
            - Split into {len(sections)} sections
            - Headline uses {headline_tokens} tokens
            - Available tokens per section: {max_content_tokens}
            """)
        return sections

    def _analyze_section(self, headline: str, section: str) -> Dict[str, float]:
        """Analyze a single section of content.

        The headline and section are passed as a sentence pair so the
        tokenizer inserts the model's own separator tokens; a literal
        "[SEP]" string is not a separator for this model. Truncation keeps
        an over-long pair within ``self.max_length``.

        NOTE(review): the pair order (headline first, section second) is
        kept from the original text layout; NLI models conventionally take
        premise then hypothesis - confirm which order is intended.

        Returns:
            Mapping of each label reported by the pipeline to its score.
        """
        result = self.nli_pipeline(
            {"text": headline, "text_pair": section},
            top_k=None,
            truncation=True,
            max_length=self.max_length,
        )
        # Tolerate the scores for a single input arriving wrapped in an outer list.
        if result and isinstance(result[0], list):
            result = result[0]

        # Extract scores
        scores = {item['label']: item['score'] for item in result}

        logger.info("\nSection Analysis:")
        logger.info("-"*30)
        logger.info(f"Section preview: {section[:100]}...")
        for label, score in scores.items():
            logger.info(f"Label: {label:<12} Score: {score:.3f}")

        return scores

    def analyze(self, headline: str, content: str) -> Dict[str, Any]:
        """Analyze how well the headline matches the content using an AI model.

        Content longer than ``self.max_length`` tokens is split into
        overlapping sections; entailment and neutral scores are averaged
        across sections while the maximum contradiction score is kept.

        Args:
            headline: The article headline.
            content: The article body.

        Returns:
            A dict with ``headline_vs_content_score`` (0-100),
            ``entailment_score``, ``contradiction_score`` and an (always
            empty) ``contradictory_phrases`` list. All scores are 0 for
            blank input or when an error occurs (the error is logged).
        """
        try:
            logger.info("\n" + "="*50)
            logger.info("HEADLINE ANALYSIS STARTED")
            logger.info("="*50)

            # Handle empty inputs
            if not headline.strip() or not content.strip():
                logger.warning("Empty headline or content provided")
                return {
                    "headline_vs_content_score": 0,
                    "entailment_score": 0,
                    "contradiction_score": 0,
                    "contradictory_phrases": []
                }

            # Split content if too long
            content_tokens = len(self.tokenizer.encode(content))
            if content_tokens > self.max_length:
                logger.warning(f"""
                    Content Length Warning:
                    - Total tokens: {content_tokens}
                    - Max allowed: {self.max_length}
                    - Splitting into sections...
                    """)
                sections = self._split_content(headline, content)

                # Analyze each section
                section_scores = []
                for i, section in enumerate(sections, 1):
                    logger.info(f"\nAnalyzing section {i}/{len(sections)}")
                    scores = self._analyze_section(headline, section)
                    section_scores.append(scores)

                # Aggregate scores across sections
                # Use max contradiction (if any section strongly contradicts, that's important)
                # Use mean entailment (overall support across sections)
                # Use mean neutral (general neutral tone across sections)
                # float() turns the numpy scalars into plain Python floats.
                entailment_score = float(np.mean([s.get('ENTAILMENT', 0) for s in section_scores]))
                contradiction_score = float(np.max([s.get('CONTRADICTION', 0) for s in section_scores]))
                neutral_score = float(np.mean([s.get('NEUTRAL', 0) for s in section_scores]))

                logger.info("\nAggregated Scores Across Sections:")
                logger.info("-"*30)
                logger.info(f"Mean Entailment: {entailment_score:.3f}")
                logger.info(f"Max Contradiction: {contradiction_score:.3f}")
                logger.info(f"Mean Neutral: {neutral_score:.3f}")
            else:
                # Single section analysis
                scores = self._analyze_section(headline, content)
                entailment_score = scores.get('ENTAILMENT', 0)
                contradiction_score = scores.get('CONTRADICTION', 0)
                neutral_score = scores.get('NEUTRAL', 0)

            # Compute final consistency score
            final_score = (
                (entailment_score * 0.6) +           # Base score from entailment
                (neutral_score * 0.3) +              # Neutral is acceptable
                ((1 - contradiction_score) * 0.1)    # Small penalty for contradiction
            ) * 100

            # Log final results
            logger.info("\nFinal Analysis Results:")
            logger.info("-"*30)
            logger.info(f"Headline: {headline}")
            logger.info(f"Content Length: {content_tokens} tokens")
            logger.info("\nFinal Scores:")
            logger.info(f"{'Entailment:':<15} {entailment_score:.3f}")
            logger.info(f"{'Neutral:':<15} {neutral_score:.3f}")
            logger.info(f"{'Contradiction:':<15} {contradiction_score:.3f}")
            logger.info(f"\nFinal Score: {final_score:.1f}%")
            logger.info("="*50 + "\n")

            return {
                "headline_vs_content_score": round(final_score, 1),
                "entailment_score": round(entailment_score, 2),
                "contradiction_score": round(contradiction_score, 2),
                "contradictory_phrases": []
            }

        except Exception as e:
            logger.error("\nHEADLINE ANALYSIS ERROR")
            logger.error("-"*30)
            logger.error(f"Error Type: {type(e).__name__}")
            logger.error(f"Error Message: {str(e)}")
            logger.error("Stack Trace:", exc_info=True)
            logger.error("="*50 + "\n")
            return {
                "headline_vs_content_score": 0,
                "entailment_score": 0,
                "contradiction_score": 0,
                "contradictory_phrases": []
            }
|
mediaunmasked/analyzers/scoring.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Any
|
2 |
+
import logging
|
3 |
+
|
4 |
+
from .headline_analyzer import HeadlineAnalyzer
|
5 |
+
from .sentiment_analyzer import SentimentAnalyzer
|
6 |
+
from .bias_analyzer import BiasAnalyzer
|
7 |
+
from .evidence_analyzer import EvidenceAnalyzer
|
8 |
+
|
9 |
+
logger = logging.getLogger(__name__)
|
10 |
+
|
11 |
+
class MediaScorer:
    """Combine the headline, sentiment, bias and evidence analyzers into one score."""

    def __init__(self):
        """Initialize the MediaScorer with required analyzers."""
        self.headline_analyzer = HeadlineAnalyzer()
        self.sentiment_analyzer = SentimentAnalyzer()
        self.bias_analyzer = BiasAnalyzer()
        self.evidence_analyzer = EvidenceAnalyzer()

    def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
        """Calculate final media credibility score.

        The four component scores are each normalised to 0-1 ("higher is
        better") and averaged with equal 25% weights.

        Args:
            headline: The article headline.
            content: The article body.

        Returns:
            A dict with ``media_unmasked_score`` (0-100), ``rating``
            ("Trustworthy" >= 80, "Bias Present" >= 50, else "Misleading")
            and ``details`` holding each analyzer's raw result. On an
            unexpected error the rating is "Error", the score is 0 and
            ``details`` carries zeroed results with the same keys as the
            analyzers' own outputs.
        """
        try:
            headline_analysis = self.headline_analyzer.analyze(headline, content)
            sentiment_analysis = self.sentiment_analyzer.analyze(content)
            bias_analysis = self.bias_analyzer.analyze(content)
            evidence_analysis = self.evidence_analyzer.analyze(content)

            # Log intermediate results
            logger.info("\n=== Raw Analysis Results ===")
            logger.info(f"Headline Analysis: {headline_analysis}")
            logger.info(f"Sentiment Analysis: {sentiment_analysis}")
            logger.info(f"""Bias Analysis:
                Raw: {bias_analysis}
                Label: {bias_analysis['bias']}
                Score: {bias_analysis['bias_score']}
                Percentage: {bias_analysis['bias_percentage']}%
                """)
            logger.info(f"Evidence Analysis: {evidence_analysis}")

            # Calculate component scores (all normalised so higher is better).
            # Headline consistency is already "higher is better": use directly.
            headline_score = headline_analysis["headline_vs_content_score"] / 100

            # Manipulation: 0% is good, 100% is bad, so invert.
            manipulation_score = (100 - sentiment_analysis["manipulation_score"]) / 100

            # Bias: 0% is good, 100% is bad, so invert.
            bias_score = (100 - bias_analysis["bias_percentage"]) / 100

            evidence_score = evidence_analysis["evidence_based_score"] / 100  # Higher is better

            logger.info(f"""Component Scores:
                Headline: {headline_score * 100:.1f}% (from {headline_analysis["headline_vs_content_score"]}%)
                Evidence: {evidence_score * 100:.1f}%
                Manipulation: {manipulation_score * 100:.1f}% (100 - {sentiment_analysis["manipulation_score"]}%)
                Bias: {bias_score * 100:.1f}% (100 - {bias_analysis["bias_percentage"]}%)
                """)

            # Calculate final score
            final_score = (
                (headline_score * 0.25) +
                (manipulation_score * 0.25) +
                (bias_score * 0.25) +
                (evidence_score * 0.25)
            ) * 100

            # Determine rating
            if final_score >= 80:
                rating = "Trustworthy"
            elif final_score >= 50:
                rating = "Bias Present"
            else:
                rating = "Misleading"

            result = {
                "media_unmasked_score": round(final_score, 1),
                "rating": rating,
                "details": {
                    "headline_analysis": headline_analysis,
                    "sentiment_analysis": sentiment_analysis,
                    "bias_analysis": bias_analysis,
                    "evidence_analysis": evidence_analysis
                }
            }

            logger.info("\n=== Final Score Result ===")
            logger.info(f"Result: {result}")

            return result

        except Exception as e:
            # exc_info keeps the traceback, which the message alone would lose.
            logger.error(f"Error calculating media score: {str(e)}", exc_info=True)
            return {
                "media_unmasked_score": 0,
                "rating": "Error",
                "details": {
                    # Same keys as HeadlineAnalyzer.analyze returns, so the
                    # response shape does not change on the error path.
                    "headline_analysis": {
                        "headline_vs_content_score": 0,
                        "entailment_score": 0,
                        "contradiction_score": 0,
                        "contradictory_phrases": []
                    },
                    "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
                    "bias_analysis": {"bias": "Error", "bias_score": 0.0, "bias_percentage": 0},
                    "evidence_analysis": {"evidence_based_score": 0}
                }
            }
|
mediaunmasked/analyzers/sentiment_analyzer.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, Any, List
|
3 |
+
from textblob import TextBlob
|
4 |
+
|
5 |
+
logger = logging.getLogger(__name__)
|
6 |
+
|
7 |
+
class SentimentAnalyzer:
    """Label a text's sentiment and flag a fixed set of manipulative phrases."""

    def __init__(self):
        # Phrases treated as manipulation cues; each one found adds 10 points.
        self.manipulative_patterns = [
            "experts say",
            "sources claim",
            "many believe",
            "some say",
            "everyone knows",
            "clearly",
            "obviously",
            "without doubt",
            "certainly"
        ]

    def analyze(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using TextBlob.

        Returns a dict with ``sentiment`` ("Positive", "Negative", "Neutral",
        or "Manipulative" when the manipulation score exceeds 50),
        ``manipulation_score`` (capped at 100) and ``flagged_phrases``.
        On error the sentiment is "Error" with a zero score and no phrases.
        """
        try:
            polarity = TextBlob(text).sentiment.polarity

            flagged = self._detect_manipulative_phrases(text)
            raw_manipulation = 10 * len(flagged)

            # Heavy use of manipulative phrasing overrides the polarity label.
            if raw_manipulation > 50:
                label = "Manipulative"
            elif polarity > 0.2:
                label = "Positive"
            elif polarity < -0.2:
                label = "Negative"
            else:
                label = "Neutral"

            return {
                "sentiment": label,
                "manipulation_score": min(raw_manipulation, 100),
                "flagged_phrases": flagged
            }

        except Exception as e:
            logger.error(f"Error in sentiment analysis: {str(e)}")
            return {
                "sentiment": "Error",
                "manipulation_score": 0,
                "flagged_phrases": []
            }

    def _detect_manipulative_phrases(self, text: str) -> List[str]:
        """Detect potentially manipulative phrases.

        For every pattern present, returns the first occurrence together
        with up to 20 characters of context on each side, stripped of
        surrounding whitespace.
        """
        lowered = text.lower()
        snippets = []

        for pattern in self.manipulative_patterns:
            position = lowered.find(pattern)
            if position == -1:
                continue
            window_start = max(0, position - 20)
            window_end = min(len(text), position + len(pattern) + 20)
            snippets.append(text[window_start:window_end].strip())

        return snippets
|
mediaunmasked/resources/left_bias_words.txt
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 📂 resources/left_bias_words.txt
|
2 |
+
# -------------------------------------------------
|
3 |
+
# 🔹 Political Ideology & Economic Policy
|
4 |
+
progressive
|
5 |
+
conservative
|
6 |
+
socialist
|
7 |
+
democratic socialism
|
8 |
+
democratic socialist
|
9 |
+
far-right
|
10 |
+
equity
|
11 |
+
justice for all
|
12 |
+
wealth redistribution
|
13 |
+
universal basic income
|
14 |
+
living wage
|
15 |
+
income inequality
|
16 |
+
wealth inequality
|
17 |
+
fair trade
|
18 |
+
social safety net
|
19 |
+
corporate greed
|
20 |
+
workers' rights
|
21 |
+
unionize
|
22 |
+
collective bargaining
|
23 |
+
minimum wage increase
|
24 |
+
universal childcare
|
25 |
+
tax the rich
|
26 |
+
economic justice
|
27 |
+
capitalism is broken
|
28 |
+
billionaires shouldn't exist
|
29 |
+
|
30 |
+
# 🔹 Climate & Environmental Policy
|
31 |
+
climate crisis
|
32 |
+
climate emergency
|
33 |
+
sustainability
|
34 |
+
green energy
|
35 |
+
carbon footprint
|
36 |
+
fossil fuel divestment
|
37 |
+
environmental justice
|
38 |
+
net zero
|
39 |
+
renewable energy
|
40 |
+
solar energy
|
41 |
+
climate action
|
42 |
+
big oil
|
43 |
+
carbon tax
|
44 |
+
Green New Deal
|
45 |
+
climate deniers
|
46 |
+
eco-friendly policies
|
47 |
+
clean energy revolution
|
48 |
+
plastic ban
|
49 |
+
end fracking
|
50 |
+
divest from coal
|
51 |
+
extreme weather is worsening
|
52 |
+
global warming is real
|
53 |
+
environmental responsibility
|
54 |
+
wildlife protection
|
55 |
+
eco-activism
|
56 |
+
|
57 |
+
# 🔹 Social Justice & Identity Politics
|
58 |
+
social justice
|
59 |
+
racial justice
|
60 |
+
systemic racism
|
61 |
+
white privilege
|
62 |
+
microaggressions
|
63 |
+
BIPOC
|
64 |
+
LGBTQ+ rights
|
65 |
+
gender pay gap
|
66 |
+
affirmative action
|
67 |
+
decolonization
|
68 |
+
indigenous sovereignty
|
69 |
+
equity vs equality
|
70 |
+
patriarchy
|
71 |
+
gender-inclusive
|
72 |
+
intersectionality
|
73 |
+
trans rights
|
74 |
+
feminism
|
75 |
+
gender-affirming care
|
76 |
+
abolish ICE
|
77 |
+
police brutality
|
78 |
+
defund the police
|
79 |
+
prison abolition
|
80 |
+
restorative justice
|
81 |
+
white supremacy
|
82 |
+
hate speech laws
|
83 |
+
critical race theory
|
84 |
+
diversity, equity, inclusion
|
85 |
+
reproductive justice
|
86 |
+
women's bodily autonomy
|
87 |
+
reparations
|
88 |
+
|
89 |
+
# 🔹 Healthcare & Public Welfare
|
90 |
+
Medicare for All
|
91 |
+
universal healthcare
|
92 |
+
public option
|
93 |
+
free healthcare
|
94 |
+
single-payer system
|
95 |
+
affordable healthcare
|
96 |
+
healthcare is a human right
|
97 |
+
insulin price cap
|
98 |
+
Big Pharma
|
99 |
+
mental health parity
|
100 |
+
food insecurity
|
101 |
+
public housing
|
102 |
+
student loan forgiveness
|
103 |
+
affordable education
|
104 |
+
debt relief
|
105 |
+
expand social security
|
106 |
+
disability rights
|
107 |
+
homeless crisis
|
108 |
+
opioid epidemic response
|
109 |
+
guaranteed paid leave
|
110 |
+
maternal mortality crisis
|
111 |
+
expand Medicaid
|
112 |
+
community healthcare clinics
|
113 |
+
healthcare access for all
|
114 |
+
pre-existing conditions coverage
|
115 |
+
|
116 |
+
# 🔹 Gun Control & Public Safety
|
117 |
+
gun violence prevention
|
118 |
+
common-sense gun laws
|
119 |
+
background checks
|
120 |
+
gun reform
|
121 |
+
assault weapons ban
|
122 |
+
mass shootings epidemic
|
123 |
+
red flag laws
|
124 |
+
gun buyback programs
|
125 |
+
ban high-capacity magazines
|
126 |
+
NRA influence
|
127 |
+
public safety over profit
|
128 |
+
gun safety legislation
|
129 |
+
school shootings crisis
|
130 |
+
responsible gun ownership
|
131 |
+
fewer guns, safer communities
|
132 |
+
demilitarize the police
|
133 |
+
ban ghost guns
|
134 |
+
universal gun laws
|
135 |
+
ban open carry
|
136 |
+
reduce firearm access
|
137 |
+
mandatory firearm registration
|
138 |
+
|
139 |
+
# 🔹 Immigration & Border Policy
|
140 |
+
path to citizenship
|
141 |
+
DACA
|
142 |
+
dreamers
|
143 |
+
migrant rights
|
144 |
+
asylum seekers
|
145 |
+
refugee protection
|
146 |
+
abolish ICE
|
147 |
+
border security is racist
|
148 |
+
family separation
|
149 |
+
sanctuary cities
|
150 |
+
humanitarian crisis at the border
|
151 |
+
comprehensive immigration reform
|
152 |
+
no human is illegal
|
153 |
+
end child detention
|
154 |
+
protect immigrants
|
155 |
+
immigrants strengthen the economy
|
156 |
+
undocumented workers deserve rights
|
157 |
+
border wall waste
|
158 |
+
decriminalize border crossings
|
159 |
+
reunite families
|
160 |
+
amnesty for undocumented
|
161 |
+
|
162 |
+
# 🔹 Media & Information Bias
|
163 |
+
misinformation crisis
|
164 |
+
fact-based reporting
|
165 |
+
right-wing disinformation
|
166 |
+
alternative facts
|
167 |
+
Fox News propaganda
|
168 |
+
media literacy
|
169 |
+
fight misinformation
|
170 |
+
Big Tech accountability
|
171 |
+
social media regulation
|
172 |
+
disinformation campaigns
|
173 |
+
protect press freedom
|
174 |
+
independent journalism
|
175 |
+
mainstream media bias
|
176 |
+
fact-checking matters
|
177 |
+
ban fake news
|
178 |
+
Russian interference
|
179 |
+
algorithmic bias
|
180 |
+
political misinformation
|
181 |
+
fair and accurate reporting
|
182 |
+
truth matters
|
183 |
+
anti-science rhetoric
|
184 |
+
climate denial media
|
185 |
+
ban extremist media
|
186 |
+
right-wing conspiracy theories
|
187 |
+
protecting democracy
|
mediaunmasked/resources/manipulative_patterns.txt
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 📂 resources/manipulative_patterns.txt
|
2 |
+
# -------------------------------------------------
|
3 |
+
# 🔹 Vague Attribution (Unverifiable Sources)
|
4 |
+
experts fear
|
5 |
+
some say
|
6 |
+
many believe
|
7 |
+
it's clear that
|
8 |
+
obviously
|
9 |
+
everyone knows
|
10 |
+
sources say
|
11 |
+
people are saying
|
12 |
+
research suggests
|
13 |
+
critics argue
|
14 |
+
analysts warn
|
15 |
+
reportedly
|
16 |
+
insiders claim
|
17 |
+
industry experts agree
|
18 |
+
whispers in the industry
|
19 |
+
a growing number of people think
|
20 |
+
sources close to the matter indicate
|
21 |
+
reports suggest
|
22 |
+
insiders reveal
|
23 |
+
unnamed sources confirm
|
24 |
+
widely believed
|
25 |
+
it has been said
|
26 |
+
word on the street is
|
27 |
+
|
28 |
+
# 🔹 Exaggeration & Absolutist Language
|
29 |
+
\b(all|none|every|always|never)\b
|
30 |
+
without question
|
31 |
+
undeniably
|
32 |
+
beyond a doubt
|
33 |
+
without a shadow of a doubt
|
34 |
+
irrefutable proof
|
35 |
+
inarguable
|
36 |
+
scientifically proven
|
37 |
+
guaranteed
|
38 |
+
no one can deny
|
39 |
+
absolutely certain
|
40 |
+
inevitable collapse
|
41 |
+
completely unprecedented
|
42 |
+
no alternative but
|
43 |
+
totally discredited
|
44 |
+
this changes everything
|
45 |
+
nothing can stop
|
46 |
+
without fail
|
47 |
+
history shows that
|
48 |
+
|
49 |
+
# 🔹 Emotional Manipulation & Loaded Language
|
50 |
+
the shocking truth
|
51 |
+
horrifying evidence
|
52 |
+
dangerously misguided
|
53 |
+
deeply disturbing
|
54 |
+
alarming new trend
|
55 |
+
terrifying reality
|
56 |
+
outrageous attack
|
57 |
+
crippling consequences
|
58 |
+
heartbreaking truth
|
59 |
+
a devastating blow
|
60 |
+
frightening new report
|
61 |
+
explosive details
|
62 |
+
disturbing allegations
|
63 |
+
corrupt elites
|
64 |
+
facing total destruction
|
65 |
+
hidden agenda
|
66 |
+
deliberate deception
|
67 |
+
reckless policies
|
68 |
+
radical takeover
|
69 |
+
secret plot exposed
|
70 |
+
exposed corruption
|
71 |
+
will destroy everything
|
72 |
+
brainwashing the masses
|
73 |
+
a brutal betrayal
|
74 |
+
shocking revelations
|
75 |
+
an unthinkable scenario
|
76 |
+
must be stopped at all costs
|
77 |
+
selling out the people
|
78 |
+
|
79 |
+
# 🔹 False Balance & False Equivalencies
|
80 |
+
both sides are equally to blame
|
81 |
+
to be fair, some argue
|
82 |
+
some would say it's just as bad as
|
83 |
+
on one hand, but on the other hand
|
84 |
+
many claim there’s no difference
|
85 |
+
equally problematic on both sides
|
86 |
+
critics claim, but supporters argue
|
87 |
+
it’s just like
|
88 |
+
just as bad as
|
89 |
+
making the same mistakes
|
90 |
+
exactly like
|
91 |
+
history repeating itself
|
92 |
+
|
93 |
+
# 🔹 Implying Authority Without Evidence
|
94 |
+
leading experts agree
|
95 |
+
a well-known figure once said
|
96 |
+
the science is settled
|
97 |
+
unquestionable truth
|
98 |
+
indisputable fact
|
99 |
+
respected authorities confirm
|
100 |
+
established research shows
|
101 |
+
a Nobel Prize-winning scientist believes
|
102 |
+
the most intelligent minds agree
|
103 |
+
top thinkers of our time argue
|
104 |
+
those who disagree are uninformed
|
105 |
+
no real expert would dispute this
|
106 |
+
a professor from a top university claims
|
107 |
+
all credible scientists believe
|
108 |
+
no serious researcher disagrees
|
109 |
+
|
110 |
+
# 🔹 Implying Popular Consensus Without Data
|
111 |
+
the majority of people think
|
112 |
+
society agrees that
|
113 |
+
most intelligent people understand
|
114 |
+
an overwhelming number of people
|
115 |
+
the vast majority
|
116 |
+
widely considered to be true
|
117 |
+
popular opinion suggests
|
118 |
+
everyone is talking about
|
119 |
+
most believe
|
120 |
+
|
121 |
+
# 🔹 Framing Opponents in a Negative Light
|
122 |
+
only extremists believe otherwise
|
123 |
+
people who disagree are in denial
|
124 |
+
anyone who questions this is ignorant
|
125 |
+
blindly following the agenda
|
126 |
+
out of touch with reality
|
127 |
+
desperate attempt to save face
|
128 |
+
trying to cover up the truth
|
129 |
+
a last-ditch effort to deceive
|
130 |
+
refusing to accept facts
|
131 |
+
spreading misinformation
|
132 |
+
manipulated by special interests
|
133 |
+
driven by greed and corruption
|
134 |
+
being paid to say otherwise
|
135 |
+
deliberately misleading
|
136 |
+
hiding the truth from the public
|
137 |
+
working against the people
|
138 |
+
exploiting the system
|
139 |
+
part of the problem, not the solution
|
140 |
+
dangerous and reckless
|
141 |
+
acting in bad faith
|
142 |
+
|
143 |
+
# 🔹 Implying Urgency & Fear-Mongering
|
144 |
+
we are running out of time
|
145 |
+
before it’s too late
|
146 |
+
act now before disaster strikes
|
147 |
+
imminent collapse
|
148 |
+
crisis is unfolding
|
149 |
+
ticking time bomb
|
150 |
+
on the brink of disaster
|
151 |
+
looming catastrophe
|
152 |
+
a dire warning
|
153 |
+
facing an existential threat
|
154 |
+
the fate of our nation
|
155 |
+
before it's too late
|
156 |
+
can’t afford to wait
|
157 |
+
if this continues, we’re doomed
|
158 |
+
the last chance to save
|
159 |
+
history will not be kind
|
160 |
+
future generations will suffer
|
161 |
+
too dangerous to ignore
|
162 |
+
|
163 |
+
# 🔹 Appealing to Nostalgia & Past Glory
|
164 |
+
things were better before
|
165 |
+
back in the good old days
|
166 |
+
when America was great
|
167 |
+
returning to our roots
|
168 |
+
the way it was meant to be
|
169 |
+
before things got out of hand
|
170 |
+
we’ve lost our way
|
171 |
+
we need to go back to simpler times
|
172 |
+
what our forefathers intended
|
173 |
+
traditional values are under attack
|
174 |
+
restoring the glory days
|
175 |
+
reclaiming what was lost
|
176 |
+
remember when things made sense?
|
177 |
+
back when people had morals
|
178 |
+
the downfall of our civilization
|
mediaunmasked/resources/right_bias_words.txt
ADDED
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 📂 resources/right_bias_words.txt
|
2 |
+
# -------------------------------------------------
|
3 |
+
# 🔹 Political Ideology & Economic Policy
|
4 |
+
right-wing
|
5 |
+
conservative
|
6 |
+
traditional values
|
7 |
+
free market
|
8 |
+
capitalism
|
9 |
+
small government
|
10 |
+
big government overreach
|
11 |
+
limited government
|
12 |
+
fiscal responsibility
|
13 |
+
trickle-down economics
|
14 |
+
deregulation
|
15 |
+
job creators
|
16 |
+
personal responsibility
|
17 |
+
welfare dependency
|
18 |
+
individual liberty
|
19 |
+
government waste
|
20 |
+
tax cuts
|
21 |
+
pro-business policies
|
22 |
+
pro-growth policies
|
23 |
+
big government socialism
|
24 |
+
crony capitalism
|
25 |
+
hard work pays off
|
26 |
+
socialist policies fail
|
27 |
+
self-reliance
|
28 |
+
national sovereignty
|
29 |
+
|
30 |
+
# 🔹 Climate & Environmental Policy
|
31 |
+
climate alarmism
|
32 |
+
climate hoax
|
33 |
+
green energy scam
|
34 |
+
drill baby drill
|
35 |
+
energy independence
|
36 |
+
clean coal
|
37 |
+
pro-fracking
|
38 |
+
stop the war on oil
|
39 |
+
anti-carbon tax
|
40 |
+
regulatory overreach
|
41 |
+
climate hysteria
|
42 |
+
fossil fuel industry
|
43 |
+
radical environmentalists
|
44 |
+
alternative energy myths
|
45 |
+
global warming exaggeration
|
46 |
+
renewable energy failure
|
47 |
+
climate change agenda
|
48 |
+
big government green policies
|
49 |
+
end subsidies for green energy
|
50 |
+
eco-terrorism
|
51 |
+
emissions regulations kill jobs
|
52 |
+
climate change skepticism
|
53 |
+
scientific consensus is flawed
|
54 |
+
|
55 |
+
# 🔹 Social Issues & Culture Wars
|
56 |
+
woke agenda
|
57 |
+
cancel culture
|
58 |
+
critical race theory
|
59 |
+
identity politics
|
60 |
+
anti-woke
|
61 |
+
war on Christmas
|
62 |
+
traditional marriage
|
63 |
+
family values
|
64 |
+
religious freedom
|
65 |
+
biblical principles
|
66 |
+
faith-based values
|
67 |
+
cultural marxism
|
68 |
+
gender ideology
|
69 |
+
biological reality
|
70 |
+
trans agenda
|
71 |
+
protect women’s sports
|
72 |
+
Christian persecution
|
73 |
+
church over state
|
74 |
+
anti-religious bigotry
|
75 |
+
parental rights
|
76 |
+
indoctrination in schools
|
77 |
+
reverse racism
|
78 |
+
meritocracy matters
|
79 |
+
law and order
|
80 |
+
war on masculinity
|
81 |
+
anti-gun propaganda
|
82 |
+
|
83 |
+
# 🔹 Immigration & Border Policy
|
84 |
+
illegal aliens
|
85 |
+
border crisis
|
86 |
+
invasion at the border
|
87 |
+
build the wall
|
88 |
+
secure our borders
|
89 |
+
deportation
|
90 |
+
amnesty is a scam
|
91 |
+
open borders policy
|
92 |
+
sanctuary cities are unsafe
|
93 |
+
migrant caravans
|
94 |
+
protect American workers
|
95 |
+
chain migration
|
96 |
+
anchor babies
|
97 |
+
English as the official language
|
98 |
+
vetting immigrants
|
99 |
+
catch and deport
|
100 |
+
mass migration problem
|
101 |
+
border security first
|
102 |
+
illegals taking American jobs
|
103 |
+
criminal aliens
|
104 |
+
no asylum abuse
|
105 |
+
end birthright citizenship
|
106 |
+
border patrol under attack
|
107 |
+
|
108 |
+
# 🔹 Healthcare & Public Welfare
|
109 |
+
government takeover of healthcare
|
110 |
+
socialized medicine fails
|
111 |
+
free healthcare is a myth
|
112 |
+
Medicare for all is unsustainable
|
113 |
+
private insurance rights
|
114 |
+
Obamacare disaster
|
115 |
+
healthcare freedom
|
116 |
+
personal responsibility in healthcare
|
117 |
+
rationed care
|
118 |
+
universal healthcare means higher taxes
|
119 |
+
health savings accounts
|
120 |
+
big pharma collusion
|
121 |
+
big government healthcare
|
122 |
+
death panels
|
123 |
+
nanny state policies
|
124 |
+
taxpayer-funded abortion
|
125 |
+
personalized medicine
|
126 |
+
free market healthcare solutions
|
127 |
+
government interference in medicine
|
128 |
+
healthcare choice
|
129 |
+
welfare abuse
|
130 |
+
entitlement reform
|
131 |
+
personalized care models
|
132 |
+
stop welfare expansion
|
133 |
+
|
134 |
+
# 🔹 Gun Rights & Public Safety
|
135 |
+
gun control doesn’t work
|
136 |
+
Second Amendment rights
|
137 |
+
constitutional carry
|
138 |
+
good guy with a gun
|
139 |
+
gun grabbers
|
140 |
+
assault weapons myth
|
141 |
+
defend the Second Amendment
|
142 |
+
law-abiding gun owners
|
143 |
+
red flag laws violate rights
|
144 |
+
shall not be infringed
|
145 |
+
gun-free zones don't work
|
146 |
+
arming teachers
|
147 |
+
NRA-backed legislation
|
148 |
+
stand your ground
|
149 |
+
self-defense rights
|
150 |
+
gun rights under attack
|
151 |
+
criminals ignore gun laws
|
152 |
+
leftists want total disarmament
|
153 |
+
defund the police is dangerous
|
154 |
+
crime wave
|
155 |
+
law and order policies
|
156 |
+
Democrats are soft on crime
|
157 |
+
|
158 |
+
# 🔹 Media & Big Tech Censorship
|
159 |
+
mainstream media lies
|
160 |
+
fake news media
|
161 |
+
media bias
|
162 |
+
left-wing media monopoly
|
163 |
+
corporate media corruption
|
164 |
+
conservative voices silenced
|
165 |
+
shadow banning
|
166 |
+
Big Tech censorship
|
167 |
+
social media suppression
|
168 |
+
fact-checkers are biased
|
169 |
+
alternative media
|
170 |
+
legacy media collapse
|
171 |
+
biased journalism
|
172 |
+
freedom of speech under attack
|
173 |
+
media elite
|
174 |
+
disinformation police
|
175 |
+
Silicon Valley leftist agenda
|
176 |
+
misinformation double standard
|
177 |
+
conservative accounts banned
|
178 |
+
election interference
|
179 |
+
algorithm manipulation
|
180 |
+
digital free speech
|
181 |
+
|
182 |
+
# 🔹 Election Integrity & Government Accountability
|
183 |
+
election fraud
|
184 |
+
stolen election
|
185 |
+
mail-in ballot fraud
|
186 |
+
ballot harvesting
|
187 |
+
illegal voting
|
188 |
+
rigged elections
|
189 |
+
secure the vote
|
190 |
+
voter ID laws
|
191 |
+
dead people voting
|
192 |
+
stop election interference
|
193 |
+
clean voter rolls
|
194 |
+
fair elections
|
195 |
+
stop the steal
|
196 |
+
Democrat-run cities are corrupt
|
197 |
+
big government tyranny
|
198 |
+
congressional overreach
|
199 |
+
political witch hunt
|
200 |
+
government accountability
|
201 |
+
unelected bureaucrats
|
202 |
+
administrative state abuse
|
203 |
+
taxpayer money wasted
|
204 |
+
drain the swamp
|
205 |
+
deep state
|
206 |
+
two-tiered justice system
|
207 |
+
weaponization of government
|
208 |
+
|
209 |
+
# 🔹 Foreign Policy & National Defense
|
210 |
+
America First
|
211 |
+
patriotic nationalism
|
212 |
+
globalism is a threat
|
213 |
+
weak foreign policy
|
214 |
+
military strength
|
215 |
+
peace through strength
|
216 |
+
support our troops
|
217 |
+
anti-interventionism
|
218 |
+
funding our enemies
|
219 |
+
China threat
|
220 |
+
Russia hoax
|
221 |
+
leftist appeasement
|
222 |
+
national security first
|
223 |
+
endless wars are a mistake
|
224 |
+
pro-Israel stance
|
225 |
+
secure our allies
|
226 |
+
foreign aid waste
|
227 |
+
Biden’s weak leadership
|
228 |
+
defund the UN
|
229 |
+
globalist elites
|
230 |
+
pro-American trade policies
|
231 |
+
stop outsourcing jobs
|
232 |
+
stop military woke policies
|
233 |
+
secure American sovereignty
|
mediaunmasked/schemas/requests.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
|
3 |
+
class AnalyzeRequest(BaseModel):
    """Payload model holding the two text fields submitted for analysis."""

    # Headline text of the article.
    headline: str
    # Body text of the article.
    content: str
|
mediaunmasked/schemas/responses.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
|
3 |
+
class AnalyzeResponse(BaseModel):
    """Result model exposing the three headline-analysis scores."""

    # NOTE(review): the unit tests compare this value against 30, so it appears
    # to be on a different scale than the two scores below — confirm.
    headline_vs_content_score: float
    # NOTE(review): presumably a probability-like value — confirm against HeadlineAnalyzer.
    entailment_score: float
    # NOTE(review): the unit tests compare this value against 0.3 — confirm the range.
    contradiction_score: float
|
mediaunmasked/scrapers/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from .article_scraper import ArticleScraper
|
2 |
+
|
3 |
+
__all__ = ['ArticleScraper']
|
mediaunmasked/scrapers/article_scraper.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Optional
|
2 |
+
import logging
|
3 |
+
from urllib.parse import urlparse
|
4 |
+
import requests
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
+
|
7 |
+
from ..utils.logging_config import setup_logging
|
8 |
+
|
9 |
+
class ArticleScraper:
    """Download an article page and pull out its headline and body text.

    Snopes and PolitiFact pages have dedicated extractors; every other domain
    goes through a generic fallback that probes a few common content selectors.
    """

    # Tags whose contents are never article prose; removed before reading text.
    _UNWANTED_TAGS = ['script', 'style', 'iframe', 'aside']
    # Seconds to wait on the remote server. Without a timeout a stalled server
    # would block the caller indefinitely.
    _REQUEST_TIMEOUT = 10

    def __init__(self):
        self.session = requests.Session()
        # update() overrides only the User-Agent and keeps the session's other
        # default headers, which plain assignment would have discarded.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
        setup_logging()
        self.logger = logging.getLogger(__name__)

    def _get_domain(self, url: str) -> str:
        """Extract domain from URL."""
        return urlparse(url).netloc

    def _fetch_page(self, url: str) -> Optional[str]:
        """Return the page body for *url*, or None if the request fails.

        Connection errors, invalid URLs, timeouts and 4xx/5xx responses are
        logged and reported as None instead of being raised.
        """
        try:
            response = self.session.get(url, timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            self.logger.error(f"Error fetching {url}: {str(e)}")
            return None
        return response.text

    def _strip_unwanted(self, container) -> None:
        """Remove script/style/iframe/aside elements from *container* in place."""
        for unwanted in container.find_all(self._UNWANTED_TAGS):
            unwanted.decompose()

    @staticmethod
    def _join_paragraphs(container) -> str:
        """Return the stripped text of every <p> under *container*, space-joined."""
        return ' '.join(p.get_text().strip() for p in container.find_all('p'))

    def _text_from_selectors(self, soup: BeautifulSoup, selectors, strip_unwanted: bool = False) -> str:
        """Return paragraph text from the first CSS selector that yields any.

        A matching container without paragraph text no longer ends the search;
        the remaining selectors are still tried. Returns '' if none produce text.
        """
        for selector in selectors:
            container = soup.select_one(selector)
            if not container:
                continue
            if strip_unwanted:
                self._strip_unwanted(container)
            text = self._join_paragraphs(container)
            if text.strip():
                return text
        return ''

    def _extract_snopes(self, soup: BeautifulSoup) -> Dict[str, str]:
        """Extract content from Snopes articles.

        Returns a dict with "headline" and "content"; either is '' when the
        corresponding element is missing from the page.
        """
        # Get headline from any h1 tag since it doesn't have a specific class
        headline_elem = soup.find('h1')
        headline = headline_elem.get_text().strip() if headline_elem else ''
        self.logger.info(f"Found headline: {headline}")

        # Try to find the article content
        article = soup.find('article')
        if not article:
            self.logger.warning("No article tag found")
            return {"headline": headline, "content": ''}

        self.logger.info("Found article tag")
        self._strip_unwanted(article)

        # Prefer paragraph text; fall back to the whole article text when the
        # article contains no <p> elements at all.
        if article.find_all('p'):
            content = self._join_paragraphs(article)
        else:
            content = article.get_text().strip()

        return {"headline": headline, "content": content}

    def _extract_politifact(self, soup: BeautifulSoup) -> Dict[str, str]:
        """Extract content from PolitiFact articles.

        Unlike the Snopes extractor, missing parts are reported with the
        placeholder strings "No headline found" / "No content found", and any
        extraction error is returned as a {"headline": "Error", ...} dict.
        """
        try:
            title_elem = soup.find('h1', class_='article__title') or soup.find('h1')
            headline = title_elem.get_text().strip() if title_elem else "No headline found"

            self.logger.info(f"Found headline: {headline}")

            content_div = soup.find('article', class_='article')
            if content_div:
                self._strip_unwanted(content_div)
                content = self._join_paragraphs(content_div)
            else:
                # Try alternative content selectors
                content = self._text_from_selectors(soup, ['.article__text', '.m-textblock'])

            if not content:
                self.logger.warning("No content found in article")
                content = "No content found"

            return {"headline": headline, "content": content}

        except Exception as e:
            # Deliberate best-effort: callers receive an error payload, not an exception.
            self.logger.error(f"Error extracting PolitiFact content: {str(e)}")
            return {"headline": "Error", "content": f"Failed to extract content: {str(e)}"}

    def scrape_article(self, url: str) -> Optional[Dict[str, str]]:
        """
        Main function to scrape fact-checking articles.

        Returns a dictionary with "headline" and "content", or None when the
        page could not be fetched.
        """
        html_content = self._fetch_page(url)
        if not html_content:
            self.logger.error("Failed to fetch page content")
            return None

        soup = BeautifulSoup(html_content, 'html.parser')
        domain = self._get_domain(url)

        self.logger.info(f"Scraping article from domain: {domain}")

        # Select appropriate extractor based on domain
        if 'snopes.com' in domain:
            result = self._extract_snopes(soup)
            if not result['headline'] or not result['content']:
                self.logger.warning("Failed to extract content from Snopes article")
                self.logger.debug(f"HTML content: {html_content[:500]}...")
            return result

        if 'politifact.com' in domain:
            return self._extract_politifact(soup)

        # Generic extraction fallback
        headline_elem = soup.find('h1')
        headline = headline_elem.get_text().strip() if headline_elem else ''

        # Try common content selectors
        content = self._text_from_selectors(
            soup,
            ['article', 'main', '.content', '.article-content'],
            strip_unwanted=True,
        )

        return {"headline": headline, "content": content}
|
mediaunmasked/services/analyzer_service.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
|
2 |
+
|
3 |
+
class AnalyzerService:
    """Service wrapper that forwards analysis requests to a HeadlineAnalyzer."""

    def __init__(self):
        # The analyzer is created once and reused for every request.
        self.headline_analyzer = HeadlineAnalyzer()

    async def analyze_content(self, headline: str, content: str):
        """Analyze *headline* against *content* and return the analyzer's result unchanged."""
        analyzer = self.headline_analyzer
        return analyzer.analyze(headline, content)
|
mediaunmasked/utils/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Empty file is fine
|
mediaunmasked/utils/logging_config.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
def setup_logging(level: int = logging.INFO) -> None:
    """Apply the application's logging configuration to the root logger.

    Args:
        level: Threshold handed to ``logging.basicConfig``.

    Note:
        ``logging.basicConfig`` is a no-op when the root logger already has
        handlers, so repeated calls do not stack handlers.
    """
    basic_config_options = {
        'level': level,
        'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        'datefmt': '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**basic_config_options)
|
mediaunmasked/web/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Empty file is fine
|
mediaunmasked/web/app.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from ..analyzers.bias_analyzer import BiasAnalyzer
|
3 |
+
from ..scrapers.article_scraper import ArticleScraper
|
4 |
+
from ..utils.logging_config import setup_logging
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
|
7 |
+
def create_sentiment_gauge(score: float) -> go.Figure:
    """Build a Plotly gauge indicator for *score*.

    Args:
        score: Value that is multiplied by 100 before display, so a 0-1
            input spans the gauge's 0-100 axis.

    Returns:
        A figure holding a single "gauge+number" indicator titled
        "Sentiment Score" with three gray bands.
    """
    # (low, high, color) for each background band of the gauge.
    bands = [(0, 33, "lightgray"), (33, 66, "gray"), (66, 100, "darkgray")]
    gauge_config = {
        'axis': {'range': [0, 100]},
        'bar': {'color': "darkblue"},
        'steps': [{'range': [low, high], 'color': shade} for low, high, shade in bands],
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score * 100,
        title={'text': "Sentiment Score"},
        gauge=gauge_config,
    )
    return go.Figure(indicator)
|
24 |
+
|
25 |
+
def main():
    """Render the Streamlit page: scrape the entered URL and show the analysis.

    Nothing is analyzed until the "Analyze" button is pressed. If scraping
    returns a falsy value an error message is shown instead of results.
    """
    # Set up logging
    setup_logging()

    # Initialize components
    scraper = ArticleScraper()
    analyzer = BiasAnalyzer()

    # Set up the Streamlit interface
    st.title("Media Bias Analyzer")
    st.write("Analyze bias and sentiment in news articles")

    # URL input
    url = st.text_input("Enter article URL:", "https://www.snopes.com/articles/469232/musk-son-told-trump-shut-up/")

    if st.button("Analyze"):
        with st.spinner("Analyzing article..."):
            # Scrape the article
            article = scraper.scrape_article(url)

            if article:
                # Show article details
                st.subheader("Article Details")
                st.write(f"**Headline:** {article['headline']}")

                with st.expander("Show Article Content"):
                    st.write(article['content'])

                # Analyze content
                result = analyzer.analyze(article['content'])

                # Display results in columns
                col1, col2 = st.columns(2)

                with col1:
                    st.subheader("Sentiment Analysis")
                    st.write(f"**Overall Sentiment:** {result.sentiment}")
                    # NOTE(review): the gauge is titled "Sentiment Score" but is fed
                    # bias_score — confirm this is the intended value.
                    fig = create_sentiment_gauge(result.bias_score / 100)
                    st.plotly_chart(fig)

                with col2:
                    st.subheader("Bias Analysis")
                    st.write(f"**Detected Bias:** {result.bias}")
                    st.write(f"**Confidence Score:** {result.bias_score:.1f}%")

                # Show flagged phrases
                if result.flagged_phrases:
                    st.subheader("Potentially Biased Phrases")
                    for phrase in result.flagged_phrases:
                        st.warning(phrase)
                else:
                    st.info("No potentially biased phrases detected")

            else:
                st.error("Failed to fetch article. Please check the URL and try again.")
|
80 |
+
|
81 |
+
if __name__ == "__main__":
|
82 |
+
main()
|
requirements.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi[all]==0.109.2
|
2 |
+
uvicorn==0.27.1
|
3 |
+
pydantic==2.6.1
|
4 |
+
beautifulsoup4==4.12.3
|
5 |
+
requests==2.31.0
|
6 |
+
python-dotenv==1.0.1
|
7 |
+
textblob==0.17.1
|
8 |
+
nltk==3.8.1
|
9 |
+
transformers==4.36.2
|
10 |
+
torch==2.1.2
|
11 |
+
numpy==1.26.3
|
12 |
+
pytest==7.4.3
|
13 |
+
pytest-asyncio==0.21.1
|
14 |
+
httpx==0.25.2
|
setup.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from setuptools import setup, find_packages
|
2 |
+
|
3 |
+
def _read_requirements(path="requirements.txt"):
    """Return the requirement specifiers listed in *path*.

    Blank lines and ``#`` comment lines are skipped, and the file handle is
    closed as soon as the list has been built.
    """
    with open(path, encoding="utf-8") as requirements_file:
        stripped_lines = (line.strip() for line in requirements_file)
        return [line for line in stripped_lines if line and not line.startswith("#")]


setup(
    name="mediaunmasked",
    version="0.1.0",
    packages=find_packages(exclude=["tests*"]) + ["app"],  # Include app/ and mediaunmasked/
    package_dir={"app": "app"},  # Map app directory
    install_requires=_read_requirements(),
    include_package_data=True,
    python_requires=">=3.10",
)
|
start.sh
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Install dependencies
|
4 |
+
pip install -r requirements.txt
|
5 |
+
|
6 |
+
# Install package in development mode
|
7 |
+
pip install -e .
|
8 |
+
|
9 |
+
# Start the FastAPI server
|
10 |
+
uvicorn app.main:app --host 0.0.0.0 --port 7860 --reload
|
tests/unit/test_headline_analyzer.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
from mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
|
3 |
+
|
4 |
+
@pytest.fixture
def analyzer():
    """Provide a HeadlineAnalyzer instance to the tests that request it."""
    return HeadlineAnalyzer()
|
7 |
+
|
8 |
+
def test_matching_headline(analyzer):
    """A headline supported by its content scores above 30 with low contradiction."""
    headline = "New Study Shows Coffee Reduces Heart Disease Risk"
    content = "Recent research suggests that coffee may have cardiovascular benefits."

    result = analyzer.analyze(headline, content)

    assert result["headline_vs_content_score"] > 30
    assert result["contradiction_score"] < 0.3
|
16 |
+
|
17 |
+
def test_contradictory_headline(analyzer):
    """A headline contradicted by its content scores below 30 with high contradiction."""
    headline = "Coffee Increases Heart Disease Risk"
    content = "Studies show coffee decreases cardiovascular disease risk."

    result = analyzer.analyze(headline, content)

    assert result["headline_vs_content_score"] < 30
    assert result["contradiction_score"] > 0.3
|