wozwize committed on
Commit
876b12f
·
1 Parent(s): f83c2ca

initial commit of media-unmasked-api to huggingface

Browse files
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Ignore Python compiled files
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python Slim Image
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory inside container
5
+ WORKDIR /app
6
+
7
+ # Copy dependencies
8
+ COPY requirements.txt .
9
+ RUN pip install --no-cache-dir -r requirements.txt
10
+
11
+ # Copy all files
12
+ COPY . .
13
+
14
+ # Set PYTHONPATH (simplified)
15
+ ENV PYTHONPATH=/app
16
+
17
+ # Expose the port the app listens on (7860, set in CMD below; not uvicorn's default of 8000)
18
+ EXPOSE 7860
19
+
20
+ # Start FastAPI
21
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.routers import analyze, health

# ASGI application object; the Dockerfile starts it as "app.main:app".
app = FastAPI(title="MediaUnmasked API")

# Enable CORS so browser clients (including Swagger UI served from another
# origin) can call the API.
#
# Credentials are disabled on purpose: a wildcard origin must not be combined
# with allow_credentials=True, because that lets any website issue
# credentialed requests against the API. To allow cookies/Authorization
# headers, replace "*" with an explicit origin list
# (e.g. ["http://localhost:7860"]) and set allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],      # Allow all origins
    allow_credentials=False,  # Must stay False while origins is a wildcard
    allow_methods=["*"],      # Allow all methods
    allow_headers=["*"],      # Allow all headers
)

# Include routers
app.include_router(analyze.router, prefix="/api")
app.include_router(health.router, prefix="/health")


@app.get("/")
async def root():
    """Return a static banner confirming the API process is up."""
    return {"message": "MediaUnmasked API is running!"}
app/routers/analyze.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from mediaunmasked.schemas.requests import AnalyzeRequest
3
+ from mediaunmasked.schemas.responses import AnalyzeResponse
4
+ from mediaunmasked.services.analyzer_service import AnalyzerService
5
+
6
+ router = APIRouter(tags=["analysis"])
7
+
8
@router.post("/analyze", response_model=AnalyzeResponse)
async def analyze_content(request: AnalyzeRequest):
    """Run the analyzer service on a headline/content pair.

    Args:
        request: Request body; its ``headline`` and ``content`` fields are
            forwarded to ``AnalyzerService.analyze_content``.

    Returns:
        Whatever the service returns, serialized through ``AnalyzeResponse``.

    Raises:
        HTTPException: Propagated unchanged when the service raises one;
            any other exception is reported as a 500 carrying its message.
    """
    try:
        # NOTE(review): a new AnalyzerService is built on every request; if its
        # constructor loads models this is expensive - consider sharing one
        # instance once its thread-safety is confirmed.
        analyzer_service = AnalyzerService()
        result = await analyzer_service.analyze_content(
            headline=request.headline,
            content=request.content
        )
        return result
    except HTTPException:
        # Keep deliberate HTTP errors (status code + detail) intact instead of
        # re-wrapping them as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
app/routers/health.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
+ router = APIRouter()
4
+
5
@router.get("/")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    payload = {"status": "healthy"}
    return payload
create_structure.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
#!/usr/bin/env bash
# Scaffold the project's directory layout.
#
# Brace expansion ({a,b,c}) is a bash feature, so the script must run under
# bash rather than a plain POSIX sh.
#
# NOTE(review): the committed tree uses app/routers and a top-level
# mediaunmasked/ package, while this script creates api/routers and
# src/mediaunmasked/ - confirm which layout is intended.
set -euo pipefail

mkdir -p api/routers
mkdir -p src/mediaunmasked/{services,models,schemas,config}
mkdir -p tests/{unit,integration}
directory_structure.txt ADDED
Binary file (4.85 kB). View file
 
mediaunmasked/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """
2
+ MediaUnmasked - AI-powered media watchdog for analyzing bias and fact-checking.
3
+ """
4
+
5
+ __version__ = "0.1.0"
mediaunmasked/analyzers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty file to make the directory a Python package
mediaunmasked/analyzers/bias_analyzer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from typing import Dict, Any, List
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
class BiasAnalyzer:
    """Keyword-based detector of left/right slant in a piece of text.

    Two keyword lists are loaded from the package's ``resources`` directory.
    A text is scored by how many *distinct* keywords from each list occur in
    it as whole words/phrases.
    """

    def __init__(self):
        self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
        self.left_keywords = self._load_keywords('left_bias_words.txt')
        self.right_keywords = self._load_keywords('right_bias_words.txt')

    def _load_keywords(self, filename: str) -> List[str]:
        """Load keywords from file.

        Blank lines and ``#`` comment lines (leading whitespace allowed) are
        skipped. Keywords are lower-cased and de-duplicated while keeping
        their first-seen order, so a phrase listed twice is not counted twice.

        Returns:
            The keyword list, or ``[]`` if the file cannot be read (the error
            is logged, never raised).
        """
        try:
            filepath = os.path.join(self.resources_dir, filename)
            with open(filepath, 'r', encoding='utf-8') as f:
                stripped_lines = (line.strip() for line in f)
                keywords = [
                    entry.lower()
                    for entry in stripped_lines
                    if entry and not entry.startswith('#')
                ]
            # dict.fromkeys removes duplicates and preserves order.
            return list(dict.fromkeys(keywords))
        except Exception as e:
            logger.error(f"Error loading {filename}: {str(e)}")
            return []

    @staticmethod
    def _count_keywords(keywords: List[str], text_lower: str) -> int:
        """Count how many keywords occur in ``text_lower`` as whole words."""
        import re

        # Lookarounds instead of \b: some keywords start or end with
        # punctuation (e.g. "lgbtq+ rights"), where \b would never match.
        return sum(
            1
            for keyword in keywords
            if re.search(r"(?<!\w)" + re.escape(keyword) + r"(?!\w)", text_lower)
        )

    def analyze(self, text: str) -> Dict[str, Any]:
        """Detect bias using keyword analysis.

        Returns:
            A dict with ``bias`` (label), ``bias_score`` (-1.0 left .. 1.0
            right, rounded to 2 places) and ``bias_percentage`` (magnitude of
            the score as a percentage). On any error the label is ``"Error"``
            with zeroed numbers.
        """
        try:
            text_lower = text.lower()

            # Count matches (whole words only, so "equity" does not fire on
            # "inequity").
            left_count = self._count_keywords(self.left_keywords, text_lower)
            right_count = self._count_keywords(self.right_keywords, text_lower)

            total_words = left_count + right_count
            if total_words == 0:
                return {
                    "bias": "Neutral",
                    "bias_score": 0.0,  # True neutral
                    "bias_percentage": 0  # Neutral percentage
                }

            # Bias score formula (-1.0 left, 0.0 neutral, 1.0 right)
            bias_score = (right_count - left_count) / total_words

            # Convert bias_score to percentage (-100% to +100%)
            bias_percentage = bias_score * 100
            logger.info(f"Bias score: {bias_score:.2f}, Bias percentage: {bias_percentage:.1f}%")

            # Determine bias label
            if bias_score < -0.8:
                bias = "Strongly Left"
            elif bias_score < -0.5:
                bias = "Moderately Left"
            elif bias_score < -0.2:
                bias = "Leaning Left"
            elif bias_score > 0.8:
                bias = "Strongly Right"
            elif bias_score > 0.5:
                bias = "Moderately Right"
            elif bias_score > 0.2:
                bias = "Leaning Right"
            else:
                bias = "Neutral"

            return {
                "bias": bias,
                "bias_score": round(bias_score, 2),  # Keep 2 decimal places
                # Direction is carried by bias_score/label; report magnitude only.
                "bias_percentage": abs(round(bias_percentage, 1))
            }

        except Exception as e:
            logger.error(f"Error in bias analysis: {str(e)}")
            return {
                "bias": "Error",
                "bias_score": 0.0,
                "bias_percentage": 0
            }
mediaunmasked/analyzers/evidence_analyzer.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, Any, List
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
class EvidenceAnalyzer:
    """Heuristic score for how evidence-based a text's reporting looks.

    Distinct citation markers raise the score; distinct vague-attribution
    markers lower it. Markers are matched as whole words/phrases.
    """

    def __init__(self):
        self.citation_markers = [
            "according to",
            "said",
            "reported",
            "stated",
            "shows",
            "found",
            "study",
            "research",
            "data",
            "evidence"
        ]

        self.vague_markers = [
            "some say",
            "many believe",
            "people think",
            "experts claim",
            "sources say",
            "it is believed",
            "reportedly",
            "allegedly"
        ]

    @staticmethod
    def _count_markers(markers: List[str], text_lower: str) -> int:
        """Count how many markers occur in ``text_lower`` as whole words."""
        import re

        # Whole-word matching keeps "found" from firing on "profound" and
        # stops the vague marker "reportedly" from also scoring as "reported".
        return sum(
            1
            for marker in markers
            if re.search(r"(?<!\w)" + re.escape(marker) + r"(?!\w)", text_lower)
        )

    def analyze(self, text: str) -> Dict[str, Any]:
        """Check for evidence-based reporting.

        Each distinct citation marker present adds 20 points (capped at 100);
        each distinct vague marker present subtracts 10; the result is floored
        at 0.

        Returns:
            ``{"evidence_based_score": <int 0..100>}``; the score is 0 if an
            error occurs (the error is logged, never raised).
        """
        try:
            text_lower = text.lower()

            citation_count = self._count_markers(self.citation_markers, text_lower)
            vague_count = self._count_markers(self.vague_markers, text_lower)

            base_score = min(citation_count * 20, 100)
            penalty = vague_count * 10

            evidence_score = max(0, base_score - penalty)

            return {
                "evidence_based_score": evidence_score
            }

        except Exception as e:
            logger.error(f"Error in evidence analysis: {str(e)}")
            return {
                "evidence_based_score": 0
            }
mediaunmasked/analyzers/headline_analyzer.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, Any, List
3
+ from transformers import pipeline
4
+ from transformers import AutoTokenizer
5
+ import numpy as np
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class HeadlineAnalyzer:
    """Score how consistent a headline is with its article body.

    The headline and the body (or overlapping sections of it, when the pair
    is too long for the model) are classified with the ``roberta-large-mnli``
    pipeline, and the ENTAILMENT / NEUTRAL / CONTRADICTION scores are combined
    into a 0-100 consistency score.
    """

    def __init__(self):
        """Initialize the NLI model for contradiction detection."""
        self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
        self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
        self.max_length = 512

    def _split_content(self, headline: str, content: str) -> List[str]:
        """Split content into sections that fit within token limit.

        Words are accumulated until adding the next one would exceed the
        per-section token budget; each new section starts with the last 20%
        of the previous section's words for context.

        Token usage is tracked incrementally, tokenizing each word once
        instead of re-tokenizing the whole growing section per word. The
        per-word sum is an estimate of the joined text's token count;
        ``_analyze_section`` truncates as the hard guarantee.
        """
        content_words = content.split()

        # encode() counts the headline together with the special tokens it
        # adds; reserve two more for the extra separators of a sentence pair.
        headline_tokens = len(self.tokenizer.encode(headline))
        max_content_tokens = self.max_length - headline_tokens - 2
        # A very long headline must not shrink the budget to zero or below;
        # keep a usable floor and let truncation trim the pair instead.
        max_content_tokens = max(max_content_tokens, self.max_length // 4)

        sections: List[str] = []
        current_section: List[str] = []
        current_counts: List[int] = []
        current_total = 0

        # Process words into sections
        for word in content_words:
            word_tokens = len(self.tokenizer.encode(" " + word, add_special_tokens=False))

            # Close the section *before* adding a word that would overflow it.
            # The `current_section` guard guarantees no empty section is
            # emitted, even for a single word larger than the budget.
            if current_section and current_total + word_tokens > max_content_tokens:
                sections.append(" ".join(current_section))

                # Start new section with 20% overlap for context
                overlap_start = max(0, len(current_section) - int(len(current_section) * 0.2))
                current_section = current_section[overlap_start:]
                current_counts = current_counts[overlap_start:]
                current_total = sum(current_counts)

            current_section.append(word)
            current_counts.append(word_tokens)
            current_total += word_tokens

        # Add any remaining content as the last section
        if current_section:
            sections.append(" ".join(current_section))

        logger.info(f"""Content Splitting:
        - Original content length: {len(content_words)} words
        - Split into {len(sections)} sections
        - Headline uses {headline_tokens} tokens
        - Available tokens per section: {max_content_tokens}
        """)
        return sections

    def _analyze_section(self, headline: str, section: str) -> Dict[str, float]:
        """Analyze a single section of content.

        Returns:
            Mapping of model label to score for the (headline, section) pair.
        """
        # Pass the two texts as a real sentence pair so the tokenizer inserts
        # the model's own separator tokens; a literal "[SEP]" string is just
        # ordinary text to this tokenizer. Truncation guards against
        # over-length inputs.
        # NOTE(review): the headline is sent first (premise slot) to keep the
        # original ordering; for "does the article support the headline?" the
        # article is arguably the premise - confirm the intended direction.
        result = self.nli_pipeline(
            {"text": headline, "text_pair": section},
            top_k=None,
            truncation=True,
            max_length=self.max_length,
        )
        # Some pipeline versions wrap a single result in an outer list.
        if result and isinstance(result[0], list):
            result = result[0]

        # Extract scores
        scores = {item['label']: item['score'] for item in result}

        logger.info("\nSection Analysis:")
        logger.info("-"*30)
        logger.info(f"Section preview: {section[:100]}...")
        for label, score in scores.items():
            logger.info(f"Label: {label:<12} Score: {score:.3f}")

        return scores

    def analyze(self, headline: str, content: str) -> Dict[str, Any]:
        """Analyze how well the headline matches the content using an AI model.

        Returns:
            A dict with ``headline_vs_content_score`` (0-100),
            ``entailment_score``, ``contradiction_score`` and
            ``contradictory_phrases`` (always empty here). All scores are 0
            for empty input or when an error occurs (errors are logged with a
            stack trace, never raised).
        """
        try:
            logger.info("\n" + "="*50)
            logger.info("HEADLINE ANALYSIS STARTED")
            logger.info("="*50)

            # Handle empty inputs
            if not headline.strip() or not content.strip():
                logger.warning("Empty headline or content provided")
                return {
                    "headline_vs_content_score": 0,
                    "entailment_score": 0,
                    "contradiction_score": 0,
                    "contradictory_phrases": []
                }

            # Split content if too long. The headline shares the model's input
            # window with the content, so both must be counted.
            headline_tokens = len(self.tokenizer.encode(headline))
            content_tokens = len(self.tokenizer.encode(content))
            if headline_tokens + content_tokens > self.max_length:
                logger.warning(f"""
                Content Length Warning:
                - Total tokens: {content_tokens}
                - Max allowed: {self.max_length}
                - Splitting into sections...
                """)
                sections = self._split_content(headline, content)

                # Analyze each section
                section_scores = []
                for i, section in enumerate(sections, 1):
                    logger.info(f"\nAnalyzing section {i}/{len(sections)}")
                    scores = self._analyze_section(headline, section)
                    section_scores.append(scores)

                # Aggregate scores across sections
                # Use max contradiction (if any section strongly contradicts, that's important)
                # Use mean entailment (overall support across sections)
                # Use mean neutral (general neutral tone across sections)
                entailment_score = np.mean([s.get('ENTAILMENT', 0) for s in section_scores])
                contradiction_score = np.max([s.get('CONTRADICTION', 0) for s in section_scores])
                neutral_score = np.mean([s.get('NEUTRAL', 0) for s in section_scores])

                logger.info("\nAggregated Scores Across Sections:")
                logger.info("-"*30)
                logger.info(f"Mean Entailment: {entailment_score:.3f}")
                logger.info(f"Max Contradiction: {contradiction_score:.3f}")
                logger.info(f"Mean Neutral: {neutral_score:.3f}")
            else:
                # Single section analysis
                scores = self._analyze_section(headline, content)
                entailment_score = scores.get('ENTAILMENT', 0)
                contradiction_score = scores.get('CONTRADICTION', 0)
                neutral_score = scores.get('NEUTRAL', 0)

            # Plain Python floats keep the result JSON-friendly regardless of
            # whether the values came from numpy aggregation.
            entailment_score = float(entailment_score)
            contradiction_score = float(contradiction_score)
            neutral_score = float(neutral_score)

            # Compute final consistency score
            final_score = (
                (entailment_score * 0.6) +           # Base score from entailment
                (neutral_score * 0.3) +              # Neutral is acceptable
                ((1 - contradiction_score) * 0.1)    # Small penalty for contradiction
            ) * 100

            # Log final results
            logger.info("\nFinal Analysis Results:")
            logger.info("-"*30)
            logger.info(f"Headline: {headline}")
            logger.info(f"Content Length: {content_tokens} tokens")
            logger.info("\nFinal Scores:")
            logger.info(f"{'Entailment:':<15} {entailment_score:.3f}")
            logger.info(f"{'Neutral:':<15} {neutral_score:.3f}")
            logger.info(f"{'Contradiction:':<15} {contradiction_score:.3f}")
            logger.info(f"\nFinal Score: {final_score:.1f}%")
            logger.info("="*50 + "\n")

            return {
                "headline_vs_content_score": round(final_score, 1),
                "entailment_score": round(entailment_score, 2),
                "contradiction_score": round(contradiction_score, 2),
                "contradictory_phrases": []
            }

        except Exception as e:
            logger.error("\nHEADLINE ANALYSIS ERROR")
            logger.error("-"*30)
            logger.error(f"Error Type: {type(e).__name__}")
            logger.error(f"Error Message: {str(e)}")
            logger.error("Stack Trace:", exc_info=True)
            logger.error("="*50 + "\n")
            return {
                "headline_vs_content_score": 0,
                "entailment_score": 0,
                "contradiction_score": 0,
                "contradictory_phrases": []
            }
mediaunmasked/analyzers/scoring.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ import logging
3
+
4
+ from .headline_analyzer import HeadlineAnalyzer
5
+ from .sentiment_analyzer import SentimentAnalyzer
6
+ from .bias_analyzer import BiasAnalyzer
7
+ from .evidence_analyzer import EvidenceAnalyzer
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class MediaScorer:
    """Combine the four analyzers into one media credibility score."""

    def __init__(self):
        """Initialize the MediaScorer with required analyzers."""
        self.headline_analyzer = HeadlineAnalyzer()
        self.sentiment_analyzer = SentimentAnalyzer()
        self.bias_analyzer = BiasAnalyzer()
        self.evidence_analyzer = EvidenceAnalyzer()

    def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
        """Calculate final media credibility score.

        The four component scores (headline consistency, absence of
        manipulation, absence of bias, evidence) are each normalized to 0..1
        and averaged with equal weight.

        Returns:
            A dict with ``media_unmasked_score`` (0-100), ``rating``
            ("Trustworthy" >= 80, "Bias Present" >= 50, else "Misleading") and
            ``details`` holding each analyzer's raw output. On any error the
            rating is ``"Error"`` with zeroed details (the error is logged,
            never raised).
        """
        try:
            headline_analysis = self.headline_analyzer.analyze(headline, content)
            sentiment_analysis = self.sentiment_analyzer.analyze(content)
            bias_analysis = self.bias_analyzer.analyze(content)
            evidence_analysis = self.evidence_analyzer.analyze(content)

            # Log intermediate results
            logger.info("\n=== Raw Analysis Results ===")
            logger.info(f"Headline Analysis: {headline_analysis}")
            logger.info(f"Sentiment Analysis: {sentiment_analysis}")
            logger.info(f"""Bias Analysis:
            Raw: {bias_analysis}
            Label: {bias_analysis['bias']}
            Score: {bias_analysis['bias_score']}
            Percentage: {bias_analysis['bias_percentage']}%
            """)
            logger.info(f"Evidence Analysis: {evidence_analysis}")

            # Calculate component scores (each 0..1, higher is better)
            # Headline: already a "higher is better" consistency score, so it
            # is used directly, not inverted.
            headline_score = headline_analysis["headline_vs_content_score"] / 100

            # Manipulation: 0% = good, 100% = bad, so invert.
            manipulation_score = (100 - sentiment_analysis["manipulation_score"]) / 100

            # Bias: 0% = good, 100% = bad, so invert.
            bias_score = (100 - bias_analysis["bias_percentage"]) / 100

            evidence_score = evidence_analysis["evidence_based_score"] / 100  # Higher is better

            logger.info(f"""Component Scores:
            Headline: {headline_score * 100:.1f}% (from {headline_analysis["headline_vs_content_score"]}%)
            Evidence: {evidence_score * 100:.1f}%
            Manipulation: {manipulation_score * 100:.1f}% (100 - {sentiment_analysis["manipulation_score"]}%)
            Bias: {bias_score * 100:.1f}% (100 - {bias_analysis["bias_percentage"]}%)
            """)

            # Calculate final score
            final_score = (
                (headline_score * 0.25) +
                (manipulation_score * 0.25) +
                (bias_score * 0.25) +
                (evidence_score * 0.25)
            ) * 100

            # Determine rating
            if final_score >= 80:
                rating = "Trustworthy"
            elif final_score >= 50:
                rating = "Bias Present"
            else:
                rating = "Misleading"

            result = {
                "media_unmasked_score": round(final_score, 1),
                "rating": rating,
                "details": {
                    "headline_analysis": headline_analysis,
                    "sentiment_analysis": sentiment_analysis,
                    "bias_analysis": bias_analysis,
                    "evidence_analysis": evidence_analysis
                }
            }

            logger.info("\n=== Final Score Result ===")
            logger.info(f"Result: {result}")

            return result

        except Exception as e:
            # exc_info records the traceback; the message alone rarely says
            # which analyzer failed.
            logger.error(f"Error calculating media score: {str(e)}", exc_info=True)
            return {
                "media_unmasked_score": 0,
                "rating": "Error",
                "details": {
                    # Same keys as HeadlineAnalyzer.analyze returns, so
                    # consumers see one shape on success and failure.
                    "headline_analysis": {
                        "headline_vs_content_score": 0,
                        "entailment_score": 0,
                        "contradiction_score": 0,
                        "contradictory_phrases": []
                    },
                    "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
                    "bias_analysis": {"bias": "Error", "bias_score": 0.0, "bias_percentage": 0},
                    "evidence_analysis": {"evidence_based_score": 0}
                }
            }
mediaunmasked/analyzers/sentiment_analyzer.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, Any, List
3
+ from textblob import TextBlob
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
class SentimentAnalyzer:
    """Classify overall sentiment and flag manipulative phrasing."""

    def __init__(self):
        self.manipulative_patterns = [
            "experts say",
            "sources claim",
            "many believe",
            "some say",
            "everyone knows",
            "clearly",
            "obviously",
            "without doubt",
            "certainly"
        ]

    def analyze(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using TextBlob.

        Polarity above 0.2 is "Positive", below -0.2 "Negative", otherwise
        "Neutral". Each flagged phrase adds 10 to the manipulation score
        (capped at 100 in the result); a score above 50 overrides the label
        with "Manipulative".

        Returns:
            A dict with ``sentiment``, ``manipulation_score`` and
            ``flagged_phrases``. On any error the sentiment is ``"Error"``
            with a zero score and no phrases (the error is logged, never
            raised).
        """
        try:
            blob = TextBlob(text)
            sentiment_score = blob.sentiment.polarity

            manipulative_phrases = self._detect_manipulative_phrases(text)
            manipulation_score = len(manipulative_phrases) * 10

            if sentiment_score > 0.2:
                sentiment = "Positive"
            elif sentiment_score < -0.2:
                sentiment = "Negative"
            else:
                sentiment = "Neutral"

            if manipulation_score > 50:
                sentiment = "Manipulative"

            return {
                "sentiment": sentiment,
                "manipulation_score": min(manipulation_score, 100),
                "flagged_phrases": manipulative_phrases
            }

        except Exception as e:
            logger.error(f"Error in sentiment analysis: {str(e)}")
            return {
                "sentiment": "Error",
                "manipulation_score": 0,
                "flagged_phrases": []
            }

    def _detect_manipulative_phrases(self, text: str) -> List[str]:
        """Detect potentially manipulative phrases.

        For each pattern that occurs in ``text`` (case-insensitive, whole
        words only), returns the first occurrence with up to 20 characters of
        context on either side, stripped of surrounding whitespace.
        """
        import re

        found_phrases = []

        for pattern in self.manipulative_patterns:
            # Search the original text directly: offsets taken from a
            # lower-cased copy can drift when lower() changes the string's
            # length. The lookarounds keep "certainly" from matching inside
            # "uncertainly".
            match = re.search(
                r"(?<!\w)" + re.escape(pattern) + r"(?!\w)",
                text,
                re.IGNORECASE,
            )
            if match is None:
                continue
            start, end = match.span()
            context = text[max(0, start - 20):end + 20]
            found_phrases.append(context.strip())

        return found_phrases
mediaunmasked/resources/left_bias_words.txt ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📂 resources/left_bias_words.txt
2
+ # -------------------------------------------------
3
+ # 🔹 Political Ideology & Economic Policy
4
+ progressive
5
+ conservative
6
+ socialist
7
+ democratic socialism
8
+ democratic socialist
9
+ far-right
10
+ equity
11
+ justice for all
12
+ wealth redistribution
13
+ universal basic income
14
+ living wage
15
+ income inequality
16
+ wealth inequality
17
+ fair trade
18
+ social safety net
19
+ corporate greed
20
+ workers' rights
21
+ unionize
22
+ collective bargaining
23
+ minimum wage increase
24
+ universal childcare
25
+ tax the rich
26
+ economic justice
27
+ capitalism is broken
28
+ billionaires shouldn't exist
29
+
30
+ # 🔹 Climate & Environmental Policy
31
+ climate crisis
32
+ climate emergency
33
+ sustainability
34
+ green energy
35
+ carbon footprint
36
+ fossil fuel divestment
37
+ environmental justice
38
+ net zero
39
+ renewable energy
40
+ solar energy
41
+ climate action
42
+ big oil
43
+ carbon tax
44
+ Green New Deal
45
+ climate deniers
46
+ eco-friendly policies
47
+ clean energy revolution
48
+ plastic ban
49
+ end fracking
50
+ divest from coal
51
+ extreme weather is worsening
52
+ global warming is real
53
+ environmental responsibility
54
+ wildlife protection
55
+ eco-activism
56
+
57
+ # 🔹 Social Justice & Identity Politics
58
+ social justice
59
+ racial justice
60
+ systemic racism
61
+ white privilege
62
+ microaggressions
63
+ BIPOC
64
+ LGBTQ+ rights
65
+ gender pay gap
66
+ affirmative action
67
+ decolonization
68
+ indigenous sovereignty
69
+ equity vs equality
70
+ patriarchy
71
+ gender-inclusive
72
+ intersectionality
73
+ trans rights
74
+ feminism
75
+ gender-affirming care
76
+ abolish ICE
77
+ police brutality
78
+ defund the police
79
+ prison abolition
80
+ restorative justice
81
+ white supremacy
82
+ hate speech laws
83
+ critical race theory
84
+ diversity, equity, inclusion
85
+ reproductive justice
86
+ women's bodily autonomy
87
+ reparations
88
+
89
+ # 🔹 Healthcare & Public Welfare
90
+ Medicare for All
91
+ universal healthcare
92
+ public option
93
+ free healthcare
94
+ single-payer system
95
+ affordable healthcare
96
+ healthcare is a human right
97
+ insulin price cap
98
+ Big Pharma
99
+ mental health parity
100
+ food insecurity
101
+ public housing
102
+ student loan forgiveness
103
+ affordable education
104
+ debt relief
105
+ expand social security
106
+ disability rights
107
+ homeless crisis
108
+ opioid epidemic response
109
+ guaranteed paid leave
110
+ maternal mortality crisis
111
+ expand Medicaid
112
+ community healthcare clinics
113
+ healthcare access for all
114
+ pre-existing conditions coverage
115
+
116
+ # 🔹 Gun Control & Public Safety
117
+ gun violence prevention
118
+ common-sense gun laws
119
+ background checks
120
+ gun reform
121
+ assault weapons ban
122
+ mass shootings epidemic
123
+ red flag laws
124
+ gun buyback programs
125
+ ban high-capacity magazines
126
+ NRA influence
127
+ public safety over profit
128
+ gun safety legislation
129
+ school shootings crisis
130
+ responsible gun ownership
131
+ fewer guns, safer communities
132
+ demilitarize the police
133
+ ban ghost guns
134
+ universal gun laws
135
+ ban open carry
136
+ reduce firearm access
137
+ mandatory firearm registration
138
+
139
+ # 🔹 Immigration & Border Policy
140
+ path to citizenship
141
+ DACA
142
+ dreamers
143
+ migrant rights
144
+ asylum seekers
145
+ refugee protection
146
+ abolish ICE
147
+ border security is racist
148
+ family separation
149
+ sanctuary cities
150
+ humanitarian crisis at the border
151
+ comprehensive immigration reform
152
+ no human is illegal
153
+ end child detention
154
+ protect immigrants
155
+ immigrants strengthen the economy
156
+ undocumented workers deserve rights
157
+ border wall waste
158
+ decriminalize border crossings
159
+ reunite families
160
+ amnesty for undocumented
161
+
162
+ # 🔹 Media & Information Bias
163
+ misinformation crisis
164
+ fact-based reporting
165
+ right-wing disinformation
166
+ alternative facts
167
+ Fox News propaganda
168
+ media literacy
169
+ fight misinformation
170
+ Big Tech accountability
171
+ social media regulation
172
+ disinformation campaigns
173
+ protect press freedom
174
+ independent journalism
175
+ mainstream media bias
176
+ fact-checking matters
177
+ ban fake news
178
+ Russian interference
179
+ algorithmic bias
180
+ political misinformation
181
+ fair and accurate reporting
182
+ truth matters
183
+ anti-science rhetoric
184
+ climate denial media
185
+ ban extremist media
186
+ right-wing conspiracy theories
187
+ protecting democracy
mediaunmasked/resources/manipulative_patterns.txt ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📂 resources/manipulative_patterns.txt
2
+ # -------------------------------------------------
3
+ # 🔹 Vague Attribution (Unverifiable Sources)
4
+ experts fear
5
+ some say
6
+ many believe
7
+ it's clear that
8
+ obviously
9
+ everyone knows
10
+ sources say
11
+ people are saying
12
+ research suggests
13
+ critics argue
14
+ analysts warn
15
+ reportedly
16
+ insiders claim
17
+ industry experts agree
18
+ whispers in the industry
19
+ a growing number of people think
20
+ sources close to the matter indicate
21
+ reports suggest
22
+ insiders reveal
23
+ unnamed sources confirm
24
+ widely believed
25
+ it has been said
26
+ word on the street is
27
+
28
+ # 🔹 Exaggeration & Absolutist Language
29
+ \b(all|none|every|always|never)\b
30
+ without question
31
+ undeniably
32
+ beyond a doubt
33
+ without a shadow of a doubt
34
+ irrefutable proof
35
+ inarguable
36
+ scientifically proven
37
+ guaranteed
38
+ no one can deny
39
+ absolutely certain
40
+ inevitable collapse
41
+ completely unprecedented
42
+ no alternative but
43
+ totally discredited
44
+ this changes everything
45
+ nothing can stop
46
+ without fail
47
+ history shows that
48
+
49
+ # 🔹 Emotional Manipulation & Loaded Language
50
+ the shocking truth
51
+ horrifying evidence
52
+ dangerously misguided
53
+ deeply disturbing
54
+ alarming new trend
55
+ terrifying reality
56
+ outrageous attack
57
+ crippling consequences
58
+ heartbreaking truth
59
+ a devastating blow
60
+ frightening new report
61
+ explosive details
62
+ disturbing allegations
63
+ corrupt elites
64
+ facing total destruction
65
+ hidden agenda
66
+ deliberate deception
67
+ reckless policies
68
+ radical takeover
69
+ secret plot exposed
70
+ exposed corruption
71
+ will destroy everything
72
+ brainwashing the masses
73
+ a brutal betrayal
74
+ shocking revelations
75
+ an unthinkable scenario
76
+ must be stopped at all costs
77
+ selling out the people
78
+
79
+ # 🔹 False Balance & False Equivalencies
80
+ both sides are equally to blame
81
+ to be fair, some argue
82
+ some would say it's just as bad as
83
+ on one hand, but on the other hand
84
+ many claim there’s no difference
85
+ equally problematic on both sides
86
+ critics claim, but supporters argue
87
+ it’s just like (unrelated issue)
88
+ just as bad as
89
+ making the same mistakes
90
+ exactly like
91
+ history repeating itself
92
+
93
+ # 🔹 Implying Authority Without Evidence
94
+ leading experts agree
95
+ a well-known figure once said
96
+ the science is settled
97
+ unquestionable truth
98
+ indisputable fact
99
+ respected authorities confirm
100
+ established research shows
101
+ a Nobel Prize-winning scientist believes
102
+ the most intelligent minds agree
103
+ top thinkers of our time argue
104
+ those who disagree are uninformed
105
+ no real expert would dispute this
106
+ a professor from a top university claims
107
+ all credible scientists believe
108
+ no serious researcher disagrees
109
+
110
+ # 🔹 Implying Popular Consensus Without Data
111
+ the majority of people think
112
+ society agrees that
113
+ most intelligent people understand
114
+ an overwhelming number of people
115
+ the vast majority
116
+ widely considered to be true
117
+ popular opinion suggests
118
+ everyone is talking about
119
+ most believe
120
+
121
+ # 🔹 Framing Opponents in a Negative Light
122
+ only extremists believe otherwise
123
+ people who disagree are in denial
124
+ anyone who questions this is ignorant
125
+ blindly following the agenda
126
+ out of touch with reality
127
+ desperate attempt to save face
128
+ trying to cover up the truth
129
+ a last-ditch effort to deceive
130
+ refusing to accept facts
131
+ spreading misinformation
132
+ manipulated by special interests
133
+ driven by greed and corruption
134
+ being paid to say otherwise
135
+ deliberately misleading
136
+ hiding the truth from the public
137
+ working against the people
138
+ exploiting the system
139
+ part of the problem, not the solution
140
+ dangerous and reckless
141
+ acting in bad faith
142
+
143
+ # 🔹 Implying Urgency & Fear-Mongering
144
+ we are running out of time
145
+ before it’s too late
146
+ act now before disaster strikes
147
+ imminent collapse
148
+ crisis is unfolding
149
+ ticking time bomb
150
+ on the brink of disaster
151
+ looming catastrophe
152
+ a dire warning
153
+ facing an existential threat
154
+ the fate of our nation
155
+ before it's too late
156
+ can’t afford to wait
157
+ if this continues, we’re doomed
158
+ the last chance to save
159
+ history will not be kind
160
+ future generations will suffer
161
+ too dangerous to ignore
162
+
163
+ # 🔹 Appealing to Nostalgia & Past Glory
164
+ things were better before
165
+ back in the good old days
166
+ when America was great
167
+ returning to our roots
168
+ the way it was meant to be
169
+ before things got out of hand
170
+ we’ve lost our way
171
+ we need to go back to simpler times
172
+ what our forefathers intended
173
+ traditional values are under attack
174
+ restoring the glory days
175
+ reclaiming what was lost
176
+ remember when things made sense?
177
+ back when people had morals
178
+ the downfall of our civilization
mediaunmasked/resources/right_bias_words.txt ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📂 resources/right_bias_words.txt
2
+ # -------------------------------------------------
3
+ # 🔹 Political Ideology & Economic Policy
4
+ right-wing
5
+ liberal
6
+ traditional values
7
+ free market
8
+ capitalism
9
+ small government
10
+ big government overreach
11
+ limited government
12
+ fiscal responsibility
13
+ trickle-down economics
14
+ deregulation
15
+ job creators
16
+ personal responsibility
17
+ welfare dependency
18
+ individual liberty
19
+ government waste
20
+ tax cuts
21
+ pro-business policies
22
+ pro-growth policies
23
+ big government socialism
24
+ crony capitalism
25
+ hard work pays off
26
+ socialist policies fail
27
+ self-reliance
28
+ national sovereignty
29
+
30
+ # 🔹 Climate & Environmental Policy
31
+ climate alarmism
32
+ climate hoax
33
+ green energy scam
34
+ drill baby drill
35
+ energy independence
36
+ clean coal
37
+ pro-fracking
38
+ stop the war on oil
39
+ anti-carbon tax
40
+ regulatory overreach
41
+ climate hysteria
42
+ fossil fuel industry
43
+ radical environmentalists
44
+ alternative energy myths
45
+ global warming exaggeration
46
+ renewable energy failure
47
+ climate change agenda
48
+ big government green policies
49
+ end subsidies for green energy
50
+ eco-terrorism
51
+ emissions regulations kill jobs
52
+ climate change skepticism
53
+ scientific consensus is flawed
54
+
55
+ # 🔹 Social Issues & Culture Wars
56
+ woke agenda
57
+ cancel culture
58
+ critical race theory
59
+ identity politics
60
+ anti-woke
61
+ war on Christmas
62
+ traditional marriage
63
+ family values
64
+ religious freedom
65
+ biblical principles
66
+ faith-based values
67
+ cultural marxism
68
+ gender ideology
69
+ biological reality
70
+ trans agenda
71
+ protect women’s sports
72
+ Christian persecution
73
+ church over state
74
+ anti-religious bigotry
75
+ parental rights
76
+ indoctrination in schools
77
+ reverse racism
78
+ meritocracy matters
79
+ law and order
80
+ war on masculinity
81
+ anti-gun propaganda
82
+
83
+ # 🔹 Immigration & Border Policy
84
+ illegal aliens
85
+ border crisis
86
+ invasion at the border
87
+ build the wall
88
+ secure our borders
89
+ deportation
90
+ amnesty is a scam
91
+ open borders policy
92
+ sanctuary cities are unsafe
93
+ migrant caravans
94
+ protect American workers
95
+ chain migration
96
+ anchor babies
97
+ English as the official language
98
+ vetting immigrants
99
+ catch and deport
100
+ mass migration problem
101
+ border security first
102
+ illegals taking American jobs
103
+ criminal aliens
104
+ no asylum abuse
105
+ end birthright citizenship
106
+ border patrol under attack
107
+
108
+ # 🔹 Healthcare & Public Welfare
109
+ government takeover of healthcare
110
+ socialized medicine fails
111
+ free healthcare is a myth
112
+ Medicare for all is unsustainable
113
+ private insurance rights
114
+ Obamacare disaster
115
+ healthcare freedom
116
+ personal responsibility in healthcare
117
+ rationed care
118
+ universal healthcare means higher taxes
119
+ health savings accounts
120
+ big pharma collusion
121
+ big government healthcare
122
+ death panels
123
+ nanny state policies
124
+ taxpayer-funded abortion
125
+ personalized medicine
126
+ free market healthcare solutions
127
+ government interference in medicine
128
+ healthcare choice
129
+ welfare abuse
130
+ entitlement reform
131
+ personalized care models
132
+ stop welfare expansion
133
+
134
+ # 🔹 Gun Rights & Public Safety
135
+ gun control doesn’t work
136
+ Second Amendment rights
137
+ constitutional carry
138
+ good guy with a gun
139
+ gun grabbers
140
+ assault weapons myth
141
+ defend the Second Amendment
142
+ law-abiding gun owners
143
+ red flag laws violate rights
144
+ shall not be infringed
145
+ gun-free zones don't work
146
+ arming teachers
147
+ NRA-backed legislation
148
+ stand your ground
149
+ self-defense rights
150
+ gun rights under attack
151
+ criminals ignore gun laws
152
+ leftists want total disarmament
153
+ defund the police is dangerous
154
+ crime wave
155
+ law and order policies
156
+ Democrats are soft on crime
157
+
158
+ # 🔹 Media & Big Tech Censorship
159
+ mainstream media lies
160
+ fake news media
161
+ media bias
162
+ left-wing media monopoly
163
+ corporate media corruption
164
+ conservative voices silenced
165
+ shadow banning
166
+ Big Tech censorship
167
+ social media suppression
168
+ fact-checkers are biased
169
+ alternative media
170
+ legacy media collapse
171
+ biased journalism
172
+ freedom of speech under attack
173
+ media elite
174
+ disinformation police
175
+ Silicon Valley leftist agenda
176
+ misinformation double standard
177
+ conservative accounts banned
178
+ election interference
179
+ algorithm manipulation
180
+ digital free speech
181
+
182
+ # 🔹 Election Integrity & Government Accountability
183
+ election fraud
184
+ stolen election
185
+ mail-in ballot fraud
186
+ ballot harvesting
187
+ illegal voting
188
+ rigged elections
189
+ secure the vote
190
+ voter ID laws
191
+ dead people voting
192
+ stop election interference
193
+ clean voter rolls
194
+ fair elections
195
+ stop the steal
196
+ Democrat-run cities are corrupt
197
+ big government tyranny
198
+ congressional overreach
199
+ political witch hunt
200
+ government accountability
201
+ unelected bureaucrats
202
+ administrative state abuse
203
+ taxpayer money wasted
204
+ drain the swamp
205
+ deep state
206
+ two-tiered justice system
207
+ weaponization of government
208
+
209
+ # 🔹 Foreign Policy & National Defense
210
+ America First
211
+ patriotic nationalism
212
+ globalism is a threat
213
+ weak foreign policy
214
+ military strength
215
+ peace through strength
216
+ support our troops
217
+ anti-interventionism
218
+ funding our enemies
219
+ China threat
220
+ Russia hoax
221
+ leftist appeasement
222
+ national security first
223
+ endless wars are a mistake
224
+ pro-Israel stance
225
+ secure our allies
226
+ foreign aid waste
227
+ Biden’s weak leadership
228
+ defund the UN
229
+ globalist elites
230
+ pro-American trade policies
231
+ stop outsourcing jobs
232
+ stop military woke policies
233
+ secure American sovereignty
mediaunmasked/schemas/requests.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class AnalyzeRequest(BaseModel):
    """Request schema with the two text fields submitted for analysis."""

    # Headline text; required, validated by pydantic as a string.
    headline: str
    # Body text that accompanies the headline; required string.
    content: str
mediaunmasked/schemas/responses.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class AnalyzeResponse(BaseModel):
    """Response schema holding three float scores for a headline/content pair."""

    # NOTE(review): the value ranges are not defined in this schema; the unit
    # tests compare this field against 30, so it is presumably on a 0-100
    # scale -- confirm against the analyzer.
    headline_vs_content_score: float
    # NOTE(review): presumably a 0-1 probability -- confirm against the analyzer.
    entailment_score: float
    # NOTE(review): the unit tests compare this field against 0.3, so it is
    # presumably a 0-1 probability -- confirm against the analyzer.
    contradiction_score: float
mediaunmasked/scrapers/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .article_scraper import ArticleScraper
2
+
3
+ __all__ = ['ArticleScraper']
mediaunmasked/scrapers/article_scraper.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional
2
+ import logging
3
+ from urllib.parse import urlparse
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+
7
+ from ..utils.logging_config import setup_logging
8
+
9
class ArticleScraper:
    """Download an article page and pull out its headline and body text.

    Snopes and PolitiFact pages get dedicated extractors; every other site
    goes through a generic selector-based fallback.
    """

    # Seconds allowed for a request. Without a timeout, a server that stops
    # responding would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # Tags removed from a content container before its text is collected.
    _UNWANTED_TAGS = ['script', 'style', 'iframe', 'aside']

    def __init__(self):
        self.session = requests.Session()
        self.session.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        setup_logging()
        self.logger = logging.getLogger(__name__)

    def _get_domain(self, url: str) -> str:
        """Extract domain from URL."""
        return urlparse(url).netloc

    @staticmethod
    def _matches_site(domain: str, site: str) -> bool:
        """Return True when *domain* is *site* itself or one of its subdomains.

        *domain* is a URL netloc, so any ``user@`` prefix and ``:port`` suffix
        are dropped and the comparison is case-insensitive. A plain substring
        test would also accept hosts such as ``notsnopes.com`` or
        ``snopes.com.evil.net``.
        """
        host = domain.lower().rsplit('@', 1)[-1].split(':', 1)[0]
        return host == site or host.endswith('.' + site)

    def _clean_paragraphs(self, container) -> list:
        """Remove unwanted tags from *container* in place; return its <p> tags."""
        for unwanted in container.find_all(self._UNWANTED_TAGS):
            unwanted.decompose()
        return container.find_all('p')

    @staticmethod
    def _join_text(paragraphs) -> str:
        """Join the stripped text of each paragraph with single spaces."""
        return ' '.join(p.get_text().strip() for p in paragraphs)

    def _fetch_page(self, url: str) -> Optional[str]:
        """Fetch page content with error handling.

        Returns the response body, or None when the request fails, times out,
        or returns an HTTP error status.
        """
        try:
            response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.text

        # Deliberately broad: this is the I/O boundary and callers rely on a
        # None result instead of an exception.
        except Exception as e:
            self.logger.error(f"Error fetching {url}: {str(e)}")
            return None

    def _extract_snopes(self, soup: "BeautifulSoup") -> Dict[str, str]:
        """Extract content from Snopes articles.

        Either value may be an empty string when the element is not found.
        """
        # Get headline from any h1 tag since it doesn't have a specific class
        headline_elem = soup.find('h1')
        headline = headline_elem.get_text().strip() if headline_elem else ''
        self.logger.info(f"Found headline: {headline}")

        article = soup.find('article')
        if article:
            self.logger.info("Found article tag")
            paragraphs = self._clean_paragraphs(article)
            if paragraphs:
                content = self._join_text(paragraphs)
            else:
                # No <p> tags at all: fall back to the article's full text.
                content = article.get_text().strip()
        else:
            self.logger.warning("No article tag found")
            content = ''

        return {"headline": headline, "content": content}

    def _extract_politifact(self, soup: "BeautifulSoup") -> Dict[str, str]:
        """Extract content from PolitiFact articles.

        Missing pieces are reported with placeholder strings ("No headline
        found" / "No content found"); any extraction error yields a dict
        whose headline is "Error".
        """
        try:
            # Prefer the titled h1, otherwise take the first h1 on the page.
            headline_elem = soup.find('h1', class_='article__title') or soup.find('h1')
            headline = headline_elem.get_text().strip() if headline_elem else "No headline found"

            self.logger.info(f"Found headline: {headline}")

            content_div = soup.find('article', class_='article')
            if content_div:
                content = self._join_text(self._clean_paragraphs(content_div))
            else:
                # Try alternative content selectors; the first one present wins.
                content = ''
                for selector in ['.article__text', '.m-textblock']:
                    content_elem = soup.select_one(selector)
                    if content_elem:
                        content = self._join_text(content_elem.find_all('p'))
                        break

            if not content:
                self.logger.warning("No content found in article")
                content = "No content found"

            return {"headline": headline, "content": content}

        except Exception as e:
            self.logger.error(f"Error extracting PolitiFact content: {str(e)}")
            return {"headline": "Error", "content": f"Failed to extract content: {str(e)}"}

    def _extract_generic(self, soup: "BeautifulSoup") -> Dict[str, str]:
        """Extract headline and content from a page with no dedicated extractor."""
        h1 = soup.find('h1')
        headline = h1.get_text().strip() if h1 else ''

        # Try common content selectors; the first container present is used
        # even if it turns out to hold no paragraphs.
        content = ''
        for selector in ['article', 'main', '.content', '.article-content']:
            content_div = soup.select_one(selector)
            if content_div:
                content = self._join_text(self._clean_paragraphs(content_div))
                break

        return {"headline": headline, "content": content}

    def scrape_article(self, url: str) -> Optional[Dict[str, str]]:
        """
        Main function to scrape fact-checking articles.
        Returns a dictionary with headline and content, or None when the
        page could not be fetched.
        """
        html_content = self._fetch_page(url)
        if not html_content:
            self.logger.error("Failed to fetch page content")
            return None

        soup = BeautifulSoup(html_content, 'html.parser')
        domain = self._get_domain(url)

        self.logger.info(f"Scraping article from domain: {domain}")

        # Select appropriate extractor based on domain
        if self._matches_site(domain, 'snopes.com'):
            result = self._extract_snopes(soup)
            if not result['headline'] or not result['content']:
                self.logger.warning("Failed to extract content from Snopes article")
                self.logger.debug(f"HTML content: {html_content[:500]}...")
            return result

        if self._matches_site(domain, 'politifact.com'):
            return self._extract_politifact(soup)

        return self._extract_generic(soup)
mediaunmasked/services/analyzer_service.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
2
+
3
class AnalyzerService:
    """Service object that owns a HeadlineAnalyzer and forwards requests to it."""

    def __init__(self):
        # One analyzer instance is created per service instance.
        self.headline_analyzer = HeadlineAnalyzer()

    async def analyze_content(self, headline: str, content: str):
        """Return whatever ``HeadlineAnalyzer.analyze`` produces for the pair.

        The call to the analyzer itself is synchronous; this coroutine only
        wraps it.
        """
        return self.headline_analyzer.analyze(headline, content)
mediaunmasked/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty file is fine
mediaunmasked/utils/logging_config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Optional
3
+
4
def setup_logging(level: int = logging.INFO) -> None:
    """Apply the application's logging setup through ``logging.basicConfig``.

    Args:
        level: Logging threshold passed to ``basicConfig``.
    """
    record_layout = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    timestamp_layout = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(level=level, format=record_layout, datefmt=timestamp_layout)
mediaunmasked/web/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty file is fine
mediaunmasked/web/app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from ..analyzers.bias_analyzer import BiasAnalyzer
3
+ from ..scrapers.article_scraper import ArticleScraper
4
+ from ..utils.logging_config import setup_logging
5
+ import plotly.graph_objects as go
6
+
7
def create_sentiment_gauge(score: float) -> go.Figure:
    """Build a gauge figure titled "Sentiment Score" on a 0-100 axis.

    Args:
        score: Multiplied by 100 before display, so presumably a 0-1
            fraction -- confirm against callers.
    """
    # Three background bands splitting the axis into thirds.
    bands = [
        {'range': [0, 33], 'color': "lightgray"},
        {'range': [33, 66], 'color': "gray"},
        {'range': [66, 100], 'color': "darkgray"},
    ]
    gauge_spec = {
        'axis': {'range': [0, 100]},
        'bar': {'color': "darkblue"},
        'steps': bands,
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score * 100,
        title={'text': "Sentiment Score"},
        gauge=gauge_spec,
    )
    return go.Figure(indicator)
24
+
25
def main():
    """Render the Streamlit page: read a URL, scrape it, and show the analysis."""
    setup_logging()

    # Both components are constructed on every call to main().
    scraper = ArticleScraper()
    analyzer = BiasAnalyzer()

    st.title("Media Bias Analyzer")
    st.write("Analyze bias and sentiment in news articles")

    url = st.text_input("Enter article URL:", "https://www.snopes.com/articles/469232/musk-son-told-trump-shut-up/")

    # Nothing further to render until the button is pressed.
    if not st.button("Analyze"):
        return

    with st.spinner("Analyzing article..."):
        article = scraper.scrape_article(url)

        # A falsy result (e.g. None) means the scrape failed.
        if not article:
            st.error("Failed to fetch article. Please check the URL and try again.")
            return

        st.subheader("Article Details")
        st.write(f"**Headline:** {article['headline']}")

        with st.expander("Show Article Content"):
            st.write(article['content'])

        result = analyzer.analyze(article['content'])

        sentiment_col, bias_col = st.columns(2)

        with sentiment_col:
            st.subheader("Sentiment Analysis")
            st.write(f"**Overall Sentiment:** {result.sentiment}")
            # NOTE(review): the gauge is titled "Sentiment Score" but is fed
            # bias_score -- confirm this is intended.
            gauge = create_sentiment_gauge(result.bias_score / 100)
            st.plotly_chart(gauge)

        with bias_col:
            st.subheader("Bias Analysis")
            st.write(f"**Detected Bias:** {result.bias}")
            st.write(f"**Confidence Score:** {result.bias_score:.1f}%")

        # One warning box per flagged phrase, or a note when there are none.
        if not result.flagged_phrases:
            st.info("No potentially biased phrases detected")
        else:
            st.subheader("Potentially Biased Phrases")
            for phrase in result.flagged_phrases:
                st.warning(phrase)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi[all]==0.109.2
2
+ uvicorn==0.27.1
3
+ pydantic==2.6.1
4
+ beautifulsoup4==4.12.3
5
+ requests==2.31.0
6
+ python-dotenv==1.0.1
7
+ textblob==0.17.1
8
+ nltk==3.8.1
9
+ transformers==4.36.2
10
+ torch==2.1.2
11
+ numpy==1.26.3
12
+ pytest==7.4.3
13
+ pytest-asyncio==0.21.1
14
+ httpx==0.25.2
setup.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
def _read_requirements(path="requirements.txt"):
    """Return the requirement specifiers listed in *path*.

    Blank lines and comment lines (including indented ones) are skipped, and
    the file handle is closed as soon as the list has been built.
    """
    with open(path, encoding="utf-8") as req_file:
        stripped = (line.strip() for line in req_file)
        return [req for req in stripped if req and not req.startswith("#")]


setup(
    name="mediaunmasked",
    version="0.1.0",
    packages=find_packages(exclude=["tests*"]) + ["app"],  # Include app/ and mediaunmasked/
    package_dir={"app": "app"},  # Map app directory
    install_requires=_read_requirements(),
    include_package_data=True,
    python_requires=">=3.10",
)
start.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Install the dependencies and the package itself, then launch the FastAPI
# app with uvicorn on port 7860.

# Stop at the first failing command (and on unset variables or failed
# pipelines) so the server is never started on top of a broken install.
set -euo pipefail

# Install dependencies
pip install -r requirements.txt

# Install package in development mode
pip install -e .

# Start the FastAPI server
uvicorn app.main:app --host 0.0.0.0 --port 7860 --reload
tests/unit/test_headline_analyzer.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
# The package is importable as top-level ``mediaunmasked`` (see setup.py and
# the other modules); there is no ``src`` package to import through.
from mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer


@pytest.fixture
def analyzer():
    """Provide a newly constructed HeadlineAnalyzer to each test."""
    return HeadlineAnalyzer()
7
+
8
def test_matching_headline(analyzer):
    """A headline supported by its content scores above 30 with low contradiction."""
    scores = analyzer.analyze(
        "New Study Shows Coffee Reduces Heart Disease Risk",
        "Recent research suggests that coffee may have cardiovascular benefits.",
    )

    assert scores["headline_vs_content_score"] > 30
    assert scores["contradiction_score"] < 0.3
16
+
17
def test_contradictory_headline(analyzer):
    """A headline contradicted by its content scores below 30 with high contradiction."""
    scores = analyzer.analyze(
        "Coffee Increases Heart Disease Risk",
        "Studies show coffee decreases cardiovascular disease risk.",
    )

    assert scores["headline_vs_content_score"] < 30
    assert scores["contradiction_score"] > 0.3