Spaces:

wozwize
/

media-unmasked-api

Running

App Files Files Community

wozwize committed on Feb 21

Commit

876b12f

1 Parent(s): f83c2ca

initial commit of media-unmasked-api to huggingface

Browse files

Files changed (30) hide show

.gitignore +5 -0
Dockerfile +21 -0
app/main.py +22 -0
app/routers/analyze.py +18 -0
app/routers/health.py +7 -0
create_structure.sh +3 -0
directory_structure.txt +0 -0
mediaunmasked/__init__.py +5 -0
mediaunmasked/analyzers/__init__.py +1 -0
mediaunmasked/analyzers/bias_analyzer.py +75 -0
mediaunmasked/analyzers/evidence_analyzer.py +53 -0
mediaunmasked/analyzers/headline_analyzer.py +164 -0
mediaunmasked/analyzers/scoring.py +101 -0
mediaunmasked/analyzers/sentiment_analyzer.py +65 -0
mediaunmasked/resources/left_bias_words.txt +187 -0
mediaunmasked/resources/manipulative_patterns.txt +178 -0
mediaunmasked/resources/right_bias_words.txt +233 -0
mediaunmasked/schemas/requests.py +5 -0
mediaunmasked/schemas/responses.py +6 -0
mediaunmasked/scrapers/__init__.py +3 -0
mediaunmasked/scrapers/article_scraper.py +139 -0
mediaunmasked/services/analyzer_service.py +9 -0
mediaunmasked/utils/__init__.py +1 -0
mediaunmasked/utils/logging_config.py +10 -0
mediaunmasked/web/__init__.py +1 -0
mediaunmasked/web/app.py +82 -0
requirements.txt +14 -0
setup.py +15 -0
start.sh +10 -0
tests/unit/test_headline_analyzer.py +24 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+# Ignore Python compiled files
+__pycache__/
+*.pyc
+*.pyo
+*.pyd

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+# Use Python Slim Image
+FROM python:3.10-slim
+# Set working directory inside container
+WORKDIR /app
+# Copy dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy all files
+COPY . .
+# Set PYTHONPATH (simplified)
+ENV PYTHONPATH=/app
+# Expose port 7860, the port Hugging Face Spaces routes traffic to (not a FastAPI/uvicorn default)
+EXPOSE 7860
+# Start FastAPI
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

app/main.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware  # ✅ Import this
+from app.routers import analyze, health
+app = FastAPI(title="MediaUnmasked API")
+# ✅ Enable CORS for Swagger UI
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins (or specify ["http://localhost:7860"])
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow all methods
+    allow_headers=["*"],  # Allow all headers
+)
+# Include routers
+app.include_router(analyze.router, prefix="/api")
+app.include_router(health.router, prefix="/health")
+@app.get("/")
+async def root():
+    return {"message": "MediaUnmasked API is running!"}

app/routers/analyze.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from fastapi import APIRouter, HTTPException
+from mediaunmasked.schemas.requests import AnalyzeRequest
+from mediaunmasked.schemas.responses import AnalyzeResponse
+from mediaunmasked.services.analyzer_service import AnalyzerService
+router = APIRouter(tags=["analysis"])
+@router.post("/analyze", response_model=AnalyzeResponse)
+async def analyze_content(request: AnalyzeRequest):
+    try:
+        analyzer_service = AnalyzerService()
+        result = await analyzer_service.analyze_content(
+            headline=request.headline,
+            content=request.content
+        )
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

app/routers/health.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from fastapi import APIRouter
+router = APIRouter()
+@router.get("/")
+async def health_check():
+    return {"status": "healthy"}

create_structure.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+mkdir -p api/routers
+mkdir -p src/mediaunmasked/{services,models,schemas,config}
+mkdir -p tests/{unit,integration}

directory_structure.txt ADDED Viewed

Binary file (4.85 kB). View file

mediaunmasked/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""
+MediaUnmasked - AI-powered media watchdog for analyzing bias and fact-checking.
+"""
+__version__ = "0.1.0"

mediaunmasked/analyzers/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Empty file to make the directory a Python package

mediaunmasked/analyzers/bias_analyzer.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import logging
+import os
+from typing import Dict, Any, List
+logger = logging.getLogger(__name__)
+class BiasAnalyzer:
+    def __init__(self):
+        self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
+        self.left_keywords = self._load_keywords('left_bias_words.txt')
+        self.right_keywords = self._load_keywords('right_bias_words.txt')
+    def _load_keywords(self, filename: str) -> List[str]:
+        """Load keywords from file."""
+        try:
+            filepath = os.path.join(self.resources_dir, filename)
+            with open(filepath, 'r', encoding='utf-8') as f:
+                return [line.strip().lower() for line in f if line.strip() and not line.startswith('#')]
+        except Exception as e:
+            logger.error(f"Error loading {filename}: {str(e)}")
+            return []
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Detect bias using keyword analysis."""
+        try:
+            text_lower = text.lower()
+            # Count matches
+            left_count = sum(1 for word in self.left_keywords if word in text_lower)
+            right_count = sum(1 for word in self.right_keywords if word in text_lower)
+            total_words = left_count + right_count
+            if total_words == 0:
+                return {
+                    "bias": "Neutral",
+                    "bias_score": 0.0,  # True neutral
+                    "bias_percentage": 0  # Neutral percentage
+                }
+            # New bias score formula (-1.0 left, 0.0 neutral, 1.0 right)
+            bias_score = (right_count - left_count) / total_words
+            # Convert bias_score to percentage (-100% to +100%)
+            bias_percentage = bias_score * 100
+            logger.info(f"Bias score: {bias_score:.2f}, Bias percentage: {bias_percentage:.1f}%")
+            # Determine bias label
+            if bias_score < -0.8:
+                bias = "Strongly Left"
+            elif bias_score < -0.5:
+                bias = "Moderately Left"
+            elif bias_score < -0.2:
+                bias = "Leaning Left"
+            elif bias_score > 0.8:
+                bias = "Strongly Right"
+            elif bias_score > 0.5:
+                bias = "Moderately Right"
+            elif bias_score > 0.2:
+                bias = "Leaning Right"
+            else:
+                bias = "Neutral"
+            return {
+                "bias": bias,
+                "bias_score": round(bias_score, 2),  # Keep 2 decimal places
+                "bias_percentage": abs(round(bias_percentage, 1))
+            }
+        except Exception as e:
+            logger.error(f"Error in bias analysis: {str(e)}")
+            return {
+                "bias": "Error",
+                "bias_score": 0.0,
+                "bias_percentage": 0
+            }

mediaunmasked/analyzers/evidence_analyzer.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import logging
+from typing import Dict, Any, List
+logger = logging.getLogger(__name__)
+class EvidenceAnalyzer:
+    def __init__(self):
+        self.citation_markers = [
+            "according to",
+            "said",
+            "reported",
+            "stated",
+            "shows",
+            "found",
+            "study",
+            "research",
+            "data",
+            "evidence"
+        ]
+        self.vague_markers = [
+            "some say",
+            "many believe",
+            "people think",
+            "experts claim",
+            "sources say",
+            "it is believed",
+            "reportedly",
+            "allegedly"
+        ]
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Check for evidence-based reporting."""
+        try:
+            text_lower = text.lower()
+            citation_count = sum(1 for marker in self.citation_markers if marker in text_lower)
+            vague_count = sum(1 for marker in self.vague_markers if marker in text_lower)
+            base_score = min(citation_count * 20, 100)
+            penalty = vague_count * 10
+            evidence_score = max(0, base_score - penalty)
+            return {
+                "evidence_based_score": evidence_score
+            }
+        except Exception as e:
+            logger.error(f"Error in evidence analysis: {str(e)}")
+            return {
+                "evidence_based_score": 0
+            }

mediaunmasked/analyzers/headline_analyzer.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import logging
+from typing import Dict, Any, List
+from transformers import pipeline
+from transformers import AutoTokenizer
+import numpy as np
+logger = logging.getLogger(__name__)
+class HeadlineAnalyzer:
+    def __init__(self):
+        """Initialize the NLI model for contradiction detection."""
+        self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
+        self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
+        self.max_length = 512
+    def _split_content(self, headline: str, content: str) -> List[str]:
+        """Split content into sections that fit within token limit."""
+        content_words = content.split()
+        sections = []
+        current_section = []
+        # Account for headline and [SEP] token in the max length
+        headline_tokens = len(self.tokenizer.encode(headline))
+        sep_tokens = len(self.tokenizer.encode("[SEP]")) - 2  # -2 because encode adds special tokens
+        max_content_tokens = self.max_length - headline_tokens - sep_tokens
+        # Process words into sections
+        for word in content_words:
+            current_section.append(word)
+            # Check if current section is approaching token limit
+            current_text = " ".join(current_section)
+            if len(self.tokenizer.encode(current_text)) >= max_content_tokens:
+                # Remove last word (it might make us go over limit)
+                current_section.pop()
+                sections.append(" ".join(current_section))
+                # Start new section with 20% overlap for context
+                overlap_start = max(0, len(current_section) - int(len(current_section) * 0.2))
+                current_section = current_section[overlap_start:]
+                current_section.append(word)
+        # Add any remaining content as the last section
+        if current_section:
+            sections.append(" ".join(current_section))
+        logger.info(f"""Content Splitting:
+            - Original content length: {len(content_words)} words
+            - Split into {len(sections)} sections
+            - Headline uses {headline_tokens} tokens
+            - Available tokens per section: {max_content_tokens}
+        """)
+        return sections
+    def _analyze_section(self, headline: str, section: str) -> Dict[str, float]:
+        """Analyze a single section of content."""
+        input_text = f"{headline} [SEP] {section}"
+        result = self.nli_pipeline(input_text, top_k=None)
+        # Extract scores
+        scores = {item['label']: item['score'] for item in result}
+        logger.info("\nSection Analysis:")
+        logger.info("-"*30)
+        logger.info(f"Section preview: {section[:100]}...")
+        for label, score in scores.items():
+            logger.info(f"Label: {label:<12} Score: {score:.3f}")
+        return scores
+    def analyze(self, headline: str, content: str) -> Dict[str, Any]:
+        """Analyze how well the headline matches the content using an AI model."""
+        try:
+            logger.info("\n" + "="*50)
+            logger.info("HEADLINE ANALYSIS STARTED")
+            logger.info("="*50)
+            # Handle empty inputs
+            if not headline.strip() or not content.strip():
+                logger.warning("Empty headline or content provided")
+                return {
+                    "headline_vs_content_score": 0,
+                    "entailment_score": 0,
+                    "contradiction_score": 0,
+                    "contradictory_phrases": []
+                }
+            # Split content if too long
+            content_tokens = len(self.tokenizer.encode(content))
+            if content_tokens > self.max_length:
+                logger.warning(f"""
+                    Content Length Warning:
+                    - Total tokens: {content_tokens}
+                    - Max allowed: {self.max_length}
+                    - Splitting into sections...
+                """)
+                sections = self._split_content(headline, content)
+                # Analyze each section
+                section_scores = []
+                for i, section in enumerate(sections, 1):
+                    logger.info(f"\nAnalyzing section {i}/{len(sections)}")
+                    scores = self._analyze_section(headline, section)
+                    section_scores.append(scores)
+                # Aggregate scores across sections
+                # Use max contradiction (if any section strongly contradicts, that's important)
+                # Use mean entailment (overall support across sections)
+                # Use mean neutral (general neutral tone across sections)
+                entailment_score = np.mean([s.get('ENTAILMENT', 0) for s in section_scores])
+                contradiction_score = np.max([s.get('CONTRADICTION', 0) for s in section_scores])
+                neutral_score = np.mean([s.get('NEUTRAL', 0) for s in section_scores])
+                logger.info("\nAggregated Scores Across Sections:")
+                logger.info("-"*30)
+                logger.info(f"Mean Entailment: {entailment_score:.3f}")
+                logger.info(f"Max Contradiction: {contradiction_score:.3f}")
+                logger.info(f"Mean Neutral: {neutral_score:.3f}")
+            else:
+                # Single section analysis
+                scores = self._analyze_section(headline, content)
+                entailment_score = scores.get('ENTAILMENT', 0)
+                contradiction_score = scores.get('CONTRADICTION', 0)
+                neutral_score = scores.get('NEUTRAL', 0)
+            # Compute final consistency score
+            final_score = (
+                (entailment_score * 0.6) +      # Base score from entailment
+                (neutral_score * 0.3) +         # Neutral is acceptable
+                ((1 - contradiction_score) * 0.1)  # Small penalty for contradiction
+            ) * 100
+            # Log final results
+            logger.info("\nFinal Analysis Results:")
+            logger.info("-"*30)
+            logger.info(f"Headline: {headline}")
+            logger.info(f"Content Length: {content_tokens} tokens")
+            logger.info("\nFinal Scores:")
+            logger.info(f"{'Entailment:':<15} {entailment_score:.3f}")
+            logger.info(f"{'Neutral:':<15} {neutral_score:.3f}")
+            logger.info(f"{'Contradiction:':<15} {contradiction_score:.3f}")
+            logger.info(f"\nFinal Score: {final_score:.1f}%")
+            logger.info("="*50 + "\n")
+            return {
+                "headline_vs_content_score": round(final_score, 1),
+                "entailment_score": round(entailment_score, 2),
+                "contradiction_score": round(contradiction_score, 2),
+                "contradictory_phrases": []
+            }
+        except Exception as e:
+            logger.error("\nHEADLINE ANALYSIS ERROR")
+            logger.error("-"*30)
+            logger.error(f"Error Type: {type(e).__name__}")
+            logger.error(f"Error Message: {str(e)}")
+            logger.error("Stack Trace:", exc_info=True)
+            logger.error("="*50 + "\n")
+            return {
+                "headline_vs_content_score": 0,
+                "entailment_score": 0,
+                "contradiction_score": 0,
+                "contradictory_phrases": []
+            }

mediaunmasked/analyzers/scoring.py ADDED Viewed

	@@ -0,0 +1,101 @@

+from typing import Dict, Any
+import logging
+from .headline_analyzer import HeadlineAnalyzer
+from .sentiment_analyzer import SentimentAnalyzer
+from .bias_analyzer import BiasAnalyzer
+from .evidence_analyzer import EvidenceAnalyzer
+logger = logging.getLogger(__name__)
+class MediaScorer:
+    def __init__(self):
+        """Initialize the MediaScorer with required analyzers."""
+        self.headline_analyzer = HeadlineAnalyzer()
+        self.sentiment_analyzer = SentimentAnalyzer()
+        self.bias_analyzer = BiasAnalyzer()
+        self.evidence_analyzer = EvidenceAnalyzer()
+    def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
+        """Calculate final media credibility score."""
+        try:
+            headline_analysis = self.headline_analyzer.analyze(headline, content)
+            sentiment_analysis = self.sentiment_analyzer.analyze(content)
+            bias_analysis = self.bias_analyzer.analyze(content)
+            evidence_analysis = self.evidence_analyzer.analyze(content)
+            # Log intermediate results
+            logger.info("\n=== Raw Analysis Results ===")
+            logger.info(f"Headline Analysis: {headline_analysis}")
+            logger.info(f"Sentiment Analysis: {sentiment_analysis}")
+            logger.info(f"""Bias Analysis:
+                Raw: {bias_analysis}
+                Label: {bias_analysis['bias']}
+                Score: {bias_analysis['bias_score']}
+                Percentage: {bias_analysis['bias_percentage']}%
+            """)
+            logger.info(f"Evidence Analysis: {evidence_analysis}")
+            # Calculate component scores
+            # For headline: 20% contradiction = 20% score (don't invert)
+            headline_score = headline_analysis["headline_vs_content_score"] / 100
+            # For manipulation: 0% = good (use directly), 100% = bad
+            manipulation_score = (100 - sentiment_analysis["manipulation_score"]) / 100
+            # For bias: 0% = good (use directly), 100% = bad
+            bias_score = (100 - bias_analysis["bias_percentage"]) / 100
+            evidence_score = evidence_analysis["evidence_based_score"] / 100  # Higher is better
+            logger.info(f"""Component Scores:
+                Headline: {headline_score * 100:.1f}% (from {headline_analysis["headline_vs_content_score"]}%)
+                Evidence: {evidence_score * 100:.1f}%
+                Manipulation: {manipulation_score * 100:.1f}% (100 - {sentiment_analysis["manipulation_score"]}%)
+                Bias: {bias_score * 100:.1f}% (100 - {bias_analysis["bias_percentage"]}%)
+            """)
+            # Calculate final score
+            final_score = (
+                (headline_score * 0.25) +
+                (manipulation_score * 0.25) +
+                (bias_score * 0.25) +
+                (evidence_score * 0.25)
+            ) * 100
+            # Determine rating
+            if final_score >= 80:
+                rating = "Trustworthy"
+            elif final_score >= 50:
+                rating = "Bias Present"
+            else:
+                rating = "Misleading"
+            result = {
+                "media_unmasked_score": round(final_score, 1),
+                "rating": rating,
+                "details": {
+                    "headline_analysis": headline_analysis,
+                    "sentiment_analysis": sentiment_analysis,
+                    "bias_analysis": bias_analysis,
+                    "evidence_analysis": evidence_analysis
+                }
+            }
+            logger.info("\n=== Final Score Result ===")
+            logger.info(f"Result: {result}")
+            return result
+        except Exception as e:
+            logger.error(f"Error calculating media score: {str(e)}")
+            return {
+                "media_unmasked_score": 0,
+                "rating": "Error",
+                "details": {
+                    "headline_analysis": {"headline_vs_content_score": 0, "contradictory_phrases": []},
+                    "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
+                    "bias_analysis": {"bias": "Error", "bias_score": 0.0, "bias_percentage": 0},
+                    "evidence_analysis": {"evidence_based_score": 0}
+                }
+            }

mediaunmasked/analyzers/sentiment_analyzer.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import logging
+from typing import Dict, Any, List
+from textblob import TextBlob
+logger = logging.getLogger(__name__)
+class SentimentAnalyzer:
+    def __init__(self):
+        self.manipulative_patterns = [
+            "experts say",
+            "sources claim",
+            "many believe",
+            "some say",
+            "everyone knows",
+            "clearly",
+            "obviously",
+            "without doubt",
+            "certainly"
+        ]
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Analyze sentiment using TextBlob."""
+        try:
+            blob = TextBlob(text)
+            sentiment_score = blob.sentiment.polarity
+            manipulative_phrases = self._detect_manipulative_phrases(text)
+            manipulation_score = len(manipulative_phrases) * 10
+            if sentiment_score > 0.2:
+                sentiment = "Positive"
+            elif sentiment_score < -0.2:
+                sentiment = "Negative"
+            else:
+                sentiment = "Neutral"
+            if manipulation_score > 50:
+                sentiment = "Manipulative"
+            return {
+                "sentiment": sentiment,
+                "manipulation_score": min(manipulation_score, 100),
+                "flagged_phrases": manipulative_phrases
+            }
+        except Exception as e:
+            logger.error(f"Error in sentiment analysis: {str(e)}")
+            return {
+                "sentiment": "Error",
+                "manipulation_score": 0,
+                "flagged_phrases": []
+            }
+    def _detect_manipulative_phrases(self, text: str) -> List[str]:
+        """Detect potentially manipulative phrases."""
+        found_phrases = []
+        text_lower = text.lower()
+        for pattern in self.manipulative_patterns:
+            if pattern in text_lower:
+                start = text_lower.find(pattern)
+                context = text[max(0, start-20):min(len(text), start+len(pattern)+20)]
+                found_phrases.append(context.strip())
+        return found_phrases

mediaunmasked/resources/left_bias_words.txt ADDED Viewed

	@@ -0,0 +1,187 @@

+# 📂 resources/left_bias_words.txt
+# -------------------------------------------------
+# 🔹 Political Ideology & Economic Policy
+progressive
+conservative
+socialist
+democratic socialism
+democratic socialist
+far-right
+equity
+justice for all
+wealth redistribution
+universal basic income
+living wage
+income inequality
+wealth inequality
+fair trade
+social safety net
+corporate greed
+workers' rights
+unionize
+collective bargaining
+minimum wage increase
+universal childcare
+tax the rich
+economic justice
+capitalism is broken
+billionaires shouldn't exist
+# 🔹 Climate & Environmental Policy
+climate crisis
+climate emergency
+sustainability
+green energy
+carbon footprint
+fossil fuel divestment
+environmental justice
+net zero
+renewable energy
+solar energy
+climate action
+big oil
+carbon tax
+Green New Deal
+climate deniers
+eco-friendly policies
+clean energy revolution
+plastic ban
+end fracking
+divest from coal
+extreme weather is worsening
+global warming is real
+environmental responsibility
+wildlife protection
+eco-activism
+# 🔹 Social Justice & Identity Politics
+social justice
+racial justice
+systemic racism
+white privilege
+microaggressions
+BIPOC
+LGBTQ+ rights
+gender pay gap
+affirmative action
+decolonization
+indigenous sovereignty
+equity vs equality
+patriarchy
+gender-inclusive
+intersectionality
+trans rights
+feminism
+gender-affirming care
+abolish ICE
+police brutality
+defund the police
+prison abolition
+restorative justice
+white supremacy
+hate speech laws
+critical race theory
+diversity, equity, inclusion
+reproductive justice
+women's bodily autonomy
+reparations
+# 🔹 Healthcare & Public Welfare
+Medicare for All
+universal healthcare
+public option
+free healthcare
+single-payer system
+affordable healthcare
+healthcare is a human right
+insulin price cap
+Big Pharma
+mental health parity
+food insecurity
+public housing
+student loan forgiveness
+affordable education
+debt relief
+expand social security
+disability rights
+homeless crisis
+opioid epidemic response
+guaranteed paid leave
+maternal mortality crisis
+expand Medicaid
+community healthcare clinics
+healthcare access for all
+pre-existing conditions coverage
+# 🔹 Gun Control & Public Safety
+gun violence prevention
+common-sense gun laws
+background checks
+gun reform
+assault weapons ban
+mass shootings epidemic
+red flag laws
+gun buyback programs
+ban high-capacity magazines
+NRA influence
+public safety over profit
+gun safety legislation
+school shootings crisis
+responsible gun ownership
+fewer guns, safer communities
+demilitarize the police
+ban ghost guns
+universal gun laws
+ban open carry
+reduce firearm access
+mandatory firearm registration
+# 🔹 Immigration & Border Policy
+path to citizenship
+DACA
+dreamers
+migrant rights
+asylum seekers
+refugee protection
+abolish ICE
+border security is racist
+family separation
+sanctuary cities
+humanitarian crisis at the border
+comprehensive immigration reform
+no human is illegal
+end child detention
+protect immigrants
+immigrants strengthen the economy
+undocumented workers deserve rights
+border wall waste
+decriminalize border crossings
+reunite families
+amnesty for undocumented
+# 🔹 Media & Information Bias
+misinformation crisis
+fact-based reporting
+right-wing disinformation
+alternative facts
+Fox News propaganda
+media literacy
+fight misinformation
+Big Tech accountability
+social media regulation
+disinformation campaigns
+protect press freedom
+independent journalism
+mainstream media bias
+fact-checking matters
+ban fake news
+Russian interference
+algorithmic bias
+political misinformation
+fair and accurate reporting
+truth matters
+anti-science rhetoric
+climate denial media
+ban extremist media
+right-wing conspiracy theories
+protecting democracy

mediaunmasked/resources/manipulative_patterns.txt ADDED Viewed

	@@ -0,0 +1,178 @@

+# 📂 resources/manipulative_patterns.txt
+# -------------------------------------------------
+# 🔹 Vague Attribution (Unverifiable Sources)
+experts fear
+some say
+many believe
+it's clear that
+obviously
+everyone knows
+sources say
+people are saying
+research suggests
+critics argue
+analysts warn
+reportedly
+insiders claim
+industry experts agree
+whispers in the industry
+a growing number of people think
+sources close to the matter indicate
+reports suggest
+insiders reveal
+unnamed sources confirm
+widely believed
+it has been said
+word on the street is
+# 🔹 Exaggeration & Absolutist Language
+\b(all|none|every|always|never)\b
+without question
+undeniably
+beyond a doubt
+without a shadow of a doubt
+irrefutable proof
+inarguable
+scientifically proven
+guaranteed
+no one can deny
+absolutely certain
+inevitable collapse
+completely unprecedented
+no alternative but
+totally discredited
+this changes everything
+nothing can stop
+without fail
+history shows that
+# 🔹 Emotional Manipulation & Loaded Language
+the shocking truth
+horrifying evidence
+dangerously misguided
+deeply disturbing
+alarming new trend
+terrifying reality
+outrageous attack
+crippling consequences
+heartbreaking truth
+a devastating blow
+frightening new report
+explosive details
+disturbing allegations
+corrupt elites
+facing total destruction
+hidden agenda
+deliberate deception
+reckless policies
+radical takeover
+secret plot exposed
+exposed corruption
+will destroy everything
+brainwashing the masses
+a brutal betrayal
+shocking revelations
+an unthinkable scenario
+must be stopped at all costs
+selling out the people
+# 🔹 False Balance & False Equivalencies
+both sides are equally to blame
+to be fair, some argue
+some would say it's just as bad as
+on one hand, but on the other hand
+many claim there’s no difference
+equally problematic on both sides
+critics claim, but supporters argue
+it’s just like (unrelated issue)
+just as bad as
+making the same mistakes
+exactly like
+history repeating itself
+# 🔹 Implying Authority Without Evidence
+leading experts agree
+a well-known figure once said
+the science is settled
+unquestionable truth
+indisputable fact
+respected authorities confirm
+established research shows
+a Nobel Prize-winning scientist believes
+the most intelligent minds agree
+top thinkers of our time argue
+those who disagree are uninformed
+no real expert would dispute this
+a professor from a top university claims
+all credible scientists believe
+no serious researcher disagrees
+# 🔹 Implying Popular Consensus Without Data
+the majority of people think
+society agrees that
+most intelligent people understand
+an overwhelming number of people
+the vast majority
+widely considered to be true
+popular opinion suggests
+everyone is talking about
+most believe
+# 🔹 Framing Opponents in a Negative Light
+only extremists believe otherwise
+people who disagree are in denial
+anyone who questions this is ignorant
+blindly following the agenda
+out of touch with reality
+desperate attempt to save face
+trying to cover up the truth
+a last-ditch effort to deceive
+refusing to accept facts
+spreading misinformation
+manipulated by special interests
+driven by greed and corruption
+being paid to say otherwise
+deliberately misleading
+hiding the truth from the public
+working against the people
+exploiting the system
+part of the problem, not the solution
+dangerous and reckless
+acting in bad faith
+# 🔹 Implying Urgency & Fear-Mongering
+we are running out of time
+before it’s too late
+act now before disaster strikes
+imminent collapse
+crisis is unfolding
+ticking time bomb
+on the brink of disaster
+looming catastrophe
+a dire warning
+facing an existential threat
+the fate of our nation
+before it's too late
+can’t afford to wait
+if this continues, we’re doomed
+the last chance to save
+history will not be kind
+future generations will suffer
+too dangerous to ignore
+# 🔹 Appealing to Nostalgia & Past Glory
+things were better before
+back in the good old days
+when America was great
+returning to our roots
+the way it was meant to be
+before things got out of hand
+we’ve lost our way
+we need to go back to simpler times
+what our forefathers intended
+traditional values are under attack
+restoring the glory days
+reclaiming what was lost
+remember when things made sense?
+back when people had morals
+the downfall of our civilization

mediaunmasked/resources/right_bias_words.txt ADDED Viewed

	@@ -0,0 +1,233 @@

+# 📂 resources/right_bias_words.txt
+# -------------------------------------------------
+# 🔹 Political Ideology & Economic Policy
+right-wing
+liberal
+traditional values
+free market
+capitalism
+small government
+big government overreach
+limited government
+fiscal responsibility
+trickle-down economics
+deregulation
+job creators
+personal responsibility
+welfare dependency
+individual liberty
+government waste
+tax cuts
+pro-business policies
+pro-growth policies
+big government socialism
+crony capitalism
+hard work pays off
+socialist policies fail
+self-reliance
+national sovereignty
+# 🔹 Climate & Environmental Policy
+climate alarmism
+climate hoax
+green energy scam
+drill baby drill
+energy independence
+clean coal
+pro-fracking
+stop the war on oil
+anti-carbon tax
+regulatory overreach
+climate hysteria
+fossil fuel industry
+radical environmentalists
+alternative energy myths
+global warming exaggeration
+renewable energy failure
+climate change agenda
+big government green policies
+end subsidies for green energy
+eco-terrorism
+emissions regulations kill jobs
+climate change skepticism
+scientific consensus is flawed
+# 🔹 Social Issues & Culture Wars
+woke agenda
+cancel culture
+critical race theory
+identity politics
+anti-woke
+war on Christmas
+traditional marriage
+family values
+religious freedom
+biblical principles
+faith-based values
+cultural marxism
+gender ideology
+biological reality
+trans agenda
+protect women’s sports
+Christian persecution
+church over state
+anti-religious bigotry
+parental rights
+indoctrination in schools
+reverse racism
+meritocracy matters
+law and order
+war on masculinity
+anti-gun propaganda
+# 🔹 Immigration & Border Policy
+illegal aliens
+border crisis
+invasion at the border
+build the wall
+secure our borders
+deportation
+amnesty is a scam
+open borders policy
+sanctuary cities are unsafe
+migrant caravans
+protect American workers
+chain migration
+anchor babies
+English as the official language
+vetting immigrants
+catch and deport
+mass migration problem
+border security first
+illegals taking American jobs
+criminal aliens
+no asylum abuse
+end birthright citizenship
+border patrol under attack
+# 🔹 Healthcare & Public Welfare
+government takeover of healthcare
+socialized medicine fails
+free healthcare is a myth
+Medicare for all is unsustainable
+private insurance rights
+Obamacare disaster
+healthcare freedom
+personal responsibility in healthcare
+rationed care
+universal healthcare means higher taxes
+health savings accounts
+big pharma collusion
+big government healthcare
+death panels
+nanny state policies
+taxpayer-funded abortion
+personalized medicine
+free market healthcare solutions
+government interference in medicine
+healthcare choice
+welfare abuse
+entitlement reform
+personalized care models
+stop welfare expansion
+# 🔹 Gun Rights & Public Safety
+gun control doesn’t work
+Second Amendment rights
+constitutional carry
+good guy with a gun
+gun grabbers
+assault weapons myth
+defend the Second Amendment
+law-abiding gun owners
+red flag laws violate rights
+shall not be infringed
+gun-free zones don't work
+arming teachers
+NRA-backed legislation
+stand your ground
+self-defense rights
+gun rights under attack
+criminals ignore gun laws
+leftists want total disarmament
+defund the police is dangerous
+crime wave
+law and order policies
+Democrats are soft on crime
+# 🔹 Media & Big Tech Censorship
+mainstream media lies
+fake news media
+media bias
+left-wing media monopoly
+corporate media corruption
+conservative voices silenced
+shadow banning
+Big Tech censorship
+social media suppression
+fact-checkers are biased
+alternative media
+legacy media collapse
+biased journalism
+freedom of speech under attack
+media elite
+disinformation police
+Silicon Valley leftist agenda
+misinformation double standard
+conservative accounts banned
+election interference
+algorithm manipulation
+digital free speech
+# 🔹 Election Integrity & Government Accountability
+election fraud
+stolen election
+mail-in ballot fraud
+ballot harvesting
+illegal voting
+rigged elections
+secure the vote
+voter ID laws
+dead people voting
+stop election interference
+clean voter rolls
+fair elections
+stop the steal
+Democrat-run cities are corrupt
+big government tyranny
+congressional overreach
+political witch hunt
+government accountability
+unelected bureaucrats
+administrative state abuse
+taxpayer money wasted
+drain the swamp
+deep state
+two-tiered justice system
+weaponization of government
+# 🔹 Foreign Policy & National Defense
+America First
+patriotic nationalism
+globalism is a threat
+weak foreign policy
+military strength
+peace through strength
+support our troops
+anti-interventionism
+funding our enemies
+China threat
+Russia hoax
+leftist appeasement
+national security first
+endless wars are a mistake
+pro-Israel stance
+secure our allies
+foreign aid waste
+Biden’s weak leadership
+defund the UN
+globalist elites
+pro-American trade policies
+stop outsourcing jobs
+stop military woke policies
+secure American sovereignty

mediaunmasked/schemas/requests.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from pydantic import BaseModel
+class AnalyzeRequest(BaseModel):
+    """Pydantic model with two required string fields: a headline and its content."""
+    # Headline text of the article being analyzed.
+    headline: str
+    # Body text the headline is analyzed against.
+    content: str

mediaunmasked/schemas/responses.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from pydantic import BaseModel
+class AnalyzeResponse(BaseModel):
+    """Pydantic model with three required float fields describing one analysis result."""
+    # NOTE(review): scale is not defined here; presumably 0-100 — confirm against HeadlineAnalyzer.
+    headline_vs_content_score: float
+    # NOTE(review): presumably a 0-1 probability — confirm against HeadlineAnalyzer.
+    entailment_score: float
+    # NOTE(review): presumably a 0-1 probability — confirm against HeadlineAnalyzer.
+    contradiction_score: float

mediaunmasked/scrapers/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .article_scraper import ArticleScraper
2	+
3	+ __all__ = ['ArticleScraper']

mediaunmasked/scrapers/article_scraper.py ADDED Viewed

	@@ -0,0 +1,139 @@

+from typing import Dict, Optional
+import logging
+from urllib.parse import urlparse
+import requests
+from bs4 import BeautifulSoup
+from ..utils.logging_config import setup_logging
+class ArticleScraper:
+    def __init__(self):
+        self.session = requests.Session()
+        self.session.headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        setup_logging()
+        self.logger = logging.getLogger(__name__)
+    def _get_domain(self, url: str) -> str:
+        """Extract domain from URL."""
+        return urlparse(url).netloc
+    def _fetch_page(self, url: str) -> Optional[str]:
+        """Fetch page content with error handling."""
+        try:
+            response = self.session.get(url)
+            response.raise_for_status()
+            return response.text
+        except Exception as e:
+            self.logger.error(f"Error fetching {url}: {str(e)}")
+            return None
+    def _extract_snopes(self, soup: BeautifulSoup) -> Dict[str, str]:
+        """Extract content from Snopes articles."""
+        # Get headline from any h1 tag since it doesn't have a specific class
+        headline_elem = soup.find('h1')
+        headline = headline_elem.get_text().strip() if headline_elem else ''
+        self.logger.info(f"Found headline: {headline}")
+        # Try to find the article content
+        article = soup.find('article')
+        if article:
+            self.logger.info("Found article tag")
+            # Remove unwanted elements
+            for unwanted in article.find_all(['script', 'style', 'iframe', 'aside']):
+                unwanted.decompose()
+            # Get all paragraphs from the article
+            paragraphs = article.find_all('p')
+            if paragraphs:
+                content = ' '.join(p.get_text().strip() for p in paragraphs)
+            else:
+                content = article.get_text().strip()
+        else:
+            self.logger.warning("No article tag found")
+            content = ''
+        return {"headline": headline, "content": content}
+    def _extract_politifact(self, soup: BeautifulSoup) -> Dict[str, str]:
+        """Extract content from PolitiFact articles."""
+        try:
+            headline = soup.find('h1', class_='article__title')
+            if headline:
+                headline = headline.get_text().strip()
+            else:
+                headline = soup.find('h1')
+                headline = headline.get_text().strip() if headline else "No headline found"
+            self.logger.info(f"Found headline: {headline}")
+            content_div = soup.find('article', class_='article')
+            if content_div:
+                # Remove unwanted elements
+                for unwanted in content_div.find_all(['script', 'style', 'iframe', 'aside']):
+                    unwanted.decompose()
+                content = ' '.join(p.get_text().strip() for p in content_div.find_all('p'))
+            else:
+                # Try alternative content selectors
+                content_selectors = ['.article__text', '.m-textblock']
+                content = ''
+                for selector in content_selectors:
+                    content_elem = soup.select_one(selector)
+                    if content_elem:
+                        content = ' '.join(p.get_text().strip() for p in content_elem.find_all('p'))
+                        break
+            if not content:
+                self.logger.warning("No content found in article")
+                content = "No content found"
+            return {"headline": headline, "content": content}
+        except Exception as e:
+            self.logger.error(f"Error extracting PolitiFact content: {str(e)}")
+            return {"headline": "Error", "content": f"Failed to extract content: {str(e)}"}
+    def scrape_article(self, url: str) -> Optional[Dict[str, str]]:
+        """
+        Main function to scrape fact-checking articles.
+        Returns a dictionary with headline and content.
+        """
+        html_content = self._fetch_page(url)
+        if not html_content:
+            self.logger.error("Failed to fetch page content")
+            return None
+        soup = BeautifulSoup(html_content, 'html.parser')
+        domain = self._get_domain(url)
+        self.logger.info(f"Scraping article from domain: {domain}")
+        # Select appropriate extractor based on domain
+        if 'snopes.com' in domain:
+            result = self._extract_snopes(soup)
+            if not result['headline'] or not result['content']:
+                self.logger.warning("Failed to extract content from Snopes article")
+                self.logger.debug(f"HTML content: {html_content[:500]}...")
+            return result
+        elif 'politifact.com' in domain:
+            return self._extract_politifact(soup)
+        else:
+            # Generic extraction fallback
+            headline = soup.find('h1').get_text().strip() if soup.find('h1') else ''
+            # Try common content selectors
+            content_selectors = ['article', 'main', '.content', '.article-content']
+            content = ''
+            for selector in content_selectors:
+                content_div = soup.select_one(selector)
+                if content_div:
+                    # Remove unwanted elements
+                    for unwanted in content_div.find_all(['script', 'style', 'iframe', 'aside']):
+                        unwanted.decompose()
+                    content = ' '.join(p.get_text().strip() for p in content_div.find_all('p'))
+                    break
+            return {"headline": headline, "content": content}

mediaunmasked/services/analyzer_service.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
+class AnalyzerService:
+    def __init__(self):
+        self.headline_analyzer = HeadlineAnalyzer()
+    async def analyze_content(self, headline: str, content: str):
+        result = self.headline_analyzer.analyze(headline, content)
+        return result

mediaunmasked/utils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Empty file is fine

mediaunmasked/utils/logging_config.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import logging
+from typing import Optional
+def setup_logging(level: int = logging.INFO) -> None:
+    """Configure logging for the application."""
+    logging.basicConfig(
+        level=level,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )

mediaunmasked/web/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Empty file is fine

mediaunmasked/web/app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import streamlit as st
+from ..analyzers.bias_analyzer import BiasAnalyzer
+from ..scrapers.article_scraper import ArticleScraper
+from ..utils.logging_config import setup_logging
+import plotly.graph_objects as go
+def create_sentiment_gauge(score: float) -> go.Figure:
+    """Create a gauge chart for sentiment visualization."""
+    fig = go.Figure(go.Indicator(
+        mode = "gauge+number",
+        value = score * 100,
+        title = {'text': "Sentiment Score"},
+        gauge = {
+            'axis': {'range': [0, 100]},
+            'bar': {'color': "darkblue"},
+            'steps': [
+                {'range': [0, 33], 'color': "lightgray"},
+                {'range': [33, 66], 'color': "gray"},
+                {'range': [66, 100], 'color': "darkgray"}
+            ],
+        }
+    ))
+    return fig
+def main():
+    # Set up logging
+    setup_logging()
+    # Initialize components
+    scraper = ArticleScraper()
+    analyzer = BiasAnalyzer()
+    # Set up the Streamlit interface
+    st.title("Media Bias Analyzer")
+    st.write("Analyze bias and sentiment in news articles")
+    # URL input
+    url = st.text_input("Enter article URL:", "https://www.snopes.com/articles/469232/musk-son-told-trump-shut-up/")
+    if st.button("Analyze"):
+        with st.spinner("Analyzing article..."):
+            # Scrape the article
+            article = scraper.scrape_article(url)
+            if article:
+                # Show article details
+                st.subheader("Article Details")
+                st.write(f"**Headline:** {article['headline']}")
+                with st.expander("Show Article Content"):
+                    st.write(article['content'])
+                # Analyze content
+                result = analyzer.analyze(article['content'])
+                # Display results in columns
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.subheader("Sentiment Analysis")
+                    st.write(f"**Overall Sentiment:** {result.sentiment}")
+                    fig = create_sentiment_gauge(result.bias_score / 100)
+                    st.plotly_chart(fig)
+                with col2:
+                    st.subheader("Bias Analysis")
+                    st.write(f"**Detected Bias:** {result.bias}")
+                    st.write(f"**Confidence Score:** {result.bias_score:.1f}%")
+                # Show flagged phrases
+                if result.flagged_phrases:
+                    st.subheader("Potentially Biased Phrases")
+                    for phrase in result.flagged_phrases:
+                        st.warning(phrase)
+                else:
+                    st.info("No potentially biased phrases detected")
+            else:
+                st.error("Failed to fetch article. Please check the URL and try again.")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+fastapi[all]==0.109.2
+uvicorn==0.27.1
+pydantic==2.6.1
+beautifulsoup4==4.12.3
+requests==2.31.0
+python-dotenv==1.0.1
+textblob==0.17.1
+nltk==3.8.1
+transformers==4.36.2
+torch==2.1.2
+numpy==1.26.3
+pytest==7.4.3
+pytest-asyncio==0.21.1
+httpx==0.25.2

setup.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from setuptools import setup, find_packages
+setup(
+    name="mediaunmasked",
+    version="0.1.0",
+    packages=find_packages(exclude=["tests*"]) + ["app"],  # Include app/ and mediaunmasked/
+    package_dir={"app": "app"},  # Map app directory
+    install_requires=[
+        line.strip()
+        for line in open("requirements.txt").readlines()
+        if not line.startswith("#")
+    ],
+    include_package_data=True,
+    python_requires=">=3.10",
+)

start.sh ADDED Viewed

	@@ -0,0 +1,10 @@

+#!/bin/bash
+# Abort on the first failing command, unset variable, or failed pipeline stage,
+# so the server is never started on top of a broken install.
+set -euo pipefail
+# Install dependencies
+pip install -r requirements.txt
+# Install package in development mode
+pip install -e .
+# Start the FastAPI server
+uvicorn app.main:app --host 0.0.0.0 --port 7860 --reload

tests/unit/test_headline_analyzer.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import pytest
+from src.mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
+@pytest.fixture
+def analyzer():
+    return HeadlineAnalyzer()
+def test_matching_headline(analyzer):
+    headline = "New Study Shows Coffee Reduces Heart Disease Risk"
+    content = "Recent research suggests that coffee may have cardiovascular benefits."
+    result = analyzer.analyze(headline, content)
+    assert result["headline_vs_content_score"] > 30
+    assert result["contradiction_score"] < 0.3
+def test_contradictory_headline(analyzer):
+    headline = "Coffee Increases Heart Disease Risk"
+    content = "Studies show coffee decreases cardiovascular disease risk."
+    result = analyzer.analyze(headline, content)
+    assert result["headline_vs_content_score"] < 30
+    assert result["contradiction_score"] > 0.3