Updating the backend to implement either AI or traditional scoring and return flagged phrases; updating the Supabase table calls to incorporate the new analysis_mode column.
Files changed:
- app/routers/analyze.py (+51 −23)
- mediaunmasked/analyzers/bias_analyzer.py (+181 −40)
- mediaunmasked/analyzers/evidence_analyzer.py (+198 −6)
- mediaunmasked/analyzers/headline_analyzer.py (+224 −115)
- mediaunmasked/analyzers/scoring.py (+26 −7)
- mediaunmasked/analyzers/sentiment_analyzer.py (+272 −6)
- package-lock.json (+20 −0)
- package.json (+3 −0)
- tests/test_LLM_comparisons.py (+199 −0)
app/routers/analyze.py
CHANGED
@@ -1,6 +1,6 @@
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, HttpUrl
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Literal
 import logging
 import os
 from supabase import AsyncClient

@@ -20,7 +20,6 @@
 # Initialize router and dependencies
 router = APIRouter(tags=["analysis"])
 scraper = ArticleScraper()
-scorer = MediaScorer()

 # Get Supabase credentials
 SUPABASE_URL = os.getenv("SUPABASE_URL")

@@ -32,8 +31,12 @@
 supabase = AsyncClient(SUPABASE_URL, SUPABASE_KEY)

+# Define analysis mode type
+AnalysisMode = Literal['ai', 'traditional']
+
 class ArticleRequest(BaseModel):
     url: HttpUrl
+    use_ai: bool = True  # Default to AI-powered analysis

 class MediaScoreDetails(BaseModel):
     headline_analysis: Dict[str, Any]

@@ -54,6 +57,7 @@
     bias_score: float
     bias_percentage: float
     media_score: MediaScore
+    analysis_mode: AnalysisMode

 @router.post("/analyze", response_model=AnalysisResponse)
 async def analyze_article(request: ArticleRequest) -> AnalysisResponse:

@@ -61,7 +65,7 @@
     Analyze an article for bias, sentiment, and credibility.

     Args:
-        request: ArticleRequest containing the URL to analyze
+        request: ArticleRequest containing the URL to analyze and analysis preferences

     Returns:
         AnalysisResponse with complete analysis results

@@ -70,16 +74,27 @@
         HTTPException: If scraping or analysis fails
     """
     try:
-        existing_article = await supabase.table('article_analysis').select('*').eq('url', str(request.url)).execute()
+        # Determine analysis mode
+        analysis_mode: AnalysisMode = 'ai' if request.use_ai else 'traditional'
+        logger.info(f"Analyzing article: {request.url} (Analysis Mode: {analysis_mode})")
+
+        # Check cache with both URL and analysis mode
+        try:
+            cached_result = await supabase.table('article_analysis') \
+                .select('*') \
+                .eq('url', str(request.url)) \
+                .eq('analysis_mode', analysis_mode) \
+                .limit(1) \
+                .single() \
+                .execute()
+
+            if cached_result and cached_result.data:
+                logger.info(f"Found cached analysis for URL with {analysis_mode} mode")
+                return AnalysisResponse.parse_obj(cached_result.data)
+
+        except Exception as cache_error:
+            logger.warning(f"Cache lookup failed: {str(cache_error)}")
+            # Continue with analysis if cache lookup fails

         # Scrape article
         article = scraper.scrape_article(str(request.url))

@@ -89,6 +104,9 @@
                 detail="Failed to scrape article content"
             )

+        # Initialize scorer with specified analysis preference
+        scorer = MediaScorer(use_ai=request.use_ai)
+
         # Analyze content
         analysis = scorer.calculate_media_score(
             article["headline"],

@@ -108,6 +126,7 @@
             "bias": str(analysis['details']['bias_analysis']['bias']),
             "bias_score": float(analysis['details']['bias_analysis']['bias_score']),
             "bias_percentage": float(analysis['details']['bias_analysis']['bias_percentage']),
+            "analysis_mode": analysis_mode,
             "media_score": {
                 "media_unmasked_score": float(analysis['media_unmasked_score']),
                 "rating": str(analysis['rating']),

@@ -135,17 +154,26 @@
             }
         }

-        # Save …
+        # Save to Supabase with analysis mode
+        try:
+            await supabase.table('article_analysis').upsert({
+                'url': str(request.url),
+                'headline': response_dict['headline'],
+                'content': response_dict['content'],
+                'sentiment': response_dict['sentiment'],
+                'bias': response_dict['bias'],
+                'bias_score': response_dict['bias_score'],
+                'bias_percentage': response_dict['bias_percentage'],
+                'media_score': response_dict['media_score'],
+                'analysis_mode': analysis_mode,  # Store the analysis mode
+                'created_at': 'now()'  # Use server timestamp
+            }, on_conflict='url,analysis_mode').execute()  # Specify composite unique constraint
+
+            logger.info(f"Saved analysis to database with mode: {analysis_mode}")
+
+        except Exception as db_error:
+            logger.error(f"Failed to save to database: {str(db_error)}")
+            # Continue since we can still return the analysis even if saving fails

         # Return the response
         return AnalysisResponse.parse_obj(response_dict)
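For reference, a minimal client-side sketch of the new request shape. This is not part of the commit: the base URL and the use of the requests library are assumptions for illustration only.

import requests

# Hypothetical local deployment URL; adjust to wherever the API is hosted.
BASE_URL = "http://localhost:8000"

# AI-powered analysis (default): use_ai may be omitted since it defaults to True.
ai_response = requests.post(f"{BASE_URL}/analyze", json={
    "url": "https://example.com/some-article",
    "use_ai": True,
})

# Traditional keyword/heuristic analysis of the same article is cached separately,
# because the cache key is now the (url, analysis_mode) pair.
traditional_response = requests.post(f"{BASE_URL}/analyze", json={
    "url": "https://example.com/some-article",
    "use_ai": False,
})

print(ai_response.json()["analysis_mode"])           # expected: "ai"
print(traditional_response.json()["analysis_mode"])  # expected: "traditional"
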
mediaunmasked/analyzers/bias_analyzer.py
CHANGED
@@ -1,14 +1,72 @@
 import logging
 import os
 from typing import Dict, Any, List
+from transformers import pipeline
+import numpy as np

 logger = logging.getLogger(__name__)

 class BiasAnalyzer:
-    def __init__(self):
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize bias analyzer with both LLM and traditional approaches.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        # Load traditional keywords
         self.resources_dir = os.path.join(os.path.dirname(__file__), '..', 'resources')
         self.left_keywords = self._load_keywords('left_bias_words.txt')
         self.right_keywords = self._load_keywords('right_bias_words.txt')
+
+        if use_ai:
+            try:
+                # Initialize LLM pipeline for zero-shot classification
+                self.classifier = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1  # Use CPU, change to specific GPU index if available
+                )
+                self.llm_available = True
+                logger.info("LLM pipeline initialized successfully for bias analysis")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipeline: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing bias analyzer in traditional mode")
+
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Analyze bias using LLM with fallback to traditional method."""
+        try:
+            # Try LLM analysis if enabled and available
+            if self.use_ai and self.llm_available:
+                llm_result = self._analyze_with_llm(text)
+                if llm_result:
+                    return llm_result
+
+            # Use traditional analysis
+            logger.info("Using traditional bias analysis")
+            return self._analyze_traditional(text)
+
+        except Exception as e:
+            logger.error(f"Error in bias analysis: {str(e)}")
+            return {
+                "bias": "Error",
+                "bias_score": 0.0,
+                "bias_percentage": 0,
+                "flagged_phrases": []
+            }

     def _load_keywords(self, filename: str) -> List[str]:
         """Load keywords from file."""

@@ -20,63 +78,146 @@
             logger.error(f"Error loading {filename}: {str(e)}")
             return []

-    (the old keyword-only analyze() method is removed; its counting, scoring and
-    labelling logic moves into _analyze_traditional below)
+    def _analyze_traditional(self, text: str) -> Dict[str, Any]:
+        """Traditional keyword-based bias analysis."""
+        text_lower = text.lower()
+
+        # Count matches and collect flagged phrases
+        left_matches = [word for word in self.left_keywords if word in text_lower]
+        right_matches = [word for word in self.right_keywords if word in text_lower]
+
+        left_count = len(left_matches)
+        right_count = len(right_matches)
+        total_count = left_count + right_count
+
+        if total_count == 0:
+            return {
+                "bias": "Neutral",
+                "bias_score": 0.0,
+                "bias_percentage": 0,
+                "flagged_phrases": []
+            }
+
+        # Calculate bias score (-1 to 1) and bias percentage
+        bias_score = (right_count - left_count) / total_count
+        bias_percentage = abs(bias_score * 100)
+
+        # Determine bias label
+        if bias_score < -0.6:
+            bias = "Strongly Left"
+        elif bias_score < -0.3:
+            bias = "Moderately Left"
+        elif bias_score < -0.1:
+            bias = "Leaning Left"
+        elif bias_score > 0.6:
+            bias = "Strongly Right"
+        elif bias_score > 0.3:
+            bias = "Moderately Right"
+        elif bias_score > 0.1:
+            bias = "Leaning Right"
+        else:
+            bias = "Neutral"
+
+        return {
+            "bias": bias,
+            "bias_score": round(bias_score, 2),
+            "bias_percentage": round(bias_percentage, 1),
+            "flagged_phrases": list(set(left_matches + right_matches))[:5]  # Limit to top 5 unique phrases
+        }
+
+    def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
+        """Analyze bias using LLM zero-shot classification."""
+        try:
+            # Define bias categories to check against
+            bias_categories = [
+                "left-wing bias",
+                "right-wing bias",
+                "neutral/balanced perspective"
+            ]
+
+            # Split text into manageable chunks (2000 chars each)
+            chunks = [text[i:i+2000] for i in range(0, len(text), 2000)]
+
+            # Analyze each chunk
+            chunk_scores = []
+            flagged_phrases = []
+
+            for chunk in chunks:
+                # Perform zero-shot classification
+                result = self.classifier(chunk, bias_categories, multi_label=True)
+                chunk_scores.append({
+                    label: score
+                    for label, score in zip(result['labels'], result['scores'])
+                })
+
+                # Identify strongly biased phrases
+                sentences = chunk.split('.')
+                for sentence in sentences:
+                    if len(sentence.strip()) > 10:  # Ignore very short sentences
+                        sentence_result = self.classifier(
+                            sentence.strip(),
+                            bias_categories,
+                            multi_label=False
+                        )
+                        max_score = max(sentence_result['scores'])
+                        if max_score > 0.8 and sentence_result['labels'][0] != "neutral/balanced perspective":
+                            flagged_phrases.append(sentence.strip())
+
+            # Aggregate scores across chunks
+            aggregated_scores = {
+                category: np.mean([scores[category] for scores in chunk_scores])
+                for category in bias_categories
+            }
+
+            # Calculate bias metrics
+            left_score = aggregated_scores["left-wing bias"]
+            right_score = aggregated_scores["right-wing bias"]
+            neutral_score = aggregated_scores["neutral/balanced perspective"]
+
+            # Calculate bias score (-1 to 1, where negative is left and positive is right)
+            bias_score = (right_score - left_score) / max(right_score + left_score, 0.0001)
+
+            # Determine bias label (same thresholds as the traditional path)
+            if bias_score < -0.6:
+                bias = "Strongly Left"
+            elif bias_score < -0.3:
+                bias = "Moderately Left"
+            elif bias_score < -0.1:
+                bias = "Leaning Left"
+            elif bias_score > 0.6:
+                bias = "Strongly Right"
+            elif bias_score > 0.3:
+                bias = "Moderately Right"
+            elif bias_score > 0.1:
+                bias = "Leaning Right"
+            else:
+                bias = "Neutral"
+
+            # Calculate bias percentage (0-100)
+            bias_percentage = min(100, abs(bias_score * 100))
+
+            return {
+                "bias": bias,
+                "bias_score": round(bias_score, 2),
+                "bias_percentage": round(bias_percentage, 1),
+                "flagged_phrases": list(set(flagged_phrases))[:5],  # Limit to top 5 unique phrases
+                "detailed_scores": {
+                    "left_bias": round(left_score * 100, 1),
+                    "right_bias": round(right_score * 100, 1),
+                    "neutral": round(neutral_score * 100, 1)
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"LLM analysis failed: {str(e)}")
+            return None
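As a standalone illustration of the zero-shot step the new AI path relies on, the sketch below classifies one sentence against the three bias categories. It assumes the transformers package is installed and the facebook/bart-large-mnli weights can be downloaded; the sample sentence is invented.

from transformers import pipeline

# Same pipeline configuration as the new BiasAnalyzer AI path (CPU, device=-1).
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)

bias_categories = ["left-wing bias", "right-wing bias", "neutral/balanced perspective"]
sentence = "The reckless new policy is yet another government power grab."

result = classifier(sentence, bias_categories, multi_label=False)
for label, score in zip(result["labels"], result["scores"]):
    print(f"{label}: {score:.3f}")
# A sentence whose top non-neutral score exceeds 0.8 would be flagged by the analyzer.
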
mediaunmasked/analyzers/evidence_analyzer.py
CHANGED
@@ -1,10 +1,40 @@
 import logging
 from typing import Dict, Any, List
+from transformers import pipeline
+import numpy as np
+import nltk
+from nltk.tokenize import sent_tokenize

 logger = logging.getLogger(__name__)

 class EvidenceAnalyzer:
-    def __init__(self):
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize evidence analyzer with LLM and traditional approaches.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        if use_ai:
+            try:
+                # Zero-shot classifier for evidence analysis
+                self.classifier = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1
+                )
+                self.llm_available = True
+                logger.info("LLM pipeline initialized successfully for evidence analysis")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipeline: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing evidence analyzer in traditional mode")
+
+        # Traditional markers for fallback
         self.citation_markers = [
             "according to",
             "said",

@@ -29,25 +59,187 @@
             "allegedly"
         ]

+    def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
+        """Analyze evidence using LLM."""
+        try:
+            # Clean the text of formatting markers
+            cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
+            cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
+                                     if not line.startswith('[') and not line.startswith('More on'))
+
+            # Download NLTK data if needed
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                nltk.download('punkt')
+
+            # Split text into chunks
+            chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
+
+            # Categories for evidence classification
+            evidence_categories = [
+                "factual statement with source",
+                "verifiable claim",
+                "expert opinion",
+                "data-backed claim",
+                "unsubstantiated claim",
+                "opinion statement"
+            ]
+
+            chunk_scores = []
+            flagged_phrases = []
+
+            for chunk in chunks:
+                # Analyze each sentence in the chunk
+                sentences = sent_tokenize(chunk)
+
+                for sentence in sentences:
+                    if len(sentence.strip()) > 10:
+                        # Classify the type of evidence
+                        result = self.classifier(sentence.strip(), evidence_categories, multi_label=True)
+                        evidence_scores = {
+                            label: score
+                            for label, score in zip(result['labels'], result['scores'])
+                        }
+
+                        # Strong evidence indicators
+                        strong_evidence = sum([
+                            evidence_scores.get("factual statement with source", 0),
+                            evidence_scores.get("data-backed claim", 0),
+                            evidence_scores.get("expert opinion", 0)
+                        ]) / 3  # Average the strong evidence scores
+
+                        # Weak or no evidence indicators
+                        weak_evidence = sum([
+                            evidence_scores.get("unsubstantiated claim", 0),
+                            evidence_scores.get("opinion statement", 0)
+                        ]) / 2  # Average the weak evidence scores
+
+                        # Store scores for overall calculation
+                        chunk_scores.append({
+                            'strong_evidence': strong_evidence,
+                            'weak_evidence': weak_evidence
+                        })
+
+                        # Flag high-quality evidence
+                        if strong_evidence > 0.7 and not any(
+                            marker in sentence.lower()
+                            for marker in ['more on this story', 'click here', 'read more']
+                        ):
+                            flagged_phrases.append({
+                                'text': sentence.strip(),
+                                'type': 'strong_evidence',
+                                'score': strong_evidence
+                            })
+
+            # Calculate overall evidence score
+            if chunk_scores:
+                avg_strong = np.mean([s['strong_evidence'] for s in chunk_scores])
+                avg_weak = np.mean([s['weak_evidence'] for s in chunk_scores])
+
+                # Evidence score formula:
+                # - Reward strong evidence (70% weight)
+                # - Penalize weak/unsubstantiated claims (30% weight)
+                # - Ensure score is between 0 and 100
+                evidence_score = min(100, (
+                    (avg_strong * 0.7) +
+                    ((1 - avg_weak) * 0.3)
+                ) * 100)
+            else:
+                evidence_score = 0
+
+            # Sort and select top evidence phrases
+            sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
+            # Filter out formatting text and duplicates
+            unique_phrases = []
+            seen = set()
+            for phrase in sorted_phrases:
+                clean_text = phrase['text'].strip()
+                if clean_text not in seen and not any(
+                    marker in clean_text.lower()
+                    for marker in ['more on this story', 'click here', 'read more']
+                ):
+                    unique_phrases.append(clean_text)
+                    seen.add(clean_text)
+                if len(unique_phrases) >= 5:
+                    break
+
+            return {
+                "evidence_based_score": round(evidence_score, 1),
+                "flagged_phrases": unique_phrases
+            }
+
+        except Exception as e:
+            logger.error(f"LLM analysis failed: {str(e)}")
+            return None
+
+    def _analyze_traditional(self, text: str) -> Dict[str, Any]:
+        """Traditional evidence analysis as fallback."""
         try:
             text_lower = text.lower()

+            # Find citations and evidence
+            evidence_phrases = []
+            for marker in self.citation_markers:
+                index = text_lower.find(marker)
+                while index != -1:
+                    # Get the sentence containing the marker
+                    start = max(0, text_lower.rfind('.', 0, index) + 1)
+                    end = text_lower.find('.', index)
+                    if end == -1:
+                        end = len(text_lower)
+
+                    evidence_phrases.append(text[start:end].strip())
+                    index = text_lower.find(marker, end)
+
+            # Count vague references
             vague_count = sum(1 for marker in self.vague_markers if marker in text_lower)

+            # Calculate score
+            citation_count = len(evidence_phrases)
             base_score = min(citation_count * 20, 100)
             penalty = vague_count * 10

             evidence_score = max(0, base_score - penalty)

             return {
-                "evidence_based_score": evidence_score
+                "evidence_based_score": evidence_score,
+                "flagged_phrases": list(set(evidence_phrases))[:5]  # Limit to top 5 unique phrases
             }

+        except Exception as e:
+            logger.error(f"Traditional analysis failed: {str(e)}")
+            return {
+                "evidence_based_score": 0,
+                "flagged_phrases": []
+            }
+
+    def analyze(self, text: str) -> Dict[str, Any]:
+        """Analyze evidence using LLM with fallback to traditional method."""
+        try:
+            # Try LLM analysis if enabled and available
+            if self.use_ai and self.llm_available:
+                llm_result = self._analyze_with_llm(text)
+                if llm_result:
+                    return llm_result
+
+            # Use traditional analysis
+            logger.info("Using traditional evidence analysis")
+            return self._analyze_traditional(text)
+
         except Exception as e:
             logger.error(f"Error in evidence analysis: {str(e)}")
             return {
-                "evidence_based_score": 0
+                "evidence_based_score": 0,
+                "flagged_phrases": []
             }
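To make the traditional fallback's arithmetic concrete, here is a self-contained sketch of the citation/vague-marker scoring: +20 points per citation marker occurrence (capped at 100), minus 10 per vague marker. The marker lists and sample text are illustrative, not the analyzer's full resource lists.

# Standalone sketch of the traditional evidence scoring formula.
citation_markers = ["according to", "said", "reported"]
vague_markers = ["some say", "many believe", "allegedly"]

text = (
    "According to the ministry, exports rose 4% last quarter. "
    "Some say the figures are inflated. "
    "The finance minister said the trend should continue."
)
text_lower = text.lower()

citation_count = sum(1 for m in citation_markers if m in text_lower)
vague_count = sum(1 for m in vague_markers if m in text_lower)

base_score = min(citation_count * 20, 100)   # 2 citation markers -> 40
penalty = vague_count * 10                   # 1 vague marker -> 10
evidence_score = max(0, base_score - penalty)
print(evidence_score)                        # 30
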
mediaunmasked/analyzers/headline_analyzer.py
CHANGED
@@ -1,7 +1,6 @@
 import logging
 from typing import Dict, Any, List
-from transformers import pipeline
-from transformers import AutoTokenizer
+from transformers import pipeline, AutoTokenizer
 import numpy as np
 import nltk
 from nltk.tokenize import sent_tokenize

@@ -9,12 +8,38 @@
 logger = logging.getLogger(__name__)

 class HeadlineAnalyzer:
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
-        self.max_length = 512
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize the analyzers for headline analysis.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        if use_ai:
+            try:
+                # NLI model for contradiction/entailment
+                self.nli_pipeline = pipeline("text-classification", model="roberta-large-mnli")
+
+                # Zero-shot classifier for clickbait and sensationalism
+                self.zero_shot = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1
+                )
+
+                self.tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
+                self.max_length = 512
+                self.llm_available = True
+                logger.info("LLM pipelines initialized successfully for headline analysis")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipelines: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing headline analyzer in traditional mode")

     def _split_content(self, headline: str, content: str) -> List[str]:
         """Split content into sections that fit within token limit."""
         (body unchanged apart from dropping an inline comment and the verbose
         content-splitting log message)
         return sections

@@ -42,141 +66,226 @@
-    (the previous _analyze_section — single NLI pass with a 0.1 contradiction
-    threshold and a 0.1-per-contradiction penalty — is removed)
+    def _analyze_section(self, headline: str, section: str) -> Dict[str, Any]:
+        """Analyze a single section for headline accuracy and sensationalism."""
+        try:
+            # Download NLTK data if needed
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                nltk.download('punkt')
+
+            sentences = sent_tokenize(section)
+
+            # Analyze headline against content for contradiction/entailment
+            nli_scores = []
+            flagged_phrases = []
+
+            # Categories for sensationalism check
+            sensationalism_categories = [
+                "clickbait",
+                "sensationalized",
+                "misleading",
+                "factual reporting",
+                "accurate headline"
+            ]
+
+            # Check headline for sensationalism
+            sensationalism_result = self.zero_shot(headline, sensationalism_categories, multi_label=True)
+            sensationalism_scores = {
+                label: score
+                for label, score in zip(sensationalism_result['labels'], sensationalism_result['scores'])
+            }
+
+            # Analyze each sentence for contradiction/support
+            for sentence in sentences:
+                if len(sentence.strip()) > 10:
+                    # Check for contradiction/entailment
+                    input_text = f"{headline} [SEP] {sentence}"
+                    nli_result = self.nli_pipeline(input_text, top_k=None)
+                    scores = {item['label']: item['score'] for item in nli_result}
+                    nli_scores.append(scores)
+
+                    # Flag contradictory or highly sensationalized content
+                    if scores.get('CONTRADICTION', 0) > 0.4:
+                        flagged_phrases.append({
+                            'text': sentence.strip(),
+                            'type': 'contradiction',
+                            'score': scores['CONTRADICTION']
+                        })
+
+            # Calculate aggregate scores
+            avg_scores = {
+                label: np.mean([score[label] for score in nli_scores])
+                for label in ['ENTAILMENT', 'CONTRADICTION', 'NEUTRAL']
+            }
+
+            # Calculate headline accuracy score
+            accuracy_components = {
+                'entailment': avg_scores['ENTAILMENT'] * 0.4,
+                'non_contradiction': (1 - avg_scores['CONTRADICTION']) * 0.3,
+                'non_sensational': (
+                    sensationalism_scores.get('factual reporting', 0) +
+                    sensationalism_scores.get('accurate headline', 0)
+                ) * 0.15,
+                'non_clickbait': (
+                    1 - sensationalism_scores.get('clickbait', 0) -
+                    sensationalism_scores.get('sensationalized', 0)
+                ) * 0.15
+            }
+
+            accuracy_score = sum(accuracy_components.values()) * 100
+
+            # Sort and limit flagged phrases
+            sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
+            top_phrases = [phrase['text'] for phrase in sorted_phrases[:5]]
+
+            return {
+                "accuracy_score": accuracy_score,
+                "flagged_phrases": top_phrases,
+                "detailed_scores": {
+                    "nli": avg_scores,
+                    "sensationalism": sensationalism_scores
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"Section analysis failed: {str(e)}")
+            return {
+                "accuracy_score": 0,
+                "flagged_phrases": [],
+                "detailed_scores": {}
+            }
+
+    def _analyze_traditional(self, headline: str, content: str) -> Dict[str, Any]:
+        """Traditional headline analysis method."""
+        try:
+            # Download NLTK data if needed
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                nltk.download('punkt')
+
+            # Basic metrics
+            headline_words = set(headline.lower().split())
+            content_words = set(content.lower().split())
+
+            # Calculate word overlap
+            overlap_words = headline_words.intersection(content_words)
+            overlap_score = len(overlap_words) / len(headline_words) if headline_words else 0
+
+            # Check for clickbait patterns
+            clickbait_patterns = [
+                "you won't believe", "shocking", "mind blowing", "amazing", "incredible",
+                "unbelievable", "must see", "click here", "find out", "what happens next"
+            ]
+
+            clickbait_count = sum(1 for pattern in clickbait_patterns if pattern in headline.lower())
+            clickbait_penalty = clickbait_count * 10  # 10% penalty per clickbait phrase
+
+            # Calculate final score (0-100)
+            base_score = overlap_score * 100
+            final_score = max(0, min(100, base_score - clickbait_penalty))
+
+            # Find potentially misleading phrases
+            flagged_phrases = []
+            sentences = sent_tokenize(content)
+
+            for sentence in sentences:
+                # Flag sentences that directly contradict headline words
+                sentence_words = set(sentence.lower().split())
+                if len(headline_words.intersection(sentence_words)) > 2:
+                    flagged_phrases.append(sentence.strip())
+
+                # Flag sentences with clickbait patterns
+                if any(pattern in sentence.lower() for pattern in clickbait_patterns):
+                    flagged_phrases.append(sentence.strip())
+
+            return {
+                "headline_vs_content_score": round(final_score, 1),
+                "flagged_phrases": list(set(flagged_phrases))[:5]  # Limit to top 5 unique phrases
+            }
+
+        except Exception as e:
+            logger.error(f"Traditional analysis failed: {str(e)}")
+            return {
+                "headline_vs_content_score": 0,
+                "flagged_phrases": []
+            }

     def analyze(self, headline: str, content: str) -> Dict[str, Any]:
-        """Analyze how well the headline matches the content …"""
+        """Analyze how well the headline matches the content."""
         try:
             logger.info("\n" + "="*50)
             logger.info("HEADLINE ANALYSIS STARTED")
             logger.info("="*50)

             if not headline.strip() or not content.strip():
                 logger.warning("Empty headline or content provided")
                 return {
                     "headline_vs_content_score": 0,
-                    "entailment_score": 0,
-                    "contradiction_score": 0,
-                    "contradictory_phrases": []
+                    "flagged_phrases": []
                 }

-            (the previous content-length warning and single-path NLI scoring —
-            mean entailment, max contradiction, mean neutral combined with
-            0.6/0.3/0.1 weights into a final consistency score — are removed)
+            # Use LLM analysis if available and enabled
+            if self.use_ai and self.llm_available:
+                logger.info("Using LLM analysis for headline")
+                # Split content if needed
                 sections = self._split_content(headline, content)
+                section_results = []

                 # Analyze each section
+                for section in sections:
+                    result = self._analyze_section(headline, section)
+                    section_results.append(result)
+
+                # Aggregate results across sections
+                accuracy_scores = [r['accuracy_score'] for r in section_results]
+                final_score = np.mean(accuracy_scores)
+
+                # Combine flagged phrases from all sections
+                all_phrases = []
+                for result in section_results:
+                    all_phrases.extend(result['flagged_phrases'])
+
+                # Remove duplicates and limit to top 5
+                unique_phrases = list(dict.fromkeys(all_phrases))[:5]
+
+                return {
+                    "headline_vs_content_score": round(final_score, 1),
+                    "flagged_phrases": unique_phrases
+                }
             else:
+                # Use traditional analysis
+                logger.info("Using traditional headline analysis")
+                return self._analyze_traditional(headline, content)

         except Exception as e:
-            logger.error(f"Error Type: {type(e).__name__}")
-            logger.error(f"Error Message: {str(e)}")
-            logger.error("Stack Trace:", exc_info=True)
+            logger.error(f"Headline analysis failed: {str(e)}")
             return {
                 "headline_vs_content_score": 0,
-                "entailment_score": 0,
-                "contradiction_score": 0,
-                "contradictory_phrases": []
+                "flagged_phrases": []
             }
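A small standalone sketch of the per-sentence NLI check that the new _analyze_section performs. It assumes the roberta-large-mnli weights can be downloaded; the headline and sentence are invented for illustration.

from transformers import pipeline

# Same pipeline the analyzer builds in AI mode.
nli = pipeline("text-classification", model="roberta-large-mnli")

headline = "City bans all cars from downtown"
sentence = "Officials said private cars will still be allowed on most downtown streets."

# The analyzer concatenates headline and sentence with [SEP] and reads all class scores.
scores = {item["label"]: item["score"] for item in nli(f"{headline} [SEP] {sentence}", top_k=None)}
print(scores)  # e.g. {'CONTRADICTION': ..., 'NEUTRAL': ..., 'ENTAILMENT': ...}

# A CONTRADICTION score above 0.4 would flag this sentence in the new analyzer.
if scores.get("CONTRADICTION", 0) > 0.4:
    print("flagged as contradicting the headline")
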
mediaunmasked/analyzers/scoring.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict, Any
+from typing import Dict, Any, Literal
 import logging

 from .headline_analyzer import HeadlineAnalyzer

@@ -8,17 +8,34 @@
 logger = logging.getLogger(__name__)

+# Define analysis mode type
+AnalysisMode = Literal['ai', 'traditional']
+
 class MediaScorer:
-    def __init__(self):
-        (analyzers previously constructed without an analysis-mode preference)
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize the MediaScorer with required analyzers.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.analysis_mode: AnalysisMode = 'ai' if use_ai else 'traditional'
+        logger.info(f"Initializing MediaScorer with {self.analysis_mode} analysis")
+
+        # Initialize analyzers with analysis mode preference
+        self.headline_analyzer = HeadlineAnalyzer(use_ai=use_ai)
+        self.sentiment_analyzer = SentimentAnalyzer(use_ai=use_ai)
+        self.bias_analyzer = BiasAnalyzer(use_ai=use_ai)
+        self.evidence_analyzer = EvidenceAnalyzer(use_ai=use_ai)
+
+        logger.info(f"All analyzers initialized in {self.analysis_mode} mode")

     def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
         """Calculate final media credibility score."""
         try:
+            logger.info(f"Calculating media score using {self.analysis_mode} analysis")
+
             headline_analysis = self.headline_analyzer.analyze(headline, content)
             sentiment_analysis = self.sentiment_analyzer.analyze(content)
             bias_analysis = self.bias_analyzer.analyze(content)

@@ -74,6 +91,7 @@
             result = {
                 "media_unmasked_score": round(final_score, 1),
                 "rating": rating,
+                "analysis_mode": self.analysis_mode,
                 "details": {
                     "headline_analysis": {
                         "headline_vs_content_score": headline_analysis["headline_vs_content_score"],

@@ -107,6 +125,7 @@
             return {
                 "media_unmasked_score": 0,
                 "rating": "Error",
+                "analysis_mode": self.analysis_mode,
                 "details": {
                     "headline_analysis": {"headline_vs_content_score": 0, "flagged_phrases": []},
                     "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
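A rough usage sketch of the new constructor flag, assuming the repository's package is importable as mediaunmasked and ignoring model download time. The headline and content strings are placeholders, not data from the repository.

from mediaunmasked.analyzers.scoring import MediaScorer

headline = "Government unveils sweeping new climate plan"
content = "According to the environment ministry, the plan sets binding targets for 2030..."

ai_scorer = MediaScorer(use_ai=True)            # LLM-backed analyzers (falls back if models fail to load)
traditional_scorer = MediaScorer(use_ai=False)  # keyword/heuristic analyzers only

for scorer in (ai_scorer, traditional_scorer):
    result = scorer.calculate_media_score(headline, content)
    print(result["analysis_mode"], result["media_unmasked_score"], result["rating"])
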
mediaunmasked/analyzers/sentiment_analyzer.py
CHANGED
@@ -1,11 +1,23 @@
 import logging
 from typing import Dict, Any, List
 from textblob import TextBlob
+from transformers import pipeline
+import numpy as np

 logger = logging.getLogger(__name__)

 class SentimentAnalyzer:
-    def __init__(self):
+    def __init__(self, use_ai: bool = True):
+        """
+        Initialize sentiment analyzer with both traditional and LLM-based approaches.
+
+        Args:
+            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
+        """
+        self.use_ai = use_ai
+        self.llm_available = False
+
+        # Traditional manipulation patterns
         self.manipulative_patterns = [
             "experts say",
             "sources claim",

@@ -34,13 +303,10 @@
             else:
                 sentiment = "Neutral"

-            if manipulation_score > 50:
-                sentiment = "Manipulative"
-
             return {
                 "sentiment": sentiment,
                 "manipulation_score": min(manipulation_score, 100),
-                "flagged_phrases": manipulative_phrases
             }

         except Exception as e:

@@ -17,10 +29,267 @@
             "without doubt",
             "certainly"
         ]
+
+        if use_ai:
+            try:
+                # Initialize LLM pipelines
+                self.sentiment_pipeline = pipeline(
+                    "text-classification",
+                    model="SamLowe/roberta-base-go_emotions",
+                    top_k=None
+                )
+                self.toxicity_pipeline = pipeline(
+                    "text-classification",
+                    model="martin-ha/toxic-comment-model",
+                    top_k=None
+                )
+                self.manipulation_pipeline = pipeline(
+                    "zero-shot-classification",
+                    model="facebook/bart-large-mnli",
+                    device=-1
+                )
+                self.llm_available = True
+                logger.info("LLM pipelines initialized successfully")
+            except Exception as e:
+                logger.warning(f"Failed to initialize LLM pipelines: {str(e)}")
+                self.llm_available = False
+        else:
+            logger.info("Initializing sentiment analyzer in traditional mode")
+
+    def _analyze_with_llm(self, text: str) -> Dict[str, Any]:
+        """Perform sentiment analysis using LLM models."""
+        try:
+            logger.info("Starting LLM sentiment analysis")
+
+            # Clean the text of formatting markers
+            cleaned_text = text.replace('$!/$', '').replace('##', '').replace('#', '')
+            cleaned_text = '\n'.join(line for line in cleaned_text.split('\n')
+                                     if not line.startswith('[') and not line.startswith('More on'))
+
+            logger.info("Text cleaned and prepared for analysis")
+
+            # Split text into chunks of 512 tokens (approximate)
+            chunks = [cleaned_text[i:i+2000] for i in range(0, len(cleaned_text), 2000)]
+            logger.info(f"Text split into {len(chunks)} chunks for processing")
+
+            # Initialize aggregation variables
+            sentiment_scores = []
+            toxicity_scores = []
+            manipulation_scores = []
+            flagged_phrases = []
+
+            manipulation_categories = [
+                "emotional manipulation",
+                "fear mongering",
+                "propaganda",
+                "factual reporting",
+                "balanced perspective"
+            ]
+
+            # Process each chunk
+            for i, chunk in enumerate(chunks, 1):
+                logger.info(f"Processing chunk {i}/{len(chunks)}")
+
+                try:
+                    # Get emotion scores with detailed logging
+                    logger.debug(f"Analyzing emotions for chunk {i}")
+                    emotions = self.sentiment_pipeline(chunk)
+                    logger.debug(f"Raw emotion response: {emotions}")
+
+                    # Handle different response formats
+                    if isinstance(emotions, list):
+                        # Multiple results format
+                        for emotion in emotions:
+                            if isinstance(emotion, dict) and 'label' in emotion and 'score' in emotion:
+                                sentiment_scores.append(emotion)
+                    elif isinstance(emotions, dict) and 'label' in emotions and 'score' in emotions:
+                        # Single result format
+                        sentiment_scores.append(emotions)
+                    logger.debug(f"Processed emotion scores: {sentiment_scores}")
+
+                    # Get toxicity scores
+                    logger.debug(f"Analyzing toxicity for chunk {i}")
+                    toxicity = self.toxicity_pipeline(chunk)
+                    if isinstance(toxicity, list):
+                        toxicity_scores.extend(toxicity)
+                    else:
+                        toxicity_scores.append(toxicity)
+                    logger.debug(f"Processed toxicity scores: {toxicity_scores}")
+
+                    # Get manipulation scores
+                    logger.debug(f"Analyzing manipulation for chunk {i}")
+                    manipulation = self.manipulation_pipeline(
+                        chunk,
+                        manipulation_categories,
+                        multi_label=True
+                    )
+
+                    if isinstance(manipulation, dict) and 'labels' in manipulation and 'scores' in manipulation:
+                        manipulation_scores.append({
+                            label: score
+                            for label, score in zip(manipulation['labels'], manipulation['scores'])
+                        })
+                    logger.debug(f"Processed manipulation scores: {manipulation_scores}")
+
+                    # Analyze sentences for manipulation
+                    sentences = chunk.split('.')
+                    for sentence in sentences:
+                        if len(sentence.strip()) > 10:
+                            sent_result = self.manipulation_pipeline(
+                                sentence.strip(),
+                                manipulation_categories,
+                                multi_label=False
+                            )
+                            if (sent_result['labels'][0] in ["emotional manipulation", "fear mongering", "propaganda"]
+                                    and sent_result['scores'][0] > 0.7):
+                                flagged_phrases.append({
+                                    'text': sentence.strip(),
+                                    'type': sent_result['labels'][0],
+                                    'score': sent_result['scores'][0]
+                                })
+
+                except Exception as chunk_error:
+                    logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
+                    continue
+
+            logger.info("All chunks processed, aggregating scores")
+
+            # Aggregate scores with error handling
+            def aggregate_scores(scores_list, score_type: str):
+                try:
+                    all_scores = {}
+                    for scores in scores_list:
+                        if isinstance(scores, dict):
+                            if 'label' in scores and 'score' in scores:
+                                label = scores['label']
+                                score = scores['score']
+                            else:
+                                # Handle direct label-score mapping
+                                for label, score in scores.items():
+                                    if label not in all_scores:
+                                        all_scores[label] = []
+                                    if isinstance(score, (int, float)):
+                                        all_scores[label].append(score)
+                                continue
+                        else:
+                            logger.warning(f"Unexpected score format in {score_type}: {scores}")
+                            continue
+
+                        if isinstance(label, (str, bytes)):
+                            if label not in all_scores:
+                                all_scores[label] = []
+                            if isinstance(score, (int, float)):
+                                all_scores[label].append(score)
+
+                    return {k: np.mean(v) for k, v in all_scores.items() if v}
+                except Exception as agg_error:
+                    logger.error(f"Error aggregating {score_type} scores: {str(agg_error)}")
+                    return {}
+
+            emotion_scores = aggregate_scores(sentiment_scores, "emotion")
+            toxicity_scores = aggregate_scores(toxicity_scores, "toxicity")
+            logger.debug(f"Aggregated emotion scores: {emotion_scores}")
+            logger.debug(f"Aggregated toxicity scores: {toxicity_scores}")
+
+            # Aggregate manipulation scores
+            manipulation_agg = {
+                category: np.mean([scores.get(category, 0) for scores in manipulation_scores])
+                for category in manipulation_categories
+            }
+            logger.debug(f"Aggregated manipulation scores: {manipulation_agg}")
+
+            # Calculate manipulation score based on multiple factors
+            manipulation_indicators = {
+                'emotional manipulation': 0.4,
+                'fear mongering': 0.3,
+                'propaganda': 0.3,
+                'toxic': 0.2,
+                'severe_toxic': 0.3,
+                'threat': 0.2
+            }
+
+            # Combine toxicity and manipulation scores
+            combined_scores = {**toxicity_scores, **manipulation_agg}
+            manipulation_score = min(100, sum(
+                combined_scores.get(k, 0) * weight
+                for k, weight in manipulation_indicators.items()
+            ) * 100)
+
+            logger.info(f"Final manipulation score: {manipulation_score}")
+
+            # Determine overall sentiment
+            positive_emotions = ['admiration', 'joy', 'amusement', 'approval']
+            negative_emotions = ['disgust', 'anger', 'disappointment', 'fear']
+            neutral_emotions = ['neutral', 'confusion', 'realization']
+
+            pos_score = sum(emotion_scores.get(emotion, 0) for emotion in positive_emotions)
+            neg_score = sum(emotion_scores.get(emotion, 0) for emotion in negative_emotions)
+            neu_score = sum(emotion_scores.get(emotion, 0) for emotion in neutral_emotions)
+
+            logger.debug(f"Sentiment scores - Positive: {pos_score}, Negative: {neg_score}, Neutral: {neu_score}")
logger.debug(f"Sentiment scores - Positive: {pos_score}, Negative: {neg_score}, Neutral: {neu_score}")
|
233 |
+
|
234 |
+
# Determine sentiment based on highest score
|
235 |
+
max_score = max(pos_score, neg_score, neu_score)
|
236 |
+
if max_score == pos_score and pos_score > 0.3:
|
237 |
+
sentiment = "Positive"
|
238 |
+
elif max_score == neg_score and neg_score > 0.3:
|
239 |
+
sentiment = "Negative"
|
240 |
+
else:
|
241 |
+
sentiment = "Neutral"
|
242 |
+
|
243 |
+
logger.info(f"Final sentiment determination: {sentiment}")
|
244 |
+
|
245 |
+
# Sort and limit flagged phrases by manipulation score
|
246 |
+
sorted_phrases = sorted(flagged_phrases, key=lambda x: x['score'], reverse=True)
|
247 |
+
unique_phrases = []
|
248 |
+
seen = set()
|
249 |
+
for phrase in sorted_phrases:
|
250 |
+
clean_text = phrase['text'].strip()
|
251 |
+
if clean_text not in seen:
|
252 |
+
unique_phrases.append(clean_text)
|
253 |
+
seen.add(clean_text)
|
254 |
+
if len(unique_phrases) >= 5:
|
255 |
+
break
|
256 |
+
|
257 |
+
logger.info("LLM analysis completed successfully")
|
258 |
+
|
259 |
+
return {
|
260 |
+
"sentiment": sentiment,
|
261 |
+
"manipulation_score": manipulation_score,
|
262 |
+
"flagged_phrases": unique_phrases,
|
263 |
+
"detailed_scores": {
|
264 |
+
"emotions": emotion_scores,
|
265 |
+
"manipulation": manipulation_agg,
|
266 |
+
"toxicity": toxicity_scores
|
267 |
+
}
|
268 |
+
}
|
269 |
+
|
270 |
+
except Exception as e:
|
271 |
+
logger.error(f"LLM analysis failed: {str(e)}", exc_info=True)
|
272 |
+
return None
|
273 |
|

    def analyze(self, text: str) -> Dict[str, Any]:
        """
        Analyze sentiment using LLM with fallback to traditional methods.

        Args:
            text: The text to analyze

        Returns:
            Dict containing sentiment analysis results
        """
        try:
            # Try LLM analysis if enabled and available
            if self.use_ai and self.llm_available:
                llm_result = self._analyze_with_llm(text)
                if llm_result:
                    return llm_result

            # Use traditional analysis
            logger.info("Using traditional sentiment analysis")
            blob = TextBlob(text)
            sentiment_score = blob.sentiment.polarity

            # ... (unchanged traditional-analysis lines collapsed in the diff) ...
            else:
                sentiment = "Neutral"

            return {
                "sentiment": sentiment,
                "manipulation_score": min(manipulation_score, 100),
                "flagged_phrases": manipulative_phrases[:5]  # Limit to top 5 phrases
            }

        except Exception as e:
package-lock.json
CHANGED
@@ -6,6 +6,9 @@
    "": {
      "dependencies": {
        "supabase": "^2.12.1"
      },
      "devDependencies": {
        "@types/react": "^19.0.10"
      }
    },
    "node_modules/@isaacs/cliui": {
@@ -47,6 +50,16 @@
        "node": ">=14"
      }
    },
    "node_modules/@types/react": {
      "version": "19.0.10",
      "resolved": "https://registry.npmjs.org/@types/react/-/react-19.0.10.tgz",
      "integrity": "sha512-JuRQ9KXLEjaUNjTWpzuR231Z2WpIwczOkBEIvbHNCzQefFIT0L8IqE6NV6ULLyC1SI/i234JnDoMkfg+RjQj2g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "csstype": "^3.0.2"
      }
    },
    "node_modules/agent-base": {
      "version": "7.1.3",
      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz",
@@ -161,6 +174,13 @@
        "node": ">= 8"
      }
    },
    "node_modules/csstype": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
      "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/data-uri-to-buffer": {
      "version": "4.0.1",
      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
package.json
CHANGED
@@ -1,5 +1,8 @@
{
  "dependencies": {
    "supabase": "^2.12.1"
  },
  "devDependencies": {
    "@types/react": "^19.0.10"
  }
}
tests/test_LLM_comparisons.py
ADDED
@@ -0,0 +1,199 @@
from transformers import pipeline, AutoTokenizer
import unittest
from mediaunmasked.scrapers.article_scraper import ArticleScraper
from tabulate import tabulate
import torch
from typing import List
import logging
import transformers

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class MediaUnmaskLLMTester(unittest.TestCase):
    transformers.logging.set_verbosity_error()

    def setUp(self):
        """Set up LLMs and scrape article."""
        self.models = {
            # Upgraded Evidence-Based Models
            "RoBERTa-MNLI": {"model": "roberta-large-mnli", "max_length": 512},  # Corrected to standard MNLI model
            "DeBERTa-Fact": {"model": "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli", "max_length": 512},
            "T5-Large": {"model": "google/t5-v1_1-large", "max_length": 512},
            "SciBERT": {"model": "allenai/scibert_scivocab_uncased", "max_length": 512},
            "BART-FEVER": {"model": "facebook/bart-large", "max_length": 1024},  # Note: needs FEVER fine-tuning
            "MultiQA-MiniLM": {"model": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "max_length": 512},

            # Existing Models for Benchmarking
            "BART-MNLI": {"model": "facebook/bart-large-mnli", "max_length": 1024},
            "RoBERTa-Bias": {"model": "cardiffnlp/twitter-roberta-base-hate", "max_length": 512},
            "DistilBERT-Sentiment": {"model": "distilbert-base-uncased-finetuned-sst-2-english", "max_length": 512},
            "GPT2-Generation": {"model": "gpt2", "max_length": 1024},
        }

        self.device = 0 if torch.cuda.is_available() else -1
        self.scraper = ArticleScraper()
        self.article_url = "https://www.snopes.com/fact-check/trump-super-bowl-cost-taxpayers/"
        self.article_data = self.scraper.scrape_article(self.article_url) or {}

        self.results = {
            "headline": self.article_data.get("headline", "No headline"),
            "content": self.article_data.get("content", "No content available"),
            "scores": {}
        }

        self.tokenizers = {name: AutoTokenizer.from_pretrained(model["model"]) for name, model in self.models.items()}

    def _split_content(self, model_name: str, content: str) -> List[str]:
        """Split content into sections within model token limits, ensuring valid output."""
        tokenizer = self.tokenizers[model_name]
        max_length = self.models[model_name]["max_length"]

        if not content or not content.strip():
            return ["No valid content"]

        encoded = tokenizer.encode_plus(content, add_special_tokens=True, truncation=True, max_length=max_length)
        decoded = tokenizer.decode(encoded["input_ids"], skip_special_tokens=True)

        return [decoded] if decoded.strip() else ["No valid content"]

    def _get_flagged_phrases(self, model_pipeline, sections, threshold=0.6, top_k=5):
        """Extract top-scoring flagged phrases while handling None values safely."""
        if not sections or not isinstance(sections, list):
            return [("None", "N/A")]

        flagged_phrases = []

        for section in sections:
            if not section or not isinstance(section, str) or not section.strip():  # Ensure section is a valid string
                continue

            sentences = [s.strip() for s in section.split(". ") if s.strip()]
            for sentence in sentences:
                if not sentence or not isinstance(sentence, str):  # Double-check before running the model
                    continue

                try:
                    preds = model_pipeline(sentence)
                    if preds and isinstance(preds, list):
                        top_pred = max(preds, key=lambda x: x["score"])
                        if top_pred["score"] >= threshold:
                            short_phrase = " ".join(sentence.split()[:10])  # Shorten for readability
                            flagged_phrases.append((short_phrase, top_pred["score"], top_pred["label"]))
                except Exception as e:
                    logger.error(f"Error analyzing sentence: {e}")
                    continue

        flagged_phrases.sort(key=lambda x: x[1], reverse=True)
        return [(phrase, label) for phrase, _, label in flagged_phrases[:top_k]] or [("None", "N/A")]

    def test_headline_vs_content(self):
        """Check headline-content alignment."""
        headline = self.results["headline"]
        content = self.results["content"]

        for model_name in self.models:
            with self.subTest(model=model_name):
                analyzer = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
                sections = self._split_content(model_name, content)

                headline_score = max(analyzer(headline), key=lambda x: x["score"])["score"]
                content_scores = [max(analyzer(section), key=lambda x: x["score"])["score"] for section in sections]
                avg_content_score = sum(content_scores) / len(content_scores)
                consistency_score = abs(headline_score - avg_content_score)

                flagged_phrases = self._get_flagged_phrases(analyzer, sections)
                self.results["scores"].setdefault("headline_vs_content", {})[model_name] = {
                    "score": consistency_score,
                    "flagged_phrases": flagged_phrases
                }
                self.assertIsNotNone(consistency_score)

    def test_evidence_based(self):
        """Test evidence-based content."""
        content = self.results["content"]

        for model_name in self.models:
            if any(keyword in model_name.lower() for keyword in ["mnli", "fact", "fever", "qa"]):
                with self.subTest(model=model_name):
                    classifier = pipeline("zero-shot-classification", model=self.models[model_name]["model"], device=self.device)
                    sections = self._split_content(model_name, content)

                    results = [classifier(section, candidate_labels=["evidence-based", "opinion", "misleading"]) for section in sections]
                    avg_score = sum(r["scores"][r["labels"].index("evidence-based")] for r in results) / len(results)

                    flagged_phrases = self._get_flagged_phrases(classifier, sections)
                    self.results["scores"].setdefault("evidence_based", {})[model_name] = {
                        "score": avg_score,
                        "flagged_phrases": flagged_phrases
                    }
                    self.assertIsNotNone(avg_score)

    def test_manipulative_language(self):
        """Detect manipulative language."""
        content = self.results["content"]

        for model_name in self.models:
            if "sentiment" in model_name.lower() or "emotion" in model_name.lower() or "gpt" in model_name.lower():
                with self.subTest(model=model_name):
                    detector = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
                    sections = self._split_content(model_name, content)

                    results = [max(detector(section), key=lambda x: x["score"]) for section in sections]
                    avg_score = sum(r["score"] for r in results) / len(results)

                    flagged_phrases = self._get_flagged_phrases(detector, sections)
                    self.results["scores"].setdefault("manipulative_language", {})[model_name] = {
                        "score": avg_score,
                        "flagged_phrases": flagged_phrases
                    }
                    self.assertIsNotNone(avg_score)

    def test_bias_detection(self):
        """Detect bias."""
        content = self.results["content"]

        for model_name in self.models:
            if "bias" in model_name.lower() or "toxic" in model_name.lower() or "roberta" in model_name.lower():
                with self.subTest(model=model_name):
                    detector = pipeline("text-classification", model=self.models[model_name]["model"], device=self.device)
                    sections = self._split_content(model_name, content)

                    results = [max(detector(section), key=lambda x: x["score"]) for section in sections]
                    avg_score = sum(r["score"] for r in results) / len(results)

                    flagged_phrases = self._get_flagged_phrases(detector, sections)
                    self.results["scores"].setdefault("bias_detection", {})[model_name] = {
                        "score": avg_score,
                        "flagged_phrases": flagged_phrases
                    }
                    self.assertIsNotNone(avg_score)

    def tearDown(self):
        """Print top 2 models per test with clearer formatting."""
        print("\n=== Top Model Recommendations ===")

        for test_type, model_results in self.results["scores"].items():
            print(f"\nTop 2 Models for {test_type}:")

            sorted_results = sorted(
                model_results.items(),
                key=lambda x: x[1]["score"],
                reverse=(test_type != "headline_vs_content")
            )

            top_2 = sorted_results[:2]
            table = [
                [
                    model,
                    f"{res['score']:.6f}",
                    ", ".join(f"{phrase} ({label})" for phrase, label in res["flagged_phrases"])
                ]
                for model, res in top_2
            ]

            print(tabulate(table, headers=["Model", "Score", "Flagged Phrases"], tablefmt="grid"))
            criteria = "Lowest consistency score (better alignment)" if test_type == "headline_vs_content" else "Highest detection score"
            print(f"Criteria: {criteria}")

if __name__ == "__main__":
    unittest.main()
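
Because this suite downloads several large checkpoints (RoBERTa-large, DeBERTa-v3-large, BART-large) and scrapes a live URL, it is usually run on its own rather than with the rest of the tests. A minimal runner sketch, assuming the file sits at tests/test_LLM_comparisons.py under the project root:

# run_llm_comparisons.py - convenience runner for just this comparison suite
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover("tests", pattern="test_LLM_comparisons.py")
    unittest.TextTestRunner(verbosity=2).run(suite)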