wozwize committed on
Commit
212d694
·
1 Parent(s): 2f25629

adding test files. updating exception messages

Browse files
Dockerfile CHANGED
@@ -9,7 +9,7 @@ RUN mkdir -p /app/.cache/huggingface/hub && chmod -R 777 /app/.cache
9
 
10
  # Copy dependencies
11
  COPY requirements.txt .
12
- RUN pip install --no-cache-dir -r requirements.txt
13
 
14
  # Copy all files
15
  COPY . .
 
9
 
10
  # Copy dependencies
11
  COPY requirements.txt .
12
+ RUN pip install -r requirements.txt
13
 
14
  # Copy all files
15
  COPY . .
app/routers/analyze.py CHANGED
@@ -70,8 +70,8 @@ async def analyze_content(request: AnalyzeRequest):
70
  return AnalyzeResponse.parse_obj(response_dict)
71
 
72
  except Exception as e:
73
- logger.error(f"Analysis failed: {str(e)}", exc_info=True)
74
  raise HTTPException(
75
  status_code=500,
76
- detail=f"Analysis failed: {str(e)}"
77
  )
 
70
  return AnalyzeResponse.parse_obj(response_dict)
71
 
72
  except Exception as e:
73
+ logger.error(f"Analysis failed inside of analyze.py: {str(e)}", exc_info=True)
74
  raise HTTPException(
75
  status_code=500,
76
+ detail=f"Analysis failed inside of analyze.py: {str(e)}"
77
  )
mediaunmasked/services/analyzer_service.py CHANGED
@@ -75,8 +75,8 @@ class AnalyzerService:
75
  return AnalysisResponse.parse_obj(response_dict)
76
 
77
  except Exception as e:
78
- logger.error(f"Analysis failed: {str(e)}", exc_info=True)
79
  raise HTTPException(
80
  status_code=500,
81
- detail=f"Analysis failed: {str(e)}"
82
  )
 
75
  return AnalysisResponse.parse_obj(response_dict)
76
 
77
  except Exception as e:
78
+ logger.error(f"Analysis failed inside of analyzer_service.py: {str(e)}", exc_info=True)
79
  raise HTTPException(
80
  status_code=500,
81
+ detail=f"Analysis failed inside of analyzer_service.py: {str(e)}"
82
  )
scripts/test_scraper.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual smoke check for ``ArticleScraper``.

Running this module as a script scrapes one hard-coded article URL and
prints the headline plus a short preview of the content.
"""
from mediaunmasked.scrapers.article_scraper import ArticleScraper
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_scraper():
    """Scrape a fixed article URL and print a summary of what came back.

    A falsy scrape result is reported on stdout. Any exception raised while
    scraping or printing is logged together with its traceback and is not
    propagated to the caller.
    """
    scraper = ArticleScraper()
    url = "https://www.channelnewsasia.com/singapore/singapore-mccy-sg-culture-pass-arts-culture-heritage-4951451"

    logger.info(f"Testing scraper with URL: {url}")

    try:
        article = scraper.scrape_article(url)

        # Guard clause: nothing to preview when the scraper returned nothing.
        if not article:
            print("Scraping failed - no result returned")
            return

        divider = "-" * 50
        print("\nScraping Successful!")
        print(divider)
        print(f"Headline: {article['headline']}")
        print(divider)
        print("Content Preview (first 500 chars):")
        print(article['content'][:500])
        print("...")
        print(divider)
        print(f"Total content length: {len(article['content'])} characters")

    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(f"Error during scraping: {str(e)}")


if __name__ == "__main__":
    test_scraper()
tests/test_bias_analyzer.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Unit tests for ``BiasAnalyzer``."""
import unittest
from mediaunmasked.analyzers.bias_analyzer import BiasAnalyzer
import logging


class TestBiasAnalyzer(unittest.TestCase):
    """Runs ``BiasAnalyzer.analyze`` on left-leaning, right-leaning and neutral text."""

    def setUp(self):
        """Create the analyzer under test and a module-level logger."""
        self.analyzer = BiasAnalyzer()
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def _analyze_checked(self, passage):
        """Analyze *passage* and require a non-None result holding a 'bias' entry."""
        outcome = self.analyzer.analyze(passage)

        self.assertIsNotNone(outcome)
        self.assertIn('bias', outcome)
        return outcome

    def test_left_bias(self):
        """Left-leaning text is expected to produce a negative bias score."""
        passage = "Progressive policies have shown success in addressing income inequality and social justice issues. The government's intervention has helped protect workers' rights."

        outcome = self._analyze_checked(passage)

        # Negative score indicates left bias
        self.assertLess(outcome['bias_score'], 0)
        self.logger.info(f"Left bias result: {outcome}")

    def test_right_bias(self):
        """Right-leaning text is expected to produce a positive bias score."""
        passage = "Free market solutions and deregulation have driven economic growth. Individual responsibility and traditional values remain crucial for society."

        outcome = self._analyze_checked(passage)

        # Positive score indicates right bias
        self.assertGreater(outcome['bias_score'], 0)
        self.logger.info(f"Right bias result: {outcome}")

    def test_neutral_content(self):
        """Neutral text is expected to score within 0.2 of zero."""
        passage = "The study examined various economic policies and their outcomes. Researchers analyzed data from multiple sources to draw conclusions."

        outcome = self._analyze_checked(passage)

        # Should be close to neutral
        self.assertAlmostEqual(outcome['bias_score'], 0, delta=0.2)
        self.logger.info(f"Neutral content result: {outcome}")
tests/test_evidence_analyzer.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Unit tests for ``EvidenceAnalyzer``."""
import unittest
from mediaunmasked.analyzers.evidence_analyzer import EvidenceAnalyzer
import logging


class TestEvidenceAnalyzer(unittest.TestCase):
    """Checks the 'evidence_based_score' reported for texts of varying sourcing quality."""

    def setUp(self):
        """Create the analyzer under test and a module-level logger."""
        self.analyzer = EvidenceAnalyzer()
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def _analyze_checked(self, passage):
        """Analyze *passage* and require that a result object is returned."""
        outcome = self.analyzer.analyze(passage)

        self.assertIsNotNone(outcome)
        return outcome

    def test_well_supported_content(self):
        """Text citing studies and journals should score above 70."""
        text = """According to the WHO study, vaccination rates have increased by 25%.
        Research published in Nature shows significant results. The data from multiple
        studies indicates a clear trend, as reported in the scientific journal."""

        outcome = self._analyze_checked(text)

        self.assertGreater(outcome['evidence_based_score'], 70)
        self.logger.info(f"Well-supported content score: {outcome}")

    def test_poorly_supported_content(self):
        """Text resting on vague attributions should score below 50."""
        text = """Some people say this treatment works wonders. Many believe it's the
        best solution available. Sources claim it could be revolutionary."""

        outcome = self._analyze_checked(text)

        self.assertLess(outcome['evidence_based_score'], 50)
        self.logger.info(f"Poorly-supported content score: {outcome}")

    def test_mixed_evidence_content(self):
        """Text mixing cited research with vague claims should score between 30 and 80."""
        text = """According to recent studies, the treatment shows promise. Some experts
        claim it could be effective, while research published in medical journals
        indicates more testing is needed."""

        outcome = self._analyze_checked(text)

        # Lower bound first, then upper bound, as two separate assertions.
        self.assertGreater(outcome['evidence_based_score'], 30)
        self.assertLess(outcome['evidence_based_score'], 80)
        self.logger.info(f"Mixed evidence content score: {outcome}")
tests/test_headline_analyzer.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Unit tests for ``HeadlineAnalyzer``."""
import unittest
from mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
import logging


class TestHeadlineAnalyzer(unittest.TestCase):
    """Checks the scores ``HeadlineAnalyzer.analyze`` reports for headline/content pairs."""

    def setUp(self):
        """Create a fresh analyzer and a module-level logger for every test."""
        self.analyzer = HeadlineAnalyzer()
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def test_matching_headline(self):
        """Test when headline matches content"""
        headline = "Climate Change Impact on Global Weather Patterns"
        content = "Scientists have discovered significant changes in global weather patterns due to climate change. The study shows increasing temperatures are affecting weather systems worldwide."

        result = self.analyzer.analyze(headline, content)

        self.assertIsNotNone(result)
        self.assertIn('headline_vs_content_score', result)
        self.assertGreater(result['headline_vs_content_score'], 70)  # Should have high score

        self.logger.info(f"Matching headline score: {result['headline_vs_content_score']}")

    def test_misleading_headline(self):
        """Test when headline is misleading compared to content"""
        headline = "Shocking New Diet Guarantees Weight Loss"
        content = "While some dietary changes may contribute to weight loss, there is no guaranteed method. Studies show sustainable weight loss requires lifestyle changes."

        result = self.analyzer.analyze(headline, content)

        self.assertIsNotNone(result)
        self.assertIn('headline_vs_content_score', result)
        self.assertLess(result['headline_vs_content_score'], 50)  # Should have low score

        self.logger.info(f"Misleading headline score: {result['headline_vs_content_score']}")

    def test_empty_inputs(self):
        """Test handling of empty inputs"""
        result = self.analyzer.analyze("", "")
        self.assertIsNotNone(result)
        self.assertIn('headline_vs_content_score', result)

    # The two tests below originally took an ``analyzer`` argument that was
    # supplied by a pytest fixture. No such fixture exists in this module, and
    # one of them reused the name ``test_matching_headline``. They now use the
    # analyzer built in ``setUp`` and have unique names so every test runs.

    def test_supported_headline_low_contradiction(self):
        """Test a headline that the content loosely supports"""
        headline = "New Study Shows Coffee Reduces Heart Disease Risk"
        content = "Recent research suggests that coffee may have cardiovascular benefits."

        result = self.analyzer.analyze(headline, content)

        self.assertGreater(result["headline_vs_content_score"], 30)
        self.assertLess(result["contradiction_score"], 0.3)

    def test_contradictory_headline(self):
        """Test a headline that states the opposite of the content"""
        headline = "Coffee Increases Heart Disease Risk"
        content = "Studies show coffee decreases cardiovascular disease risk."

        result = self.analyzer.analyze(headline, content)

        self.assertLess(result["headline_vs_content_score"], 30)
        self.assertGreater(result["contradiction_score"], 0.3)
tests/test_scoring.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Unit tests for ``MediaScorer``."""
import unittest
from mediaunmasked.analyzers.scoring import MediaScorer
import logging


class TestMediaScorer(unittest.TestCase):
    """Checks the overall score and rating ``MediaScorer`` assigns to sample articles."""

    def setUp(self):
        """Create the scorer under test and a module-level logger."""
        self.scorer = MediaScorer()
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def _score_checked(self, headline, content):
        """Score the article and require that a result object is returned."""
        verdict = self.scorer.calculate_media_score(headline, content)

        self.assertIsNotNone(verdict)
        return verdict

    def test_high_quality_article(self):
        """A well-sourced article should score above 80 and rate 'Trustworthy'."""
        headline = "New Study Shows Link Between Exercise and Mental Health"
        content = """According to research published in the Journal of Medicine, regular
        exercise significantly improves mental health outcomes. The study, conducted over
        two years with 1000 participants, found a 30% reduction in anxiety symptoms among
        those who exercised regularly. Dr. Smith, lead researcher, stated that the findings
        demonstrate a clear correlation between physical activity and mental wellbeing."""

        verdict = self._score_checked(headline, content)

        self.assertGreater(verdict['media_unmasked_score'], 80)
        self.assertEqual(verdict['rating'], 'Trustworthy')
        self.logger.info(f"High quality article score: {verdict}")

    def test_biased_article(self):
        """A slanted article should score below 60 and rate 'Bias Present'."""
        headline = "Government Policies Destroying Our Way of Life"
        content = """Experts say the radical new policies are ruining everything!
        Sources claim this is the worst decision ever made. Many believe this will
        lead to disaster. The socialist agenda is clearly destroying our values."""

        verdict = self._score_checked(headline, content)

        self.assertLess(verdict['media_unmasked_score'], 60)
        self.assertEqual(verdict['rating'], 'Bias Present')
        self.logger.info(f"Biased article score: {verdict}")

    def test_misleading_article(self):
        """A sensational article should score below 50 and rate 'Misleading'."""
        headline = "Miracle Cure Found for All Diseases!"
        content = """Some people say this amazing discovery cures everything!
        You won't believe the shocking results. Everyone knows this is the
        breakthrough we've been waiting for!"""

        verdict = self._score_checked(headline, content)

        self.assertLess(verdict['media_unmasked_score'], 50)
        self.assertEqual(verdict['rating'], 'Misleading')
        self.logger.info(f"Misleading article score: {verdict}")
tests/test_scraper.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Unit tests for ``ArticleScraper``."""
import unittest
from mediaunmasked.scrapers.article_scraper import ArticleScraper
import logging


class TestArticleScraper(unittest.TestCase):
    """Exercises ``ArticleScraper.scrape_article`` with a real, an invalid and an empty URL."""

    def setUp(self):
        """Create the scraper under test and a module-level logger."""
        self.scraper = ArticleScraper()
        # Configure logging for tests
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def test_cna_article(self):
        """Test scraping a Channel News Asia article"""
        url = "https://www.channelnewsasia.com/singapore/singapore-mccy-sg-culture-pass-arts-culture-heritage-4951451"
        result = self.scraper.scrape_article(url)

        # Check for None before touching the result, so a failed scrape is
        # reported as an assertion failure rather than an AttributeError
        # from ``result.get`` below.
        self.assertIsNotNone(result)

        # Log the result
        self.logger.info("Scraping Result:")
        self.logger.info(f"Headline: {result.get('headline', 'No headline found')}")
        self.logger.info(f"Content Preview: {result.get('content', 'No content found')[:200]}...")

        # Basic assertions
        self.assertIn('headline', result)
        self.assertIn('content', result)
        self.assertNotEqual(result['headline'], '')
        self.assertNotEqual(result['content'], '')

        # Print full result for manual inspection
        print("\nFull Scraping Result:")
        print(f"Headline: {result['headline']}")
        print(f"\nContent Preview (first 500 chars):\n{result['content'][:500]}...")

    def test_invalid_url(self):
        """Test scraping an invalid URL"""
        url = "https://invalid.url.that.doesnt.exist"
        result = self.scraper.scrape_article(url)
        self.assertIsNone(result)

    def test_empty_url(self):
        """Test scraping with empty URL"""
        url = ""
        result = self.scraper.scrape_article(url)
        self.assertIsNone(result)


if __name__ == '__main__':
    unittest.main()
tests/test_sentiment_analyzer.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Unit tests for ``SentimentAnalyzer``."""
import unittest
from mediaunmasked.analyzers.sentiment_analyzer import SentimentAnalyzer
import logging


class TestSentimentAnalyzer(unittest.TestCase):
    """Checks the sentiment label and manipulation fields returned by ``analyze``."""

    def setUp(self):
        """Create the analyzer under test and a module-level logger."""
        self.analyzer = SentimentAnalyzer()
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def _analyze_checked(self, passage):
        """Analyze *passage* and require that a result object is returned."""
        outcome = self.analyzer.analyze(passage)

        self.assertIsNotNone(outcome)
        return outcome

    def test_positive_sentiment(self):
        """Hopeful text should be labelled 'Positive'."""
        passage = "The breakthrough research shows promising results in cancer treatment, bringing hope to millions of patients worldwide."

        outcome = self._analyze_checked(passage)

        self.assertEqual(outcome['sentiment'], 'Positive')
        self.logger.info(f"Positive sentiment result: {outcome}")

    def test_negative_sentiment(self):
        """Text describing a disaster should be labelled 'Negative'."""
        passage = "The devastating impact of the disaster has left thousands homeless and caused widespread damage to infrastructure."

        outcome = self._analyze_checked(passage)

        self.assertEqual(outcome['sentiment'], 'Negative')
        self.logger.info(f"Negative sentiment result: {outcome}")

    def test_manipulative_content(self):
        """Hype-laden text should score above 20 for manipulation and flag at least one phrase."""
        passage = "Experts say this shocking new discovery will change everything! Sources claim it's the biggest breakthrough ever, and everyone knows it's true!"

        outcome = self._analyze_checked(passage)

        self.assertGreater(outcome['manipulation_score'], 20)
        self.assertGreater(len(outcome['flagged_phrases']), 0)
        self.logger.info(f"Manipulative content result: {outcome}")
tests/unit/test_headline_analyzer.py DELETED
@@ -1,24 +0,0 @@
1
- import pytest
2
- from src.mediaunmasked.analyzers.headline_analyzer import HeadlineAnalyzer
3
-
4
- @pytest.fixture
5
- def analyzer():
6
- return HeadlineAnalyzer()
7
-
8
- def test_matching_headline(analyzer):
9
- headline = "New Study Shows Coffee Reduces Heart Disease Risk"
10
- content = "Recent research suggests that coffee may have cardiovascular benefits."
11
-
12
- result = analyzer.analyze(headline, content)
13
-
14
- assert result["headline_vs_content_score"] > 30
15
- assert result["contradiction_score"] < 0.3
16
-
17
- def test_contradictory_headline(analyzer):
18
- headline = "Coffee Increases Heart Disease Risk"
19
- content = "Studies show coffee decreases cardiovascular disease risk."
20
-
21
- result = analyzer.analyze(headline, content)
22
-
23
- assert result["headline_vs_content_score"] < 30
24
- assert result["contradiction_score"] > 0.3