Spaces:
Running
Running
File size: 1,791 Bytes
212d694 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import unittest
from mediaunmasked.scrapers.article_scraper import ArticleScraper
import logging
class TestArticleScraper(unittest.TestCase):
def setUp(self):
self.scraper = ArticleScraper()
# Configure logging for tests
logging.basicConfig(level=logging.INFO)
self.logger = logging.getLogger(__name__)
def test_cna_article(self):
"""Test scraping a Channel News Asia article"""
url = "https://www.channelnewsasia.com/singapore/singapore-mccy-sg-culture-pass-arts-culture-heritage-4951451"
result = self.scraper.scrape_article(url)
# Log the result
self.logger.info("Scraping Result:")
self.logger.info(f"Headline: {result.get('headline', 'No headline found')}")
self.logger.info(f"Content Preview: {result.get('content', 'No content found')[:200]}...")
# Basic assertions
self.assertIsNotNone(result)
self.assertIn('headline', result)
self.assertIn('content', result)
self.assertNotEqual(result['headline'], '')
self.assertNotEqual(result['content'], '')
# Print full result for manual inspection
print("\nFull Scraping Result:")
print(f"Headline: {result['headline']}")
print(f"\nContent Preview (first 500 chars):\n{result['content'][:500]}...")
def test_invalid_url(self):
"""Test scraping an invalid URL"""
url = "https://invalid.url.that.doesnt.exist"
result = self.scraper.scrape_article(url)
self.assertIsNone(result)
def test_empty_url(self):
"""Test scraping with empty URL"""
url = ""
result = self.scraper.scrape_article(url)
self.assertIsNone(result)
if __name__ == '__main__':
unittest.main() |