Spaces:
Running
Running
import unittest | |
from mediaunmasked.scrapers.article_scraper import ArticleScraper | |
import logging | |
class TestArticleScraper(unittest.TestCase):
    """Tests for ``ArticleScraper.scrape_article``.

    Each test passes a URL string to ``scrape_article`` and checks the
    returned value: a mapping with non-empty ``headline`` and ``content``
    entries for a real article, and ``None`` for an unusable URL.

    NOTE(review): ``test_cna_article`` targets a live news URL, so it
    presumably needs network access and a still-published article --
    confirm before running it in CI.
    """

    def setUp(self):
        """Create a fresh scraper and a logger before every test."""
        self.scraper = ArticleScraper()
        # Configure logging for tests. basicConfig() does nothing once the
        # root logger already has handlers, so calling it per test is safe.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def test_cna_article(self):
        """Test scraping a Channel News Asia article."""
        url = "https://www.channelnewsasia.com/singapore/singapore-mccy-sg-culture-pass-arts-culture-heritage-4951451"
        result = self.scraper.scrape_article(url)

        # Basic assertions come first: if the scrape failed and returned
        # None, the test must fail with a clear assertion message rather
        # than an AttributeError raised by the logging code below.
        self.assertIsNotNone(result)
        self.assertIn('headline', result)
        self.assertIn('content', result)
        self.assertNotEqual(result['headline'], '')
        self.assertNotEqual(result['content'], '')

        headline = result['headline']
        content = result['content']

        # Log the result (lazy %-formatting; same rendered messages).
        self.logger.info("Scraping Result:")
        self.logger.info("Headline: %s", headline)
        self.logger.info("Content Preview: %s...", content[:200])

        # Print full result for manual inspection
        print("\nFull Scraping Result:")
        print(f"Headline: {headline}")
        print(f"\nContent Preview (first 500 chars):\n{content[:500]}...")

    def test_invalid_url(self):
        """Test scraping an invalid URL; the scraper is expected to return None."""
        url = "https://invalid.url.that.doesnt.exist"
        result = self.scraper.scrape_article(url)
        self.assertIsNone(result)

    def test_empty_url(self):
        """Test scraping with an empty URL; the scraper is expected to return None."""
        url = ""
        result = self.scraper.scrape_article(url)
        self.assertIsNone(result)
# Run the test suite only when this file is executed as a script; importing
# the module (e.g. from a test runner doing discovery) must not start it.
if __name__ == '__main__':
    unittest.main()