abhisheksan committed
Commit 1aa3253 · verified · 1 Parent(s): 9c81152

Update twitter_service.py

Files changed (1)
  1. twitter_service.py +441 -442
twitter_service.py CHANGED
@@ -1,443 +1,442 @@
- import asyncio
- import json
- import os
- import time
- from datetime import datetime, timedelta, timezone
- from typing import Dict, List, Optional
-
- import httpx
- from cachetools import TTLCache
- from loguru import logger
-
- from models import NewsSource, Tweet
-
-
- class RssTwitterService:
-     """Service for collecting tweets via RSS feeds."""
-
-     def __init__(self):
-         self.cache_expiry = int(os.getenv("CACHE_EXPIRY_MINUTES", 120))
-
-         # HTTP client for making requests
-         self.client = None
-
-         # Enhanced cache with TTL and persistence
-         self.tweet_cache_dir = os.path.join(os.path.dirname(__file__), ".tweet_cache")
-         os.makedirs(self.tweet_cache_dir, exist_ok=True)
-         self.in_memory_cache = TTLCache(maxsize=100, ttl=self.cache_expiry * 60)
-
-         # Statistics and monitoring
-         self.stats = {
-             "requests": 0,
-             "cache_hits": 0,
-             "errors": 0,
-             "success": 0
-         }
-         self.last_stats_reset = time.time()
-
-         # Map of Twitter handles to RSS feed URLs - directly added
-         self.rss_feed_urls = {
-             "sidhant": "https://rss.app/feeds/v1.1/e3jA7zWvdgakMgqE.json",
-             "ShivAroor": "https://rss.app/feeds/v1.1/1f4kMzGI07mYZ83m.json",
-             "IAF_MCC": "https://rss.app/feeds/v1.1/KRlErbCqqu4sKtNP.json",
-             "adgpi": "https://rss.app/feeds/v1.1/br2tH5o30zxi6LjL.json",
-             "SpokespersonMoD": "https://rss.app/feeds/v1.1/tXYyx8q4L9xDNssq.json",
-             "MIB_India": "https://rss.app/feeds/v1.1/0pABfGkIm2Y1ru5z.json",
-             "DrSJaishankar": "https://rss.app/feeds/v1.1/Dq7PRmVOagKt3Q5D.json"
-         }
-
-         # Default trusted news sources
-         self.news_sources = [
-             NewsSource(name="Shiv Aroor", twitter_handle="ShivAroor", country="India", reliability_score=0.85),
-             NewsSource(name="Sidhant Sibal", twitter_handle="sidhant", country="India", reliability_score=0.85),
-             NewsSource(name="Indian Air Force", twitter_handle="IAF_MCC", country="India", reliability_score=0.95),
-             NewsSource(name="Indian Army", twitter_handle="adgpi", country="India", reliability_score=0.95),
-             NewsSource(name="Indian Defence Ministry", twitter_handle="SpokespersonMoD", country="India", reliability_score=0.95),
-             NewsSource(name="MIB India", twitter_handle="MIB_India", country="India", reliability_score=0.95),
-             NewsSource(name="Indian External Affairs Minister", twitter_handle="DrSJaishankar", country="India", reliability_score=0.95),
-         ]
-
-     async def initialize(self) -> bool:
-         """Initialize the Twitter service with RSS feed capability."""
-         try:
-             logger.info("Initializing Twitter service with RSS feed capability")
-
-             # Initialize HTTP client
-             self.client = httpx.AsyncClient(
-                 timeout=30.0,
-                 follow_redirects=True,
-                 http2=True
-             )
-
-             # Log the pre-registered RSS feeds
-             logger.info(f"Pre-registered {len(self.rss_feed_urls)} RSS feeds for Twitter handles")
-
-             # Schedule background maintenance
-             asyncio.create_task(self._background_maintenance())
-
-             logger.info("RSS Twitter service initialized successfully")
-             return True
-
-         except Exception as e:
-             logger.error(f"Failed to initialize RSS Twitter service: {str(e)}")
-             return False
-
-     async def _background_maintenance(self):
-         """Run background maintenance tasks"""
-         while True:
-             try:
-                 # Wait between maintenance cycles
-                 await asyncio.sleep(900)  # 15 minutes
-
-                 # Log statistics
-                 self._log_statistics()
-
-                 # Clean up cache files
-                 self._cleanup_expired_cache()
-
-                 # Reset statistics periodically
-                 if time.time() - self.last_stats_reset > 3600:  # Reset every hour
-                     self.stats = {key: 0 for key in self.stats}
-                     self.last_stats_reset = time.time()
-
-             except Exception as e:
-                 logger.error(f"Error in background maintenance: {str(e)}")
-
-     def _log_statistics(self):
-         """Log service statistics"""
-         total_requests = max(1, self.stats["requests"])
-         cache_hit_rate = self.stats["cache_hits"] / total_requests * 100
-         error_rate = self.stats["errors"] / total_requests * 100
-
-         logger.info(f"RssTwitterService stats - Requests: {total_requests}, " +
-                     f"Cache hits: {self.stats['cache_hits']} ({cache_hit_rate:.1f}%), " +
-                     f"Errors: {self.stats['errors']} ({error_rate:.1f}%)")
-
-     def _cleanup_expired_cache(self):
-         """Clean up expired cache files"""
-         now = time.time()
-         expiry_time = self.cache_expiry * 60
-
-         try:
-             for filename in os.listdir(self.tweet_cache_dir):
-                 if not filename.endswith('.json'):
-                     continue
-
-                 file_path = os.path.join(self.tweet_cache_dir, filename)
-
-                 try:
-                     file_modified_time = os.path.getmtime(file_path)
-                     if now - file_modified_time > expiry_time:
-                         os.remove(file_path)
-                         logger.debug(f"Removed expired cache file: {filename}")
-                 except Exception as e:
-                     logger.error(f"Error cleaning up cache file {filename}: {e}")
-         except Exception as e:
-             logger.error(f"Error during cache cleanup: {e}")
-
-     def _get_cache_key(self, twitter_handle, limit):
-         """Generate a cache key for a specific Twitter source"""
-         return f"{twitter_handle}_{limit}"
-
-     def _get_cache_path(self, cache_key):
-         """Get filesystem path for a cache key"""
-         # Create a safe filename from the cache key
-         import re
-         safe_key = re.sub(r'[^a-zA-Z0-9_-]', '_', cache_key)
-         return os.path.join(self.tweet_cache_dir, f"{safe_key}.json")
-
-     def _get_from_cache(self, cache_key):
-         """Get tweets from cache (memory or disk)"""
-         # Check memory cache first
-         if cache_key in self.in_memory_cache:
-             self.stats["cache_hits"] += 1
-             return self.in_memory_cache[cache_key]
-
-         # Check disk cache
-         cache_path = self._get_cache_path(cache_key)
-         if os.path.exists(cache_path):
-             try:
-                 with open(cache_path, 'r') as f:
-                     cache_data = json.load(f)
-
-                 # Check if cache is still valid
-                 if time.time() - cache_data['timestamp'] < self.cache_expiry * 60:
-                     # Convert dictionaries back to Tweet objects
-                     tweets = []
-                     for tweet_dict in cache_data['tweets']:
-                         # Parse created_at back to datetime if it's stored as a string
-                         if 'created_at' in tweet_dict and isinstance(tweet_dict['created_at'], str):
-                             try:
-                                 # Strip timezone info to make naive datetime
-                                 dt = datetime.fromisoformat(tweet_dict['created_at'].replace('Z', '+00:00'))
-                                 tweet_dict['created_at'] = dt.replace(tzinfo=None)
-                             except ValueError:
-                                 tweet_dict['created_at'] = datetime.now()
-
-                         tweets.append(Tweet(**tweet_dict))
-
-                     # Restore to memory cache and return
-                     self.in_memory_cache[cache_key] = tweets
-                     self.stats["cache_hits"] += 1
-                     return tweets
-                 else:
-                     # Cache expired, remove file
-                     os.remove(cache_path)
-             except Exception as e:
-                 logger.error(f"Error reading cache file {cache_path}: {e}")
-
-         return None
-
-     def _save_to_cache(self, cache_key, tweets):
-         """Save tweets to cache (memory and disk)"""
-         # Save to memory cache
-         self.in_memory_cache[cache_key] = tweets
-
-         # Convert tweets to dictionaries for JSON serialization
-         tweet_dicts = []
-         for tweet in tweets:
-             # Make sure created_at is serializable
-             if hasattr(tweet.created_at, 'tzinfo') and tweet.created_at.tzinfo is not None:
-                 # Convert to UTC and make naive for consistent comparisons
-                 created_at = tweet.created_at.astimezone(timezone.utc).replace(tzinfo=None)
-             else:
-                 created_at = tweet.created_at
-
-             tweet_dicts.append({
-                 'id': tweet.id,
-                 'text': tweet.text,
-                 'author': tweet.author,
-                 'created_at': created_at.isoformat() if hasattr(created_at, 'isoformat') else str(created_at),
-                 'engagement': tweet.engagement,
-                 'url': tweet.url
-             })
-
-         # Save to disk cache
-         cache_path = self._get_cache_path(cache_key)
-         try:
-             with open(cache_path, 'w') as f:
-                 json.dump({
-                     'tweets': tweet_dicts,
-                     'timestamp': time.time()
-                 }, f)
-         except Exception as e:
-             logger.error(f"Error writing to cache file {cache_path}: {e}")
-
-     def register_rss_feed(self, twitter_handle: str, rss_url: str):
-         """Register an RSS feed URL for a specific Twitter handle"""
-         self.rss_feed_urls[twitter_handle] = rss_url
-         logger.info(f"Registered RSS feed for {twitter_handle}: {rss_url}")
-
-     def register_rss_feed_batch(self, feed_map: Dict[str, str]):
-         """Register multiple RSS feeds at once"""
-         self.rss_feed_urls.update(feed_map)
-         logger.info(f"Registered {len(feed_map)} RSS feeds")
-
-     async def get_tweets_from_source(self, source: NewsSource, limit: int = 20, retries: int = 3) -> List[Tweet]:
-         """Get tweets from a specific Twitter source using RSS feed."""
-         cache_key = self._get_cache_key(source.twitter_handle, limit)
-
-         # Check cache first
-         cached_tweets = self._get_from_cache(cache_key)
-         if cached_tweets:
-             logger.debug(f"Returning cached tweets for {source.twitter_handle}")
-             return cached_tweets
-
-         self.stats["requests"] += 1
-
-         # Check if we have a registered RSS feed for this Twitter handle
-         rss_url = self.rss_feed_urls.get(source.twitter_handle)
-         if not rss_url:
-             logger.warning(f"No RSS feed registered for {source.twitter_handle}")
-             return []
-
-         # Extract tweets with retry logic
-         tweets = []
-
-         for attempt in range(retries + 1):
-             try:
-                 logger.info(f"Fetching tweets from RSS feed for {source.twitter_handle} (attempt {attempt + 1}/{retries + 1})")
-
-                 # Add cache-busting parameter
-                 params = {"_": str(int(time.time()))}
-
-                 response = await self.client.get(rss_url, params=params)
-
-                 if response.status_code == 200:
-                     # Parse the RSS JSON feed
-                     try:
-                         self.stats["success"] += 1
-                         rss_data = response.json()
-
-                         # Parse items from the feed
-                         if "items" in rss_data:
-                             items = rss_data["items"][:limit]
-
-                             for item in items:
-                                 try:
-                                     # Extract tweet ID from the URL
-                                     tweet_id = item.get("id", "").split("/")[-1]
-                                     if not tweet_id:
-                                         continue
-
-                                     # Extract tweet text
-                                     tweet_text = item.get("content_text", item.get("title", ""))
-
-                                     # Extract timestamp
-                                     date_str = item.get("date_published", "")
-                                     try:
-                                         # Convert to datetime then strip timezone info to make naive
-                                         dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
-                                         created_at = dt.replace(tzinfo=None)
-                                     except (ValueError, TypeError):
-                                         created_at = datetime.now()
-
-                                     # Extract engagement metrics if available
-                                     engagement = {"likes": 0, "retweets": 0, "replies": 0, "views": 0}
-
-                                     # Try to extract engagement from extensions or additional fields
-                                     if "x_metadata" in item:
-                                         x_data = item["x_metadata"]
-                                         engagement["likes"] = x_data.get("likes", 0)
-                                         engagement["retweets"] = x_data.get("retweets", 0)
-                                         engagement["replies"] = x_data.get("replies", 0)
-                                         engagement["views"] = x_data.get("views", 0)
-
-                                     # Construct tweet URL
-                                     tweet_url = item.get("url", f"https://x.com/{source.twitter_handle}/status/{tweet_id}")
-
-                                     tweets.append(
-                                         Tweet(
-                                             id=tweet_id,
-                                             text=tweet_text,
-                                             author=source.twitter_handle,
-                                             created_at=created_at,
-                                             engagement=engagement,
-                                             url=tweet_url
-                                         )
-                                     )
-                                 except Exception as e:
-                                     logger.error(f"Error processing tweet from RSS for {source.twitter_handle}: {str(e)}")
-
-                         # Cache the results
-                         if tweets:
-                             self._save_to_cache(cache_key, tweets)
-                             logger.info(f"Fetched and cached {len(tweets)} tweets from RSS feed for {source.twitter_handle}")
-
-                         return tweets
-
-                     except json.JSONDecodeError:
-                         logger.error(f"Invalid JSON response from RSS feed for {source.twitter_handle}")
-                         self.stats["errors"] += 1
-
-                         if attempt < retries:
-                             await asyncio.sleep(2)
-                             continue
-                         else:
-                             return []
-                 else:
-                     # HTTP error
-                     self.stats["errors"] += 1
-                     logger.error(f"Failed to fetch RSS feed for {source.twitter_handle}: HTTP {response.status_code}")
-
-                     if attempt < retries:
-                         await asyncio.sleep(5)
-                         continue
-                     else:
-                         return []
-
-             except Exception as e:
-                 self.stats["errors"] += 1
-                 logger.error(f"Error fetching RSS feed for {source.twitter_handle}: {str(e)}")
-
-                 if attempt < retries:
-                     await asyncio.sleep(5)
-                     continue
-
-         return []  # Return empty list if all retries failed
-
-     async def get_related_tweets(self, keywords: List[str], days_back: int = 2) -> List[Tweet]:
-         """
-         Get tweets related to specific keywords from trusted news sources only.
-         """
-         all_tweets = []
-         # Create naive datetime for consistent comparisons
-         cutoff_date = datetime.now() - timedelta(days=days_back)
-
-         # Filter to active sources that have RSS feeds
-         active_sources = [source for source in self.news_sources
-                           if source.is_active and source.twitter_handle in self.rss_feed_urls]
-
-         # Sort sources by reliability score (prioritize higher scores)
-         active_sources.sort(key=lambda s: s.reliability_score, reverse=True)
-
-         logger.info(f"Collecting tweets from {len(active_sources)} trusted news sources with RSS feeds")
-
-         # Process sources in parallel
-         tasks = []
-         for source in active_sources:
-             tasks.append(self.get_tweets_from_source(source, limit=50))
-
-         source_tweets_list = await asyncio.gather(*tasks)
-
-         # Process results
-         for source_tweets in source_tweets_list:
-             # Filter tweets by keywords and date
-             for tweet in source_tweets:
-                 try:
-                     # Make sure we're comparing naive datetimes
-                     tweet_date = tweet.created_at
-                     if hasattr(tweet_date, 'tzinfo') and tweet_date.tzinfo is not None:
-                         tweet_date = tweet_date.replace(tzinfo=None)
-
-                     if (tweet_date >= cutoff_date and
-                             any(keyword.lower() in tweet.text.lower() for keyword in keywords)):
-                         all_tweets.append(tweet)
-                 except Exception as e:
-                     logger.error(f"Error processing tweet during filtering: {str(e)}")
-
-         # If we have very few results, try with more relaxed filtering
-         if len(all_tweets) < 5 and active_sources:
-             logger.info("Few relevant tweets found, trying more relaxed filtering")
-
-             # Process all tweets again with more relaxed keyword matching
-             for source_tweets in source_tweets_list:
-                 for tweet in source_tweets:
-                     try:
-                         # Make sure we're comparing naive datetimes
-                         tweet_date = tweet.created_at
-                         if hasattr(tweet_date, 'tzinfo') and tweet_date.tzinfo is not None:
-                             tweet_date = tweet_date.replace(tzinfo=None)
-
-                         if tweet_date >= cutoff_date:
-                             for keyword in keywords:
-                                 # Split keyword into parts and check if any part matches
-                                 keyword_parts = keyword.lower().split()
-                                 if any(part in tweet.text.lower() for part in keyword_parts if len(part) > 3):
-                                     if tweet.id not in [t.id for t in all_tweets]:
-                                         all_tweets.append(tweet)
-                                     break
-                     except Exception as e:
-                         logger.error(f"Error during relaxed filtering: {str(e)}")
-
-         # Sort by recency
-         all_tweets.sort(key=lambda x: x.created_at, reverse=True)
-
-         logger.info(f"Found {len(all_tweets)} tweets from trusted sources related to keywords: {keywords}")
-         return all_tweets
-
-     def update_sources(self, sources: List[NewsSource]) -> None:
-         """Update the list of trusted news sources."""
-         self.news_sources = sources
-         # Clear cache when sources are updated
-         self.in_memory_cache.clear()
-         logger.info(f"Updated trusted news sources. New count: {len(sources)}")
-
-     def get_sources(self) -> List[NewsSource]:
-         """Get the current list of trusted news sources."""
-         return self.news_sources
-
-     async def close(self):
-         """Clean up resources."""
-         if self.client:
+ import asyncio
+ import json
+ import os
+ import time
+ from datetime import datetime, timedelta, timezone
+ from typing import Dict, List, Optional
+
+ import httpx
+ from cachetools import TTLCache
+ from loguru import logger
+
+ from models import NewsSource, Tweet
+
+
+ class RssTwitterService:
+     """Service for collecting tweets via RSS feeds."""
+
+     def __init__(self):
+         self.cache_expiry = int(os.getenv("CACHE_EXPIRY_MINUTES", 120))
+
+         # HTTP client for making requests
+         self.client = None
+
+         # Enhanced cache with TTL and persistence
+         self.tweet_cache_dir = os.path.join(os.path.dirname(__file__), ".tweet_cache")
+         os.makedirs(self.tweet_cache_dir, exist_ok=True)
+         self.in_memory_cache = TTLCache(maxsize=100, ttl=self.cache_expiry * 60)
+
+         # Statistics and monitoring
+         self.stats = {
+             "requests": 0,
+             "cache_hits": 0,
+             "errors": 0,
+             "success": 0
+         }
+         self.last_stats_reset = time.time()
+
+         # Map of Twitter handles to RSS feed URLs - directly added
+         self.rss_feed_urls = {
+             "sidhant": "https://rsshub.app/twitter/user/sidhant",
+             "ShivAroor": "https://rsshub.app/twitter/user/ShivAroor",
+             "IAF_MCC": "https://rsshub.app/twitter/user/IAF_MCC",
+             "adgpi": "https://rsshub.app/twitter/user/adgpi",
+             "SpokespersonMoD": "https://rsshub.app/twitter/user/SpokespersonMoD",
+             "MIB_India": "https://rsshub.app/twitter/user/MIB_India",
+         }
+
+         # Default trusted news sources
+         self.news_sources = [
+             NewsSource(name="Shiv Aroor", twitter_handle="ShivAroor", country="India", reliability_score=0.85),
+             NewsSource(name="Sidhant Sibal", twitter_handle="sidhant", country="India", reliability_score=0.85),
+             NewsSource(name="Indian Air Force", twitter_handle="IAF_MCC", country="India", reliability_score=0.95),
+             NewsSource(name="Indian Army", twitter_handle="adgpi", country="India", reliability_score=0.95),
+             NewsSource(name="Indian Defence Ministry", twitter_handle="SpokespersonMoD", country="India", reliability_score=0.95),
+             NewsSource(name="MIB India", twitter_handle="MIB_India", country="India", reliability_score=0.95),
+             NewsSource(name="Indian External Affairs Minister", twitter_handle="DrSJaishankar", country="India", reliability_score=0.95),
+         ]
+
+     async def initialize(self) -> bool:
+         """Initialize the Twitter service with RSS feed capability."""
+         try:
+             logger.info("Initializing Twitter service with RSS feed capability")
+
+             # Initialize HTTP client
+             self.client = httpx.AsyncClient(
+                 timeout=30.0,
+                 follow_redirects=True,
+                 http2=True
+             )
+
+             # Log the pre-registered RSS feeds
+             logger.info(f"Pre-registered {len(self.rss_feed_urls)} RSS feeds for Twitter handles")
+
+             # Schedule background maintenance
+             asyncio.create_task(self._background_maintenance())
+
+             logger.info("RSS Twitter service initialized successfully")
+             return True
+
+         except Exception as e:
+             logger.error(f"Failed to initialize RSS Twitter service: {str(e)}")
+             return False
+
+     async def _background_maintenance(self):
+         """Run background maintenance tasks"""
+         while True:
+             try:
+                 # Wait between maintenance cycles
+                 await asyncio.sleep(900)  # 15 minutes
+
+                 # Log statistics
+                 self._log_statistics()
+
+                 # Clean up cache files
+                 self._cleanup_expired_cache()
+
+                 # Reset statistics periodically
+                 if time.time() - self.last_stats_reset > 3600:  # Reset every hour
+                     self.stats = {key: 0 for key in self.stats}
+                     self.last_stats_reset = time.time()
+
+             except Exception as e:
+                 logger.error(f"Error in background maintenance: {str(e)}")
+
+     def _log_statistics(self):
+         """Log service statistics"""
+         total_requests = max(1, self.stats["requests"])
+         cache_hit_rate = self.stats["cache_hits"] / total_requests * 100
+         error_rate = self.stats["errors"] / total_requests * 100
+
+         logger.info(f"RssTwitterService stats - Requests: {total_requests}, " +
+                     f"Cache hits: {self.stats['cache_hits']} ({cache_hit_rate:.1f}%), " +
+                     f"Errors: {self.stats['errors']} ({error_rate:.1f}%)")
+
+     def _cleanup_expired_cache(self):
+         """Clean up expired cache files"""
+         now = time.time()
+         expiry_time = self.cache_expiry * 60
+
+         try:
+             for filename in os.listdir(self.tweet_cache_dir):
+                 if not filename.endswith('.json'):
+                     continue
+
+                 file_path = os.path.join(self.tweet_cache_dir, filename)
+
+                 try:
+                     file_modified_time = os.path.getmtime(file_path)
+                     if now - file_modified_time > expiry_time:
+                         os.remove(file_path)
+                         logger.debug(f"Removed expired cache file: {filename}")
+                 except Exception as e:
+                     logger.error(f"Error cleaning up cache file {filename}: {e}")
+         except Exception as e:
+             logger.error(f"Error during cache cleanup: {e}")
+
+     def _get_cache_key(self, twitter_handle, limit):
+         """Generate a cache key for a specific Twitter source"""
+         return f"{twitter_handle}_{limit}"
+
+     def _get_cache_path(self, cache_key):
+         """Get filesystem path for a cache key"""
+         # Create a safe filename from the cache key
+         import re
+         safe_key = re.sub(r'[^a-zA-Z0-9_-]', '_', cache_key)
+         return os.path.join(self.tweet_cache_dir, f"{safe_key}.json")
+
+     def _get_from_cache(self, cache_key):
+         """Get tweets from cache (memory or disk)"""
+         # Check memory cache first
+         if cache_key in self.in_memory_cache:
+             self.stats["cache_hits"] += 1
+             return self.in_memory_cache[cache_key]
+
+         # Check disk cache
+         cache_path = self._get_cache_path(cache_key)
+         if os.path.exists(cache_path):
+             try:
+                 with open(cache_path, 'r') as f:
+                     cache_data = json.load(f)
+
+                 # Check if cache is still valid
+                 if time.time() - cache_data['timestamp'] < self.cache_expiry * 60:
+                     # Convert dictionaries back to Tweet objects
+                     tweets = []
+                     for tweet_dict in cache_data['tweets']:
+                         # Parse created_at back to datetime if it's stored as a string
+                         if 'created_at' in tweet_dict and isinstance(tweet_dict['created_at'], str):
+                             try:
+                                 # Strip timezone info to make naive datetime
+                                 dt = datetime.fromisoformat(tweet_dict['created_at'].replace('Z', '+00:00'))
+                                 tweet_dict['created_at'] = dt.replace(tzinfo=None)
+                             except ValueError:
+                                 tweet_dict['created_at'] = datetime.now()
+
+                         tweets.append(Tweet(**tweet_dict))
+
+                     # Restore to memory cache and return
+                     self.in_memory_cache[cache_key] = tweets
+                     self.stats["cache_hits"] += 1
+                     return tweets
+                 else:
+                     # Cache expired, remove file
+                     os.remove(cache_path)
+             except Exception as e:
+                 logger.error(f"Error reading cache file {cache_path}: {e}")
+
+         return None
+
+     def _save_to_cache(self, cache_key, tweets):
+         """Save tweets to cache (memory and disk)"""
+         # Save to memory cache
+         self.in_memory_cache[cache_key] = tweets
+
+         # Convert tweets to dictionaries for JSON serialization
+         tweet_dicts = []
+         for tweet in tweets:
+             # Make sure created_at is serializable
+             if hasattr(tweet.created_at, 'tzinfo') and tweet.created_at.tzinfo is not None:
+                 # Convert to UTC and make naive for consistent comparisons
+                 created_at = tweet.created_at.astimezone(timezone.utc).replace(tzinfo=None)
+             else:
+                 created_at = tweet.created_at
+
+             tweet_dicts.append({
+                 'id': tweet.id,
+                 'text': tweet.text,
+                 'author': tweet.author,
+                 'created_at': created_at.isoformat() if hasattr(created_at, 'isoformat') else str(created_at),
+                 'engagement': tweet.engagement,
+                 'url': tweet.url
+             })
+
+         # Save to disk cache
+         cache_path = self._get_cache_path(cache_key)
+         try:
+             with open(cache_path, 'w') as f:
+                 json.dump({
+                     'tweets': tweet_dicts,
+                     'timestamp': time.time()
+                 }, f)
+         except Exception as e:
+             logger.error(f"Error writing to cache file {cache_path}: {e}")
+
+     def register_rss_feed(self, twitter_handle: str, rss_url: str):
+         """Register an RSS feed URL for a specific Twitter handle"""
+         self.rss_feed_urls[twitter_handle] = rss_url
+         logger.info(f"Registered RSS feed for {twitter_handle}: {rss_url}")
+
+     def register_rss_feed_batch(self, feed_map: Dict[str, str]):
+         """Register multiple RSS feeds at once"""
+         self.rss_feed_urls.update(feed_map)
+         logger.info(f"Registered {len(feed_map)} RSS feeds")
+
+     async def get_tweets_from_source(self, source: NewsSource, limit: int = 20, retries: int = 3) -> List[Tweet]:
+         """Get tweets from a specific Twitter source using RSS feed."""
+         cache_key = self._get_cache_key(source.twitter_handle, limit)
+
+         # Check cache first
+         cached_tweets = self._get_from_cache(cache_key)
+         if cached_tweets:
+             logger.debug(f"Returning cached tweets for {source.twitter_handle}")
+             return cached_tweets
+
+         self.stats["requests"] += 1
+
+         # Check if we have a registered RSS feed for this Twitter handle
+         rss_url = self.rss_feed_urls.get(source.twitter_handle)
+         if not rss_url:
+             logger.warning(f"No RSS feed registered for {source.twitter_handle}")
+             return []
+
+         # Extract tweets with retry logic
+         tweets = []
+
+         for attempt in range(retries + 1):
+             try:
+                 logger.info(f"Fetching tweets from RSS feed for {source.twitter_handle} (attempt {attempt + 1}/{retries + 1})")
+
+                 # Add cache-busting parameter
+                 params = {"_": str(int(time.time()))}
+
+                 response = await self.client.get(rss_url, params=params)
+
+                 if response.status_code == 200:
+                     # Parse the RSS JSON feed
+                     try:
+                         self.stats["success"] += 1
+                         rss_data = response.json()
+
+                         # Parse items from the feed
+                         if "items" in rss_data:
+                             items = rss_data["items"][:limit]
+
+                             for item in items:
+                                 try:
+                                     # Extract tweet ID from the URL
+                                     tweet_id = item.get("id", "").split("/")[-1]
+                                     if not tweet_id:
+                                         continue
+
+                                     # Extract tweet text
+                                     tweet_text = item.get("content_text", item.get("title", ""))
+
+                                     # Extract timestamp
+                                     date_str = item.get("date_published", "")
+                                     try:
+                                         # Convert to datetime then strip timezone info to make naive
+                                         dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
+                                         created_at = dt.replace(tzinfo=None)
+                                     except (ValueError, TypeError):
+                                         created_at = datetime.now()
+
+                                     # Extract engagement metrics if available
+                                     engagement = {"likes": 0, "retweets": 0, "replies": 0, "views": 0}
+
+                                     # Try to extract engagement from extensions or additional fields
+                                     if "x_metadata" in item:
+                                         x_data = item["x_metadata"]
+                                         engagement["likes"] = x_data.get("likes", 0)
+                                         engagement["retweets"] = x_data.get("retweets", 0)
+                                         engagement["replies"] = x_data.get("replies", 0)
+                                         engagement["views"] = x_data.get("views", 0)
+
+                                     # Construct tweet URL
+                                     tweet_url = item.get("url", f"https://x.com/{source.twitter_handle}/status/{tweet_id}")
+
+                                     tweets.append(
+                                         Tweet(
+                                             id=tweet_id,
+                                             text=tweet_text,
+                                             author=source.twitter_handle,
+                                             created_at=created_at,
+                                             engagement=engagement,
+                                             url=tweet_url
+                                         )
+                                     )
+                                 except Exception as e:
+                                     logger.error(f"Error processing tweet from RSS for {source.twitter_handle}: {str(e)}")
+
+                         # Cache the results
+                         if tweets:
+                             self._save_to_cache(cache_key, tweets)
+                             logger.info(f"Fetched and cached {len(tweets)} tweets from RSS feed for {source.twitter_handle}")
+
+                         return tweets
+
+                     except json.JSONDecodeError:
+                         logger.error(f"Invalid JSON response from RSS feed for {source.twitter_handle}")
+                         self.stats["errors"] += 1
+
+                         if attempt < retries:
+                             await asyncio.sleep(2)
+                             continue
+                         else:
+                             return []
+                 else:
+                     # HTTP error
+                     self.stats["errors"] += 1
+                     logger.error(f"Failed to fetch RSS feed for {source.twitter_handle}: HTTP {response.status_code}")
+
+                     if attempt < retries:
+                         await asyncio.sleep(5)
+                         continue
+                     else:
+                         return []
+
+             except Exception as e:
+                 self.stats["errors"] += 1
+                 logger.error(f"Error fetching RSS feed for {source.twitter_handle}: {str(e)}")
+
+                 if attempt < retries:
+                     await asyncio.sleep(5)
+                     continue
+
+         return []  # Return empty list if all retries failed
+
+     async def get_related_tweets(self, keywords: List[str], days_back: int = 2) -> List[Tweet]:
+         """
+         Get tweets related to specific keywords from trusted news sources only.
+         """
+         all_tweets = []
+         # Create naive datetime for consistent comparisons
+         cutoff_date = datetime.now() - timedelta(days=days_back)
+
+         # Filter to active sources that have RSS feeds
+         active_sources = [source for source in self.news_sources
+                           if source.is_active and source.twitter_handle in self.rss_feed_urls]
+
+         # Sort sources by reliability score (prioritize higher scores)
+         active_sources.sort(key=lambda s: s.reliability_score, reverse=True)
+
+         logger.info(f"Collecting tweets from {len(active_sources)} trusted news sources with RSS feeds")
+
+         # Process sources in parallel
+         tasks = []
+         for source in active_sources:
+             tasks.append(self.get_tweets_from_source(source, limit=50))
+
+         source_tweets_list = await asyncio.gather(*tasks)
+
+         # Process results
+         for source_tweets in source_tweets_list:
+             # Filter tweets by keywords and date
+             for tweet in source_tweets:
+                 try:
+                     # Make sure we're comparing naive datetimes
+                     tweet_date = tweet.created_at
+                     if hasattr(tweet_date, 'tzinfo') and tweet_date.tzinfo is not None:
+                         tweet_date = tweet_date.replace(tzinfo=None)
+
+                     if (tweet_date >= cutoff_date and
+                             any(keyword.lower() in tweet.text.lower() for keyword in keywords)):
+                         all_tweets.append(tweet)
+                 except Exception as e:
+                     logger.error(f"Error processing tweet during filtering: {str(e)}")
+
+         # If we have very few results, try with more relaxed filtering
+         if len(all_tweets) < 5 and active_sources:
+             logger.info("Few relevant tweets found, trying more relaxed filtering")
+
+             # Process all tweets again with more relaxed keyword matching
+             for source_tweets in source_tweets_list:
+                 for tweet in source_tweets:
+                     try:
+                         # Make sure we're comparing naive datetimes
+                         tweet_date = tweet.created_at
+                         if hasattr(tweet_date, 'tzinfo') and tweet_date.tzinfo is not None:
+                             tweet_date = tweet_date.replace(tzinfo=None)
+
+                         if tweet_date >= cutoff_date:
+                             for keyword in keywords:
+                                 # Split keyword into parts and check if any part matches
+                                 keyword_parts = keyword.lower().split()
+                                 if any(part in tweet.text.lower() for part in keyword_parts if len(part) > 3):
+                                     if tweet.id not in [t.id for t in all_tweets]:
+                                         all_tweets.append(tweet)
+                                     break
+                     except Exception as e:
+                         logger.error(f"Error during relaxed filtering: {str(e)}")
+
+         # Sort by recency
+         all_tweets.sort(key=lambda x: x.created_at, reverse=True)
+
+         logger.info(f"Found {len(all_tweets)} tweets from trusted sources related to keywords: {keywords}")
+         return all_tweets
+
+     def update_sources(self, sources: List[NewsSource]) -> None:
+         """Update the list of trusted news sources."""
+         self.news_sources = sources
+         # Clear cache when sources are updated
+         self.in_memory_cache.clear()
+         logger.info(f"Updated trusted news sources. New count: {len(sources)}")
+
+     def get_sources(self) -> List[NewsSource]:
+         """Get the current list of trusted news sources."""
+         return self.news_sources
+
+     async def close(self):
+         """Clean up resources."""
+         if self.client:
              await self.client.aclose()