abhisheksan committed on
Commit 1b855b8 · verified · Parent: 4dfce31

Upload 5 files

Files changed (5):
  1. analysis_service.py +100 -297
  2. app.py +247 -291
  3. models.py +25 -14
  4. reddit_service.py +135 -0
  5. requirements.txt +9 -11
analysis_service.py CHANGED
@@ -9,12 +9,12 @@ import google.generativeai as genai
 from loguru import logger
 from tenacity import RetryError, retry, stop_after_attempt, wait_exponential
 
-from models import ConflictAnalysis, KeyDevelopment, TensionLevel, Tweet
-from twitter_service import RssTwitterService
+from models import ConflictAnalysis, KeyDevelopment, TensionLevel, RedditPost
+from reddit_service import RedditService
 
 
 class AnalysisService:
-    """Service for analyzing tweets using Google's Gemini AI."""
+    """Service for analyzing Reddit posts using Google's Gemini AI."""
 
     def __init__(self):
         self.api_key = os.getenv("GEMINI_API_KEY")
@@ -25,36 +25,19 @@ class AnalysisService:
             "India-Pakistan", "cross-border", "terrorism", "bilateral relations"
         ]
 
-        # Initialize RSS Twitter Service
-        self.twitter_service = RssTwitterService()
-
-        # Initialize Gemini AI
-        self.initialize()
+        self.reddit_service = RedditService()
+        self.initialize_gemini()
 
-    async def initialize_services(self) -> bool:
-        """Initialize all required services."""
-        gemini_success = self.initialize()
-        twitter_success = await self.twitter_service.initialize()
-
-        if gemini_success and twitter_success:
-            logger.info("All services initialized successfully")
-            return True
-        else:
-            logger.error("Failed to initialize one or more services")
-            return False
-
-    def initialize(self) -> bool:
+    def initialize_gemini(self) -> bool:
         """Initialize the Gemini AI client."""
         if not self.api_key:
             logger.error("GEMINI_API_KEY not provided")
             return False
-
         try:
             logger.info("Initializing Gemini AI")
             genai.configure(api_key=self.api_key)
-            # Configure model with lower temperature for more factual responses
             generation_config = {
-                "temperature": 0.1,
+                "temperature": 0.2,  # Slightly increased for more nuanced language
                 "top_p": 0.95,
                 "top_k": 40
             }
@@ -65,359 +48,179 @@ class AnalysisService:
             logger.error(f"Failed to initialize Gemini AI: {str(e)}")
             return False
 
-    def _prepare_prompt(self, tweets: List[Tweet]) -> str:
-        """Prepare the prompt for analysis with intelligence sources data."""
-        # Sort tweets by recency to help with latest status identification
-        sorted_tweets = sorted(tweets, key=lambda x: x.created_at if hasattr(x, 'created_at') else datetime.now(), reverse=True)
+    # --- THIS IS THE UPDATED METHOD ---
+    def _prepare_prompt(self, posts: List[RedditPost]) -> str:
+        """Prepare the prompt for analysis with Reddit post data."""
+        sorted_posts = sorted(posts, key=lambda x: x.created_at, reverse=True)
 
-        # Format source data without numbering that could leak into responses
         source_entries = []
-        for i, tweet in enumerate(sorted_tweets):
-            timestamp = tweet.created_at.strftime("%Y-%m-%d %H:%M UTC") if hasattr(tweet, 'created_at') else "unknown time"
-            source_entries.append(f"SOURCE: @{tweet.author} | TIME: {timestamp}\n{tweet.text}")
+        for i, post in enumerate(sorted_posts):
+            # The model will use this for context, but won't cite it directly
+            source_entries.append(f"RAW INTEL #{i+1}:\n{post.text}")
 
         intelligence_data = "\n\n---\n\n".join(source_entries)
 
         prompt = f"""
-        INTELLIGENCE BRIEF: INDIA-PAKISTAN SITUATION ANALYSIS
-        DATE: {datetime.now().strftime("%Y-%m-%d")}
-        CLASSIFICATION: STRATEGIC ASSESSMENT
-
-        INTELLIGENCE SOURCES:
+        **TOP SECRET // FOR OFFICIAL USE ONLY**
+
+        **TO:** Strategic Intelligence Command
+        **FROM:** Senior Geopolitical Analyst (South Asia Desk)
+        **SUBJECT:** INDIA-PAKISTAN CONFLICT SITUATION REPORT
+        **DATE:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} UTC
+
+        **MISSION OBJECTIVE:**
+        Assume the persona of a senior geopolitical analyst. Your task is to synthesize raw, open-source intelligence feeds into a clinical and dispassionate intelligence product for a national security council. Your analysis must be objective, concise, and devoid of sensationalism. Focus exclusively on the provided intelligence to assess stability, identify threats, and determine the near-term trajectory of the conflict.
+
+        **RAW INTELLIGENCE FEEDS (FOR CONTEXT ONLY):**
+        ---
        {intelligence_data}
-
-        ANALYSIS REQUIREMENTS:
-        - Produce a professional intelligence assessment on India-Pakistan relations
-        - Write in formal, analytical language suitable for diplomatic/security briefings
-        - Maintain strict neutrality and objectivity throughout the analysis
-        - Base all assessments ONLY on information from the provided sources
-        - Do NOT refer to sources as "Data Points" or use numbered references
-        - When citing sources, use only author handles (e.g., "according to @BBCWorld")
-        - Focus on substantiated facts rather than speculation
-        - Organize information by significance rather than chronology
-        - Assess tension levels based on concrete actions and statements
-        - Structure all outputs in clear, professional format
-
-        REQUIRED OUTPUT FORMAT:
+        ---
+
+        **ANALYTICAL DIRECTIVES:**
+        1. **Synthesize, Do Not Summarize:** Do not merely list what each source says. Integrate all data points into a coherent, holistic assessment.
+        2. **Impersonal and Objective Tone:** Use formal, analytical language. Avoid emotive words, speculation, or personal opinions. Employ the active voice.
+        3. **No Direct Attribution:** Your final report is a standalone intelligence product. **CRITICAL: DO NOT attribute any information to specific Reddit usernames or subreddits (e.g., "u/user reports...").** The provided intelligence is your source material, not something to be directly quoted.
+        4. **Fact-Based Assessment:** All conclusions must be logically derived from the provided intelligence feeds. Do not introduce outside information.
+
+        **REQUIRED OUTPUT FORMAT (Strict JSON):**
+        Produce a single, valid JSON object with no markdown formatting. The structure must be as follows:
        {{
-            "latest_status": "Most recent significant development with source attribution",
-            "situation_summary": "Concise, factual summary of current situation without reference numbering",
+            "latest_status": "A single, concise sentence describing the most recent, significant event.",
+            "situation_summary": "A 2-3 sentence executive summary of the current geopolitical and military situation. This is the 'bottom line up front' (BLUF).",
            "key_developments": [
                {{
-                    "title": "Specific event title written in formal intelligence style",
-                    "description": "Professional analysis with proper source attribution",
-                    "sources": ["@source1", "@source2"]
+                    "title": "A formal title for a key event (e.g., 'Cross-Border Artillery Exchange Reported Near Uri Sector').",
+                    "description": "A synthesized paragraph detailing the event, its context, and immediate impact. Integrate multiple sources implicitly.",
+                    "sources": ["Military Activity", "Diplomatic Statement"]
                }}
            ],
            "reliability_assessment": {{
-                "source_credibility": "Assessment of source reliability",
-                "information_gaps": "Critical gaps in intelligence coverage",
-                "confidence_rating": "HIGH|MEDIUM|LOW with justification"
+                "source_credibility": "An assessment of the overall credibility of the provided intelligence, considering potential for bias or misinformation without naming sources.",
+                "information_gaps": "Identify what critical information is missing from the feeds (e.g., 'Official casualty figures unconfirmed.').",
+                "confidence_rating": "State your analytical confidence (HIGH, MEDIUM, or LOW) and provide a brief justification."
            }},
            "regional_implications": {{
-                "security": "Security implications written in formal analytical style",
-                "diplomatic": "Diplomatic consequences with proper source attribution",
-                "economic": "Economic impacts expressed objectively"
+                "security": "Analysis of the immediate security and military implications.",
+                "diplomatic": "Analysis of the impact on diplomatic relations and international standing.",
+                "economic": "Analysis of potential economic consequences (e.g., market stability, trade disruptions)."
            }},
            "tension_level": "LOW|MEDIUM|HIGH|CRITICAL",
-            "tension_rationale": "Professional justification for tension assessment"
+            "tension_rationale": "A concise justification for the assessed tension level, referencing the key developments and their potential for escalation."
        }}
-
-        CRITICAL DIRECTIVES:
-        - Generate ONLY valid JSON without additional text or markdown formatting
-        - DO NOT use phrase "Data Point" or numbered references in ANY output field
-        - Use professional intelligence terminology throughout
-        - Attribute information to sources by handle (e.g., "@BBCWorld reports") rather than numbers
-        - Write in concise, authoritative style appropriate for intelligence briefings
-        - Avoid personal opinions, narrative storytelling, or journalistic commentary
-        - Maintain consistent formal tone throughout all sections
        """
        return prompt
 
-    @retry(wait=wait_exponential(min=1, max=10), stop=stop_after_attempt(3))
-    async def _call_gemini(self, prompt: str) -> Dict:
+    @retry(wait=wait_exponential(min=2, max=60), stop=stop_after_attempt(3))
+    async def _call_gemini(self, prompt: str) -> Optional[Dict]:
         """Call the Gemini API with retry logic and improved parsing."""
         if not self.model:
-            if not self.initialize():
-                logger.error("Could not analyze tweets, Gemini AI not initialized")
+            if not self.initialize_gemini():
+                logger.error("Could not analyze posts, Gemini AI not initialized")
                 raise Exception("Gemini AI initialization failed")
-
         try:
             logger.info("Calling Gemini API for conflict analysis")
             response = await self.model.generate_content_async(prompt)
             result = response.text
-
-            # Better JSON extraction with multiple patterns
+
             json_match = re.search(r'```(?:json)?\n(.*?)\n```', result, re.DOTALL)
             if json_match:
-                result = json_match.group(1)
+                json_str = json_match.group(1)
             else:
-                # Try to find JSON objects with or without formatting
-                json_pattern = r'({[\s\S]*})'
-                json_match = re.search(json_pattern, result)
-                if json_match:
-                    result = json_match.group(1)
-
-            # Clean the result of any non-JSON content
-            result = re.sub(r'```', '', result).strip()
-
-            # Parse JSON with error handling
-            try:
-                analysis_data = json.loads(result)
-
-                # Additional cleaning to remove "Data Point" references from all string fields
-                for key, value in analysis_data.items():
-                    if isinstance(value, str):
-                        analysis_data[key] = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                        analysis_data[key] = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                        analysis_data[key] = analysis_data[key].strip()
-
-                # Clean nested dictionaries
-                for key, value in analysis_data.items():
-                    if isinstance(value, dict):
-                        for subkey, subvalue in value.items():
-                            if isinstance(subvalue, str):
-                                value[subkey] = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', subvalue)
-                                value[subkey] = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', subvalue)
-                                value[subkey] = value[subkey].strip()
-
-                logger.info("Successfully received and parsed Gemini response")
-                return analysis_data
-            except json.JSONDecodeError as e:
-                logger.error(f"JSON parsing error: {str(e)}")
-                # Attempt cleanup and retry parsing
-                result = re.sub(r'[\n\r\t]', ' ', result)
-                result = re.search(r'({.*})', result).group(1) if re.search(r'({.*})', result) else result
-                analysis_data = json.loads(result)
-                logger.info("Successfully parsed Gemini response after cleanup")
-                return analysis_data
+                json_str = result
+
+            return json.loads(json_str)
+        except json.JSONDecodeError as e:
+            logger.error(f"JSON parsing error: {e}. Raw response: {result}")
+            raise
         except Exception as e:
             logger.error(f"Error calling Gemini API: {str(e)}")
-            logger.debug(f"Raw response content: {result if 'result' in locals() else 'No response'}")
             raise
 
     def _extract_tension_level(self, level_text: str) -> TensionLevel:
-        """Extract tension level enum from text."""
         level_text = level_text.lower()
-        if "critical" in level_text:
-            return TensionLevel.CRITICAL
-        elif "high" in level_text:
-            return TensionLevel.HIGH
-        elif "medium" in level_text:
-            return TensionLevel.MEDIUM
-        else:
-            return TensionLevel.LOW
+        if "critical" in level_text: return TensionLevel.CRITICAL
+        if "high" in level_text: return TensionLevel.HIGH
+        if "medium" in level_text: return TensionLevel.MEDIUM
+        return TensionLevel.LOW
 
     def _process_key_developments(self, developments_data: List[Dict]) -> List[KeyDevelopment]:
-        """Process key developments from API response."""
         key_developments = []
-        if not developments_data:
-            return key_developments
-
+        if not developments_data: return key_developments
        for dev in developments_data:
-            # Clean any Data Point references if they slipped through
-            title = dev.get("title", "Unnamed Development")
-            description = dev.get("description", "No description provided")
-
-            # Remove any Data Point references
-            title = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', title)
-            description = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', description)
-            title = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', title)
-            description = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', description)
-
            key_developments.append(
                KeyDevelopment(
-                    title=title.strip(),
-                    description=description.strip(),
+                    title=dev.get("title", "Unnamed Development"),
+                    description=dev.get("description", "No description provided"),
                    sources=dev.get("sources", []),
                    timestamp=datetime.now()
                )
            )
        return key_developments
 
-    def _format_reliability_assessment(self, reliability_data: Dict) -> str:
-        """Format reliability assessment data into a structured string."""
-        if isinstance(reliability_data, str):
-            # Clean any Data Point references
-            reliability_data = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', reliability_data)
-            reliability_data = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', reliability_data)
-            return reliability_data.strip()
-
-        if isinstance(reliability_data, dict):
-            sections = []
-
-            if "source_credibility" in reliability_data:
-                value = reliability_data["source_credibility"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"SOURCE CREDIBILITY: {value.strip()}")
-
-            if "information_gaps" in reliability_data:
-                value = reliability_data["information_gaps"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"INFORMATION GAPS: {value.strip()}")
-
-            if "confidence_rating" in reliability_data:
-                value = reliability_data["confidence_rating"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"CONFIDENCE: {value.strip()}")
-
-            if sections:
-                return "\n\n".join(sections)
-
-        return "Assessment unavailable"
-
-    def _format_regional_implications(self, implications_data: Dict) -> str:
-        """Format regional implications data into a structured string."""
-        if isinstance(implications_data, str):
-            # Clean any Data Point references
-            implications_data = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', implications_data)
-            implications_data = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', implications_data)
-            return implications_data.strip()
-
-        if isinstance(implications_data, dict):
-            sections = []
-
-            if "security" in implications_data:
-                value = implications_data["security"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"SECURITY: {value.strip()}")
-
-            if "diplomatic" in implications_data:
-                value = implications_data["diplomatic"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"DIPLOMATIC: {value.strip()}")
-
-            if "economic" in implications_data:
-                value = implications_data["economic"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"ECONOMIC: {value.strip()}")
-
-            if sections:
-                return "\n\n".join(sections)
-
-        return "Implications unavailable"
-
-    async def analyze_tweets(self, tweets: List[Tweet], trigger: str = "scheduled") -> ConflictAnalysis:
-        """Analyze tweets using Gemini AI and generate a conflict analysis."""
-        if not tweets:
-            logger.warning("No tweets provided for analysis")
+    async def analyze_posts(self, posts: List[RedditPost], trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
+        """Analyze Reddit posts using Gemini AI and generate a conflict analysis."""
+        if not posts:
+            logger.warning("No Reddit posts provided for analysis")
            return None
-
        try:
-            prompt = self._prepare_prompt(tweets)
+            prompt = self._prepare_prompt(posts)
            analysis_data = await self._call_gemini(prompt)
 
-            # Process and extract data with proper error handling
+            if not analysis_data:
+                logger.error("Received no data from Gemini call.")
+                return None
+
            key_developments = self._process_key_developments(analysis_data.get("key_developments", []))
-
-            # Format complex nested structures if present
-            reliability_assessment = self._format_reliability_assessment(
-                analysis_data.get("reliability_assessment", "No reliability assessment provided")
-            )
-
-            regional_implications = self._format_regional_implications(
-                analysis_data.get("regional_implications", "No regional implications provided")
-            )
-
-            # Extract tension rationale if available and clean it
-            tension_info = analysis_data.get("tension_level", "Low")
-            tension_info = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', tension_info)
-            tension_info = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', tension_info)
-
-            tension_rationale = analysis_data.get("tension_rationale", "")
-            tension_rationale = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', tension_rationale)
-            tension_rationale = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', tension_rationale)
-
-            # Combine tension level and rationale if both exist
-            if tension_rationale:
-                tension_display = f"{tension_info.strip()} - {tension_rationale.strip()}"
-            else:
-                tension_display = tension_info.strip()
-
-            # Get and clean the latest status
-            latest_status = analysis_data.get("latest_status", "No recent status update available")
-            latest_status = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', latest_status)
-            latest_status = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', latest_status)
-
-            # Clean situation summary
-            situation_summary = analysis_data.get("situation_summary", "No summary provided")
-            situation_summary = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', situation_summary)
-            situation_summary = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', situation_summary)
 
            analysis = ConflictAnalysis(
                analysis_id=str(uuid.uuid4()),
                generated_at=datetime.now(),
-                situation_summary=situation_summary.strip(),
+                situation_summary=analysis_data.get("situation_summary", "No summary provided."),
                key_developments=key_developments,
-                reliability_assessment=reliability_assessment,
-                regional_implications=regional_implications,
-                tension_level=self._extract_tension_level(tension_display),
-                source_tweets=tweets,
+                reliability_assessment=analysis_data.get("reliability_assessment", {}),
+                regional_implications=analysis_data.get("regional_implications", {}),
+                tension_level=self._extract_tension_level(analysis_data.get("tension_level", "LOW")),
+                tension_rationale=analysis_data.get("tension_rationale", "No rationale provided."),
+                source_posts=posts,
                update_triggered_by=trigger,
-                latest_status=latest_status.strip()
+                latest_status=analysis_data.get("latest_status", "No recent status update available.")
            )
 
            logger.info(f"Generated conflict analysis with ID: {analysis.analysis_id}")
            return analysis
-
        except RetryError as e:
            logger.error(f"Failed to generate analysis after multiple retries: {str(e)}")
            return None
        except Exception as e:
-            logger.error(f"Unexpected error in tweet analysis: {str(e)}")
+            logger.error(f"Unexpected error in post analysis: {str(e)}")
            return None
 
-    async def get_latest_tweets(self, days_back: int = 2) -> List[Tweet]:
-        """Get latest tweets related to the conflict using RSS feeds."""
-        try:
-            tweets = await self.twitter_service.get_related_tweets(self.search_keywords, days_back)
-            logger.info(f"Retrieved {len(tweets)} tweets related to the conflict")
-            return tweets
-        except Exception as e:
-            logger.error(f"Error retrieving tweets: {str(e)}")
-            return []
-
    async def generate_conflict_analysis(self, days_back: int = 2, trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
-        """Generate a complete conflict analysis using tweets from RSS feeds."""
-        tweets = await self.get_latest_tweets(days_back)
-        if not tweets:
-            logger.warning("No tweets found for conflict analysis")
+        """Generate a complete conflict analysis using posts from Reddit."""
+        if not self.reddit_service.reddit:
+            if not self.reddit_service.initialize():
+                logger.error("Cannot generate analysis, Reddit Service failed to initialize.")
+                return None
+
+        posts = await self.reddit_service.get_related_posts(self.search_keywords, days_back)
+        if not posts:
+            logger.warning("No relevant Reddit posts found for conflict analysis")
            return None
 
-        return await self.analyze_tweets(tweets, trigger)
+        return await self.analyze_posts(posts, trigger)
 
-    async def register_rss_feeds(self, feed_map: Dict[str, str]) -> None:
-        """Register RSS feeds for Twitter handles."""
-        self.twitter_service.register_rss_feed_batch(feed_map)
-
-    def register_rss_feed(self, twitter_handle: str, rss_url: str) -> None:
-        """Register an RSS feed for a Twitter handle."""
-        self.twitter_service.register_rss_feed(twitter_handle, rss_url)
-
    def get_search_keywords(self) -> List[str]:
-        """Get the current search keywords."""
        return self.search_keywords
 
-    def update_search_keywords(self, keywords: List[str]) -> None:
-        """Update the search keywords."""
+    def update_search_keywords(self, keywords: List[str]):
        self.search_keywords = keywords
        logger.info(f"Updated search keywords. New count: {len(keywords)}")
 
-    def update_sources(self, sources) -> None:
-        """Update the news sources in the Twitter service."""
-        self.twitter_service.update_sources(sources)
+    def update_sources(self, sources):
+        self.reddit_service.update_sources(sources)
 
    def get_sources(self):
-        """Get the current news sources from the Twitter service."""
-        return self.twitter_service.get_sources()
+        return self.reddit_service.get_sources()
 
    async def close(self) -> None:
-        """Clean up resources."""
-        await self.twitter_service.close()
+        await self.reddit_service.close()
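A minimal end-to-end sketch of the refactored pipeline (not part of the commit; assumes GEMINI_API_KEY and the Reddit credentials are set in the environment):

import asyncio

from analysis_service import AnalysisService

async def main():
    service = AnalysisService()  # __init__ wires up RedditService and calls initialize_gemini()
    try:
        # generate_conflict_analysis() lazily initializes the Reddit client if needed
        analysis = await service.generate_conflict_analysis(days_back=2, trigger="manual")
        if analysis:
            print(analysis.tension_level, "-", analysis.latest_status)
    finally:
        await service.close()  # closes the underlying asyncpraw session

asyncio.run(main())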
 
app.py CHANGED
@@ -1,292 +1,248 @@
-import asyncio
-import os
-from datetime import datetime
-from typing import Dict, List, Optional
-
-from dotenv import load_dotenv
-from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, status
-from fastapi.middleware.cors import CORSMiddleware
-from loguru import logger
-
-from analysis_service import AnalysisService
-from twitter_service import RssTwitterService
-from models import (ConflictAnalysis, HealthCheck, NewsSource, TensionLevel,
-                    Tweet, UpdateRequest)
-
-# Load environment variables from .env file
-load_dotenv()
-
-# Configure logging
-os.makedirs("logs", exist_ok=True)
-logger.add("logs/app.log", rotation="500 MB", level=os.getenv("LOG_LEVEL", "INFO"))
-
-# Global readiness flag
-app_ready = False
-
-# Create FastAPI application
-app = FastAPI(
-    title="WesternFront API",
-    description="AI-powered conflict tracker for monitoring India-Pakistan tensions",
-    version="1.1.0"
-)
-
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Adjust this for production
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Services
-twitter_service = RssTwitterService()
-analysis_service = AnalysisService()
-
-# In-memory store for latest analysis
-latest_analysis: Optional[ConflictAnalysis] = None
-last_update_time: Optional[datetime] = None
-
-
-async def get_twitter_service() -> RssTwitterService:
-    """Dependency to get the Twitter service."""
-    return twitter_service
-
-
-async def get_analysis_service() -> AnalysisService:
-    """Dependency to get the Analysis service."""
-    return analysis_service
-
-
-@app.on_event("startup")
-async def startup_event():
-    """Initialize services on startup."""
-    global app_ready
-
-    logger.info("Starting up WesternFront API")
-
-    try:
-        # Initialize Twitter service
-        twitter_initialized = await twitter_service.initialize()
-        if not twitter_initialized:
-            logger.warning("Twitter service initialization failed. Some features may not work.")
-
-        # Initialize Gemini AI service
-        analysis_service.initialize()
-
-        # Set analysis service's twitter service reference
-        analysis_service.twitter_service = twitter_service
-
-        # Schedule first update in background
-        asyncio.create_task(update_analysis_task("startup"))
-
-        # Set up periodic update task
-        asyncio.create_task(periodic_update())
-
-        # Mark application as ready to accept requests
-        app_ready = True
-        logger.info("Application ready to accept requests")
-
-    except Exception as e:
-        logger.error(f"Error during startup: {e}")
-        app_ready = False  # Keep app in not-ready state if startup fails
-
-
-@app.on_event("shutdown")
-async def shutdown_event():
-    """Clean up resources on shutdown."""
-    logger.info("Shutting down WesternFront API")
-    if twitter_service and hasattr(twitter_service, 'close'):
-        await twitter_service.close()
-    if analysis_service and hasattr(analysis_service, 'close'):
-        await analysis_service.close()
-
-
-async def update_analysis_task(trigger: str = "scheduled") -> None:
-    """Task to update the conflict analysis."""
-    global latest_analysis, last_update_time
-
-    try:
-        logger.info(f"Starting analysis update ({trigger})")
-
-        # Get tweets related to India-Pakistan conflict
-        keywords = analysis_service.get_search_keywords()
-        tweets = await twitter_service.get_related_tweets(keywords, days_back=2)
-
-        if not tweets:
-            logger.warning("No relevant tweets found for analysis")
-            return
-
-        logger.info(f"Found {len(tweets)} relevant tweets for analysis")
-
-        # Analyze tweets
-        analysis = await analysis_service.analyze_tweets(tweets, trigger)
-
-        if analysis:
-            latest_analysis = analysis
-            last_update_time = datetime.now()
-            logger.info(f"Analysis updated successfully. Tension level: {analysis.tension_level}")
-        else:
-            logger.error("Failed to generate analysis")
-
-    except Exception as e:
-        logger.error(f"Error in update_analysis_task: {str(e)}")
-
-
-async def periodic_update() -> None:
-    """Periodically update the analysis."""
-    update_interval = int(os.getenv("UPDATE_INTERVAL_MINUTES", 60))
-
-    while True:
-        try:
-            await asyncio.sleep(update_interval * 60)  # Convert to seconds
-            await update_analysis_task("scheduled")
-        except Exception as e:
-            logger.error(f"Error in periodic_update: {str(e)}")
-            await asyncio.sleep(300)  # Wait 5 minutes if there was an error
-
-
-@app.get("/", response_model=Dict)
-async def root():
-    """Root endpoint with basic information about the API."""
-    return {
-        "name": "WesternFront API",
-        "description": "AI-powered conflict tracker for India-Pakistan tensions",
-        "version": "1.1.0",
-        "status": "ready" if app_ready else "initializing"
-    }
-
-
-@app.get("/ready")
-async def readiness_check():
-    """Readiness check endpoint for probes and monitoring."""
-    if not app_ready:
-        raise HTTPException(status_code=503, detail="Application is starting up")
-    return {"status": "ready", "timestamp": datetime.now().isoformat()}
-
-
-@app.get("/health", response_model=HealthCheck)
-async def health_check():
-    """Health check endpoint."""
-    twitter_initialized = hasattr(twitter_service, 'client') and twitter_service.client is not None
-    gemini_initialized = analysis_service.model is not None
-
-    return HealthCheck(
-        status="healthy" if app_ready else "initializing",
-        version="1.1.0",
-        timestamp=datetime.now(),
-        last_update=last_update_time,
-        components_status={
-            "twitter_service": twitter_initialized,
-            "analysis_service": gemini_initialized
-        }
-    )
-
-
-@app.head("/health")
-async def health_check_head():
-    """Health check endpoint (HEAD method)."""
-    # This will use the same logic as the GET handler but won't return a response body
-    twitter_initialized = hasattr(twitter_service, 'client') and twitter_service.client is not None
-    gemini_initialized = analysis_service.model is not None
-
-    # For HEAD requests, FastAPI will strip the response body but keep status codes and headers
-    return HealthCheck(
-        status="healthy" if app_ready else "initializing",
-        version="1.1.0",
-        timestamp=datetime.now(),
-        last_update=last_update_time,
-        components_status={
-            "twitter_service": twitter_initialized,
-            "analysis_service": gemini_initialized
-        }
-    )
-
-
-@app.get("/analysis", response_model=Optional[ConflictAnalysis])
-async def get_latest_analysis():
-    """Get the latest conflict analysis."""
-    if not latest_analysis:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="No analysis available yet. Try triggering an update."
-        )
-    return latest_analysis
-
-
-@app.post("/analysis/update", response_model=Dict)
-async def trigger_update(request: UpdateRequest):
-    """Trigger an analysis update."""
-    if request.force:
-        # Clear cache to force fresh tweets
-        twitter_service.in_memory_cache.clear()
-
-    # Start update in background
-    asyncio.create_task(update_analysis_task("manual"))
-
-    return {
-        "message": "Analysis update triggered",
-        "timestamp": datetime.now().isoformat(),
-        "force_refresh": request.force
-    }
-
-
-@app.get("/sources", response_model=List[NewsSource])
-async def get_news_sources(
-    twitter: RssTwitterService = Depends(get_twitter_service)
-):
-    """Get the current list of news sources."""
-    return twitter.get_sources()
-
-
-@app.post("/sources", response_model=Dict)
-async def update_news_sources(
-    sources: List[NewsSource],
-    twitter: RssTwitterService = Depends(get_twitter_service)
-):
-    """Update the list of news sources."""
-    twitter.update_sources(sources)
-    return {
-        "message": "News sources updated",
-        "count": len(sources)
-    }
-
-
-@app.get("/keywords", response_model=List[str])
-async def get_search_keywords(
-    analysis: AnalysisService = Depends(get_analysis_service)
-):
-    """Get the current search keywords."""
-    return analysis.get_search_keywords()
-
-
-@app.post("/keywords", response_model=Dict)
-async def update_search_keywords(
-    keywords: List[str],
-    analysis: AnalysisService = Depends(get_analysis_service)
-):
-    """Update the search keywords."""
-    analysis.update_search_keywords(keywords)
-    return {
-        "message": "Search keywords updated",
-        "count": len(keywords)
-    }
-
-
-@app.get("/tension-levels", response_model=List[str])
-async def get_tension_levels():
-    """Get the available tension levels."""
-    return [level.value for level in TensionLevel]
-
-
-@app.get("/rss-feeds", response_model=Dict[str, str])
-async def get_registered_rss_feeds(
-    twitter: RssTwitterService = Depends(get_twitter_service)
-):
-    """Get all registered RSS feeds."""
-    return twitter.rss_feed_urls
-
-
-if __name__ == "__main__":
-    import uvicorn
+import asyncio
+import os
+from datetime import datetime
+from typing import Dict, List, Optional
+
+from dotenv import load_dotenv
+from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, status
+from fastapi.middleware.cors import CORSMiddleware
+from loguru import logger
+
+# --- UPDATED IMPORTS ---
+from analysis_service import AnalysisService
+# No longer need RssTwitterService
+from models import (ConflictAnalysis, HealthCheck, SubredditSource, TensionLevel,
+                    UpdateRequest)
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Configure logging
+os.makedirs("logs", exist_ok=True)
+logger.add("logs/app.log", rotation="500 MB", level=os.getenv("LOG_LEVEL", "INFO"))
+
+# Global readiness flag
+app_ready = False
+
+# Create FastAPI application
+app = FastAPI(
+    title="WesternFront API",
+    description="AI-powered conflict tracker for monitoring India-Pakistan tensions using Reddit data",
+    version="2.0.0"  # Version bump for new data source
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Adjust this for production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# --- UPDATED: Services ---
+# The AnalysisService now manages the RedditService internally
+analysis_service = AnalysisService()
+
+# In-memory store for latest analysis
+latest_analysis: Optional[ConflictAnalysis] = None
+last_update_time: Optional[datetime] = None
+
+
+async def get_analysis_service() -> AnalysisService:
+    """Dependency to get the Analysis service."""
+    return analysis_service
+
+
+@app.on_event("startup")
+async def startup_event():
+    """Initialize services on startup."""
+    global app_ready
+
+    logger.info("Starting up WesternFront API v2.0")
+
+    try:
+        # Initialize Gemini AI and the internal Reddit service
+        analysis_service.initialize_gemini()
+        analysis_service.reddit_service.initialize()
+
+        # Schedule first update in background
+        asyncio.create_task(update_analysis_task("startup"))
+
+        # Set up periodic update task
+        asyncio.create_task(periodic_update())
+
+        # Mark application as ready to accept requests
+        app_ready = True
+        logger.info("Application ready to accept requests")
+
+    except Exception as e:
+        logger.error(f"Error during startup: {e}")
+        app_ready = False
+
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Clean up resources on shutdown."""
+    logger.info("Shutting down WesternFront API")
+    if analysis_service and hasattr(analysis_service, 'close'):
+        await analysis_service.close()
+
+
+async def update_analysis_task(trigger: str = "scheduled") -> None:
+    """Task to update the conflict analysis using the AnalysisService."""
+    global latest_analysis, last_update_time
+
+    try:
+        logger.info(f"Starting analysis update (trigger: {trigger})")
+
+        # --- REFACTORED: The analysis_service now handles everything ---
+        analysis = await analysis_service.generate_conflict_analysis(trigger=trigger)
+
+        if analysis:
+            latest_analysis = analysis
+            last_update_time = datetime.now()
+            logger.info(f"Analysis updated successfully. Tension level: {analysis.tension_level}")
+        else:
+            logger.warning("Failed to generate new analysis. No relevant data might be available.")
+
+    except Exception as e:
+        logger.error(f"Error in update_analysis_task: {str(e)}")
+
+
+async def periodic_update() -> None:
+    """Periodically update the analysis."""
+    update_interval = int(os.getenv("UPDATE_INTERVAL_MINUTES", 60))
+
+    while True:
+        try:
+            await asyncio.sleep(update_interval * 60)
+            await update_analysis_task("scheduled")
+        except Exception as e:
+            logger.error(f"Error in periodic_update: {str(e)}")
+            await asyncio.sleep(300)  # Wait 5 minutes if there was an error
+
+
+@app.get("/", response_model=Dict)
+async def root():
+    """Root endpoint with basic information about the API."""
+    return {
+        "name": "WesternFront API",
+        "description": "AI-powered conflict tracker for India-Pakistan tensions using Reddit data",
+        "version": "2.0.0",
+        "status": "ready" if app_ready else "initializing"
+    }
+
+
+@app.get("/ready")
+async def readiness_check():
+    """Readiness check endpoint."""
+    if not app_ready:
+        raise HTTPException(status_code=503, detail="Application is starting up")
+    return {"status": "ready", "timestamp": datetime.now().isoformat()}
+
+
+@app.get("/health", response_model=HealthCheck)
+async def health_check():
+    """Health check endpoint."""
+    # --- UPDATED: Check Reddit service instead of Twitter ---
+    reddit_initialized = analysis_service.reddit_service.reddit is not None
+    gemini_initialized = analysis_service.model is not None
+
+    return HealthCheck(
+        status="healthy" if app_ready else "initializing",
+        version="2.0.0",
+        timestamp=datetime.now(),
+        last_update=last_update_time,
+        components_status={
+            "reddit_service": reddit_initialized,
+            "analysis_service": gemini_initialized
+        }
+    )
+
+# The HEAD /health endpoint is a bit redundant with FastAPI, so it can be removed for simplicity
+# unless you have a specific use case for it.
+
+@app.get("/analysis", response_model=Optional[ConflictAnalysis])
+async def get_latest_analysis():
+    """Get the latest conflict analysis."""
+    if not latest_analysis:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="No analysis available yet. Try triggering an update."
+        )
+    return latest_analysis
+
+
+@app.post("/analysis/update", response_model=Dict)
+async def trigger_update(request: UpdateRequest):
+    """Trigger an analysis update."""
+    if request.force:
+        # --- UPDATED: Clear Reddit service cache ---
+        analysis_service.reddit_service.in_memory_cache.clear()
+        logger.info("Cache cleared for forced refresh.")
+
+    # Start update in background
+    asyncio.create_task(update_analysis_task("manual"))
+
+    return {
+        "message": "Analysis update triggered",
+        "timestamp": datetime.now().isoformat(),
+        "force_refresh": request.force
+    }
+
+
+# --- UPDATED: Now manages subreddit sources ---
+@app.get("/sources", response_model=List[SubredditSource])
+async def get_subreddit_sources(
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Get the current list of subreddit sources."""
+    return analysis.get_sources()
+
+
+# --- UPDATED: Now manages subreddit sources ---
+@app.post("/sources", response_model=Dict)
+async def update_subreddit_sources(
+    sources: List[SubredditSource],
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Update the list of subreddit sources."""
+    analysis.update_sources(sources)
+    return {
+        "message": "Subreddit sources updated",
+        "count": len(sources)
+    }
+
+
+@app.get("/keywords", response_model=List[str])
+async def get_search_keywords(
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Get the current search keywords."""
+    return analysis.get_search_keywords()
+
+
+@app.post("/keywords", response_model=Dict)
+async def update_search_keywords(
+    keywords: List[str],
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Update the search keywords."""
+    analysis.update_search_keywords(keywords)
+    return {
+        "message": "Search keywords updated",
+        "count": len(keywords)
+    }
+
+
+@app.get("/tension-levels", response_model=List[str])
+async def get_tension_levels():
+    """Get the available tension levels."""
+    return [level.value for level in TensionLevel]
+
+# --- REMOVED: /rss-feeds endpoint is no longer applicable ---
+
+if __name__ == "__main__":
+    import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
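A quick smoke test against a locally running instance (not part of the commit; assumes the default port 8000 from __main__, and that UpdateRequest exposes the force flag the handler reads). Only the standard library is used, since httpx was dropped from requirements:

import json
import urllib.request

# Trigger a forced refresh; the server clears the Reddit cache and updates in the background.
req = urllib.request.Request(
    "http://localhost:8000/analysis/update",
    data=json.dumps({"force": True}).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
print(urllib.request.urlopen(req).read().decode())

# Fetch the result once the background task has finished (404 until the first analysis exists).
print(urllib.request.urlopen("http://localhost:8000/analysis").read().decode())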
models.py CHANGED
@@ -13,23 +13,26 @@ class TensionLevel(str, Enum):
     CRITICAL = "Critical"
 
 
-class NewsSource(BaseModel):
-    """Model for a news source."""
+# --- NEW: Replaces NewsSource ---
+class SubredditSource(BaseModel):
+    """Model for a subreddit as a news source."""
     name: str
-    twitter_handle: str
-    country: str
-    reliability_score: float = Field(ge=0.0, le=1.0)
+    reliability_score: float = Field(default=0.7, ge=0.0, le=1.0)
     is_active: bool = True
 
 
-class Tweet(BaseModel):
-    """Model for a tweet."""
+# --- NEW: Replaces Tweet ---
+class RedditPost(BaseModel):
+    """Model for a Reddit post."""
     id: str
-    text: str
+    title: str
+    text: str  # Combination of title and selftext
+    selftext: str
     author: str
     created_at: datetime
-    engagement: Dict[str, int] = {"likes": 0, "retweets": 0, "replies": 0, "views": 0}
+    score: int
     url: str
+    subreddit: str
 
 
 class KeyDevelopment(BaseModel):
@@ -40,17 +43,25 @@ class KeyDevelopment(BaseModel):
     timestamp: Optional[datetime] = None
 
 
+# --- UPDATED: ConflictAnalysis now uses Reddit posts ---
 class ConflictAnalysis(BaseModel):
-    """Model for a conflict analysis."""
+    """Model for a conflict analysis based on Reddit data."""
     analysis_id: str
     generated_at: datetime
-    latest_status: str  # Added this field
+    latest_status: str
     situation_summary: str
     key_developments: List[KeyDevelopment]
-    reliability_assessment: str
-    regional_implications: str
+
+    # Updated to Dict for a more structured assessment from the AI
+    reliability_assessment: Dict[str, str]
+    regional_implications: Dict[str, str]
+
     tension_level: TensionLevel
-    source_tweets: List[Tweet]
+    tension_rationale: str  # Added field for justification
+
+    # Changed from source_tweets to source_posts
+    source_posts: List[RedditPost]
+
     update_triggered_by: str
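For reference, a sketch of how the new RedditPost model is populated (field values here are hypothetical; reddit_service.py builds posts this way, with text as title plus selftext):

from datetime import datetime, timezone

from models import RedditPost

post = RedditPost(
    id="abc123",                                   # hypothetical example values
    title="Ceasefire reported along the LoC",
    selftext="Multiple outlets report a pause in shelling.",
    text="Ceasefire reported along the LoC\nMultiple outlets report a pause in shelling.",
    author="example_user",
    created_at=datetime.now(timezone.utc),
    score=42,
    url="https://reddit.com/r/geopolitics/comments/abc123",
    subreddit="geopolitics",
)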
 
reddit_service.py ADDED
@@ -0,0 +1,135 @@
+import asyncio
+import os
+import re
+from datetime import datetime, timedelta, timezone
+from typing import Dict, List, Optional
+
+import asyncpraw  # Import asyncpraw instead of praw
+from cachetools import TTLCache
+from loguru import logger
+
+from models import RedditPost, SubredditSource
+
+class RedditService:
+    """Service for collecting posts via the Reddit API using Async PRAW."""
+
+    def __init__(self):
+        self.reddit = None
+        self.cache_expiry = int(os.getenv("CACHE_EXPIRY_MINUTES", 60))
+        self.in_memory_cache = TTLCache(maxsize=100, ttl=self.cache_expiry * 60)
+
+        self.sources = [
+            # Primary Subreddits
+            SubredditSource(name="geopolitics", reliability_score=0.85),  # High-quality analysis, neutral discussion
+            SubredditSource(name="anime_titties", reliability_score=0.8),  # International news, quality control
+            SubredditSource(name="CredibleDefense", reliability_score=0.9),  # Military/security analysis
+            SubredditSource(name="worldnews", reliability_score=0.8),  # Broad international coverage
+
+            # Regional Focus
+            SubredditSource(name="GeopoliticsIndia", reliability_score=0.75),  # India-focused geopolitical discussions
+            SubredditSource(name="SouthAsia", reliability_score=0.7),  # Regional coverage
+            SubredditSource(name="neutralnews", reliability_score=0.8),  # Fact-based reporting standards
+
+            # Existing sources
+            SubredditSource(name="india", reliability_score=0.7),
+            SubredditSource(name="pakistan", reliability_score=0.7),
+        ]
+
+        self.stats = {"requests": 0, "cache_hits": 0, "errors": 0}
+
+    def initialize(self) -> bool:
+        """Initialize the Reddit API client."""
+        try:
+            logger.info("Initializing Async Reddit service")
+            client_id = os.getenv("REDDIT_CLIENT_ID")
+            client_secret = os.getenv("REDDIT_CLIENT_SECRET")
+            user_agent = os.getenv("REDDIT_USER_AGENT")
+
+            if not all([client_id, client_secret, user_agent]):
+                logger.error("Reddit API credentials not found.")
+                return False
+
+            # --- Use asyncpraw.Reddit ---
+            self.reddit = asyncpraw.Reddit(
+                client_id=client_id,
+                client_secret=client_secret,
+                user_agent=user_agent,
+            )
+            logger.info("Async Reddit service initialized successfully.")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to initialize Async Reddit service: {e}")
+            return False
+
+    async def get_posts_from_subreddit(self, source: SubredditSource, limit: int = 100) -> List[RedditPost]:
+        """Get recent posts from a specific subreddit using async calls."""
+        cache_key = f"reddit_{source.name}_{limit}"
+        if cache_key in self.in_memory_cache:
+            self.stats["cache_hits"] += 1
+            logger.debug(f"Returning cached posts for r/{source.name}")
+            return self.in_memory_cache[cache_key]
+
+        self.stats["requests"] += 1
+        logger.info(f"Fetching posts from r/{source.name}")
+        try:
+            # --- Use async methods directly ---
+            subreddit = await self.reddit.subreddit(source.name)
+            posts = []
+
+            # Use `async for` to iterate through the async generator
+            async for sub in subreddit.new(limit=limit):
+                post = RedditPost(
+                    id=sub.id,
+                    title=sub.title,
+                    text=f"{sub.title}\n{sub.selftext}",
+                    selftext=sub.selftext,
+                    author=str(sub.author),
+                    created_at=datetime.fromtimestamp(sub.created_utc, tz=timezone.utc),
+                    score=sub.score,
+                    url=sub.url,
+                    subreddit=source.name,
+                )
+                posts.append(post)
+
+            self.in_memory_cache[cache_key] = posts
+            logger.info(f"Fetched and cached {len(posts)} posts from r/{source.name}")
+            return posts
+        except Exception as e:
+            self.stats["errors"] += 1
+            logger.error(f"Could not fetch from r/{source.name}. Error: {e}")
+            return []
+
+    async def get_related_posts(self, keywords: List[str], days_back: int = 2) -> List[RedditPost]:
+        # This method's logic doesn't change, as it was already async.
+        all_posts = []
+        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
+        active_sources = [s for s in self.sources if s.is_active]
+
+        tasks = [self.get_posts_from_subreddit(source) for source in active_sources]
+        source_posts_list = await asyncio.gather(*tasks)
+
+        for source_posts in source_posts_list:
+            for post in source_posts:
+                if post.created_at >= cutoff_date:
+                    if any(keyword.lower() in post.text.lower() for keyword in keywords):
+                        all_posts.append(post)
+
+        unique_posts = {post.id: post for post in all_posts}
+        sorted_posts = sorted(list(unique_posts.values()), key=lambda p: p.created_at, reverse=True)
+
+        logger.info(f"Found {len(sorted_posts)} relevant posts from Reddit.")
+        return sorted_posts
+
+    def update_sources(self, sources: List[SubredditSource]):
+        self.sources = sources
+        self.in_memory_cache.clear()
+        logger.info(f"Updated subreddit sources. New count: {len(sources)}")
+
+    def get_sources(self) -> List[SubredditSource]:
+        return self.sources
+
+    async def close(self):
+        """Close the Async PRAW client session."""
+        if self.reddit:
+            await self.reddit.close()
+            logger.info("Async Reddit service session closed.")
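The collector can also be exercised on its own. A minimal sketch (not part of the commit; assumes REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, and REDDIT_USER_AGENT are set):

import asyncio

from reddit_service import RedditService

async def main():
    service = RedditService()
    if not service.initialize():
        return
    # Keywords are matched case-insensitively against title + selftext.
    posts = await service.get_related_posts(["ceasefire", "LoC"], days_back=2)
    for post in posts[:5]:
        print(post.created_at, f"r/{post.subreddit}", post.title)
    await service.close()

asyncio.run(main())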
requirements.txt CHANGED
@@ -1,11 +1,9 @@
 fastapi==0.103.1
 uvicorn[standard]==0.23.2
 python-dotenv==1.0.0
 loguru==0.7.0
 google-generativeai==0.3.0
 tenacity==8.2.2
 cachetools==5.3.0
 pydantic==2.3.0
-httpx==0.24.1
-beautifulsoup4==4.12.2
-httpx[http2]>=0.24.0
+asyncpraw==7.7.2
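For completeness, the environment variables the code reads, as an illustrative .env (variable names are taken from the code in this commit; values are placeholders):

GEMINI_API_KEY=your-gemini-api-key
REDDIT_CLIENT_ID=your-reddit-client-id
REDDIT_CLIENT_SECRET=your-reddit-client-secret
REDDIT_USER_AGENT=WesternFront/2.0 (contact: you@example.com)
LOG_LEVEL=INFO
UPDATE_INTERVAL_MINUTES=60
CACHE_EXPIRY_MINUTES=60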