abhisheksan committed on
Commit 1b855b8 · verified · Parent: 4dfce31

Upload 5 files

Files changed (5):
  1. analysis_service.py +100 -297
  2. app.py +247 -291
  3. models.py +25 -14
  4. reddit_service.py +135 -0
  5. requirements.txt +9 -11
analysis_service.py CHANGED
@@ -9,12 +9,12 @@ import google.generativeai as genai
 from loguru import logger
 from tenacity import RetryError, retry, stop_after_attempt, wait_exponential
 
-from models import ConflictAnalysis, KeyDevelopment, TensionLevel, Tweet
-from twitter_service import RssTwitterService
+from models import ConflictAnalysis, KeyDevelopment, TensionLevel, RedditPost
+from reddit_service import RedditService
 
 
 class AnalysisService:
-    """Service for analyzing tweets using Google's Gemini AI."""
+    """Service for analyzing Reddit posts using Google's Gemini AI."""
 
     def __init__(self):
         self.api_key = os.getenv("GEMINI_API_KEY")
@@ -25,36 +25,19 @@ class AnalysisService:
             "India-Pakistan", "cross-border", "terrorism", "bilateral relations"
         ]
 
-        # Initialize RSS Twitter Service
-        self.twitter_service = RssTwitterService()
-
-        # Initialize Gemini AI
-        self.initialize()
+        self.reddit_service = RedditService()
+        self.initialize_gemini()
 
-    async def initialize_services(self) -> bool:
-        """Initialize all required services."""
-        gemini_success = self.initialize()
-        twitter_success = await self.twitter_service.initialize()
-
-        if gemini_success and twitter_success:
-            logger.info("All services initialized successfully")
-            return True
-        else:
-            logger.error("Failed to initialize one or more services")
-            return False
-
-    def initialize(self) -> bool:
+    def initialize_gemini(self) -> bool:
         """Initialize the Gemini AI client."""
         if not self.api_key:
             logger.error("GEMINI_API_KEY not provided")
             return False
-
         try:
             logger.info("Initializing Gemini AI")
             genai.configure(api_key=self.api_key)
-            # Configure model with lower temperature for more factual responses
             generation_config = {
-                "temperature": 0.1,
+                "temperature": 0.2,  # Slightly increased for more nuanced language
                 "top_p": 0.95,
                 "top_k": 40
             }
@@ -65,359 +48,179 @@ class AnalysisService:
             logger.error(f"Failed to initialize Gemini AI: {str(e)}")
             return False
 
-    def _prepare_prompt(self, tweets: List[Tweet]) -> str:
-        """Prepare the prompt for analysis with intelligence sources data."""
-        # Sort tweets by recency to help with latest status identification
-        sorted_tweets = sorted(tweets, key=lambda x: x.created_at if hasattr(x, 'created_at') else datetime.now(), reverse=True)
+    # --- THIS IS THE UPDATED METHOD ---
+    def _prepare_prompt(self, posts: List[RedditPost]) -> str:
+        """Prepare the prompt for analysis with Reddit post data."""
+        sorted_posts = sorted(posts, key=lambda x: x.created_at, reverse=True)
 
-        # Format source data without numbering that could leak into responses
         source_entries = []
-        for i, tweet in enumerate(sorted_tweets):
-            timestamp = tweet.created_at.strftime("%Y-%m-%d %H:%M UTC") if hasattr(tweet, 'created_at') else "unknown time"
-            source_entries.append(f"SOURCE: @{tweet.author} | TIME: {timestamp}\n{tweet.text}")
+        for i, post in enumerate(sorted_posts):
+            # The model will use this for context, but won't cite it directly
+            source_entries.append(f"RAW INTEL #{i+1}:\n{post.text}")
 
         intelligence_data = "\n\n---\n\n".join(source_entries)
 
         prompt = f"""
-        INTELLIGENCE BRIEF: INDIA-PAKISTAN SITUATION ANALYSIS
-        DATE: {datetime.now().strftime("%Y-%m-%d")}
-        CLASSIFICATION: STRATEGIC ASSESSMENT
-
-        INTELLIGENCE SOURCES:
+        **TOP SECRET // FOR OFFICIAL USE ONLY**
+
+        **TO:** Strategic Intelligence Command
+        **FROM:** Senior Geopolitical Analyst (South Asia Desk)
+        **SUBJECT:** INDIA-PAKISTAN CONFLICT SITUATION REPORT
+        **DATE:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} UTC
+
+        **MISSION OBJECTIVE:**
+        Assume the persona of a senior geopolitical analyst. Your task is to synthesize raw, open-source intelligence feeds into a clinical and dispassionate intelligence product for a national security council. Your analysis must be objective, concise, and devoid of sensationalism. Focus exclusively on the provided intelligence to assess stability, identify threats, and determine the near-term trajectory of the conflict.
+
+        **RAW INTELLIGENCE FEEDS (FOR CONTEXT ONLY):**
+        ---
        {intelligence_data}
-
-        ANALYSIS REQUIREMENTS:
-        - Produce a professional intelligence assessment on India-Pakistan relations
-        - Write in formal, analytical language suitable for diplomatic/security briefings
-        - Maintain strict neutrality and objectivity throughout the analysis
-        - Base all assessments ONLY on information from the provided sources
-        - Do NOT refer to sources as "Data Points" or use numbered references
-        - When citing sources, use only author handles (e.g., "according to @BBCWorld")
-        - Focus on substantiated facts rather than speculation
-        - Organize information by significance rather than chronology
-        - Assess tension levels based on concrete actions and statements
-        - Structure all outputs in clear, professional format
-
-        REQUIRED OUTPUT FORMAT:
+        ---
+
+        **ANALYTICAL DIRECTIVES:**
+        1. **Synthesize, Do Not Summarize:** Do not merely list what each source says. Integrate all data points into a coherent, holistic assessment.
+        2. **Impersonal and Objective Tone:** Use formal, analytical language. Avoid emotive words, speculation, or personal opinions. Employ the active voice.
+        3. **No Direct Attribution:** Your final report is a standalone intelligence product. **CRITICAL: DO NOT attribute any information to specific Reddit usernames or subreddits (e.g., "u/user reports...").** The provided intelligence is your source material, not something to be directly quoted.
+        4. **Fact-Based Assessment:** All conclusions must be logically derived from the provided intelligence feeds. Do not introduce outside information.
+
+        **REQUIRED OUTPUT FORMAT (Strict JSON):**
+        Produce a single, valid JSON object with no markdown formatting. The structure must be as follows:
        {{
-            "latest_status": "Most recent significant development with source attribution",
-            "situation_summary": "Concise, factual summary of current situation without reference numbering",
+            "latest_status": "A single, concise sentence describing the most recent, significant event.",
+            "situation_summary": "A 2-3 sentence executive summary of the current geopolitical and military situation. This is the 'bottom line up front' (BLUF).",
            "key_developments": [
                {{
-                    "title": "Specific event title written in formal intelligence style",
-                    "description": "Professional analysis with proper source attribution",
-                    "sources": ["@source1", "@source2"]
+                    "title": "A formal title for a key event (e.g., 'Cross-Border Artillery Exchange Reported Near Uri Sector').",
+                    "description": "A synthesized paragraph detailing the event, its context, and immediate impact. Integrate multiple sources implicitly.",
+                    "sources": ["Military Activity", "Diplomatic Statement"]
                }}
            ],
            "reliability_assessment": {{
-                "source_credibility": "Assessment of source reliability",
-                "information_gaps": "Critical gaps in intelligence coverage",
-                "confidence_rating": "HIGH|MEDIUM|LOW with justification"
+                "source_credibility": "An assessment of the overall credibility of the provided intelligence, considering potential for bias or misinformation without naming sources.",
+                "information_gaps": "Identify what critical information is missing from the feeds (e.g., 'Official casualty figures unconfirmed.').",
+                "confidence_rating": "State your analytical confidence (HIGH, MEDIUM, or LOW) and provide a brief justification."
            }},
            "regional_implications": {{
-                "security": "Security implications written in formal analytical style",
-                "diplomatic": "Diplomatic consequences with proper source attribution",
-                "economic": "Economic impacts expressed objectively"
+                "security": "Analysis of the immediate security and military implications.",
+                "diplomatic": "Analysis of the impact on diplomatic relations and international standing.",
+                "economic": "Analysis of potential economic consequences (e.g., market stability, trade disruptions)."
            }},
            "tension_level": "LOW|MEDIUM|HIGH|CRITICAL",
-            "tension_rationale": "Professional justification for tension assessment"
+            "tension_rationale": "A concise justification for the assessed tension level, referencing the key developments and their potential for escalation."
        }}
-
-        CRITICAL DIRECTIVES:
-        - Generate ONLY valid JSON without additional text or markdown formatting
-        - DO NOT use phrase "Data Point" or numbered references in ANY output field
-        - Use professional intelligence terminology throughout
-        - Attribute information to sources by handle (e.g., "@BBCWorld reports") rather than numbers
-        - Write in concise, authoritative style appropriate for intelligence briefings
-        - Avoid personal opinions, narrative storytelling, or journalistic commentary
-        - Maintain consistent formal tone throughout all sections
        """
        return prompt
 
-    @retry(wait=wait_exponential(min=1, max=10), stop=stop_after_attempt(3))
-    async def _call_gemini(self, prompt: str) -> Dict:
+    @retry(wait=wait_exponential(min=2, max=60), stop=stop_after_attempt(3))
+    async def _call_gemini(self, prompt: str) -> Optional[Dict]:
         """Call the Gemini API with retry logic and improved parsing."""
         if not self.model:
-            if not self.initialize():
-                logger.error("Could not analyze tweets, Gemini AI not initialized")
+            if not self.initialize_gemini():
+                logger.error("Could not analyze posts, Gemini AI not initialized")
                 raise Exception("Gemini AI initialization failed")
-
         try:
             logger.info("Calling Gemini API for conflict analysis")
             response = await self.model.generate_content_async(prompt)
             result = response.text
-
-            # Better JSON extraction with multiple patterns
+
             json_match = re.search(r'```(?:json)?\n(.*?)\n```', result, re.DOTALL)
             if json_match:
-                result = json_match.group(1)
+                json_str = json_match.group(1)
             else:
-                # Try to find JSON objects with or without formatting
-                json_pattern = r'({[\s\S]*})'
-                json_match = re.search(json_pattern, result)
-                if json_match:
-                    result = json_match.group(1)
-
-            # Clean the result of any non-JSON content
-            result = re.sub(r'```', '', result).strip()
-
-            # Parse JSON with error handling
-            try:
-                analysis_data = json.loads(result)
-
-                # Additional cleaning to remove "Data Point" references from all string fields
-                for key, value in analysis_data.items():
-                    if isinstance(value, str):
-                        analysis_data[key] = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                        analysis_data[key] = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                        analysis_data[key] = analysis_data[key].strip()
-
-                # Clean nested dictionaries
-                for key, value in analysis_data.items():
-                    if isinstance(value, dict):
-                        for subkey, subvalue in value.items():
-                            if isinstance(subvalue, str):
-                                value[subkey] = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', subvalue)
-                                value[subkey] = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', subvalue)
-                                value[subkey] = value[subkey].strip()
-
-                logger.info("Successfully received and parsed Gemini response")
-                return analysis_data
-            except json.JSONDecodeError as e:
-                logger.error(f"JSON parsing error: {str(e)}")
-                # Attempt cleanup and retry parsing
-                result = re.sub(r'[\n\r\t]', ' ', result)
-                result = re.search(r'({.*})', result).group(1) if re.search(r'({.*})', result) else result
-                analysis_data = json.loads(result)
-                logger.info("Successfully parsed Gemini response after cleanup")
-                return analysis_data
+                json_str = result
+
+            return json.loads(json_str)
+        except json.JSONDecodeError as e:
+            logger.error(f"JSON parsing error: {e}. Raw response: {result}")
+            raise
         except Exception as e:
             logger.error(f"Error calling Gemini API: {str(e)}")
-            logger.debug(f"Raw response content: {result if 'result' in locals() else 'No response'}")
             raise
 
     def _extract_tension_level(self, level_text: str) -> TensionLevel:
-        """Extract tension level enum from text."""
         level_text = level_text.lower()
-        if "critical" in level_text:
-            return TensionLevel.CRITICAL
-        elif "high" in level_text:
-            return TensionLevel.HIGH
-        elif "medium" in level_text:
-            return TensionLevel.MEDIUM
-        else:
-            return TensionLevel.LOW
+        if "critical" in level_text: return TensionLevel.CRITICAL
+        if "high" in level_text: return TensionLevel.HIGH
+        if "medium" in level_text: return TensionLevel.MEDIUM
+        return TensionLevel.LOW
 
     def _process_key_developments(self, developments_data: List[Dict]) -> List[KeyDevelopment]:
-        """Process key developments from API response."""
         key_developments = []
-        if not developments_data:
-            return key_developments
-
+        if not developments_data: return key_developments
        for dev in developments_data:
-            # Clean any Data Point references if they slipped through
-            title = dev.get("title", "Unnamed Development")
-            description = dev.get("description", "No description provided")
-
-            # Remove any Data Point references
-            title = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', title)
-            description = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', description)
-            title = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', title)
-            description = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', description)
-
            key_developments.append(
                KeyDevelopment(
-                    title=title.strip(),
-                    description=description.strip(),
+                    title=dev.get("title", "Unnamed Development"),
+                    description=dev.get("description", "No description provided"),
                    sources=dev.get("sources", []),
                    timestamp=datetime.now()
                )
            )
        return key_developments
 
-    def _format_reliability_assessment(self, reliability_data: Dict) -> str:
-        """Format reliability assessment data into a structured string."""
-        if isinstance(reliability_data, str):
-            # Clean any Data Point references
-            reliability_data = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', reliability_data)
-            reliability_data = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', reliability_data)
-            return reliability_data.strip()
-
-        if isinstance(reliability_data, dict):
-            sections = []
-
-            if "source_credibility" in reliability_data:
-                value = reliability_data["source_credibility"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"SOURCE CREDIBILITY: {value.strip()}")
-
-            if "information_gaps" in reliability_data:
-                value = reliability_data["information_gaps"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"INFORMATION GAPS: {value.strip()}")
-
-            if "confidence_rating" in reliability_data:
-                value = reliability_data["confidence_rating"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"CONFIDENCE: {value.strip()}")
-
-            if sections:
-                return "\n\n".join(sections)
-
-        return "Assessment unavailable"
-
-    def _format_regional_implications(self, implications_data: Dict) -> str:
-        """Format regional implications data into a structured string."""
-        if isinstance(implications_data, str):
-            # Clean any Data Point references
-            implications_data = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', implications_data)
-            implications_data = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', implications_data)
-            return implications_data.strip()
-
-        if isinstance(implications_data, dict):
-            sections = []
-
-            if "security" in implications_data:
-                value = implications_data["security"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"SECURITY: {value.strip()}")
-
-            if "diplomatic" in implications_data:
-                value = implications_data["diplomatic"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"DIPLOMATIC: {value.strip()}")
-
-            if "economic" in implications_data:
-                value = implications_data["economic"]
-                value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
-                value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
-                sections.append(f"ECONOMIC: {value.strip()}")
-
-            if sections:
-                return "\n\n".join(sections)
-
-        return "Implications unavailable"
-
-    async def analyze_tweets(self, tweets: List[Tweet], trigger: str = "scheduled") -> ConflictAnalysis:
-        """Analyze tweets using Gemini AI and generate a conflict analysis."""
-        if not tweets:
-            logger.warning("No tweets provided for analysis")
+    async def analyze_posts(self, posts: List[RedditPost], trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
+        """Analyze Reddit posts using Gemini AI and generate a conflict analysis."""
+        if not posts:
+            logger.warning("No Reddit posts provided for analysis")
            return None
-
        try:
-            prompt = self._prepare_prompt(tweets)
+            prompt = self._prepare_prompt(posts)
            analysis_data = await self._call_gemini(prompt)
 
-            # Process and extract data with proper error handling
+            if not analysis_data:
+                logger.error("Received no data from Gemini call.")
+                return None
+
            key_developments = self._process_key_developments(analysis_data.get("key_developments", []))
-
-            # Format complex nested structures if present
-            reliability_assessment = self._format_reliability_assessment(
-                analysis_data.get("reliability_assessment", "No reliability assessment provided")
-            )
-
-            regional_implications = self._format_regional_implications(
-                analysis_data.get("regional_implications", "No regional implications provided")
-            )
-
-            # Extract tension rationale if available and clean it
-            tension_info = analysis_data.get("tension_level", "Low")
-            tension_info = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', tension_info)
-            tension_info = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', tension_info)
-
-            tension_rationale = analysis_data.get("tension_rationale", "")
-            tension_rationale = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', tension_rationale)
-            tension_rationale = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', tension_rationale)
-
-            # Combine tension level and rationale if both exist
-            if tension_rationale:
-                tension_display = f"{tension_info.strip()} - {tension_rationale.strip()}"
-            else:
-                tension_display = tension_info.strip()
-
-            # Get and clean the latest status
-            latest_status = analysis_data.get("latest_status", "No recent status update available")
-            latest_status = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', latest_status)
-            latest_status = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', latest_status)
-
-            # Clean situation summary
-            situation_summary = analysis_data.get("situation_summary", "No summary provided")
-            situation_summary = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', situation_summary)
-            situation_summary = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', situation_summary)
 
            analysis = ConflictAnalysis(
                analysis_id=str(uuid.uuid4()),
                generated_at=datetime.now(),
-                situation_summary=situation_summary.strip(),
+                situation_summary=analysis_data.get("situation_summary", "No summary provided."),
                key_developments=key_developments,
-                reliability_assessment=reliability_assessment,
-                regional_implications=regional_implications,
-                tension_level=self._extract_tension_level(tension_display),
-                source_tweets=tweets,
+                reliability_assessment=analysis_data.get("reliability_assessment", {}),
+                regional_implications=analysis_data.get("regional_implications", {}),
+                tension_level=self._extract_tension_level(analysis_data.get("tension_level", "LOW")),
+                tension_rationale=analysis_data.get("tension_rationale", "No rationale provided."),
+                source_posts=posts,
                update_triggered_by=trigger,
-                latest_status=latest_status.strip()
+                latest_status=analysis_data.get("latest_status", "No recent status update available.")
            )
 
            logger.info(f"Generated conflict analysis with ID: {analysis.analysis_id}")
            return analysis
-
        except RetryError as e:
            logger.error(f"Failed to generate analysis after multiple retries: {str(e)}")
            return None
        except Exception as e:
-            logger.error(f"Unexpected error in tweet analysis: {str(e)}")
+            logger.error(f"Unexpected error in post analysis: {str(e)}")
            return None
 
-    async def get_latest_tweets(self, days_back: int = 2) -> List[Tweet]:
-        """Get latest tweets related to the conflict using RSS feeds."""
-        try:
-            tweets = await self.twitter_service.get_related_tweets(self.search_keywords, days_back)
-            logger.info(f"Retrieved {len(tweets)} tweets related to the conflict")
-            return tweets
-        except Exception as e:
-            logger.error(f"Error retrieving tweets: {str(e)}")
-            return []
-
    async def generate_conflict_analysis(self, days_back: int = 2, trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
-        """Generate a complete conflict analysis using tweets from RSS feeds."""
-        tweets = await self.get_latest_tweets(days_back)
-        if not tweets:
-            logger.warning("No tweets found for conflict analysis")
+        """Generate a complete conflict analysis using posts from Reddit."""
+        if not self.reddit_service.reddit:
+            if not self.reddit_service.initialize():
+                logger.error("Cannot generate analysis, Reddit Service failed to initialize.")
+                return None
+
+        posts = await self.reddit_service.get_related_posts(self.search_keywords, days_back)
+        if not posts:
+            logger.warning("No relevant Reddit posts found for conflict analysis")
            return None
 
-        return await self.analyze_tweets(tweets, trigger)
+        return await self.analyze_posts(posts, trigger)
 
-    async def register_rss_feeds(self, feed_map: Dict[str, str]) -> None:
-        """Register RSS feeds for Twitter handles."""
-        self.twitter_service.register_rss_feed_batch(feed_map)
-
-    def register_rss_feed(self, twitter_handle: str, rss_url: str) -> None:
-        """Register an RSS feed for a Twitter handle."""
-        self.twitter_service.register_rss_feed(twitter_handle, rss_url)
-
    def get_search_keywords(self) -> List[str]:
-        """Get the current search keywords."""
        return self.search_keywords
 
-    def update_search_keywords(self, keywords: List[str]) -> None:
-        """Update the search keywords."""
+    def update_search_keywords(self, keywords: List[str]):
        self.search_keywords = keywords
        logger.info(f"Updated search keywords. New count: {len(keywords)}")
 
-    def update_sources(self, sources) -> None:
-        """Update the news sources in the Twitter service."""
-        self.twitter_service.update_sources(sources)
+    def update_sources(self, sources):
+        self.reddit_service.update_sources(sources)
 
    def get_sources(self):
-        """Get the current news sources from the Twitter service."""
-        return self.twitter_service.get_sources()
+        return self.reddit_service.get_sources()
 
    async def close(self) -> None:
-        """Clean up resources."""
-        await self.twitter_service.close()
+        await self.reddit_service.close()
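A minimal end-to-end sketch of the refactored pipeline (not part of the commit; assumes GEMINI_API_KEY and the Reddit credentials are set in the environment):

import asyncio

from analysis_service import AnalysisService

async def main():
    service = AnalysisService()  # __init__ wires up RedditService and calls initialize_gemini()
    try:
        # generate_conflict_analysis() lazily initializes the Reddit client if needed
        analysis = await service.generate_conflict_analysis(days_back=2, trigger="manual")
        if analysis:
            print(analysis.tension_level, "-", analysis.latest_status)
    finally:
        await service.close()  # closes the underlying asyncpraw session

asyncio.run(main())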
 
app.py CHANGED
@@ -1,292 +1,248 @@
-import asyncio
-import os
-from datetime import datetime
-from typing import Dict, List, Optional
-
-from dotenv import load_dotenv
-from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, status
-from fastapi.middleware.cors import CORSMiddleware
-from loguru import logger
-
-from analysis_service import AnalysisService
-from twitter_service import RssTwitterService
-from models import (ConflictAnalysis, HealthCheck, NewsSource, TensionLevel,
-                    Tweet, UpdateRequest)
-
-# Load environment variables from .env file
-load_dotenv()
-
-# Configure logging
-os.makedirs("logs", exist_ok=True)
-logger.add("logs/app.log", rotation="500 MB", level=os.getenv("LOG_LEVEL", "INFO"))
-
-# Global readiness flag
-app_ready = False
-
-# Create FastAPI application
-app = FastAPI(
-    title="WesternFront API",
-    description="AI-powered conflict tracker for monitoring India-Pakistan tensions",
-    version="1.1.0"
-)
-
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Adjust this for production
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Services
-twitter_service = RssTwitterService()
-analysis_service = AnalysisService()
-
-# In-memory store for latest analysis
-latest_analysis: Optional[ConflictAnalysis] = None
-last_update_time: Optional[datetime] = None
-
-
-async def get_twitter_service() -> RssTwitterService:
-    """Dependency to get the Twitter service."""
-    return twitter_service
-
-
-async def get_analysis_service() -> AnalysisService:
-    """Dependency to get the Analysis service."""
-    return analysis_service
-
-
-@app.on_event("startup")
-async def startup_event():
-    """Initialize services on startup."""
-    global app_ready
-
-    logger.info("Starting up WesternFront API")
-
-    try:
-        # Initialize Twitter service
-        twitter_initialized = await twitter_service.initialize()
-        if not twitter_initialized:
-            logger.warning("Twitter service initialization failed. Some features may not work.")
-
-        # Initialize Gemini AI service
-        analysis_service.initialize()
-
-        # Set analysis service's twitter service reference
-        analysis_service.twitter_service = twitter_service
-
-        # Schedule first update in background
-        asyncio.create_task(update_analysis_task("startup"))
-
-        # Set up periodic update task
-        asyncio.create_task(periodic_update())
-
-        # Mark application as ready to accept requests
-        app_ready = True
-        logger.info("Application ready to accept requests")
-
-    except Exception as e:
-        logger.error(f"Error during startup: {e}")
-        app_ready = False  # Keep app in not-ready state if startup fails
-
-
-@app.on_event("shutdown")
-async def shutdown_event():
-    """Clean up resources on shutdown."""
-    logger.info("Shutting down WesternFront API")
-    if twitter_service and hasattr(twitter_service, 'close'):
-        await twitter_service.close()
-    if analysis_service and hasattr(analysis_service, 'close'):
-        await analysis_service.close()
-
-
-async def update_analysis_task(trigger: str = "scheduled") -> None:
-    """Task to update the conflict analysis."""
-    global latest_analysis, last_update_time
-
-    try:
-        logger.info(f"Starting analysis update ({trigger})")
-
-        # Get tweets related to India-Pakistan conflict
-        keywords = analysis_service.get_search_keywords()
-        tweets = await twitter_service.get_related_tweets(keywords, days_back=2)
-
-        if not tweets:
-            logger.warning("No relevant tweets found for analysis")
-            return
-
-        logger.info(f"Found {len(tweets)} relevant tweets for analysis")
-
-        # Analyze tweets
-        analysis = await analysis_service.analyze_tweets(tweets, trigger)
-
-        if analysis:
-            latest_analysis = analysis
-            last_update_time = datetime.now()
-            logger.info(f"Analysis updated successfully. Tension level: {analysis.tension_level}")
-        else:
-            logger.error("Failed to generate analysis")
-
-    except Exception as e:
-        logger.error(f"Error in update_analysis_task: {str(e)}")
-
-
-async def periodic_update() -> None:
-    """Periodically update the analysis."""
-    update_interval = int(os.getenv("UPDATE_INTERVAL_MINUTES", 60))
-
-    while True:
-        try:
-            await asyncio.sleep(update_interval * 60)  # Convert to seconds
-            await update_analysis_task("scheduled")
-        except Exception as e:
-            logger.error(f"Error in periodic_update: {str(e)}")
-            await asyncio.sleep(300)  # Wait 5 minutes if there was an error
-
-
-@app.get("/", response_model=Dict)
-async def root():
-    """Root endpoint with basic information about the API."""
-    return {
-        "name": "WesternFront API",
-        "description": "AI-powered conflict tracker for India-Pakistan tensions",
-        "version": "1.1.0",
-        "status": "ready" if app_ready else "initializing"
-    }
-
-
-@app.get("/ready")
-async def readiness_check():
-    """Readiness check endpoint for probes and monitoring."""
-    if not app_ready:
-        raise HTTPException(status_code=503, detail="Application is starting up")
-    return {"status": "ready", "timestamp": datetime.now().isoformat()}
-
-
-@app.get("/health", response_model=HealthCheck)
-async def health_check():
-    """Health check endpoint."""
-    twitter_initialized = hasattr(twitter_service, 'client') and twitter_service.client is not None
-    gemini_initialized = analysis_service.model is not None
-
-    return HealthCheck(
-        status="healthy" if app_ready else "initializing",
-        version="1.1.0",
-        timestamp=datetime.now(),
-        last_update=last_update_time,
-        components_status={
-            "twitter_service": twitter_initialized,
-            "analysis_service": gemini_initialized
-        }
-    )
-
-
-@app.head("/health")
-async def health_check_head():
-    """Health check endpoint (HEAD method)."""
-    # This will use the same logic as the GET handler but won't return a response body
-    twitter_initialized = hasattr(twitter_service, 'client') and twitter_service.client is not None
-    gemini_initialized = analysis_service.model is not None
-
-    # For HEAD requests, FastAPI will strip the response body but keep status codes and headers
-    return HealthCheck(
-        status="healthy" if app_ready else "initializing",
-        version="1.1.0",
-        timestamp=datetime.now(),
-        last_update=last_update_time,
-        components_status={
-            "twitter_service": twitter_initialized,
-            "analysis_service": gemini_initialized
-        }
-    )
-
-
-@app.get("/analysis", response_model=Optional[ConflictAnalysis])
-async def get_latest_analysis():
-    """Get the latest conflict analysis."""
-    if not latest_analysis:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="No analysis available yet. Try triggering an update."
-        )
-    return latest_analysis
-
-
-@app.post("/analysis/update", response_model=Dict)
-async def trigger_update(request: UpdateRequest):
-    """Trigger an analysis update."""
-    if request.force:
-        # Clear cache to force fresh tweets
-        twitter_service.in_memory_cache.clear()
-
-    # Start update in background
-    asyncio.create_task(update_analysis_task("manual"))
-
-    return {
-        "message": "Analysis update triggered",
-        "timestamp": datetime.now().isoformat(),
-        "force_refresh": request.force
-    }
-
-
-@app.get("/sources", response_model=List[NewsSource])
-async def get_news_sources(
-    twitter: RssTwitterService = Depends(get_twitter_service)
-):
-    """Get the current list of news sources."""
-    return twitter.get_sources()
-
-
-@app.post("/sources", response_model=Dict)
-async def update_news_sources(
-    sources: List[NewsSource],
-    twitter: RssTwitterService = Depends(get_twitter_service)
-):
-    """Update the list of news sources."""
-    twitter.update_sources(sources)
-    return {
-        "message": "News sources updated",
-        "count": len(sources)
-    }
-
-
-@app.get("/keywords", response_model=List[str])
-async def get_search_keywords(
-    analysis: AnalysisService = Depends(get_analysis_service)
-):
-    """Get the current search keywords."""
-    return analysis.get_search_keywords()
-
-
-@app.post("/keywords", response_model=Dict)
-async def update_search_keywords(
-    keywords: List[str],
-    analysis: AnalysisService = Depends(get_analysis_service)
-):
-    """Update the search keywords."""
-    analysis.update_search_keywords(keywords)
-    return {
-        "message": "Search keywords updated",
-        "count": len(keywords)
-    }
-
-
-@app.get("/tension-levels", response_model=List[str])
-async def get_tension_levels():
-    """Get the available tension levels."""
-    return [level.value for level in TensionLevel]
-
-
-@app.get("/rss-feeds", response_model=Dict[str, str])
-async def get_registered_rss_feeds(
-    twitter: RssTwitterService = Depends(get_twitter_service)
-):
-    """Get all registered RSS feeds."""
-    return twitter.rss_feed_urls
-
-
-if __name__ == "__main__":
-    import uvicorn
+import asyncio
+import os
+from datetime import datetime
+from typing import Dict, List, Optional
+
+from dotenv import load_dotenv
+from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, status
+from fastapi.middleware.cors import CORSMiddleware
+from loguru import logger
+
+# --- UPDATED IMPORTS ---
+from analysis_service import AnalysisService
+# No longer need RssTwitterService
+from models import (ConflictAnalysis, HealthCheck, SubredditSource, TensionLevel,
+                    UpdateRequest)
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Configure logging
+os.makedirs("logs", exist_ok=True)
+logger.add("logs/app.log", rotation="500 MB", level=os.getenv("LOG_LEVEL", "INFO"))
+
+# Global readiness flag
+app_ready = False
+
+# Create FastAPI application
+app = FastAPI(
+    title="WesternFront API",
+    description="AI-powered conflict tracker for monitoring India-Pakistan tensions using Reddit data",
+    version="2.0.0"  # Version bump for new data source
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Adjust this for production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# --- UPDATED: Services ---
+# The AnalysisService now manages the RedditService internally
+analysis_service = AnalysisService()
+
+# In-memory store for latest analysis
+latest_analysis: Optional[ConflictAnalysis] = None
+last_update_time: Optional[datetime] = None
+
+
+async def get_analysis_service() -> AnalysisService:
+    """Dependency to get the Analysis service."""
+    return analysis_service
+
+
+@app.on_event("startup")
+async def startup_event():
+    """Initialize services on startup."""
+    global app_ready
+
+    logger.info("Starting up WesternFront API v2.0")
+
+    try:
+        # Initialize Gemini AI and the internal Reddit service
+        analysis_service.initialize_gemini()
+        analysis_service.reddit_service.initialize()
+
+        # Schedule first update in background
+        asyncio.create_task(update_analysis_task("startup"))
+
+        # Set up periodic update task
+        asyncio.create_task(periodic_update())
+
+        # Mark application as ready to accept requests
+        app_ready = True
+        logger.info("Application ready to accept requests")
+
+    except Exception as e:
+        logger.error(f"Error during startup: {e}")
+        app_ready = False
+
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Clean up resources on shutdown."""
+    logger.info("Shutting down WesternFront API")
+    if analysis_service and hasattr(analysis_service, 'close'):
+        await analysis_service.close()
+
+
+async def update_analysis_task(trigger: str = "scheduled") -> None:
+    """Task to update the conflict analysis using the AnalysisService."""
+    global latest_analysis, last_update_time
+
+    try:
+        logger.info(f"Starting analysis update (trigger: {trigger})")
+
+        # --- REFACTORED: The analysis_service now handles everything ---
+        analysis = await analysis_service.generate_conflict_analysis(trigger=trigger)
+
+        if analysis:
+            latest_analysis = analysis
+            last_update_time = datetime.now()
+            logger.info(f"Analysis updated successfully. Tension level: {analysis.tension_level}")
+        else:
+            logger.warning("Failed to generate new analysis. No relevant data might be available.")
+
+    except Exception as e:
+        logger.error(f"Error in update_analysis_task: {str(e)}")
+
+
+async def periodic_update() -> None:
+    """Periodically update the analysis."""
+    update_interval = int(os.getenv("UPDATE_INTERVAL_MINUTES", 60))
+
+    while True:
+        try:
+            await asyncio.sleep(update_interval * 60)
+            await update_analysis_task("scheduled")
+        except Exception as e:
+            logger.error(f"Error in periodic_update: {str(e)}")
+            await asyncio.sleep(300)  # Wait 5 minutes if there was an error
+
+
+@app.get("/", response_model=Dict)
+async def root():
+    """Root endpoint with basic information about the API."""
+    return {
+        "name": "WesternFront API",
+        "description": "AI-powered conflict tracker for India-Pakistan tensions using Reddit data",
+        "version": "2.0.0",
+        "status": "ready" if app_ready else "initializing"
+    }
+
+
+@app.get("/ready")
+async def readiness_check():
+    """Readiness check endpoint."""
+    if not app_ready:
+        raise HTTPException(status_code=503, detail="Application is starting up")
+    return {"status": "ready", "timestamp": datetime.now().isoformat()}
+
+
+@app.get("/health", response_model=HealthCheck)
+async def health_check():
+    """Health check endpoint."""
+    # --- UPDATED: Check Reddit service instead of Twitter ---
+    reddit_initialized = analysis_service.reddit_service.reddit is not None
+    gemini_initialized = analysis_service.model is not None
+
+    return HealthCheck(
+        status="healthy" if app_ready else "initializing",
+        version="2.0.0",
+        timestamp=datetime.now(),
+        last_update=last_update_time,
+        components_status={
+            "reddit_service": reddit_initialized,
+            "analysis_service": gemini_initialized
+        }
+    )
+
+# The HEAD /health endpoint is a bit redundant with FastAPI, so it can be removed for simplicity
+# unless you have a specific use case for it.
+
+@app.get("/analysis", response_model=Optional[ConflictAnalysis])
+async def get_latest_analysis():
+    """Get the latest conflict analysis."""
+    if not latest_analysis:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="No analysis available yet. Try triggering an update."
+        )
+    return latest_analysis
+
+
+@app.post("/analysis/update", response_model=Dict)
+async def trigger_update(request: UpdateRequest):
+    """Trigger an analysis update."""
+    if request.force:
+        # --- UPDATED: Clear Reddit service cache ---
+        analysis_service.reddit_service.in_memory_cache.clear()
+        logger.info("Cache cleared for forced refresh.")
+
+    # Start update in background
+    asyncio.create_task(update_analysis_task("manual"))
+
+    return {
+        "message": "Analysis update triggered",
+        "timestamp": datetime.now().isoformat(),
+        "force_refresh": request.force
+    }
+
+
+# --- UPDATED: Now manages subreddit sources ---
+@app.get("/sources", response_model=List[SubredditSource])
+async def get_subreddit_sources(
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Get the current list of subreddit sources."""
+    return analysis.get_sources()
+
+
+# --- UPDATED: Now manages subreddit sources ---
+@app.post("/sources", response_model=Dict)
+async def update_subreddit_sources(
+    sources: List[SubredditSource],
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Update the list of subreddit sources."""
+    analysis.update_sources(sources)
+    return {
+        "message": "Subreddit sources updated",
+        "count": len(sources)
+    }
+
+
+@app.get("/keywords", response_model=List[str])
+async def get_search_keywords(
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Get the current search keywords."""
+    return analysis.get_search_keywords()
+
+
+@app.post("/keywords", response_model=Dict)
+async def update_search_keywords(
+    keywords: List[str],
+    analysis: AnalysisService = Depends(get_analysis_service)
+):
+    """Update the search keywords."""
+    analysis.update_search_keywords(keywords)
+    return {
+        "message": "Search keywords updated",
+        "count": len(keywords)
+    }
+
+
+@app.get("/tension-levels", response_model=List[str])
+async def get_tension_levels():
+    """Get the available tension levels."""
+    return [level.value for level in TensionLevel]
+
+# --- REMOVED: /rss-feeds endpoint is no longer applicable ---
+
+if __name__ == "__main__":
+    import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
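A quick smoke test against a locally running instance (not part of the commit; assumes the default port 8000 from __main__, and that UpdateRequest exposes the force flag the handler reads). Only the standard library is used, since httpx was dropped from requirements:

import json
import urllib.request

# Trigger a forced refresh; the server clears the Reddit cache and updates in the background.
req = urllib.request.Request(
    "http://localhost:8000/analysis/update",
    data=json.dumps({"force": True}).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
print(urllib.request.urlopen(req).read().decode())

# Fetch the result once the background task has finished (404 until the first analysis exists).
print(urllib.request.urlopen("http://localhost:8000/analysis").read().decode())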
models.py CHANGED
@@ -13,23 +13,26 @@ class TensionLevel(str, Enum):
     CRITICAL = "Critical"
 
 
-class NewsSource(BaseModel):
-    """Model for a news source."""
+# --- NEW: Replaces NewsSource ---
+class SubredditSource(BaseModel):
+    """Model for a subreddit as a news source."""
     name: str
-    twitter_handle: str
-    country: str
-    reliability_score: float = Field(ge=0.0, le=1.0)
+    reliability_score: float = Field(default=0.7, ge=0.0, le=1.0)
     is_active: bool = True
 
 
-class Tweet(BaseModel):
-    """Model for a tweet."""
+# --- NEW: Replaces Tweet ---
+class RedditPost(BaseModel):
+    """Model for a Reddit post."""
     id: str
-    text: str
+    title: str
+    text: str  # Combination of title and selftext
+    selftext: str
     author: str
     created_at: datetime
-    engagement: Dict[str, int] = {"likes": 0, "retweets": 0, "replies": 0, "views": 0}
+    score: int
     url: str
+    subreddit: str
 
 
 class KeyDevelopment(BaseModel):
@@ -40,17 +43,25 @@ class KeyDevelopment(BaseModel):
     timestamp: Optional[datetime] = None
 
 
+# --- UPDATED: ConflictAnalysis now uses Reddit posts ---
 class ConflictAnalysis(BaseModel):
-    """Model for a conflict analysis."""
+    """Model for a conflict analysis based on Reddit data."""
     analysis_id: str
     generated_at: datetime
-    latest_status: str  # Added this field
+    latest_status: str
     situation_summary: str
     key_developments: List[KeyDevelopment]
-    reliability_assessment: str
-    regional_implications: str
+
+    # Updated to Dict for a more structured assessment from the AI
+    reliability_assessment: Dict[str, str]
+    regional_implications: Dict[str, str]
+
     tension_level: TensionLevel
-    source_tweets: List[Tweet]
+    tension_rationale: str  # Added field for justification
+
+    # Changed from source_tweets to source_posts
+    source_posts: List[RedditPost]
+
     update_triggered_by: str
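For reference, a sketch of how the new RedditPost model is populated (field values here are hypothetical; reddit_service.py builds posts this way, with text as title plus selftext):

from datetime import datetime, timezone

from models import RedditPost

post = RedditPost(
    id="abc123",                                   # hypothetical example values
    title="Ceasefire reported along the LoC",
    selftext="Multiple outlets report a pause in shelling.",
    text="Ceasefire reported along the LoC\nMultiple outlets report a pause in shelling.",
    author="example_user",
    created_at=datetime.now(timezone.utc),
    score=42,
    url="https://reddit.com/r/geopolitics/comments/abc123",
    subreddit="geopolitics",
)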
 
reddit_service.py ADDED
@@ -0,0 +1,135 @@
+import asyncio
+import os
+import re
+from datetime import datetime, timedelta, timezone
+from typing import Dict, List, Optional
+
+import asyncpraw  # Import asyncpraw instead of praw
+from cachetools import TTLCache
+from loguru import logger
+
+from models import RedditPost, SubredditSource
+
+class RedditService:
+    """Service for collecting posts via the Reddit API using Async PRAW."""
+
+    def __init__(self):
+        self.reddit = None
+        self.cache_expiry = int(os.getenv("CACHE_EXPIRY_MINUTES", 60))
+        self.in_memory_cache = TTLCache(maxsize=100, ttl=self.cache_expiry * 60)
+
+        self.sources = [
+            # Primary Subreddits
+            SubredditSource(name="geopolitics", reliability_score=0.85),  # High-quality analysis, neutral discussion
+            SubredditSource(name="anime_titties", reliability_score=0.8),  # International news, quality control
+            SubredditSource(name="CredibleDefense", reliability_score=0.9),  # Military/security analysis
+            SubredditSource(name="worldnews", reliability_score=0.8),  # Broad international coverage
+
+            # Regional Focus
+            SubredditSource(name="GeopoliticsIndia", reliability_score=0.75),  # India-focused geopolitical discussions
+            SubredditSource(name="SouthAsia", reliability_score=0.7),  # Regional coverage
+            SubredditSource(name="neutralnews", reliability_score=0.8),  # Fact-based reporting standards
+
+            # Existing sources
+            SubredditSource(name="india", reliability_score=0.7),
+            SubredditSource(name="pakistan", reliability_score=0.7),
+        ]
+
+        self.stats = {"requests": 0, "cache_hits": 0, "errors": 0}
+
+    def initialize(self) -> bool:
+        """Initialize the Reddit API client."""
+        try:
+            logger.info("Initializing Async Reddit service")
+            client_id = os.getenv("REDDIT_CLIENT_ID")
+            client_secret = os.getenv("REDDIT_CLIENT_SECRET")
+            user_agent = os.getenv("REDDIT_USER_AGENT")
+
+            if not all([client_id, client_secret, user_agent]):
+                logger.error("Reddit API credentials not found.")
+                return False
+
+            # --- Use asyncpraw.Reddit ---
+            self.reddit = asyncpraw.Reddit(
+                client_id=client_id,
+                client_secret=client_secret,
+                user_agent=user_agent,
+            )
+            logger.info("Async Reddit service initialized successfully.")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to initialize Async Reddit service: {e}")
+            return False
+
+    async def get_posts_from_subreddit(self, source: SubredditSource, limit: int = 100) -> List[RedditPost]:
+        """Get recent posts from a specific subreddit using async calls."""
+        cache_key = f"reddit_{source.name}_{limit}"
+        if cache_key in self.in_memory_cache:
+            self.stats["cache_hits"] += 1
+            logger.debug(f"Returning cached posts for r/{source.name}")
+            return self.in_memory_cache[cache_key]
+
+        self.stats["requests"] += 1
+        logger.info(f"Fetching posts from r/{source.name}")
+        try:
+            # --- Use async methods directly ---
+            subreddit = await self.reddit.subreddit(source.name)
+            posts = []
+
+            # Use `async for` to iterate through the async generator
+            async for sub in subreddit.new(limit=limit):
+                post = RedditPost(
+                    id=sub.id,
+                    title=sub.title,
+                    text=f"{sub.title}\n{sub.selftext}",
+                    selftext=sub.selftext,
+                    author=str(sub.author),
+                    created_at=datetime.fromtimestamp(sub.created_utc, tz=timezone.utc),
+                    score=sub.score,
+                    url=sub.url,
+                    subreddit=source.name,
+                )
+                posts.append(post)
+
+            self.in_memory_cache[cache_key] = posts
+            logger.info(f"Fetched and cached {len(posts)} posts from r/{source.name}")
+            return posts
+        except Exception as e:
+            self.stats["errors"] += 1
+            logger.error(f"Could not fetch from r/{source.name}. Error: {e}")
+            return []
+
+    async def get_related_posts(self, keywords: List[str], days_back: int = 2) -> List[RedditPost]:
+        # This method's logic doesn't change, as it was already async.
+        all_posts = []
+        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
+        active_sources = [s for s in self.sources if s.is_active]
+
+        tasks = [self.get_posts_from_subreddit(source) for source in active_sources]
+        source_posts_list = await asyncio.gather(*tasks)
+
+        for source_posts in source_posts_list:
+            for post in source_posts:
+                if post.created_at >= cutoff_date:
+                    if any(keyword.lower() in post.text.lower() for keyword in keywords):
+                        all_posts.append(post)
+
+        unique_posts = {post.id: post for post in all_posts}
+        sorted_posts = sorted(list(unique_posts.values()), key=lambda p: p.created_at, reverse=True)
+
+        logger.info(f"Found {len(sorted_posts)} relevant posts from Reddit.")
+        return sorted_posts
+
+    def update_sources(self, sources: List[SubredditSource]):
+        self.sources = sources
+        self.in_memory_cache.clear()
+        logger.info(f"Updated subreddit sources. New count: {len(sources)}")
+
+    def get_sources(self) -> List[SubredditSource]:
+        return self.sources
+
+    async def close(self):
+        """Close the Async PRAW client session."""
+        if self.reddit:
+            await self.reddit.close()
+            logger.info("Async Reddit service session closed.")
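The collector can also be exercised on its own. A minimal sketch (not part of the commit; assumes REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, and REDDIT_USER_AGENT are set):

import asyncio

from reddit_service import RedditService

async def main():
    service = RedditService()
    if not service.initialize():
        return
    # Keywords are matched case-insensitively against title + selftext.
    posts = await service.get_related_posts(["ceasefire", "LoC"], days_back=2)
    for post in posts[:5]:
        print(post.created_at, f"r/{post.subreddit}", post.title)
    await service.close()

asyncio.run(main())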
requirements.txt CHANGED
@@ -1,11 +1,9 @@
 fastapi==0.103.1
 uvicorn[standard]==0.23.2
 python-dotenv==1.0.0
 loguru==0.7.0
 google-generativeai==0.3.0
 tenacity==8.2.2
 cachetools==5.3.0
 pydantic==2.3.0
-httpx==0.24.1
-beautifulsoup4==4.12.2
-httpx[http2]>=0.24.0
+asyncpraw==7.7.2
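For completeness, the environment variables the code reads, as an illustrative .env (variable names are taken from the code in this commit; values are placeholders):

GEMINI_API_KEY=your-gemini-api-key
REDDIT_CLIENT_ID=your-reddit-client-id
REDDIT_CLIENT_SECRET=your-reddit-client-secret
REDDIT_USER_AGENT=WesternFront/2.0 (contact: you@example.com)
LOG_LEVEL=INFO
UPDATE_INTERVAL_MINUTES=60
CACHE_EXPIRY_MINUTES=60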