Spaces:
Running
Running
Upload 5 files
Browse files- analysis_service.py +100 -297
- app.py +247 -291
- models.py +25 -14
- reddit_service.py +135 -0
- requirements.txt +9 -11
analysis_service.py
CHANGED
@@ -9,12 +9,12 @@ import google.generativeai as genai
|
|
9 |
from loguru import logger
|
10 |
from tenacity import RetryError, retry, stop_after_attempt, wait_exponential
|
11 |
|
12 |
-
from models import ConflictAnalysis, KeyDevelopment, TensionLevel,
|
13 |
-
from
|
14 |
|
15 |
|
16 |
class AnalysisService:
|
17 |
-
"""Service for analyzing
|
18 |
|
19 |
def __init__(self):
|
20 |
self.api_key = os.getenv("GEMINI_API_KEY")
|
@@ -25,36 +25,19 @@ class AnalysisService:
|
|
25 |
"India-Pakistan", "cross-border", "terrorism", "bilateral relations"
|
26 |
]
|
27 |
|
28 |
-
|
29 |
-
self.
|
30 |
-
|
31 |
-
# Initialize Gemini AI
|
32 |
-
self.initialize()
|
33 |
|
34 |
-
|
35 |
-
"""Initialize all required services."""
|
36 |
-
gemini_success = self.initialize()
|
37 |
-
twitter_success = await self.twitter_service.initialize()
|
38 |
-
|
39 |
-
if gemini_success and twitter_success:
|
40 |
-
logger.info("All services initialized successfully")
|
41 |
-
return True
|
42 |
-
else:
|
43 |
-
logger.error("Failed to initialize one or more services")
|
44 |
-
return False
|
45 |
-
|
46 |
-
def initialize(self) -> bool:
|
47 |
"""Initialize the Gemini AI client."""
|
48 |
if not self.api_key:
|
49 |
logger.error("GEMINI_API_KEY not provided")
|
50 |
return False
|
51 |
-
|
52 |
try:
|
53 |
logger.info("Initializing Gemini AI")
|
54 |
genai.configure(api_key=self.api_key)
|
55 |
-
# Configure model with lower temperature for more factual responses
|
56 |
generation_config = {
|
57 |
-
"temperature": 0.
|
58 |
"top_p": 0.95,
|
59 |
"top_k": 40
|
60 |
}
|
@@ -65,359 +48,179 @@ class AnalysisService:
|
|
65 |
logger.error(f"Failed to initialize Gemini AI: {str(e)}")
|
66 |
return False
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
# Format source data without numbering that could leak into responses
|
74 |
source_entries = []
|
75 |
-
for i,
|
76 |
-
|
77 |
-
source_entries.append(f"
|
78 |
|
79 |
intelligence_data = "\n\n---\n\n".join(source_entries)
|
80 |
|
81 |
prompt = f"""
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
{intelligence_data}
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
- Assess tension levels based on concrete actions and statements
|
99 |
-
- Structure all outputs in clear, professional format
|
100 |
-
|
101 |
-
REQUIRED OUTPUT FORMAT:
|
102 |
{{
|
103 |
-
"latest_status": "
|
104 |
-
"situation_summary": "
|
105 |
"key_developments": [
|
106 |
{{
|
107 |
-
"title": "
|
108 |
-
"description": "
|
109 |
-
"sources": ["
|
110 |
}}
|
111 |
],
|
112 |
"reliability_assessment": {{
|
113 |
-
"source_credibility": "
|
114 |
-
"information_gaps": "
|
115 |
-
"confidence_rating": "HIGH
|
116 |
}},
|
117 |
"regional_implications": {{
|
118 |
-
"security": "
|
119 |
-
"diplomatic": "
|
120 |
-
"economic": "
|
121 |
}},
|
122 |
"tension_level": "LOW|MEDIUM|HIGH|CRITICAL",
|
123 |
-
"tension_rationale": "
|
124 |
}}
|
125 |
-
|
126 |
-
CRITICAL DIRECTIVES:
|
127 |
-
- Generate ONLY valid JSON without additional text or markdown formatting
|
128 |
-
- DO NOT use phrase "Data Point" or numbered references in ANY output field
|
129 |
-
- Use professional intelligence terminology throughout
|
130 |
-
- Attribute information to sources by handle (e.g., "@BBCWorld reports") rather than numbers
|
131 |
-
- Write in concise, authoritative style appropriate for intelligence briefings
|
132 |
-
- Avoid personal opinions, narrative storytelling, or journalistic commentary
|
133 |
-
- Maintain consistent formal tone throughout all sections
|
134 |
"""
|
135 |
return prompt
|
136 |
|
137 |
-
@retry(wait=wait_exponential(min=
|
138 |
-
async def _call_gemini(self, prompt: str) -> Dict:
|
139 |
"""Call the Gemini API with retry logic and improved parsing."""
|
140 |
if not self.model:
|
141 |
-
if not self.
|
142 |
-
logger.error("Could not analyze
|
143 |
raise Exception("Gemini AI initialization failed")
|
144 |
-
|
145 |
try:
|
146 |
logger.info("Calling Gemini API for conflict analysis")
|
147 |
response = await self.model.generate_content_async(prompt)
|
148 |
result = response.text
|
149 |
-
|
150 |
-
# Better JSON extraction with multiple patterns
|
151 |
json_match = re.search(r'```(?:json)?\n(.*?)\n```', result, re.DOTALL)
|
152 |
if json_match:
|
153 |
-
|
154 |
else:
|
155 |
-
|
156 |
-
json_pattern = r'({[\s\S]*})'
|
157 |
-
json_match = re.search(json_pattern, result)
|
158 |
-
if json_match:
|
159 |
-
result = json_match.group(1)
|
160 |
-
|
161 |
-
# Clean the result of any non-JSON content
|
162 |
-
result = re.sub(r'```', '', result).strip()
|
163 |
-
|
164 |
-
# Parse JSON with error handling
|
165 |
-
try:
|
166 |
-
analysis_data = json.loads(result)
|
167 |
-
|
168 |
-
# Additional cleaning to remove "Data Point" references from all string fields
|
169 |
-
for key, value in analysis_data.items():
|
170 |
-
if isinstance(value, str):
|
171 |
-
analysis_data[key] = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
172 |
-
analysis_data[key] = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
173 |
-
analysis_data[key] = analysis_data[key].strip()
|
174 |
-
|
175 |
-
# Clean nested dictionaries
|
176 |
-
for key, value in analysis_data.items():
|
177 |
-
if isinstance(value, dict):
|
178 |
-
for subkey, subvalue in value.items():
|
179 |
-
if isinstance(subvalue, str):
|
180 |
-
value[subkey] = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', subvalue)
|
181 |
-
value[subkey] = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', subvalue)
|
182 |
-
value[subkey] = value[subkey].strip()
|
183 |
-
|
184 |
-
logger.info("Successfully received and parsed Gemini response")
|
185 |
-
return analysis_data
|
186 |
-
except json.JSONDecodeError as e:
|
187 |
-
logger.error(f"JSON parsing error: {str(e)}")
|
188 |
-
# Attempt cleanup and retry parsing
|
189 |
-
result = re.sub(r'[\n\r\t]', ' ', result)
|
190 |
-
result = re.search(r'({.*})', result).group(1) if re.search(r'({.*})', result) else result
|
191 |
-
analysis_data = json.loads(result)
|
192 |
-
logger.info("Successfully parsed Gemini response after cleanup")
|
193 |
-
return analysis_data
|
194 |
|
|
|
|
|
|
|
|
|
195 |
except Exception as e:
|
196 |
logger.error(f"Error calling Gemini API: {str(e)}")
|
197 |
-
logger.debug(f"Raw response content: {result if 'result' in locals() else 'No response'}")
|
198 |
raise
|
199 |
|
200 |
def _extract_tension_level(self, level_text: str) -> TensionLevel:
|
201 |
-
"""Extract tension level enum from text."""
|
202 |
level_text = level_text.lower()
|
203 |
-
if "critical" in level_text:
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
elif "medium" in level_text:
|
208 |
-
return TensionLevel.MEDIUM
|
209 |
-
else:
|
210 |
-
return TensionLevel.LOW
|
211 |
|
212 |
def _process_key_developments(self, developments_data: List[Dict]) -> List[KeyDevelopment]:
|
213 |
-
"""Process key developments from API response."""
|
214 |
key_developments = []
|
215 |
-
if not developments_data:
|
216 |
-
return key_developments
|
217 |
-
|
218 |
for dev in developments_data:
|
219 |
-
# Clean any Data Point references if they slipped through
|
220 |
-
title = dev.get("title", "Unnamed Development")
|
221 |
-
description = dev.get("description", "No description provided")
|
222 |
-
|
223 |
-
# Remove any Data Point references
|
224 |
-
title = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', title)
|
225 |
-
description = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', description)
|
226 |
-
title = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', title)
|
227 |
-
description = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', description)
|
228 |
-
|
229 |
key_developments.append(
|
230 |
KeyDevelopment(
|
231 |
-
title=
|
232 |
-
description=
|
233 |
sources=dev.get("sources", []),
|
234 |
timestamp=datetime.now()
|
235 |
)
|
236 |
)
|
237 |
return key_developments
|
238 |
|
239 |
-
def
|
240 |
-
"""
|
241 |
-
if
|
242 |
-
|
243 |
-
reliability_data = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', reliability_data)
|
244 |
-
reliability_data = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', reliability_data)
|
245 |
-
return reliability_data.strip()
|
246 |
-
|
247 |
-
if isinstance(reliability_data, dict):
|
248 |
-
sections = []
|
249 |
-
|
250 |
-
if "source_credibility" in reliability_data:
|
251 |
-
value = reliability_data["source_credibility"]
|
252 |
-
value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
253 |
-
value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
254 |
-
sections.append(f"SOURCE CREDIBILITY: {value.strip()}")
|
255 |
-
|
256 |
-
if "information_gaps" in reliability_data:
|
257 |
-
value = reliability_data["information_gaps"]
|
258 |
-
value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
259 |
-
value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
260 |
-
sections.append(f"INFORMATION GAPS: {value.strip()}")
|
261 |
-
|
262 |
-
if "confidence_rating" in reliability_data:
|
263 |
-
value = reliability_data["confidence_rating"]
|
264 |
-
value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
265 |
-
value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
266 |
-
sections.append(f"CONFIDENCE: {value.strip()}")
|
267 |
-
|
268 |
-
if sections:
|
269 |
-
return "\n\n".join(sections)
|
270 |
-
|
271 |
-
return "Assessment unavailable"
|
272 |
-
|
273 |
-
def _format_regional_implications(self, implications_data: Dict) -> str:
|
274 |
-
"""Format regional implications data into a structured string."""
|
275 |
-
if isinstance(implications_data, str):
|
276 |
-
# Clean any Data Point references
|
277 |
-
implications_data = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', implications_data)
|
278 |
-
implications_data = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', implications_data)
|
279 |
-
return implications_data.strip()
|
280 |
-
|
281 |
-
if isinstance(implications_data, dict):
|
282 |
-
sections = []
|
283 |
-
|
284 |
-
if "security" in implications_data:
|
285 |
-
value = implications_data["security"]
|
286 |
-
value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
287 |
-
value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
288 |
-
sections.append(f"SECURITY: {value.strip()}")
|
289 |
-
|
290 |
-
if "diplomatic" in implications_data:
|
291 |
-
value = implications_data["diplomatic"]
|
292 |
-
value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
293 |
-
value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
294 |
-
sections.append(f"DIPLOMATIC: {value.strip()}")
|
295 |
-
|
296 |
-
if "economic" in implications_data:
|
297 |
-
value = implications_data["economic"]
|
298 |
-
value = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', value)
|
299 |
-
value = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', value)
|
300 |
-
sections.append(f"ECONOMIC: {value.strip()}")
|
301 |
-
|
302 |
-
if sections:
|
303 |
-
return "\n\n".join(sections)
|
304 |
-
|
305 |
-
return "Implications unavailable"
|
306 |
-
|
307 |
-
async def analyze_tweets(self, tweets: List[Tweet], trigger: str = "scheduled") -> ConflictAnalysis:
|
308 |
-
"""Analyze tweets using Gemini AI and generate a conflict analysis."""
|
309 |
-
if not tweets:
|
310 |
-
logger.warning("No tweets provided for analysis")
|
311 |
return None
|
312 |
-
|
313 |
try:
|
314 |
-
prompt = self._prepare_prompt(
|
315 |
analysis_data = await self._call_gemini(prompt)
|
316 |
|
317 |
-
|
|
|
|
|
|
|
318 |
key_developments = self._process_key_developments(analysis_data.get("key_developments", []))
|
319 |
-
|
320 |
-
# Format complex nested structures if present
|
321 |
-
reliability_assessment = self._format_reliability_assessment(
|
322 |
-
analysis_data.get("reliability_assessment", "No reliability assessment provided")
|
323 |
-
)
|
324 |
-
|
325 |
-
regional_implications = self._format_regional_implications(
|
326 |
-
analysis_data.get("regional_implications", "No regional implications provided")
|
327 |
-
)
|
328 |
-
|
329 |
-
# Extract tension rationale if available and clean it
|
330 |
-
tension_info = analysis_data.get("tension_level", "Low")
|
331 |
-
tension_info = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', tension_info)
|
332 |
-
tension_info = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', tension_info)
|
333 |
-
|
334 |
-
tension_rationale = analysis_data.get("tension_rationale", "")
|
335 |
-
tension_rationale = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', tension_rationale)
|
336 |
-
tension_rationale = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', tension_rationale)
|
337 |
-
|
338 |
-
# Combine tension level and rationale if both exist
|
339 |
-
if tension_rationale:
|
340 |
-
tension_display = f"{tension_info.strip()} - {tension_rationale.strip()}"
|
341 |
-
else:
|
342 |
-
tension_display = tension_info.strip()
|
343 |
-
|
344 |
-
# Get and clean the latest status
|
345 |
-
latest_status = analysis_data.get("latest_status", "No recent status update available")
|
346 |
-
latest_status = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', latest_status)
|
347 |
-
latest_status = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', latest_status)
|
348 |
-
|
349 |
-
# Clean situation summary
|
350 |
-
situation_summary = analysis_data.get("situation_summary", "No summary provided")
|
351 |
-
situation_summary = re.sub(r'(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*', '', situation_summary)
|
352 |
-
situation_summary = re.sub(r'\(\s*(?:Data Point|data point)s?\s+\d+(?:,\s*\d+)*\s*\)', '', situation_summary)
|
353 |
|
354 |
analysis = ConflictAnalysis(
|
355 |
analysis_id=str(uuid.uuid4()),
|
356 |
generated_at=datetime.now(),
|
357 |
-
situation_summary=situation_summary.
|
358 |
key_developments=key_developments,
|
359 |
-
reliability_assessment=reliability_assessment,
|
360 |
-
regional_implications=regional_implications,
|
361 |
-
tension_level=self._extract_tension_level(
|
362 |
-
|
|
|
363 |
update_triggered_by=trigger,
|
364 |
-
latest_status=latest_status.
|
365 |
)
|
366 |
|
367 |
logger.info(f"Generated conflict analysis with ID: {analysis.analysis_id}")
|
368 |
return analysis
|
369 |
-
|
370 |
except RetryError as e:
|
371 |
logger.error(f"Failed to generate analysis after multiple retries: {str(e)}")
|
372 |
return None
|
373 |
except Exception as e:
|
374 |
-
logger.error(f"Unexpected error in
|
375 |
return None
|
376 |
|
377 |
-
async def get_latest_tweets(self, days_back: int = 2) -> List[Tweet]:
|
378 |
-
"""Get latest tweets related to the conflict using RSS feeds."""
|
379 |
-
try:
|
380 |
-
tweets = await self.twitter_service.get_related_tweets(self.search_keywords, days_back)
|
381 |
-
logger.info(f"Retrieved {len(tweets)} tweets related to the conflict")
|
382 |
-
return tweets
|
383 |
-
except Exception as e:
|
384 |
-
logger.error(f"Error retrieving tweets: {str(e)}")
|
385 |
-
return []
|
386 |
-
|
387 |
async def generate_conflict_analysis(self, days_back: int = 2, trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
|
388 |
-
"""Generate a complete conflict analysis using
|
389 |
-
|
390 |
-
|
391 |
-
|
|
|
|
|
|
|
|
|
|
|
392 |
return None
|
393 |
|
394 |
-
return await self.
|
395 |
|
396 |
-
async def register_rss_feeds(self, feed_map: Dict[str, str]) -> None:
|
397 |
-
"""Register RSS feeds for Twitter handles."""
|
398 |
-
self.twitter_service.register_rss_feed_batch(feed_map)
|
399 |
-
|
400 |
-
def register_rss_feed(self, twitter_handle: str, rss_url: str) -> None:
|
401 |
-
"""Register an RSS feed for a Twitter handle."""
|
402 |
-
self.twitter_service.register_rss_feed(twitter_handle, rss_url)
|
403 |
-
|
404 |
def get_search_keywords(self) -> List[str]:
|
405 |
-
"""Get the current search keywords."""
|
406 |
return self.search_keywords
|
407 |
|
408 |
-
def update_search_keywords(self, keywords: List[str])
|
409 |
-
"""Update the search keywords."""
|
410 |
self.search_keywords = keywords
|
411 |
logger.info(f"Updated search keywords. New count: {len(keywords)}")
|
412 |
-
|
413 |
-
def update_sources(self, sources)
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
def get_sources(self):
|
418 |
-
|
419 |
-
return self.twitter_service.get_sources()
|
420 |
|
421 |
async def close(self) -> None:
|
422 |
-
|
423 |
-
await self.twitter_service.close()
|
|
|
9 |
from loguru import logger
|
10 |
from tenacity import RetryError, retry, stop_after_attempt, wait_exponential
|
11 |
|
12 |
+
from models import ConflictAnalysis, KeyDevelopment, TensionLevel, RedditPost
|
13 |
+
from reddit_service import RedditService
|
14 |
|
15 |
|
16 |
class AnalysisService:
|
17 |
+
"""Service for analyzing Reddit posts using Google's Gemini AI."""
|
18 |
|
19 |
def __init__(self):
|
20 |
self.api_key = os.getenv("GEMINI_API_KEY")
|
|
|
25 |
"India-Pakistan", "cross-border", "terrorism", "bilateral relations"
|
26 |
]
|
27 |
|
28 |
+
self.reddit_service = RedditService()
|
29 |
+
self.initialize_gemini()
|
|
|
|
|
|
|
30 |
|
31 |
+
def initialize_gemini(self) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
"""Initialize the Gemini AI client."""
|
33 |
if not self.api_key:
|
34 |
logger.error("GEMINI_API_KEY not provided")
|
35 |
return False
|
|
|
36 |
try:
|
37 |
logger.info("Initializing Gemini AI")
|
38 |
genai.configure(api_key=self.api_key)
|
|
|
39 |
generation_config = {
|
40 |
+
"temperature": 0.2, # Slightly increased for more nuanced language
|
41 |
"top_p": 0.95,
|
42 |
"top_k": 40
|
43 |
}
|
|
|
48 |
logger.error(f"Failed to initialize Gemini AI: {str(e)}")
|
49 |
return False
|
50 |
|
51 |
+
# --- THIS IS THE UPDATED METHOD ---
|
52 |
+
def _prepare_prompt(self, posts: List[RedditPost]) -> str:
|
53 |
+
"""Prepare the prompt for analysis with Reddit post data."""
|
54 |
+
sorted_posts = sorted(posts, key=lambda x: x.created_at, reverse=True)
|
55 |
|
|
|
56 |
source_entries = []
|
57 |
+
for i, post in enumerate(sorted_posts):
|
58 |
+
# The model will use this for context, but won't cite it directly
|
59 |
+
source_entries.append(f"RAW INTEL #{i+1}:\n{post.text}")
|
60 |
|
61 |
intelligence_data = "\n\n---\n\n".join(source_entries)
|
62 |
|
63 |
prompt = f"""
|
64 |
+
**TOP SECRET // FOR OFFICIAL USE ONLY**
|
65 |
+
|
66 |
+
**TO:** Strategic Intelligence Command
|
67 |
+
**FROM:** Senior Geopolitical Analyst (South Asia Desk)
|
68 |
+
**SUBJECT:** INDIA-PAKISTAN CONFLICT SITUATION REPORT
|
69 |
+
**DATE:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} UTC
|
70 |
+
|
71 |
+
**MISSION OBJECTIVE:**
|
72 |
+
Assume the persona of a senior geopolitical analyst. Your task is to synthesize raw, open-source intelligence feeds into a clinical and dispassionate intelligence product for a national security council. Your analysis must be objective, concise, and devoid of sensationalism. Focus exclusively on the provided intelligence to assess stability, identify threats, and determine the near-term trajectory of the conflict.
|
73 |
+
|
74 |
+
**RAW INTELLIGENCE FEEDS (FOR CONTEXT ONLY):**
|
75 |
+
---
|
76 |
{intelligence_data}
|
77 |
+
---
|
78 |
+
|
79 |
+
**ANALYTICAL DIRECTIVES:**
|
80 |
+
1. **Synthesize, Do Not Summarize:** Do not merely list what each source says. Integrate all data points into a coherent, holistic assessment.
|
81 |
+
2. **Impersonal and Objective Tone:** Use formal, analytical language. Avoid emotive words, speculation, or personal opinions. Employ the active voice.
|
82 |
+
3. **No Direct Attribution:** Your final report is a standalone intelligence product. **CRITICAL: DO NOT attribute any information to specific Reddit usernames or subreddits (e.g., "u/user reports...").** The provided intelligence is your source material, not something to be directly quoted.
|
83 |
+
4. **Fact-Based Assessment:** All conclusions must be logically derived from the provided intelligence feeds. Do not introduce outside information.
|
84 |
+
|
85 |
+
**REQUIRED OUTPUT FORMAT (Strict JSON):**
|
86 |
+
Produce a single, valid JSON object with no markdown formatting. The structure must be as follows:
|
|
|
|
|
|
|
|
|
87 |
{{
|
88 |
+
"latest_status": "A single, concise sentence describing the most recent, significant event.",
|
89 |
+
"situation_summary": "A 2-3 sentence executive summary of the current geopolitical and military situation. This is the 'bottom line up front' (BLUF).",
|
90 |
"key_developments": [
|
91 |
{{
|
92 |
+
"title": "A formal title for a key event (e.g., 'Cross-Border Artillery Exchange Reported Near Uri Sector').",
|
93 |
+
"description": "A synthesized paragraph detailing the event, its context, and immediate impact. Integrate multiple sources implicitly.",
|
94 |
+
"sources": ["Military Activity", "Diplomatic Statement"]
|
95 |
}}
|
96 |
],
|
97 |
"reliability_assessment": {{
|
98 |
+
"source_credibility": "An assessment of the overall credibility of the provided intelligence, considering potential for bias or misinformation without naming sources.",
|
99 |
+
"information_gaps": "Identify what critical information is missing from the feeds (e.g., 'Official casualty figures unconfirmed.').",
|
100 |
+
"confidence_rating": "State your analytical confidence (HIGH, MEDIUM, or LOW) and provide a brief justification."
|
101 |
}},
|
102 |
"regional_implications": {{
|
103 |
+
"security": "Analysis of the immediate security and military implications.",
|
104 |
+
"diplomatic": "Analysis of the impact on diplomatic relations and international standing.",
|
105 |
+
"economic": "Analysis of potential economic consequences (e.g., market stability, trade disruptions)."
|
106 |
}},
|
107 |
"tension_level": "LOW|MEDIUM|HIGH|CRITICAL",
|
108 |
+
"tension_rationale": "A concise justification for the assessed tension level, referencing the key developments and their potential for escalation."
|
109 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
"""
|
111 |
return prompt
|
112 |
|
113 |
+
@retry(wait=wait_exponential(min=2, max=60), stop=stop_after_attempt(3))
|
114 |
+
async def _call_gemini(self, prompt: str) -> Optional[Dict]:
|
115 |
"""Call the Gemini API with retry logic and improved parsing."""
|
116 |
if not self.model:
|
117 |
+
if not self.initialize_gemini():
|
118 |
+
logger.error("Could not analyze posts, Gemini AI not initialized")
|
119 |
raise Exception("Gemini AI initialization failed")
|
|
|
120 |
try:
|
121 |
logger.info("Calling Gemini API for conflict analysis")
|
122 |
response = await self.model.generate_content_async(prompt)
|
123 |
result = response.text
|
124 |
+
|
|
|
125 |
json_match = re.search(r'```(?:json)?\n(.*?)\n```', result, re.DOTALL)
|
126 |
if json_match:
|
127 |
+
json_str = json_match.group(1)
|
128 |
else:
|
129 |
+
json_str = result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
+
return json.loads(json_str)
|
132 |
+
except json.JSONDecodeError as e:
|
133 |
+
logger.error(f"JSON parsing error: {e}. Raw response: {result}")
|
134 |
+
raise
|
135 |
except Exception as e:
|
136 |
logger.error(f"Error calling Gemini API: {str(e)}")
|
|
|
137 |
raise
|
138 |
|
139 |
def _extract_tension_level(self, level_text: str) -> TensionLevel:
|
|
|
140 |
level_text = level_text.lower()
|
141 |
+
if "critical" in level_text: return TensionLevel.CRITICAL
|
142 |
+
if "high" in level_text: return TensionLevel.HIGH
|
143 |
+
if "medium" in level_text: return TensionLevel.MEDIUM
|
144 |
+
return TensionLevel.LOW
|
|
|
|
|
|
|
|
|
145 |
|
146 |
def _process_key_developments(self, developments_data: List[Dict]) -> List[KeyDevelopment]:
|
|
|
147 |
key_developments = []
|
148 |
+
if not developments_data: return key_developments
|
|
|
|
|
149 |
for dev in developments_data:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
key_developments.append(
|
151 |
KeyDevelopment(
|
152 |
+
title=dev.get("title", "Unnamed Development"),
|
153 |
+
description=dev.get("description", "No description provided"),
|
154 |
sources=dev.get("sources", []),
|
155 |
timestamp=datetime.now()
|
156 |
)
|
157 |
)
|
158 |
return key_developments
|
159 |
|
160 |
+
async def analyze_posts(self, posts: List[RedditPost], trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
|
161 |
+
"""Analyze Reddit posts using Gemini AI and generate a conflict analysis."""
|
162 |
+
if not posts:
|
163 |
+
logger.warning("No Reddit posts provided for analysis")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
return None
|
|
|
165 |
try:
|
166 |
+
prompt = self._prepare_prompt(posts)
|
167 |
analysis_data = await self._call_gemini(prompt)
|
168 |
|
169 |
+
if not analysis_data:
|
170 |
+
logger.error("Received no data from Gemini call.")
|
171 |
+
return None
|
172 |
+
|
173 |
key_developments = self._process_key_developments(analysis_data.get("key_developments", []))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
analysis = ConflictAnalysis(
|
176 |
analysis_id=str(uuid.uuid4()),
|
177 |
generated_at=datetime.now(),
|
178 |
+
situation_summary=analysis_data.get("situation_summary", "No summary provided."),
|
179 |
key_developments=key_developments,
|
180 |
+
reliability_assessment=analysis_data.get("reliability_assessment", {}),
|
181 |
+
regional_implications=analysis_data.get("regional_implications", {}),
|
182 |
+
tension_level=self._extract_tension_level(analysis_data.get("tension_level", "LOW")),
|
183 |
+
tension_rationale=analysis_data.get("tension_rationale", "No rationale provided."),
|
184 |
+
source_posts=posts,
|
185 |
update_triggered_by=trigger,
|
186 |
+
latest_status=analysis_data.get("latest_status", "No recent status update available.")
|
187 |
)
|
188 |
|
189 |
logger.info(f"Generated conflict analysis with ID: {analysis.analysis_id}")
|
190 |
return analysis
|
|
|
191 |
except RetryError as e:
|
192 |
logger.error(f"Failed to generate analysis after multiple retries: {str(e)}")
|
193 |
return None
|
194 |
except Exception as e:
|
195 |
+
logger.error(f"Unexpected error in post analysis: {str(e)}")
|
196 |
return None
|
197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
async def generate_conflict_analysis(self, days_back: int = 2, trigger: str = "scheduled") -> Optional[ConflictAnalysis]:
|
199 |
+
"""Generate a complete conflict analysis using posts from Reddit."""
|
200 |
+
if not self.reddit_service.reddit:
|
201 |
+
if not self.reddit_service.initialize():
|
202 |
+
logger.error("Cannot generate analysis, Reddit Service failed to initialize.")
|
203 |
+
return None
|
204 |
+
|
205 |
+
posts = await self.reddit_service.get_related_posts(self.search_keywords, days_back)
|
206 |
+
if not posts:
|
207 |
+
logger.warning("No relevant Reddit posts found for conflict analysis")
|
208 |
return None
|
209 |
|
210 |
+
return await self.analyze_posts(posts, trigger)
|
211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
def get_search_keywords(self) -> List[str]:
|
|
|
213 |
return self.search_keywords
|
214 |
|
215 |
+
def update_search_keywords(self, keywords: List[str]):
|
|
|
216 |
self.search_keywords = keywords
|
217 |
logger.info(f"Updated search keywords. New count: {len(keywords)}")
|
218 |
+
|
219 |
+
def update_sources(self, sources):
|
220 |
+
self.reddit_service.update_sources(sources)
|
221 |
+
|
|
|
222 |
def get_sources(self):
|
223 |
+
return self.reddit_service.get_sources()
|
|
|
224 |
|
225 |
async def close(self) -> None:
|
226 |
+
await self.reddit_service.close()
|
|
app.py
CHANGED
@@ -1,292 +1,248 @@
|
|
1 |
-
import asyncio
|
2 |
-
import os
|
3 |
-
from datetime import datetime
|
4 |
-
from typing import Dict, List, Optional
|
5 |
-
|
6 |
-
from dotenv import load_dotenv
|
7 |
-
from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, status
|
8 |
-
from fastapi.middleware.cors import CORSMiddleware
|
9 |
-
from loguru import logger
|
10 |
-
|
11 |
-
|
12 |
-
from
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
#
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
await
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
logger.
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
return
|
153 |
-
"
|
154 |
-
"
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
return
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
twitter.update_sources(sources)
|
249 |
-
return {
|
250 |
-
"message": "News sources updated",
|
251 |
-
"count": len(sources)
|
252 |
-
}
|
253 |
-
|
254 |
-
|
255 |
-
@app.get("/keywords", response_model=List[str])
|
256 |
-
async def get_search_keywords(
|
257 |
-
analysis: AnalysisService = Depends(get_analysis_service)
|
258 |
-
):
|
259 |
-
"""Get the current search keywords."""
|
260 |
-
return analysis.get_search_keywords()
|
261 |
-
|
262 |
-
|
263 |
-
@app.post("/keywords", response_model=Dict)
|
264 |
-
async def update_search_keywords(
|
265 |
-
keywords: List[str],
|
266 |
-
analysis: AnalysisService = Depends(get_analysis_service)
|
267 |
-
):
|
268 |
-
"""Update the search keywords."""
|
269 |
-
analysis.update_search_keywords(keywords)
|
270 |
-
return {
|
271 |
-
"message": "Search keywords updated",
|
272 |
-
"count": len(keywords)
|
273 |
-
}
|
274 |
-
|
275 |
-
|
276 |
-
@app.get("/tension-levels", response_model=List[str])
|
277 |
-
async def get_tension_levels():
|
278 |
-
"""Get the available tension levels."""
|
279 |
-
return [level.value for level in TensionLevel]
|
280 |
-
|
281 |
-
|
282 |
-
@app.get("/rss-feeds", response_model=Dict[str, str])
|
283 |
-
async def get_registered_rss_feeds(
|
284 |
-
twitter: RssTwitterService = Depends(get_twitter_service)
|
285 |
-
):
|
286 |
-
"""Get all registered RSS feeds."""
|
287 |
-
return twitter.rss_feed_urls
|
288 |
-
|
289 |
-
|
290 |
-
if __name__ == "__main__":
|
291 |
-
import uvicorn
|
292 |
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
from datetime import datetime
|
4 |
+
from typing import Dict, List, Optional
|
5 |
+
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, status
|
8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
9 |
+
from loguru import logger
|
10 |
+
|
11 |
+
# --- UPDATED IMPORTS ---
|
12 |
+
from analysis_service import AnalysisService
|
13 |
+
# No longer need RssTwitterService
|
14 |
+
from models import (ConflictAnalysis, HealthCheck, SubredditSource, TensionLevel,
|
15 |
+
UpdateRequest)
|
16 |
+
|
17 |
+
# Load environment variables from .env file
|
18 |
+
load_dotenv()
|
19 |
+
|
20 |
+
# Configure logging
|
21 |
+
os.makedirs("logs", exist_ok=True)
|
22 |
+
logger.add("logs/app.log", rotation="500 MB", level=os.getenv("LOG_LEVEL", "INFO"))
|
23 |
+
|
24 |
+
# Global readiness flag
|
25 |
+
app_ready = False
|
26 |
+
|
27 |
+
# Create FastAPI application
|
28 |
+
app = FastAPI(
|
29 |
+
title="WesternFront API",
|
30 |
+
description="AI-powered conflict tracker for monitoring India-Pakistan tensions using Reddit data",
|
31 |
+
version="2.0.0" # Version bump for new data source
|
32 |
+
)
|
33 |
+
|
34 |
+
# Add CORS middleware
|
35 |
+
app.add_middleware(
|
36 |
+
CORSMiddleware,
|
37 |
+
allow_origins=["*"], # Adjust this for production
|
38 |
+
allow_credentials=True,
|
39 |
+
allow_methods=["*"],
|
40 |
+
allow_headers=["*"],
|
41 |
+
)
|
42 |
+
|
43 |
+
# --- UPDATED: Services ---
|
44 |
+
# The AnalysisService now manages the RedditService internally
|
45 |
+
analysis_service = AnalysisService()
|
46 |
+
|
47 |
+
# In-memory store for latest analysis
|
48 |
+
latest_analysis: Optional[ConflictAnalysis] = None
|
49 |
+
last_update_time: Optional[datetime] = None
|
50 |
+
|
51 |
+
|
52 |
+
async def get_analysis_service() -> AnalysisService:
    """FastAPI dependency returning the shared AnalysisService instance."""
    # One module-level service instance is reused for every request.
    return analysis_service
|
55 |
+
|
56 |
+
|
57 |
+
@app.on_event("startup")
async def startup_event():
    """Initialize services on startup.

    Initializes Gemini AI and the internal Reddit client, schedules the
    first analysis in the background, and starts the periodic refresh
    loop. The global ``app_ready`` flag is only set once both
    initializers report success.
    """
    global app_ready

    logger.info("Starting up WesternFront API v2.0")

    try:
        # Both initializers return a success flag; previously the results
        # were ignored, so the app could report "ready" with a dead client.
        gemini_ok = analysis_service.initialize_gemini()
        reddit_ok = analysis_service.reddit_service.initialize()
        if not (gemini_ok and reddit_ok):
            logger.error(
                f"Service initialization failed (gemini={gemini_ok}, reddit={reddit_ok})"
            )
            app_ready = False
            return

        # Schedule first update in background (does not block startup)
        asyncio.create_task(update_analysis_task("startup"))

        # Set up periodic update task
        asyncio.create_task(periodic_update())

        # Mark application as ready to accept requests
        app_ready = True
        logger.info("Application ready to accept requests")

    except Exception as e:
        logger.error(f"Error during startup: {e}")
        app_ready = False
|
82 |
+
|
83 |
+
|
84 |
+
@app.on_event("shutdown")
async def shutdown_event():
    """Release resources held by the analysis service on shutdown."""
    logger.info("Shutting down WesternFront API")
    # `close` is optional on the service, so probe for it before awaiting.
    closer = getattr(analysis_service, "close", None)
    if analysis_service and closer is not None:
        await closer()
|
90 |
+
|
91 |
+
|
92 |
+
async def update_analysis_task(trigger: str = "scheduled") -> None:
    """Regenerate the conflict analysis and publish it to module globals.

    On success, ``latest_analysis`` and ``last_update_time`` are refreshed;
    on failure the previously stored analysis (if any) is left untouched.
    """
    global latest_analysis, last_update_time

    try:
        logger.info(f"Starting analysis update (trigger: {trigger})")

        # The analysis service handles data collection and AI summarization.
        analysis = await analysis_service.generate_conflict_analysis(trigger=trigger)

        if not analysis:
            logger.warning("Failed to generate new analysis. No relevant data might be available.")
            return

        latest_analysis = analysis
        last_update_time = datetime.now()
        logger.info(f"Analysis updated successfully. Tension level: {analysis.tension_level}")

    except Exception as e:
        logger.error(f"Error in update_analysis_task: {str(e)}")
|
111 |
+
|
112 |
+
|
113 |
+
async def periodic_update() -> None:
    """Background loop that refreshes the analysis at a fixed interval."""
    interval_minutes = int(os.getenv("UPDATE_INTERVAL_MINUTES", 60))

    while True:
        try:
            # Sleep first: startup already schedules its own initial update.
            await asyncio.sleep(interval_minutes * 60)
            await update_analysis_task("scheduled")
        except Exception as e:
            logger.error(f"Error in periodic_update: {str(e)}")
            await asyncio.sleep(300)  # Wait 5 minutes if there was an error
|
124 |
+
|
125 |
+
|
126 |
+
@app.get("/", response_model=Dict)
async def root():
    """Root endpoint with basic information about the API."""
    current_status = "ready" if app_ready else "initializing"
    return {
        "name": "WesternFront API",
        "description": "AI-powered conflict tracker for India-Pakistan tensions using Reddit data",
        "version": "2.0.0",
        "status": current_status,
    }
|
135 |
+
|
136 |
+
|
137 |
+
@app.get("/ready")
async def readiness_check():
    """Readiness probe; returns 503 until startup has completed."""
    if app_ready:
        return {"status": "ready", "timestamp": datetime.now().isoformat()}
    raise HTTPException(status_code=503, detail="Application is starting up")
|
143 |
+
|
144 |
+
|
145 |
+
@app.get("/health", response_model=HealthCheck)
async def health_check():
    """Health check endpoint reporting per-component initialization state."""
    # Both clients stay None until their initializers succeed, so a simple
    # None-check doubles as the component health probe.
    components = {
        "reddit_service": analysis_service.reddit_service.reddit is not None,
        "analysis_service": analysis_service.model is not None,
    }
    return HealthCheck(
        status="healthy" if app_ready else "initializing",
        version="2.0.0",
        timestamp=datetime.now(),
        last_update=last_update_time,
        components_status=components,
    )
|
162 |
+
|
163 |
+
# The HEAD /health endpoint is a bit redundant with FastAPI, so it can be removed for simplicity
|
164 |
+
# unless you have a specific use case for it.
|
165 |
+
|
166 |
+
@app.get("/analysis", response_model=Optional[ConflictAnalysis])
async def get_latest_analysis():
    """Get the latest conflict analysis, or 404 if none exists yet."""
    if latest_analysis:
        return latest_analysis
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="No analysis available yet. Try triggering an update."
    )
|
175 |
+
|
176 |
+
|
177 |
+
@app.post("/analysis/update", response_model=Dict)
async def trigger_update(request: UpdateRequest):
    """Trigger an analysis update in the background.

    When ``request.force`` is set, the Reddit post cache is cleared first
    so fresh posts are fetched instead of cached ones.
    """
    if request.force:
        analysis_service.reddit_service.in_memory_cache.clear()
        logger.info("Cache cleared for forced refresh.")

    # Run the (potentially slow) update without blocking this request.
    asyncio.create_task(update_analysis_task("manual"))

    return {
        "message": "Analysis update triggered",
        "timestamp": datetime.now().isoformat(),
        "force_refresh": request.force,
    }
|
193 |
+
|
194 |
+
|
195 |
+
# --- UPDATED: Now manages subreddit sources ---
|
196 |
+
@app.get("/sources", response_model=List[SubredditSource])
async def get_subreddit_sources(
    analysis: AnalysisService = Depends(get_analysis_service)
):
    """Get the current list of subreddit sources."""
    # The analysis service owns the source configuration.
    return analysis.get_sources()
|
202 |
+
|
203 |
+
|
204 |
+
# --- UPDATED: Now manages subreddit sources ---
|
205 |
+
@app.post("/sources", response_model=Dict)
async def update_subreddit_sources(
    sources: List[SubredditSource],
    analysis: AnalysisService = Depends(get_analysis_service)
):
    """Update the list of subreddit sources."""
    analysis.update_sources(sources)
    return {"message": "Subreddit sources updated", "count": len(sources)}
|
216 |
+
|
217 |
+
|
218 |
+
@app.get("/keywords", response_model=List[str])
async def get_search_keywords(
    analysis: AnalysisService = Depends(get_analysis_service)
):
    """Get the current search keywords."""
    # Keywords are managed by the analysis service.
    return analysis.get_search_keywords()
|
224 |
+
|
225 |
+
|
226 |
+
@app.post("/keywords", response_model=Dict)
async def update_search_keywords(
    keywords: List[str],
    analysis: AnalysisService = Depends(get_analysis_service)
):
    """Update the search keywords."""
    analysis.update_search_keywords(keywords)
    return {"message": "Search keywords updated", "count": len(keywords)}
|
237 |
+
|
238 |
+
|
239 |
+
@app.get("/tension-levels", response_model=List[str])
async def get_tension_levels():
    """Get the available tension levels."""
    # Expose the enum's string values (e.g. "Critical").
    levels = [level.value for level in TensionLevel]
    return levels
|
243 |
+
|
244 |
+
# --- REMOVED: /rss-feeds endpoint is no longer applicable ---
|
245 |
+
|
246 |
+
if __name__ == "__main__":
    # Local development entry point; reload=True enables auto-restart on
    # code changes and should not be used in production deployments.
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
models.py
CHANGED
@@ -13,23 +13,26 @@ class TensionLevel(str, Enum):
|
|
13 |
CRITICAL = "Critical"
|
14 |
|
15 |
|
16 |
-
|
17 |
-
|
|
|
18 |
name: str
|
19 |
-
|
20 |
-
country: str
|
21 |
-
reliability_score: float = Field(ge=0.0, le=1.0)
|
22 |
is_active: bool = True
|
23 |
|
24 |
|
25 |
-
|
26 |
-
|
|
|
27 |
id: str
|
28 |
-
|
|
|
|
|
29 |
author: str
|
30 |
created_at: datetime
|
31 |
-
|
32 |
url: str
|
|
|
33 |
|
34 |
|
35 |
class KeyDevelopment(BaseModel):
|
@@ -40,17 +43,25 @@ class KeyDevelopment(BaseModel):
|
|
40 |
timestamp: Optional[datetime] = None
|
41 |
|
42 |
|
|
|
43 |
class ConflictAnalysis(BaseModel):
|
44 |
-
"""Model for a conflict analysis."""
|
45 |
analysis_id: str
|
46 |
generated_at: datetime
|
47 |
-
latest_status: str
|
48 |
situation_summary: str
|
49 |
key_developments: List[KeyDevelopment]
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
52 |
tension_level: TensionLevel
|
53 |
-
|
|
|
|
|
|
|
|
|
54 |
update_triggered_by: str
|
55 |
|
56 |
|
|
|
13 |
CRITICAL = "Critical"
|
14 |
|
15 |
|
16 |
+
# --- NEW: Replaces NewsSource ---
|
17 |
+
class SubredditSource(BaseModel):
    """Model for a subreddit as a news source."""
    # Subreddit name without the "r/" prefix, e.g. "geopolitics".
    name: str
    # Trust weighting constrained to [0.0, 1.0]; defaults to a moderate 0.7.
    reliability_score: float = Field(default=0.7, ge=0.0, le=1.0)
    # Inactive sources are skipped when fetching posts.
    is_active: bool = True
|
22 |
|
23 |
|
24 |
+
# --- NEW: Replaces Tweet ---
|
25 |
+
class RedditPost(BaseModel):
    """Model for a Reddit post."""
    # Submission id as returned by the Reddit API.
    id: str
    title: str
    text: str  # Combination of title and selftext
    selftext: str
    author: str
    # Submission creation time; populated as timezone-aware UTC by the
    # Reddit service (from the API's created_utc timestamp).
    created_at: datetime
    # Net vote score at fetch time.
    score: int
    url: str
    # Name of the subreddit the post was collected from.
    subreddit: str
|
36 |
|
37 |
|
38 |
class KeyDevelopment(BaseModel):
|
|
|
43 |
timestamp: Optional[datetime] = None
|
44 |
|
45 |
|
46 |
+
# --- UPDATED: ConflictAnalysis now uses Reddit posts ---
|
47 |
class ConflictAnalysis(BaseModel):
    """Model for a conflict analysis based on Reddit data."""
    analysis_id: str
    generated_at: datetime
    latest_status: str
    situation_summary: str
    key_developments: List[KeyDevelopment]

    # Updated to Dict for a more structured assessment from the AI
    # NOTE(review): requires `Dict` to be imported from typing in this
    # module — confirm the import block includes it.
    reliability_assessment: Dict[str, str]
    regional_implications: Dict[str, str]

    tension_level: TensionLevel
    tension_rationale: str  # Added field for justification

    # Changed from source_tweets to source_posts
    source_posts: List[RedditPost]

    update_triggered_by: str
|
66 |
|
67 |
|
reddit_service.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
from datetime import datetime, timedelta, timezone
|
5 |
+
from typing import Dict, List, Optional
|
6 |
+
|
7 |
+
import asyncpraw # Import asyncpraw instead of praw
|
8 |
+
from cachetools import TTLCache
|
9 |
+
from loguru import logger
|
10 |
+
|
11 |
+
from models import RedditPost, SubredditSource
|
12 |
+
|
13 |
+
class RedditService:
    """Service for collecting posts via the Reddit API using Async PRAW."""

    def __init__(self):
        # Async PRAW client; stays None until initialize() succeeds.
        self.reddit = None
        self.cache_expiry = int(os.getenv("CACHE_EXPIRY_MINUTES", 60))
        # Per-subreddit TTL cache keyed by "reddit_<subreddit>_<limit>".
        self.in_memory_cache = TTLCache(maxsize=100, ttl=self.cache_expiry * 60)

        self.sources = [
            # Primary Subreddits
            SubredditSource(name="geopolitics", reliability_score=0.85),  # High-quality analysis, neutral discussion
            SubredditSource(name="anime_titties", reliability_score=0.8),  # International news, quality control
            SubredditSource(name="CredibleDefense", reliability_score=0.9),  # Military/security analysis
            SubredditSource(name="worldnews", reliability_score=0.8),  # Broad international coverage

            # Regional Focus
            SubredditSource(name="GeopoliticsIndia", reliability_score=0.75),  # India-focused geopolitical discussions
            SubredditSource(name="SouthAsia", reliability_score=0.7),  # Regional coverage
            SubredditSource(name="neutralnews", reliability_score=0.8),  # Fact-based reporting standards

            # Existing sources
            SubredditSource(name="india", reliability_score=0.7),
            SubredditSource(name="pakistan", reliability_score=0.7),
        ]

        # Lightweight counters for observability.
        self.stats = {"requests": 0, "cache_hits": 0, "errors": 0}

    def initialize(self) -> bool:
        """Initialize the Reddit API client.

        Returns:
            True when the client was constructed, False when credentials
            are missing or construction failed.
        """
        try:
            logger.info("Initializing Async Reddit service")
            client_id = os.getenv("REDDIT_CLIENT_ID")
            client_secret = os.getenv("REDDIT_CLIENT_SECRET")
            user_agent = os.getenv("REDDIT_USER_AGENT")

            if not all([client_id, client_secret, user_agent]):
                logger.error("Reddit API credentials not found.")
                return False

            # --- Use asyncpraw.Reddit ---
            self.reddit = asyncpraw.Reddit(
                client_id=client_id,
                client_secret=client_secret,
                user_agent=user_agent,
            )
            logger.info("Async Reddit service initialized successfully.")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize Async Reddit service: {e}")
            return False

    async def get_posts_from_subreddit(self, source: SubredditSource, limit: int = 100) -> List[RedditPost]:
        """Get recent posts from a specific subreddit using async calls.

        Results are cached per (subreddit, limit) for the configured TTL.
        Returns an empty list on any failure.
        """
        # FIX: previously a missing client surfaced as an AttributeError that
        # the broad except below swallowed and miscounted as an API error.
        if self.reddit is None:
            logger.error("Reddit client not initialized; call initialize() first.")
            return []

        cache_key = f"reddit_{source.name}_{limit}"
        if cache_key in self.in_memory_cache:
            self.stats["cache_hits"] += 1
            logger.debug(f"Returning cached posts for r/{source.name}")
            return self.in_memory_cache[cache_key]

        self.stats["requests"] += 1
        logger.info(f"Fetching posts from r/{source.name}")
        try:
            # --- Use async methods directly ---
            subreddit = await self.reddit.subreddit(source.name)
            posts = []

            # Use `async for` to iterate through the async generator
            async for sub in subreddit.new(limit=limit):
                post = RedditPost(
                    id=sub.id,
                    title=sub.title,
                    text=f"{sub.title}\n{sub.selftext}",
                    selftext=sub.selftext,
                    author=str(sub.author),
                    created_at=datetime.fromtimestamp(sub.created_utc, tz=timezone.utc),
                    score=sub.score,
                    url=sub.url,
                    subreddit=source.name,
                )
                posts.append(post)

            self.in_memory_cache[cache_key] = posts
            logger.info(f"Fetched and cached {len(posts)} posts from r/{source.name}")
            return posts
        except Exception as e:
            self.stats["errors"] += 1
            logger.error(f"Could not fetch from r/{source.name}. Error: {e}")
            return []

    async def get_related_posts(self, keywords: List[str], days_back: int = 2) -> List[RedditPost]:
        """Fetch posts from all active sources, keep those newer than
        ``days_back`` days that mention any keyword, dedupe by id, and
        return them sorted newest-first."""
        all_posts = []
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
        active_sources = [s for s in self.sources if s.is_active]

        # Fetch all subreddits concurrently; each task handles its own errors.
        tasks = [self.get_posts_from_subreddit(source) for source in active_sources]
        source_posts_list = await asyncio.gather(*tasks)

        for source_posts in source_posts_list:
            for post in source_posts:
                if post.created_at >= cutoff_date:
                    # Keyword match is case-insensitive over title + selftext.
                    if any(keyword.lower() in post.text.lower() for keyword in keywords):
                        all_posts.append(post)

        # Dedupe (a post may appear in multiple fetches) and sort newest-first.
        unique_posts = {post.id: post for post in all_posts}
        sorted_posts = sorted(list(unique_posts.values()), key=lambda p: p.created_at, reverse=True)

        logger.info(f"Found {len(sorted_posts)} relevant posts from Reddit.")
        return sorted_posts

    def update_sources(self, sources: List[SubredditSource]):
        """Replace the subreddit source list and invalidate cached posts."""
        self.sources = sources
        self.in_memory_cache.clear()
        logger.info(f"Updated subreddit sources. New count: {len(sources)}")

    def get_sources(self) -> List[SubredditSource]:
        """Return the current subreddit source list."""
        return self.sources

    async def close(self):
        """Close the Async PRAW client session."""
        if self.reddit:
            await self.reddit.close()
            logger.info("Async Reddit service session closed.")
|
requirements.txt
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
-
fastapi==0.103.1
|
2 |
-
uvicorn[standard]==0.23.2
|
3 |
-
python-dotenv==1.0.0
|
4 |
-
loguru==0.7.0
|
5 |
-
google-generativeai==0.3.0
|
6 |
-
tenacity==8.2.2
|
7 |
-
cachetools==5.3.0
|
8 |
-
pydantic==2.3.0
|
9 |
-
|
10 |
-
beautifulsoup4==4.12.2
|
11 |
-
httpx[http2]>=0.24.0
|
|
|
1 |
+
fastapi==0.103.1
|
2 |
+
uvicorn[standard]==0.23.2
|
3 |
+
python-dotenv==1.0.0
|
4 |
+
loguru==0.7.0
|
5 |
+
google-generativeai==0.3.0
|
6 |
+
tenacity==8.2.2
|
7 |
+
cachetools==5.3.0
|
8 |
+
pydantic==2.3.0
|
9 |
+
asyncpraw==7.7.2
|
|
|
|