diff --git "a/main.py" "b/main.py" --- "a/main.py" +++ "b/main.py" @@ -1,1371 +1,1179 @@ -import socket -socket.setdefaulttimeout(4000) - -from fastapi import FastAPI, HTTPException, Request -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel -from typing import Optional, Any, Dict, List -import aiohttp -import os -from datetime import datetime, timezone -import json -import re -from google.oauth2.service_account import Credentials as ServiceAccountCredentials -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError -from dotenv import load_dotenv -import asyncio -import logging - -# --- Logging Setup --- -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -app = FastAPI() - -# --- Configuration --- -load_dotenv() - -# CORS -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Consider restricting in production - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Google Sheets Config -# Spreadsheet containing Scammer and DWC info -SCAMMER_DWC_SPREADSHEET_ID = os.getenv('SCAMMER_DWC_SPREADSHEET_ID', '1sgkhBNGw_r6tBIxvdeXaI0bVmWBeACN4jiw_oDEeXLw') -# Spreadsheet containing Value lists and Dupe list -VALUES_DUPE_SPREADSHEET_ID = os.getenv('VALUES_DUPE_SPREADSHEET_ID', '1Toe07o3P517q8sm9Qb1e5xyFWCuwgskj71IKJwJNfNU') - -SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly'] - -# Sheet Names and Ranges within SCAMMER_DWC_SPREADSHEET_ID -USER_SCAMMER_SHEET = "User Scammer Files" -USER_SCAMMER_RANGE = "B6:G" -SERVER_SCAMMER_SHEET = "Server Scammer Files" -SERVER_SCAMMER_RANGE = "B6:F" -DWC_SHEET = "DWC Servers / Users" -DWC_RANGE = "B6:G" -TRUSTED_SHEET = "Trusted Users / Servers" -TRUSTED_RANGE = "B6:E" - -# Sheet Names and Ranges within VALUES_DUPE_SPREADSHEET_ID -DUPE_LIST_SHEET = "Dupe List" -DUPE_LIST_RANGE = "B2:B" -# Value Categories (Sheet Names) -CATEGORIES = [ - "Vehicles", "Textures", "Colours", "Spoilers", - "Rims", "Furnitures", "Gun Skins", "Hyperchromes" -] -VALUES_RANGE = 'B6:R' # Range within each category sheet including column R for lastUpdated - -# Cache Update Interval -CACHE_UPDATE_INTERVAL_SECONDS = 60 - -# Sheet Update Delay -SHEET_UPDATE_DELAY_SECONDS = 10 # 10 second delay between sheet updates - -# Webhook URLs -SCAMMER_WEBHOOK_URL = os.getenv("SCAMMER_WEBHOOK_URL") -VALUE_WEBHOOK_URL = os.getenv("VALUE_WEBHOOK_URL") -DUPE_CHECK_WEBHOOK_URL = os.getenv("DUPE_CHECK_WEBHOOK_URL") -VISITOR_WEBHOOK_URL = os.getenv("VISITOR_WEBHOOK_URL") # New webhook URL for visitor tracking - -# Visitor tracking configuration -VISITOR_BATCH_INTERVAL_SECONDS = 60 # Send visitor webhooks every 1 minute -MAX_VISITORS_PER_WEBHOOK = 3 # Maximum number of visitors to include in a single webhook - -# --- Global Cache --- -cache = { - "values": {}, # Dict mapping category name to list of items - "value_changes": {}, # Dict mapping category name to list of recent changes (for API endpoint) - "user_scammers": [], - "server_scammers": [], - "dwc": [], - "trusted": [], # New cache key for trusted entries - "dupes": [], # List of duped usernames - "last_updated": None, # Timestamp of the last successful/partial update - "is_ready": False, # Is the cache populated at least once? - "service_available": True, # Is the Google Sheets service reachable? 
- "visitors": [], # Recent visitors for batched webhook notifications - "last_visitor_webhook": None # Timestamp of last visitor webhook -} -# --- Google Sheets Initialization --- -sheets_service = None # Initialize as None - -def quote_sheet_name(name: str) -> str: - """Adds single quotes around a sheet name if it needs them.""" - if not name: - return "''" - # Simple check: if it contains spaces or non-alphanumeric chars (excluding _) - if not re.match(r"^[a-zA-Z0-9_]+$", name): - # Escape existing single quotes within the name - escaped_name = name.replace("'", "''") - return f"'{escaped_name}'" - return name - -def init_google_sheets(scopes=SCOPES): - """Initialize Google Sheets credentials from environment variable or file""" - global sheets_service, cache - try: - creds_json_str = os.getenv('CREDENTIALS_JSON') - if creds_json_str: - logger.info("Attempting to load credentials from CREDENTIALS_JSON environment variable.") - creds_json = json.loads(creds_json_str) - creds = ServiceAccountCredentials.from_service_account_info( - creds_json, - scopes=scopes - ) - sheets_service = build('sheets', 'v4', credentials=creds, cache_discovery=False) # Disable discovery cache - logger.info("Google Sheets service initialized successfully from ENV VAR.") - cache["service_available"] = True - return sheets_service - else: - logger.info("CREDENTIALS_JSON environment variable not found. Falling back to file.") - raise ValueError("CREDENTIALS_JSON not set") # Trigger fallback explicitly - - except Exception as e: - logger.warning(f"Error initializing Google Sheets from ENV VAR: {e}. Trying file...") - try: - # Fallback to loading credentials from file 'credentials.json' - creds_file = 'credentials.json' - if os.path.exists(creds_file): - logger.info(f"Attempting to load credentials from file '{creds_file}'") - creds = ServiceAccountCredentials.from_service_account_file( - creds_file, - scopes=scopes - ) - sheets_service = build('sheets', 'v4', credentials=creds, cache_discovery=False) - logger.info("Google Sheets service initialized successfully from file.") - cache["service_available"] = True - return sheets_service - else: - logger.error(f"Credentials file '{creds_file}' not found.") - raise FileNotFoundError(f"'{creds_file}' not found") - - except Exception as file_e: - logger.error(f"Error loading credentials from file: {file_e}") - logger.critical("Google Sheets service could not be initialized. 
API will be limited.") - cache["service_available"] = False - sheets_service = None - return None - -# Initialize on module load -init_google_sheets() - - -# --- Helper Functions (Data Extraction & Formatting) --- - -def extract_drive_id(url): - if not url or not isinstance(url, str): return None - match = re.search(r'https://drive\.google\.com/file/d/([^/]+)', url) - return match.group(1) if match else None - -def convert_to_thumbnail_url(drive_url): - drive_id = extract_drive_id(drive_url) - return f"https://drive.google.com/thumbnail?id={drive_id}&sz=w1000" if drive_id else drive_url - -def extract_image_url(formula, drive_url=None): - # Priority to explicit drive_url if provided - if drive_url and isinstance(drive_url, str) and 'drive.google.com' in drive_url: - return convert_to_thumbnail_url(drive_url) - if not formula or not isinstance(formula, str): return '' - # Handle direct URLs - if formula.startswith('http://') or formula.startswith('https://'): - return formula - # Handle =IMAGE("...") formula - if formula.startswith('=IMAGE('): - match = re.search(r'=IMAGE\("([^"]+)"', formula) - if match: return match.group(1) - return '' # Return empty string if no valid URL found - -def format_currency(value: Any) -> Optional[str]: - if value is None or str(value).strip() == '': return 'N/A' - try: - num_str = str(value).replace('$', '').replace(',', '').strip() - if not num_str or num_str.lower() == 'n/a': return 'N/A' - num = float(num_str) - return f"${num:,.0f}" - except (ValueError, TypeError): - # Allow text like "Event", "Unobtainable" etc. to pass through - if isinstance(value, str) and value.strip() and not re.match(r'^-?[\d,.$]+\$?$', value.strip()): - return value.strip() # Return original text if non-numeric-like - return 'N/A' # Return N/A for things that look like bad numbers - -def parse_cached_currency(value_str: Optional[str]) -> Optional[float]: - if value_str is None or str(value_str).strip().lower() == 'n/a': - return None - try: - num_str = str(value_str).replace('$', '').replace(',', '').strip() - return float(num_str) - except (ValueError, TypeError): - return None # Return None if it's not a parsable number (e.g., "Event") - -def clean_string(value, default='N/A'): - if value is None: return default - cleaned = str(value).strip() - return cleaned if cleaned else default - -def clean_string_optional(value): - if value is None: return None - cleaned = str(value).strip() - return cleaned if cleaned and cleaned != '-' else None - -def parse_alt_accounts(value): - if value is None: return [] - raw_string = str(value).strip() - if not raw_string or raw_string == '-': return [] - return [acc.strip() for acc in raw_string.split(',') if acc.strip()] - - -# --- Roblox API Helpers --- -async def get_roblox_user_id(session: aiohttp.ClientSession, username: str): - if not username: return None - url = "https://users.roblox.com/v1/usernames/users" - payload = {"usernames": [username], "excludeBannedUsers": False} - try: - # Increased timeout specifically for Roblox API calls which can be slow - async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response: - if response.status == 200: - data = await response.json() - if data and data.get("data") and len(data["data"]) > 0: - return data["data"][0].get("id") - else: - logger.warning(f"Roblox User API returned status {response.status} for username '{username}'") - return None - except asyncio.TimeoutError: - logger.warning(f"Timeout fetching Roblox User ID for {username}") - return None - except 
aiohttp.ClientError as e: - logger.warning(f"Network error fetching Roblox User ID for {username}: {e}") - return None - except Exception as e: - logger.error(f"Unexpected exception fetching Roblox User ID for {username}: {e}", exc_info=True) - return None - -async def get_roblox_avatar_url(session: aiohttp.ClientSession, user_id: int): - if not user_id: return None - url = f"https://thumbnails.roblox.com/v1/users/avatar-headshot?userIds={user_id}&size=150x150&format=Png&isCircular=false" - try: - # Increased timeout specifically for Roblox API calls - async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response: - if response.status == 200: - data = await response.json() - if data and data.get("data") and len(data["data"]) > 0: - return data["data"][0].get("imageUrl") - else: - logger.warning(f"Roblox Thumbnail API returned status {response.status} for user ID {user_id}") - return None - except asyncio.TimeoutError: - logger.warning(f"Timeout fetching Roblox avatar for User ID {user_id}") - return None - except aiohttp.ClientError as e: - logger.warning(f"Network error fetching Roblox avatar for User ID {user_id}: {e}") - return None - except Exception as e: - logger.error(f"Unexpected exception fetching Roblox avatar for User ID {user_id}: {e}", exc_info=True) - return None - - -# --- Data Processing Functions --- - -def process_sheet_data(values): # For Value Categories - if not values: return [] - processed_data = [] - - for row_idx, row in enumerate(values): # Start counting from sheet row 6 (index 0 here) - if not row or not any(str(cell).strip() for cell in row if cell is not None): continue - # Skip header-like rows (e.g., "LEVEL 1 | HYPERCHROMES" in column F/index 4) - # Use index 4 for Value column (F) - if len(row) > 4 and isinstance(row[4], str) and re.search(r'(LEVEL \d+ \|)|(VALUE)', row[4], re.IGNORECASE): - #logger.debug(f"Skipping potential header row {row_idx+6}: {row}") - continue - - # Indices based on B6:P (0-indexed from B) - icon_formula = row[0] if len(row) > 0 else '' - name = row[2] if len(row) > 2 else 'N/A' # Column D - value_raw = row[4] if len(row) > 4 else 'N/A' # Column F - duped_value_raw = row[6] if len(row) > 6 else 'N/A' # Column H - market_value_raw = row[8] if len(row) > 8 else 'N/A' # Column J - demand = row[10] if len(row) > 10 else 'N/A' # Column L - notes = row[12] if len(row) > 12 else '' # Column N - drive_url = row[14] if len(row) > 14 else None # Column P - # Extract lastUpdated from column R (index 16) - last_updated = row[16] if len(row) > 16 else None # Column R - - cleaned_name = clean_string(name) - # Also skip if name is clearly a header like "Name" - if cleaned_name == 'N/A' or cleaned_name.lower() == 'name': - #logger.debug(f"Skipping row {row_idx+6} due to missing/header name: {row}") - continue - - processed_item = { - 'icon': extract_image_url(icon_formula, drive_url), - 'name': cleaned_name, - 'value': format_currency(value_raw), - 'dupedValue': format_currency(duped_value_raw), - 'marketValue': format_currency(market_value_raw), - 'demand': clean_string(demand, 'N/A'), - 'notes': clean_string(notes, '') - # Remove lastUpdated field - will only be set when an update is detected - } - - # Add lastUpdated if it exists in the sheet - if last_updated and str(last_updated).strip(): - # Only add if it's in the format yyyy-MM-dd HH:mm:ss - last_updated_str = str(last_updated).strip() - # Check if it matches the expected date format - if re.match(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$', last_updated_str): - 
processed_item['lastUpdated'] = last_updated_str - - processed_data.append(processed_item) - return processed_data - -def process_user_scammer_data(values): # For User Scammer Sheet - if not values: return [] - processed_data = [] - for row in values: # Expected range like B6:G - if not row or len(row) < 2: continue - # Indices based on B6:G (0-indexed from B) - discord_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B - roblox_username = clean_string_optional(row[1]) if len(row) > 1 else None # Col C - # Skip if both identifiers are missing - if not discord_id and not roblox_username: continue - # Skip if it looks like a header row - if str(discord_id).lower() == 'discord id' or str(roblox_username).lower() == 'roblox username': - continue - processed_item = { - 'discord_id': discord_id, - 'roblox_username': roblox_username, - 'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A', # Col D - 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', # Col E - 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, # Col F - 'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [], # Col G - 'roblox_avatar_url': None # Will be filled later - } - processed_data.append(processed_item) - return processed_data - -def process_server_scammer_data(values): # For Server Scammer Sheet - if not values: return [] - processed_data = [] - for row in values: # Expected range like B6:F - if not row or len(row) < 2: continue - # Indices based on B6:F (0-indexed from B) - server_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B - server_name = clean_string_optional(row[1]) if len(row) > 1 else None # Col C - # Skip if both identifiers are missing - if not server_id and not server_name: continue - # Skip if it looks like a header row - if str(server_id).lower() == 'server id' or str(server_name).lower() == 'server name': - continue - processed_item = { - 'server_id': server_id, - 'server_name': server_name, - 'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A', # Col D - 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', # Col E - 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None # Col F - } - processed_data.append(processed_item) - return processed_data - -def process_dwc_data(values): # For DWC Sheet - if not values: return [] - processed_data = [] - for row in values: # Expected range like B6:G - if not row or len(row) < 1: continue # Need at least one ID - # Indices based on B6:G (0-indexed from B) - user_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B - server_id = clean_string_optional(row[1]) if len(row) > 1 else None # Col C - roblox_user = clean_string_optional(row[2]) if len(row) > 2 else None # Col D - # Skip if all identifiers are missing - if not user_id and not server_id and not roblox_user: continue - # Skip if it looks like a header row - if str(user_id).lower() == 'user id' or str(server_id).lower() == 'server id' or str(roblox_user).lower() == 'roblox user': - continue - processed_item = { - 'status': 'DWC', - 'discord_user_id': user_id, - 'discord_server_id': server_id, - 'roblox_username': roblox_user, - 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', # Col E - 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, # Col F - 'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [], # Col G - 'roblox_avatar_url': None # Will be filled later - } - processed_data.append(processed_item) - return 
processed_data - -def process_trusted_data(values): # New function for Trusted Sheet - if not values: return [] - processed_data = [] - for row in values: # Expected range like B6:E - if not row or len(row) < 1: continue # Need at least one identifier - # Indices based on B6:E (0-indexed from B) - discord_user_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B - # discord_username = clean_string_optional(row[1]) if len(row) > 1 else None # Col C - Not currently used for matching, but keep for potential future use - discord_server_id = clean_string_optional(row[2]) if len(row) > 2 else None # Col D - roblox_username = clean_string_optional(row[3]) if len(row) > 3 else None # Col E - - # Skip if all relevant identifiers are missing - if not discord_user_id and not discord_server_id and not roblox_username: continue - - # Skip if it looks like a header row (check common header names) - if (str(discord_user_id).lower() == 'discord user id' or - str(discord_server_id).lower() == 'discord server id' or - str(roblox_username).lower() == 'roblox username'): - continue - - processed_item = { - 'status': 'Trusted', # Add a status field - 'discord_user_id': discord_user_id, - 'discord_server_id': discord_server_id, - 'roblox_username': roblox_username, - 'roblox_avatar_url': None # Will be filled later if roblox_username exists - # Note: No explanation or evidence expected for trusted entries based on B6:E - } - processed_data.append(processed_item) - return processed_data - -def process_dupe_list_data(values): # For Dupe List Sheet - if not values: return [] - # Expected range like B2:B - processed_dupes = [] - for row in values: - if row and len(row)>0 and row[0] and isinstance(row[0], str): - username = row[0].strip().lower() - # Skip header or empty strings - if username and username not in ('username', 'usernames'): - processed_dupes.append(username) - return processed_dupes - - -# --- Async Fetching Functions --- - -async def fetch_batch_ranges_async(spreadsheet_id: str, ranges: List[str], value_render_option: str = 'FORMATTED_VALUE') -> List[Dict]: - """Async wrapper to fetch multiple ranges using batchGet and return raw valueRanges.""" - global sheets_service, cache - if not sheets_service: - logger.warning(f"Attempted batch fetch from {spreadsheet_id} but Sheets service is unavailable.") - raise Exception("Google Sheets service not initialized") - if not ranges: - logger.warning(f"Batch fetch called with empty ranges for {spreadsheet_id}.") - return [] - - try: - logger.info(f"Fetching batch ranges from {spreadsheet_id}: {ranges}") - loop = asyncio.get_event_loop() - result = await loop.run_in_executor( - None, - lambda: sheets_service.spreadsheets().values().batchGet( - spreadsheetId=spreadsheet_id, - ranges=ranges, - valueRenderOption=value_render_option, - majorDimension='ROWS' - ).execute() - ) - value_ranges = result.get('valueRanges', []) - logger.info(f"Successfully fetched batch data for {len(value_ranges)} ranges from {spreadsheet_id}.") - return value_ranges # Return the raw list of valueRange objects - - except HttpError as e: - status_code = e.resp.status - error_details = {} - try: - error_details = json.loads(e.content).get('error', {}) - except json.JSONDecodeError: - logger.error(f"Failed to parse JSON error content from Google API: {e.content}") - - status = error_details.get('status', f'HTTP_{status_code}') # Use HTTP status if details missing - message = error_details.get('message', e._get_reason()) # Fallback message - logger.error(f"Google API HTTP 
Error during batch fetch for {spreadsheet_id}: Status={status}, Message={message}") - - if status in ('PERMISSION_DENIED', 'UNAUTHENTICATED') or status_code == 403 or status_code == 401: - logger.critical(f"Authentication/Permission Error accessing {spreadsheet_id}. Disabling service checks.") - cache["service_available"] = False # Mark service as down - sheets_service = None # Reset service to force re-init attempt - elif status == 'NOT_FOUND' or status_code == 404: - logger.error(f"Spreadsheet or Range not found error for {spreadsheet_id}. Ranges: {ranges}. Check IDs and Sheet Names.") - elif status_code >= 500: # Server-side errors on Google's end - logger.warning(f"Google API server error ({status_code}) for {spreadsheet_id}. May be temporary.") - # Keep service_available as True, retry might work - # else: # Other client errors (e.g., 400 Bad Request for invalid range format) - - raise e # Re-raise after logging - except Exception as e: - logger.error(f"Error during batch fetching from {spreadsheet_id} for ranges {ranges}: {e}", exc_info=True) - # Could be network issues, timeouts handled by aiohttp session typically - # Consider marking service unavailable for persistent non-HTTP errors too? - # cache["service_available"] = False # Optional: Be more aggressive - raise e - -# --- Webhook Sending --- -async def send_webhook_notification(session: aiohttp.ClientSession, webhook_url: str, embed: Dict): - """Sends a Discord webhook notification with the provided embed.""" - if not webhook_url: - # logger.debug("Webhook URL not configured. Skipping notification.") - return - if not embed: - logger.warning("Attempted to send webhook with empty embed.") - return - - webhook_data = {"embeds": [embed]} - try: - # Use a reasonable timeout for webhook posts - async with session.post(webhook_url, json=webhook_data, timeout=aiohttp.ClientTimeout(total=10)) as response: - if response.status not in [200, 204]: - # Log more details on failure - response_text = await response.text() - logger.warning(f"Failed to send webhook to {webhook_url[:30]}... (Status: {response.status}): {response_text[:500]}") # Limit response text length - # else: - # logger.debug(f"Webhook notification sent successfully to {webhook_url[:30]}...") - except asyncio.TimeoutError: - logger.warning(f"Timeout sending webhook to {webhook_url[:30]}...") - except aiohttp.ClientError as e: - logger.error(f"Network error sending webhook to {webhook_url[:30]}...: {e}") - except Exception as e: - logger.error(f"Unexpected error sending webhook: {e}", exc_info=True) - -# --- Background Cache Update Task --- - -async def update_cache_periodically(): - """Fetches data, processes, detects changes/new entries (if not first run), sends webhooks, and updates cache.""" - global cache - # Increase overall session timeout slightly for robustness - async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=45)) as session: - while True: - if not cache["service_available"]: - logger.info("Attempting to re-initialize Google Sheets service...") - init_google_sheets() - if not cache["service_available"]: - logger.warning("Google Sheets service still unavailable, skipping cache update cycle.") - await asyncio.sleep(CACHE_UPDATE_INTERVAL_SECONDS * 2) # Wait longer if service is down - continue - else: - logger.info("Google Sheets service re-initialized. Proceeding with cache update.") - - logger.info(f"Starting cache update cycle... 
(Cache Ready: {cache['is_ready']})") - start_time = datetime.now(timezone.utc) - webhook_tasks = [] # Store webhook sending tasks - - # Prepare temporary storage for fetched data - fetched_values_categories = {} # { "CategoryName": [items...] } - new_cache_data = { - "user_scammers": [], - "server_scammers": [], - "dwc": [], - "trusted": [], # Add trusted key - "dupes": [], - } - current_errors = {} # Track errors for specific fetches/sheets - - try: - # --- Define Ranges and Processors --- - scammer_dwc_ranges = [ - f"{quote_sheet_name(USER_SCAMMER_SHEET)}!{USER_SCAMMER_RANGE}", - f"{quote_sheet_name(SERVER_SCAMMER_SHEET)}!{SERVER_SCAMMER_RANGE}", - f"{quote_sheet_name(DWC_SHEET)}!{DWC_RANGE}", - f"{quote_sheet_name(TRUSTED_SHEET)}!{TRUSTED_RANGE}", # Add trusted range - ] - scammer_dwc_processor_map = { - USER_SCAMMER_SHEET: process_user_scammer_data, - SERVER_SCAMMER_SHEET: process_server_scammer_data, - DWC_SHEET: process_dwc_data, - TRUSTED_SHEET: process_trusted_data, # Add trusted processor - } - scammer_dwc_target_key_map = { - USER_SCAMMER_SHEET: "user_scammers", - SERVER_SCAMMER_SHEET: "server_scammers", - DWC_SHEET: "dwc", - TRUSTED_SHEET: "trusted", # Add trusted target key - } - - values_dupes_ranges = [f"{quote_sheet_name(DUPE_LIST_SHEET)}!{DUPE_LIST_RANGE}"] - values_dupes_ranges.extend([f"{quote_sheet_name(cat)}!{VALUES_RANGE}" for cat in CATEGORIES]) - - # --- Define Fetch Tasks --- - fetch_tasks = { - "scammer_dwc_batch": fetch_batch_ranges_async( - SCAMMER_DWC_SPREADSHEET_ID, - scammer_dwc_ranges, - value_render_option='FORMATTED_VALUE' - ), - } - - # Execute scammer/dwc batch first - results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True) - task_keys = list(fetch_tasks.keys()) - - # Add delay between sheet updates - await asyncio.sleep(SHEET_UPDATE_DELAY_SECONDS) - - # Now fetch values/dupes batch - fetch_tasks = { - "values_dupes_batch": fetch_batch_ranges_async( - VALUES_DUPE_SPREADSHEET_ID, - values_dupes_ranges, - value_render_option='FORMATTED_VALUE' - ) - } - - # Execute values/dupes batch - values_results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True) - task_keys.extend(list(fetch_tasks.keys())) - results.extend(values_results) - - # --- Process Results --- - raw_scammer_dwc_results = None - raw_values_dupes_results = None - - for i, result in enumerate(results): - key = task_keys[i] - if isinstance(result, Exception): - logger.error(f"Failed to fetch batch data for {key}: {result}") - current_errors[key] = str(result) - # If fetch failed, likely service unavailable (handled by fetch_batch_ranges_async) - # No need to explicitly set cache["service_available"] = False here again - else: - if key == "scammer_dwc_batch": - raw_scammer_dwc_results = result - elif key == "values_dupes_batch": - raw_values_dupes_results = result - - # --- Process Scammer/DWC Results --- - if raw_scammer_dwc_results is not None: - logger.info(f"Processing {len(raw_scammer_dwc_results)} valueRanges from Scammer/DWC sheet...") - for vr in raw_scammer_dwc_results: - range_str = vr.get('range', '') - match = re.match(r"^'?([^'!]+)'?!", range_str) - if not match: - logger.warning(f"Could not extract sheet name from range '{range_str}' in Scammer/DWC response.") - continue - sheet_name = match.group(1).replace("''", "'") - - if sheet_name in scammer_dwc_processor_map: - processor = scammer_dwc_processor_map[sheet_name] - target_key = scammer_dwc_target_key_map[sheet_name] - values = vr.get('values', []) - try: - processed_data = processor(values) - 
new_cache_data[target_key] = processed_data # Store fetched data temporarily - logger.info(f"Processed {len(processed_data)} items for {sheet_name} -> {target_key}") - except Exception as e: - logger.error(f"Error processing data for {sheet_name} using {processor.__name__}: {e}", exc_info=True) - current_errors[f"process_{target_key}"] = str(e) - else: - logger.warning(f"No processor found for sheet name '{sheet_name}' derived from range '{range_str}' in Scammer/DWC sheet.") - else: - logger.warning("Skipping Scammer/DWC processing due to fetch error.") - - - # --- Process Values/Dupes Results --- - if raw_values_dupes_results is not None: - logger.info(f"Processing {len(raw_values_dupes_results)} valueRanges from Values/Dupes sheet...") - for vr in raw_values_dupes_results: - range_str = vr.get('range', '') - match = re.match(r"^'?([^'!]+)'?!", range_str) - if not match: - logger.warning(f"Could not extract sheet name from range '{range_str}' in Values/Dupes response.") - continue - sheet_name = match.group(1).replace("''", "'") - - values = vr.get('values', []) - try: - if sheet_name == DUPE_LIST_SHEET: - processed_data = process_dupe_list_data(values) - new_cache_data["dupes"] = processed_data # Store fetched data temporarily - logger.info(f"Processed {len(processed_data)} items for {DUPE_LIST_SHEET} -> dupes") - elif sheet_name in CATEGORIES: - processed_data = process_sheet_data(values) - fetched_values_categories[sheet_name] = processed_data # Store fetched data temporarily - logger.info(f"Processed {len(processed_data)} items for Category: {sheet_name}") - else: - logger.warning(f"Unrecognized sheet name '{sheet_name}' derived from range '{range_str}' in Values/Dupes sheet.") - except Exception as e: - target_key = "dupes" if sheet_name == DUPE_LIST_SHEET else f"values_{sheet_name}" - logger.error(f"Error processing data for {sheet_name}: {e}", exc_info=True) - current_errors[f"process_{target_key}"] = str(e) - else: - logger.warning("Skipping Values/Dupes processing due to fetch error.") - - # --- Fetch Roblox Avatars (for new data before comparison/webhook) --- - if not current_errors.get("scammer_dwc_batch") and \ - not current_errors.get("process_user_scammers") and \ - not current_errors.get("process_dwc") and \ - not current_errors.get("process_trusted"): # Check trusted processing too - logger.info("Fetching Roblox avatars for newly processed data...") - avatar_tasks = [] - entries_needing_avatars = new_cache_data.get("user_scammers", []) + new_cache_data.get("dwc", []) + new_cache_data.get("trusted", []) # Include trusted list - for entry in entries_needing_avatars: - if entry.get('roblox_username'): - avatar_tasks.append(fetch_avatar_for_entry_update(session, entry)) - if avatar_tasks: - await asyncio.gather(*avatar_tasks) # Exceptions logged within helper - logger.info(f"Finished fetching avatars for {len(avatar_tasks)} potential new entries.") - else: - logger.warning("Skipping avatar fetching due to errors in fetching/processing scammer/dwc/trusted data.") - - - # --- Change Detection & Webhook Preparation (ONLY if cache is ready) --- - current_time = datetime.now(timezone.utc) - timestamp_iso = current_time.isoformat() - detected_value_changes_for_api = {} # Always calculate for API, but only send webhooks if ready - - # Perform comparisons regardless of cache readiness to populate detected_value_changes_for_api - # But only queue webhooks if cache["is_ready"] is True - - # 1. 
Value Changes Calculation - if "values" not in cache: cache["values"] = {} # Ensure exists for comparison logic - if "values_dupes_batch" not in current_errors and not any(k.startswith("process_values_") for k in current_errors): - fields_to_compare = ['value', 'dupedValue', 'marketValue'] - for category, new_items in fetched_values_categories.items(): - old_items_dict = {item['name'].lower(): item for item in cache["values"].get(category, [])} # Use lower case for comparison robustness - category_changes_for_api = [] - - for new_item in new_items: - item_name = new_item.get('name') - if not item_name or item_name == 'N/A': continue - item_name_lower = item_name.lower() - - old_item = old_items_dict.get(item_name_lower) - change_detected_for_webhook = False - change_info_webhook = {} - - if old_item: # Check existing item for changes - for field in fields_to_compare: - old_val_str = old_item.get(field, 'N/A') - new_val_str = new_item.get(field, 'N/A') - # Simple string comparison is sufficient here as they are formatted consistently - if old_val_str != new_val_str: - logger.info(f"Change detected in {category}: {item_name} - {field}: '{old_val_str}' -> '{new_val_str}'") - change_info = { - "item_name": item_name, "field": field, - "old_value": old_val_str if old_val_str is not None else "N/A", - "new_value": new_val_str if new_val_str is not None else "N/A", - "timestamp": timestamp_iso - } - category_changes_for_api.append(change_info) - change_detected_for_webhook = True - change_info_webhook = change_info # Store last change for potential webhook - else: - # This is a new item, but don't add lastUpdated field here - pass - - # Prepare webhook only if a change was found AND cache was ready - if change_detected_for_webhook and cache["is_ready"] and VALUE_WEBHOOK_URL: - embed = { - "title": f"Value Update: {item_name} ({category})", - "color": 3447003, # Blue - "fields": [ - {"name": "Field Changed", "value": change_info_webhook['field'], "inline": True}, - {"name": "Old Value", "value": f"`{change_info_webhook['old_value']}`", "inline": True}, - {"name": "New Value", "value": f"`{change_info_webhook['new_value']}`", "inline": True}, - {"name": "Item Notes", "value": new_item.get('notes', 'N/A')[:1020] or 'N/A', "inline": False}, # Limit notes length - ], - "timestamp": timestamp_iso - } - if new_item.get('icon'): - embed["thumbnail"] = {"url": new_item['icon']} - webhook_tasks.append(send_webhook_notification(session, VALUE_WEBHOOK_URL, embed)) - - if category_changes_for_api: - detected_value_changes_for_api[category] = category_changes_for_api - if cache["is_ready"] and VALUE_WEBHOOK_URL: - logger.info(f"Prepared {len(webhook_tasks)} value change webhooks.") - elif not VALUE_WEBHOOK_URL: - logger.info("VALUE_WEBHOOK_URL not set, skipping value change webhook sending.") - elif not cache["is_ready"]: - logger.info("Skipping value change webhook sending during initial cache population.") - else: - logger.warning("Skipping value change detection and webhooks due to fetch/processing errors.") - - - # 2. 
New Scammers / DWC (Only if cache is ready) - if cache["is_ready"] and SCAMMER_WEBHOOK_URL and \ - "scammer_dwc_batch" not in current_errors and \ - not any(k.startswith("process_") and k in ["process_user_scammers", "process_server_scammers", "process_dwc"] for k in current_errors): - - logger.info("Detecting new scammer/DWC entries for webhooks (cache is ready)...") - initial_webhook_task_count = len(webhook_tasks) - added_new_scammer_webhook = False - - # --- User Scammers --- - # Create keys robust to None values - def get_user_scammer_key(item): - return (item.get('discord_id') or 'none', item.get('roblox_username') or 'none') - old_user_keys = set(get_user_scammer_key(item) for item in cache.get("user_scammers", [])) - - for item in new_cache_data.get("user_scammers", []): - key = get_user_scammer_key(item) - if key not in old_user_keys: - logger.info(f"New User Scammer detected: Discord={item.get('discord_id')}, Roblox={item.get('roblox_username')}") - embed = { - "title": "🚨 New User Scammer Added", "color": 15158332, # Red - "fields": [ - {"name": "Discord ID", "value": f"`{item.get('discord_id', 'N/A')}`", "inline": True}, - {"name": "Roblox User", "value": f"`{item.get('roblox_username', 'N/A')}`", "inline": True}, - {"name": "Scam Type", "value": item.get('scam_type', 'N/A')[:1020] or 'N/A', "inline": False}, - {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False}, - ], "timestamp": timestamp_iso - } - if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False}) - if item.get('alt_accounts'): embed["fields"].append({"name": "Alt Accounts", "value": ", ".join([f"`{a}`" for a in item['alt_accounts']])[:1020] or 'N/A', "inline": False}) # Limit length - if item.get('roblox_avatar_url'): embed["thumbnail"] = {"url": item['roblox_avatar_url']} - webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed)) - added_new_scammer_webhook = True - - # --- Server Scammers --- - def get_server_scammer_key(item): - return (item.get('server_id') or 'none', item.get('server_name') or 'none') - old_server_keys = set(get_server_scammer_key(item) for item in cache.get("server_scammers", [])) - - for item in new_cache_data.get("server_scammers", []): - key = get_server_scammer_key(item) - if key not in old_server_keys: - logger.info(f"New Server Scammer detected: ID={item.get('server_id')}, Name={item.get('server_name')}") - embed = { - "title": "🚨 New Server Scammer Added", "color": 15158332, # Red - "fields": [ - {"name": "Server ID", "value": f"`{item.get('server_id', 'N/A')}`", "inline": True}, - {"name": "Server Name", "value": f"`{item.get('server_name', 'N/A')}`", "inline": True}, - {"name": "Scam Type", "value": item.get('scam_type', 'N/A')[:1020] or 'N/A', "inline": False}, - {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False}, - ], "timestamp": timestamp_iso - } - if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False}) - webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed)) - added_new_scammer_webhook = True - - # --- DWC Entries --- - def get_dwc_key(item): - # Use a combination of available identifiers as the key - return ( - item.get('discord_user_id') or 'none', - item.get('discord_server_id') or 'none', - item.get('roblox_username') or 'none' - ) - old_dwc_keys = set(get_dwc_key(item) for item in 
cache.get("dwc", [])) - - for item in new_cache_data.get("dwc", []): - key = get_dwc_key(item) - if key not in old_dwc_keys: - logger.info(f"New DWC Entry detected: User={item.get('discord_user_id')}, Server={item.get('discord_server_id')}, Roblox={item.get('roblox_username')}") - embed = { - "title": "⚠️ New DWC Entry Added", "color": 15105570, # Orange/Dark Yellow - "fields": [ - {"name": "Discord User ID", "value": f"`{item.get('discord_user_id', 'N/A')}`", "inline": True}, - {"name": "Discord Server ID", "value": f"`{item.get('discord_server_id', 'N/A')}`", "inline": True}, - {"name": "Roblox User", "value": f"`{item.get('roblox_username', 'N/A')}`", "inline": True}, - {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False}, - ], "timestamp": timestamp_iso - } - if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False}) - if item.get('alt_accounts'): embed["fields"].append({"name": "Alt Accounts", "value": ", ".join([f"`{a}`" for a in item['alt_accounts']])[:1020] or 'N/A', "inline": False}) - if item.get('roblox_avatar_url'): embed["thumbnail"] = {"url": item['roblox_avatar_url']} - webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed)) - added_new_scammer_webhook = True - - if added_new_scammer_webhook: - logger.info(f"Prepared {len(webhook_tasks) - initial_webhook_task_count} new scammer/DWC webhooks.") - - elif not cache["is_ready"]: - logger.info("Skipping new scammer webhook detection during initial cache population.") - elif not SCAMMER_WEBHOOK_URL: - logger.info("SCAMMER_WEBHOOK_URL not set, skipping new scammer webhook detection.") - else: # Errors occurred - logger.warning("Skipping new scammer webhook detection due to fetch/processing errors.") - - - # --- Send Webhooks Concurrently --- - if webhook_tasks: - logger.info(f"Sending {len(webhook_tasks)} webhook notifications...") - await asyncio.gather(*webhook_tasks) - logger.info("Finished sending webhook notifications.") - else: - logger.info("No webhooks prepared to send for this cycle.") - - - # --- Final Cache Update --- - update_occurred = False - # Determine if this cycle *should* mark the cache as ready - can_set_ready = not cache["is_ready"] and not current_errors # Only set ready on first *fully successful* run - - if not current_errors: # Perfect cycle - logger.info("Updating full cache (no errors during fetch or processing).") - cache["values"] = fetched_values_categories - cache["user_scammers"] = new_cache_data["user_scammers"] - cache["server_scammers"] = new_cache_data["server_scammers"] - cache["dwc"] = new_cache_data["dwc"] - cache["trusted"] = new_cache_data["trusted"] - cache["dupes"] = new_cache_data["dupes"] - cache["value_changes"] = detected_value_changes_for_api # Store the detected changes - cache["last_updated"] = current_time - if can_set_ready: - logger.info("Marking cache as ready after initial successful population.") - cache["is_ready"] = True - cache["service_available"] = True # Mark as available on success - update_occurred = True - logger.info(f"Cache update cycle completed successfully.") - else: # Errors occurred, attempt partial update - logger.warning(f"Cache update cycle completed with errors: {current_errors}. 
Attempting partial update.") - partial_update_details = [] - - # Update values only if the values/dupes batch succeeded AND processing succeeded - if "values_dupes_batch" not in current_errors and not any(k.startswith("process_values_") for k in current_errors): - if cache.get("values") != fetched_values_categories: - cache["values"] = fetched_values_categories - cache["value_changes"] = detected_value_changes_for_api # Update changes along with values - partial_update_details.append("values") - update_occurred = True - else: - logger.warning("Skipping update for 'values' due to errors.") - - # Update dupes only if the values/dupes batch succeeded AND processing succeeded - if "values_dupes_batch" not in current_errors and "process_dupes" not in current_errors: - if cache.get("dupes") != new_cache_data["dupes"]: - cache["dupes"] = new_cache_data["dupes"] - partial_update_details.append("dupes") - update_occurred = True - else: - logger.warning("Skipping update for 'dupes' due to errors.") - - # Update scammer/DWC sections if their batch succeeded AND processing succeeded - if "scammer_dwc_batch" not in current_errors: - for key in ["user_scammers", "server_scammers", "dwc", "trusted"]: # Include "trusted" in this list - process_error_key = f"process_{key}" - if process_error_key not in current_errors: - if cache.get(key) != new_cache_data[key]: - cache[key] = new_cache_data[key] - partial_update_details.append(key) - update_occurred = True - else: - logger.warning(f"Skipping update for '{key}' due to processing error.") - else: - logger.warning("Skipping update for 'user_scammers', 'server_scammers', 'dwc', 'trusted' due to batch fetch error.") - - if update_occurred: - cache["last_updated"] = current_time # Mark partial update time - # Mark cache ready only if it was *already* ready and we managed a partial update - # Or if this was the first run AND it was partially successful (maybe relax this?) - # Let's stick to: only mark ready on first FULL success. - if cache["is_ready"]: # If it was already ready, keep it ready - logger.info(f"Partially updated cache sections: {', '.join(partial_update_details)}. Cache remains ready.") - else: - logger.info(f"Partially updated cache sections: {', '.join(partial_update_details)}. Cache remains NOT ready (requires full success on first run).") - # Keep service_available based on whether fetch errors occurred - else: - logger.error(f"Cache update cycle failed, and no parts could be updated based on errors. Errors: {current_errors}") - # Cache readiness and service availability remain unchanged - - except Exception as e: - logger.exception(f"Critical error during cache update cycle: {e}") - # If a critical error happens (e.g., network error during fetch), mark service potentially unavailable - if isinstance(e, (aiohttp.ClientError, HttpError, asyncio.TimeoutError)): - logger.warning("Communication error detected, will re-check service availability next cycle.") - # service_available might have already been set to False by fetch_batch_ranges_async - - # --- Wait for the next cycle --- - end_time = datetime.now(timezone.utc) - duration = (end_time - start_time).total_seconds() - wait_time = max(10, CACHE_UPDATE_INTERVAL_SECONDS - duration) # Ensure at least 10s wait - logger.info(f"Cache update cycle duration: {duration:.2f}s. 
Waiting {wait_time:.2f}s for next cycle.") - await asyncio.sleep(wait_time) - - -async def fetch_avatar_for_entry_update(session: aiohttp.ClientSession, entry: dict): - """Fetches avatar and updates the provided entry dictionary IN PLACE.""" - roblox_username = entry.get('roblox_username') - if not roblox_username: return - - new_avatar = None # Default to None - try: - user_id = await get_roblox_user_id(session, roblox_username) - if user_id: - new_avatar = await get_roblox_avatar_url(session, user_id) - # else: # User ID not found, keep avatar as None - # logger.debug(f"Roblox user ID not found for username: {roblox_username}") - - except Exception as e: - # Log errors but don't stop the main update loop - logger.warning(f"Failed to fetch avatar for {roblox_username}: {e}") - # Keep new_avatar as None on error - - finally: - # Update the entry dict directly - entry['roblox_avatar_url'] = new_avatar - - -# --- FastAPI Startup Event --- -@app.on_event("startup") -async def startup_event(): - """Starts the background cache update task.""" - if not cache["service_available"]: - logger.warning("Google Sheets service not available at startup. Will attempt re-init in background task.") - logger.info("Starting background cache update task...") - # Check for webhook URLs at startup - if not SCAMMER_WEBHOOK_URL: - logger.warning("SCAMMER_WEBHOOK_URL environment variable not set. New scammer notifications disabled.") - if not VALUE_WEBHOOK_URL: - logger.warning("VALUE_WEBHOOK_URL environment variable not set. Value change notifications disabled.") - if not DUPE_CHECK_WEBHOOK_URL: - logger.warning("WEBHOOK_URL (for dupe checks) environment variable not set. Dupe check notifications disabled.") - if not VISITOR_WEBHOOK_URL: - logger.warning("VISITOR_WEBHOOK_URL environment variable not set. Visitor tracking notifications disabled.") - asyncio.create_task(update_cache_periodically()) - asyncio.create_task(visitor_webhook_task()) # Start visitor webhook background task - - -# --- API Endpoints --- - -def check_cache_readiness(): - """Reusable check for API endpoints - Checks cache readiness""" - if not cache["is_ready"]: - # Be more specific if service is known to be down - if not cache["service_available"]: - raise HTTPException(status_code=503, detail="Service temporarily unavailable due to backend connection issues. Please try again later.") - else: - raise HTTPException(status_code=503, detail="Cache is initializing or data is currently unavailable. 
Please try again shortly.") - - -@app.get("/") -async def root(): - return {"message": "JVC API - Running"} - -@app.get("/api/status") -async def get_status(): - """Returns the current status of the cache and service availability""" - last_updated_iso = cache["last_updated"].isoformat() if cache["last_updated"] else None - return { - "cache_ready": cache["is_ready"], - "sheets_service_available": cache["service_available"], - "last_updated": last_updated_iso, - "cached_items": { - "value_categories": len(cache.get("values", {})), - "user_scammers": len(cache.get("user_scammers", [])), - "server_scammers": len(cache.get("server_scammers", [])), - "dwc_entries": len(cache.get("dwc", [])), - "trusted_entries": len(cache.get("trusted", [])), # Add trusted count - "duped_usernames": len(cache.get("dupes", [])), - }, - "value_change_categories_in_last_cycle": len(cache.get("value_changes", {})) - } - -@app.get("/api/values") -async def get_values(): - """Get all values data from cache""" - check_cache_readiness() - return cache.get("values", {}) - -@app.get("/api/values/{category}") -async def get_category_values(category: str): - """Get values data for a specific category from cache""" - check_cache_readiness() - # Case-insensitive matching for category name - matched_category = next((c for c in cache.get("values", {}).keys() if c.lower() == category.lower()), None) - if not matched_category: - # Check if the category *exists* conceptually even if empty - valid_categories_lower = [c.lower() for c in CATEGORIES] - if category.lower() in valid_categories_lower: - return {category: []} # Return empty list if category is valid but has no items yet - else: - raise HTTPException(status_code=404, detail=f"Category '{category}' not found.") - return {matched_category: cache.get("values", {}).get(matched_category, [])} - - -@app.get("/api/value-changes/{category}") -async def get_category_value_changes(category: str): - """Get detected value changes for a specific category from the last cache update cycle.""" - check_cache_readiness() - # Case-insensitive matching for category name - matched_category = next((c for c in cache.get("value_changes", {}).keys() if c.lower() == category.lower()), None) - if not matched_category: - # Check if the category *exists* conceptually even if empty - valid_categories_lower = [c.lower() for c in CATEGORIES] - if category.lower() in valid_categories_lower: - return {category: []} # Return empty list if category is valid but had no changes - else: - raise HTTPException(status_code=404, detail=f"Category '{category}' not found.") - return {matched_category: cache.get("value_changes", {}).get(matched_category, [])} - -@app.get("/api/value-changes") -async def get_all_value_changes(): - """Get all detected value changes from the last cache update cycle.""" - check_cache_readiness() - return cache.get("value_changes", {}) - -@app.get("/api/scammers") -async def get_scammers(): - """Get all scammer, DWC, and trusted data (users, servers, dwc, trusted) from cache""" - check_cache_readiness() - return { - "users": cache.get("user_scammers", []), - "servers": cache.get("server_scammers", []), - "dwc": cache.get("dwc", []), - "trusted": cache.get("trusted", []) # Include trusted list - } - -@app.get("/api/dupes") -async def get_dupes(): - """Get all duped usernames from cache""" - check_cache_readiness() - return {"usernames": cache.get("dupes", [])} # Return empty list if not ready or empty - - -class UsernameCheck(BaseModel): - username: str - -@app.post("/api/check") -async 
def check_username(data: UsernameCheck): - """Check if a username is duped using cached data and optionally send webhook""" - check_cache_readiness() # Use the standard readiness check - - username_to_check = data.username.strip().lower() - # Ensure dupes list is populated before checking - dupes_list = cache.get("dupes", []) - is_duped = username_to_check in dupes_list - - # Webhook notification for checks resulting in "Not Found" - if not is_duped: - if DUPE_CHECK_WEBHOOK_URL: - async def send_check_webhook(): - try: - # Use a short-lived session for this potentially frequent task - async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session: - embed = { - "title": "User Dupe Check - Not Found", - "description": f"Username `{data.username}` was checked against the dupe list but was **not** found.", - "color": 16776960, # Yellow - "timestamp": datetime.now(timezone.utc).isoformat() - } - await send_webhook_notification(session, DUPE_CHECK_WEBHOOK_URL, embed) - except Exception as e: - logger.error(f"Error sending dupe check webhook: {e}") - - asyncio.create_task(send_check_webhook()) # Fire and forget - else: - logger.info("WEBHOOK_URL (for dupe checks) not configured. Skipping notification.") - - return {"username": data.username, "is_duped": is_duped} - - -@app.get("/health") -def health_check(): - """Provides a health status of the API and its cache.""" - now = datetime.now(timezone.utc) - status_detail = {"status": "ok", "last_updated": None, "time_since_update_seconds": None} - - if cache["last_updated"]: - status_detail["last_updated"] = cache["last_updated"].isoformat() - time_since_update = (now - cache["last_updated"]).total_seconds() - status_detail["time_since_update_seconds"] = round(time_since_update) - else: - status_detail["last_updated"] = None - status_detail["time_since_update_seconds"] = None - - - if not cache["is_ready"]: - status_detail["status"] = "initializing" - status_detail["reason"] = "Cache has not been populated yet." - return status_detail - - if not cache["service_available"]: - status_detail["status"] = "degraded" - status_detail["reason"] = "Google Sheets service connection issue detected on last attempt." - return status_detail - - # Check for staleness only if the cache is ready and service *was* available last check - # Allow some grace period (e.g., 3 intervals) - if cache["last_updated"] and time_since_update > CACHE_UPDATE_INTERVAL_SECONDS * 3: - status_detail["status"] = "degraded" - status_detail["reason"] = f"Cache potentially stale (last update > {CACHE_UPDATE_INTERVAL_SECONDS * 3} seconds ago)" - return status_detail - - # If we reach here, status is 'ok' - return status_detail - -# --- Visitor Tracking Model and Endpoint --- -class VisitorData(BaseModel): - device_type: str # "Laptop", "Desktop", "Mobile", etc. - os: str # "Windows 10", "MacOS", etc. - browser: str # "Chrome", "Firefox", etc. - country: str # Country of origin - path: str # URL path visited - ip_address: Optional[str] = None # Optional IP address (for internal use only) - -@app.post("/api/track-visitor") -async def track_visitor(visitor: VisitorData, request: Request): - """Records visitor information and adds it to the visitor queue""" - - # Filter out unwanted paths like embeds, frames, etc. 
-    if any(unwanted in visitor.path.lower() for unwanted in ['embed', 'inner-frame', 'frame-minified']):
-        logger.info(f"Skipping tracking for non-standard path: {visitor.path}")
-        return {"status": "skipped", "reason": "non-standard path"}
-
-    # Get client IP (for internal tracking, not included in webhook)
-    client_host = request.client.host if request.client else "Unknown"
-
-    # Format current time
-    current_time = datetime.now()
-    formatted_time = current_time.strftime("%A, %B %d, %Y %I:%M %p")
-    short_time = current_time.strftime("%I:%M %p")
-
-    # Get country flag emoji
-    flag_emoji = "🌐" # Default globe emoji
-    if visitor.country.lower() == "france":
-        flag_emoji = "🇫🇷"
-    elif visitor.country.lower() == "pakistan":
-        flag_emoji = "🇵🇰"
-    elif visitor.country.lower() == "united states" or visitor.country.lower() == "usa":
-        flag_emoji = "🇺🇸"
-    elif visitor.country.lower() == "united kingdom" or visitor.country.lower() == "uk":
-        flag_emoji = "🇬🇧"
-    elif visitor.country.lower() == "canada":
-        flag_emoji = "🇨🇦"
-    elif visitor.country.lower() == "australia":
-        flag_emoji = "🇦🇺"
-    elif visitor.country.lower() == "germany":
-        flag_emoji = "🇩🇪"
-    elif visitor.country.lower() == "india":
-        flag_emoji = "🇮🇳"
-    elif visitor.country.lower() == "japan":
-        flag_emoji = "🇯🇵"
-    elif visitor.country.lower() == "china":
-        flag_emoji = "🇨🇳"
-    # Add more country flags as needed
-
-    # Create visitor entry
-    visitor_entry = {
-        "device_type": visitor.device_type,
-        "os": visitor.os,
-        "browser": visitor.browser,
-        "country": visitor.country,
-        "flag_emoji": flag_emoji,
-        "path": visitor.path,
-        "timestamp": current_time,
-        "formatted_time": short_time,
-        "ip_address": client_host # Store IP internally but don't send in webhook
-    }
-
-    # Add to visitor queue
-    cache["visitors"].append(visitor_entry)
-    logger.info(f"Recorded visitor from {visitor.country} using {visitor.browser} visiting {visitor.path}")
-
-    # Check if we should send a webhook immediately
-    should_send_now = len(cache["visitors"]) >= MAX_VISITORS_PER_WEBHOOK
-
-    if cache["last_visitor_webhook"]:
-        time_since_last = (current_time - cache["last_visitor_webhook"]).total_seconds()
-        should_send_now = should_send_now or time_since_last >= VISITOR_BATCH_INTERVAL_SECONDS
-    else:
-        # First visitor, send after accumulating more or after interval
-        should_send_now = False
-
-    # Send webhook if needed
-    if should_send_now and VISITOR_WEBHOOK_URL:
-        await send_visitor_webhook()
-
-    return {"status": "recorded", "webhook_queued": True}
-
-async def send_visitor_webhook():
-    """Sends a webhook with accumulated visitor information"""
-    if not cache["visitors"] or not VISITOR_WEBHOOK_URL:
-        return False
-
-    try:
-        current_time = datetime.now()
-
-        # Group visitors by device type for better organization
-        visitors_by_device = {}
-        for visitor in cache["visitors"]:
-            key = f"{visitor['device_type']} - {visitor['os']} - {visitor['browser']}"
-            if key not in visitors_by_device:
-                visitors_by_device[key] = []
-            visitors_by_device[key].append(visitor)
-
-        # Create webhook fields for each device group
-        fields = []
-        for device_key, visitors in visitors_by_device.items():
-            visits_text = ""
-            for v in visitors:
-                # Format the path more specifically as shown in the example
-                path_display = v['path']
-                if "textures" in path_display.lower():
-                    path_display = "https://www.jailbreaktrading.xyz/textures"
-                elif "tires" in path_display.lower() or "rims" in path_display.lower():
-                    path_display = "https://www.jailbreaktrading.xyz/tires"
-                elif "calculator" in
path_display.lower(): - path_display = "https://www.jailbreaktrading.xyz/value-calculator" - elif "drift" in path_display.lower() or "particles" in path_display.lower(): - path_display = "https://www.jailbreaktrading.xyz/drift-particles" - elif "furniture" in path_display.lower(): - path_display = "https://www.jailbreaktrading.xyz/furniture" - elif "spoilers" in path_display.lower(): - path_display = "https://www.jailbreaktrading.xyz/spoilers" - - # Format date like: Sunday, April 13, 2025 2:06 AM - timestamp_str = v['timestamp'].strftime("%A, %B %d, %Y %I:%M %p") - - # Format similar to the example in the screenshot - visits_text += f"A User from {v['country']} {v['flag_emoji']} browsed:\n" - visits_text += f"{timestamp_str}:\n" - visits_text += f"{path_display}\n\n" - - fields.append({ - "name": device_key, - "value": visits_text.strip(), - "inline": False - }) - - # Create the embed with improved wording - embed = { - "title": "🌐 Jailbreak Value Central Visitors", - "description": "Check out the latest visitor activity from around the world! See what sections are trending right now.", - "color": 3447003, # Blue - "fields": fields, - "timestamp": current_time.isoformat(), - "footer": { - "text": f"Sourced From https://www.jailbreaktrading.xyz/ • Today at {current_time.strftime('%I:%M %p')}" - }, - "thumbnail": { - "url": "https://drive.google.com/thumbnail?id=1pGa8v5npgyGQYZBhJguUxZe9uxO7Dl6h&sz=w1000" # Update with your actual logo URL - } - } - - # Send the webhook notification - async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session: - await send_webhook_notification(session, VISITOR_WEBHOOK_URL, embed) - logger.info(f"Visitor tracking webhook sent with {len(cache['visitors'])} entries") - - # Clear the visitor queue and update timestamp - cache["visitors"] = [] - cache["last_visitor_webhook"] = current_time - - return True - - except Exception as e: - logger.error(f"Error sending visitor tracking webhook: {e}") - return False - -# Background task to periodically send visitor webhooks -async def visitor_webhook_task(): - """Periodically sends visitor webhooks if there are any pending""" - while True: - try: - current_time = datetime.now() - - # Check if we have visitors and if enough time has passed - if cache["visitors"] and VISITOR_WEBHOOK_URL: - if not cache["last_visitor_webhook"] or \ - (current_time - cache["last_visitor_webhook"]).total_seconds() >= VISITOR_BATCH_INTERVAL_SECONDS: - await send_visitor_webhook() - - # Wait for next check - await asyncio.sleep(60) # Check every minute - - except Exception as e: - logger.error(f"Error in visitor webhook background task: {e}") - await asyncio.sleep(60) # Wait and retry \ No newline at end of file +import socket +socket.setdefaulttimeout(4000) + +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from typing import Optional, Any, Dict, List +import aiohttp +import os +from datetime import datetime, timezone +import json +import re +from google.oauth2.service_account import Credentials as ServiceAccountCredentials +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError +from dotenv import load_dotenv +import asyncio +import logging + +# --- Logging Setup --- +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +app = FastAPI() + +# --- Configuration --- +load_dotenv() + +# CORS +app.add_middleware( + CORSMiddleware, 
+ allow_origins=["*"], # Consider restricting in production + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Google Sheets Config +# Spreadsheet containing Scammer and DWC info +SCAMMER_DWC_SPREADSHEET_ID = os.getenv('SCAMMER_DWC_SPREADSHEET_ID', '1sgkhBNGw_r6tBIxvdeXaI0bVmWBeACN4jiw_oDEeXLw') +# Spreadsheet containing Value lists and Dupe list +VALUES_DUPE_SPREADSHEET_ID = os.getenv('VALUES_DUPE_SPREADSHEET_ID', '1Toe07o3P517q8sm9Qb1e5xyFWCuwgskj71IKJwJNfNU') + +SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly'] + +# Sheet Names and Ranges within SCAMMER_DWC_SPREADSHEET_ID +USER_SCAMMER_SHEET = "User Scammer Files" +USER_SCAMMER_RANGE = "B6:G" +SERVER_SCAMMER_SHEET = "Server Scammer Files" +SERVER_SCAMMER_RANGE = "B6:F" +DWC_SHEET = "DWC Servers / Users" +DWC_RANGE = "B6:G" +TRUSTED_SHEET = "Trusted Users / Servers" +TRUSTED_RANGE = "B6:E" + +# Sheet Names and Ranges within VALUES_DUPE_SPREADSHEET_ID +DUPE_LIST_SHEET = "Dupe List" +DUPE_LIST_RANGE = "B2:B" +# Value Categories (Sheet Names) +CATEGORIES = [ + "Vehicles", "Textures", "Colours", "Spoilers", + "Rims", "Furnitures", "Gun Skins", "Hyperchromes" +] +VALUES_RANGE = 'B6:R' # Range within each category sheet including column R for lastUpdated + +# Cache Update Interval +CACHE_UPDATE_INTERVAL_SECONDS = 60 + +# Sheet Update Delay +SHEET_UPDATE_DELAY_SECONDS = 10 # 10 second delay between sheet updates + +# Webhook URLs +SCAMMER_WEBHOOK_URL = os.getenv("SCAMMER_WEBHOOK_URL") +VALUE_WEBHOOK_URL = os.getenv("VALUE_WEBHOOK_URL") +DUPE_CHECK_WEBHOOK_URL = os.getenv("DUPE_CHECK_WEBHOOK_URL") + + +# --- Global Cache --- +cache = { + "values": {}, # Dict mapping category name to list of items + "value_changes": {}, # Dict mapping category name to list of recent changes (for API endpoint) + "user_scammers": [], + "server_scammers": [], + "dwc": [], + "trusted": [], # New cache key for trusted entries + "dupes": [], # List of duped usernames + "last_updated": None, # Timestamp of the last successful/partial update + "is_ready": False, # Is the cache populated at least once? + "service_available": True # Is the Google Sheets service reachable? +} +# --- Google Sheets Initialization --- +sheets_service = None # Initialize as None + +def quote_sheet_name(name: str) -> str: + """Adds single quotes around a sheet name if it needs them.""" + if not name: + return "''" + # Simple check: if it contains spaces or non-alphanumeric chars (excluding _) + if not re.match(r"^[a-zA-Z0-9_]+$", name): + # Escape existing single quotes within the name + escaped_name = name.replace("'", "''") + return f"'{escaped_name}'" + return name + +def init_google_sheets(scopes=SCOPES): + """Initialize Google Sheets credentials from environment variable or file""" + global sheets_service, cache + try: + creds_json_str = os.getenv('CREDENTIALS_JSON') + if creds_json_str: + logger.info("Attempting to load credentials from CREDENTIALS_JSON environment variable.") + creds_json = json.loads(creds_json_str) + creds = ServiceAccountCredentials.from_service_account_info( + creds_json, + scopes=scopes + ) + sheets_service = build('sheets', 'v4', credentials=creds, cache_discovery=False) # Disable discovery cache + logger.info("Google Sheets service initialized successfully from ENV VAR.") + cache["service_available"] = True + return sheets_service + else: + logger.info("CREDENTIALS_JSON environment variable not found. 
Falling back to file.") + raise ValueError("CREDENTIALS_JSON not set") # Trigger fallback explicitly + + except Exception as e: + logger.warning(f"Error initializing Google Sheets from ENV VAR: {e}. Trying file...") + try: + # Fallback to loading credentials from file 'credentials.json' + creds_file = 'credentials.json' + if os.path.exists(creds_file): + logger.info(f"Attempting to load credentials from file '{creds_file}'") + creds = ServiceAccountCredentials.from_service_account_file( + creds_file, + scopes=scopes + ) + sheets_service = build('sheets', 'v4', credentials=creds, cache_discovery=False) + logger.info("Google Sheets service initialized successfully from file.") + cache["service_available"] = True + return sheets_service + else: + logger.error(f"Credentials file '{creds_file}' not found.") + raise FileNotFoundError(f"'{creds_file}' not found") + + except Exception as file_e: + logger.error(f"Error loading credentials from file: {file_e}") + logger.critical("Google Sheets service could not be initialized. API will be limited.") + cache["service_available"] = False + sheets_service = None + return None + +# Initialize on module load +init_google_sheets() + + +# --- Helper Functions (Data Extraction & Formatting) --- + +def extract_drive_id(url): + if not url or not isinstance(url, str): return None + match = re.search(r'https://drive\.google\.com/file/d/([^/]+)', url) + return match.group(1) if match else None + +def convert_to_thumbnail_url(drive_url): + drive_id = extract_drive_id(drive_url) + return f"https://drive.google.com/thumbnail?id={drive_id}&sz=w1000" if drive_id else drive_url + +def extract_image_url(formula, drive_url=None): + # Priority to explicit drive_url if provided + if drive_url and isinstance(drive_url, str) and 'drive.google.com' in drive_url: + return convert_to_thumbnail_url(drive_url) + if not formula or not isinstance(formula, str): return '' + # Handle direct URLs + if formula.startswith('http://') or formula.startswith('https://'): + return formula + # Handle =IMAGE("...") formula + if formula.startswith('=IMAGE('): + match = re.search(r'=IMAGE\("([^"]+)"', formula) + if match: return match.group(1) + return '' # Return empty string if no valid URL found + +def format_currency(value: Any) -> Optional[str]: + if value is None or str(value).strip() == '': return 'N/A' + try: + num_str = str(value).replace('$', '').replace(',', '').strip() + if not num_str or num_str.lower() == 'n/a': return 'N/A' + num = float(num_str) + return f"${num:,.0f}" + except (ValueError, TypeError): + # Allow text like "Event", "Unobtainable" etc. 
to pass through + if isinstance(value, str) and value.strip() and not re.match(r'^-?[\d,.$]+\$?$', value.strip()): + return value.strip() # Return original text if non-numeric-like + return 'N/A' # Return N/A for things that look like bad numbers + +def parse_cached_currency(value_str: Optional[str]) -> Optional[float]: + if value_str is None or str(value_str).strip().lower() == 'n/a': + return None + try: + num_str = str(value_str).replace('$', '').replace(',', '').strip() + return float(num_str) + except (ValueError, TypeError): + return None # Return None if it's not a parsable number (e.g., "Event") + +def clean_string(value, default='N/A'): + if value is None: return default + cleaned = str(value).strip() + return cleaned if cleaned else default + +def clean_string_optional(value): + if value is None: return None + cleaned = str(value).strip() + return cleaned if cleaned and cleaned != '-' else None + +def parse_alt_accounts(value): + if value is None: return [] + raw_string = str(value).strip() + if not raw_string or raw_string == '-': return [] + return [acc.strip() for acc in raw_string.split(',') if acc.strip()] + + +# --- Roblox API Helpers --- +async def get_roblox_user_id(session: aiohttp.ClientSession, username: str): + if not username: return None + url = "https://users.roblox.com/v1/usernames/users" + payload = {"usernames": [username], "excludeBannedUsers": False} + try: + # Increased timeout specifically for Roblox API calls which can be slow + async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response: + if response.status == 200: + data = await response.json() + if data and data.get("data") and len(data["data"]) > 0: + return data["data"][0].get("id") + else: + logger.warning(f"Roblox User API returned status {response.status} for username '{username}'") + return None + except asyncio.TimeoutError: + logger.warning(f"Timeout fetching Roblox User ID for {username}") + return None + except aiohttp.ClientError as e: + logger.warning(f"Network error fetching Roblox User ID for {username}: {e}") + return None + except Exception as e: + logger.error(f"Unexpected exception fetching Roblox User ID for {username}: {e}", exc_info=True) + return None + +async def get_roblox_avatar_url(session: aiohttp.ClientSession, user_id: int): + if not user_id: return None + url = f"https://thumbnails.roblox.com/v1/users/avatar-headshot?userIds={user_id}&size=150x150&format=Png&isCircular=false" + try: + # Increased timeout specifically for Roblox API calls + async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response: + if response.status == 200: + data = await response.json() + if data and data.get("data") and len(data["data"]) > 0: + return data["data"][0].get("imageUrl") + else: + logger.warning(f"Roblox Thumbnail API returned status {response.status} for user ID {user_id}") + return None + except asyncio.TimeoutError: + logger.warning(f"Timeout fetching Roblox avatar for User ID {user_id}") + return None + except aiohttp.ClientError as e: + logger.warning(f"Network error fetching Roblox avatar for User ID {user_id}: {e}") + return None + except Exception as e: + logger.error(f"Unexpected exception fetching Roblox avatar for User ID {user_id}: {e}", exc_info=True) + return None + + +# --- Data Processing Functions --- + +def process_sheet_data(values): # For Value Categories + if not values: return [] + processed_data = [] + + for row_idx, row in enumerate(values): # Start counting from sheet row 6 (index 0 here) + if not row or not 
any(str(cell).strip() for cell in row if cell is not None): continue + # Skip header-like rows (e.g., "LEVEL 1 | HYPERCHROMES" in column F/index 4) + # Use index 4 for Value column (F) + if len(row) > 4 and isinstance(row[4], str) and re.search(r'(LEVEL \d+ \|)|(VALUE)', row[4], re.IGNORECASE): + #logger.debug(f"Skipping potential header row {row_idx+6}: {row}") + continue + + # Indices based on B6:P (0-indexed from B) + icon_formula = row[0] if len(row) > 0 else '' + name = row[2] if len(row) > 2 else 'N/A' # Column D + value_raw = row[4] if len(row) > 4 else 'N/A' # Column F + duped_value_raw = row[6] if len(row) > 6 else 'N/A' # Column H + market_value_raw = row[8] if len(row) > 8 else 'N/A' # Column J + demand = row[10] if len(row) > 10 else 'N/A' # Column L + notes = row[12] if len(row) > 12 else '' # Column N + drive_url = row[14] if len(row) > 14 else None # Column P + # Extract lastUpdated from column R (index 16) + last_updated = row[16] if len(row) > 16 else None # Column R + + cleaned_name = clean_string(name) + # Also skip if name is clearly a header like "Name" + if cleaned_name == 'N/A' or cleaned_name.lower() == 'name': + #logger.debug(f"Skipping row {row_idx+6} due to missing/header name: {row}") + continue + + processed_item = { + 'icon': extract_image_url(icon_formula, drive_url), + 'name': cleaned_name, + 'value': format_currency(value_raw), + 'dupedValue': format_currency(duped_value_raw), + 'marketValue': format_currency(market_value_raw), + 'demand': clean_string(demand, 'N/A'), + 'notes': clean_string(notes, '') + # Remove lastUpdated field - will only be set when an update is detected + } + + # Add lastUpdated if it exists in the sheet + if last_updated and str(last_updated).strip(): + # Only add if it's in the format yyyy-MM-dd HH:mm:ss + last_updated_str = str(last_updated).strip() + # Check if it matches the expected date format + if re.match(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$', last_updated_str): + processed_item['lastUpdated'] = last_updated_str + + processed_data.append(processed_item) + return processed_data + +def process_user_scammer_data(values): # For User Scammer Sheet + if not values: return [] + processed_data = [] + for row in values: # Expected range like B6:G + if not row or len(row) < 2: continue + # Indices based on B6:G (0-indexed from B) + discord_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B + roblox_username = clean_string_optional(row[1]) if len(row) > 1 else None # Col C + # Skip if both identifiers are missing + if not discord_id and not roblox_username: continue + # Skip if it looks like a header row + if str(discord_id).lower() == 'discord id' or str(roblox_username).lower() == 'roblox username': + continue + processed_item = { + 'discord_id': discord_id, + 'roblox_username': roblox_username, + 'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A', # Col D + 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', # Col E + 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, # Col F + 'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [], # Col G + 'roblox_avatar_url': None # Will be filled later + } + processed_data.append(processed_item) + return processed_data + +def process_server_scammer_data(values): # For Server Scammer Sheet + if not values: return [] + processed_data = [] + for row in values: # Expected range like B6:F + if not row or len(row) < 2: continue + # Indices based on B6:F (0-indexed from B) + server_id = clean_string_optional(row[0]) if 
len(row) > 0 else None # Col B + server_name = clean_string_optional(row[1]) if len(row) > 1 else None # Col C + # Skip if both identifiers are missing + if not server_id and not server_name: continue + # Skip if it looks like a header row + if str(server_id).lower() == 'server id' or str(server_name).lower() == 'server name': + continue + processed_item = { + 'server_id': server_id, + 'server_name': server_name, + 'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A', # Col D + 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', # Col E + 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None # Col F + } + processed_data.append(processed_item) + return processed_data + +def process_dwc_data(values): # For DWC Sheet + if not values: return [] + processed_data = [] + for row in values: # Expected range like B6:G + if not row or len(row) < 1: continue # Need at least one ID + # Indices based on B6:G (0-indexed from B) + user_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B + server_id = clean_string_optional(row[1]) if len(row) > 1 else None # Col C + roblox_user = clean_string_optional(row[2]) if len(row) > 2 else None # Col D + # Skip if all identifiers are missing + if not user_id and not server_id and not roblox_user: continue + # Skip if it looks like a header row + if str(user_id).lower() == 'user id' or str(server_id).lower() == 'server id' or str(roblox_user).lower() == 'roblox user': + continue + processed_item = { + 'status': 'DWC', + 'discord_user_id': user_id, + 'discord_server_id': server_id, + 'roblox_username': roblox_user, + 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', # Col E + 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, # Col F + 'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [], # Col G + 'roblox_avatar_url': None # Will be filled later + } + processed_data.append(processed_item) + return processed_data + +def process_trusted_data(values): # New function for Trusted Sheet + if not values: return [] + processed_data = [] + for row in values: # Expected range like B6:E + if not row or len(row) < 1: continue # Need at least one identifier + # Indices based on B6:E (0-indexed from B) + discord_user_id = clean_string_optional(row[0]) if len(row) > 0 else None # Col B + # discord_username = clean_string_optional(row[1]) if len(row) > 1 else None # Col C - Not currently used for matching, but keep for potential future use + discord_server_id = clean_string_optional(row[2]) if len(row) > 2 else None # Col D + roblox_username = clean_string_optional(row[3]) if len(row) > 3 else None # Col E + + # Skip if all relevant identifiers are missing + if not discord_user_id and not discord_server_id and not roblox_username: continue + + # Skip if it looks like a header row (check common header names) + if (str(discord_user_id).lower() == 'discord user id' or + str(discord_server_id).lower() == 'discord server id' or + str(roblox_username).lower() == 'roblox username'): + continue + + processed_item = { + 'status': 'Trusted', # Add a status field + 'discord_user_id': discord_user_id, + 'discord_server_id': discord_server_id, + 'roblox_username': roblox_username, + 'roblox_avatar_url': None # Will be filled later if roblox_username exists + # Note: No explanation or evidence expected for trusted entries based on B6:E + } + processed_data.append(processed_item) + return processed_data + +def process_dupe_list_data(values): # For Dupe List Sheet + if not values: return [] + 
# Expected range like B2:B + processed_dupes = [] + for row in values: + if row and len(row)>0 and row[0] and isinstance(row[0], str): + username = row[0].strip().lower() + # Skip header or empty strings + if username and username not in ('username', 'usernames'): + processed_dupes.append(username) + return processed_dupes + + +# --- Async Fetching Functions --- + +async def fetch_batch_ranges_async(spreadsheet_id: str, ranges: List[str], value_render_option: str = 'FORMATTED_VALUE') -> List[Dict]: + """Async wrapper to fetch multiple ranges using batchGet and return raw valueRanges.""" + global sheets_service, cache + if not sheets_service: + logger.warning(f"Attempted batch fetch from {spreadsheet_id} but Sheets service is unavailable.") + raise Exception("Google Sheets service not initialized") + if not ranges: + logger.warning(f"Batch fetch called with empty ranges for {spreadsheet_id}.") + return [] + + try: + logger.info(f"Fetching batch ranges from {spreadsheet_id}: {ranges}") + loop = asyncio.get_event_loop() + result = await loop.run_in_executor( + None, + lambda: sheets_service.spreadsheets().values().batchGet( + spreadsheetId=spreadsheet_id, + ranges=ranges, + valueRenderOption=value_render_option, + majorDimension='ROWS' + ).execute() + ) + value_ranges = result.get('valueRanges', []) + logger.info(f"Successfully fetched batch data for {len(value_ranges)} ranges from {spreadsheet_id}.") + return value_ranges # Return the raw list of valueRange objects + + except HttpError as e: + status_code = e.resp.status + error_details = {} + try: + error_details = json.loads(e.content).get('error', {}) + except json.JSONDecodeError: + logger.error(f"Failed to parse JSON error content from Google API: {e.content}") + + status = error_details.get('status', f'HTTP_{status_code}') # Use HTTP status if details missing + message = error_details.get('message', e._get_reason()) # Fallback message + logger.error(f"Google API HTTP Error during batch fetch for {spreadsheet_id}: Status={status}, Message={message}") + + if status in ('PERMISSION_DENIED', 'UNAUTHENTICATED') or status_code == 403 or status_code == 401: + logger.critical(f"Authentication/Permission Error accessing {spreadsheet_id}. Disabling service checks.") + cache["service_available"] = False # Mark service as down + sheets_service = None # Reset service to force re-init attempt + elif status == 'NOT_FOUND' or status_code == 404: + logger.error(f"Spreadsheet or Range not found error for {spreadsheet_id}. Ranges: {ranges}. Check IDs and Sheet Names.") + elif status_code >= 500: # Server-side errors on Google's end + logger.warning(f"Google API server error ({status_code}) for {spreadsheet_id}. May be temporary.") + # Keep service_available as True, retry might work + # else: # Other client errors (e.g., 400 Bad Request for invalid range format) + + raise e # Re-raise after logging + except Exception as e: + logger.error(f"Error during batch fetching from {spreadsheet_id} for ranges {ranges}: {e}", exc_info=True) + # Could be network issues, timeouts handled by aiohttp session typically + # Consider marking service unavailable for persistent non-HTTP errors too? + # cache["service_available"] = False # Optional: Be more aggressive + raise e + +# --- Webhook Sending --- +async def send_webhook_notification(session: aiohttp.ClientSession, webhook_url: str, embed: Dict): + """Sends a Discord webhook notification with the provided embed.""" + if not webhook_url: + # logger.debug("Webhook URL not configured. 
Skipping notification.") + return + if not embed: + logger.warning("Attempted to send webhook with empty embed.") + return + + webhook_data = {"embeds": [embed]} + try: + # Use a reasonable timeout for webhook posts + async with session.post(webhook_url, json=webhook_data, timeout=aiohttp.ClientTimeout(total=10)) as response: + if response.status not in [200, 204]: + # Log more details on failure + response_text = await response.text() + logger.warning(f"Failed to send webhook to {webhook_url[:30]}... (Status: {response.status}): {response_text[:500]}") # Limit response text length + # else: + # logger.debug(f"Webhook notification sent successfully to {webhook_url[:30]}...") + except asyncio.TimeoutError: + logger.warning(f"Timeout sending webhook to {webhook_url[:30]}...") + except aiohttp.ClientError as e: + logger.error(f"Network error sending webhook to {webhook_url[:30]}...: {e}") + except Exception as e: + logger.error(f"Unexpected error sending webhook: {e}", exc_info=True) + +# --- Background Cache Update Task --- + +async def update_cache_periodically(): + """Fetches data, processes, detects changes/new entries (if not first run), sends webhooks, and updates cache.""" + global cache + # Increase overall session timeout slightly for robustness + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=45)) as session: + while True: + if not cache["service_available"]: + logger.info("Attempting to re-initialize Google Sheets service...") + init_google_sheets() + if not cache["service_available"]: + logger.warning("Google Sheets service still unavailable, skipping cache update cycle.") + await asyncio.sleep(CACHE_UPDATE_INTERVAL_SECONDS * 2) # Wait longer if service is down + continue + else: + logger.info("Google Sheets service re-initialized. Proceeding with cache update.") + + logger.info(f"Starting cache update cycle... (Cache Ready: {cache['is_ready']})") + start_time = datetime.now(timezone.utc) + webhook_tasks = [] # Store webhook sending tasks + + # Prepare temporary storage for fetched data + fetched_values_categories = {} # { "CategoryName": [items...] 
} + new_cache_data = { + "user_scammers": [], + "server_scammers": [], + "dwc": [], + "trusted": [], # Add trusted key + "dupes": [], + } + current_errors = {} # Track errors for specific fetches/sheets + + try: + # --- Define Ranges and Processors --- + scammer_dwc_ranges = [ + f"{quote_sheet_name(USER_SCAMMER_SHEET)}!{USER_SCAMMER_RANGE}", + f"{quote_sheet_name(SERVER_SCAMMER_SHEET)}!{SERVER_SCAMMER_RANGE}", + f"{quote_sheet_name(DWC_SHEET)}!{DWC_RANGE}", + f"{quote_sheet_name(TRUSTED_SHEET)}!{TRUSTED_RANGE}", # Add trusted range + ] + scammer_dwc_processor_map = { + USER_SCAMMER_SHEET: process_user_scammer_data, + SERVER_SCAMMER_SHEET: process_server_scammer_data, + DWC_SHEET: process_dwc_data, + TRUSTED_SHEET: process_trusted_data, # Add trusted processor + } + scammer_dwc_target_key_map = { + USER_SCAMMER_SHEET: "user_scammers", + SERVER_SCAMMER_SHEET: "server_scammers", + DWC_SHEET: "dwc", + TRUSTED_SHEET: "trusted", # Add trusted target key + } + + values_dupes_ranges = [f"{quote_sheet_name(DUPE_LIST_SHEET)}!{DUPE_LIST_RANGE}"] + values_dupes_ranges.extend([f"{quote_sheet_name(cat)}!{VALUES_RANGE}" for cat in CATEGORIES]) + + # --- Define Fetch Tasks --- + fetch_tasks = { + "scammer_dwc_batch": fetch_batch_ranges_async( + SCAMMER_DWC_SPREADSHEET_ID, + scammer_dwc_ranges, + value_render_option='FORMATTED_VALUE' + ), + } + + # Execute scammer/dwc batch first + results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True) + task_keys = list(fetch_tasks.keys()) + + # Add delay between sheet updates + await asyncio.sleep(SHEET_UPDATE_DELAY_SECONDS) + + # Now fetch values/dupes batch + fetch_tasks = { + "values_dupes_batch": fetch_batch_ranges_async( + VALUES_DUPE_SPREADSHEET_ID, + values_dupes_ranges, + value_render_option='FORMATTED_VALUE' + ) + } + + # Execute values/dupes batch + values_results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True) + task_keys.extend(list(fetch_tasks.keys())) + results.extend(values_results) + + # --- Process Results --- + raw_scammer_dwc_results = None + raw_values_dupes_results = None + + for i, result in enumerate(results): + key = task_keys[i] + if isinstance(result, Exception): + logger.error(f"Failed to fetch batch data for {key}: {result}") + current_errors[key] = str(result) + # If fetch failed, likely service unavailable (handled by fetch_batch_ranges_async) + # No need to explicitly set cache["service_available"] = False here again + else: + if key == "scammer_dwc_batch": + raw_scammer_dwc_results = result + elif key == "values_dupes_batch": + raw_values_dupes_results = result + + # --- Process Scammer/DWC Results --- + if raw_scammer_dwc_results is not None: + logger.info(f"Processing {len(raw_scammer_dwc_results)} valueRanges from Scammer/DWC sheet...") + for vr in raw_scammer_dwc_results: + range_str = vr.get('range', '') + match = re.match(r"^'?([^'!]+)'?!", range_str) + if not match: + logger.warning(f"Could not extract sheet name from range '{range_str}' in Scammer/DWC response.") + continue + sheet_name = match.group(1).replace("''", "'") + + if sheet_name in scammer_dwc_processor_map: + processor = scammer_dwc_processor_map[sheet_name] + target_key = scammer_dwc_target_key_map[sheet_name] + values = vr.get('values', []) + try: + processed_data = processor(values) + new_cache_data[target_key] = processed_data # Store fetched data temporarily + logger.info(f"Processed {len(processed_data)} items for {sheet_name} -> {target_key}") + except Exception as e: + logger.error(f"Error processing data for 
{sheet_name} using {processor.__name__}: {e}", exc_info=True) + current_errors[f"process_{target_key}"] = str(e) + else: + logger.warning(f"No processor found for sheet name '{sheet_name}' derived from range '{range_str}' in Scammer/DWC sheet.") + else: + logger.warning("Skipping Scammer/DWC processing due to fetch error.") + + + # --- Process Values/Dupes Results --- + if raw_values_dupes_results is not None: + logger.info(f"Processing {len(raw_values_dupes_results)} valueRanges from Values/Dupes sheet...") + for vr in raw_values_dupes_results: + range_str = vr.get('range', '') + match = re.match(r"^'?([^'!]+)'?!", range_str) + if not match: + logger.warning(f"Could not extract sheet name from range '{range_str}' in Values/Dupes response.") + continue + sheet_name = match.group(1).replace("''", "'") + + values = vr.get('values', []) + try: + if sheet_name == DUPE_LIST_SHEET: + processed_data = process_dupe_list_data(values) + new_cache_data["dupes"] = processed_data # Store fetched data temporarily + logger.info(f"Processed {len(processed_data)} items for {DUPE_LIST_SHEET} -> dupes") + elif sheet_name in CATEGORIES: + processed_data = process_sheet_data(values) + fetched_values_categories[sheet_name] = processed_data # Store fetched data temporarily + logger.info(f"Processed {len(processed_data)} items for Category: {sheet_name}") + else: + logger.warning(f"Unrecognized sheet name '{sheet_name}' derived from range '{range_str}' in Values/Dupes sheet.") + except Exception as e: + target_key = "dupes" if sheet_name == DUPE_LIST_SHEET else f"values_{sheet_name}" + logger.error(f"Error processing data for {sheet_name}: {e}", exc_info=True) + current_errors[f"process_{target_key}"] = str(e) + else: + logger.warning("Skipping Values/Dupes processing due to fetch error.") + + # --- Fetch Roblox Avatars (for new data before comparison/webhook) --- + if not current_errors.get("scammer_dwc_batch") and \ + not current_errors.get("process_user_scammers") and \ + not current_errors.get("process_dwc") and \ + not current_errors.get("process_trusted"): # Check trusted processing too + logger.info("Fetching Roblox avatars for newly processed data...") + avatar_tasks = [] + entries_needing_avatars = new_cache_data.get("user_scammers", []) + new_cache_data.get("dwc", []) + new_cache_data.get("trusted", []) # Include trusted list + for entry in entries_needing_avatars: + if entry.get('roblox_username'): + avatar_tasks.append(fetch_avatar_for_entry_update(session, entry)) + if avatar_tasks: + await asyncio.gather(*avatar_tasks) # Exceptions logged within helper + logger.info(f"Finished fetching avatars for {len(avatar_tasks)} potential new entries.") + else: + logger.warning("Skipping avatar fetching due to errors in fetching/processing scammer/dwc/trusted data.") + + + # --- Change Detection & Webhook Preparation (ONLY if cache is ready) --- + current_time = datetime.now(timezone.utc) + timestamp_iso = current_time.isoformat() + detected_value_changes_for_api = {} # Always calculate for API, but only send webhooks if ready + + # Perform comparisons regardless of cache readiness to populate detected_value_changes_for_api + # But only queue webhooks if cache["is_ready"] is True + + # 1. 
Value Changes Calculation + if "values" not in cache: cache["values"] = {} # Ensure exists for comparison logic + if "values_dupes_batch" not in current_errors and not any(k.startswith("process_values_") for k in current_errors): + fields_to_compare = ['value', 'dupedValue', 'marketValue'] + for category, new_items in fetched_values_categories.items(): + old_items_dict = {item['name'].lower(): item for item in cache["values"].get(category, [])} # Use lower case for comparison robustness + category_changes_for_api = [] + + for new_item in new_items: + item_name = new_item.get('name') + if not item_name or item_name == 'N/A': continue + item_name_lower = item_name.lower() + + old_item = old_items_dict.get(item_name_lower) + change_detected_for_webhook = False + change_info_webhook = {} + + if old_item: # Check existing item for changes + for field in fields_to_compare: + old_val_str = old_item.get(field, 'N/A') + new_val_str = new_item.get(field, 'N/A') + # Simple string comparison is sufficient here as they are formatted consistently + if old_val_str != new_val_str: + logger.info(f"Change detected in {category}: {item_name} - {field}: '{old_val_str}' -> '{new_val_str}'") + change_info = { + "item_name": item_name, "field": field, + "old_value": old_val_str if old_val_str is not None else "N/A", + "new_value": new_val_str if new_val_str is not None else "N/A", + "timestamp": timestamp_iso + } + category_changes_for_api.append(change_info) + change_detected_for_webhook = True + change_info_webhook = change_info # Store last change for potential webhook + else: + # This is a new item, but don't add lastUpdated field here + pass + + # Prepare webhook only if a change was found AND cache was ready + if change_detected_for_webhook and cache["is_ready"] and VALUE_WEBHOOK_URL: + embed = { + "title": f"Value Update: {item_name} ({category})", + "color": 3447003, # Blue + "fields": [ + {"name": "Field Changed", "value": change_info_webhook['field'], "inline": True}, + {"name": "Old Value", "value": f"`{change_info_webhook['old_value']}`", "inline": True}, + {"name": "New Value", "value": f"`{change_info_webhook['new_value']}`", "inline": True}, + {"name": "Item Notes", "value": new_item.get('notes', 'N/A')[:1020] or 'N/A', "inline": False}, # Limit notes length + ], + "timestamp": timestamp_iso + } + if new_item.get('icon'): + embed["thumbnail"] = {"url": new_item['icon']} + webhook_tasks.append(send_webhook_notification(session, VALUE_WEBHOOK_URL, embed)) + + if category_changes_for_api: + detected_value_changes_for_api[category] = category_changes_for_api + if cache["is_ready"] and VALUE_WEBHOOK_URL: + logger.info(f"Prepared {len(webhook_tasks)} value change webhooks.") + elif not VALUE_WEBHOOK_URL: + logger.info("VALUE_WEBHOOK_URL not set, skipping value change webhook sending.") + elif not cache["is_ready"]: + logger.info("Skipping value change webhook sending during initial cache population.") + else: + logger.warning("Skipping value change detection and webhooks due to fetch/processing errors.") + + + # 2. 
New Scammers / DWC (Only if cache is ready) + if cache["is_ready"] and SCAMMER_WEBHOOK_URL and \ + "scammer_dwc_batch" not in current_errors and \ + not any(k.startswith("process_") and k in ["process_user_scammers", "process_server_scammers", "process_dwc"] for k in current_errors): + + logger.info("Detecting new scammer/DWC entries for webhooks (cache is ready)...") + initial_webhook_task_count = len(webhook_tasks) + added_new_scammer_webhook = False + + # --- User Scammers --- + # Create keys robust to None values + def get_user_scammer_key(item): + return (item.get('discord_id') or 'none', item.get('roblox_username') or 'none') + old_user_keys = set(get_user_scammer_key(item) for item in cache.get("user_scammers", [])) + + for item in new_cache_data.get("user_scammers", []): + key = get_user_scammer_key(item) + if key not in old_user_keys: + logger.info(f"New User Scammer detected: Discord={item.get('discord_id')}, Roblox={item.get('roblox_username')}") + embed = { + "title": "🚨 New User Scammer Added", "color": 15158332, # Red + "fields": [ + {"name": "Discord ID", "value": f"`{item.get('discord_id', 'N/A')}`", "inline": True}, + {"name": "Roblox User", "value": f"`{item.get('roblox_username', 'N/A')}`", "inline": True}, + {"name": "Scam Type", "value": item.get('scam_type', 'N/A')[:1020] or 'N/A', "inline": False}, + {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False}, + ], "timestamp": timestamp_iso + } + if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False}) + if item.get('alt_accounts'): embed["fields"].append({"name": "Alt Accounts", "value": ", ".join([f"`{a}`" for a in item['alt_accounts']])[:1020] or 'N/A', "inline": False}) # Limit length + if item.get('roblox_avatar_url'): embed["thumbnail"] = {"url": item['roblox_avatar_url']} + webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed)) + added_new_scammer_webhook = True + + # --- Server Scammers --- + def get_server_scammer_key(item): + return (item.get('server_id') or 'none', item.get('server_name') or 'none') + old_server_keys = set(get_server_scammer_key(item) for item in cache.get("server_scammers", [])) + + for item in new_cache_data.get("server_scammers", []): + key = get_server_scammer_key(item) + if key not in old_server_keys: + logger.info(f"New Server Scammer detected: ID={item.get('server_id')}, Name={item.get('server_name')}") + embed = { + "title": "🚨 New Server Scammer Added", "color": 15158332, # Red + "fields": [ + {"name": "Server ID", "value": f"`{item.get('server_id', 'N/A')}`", "inline": True}, + {"name": "Server Name", "value": f"`{item.get('server_name', 'N/A')}`", "inline": True}, + {"name": "Scam Type", "value": item.get('scam_type', 'N/A')[:1020] or 'N/A', "inline": False}, + {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False}, + ], "timestamp": timestamp_iso + } + if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False}) + webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed)) + added_new_scammer_webhook = True + + # --- DWC Entries --- + def get_dwc_key(item): + # Use a combination of available identifiers as the key + return ( + item.get('discord_user_id') or 'none', + item.get('discord_server_id') or 'none', + item.get('roblox_username') or 'none' + ) + old_dwc_keys = set(get_dwc_key(item) for item in 
cache.get("dwc", [])) + + for item in new_cache_data.get("dwc", []): + key = get_dwc_key(item) + if key not in old_dwc_keys: + logger.info(f"New DWC Entry detected: User={item.get('discord_user_id')}, Server={item.get('discord_server_id')}, Roblox={item.get('roblox_username')}") + embed = { + "title": "⚠️ New DWC Entry Added", "color": 15105570, # Orange/Dark Yellow + "fields": [ + {"name": "Discord User ID", "value": f"`{item.get('discord_user_id', 'N/A')}`", "inline": True}, + {"name": "Discord Server ID", "value": f"`{item.get('discord_server_id', 'N/A')}`", "inline": True}, + {"name": "Roblox User", "value": f"`{item.get('roblox_username', 'N/A')}`", "inline": True}, + {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False}, + ], "timestamp": timestamp_iso + } + if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False}) + if item.get('alt_accounts'): embed["fields"].append({"name": "Alt Accounts", "value": ", ".join([f"`{a}`" for a in item['alt_accounts']])[:1020] or 'N/A', "inline": False}) + if item.get('roblox_avatar_url'): embed["thumbnail"] = {"url": item['roblox_avatar_url']} + webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed)) + added_new_scammer_webhook = True + + if added_new_scammer_webhook: + logger.info(f"Prepared {len(webhook_tasks) - initial_webhook_task_count} new scammer/DWC webhooks.") + + elif not cache["is_ready"]: + logger.info("Skipping new scammer webhook detection during initial cache population.") + elif not SCAMMER_WEBHOOK_URL: + logger.info("SCAMMER_WEBHOOK_URL not set, skipping new scammer webhook detection.") + else: # Errors occurred + logger.warning("Skipping new scammer webhook detection due to fetch/processing errors.") + + + # --- Send Webhooks Concurrently --- + if webhook_tasks: + logger.info(f"Sending {len(webhook_tasks)} webhook notifications...") + await asyncio.gather(*webhook_tasks) + logger.info("Finished sending webhook notifications.") + else: + logger.info("No webhooks prepared to send for this cycle.") + + + # --- Final Cache Update --- + update_occurred = False + # Determine if this cycle *should* mark the cache as ready + can_set_ready = not cache["is_ready"] and not current_errors # Only set ready on first *fully successful* run + + if not current_errors: # Perfect cycle + logger.info("Updating full cache (no errors during fetch or processing).") + cache["values"] = fetched_values_categories + cache["user_scammers"] = new_cache_data["user_scammers"] + cache["server_scammers"] = new_cache_data["server_scammers"] + cache["dwc"] = new_cache_data["dwc"] + cache["trusted"] = new_cache_data["trusted"] + cache["dupes"] = new_cache_data["dupes"] + cache["value_changes"] = detected_value_changes_for_api # Store the detected changes + cache["last_updated"] = current_time + if can_set_ready: + logger.info("Marking cache as ready after initial successful population.") + cache["is_ready"] = True + cache["service_available"] = True # Mark as available on success + update_occurred = True + logger.info(f"Cache update cycle completed successfully.") + else: # Errors occurred, attempt partial update + logger.warning(f"Cache update cycle completed with errors: {current_errors}. 
Attempting partial update.") + partial_update_details = [] + + # Update values only if the values/dupes batch succeeded AND processing succeeded + if "values_dupes_batch" not in current_errors and not any(k.startswith("process_values_") for k in current_errors): + if cache.get("values") != fetched_values_categories: + cache["values"] = fetched_values_categories + cache["value_changes"] = detected_value_changes_for_api # Update changes along with values + partial_update_details.append("values") + update_occurred = True + else: + logger.warning("Skipping update for 'values' due to errors.") + + # Update dupes only if the values/dupes batch succeeded AND processing succeeded + if "values_dupes_batch" not in current_errors and "process_dupes" not in current_errors: + if cache.get("dupes") != new_cache_data["dupes"]: + cache["dupes"] = new_cache_data["dupes"] + partial_update_details.append("dupes") + update_occurred = True + else: + logger.warning("Skipping update for 'dupes' due to errors.") + + # Update scammer/DWC sections if their batch succeeded AND processing succeeded + if "scammer_dwc_batch" not in current_errors: + for key in ["user_scammers", "server_scammers", "dwc", "trusted"]: # Include "trusted" in this list + process_error_key = f"process_{key}" + if process_error_key not in current_errors: + if cache.get(key) != new_cache_data[key]: + cache[key] = new_cache_data[key] + partial_update_details.append(key) + update_occurred = True + else: + logger.warning(f"Skipping update for '{key}' due to processing error.") + else: + logger.warning("Skipping update for 'user_scammers', 'server_scammers', 'dwc', 'trusted' due to batch fetch error.") + + if update_occurred: + cache["last_updated"] = current_time # Mark partial update time + # Mark cache ready only if it was *already* ready and we managed a partial update + # Or if this was the first run AND it was partially successful (maybe relax this?) + # Let's stick to: only mark ready on first FULL success. + if cache["is_ready"]: # If it was already ready, keep it ready + logger.info(f"Partially updated cache sections: {', '.join(partial_update_details)}. Cache remains ready.") + else: + logger.info(f"Partially updated cache sections: {', '.join(partial_update_details)}. Cache remains NOT ready (requires full success on first run).") + # Keep service_available based on whether fetch errors occurred + else: + logger.error(f"Cache update cycle failed, and no parts could be updated based on errors. Errors: {current_errors}") + # Cache readiness and service availability remain unchanged + + except Exception as e: + logger.exception(f"Critical error during cache update cycle: {e}") + # If a critical error happens (e.g., network error during fetch), mark service potentially unavailable + if isinstance(e, (aiohttp.ClientError, HttpError, asyncio.TimeoutError)): + logger.warning("Communication error detected, will re-check service availability next cycle.") + # service_available might have already been set to False by fetch_batch_ranges_async + + # --- Wait for the next cycle --- + end_time = datetime.now(timezone.utc) + duration = (end_time - start_time).total_seconds() + wait_time = max(10, CACHE_UPDATE_INTERVAL_SECONDS - duration) # Ensure at least 10s wait + logger.info(f"Cache update cycle duration: {duration:.2f}s. 
Waiting {wait_time:.2f}s for next cycle.") + await asyncio.sleep(wait_time) + + +async def fetch_avatar_for_entry_update(session: aiohttp.ClientSession, entry: dict): + """Fetches avatar and updates the provided entry dictionary IN PLACE.""" + roblox_username = entry.get('roblox_username') + if not roblox_username: return + + new_avatar = None # Default to None + try: + user_id = await get_roblox_user_id(session, roblox_username) + if user_id: + new_avatar = await get_roblox_avatar_url(session, user_id) + # else: # User ID not found, keep avatar as None + # logger.debug(f"Roblox user ID not found for username: {roblox_username}") + + except Exception as e: + # Log errors but don't stop the main update loop + logger.warning(f"Failed to fetch avatar for {roblox_username}: {e}") + # Keep new_avatar as None on error + + finally: + # Update the entry dict directly + entry['roblox_avatar_url'] = new_avatar + + +# --- FastAPI Startup Event --- +@app.on_event("startup") +async def startup_event(): + """Starts the background cache update task.""" + if not cache["service_available"]: + logger.warning("Google Sheets service not available at startup. Will attempt re-init in background task.") + logger.info("Starting background cache update task...") + # Check for webhook URLs at startup + if not SCAMMER_WEBHOOK_URL: + logger.warning("SCAMMER_WEBHOOK_URL environment variable not set. New scammer notifications disabled.") + if not VALUE_WEBHOOK_URL: + logger.warning("VALUE_WEBHOOK_URL environment variable not set. Value change notifications disabled.") + if not DUPE_CHECK_WEBHOOK_URL: + logger.warning("DUPE_CHECK_WEBHOOK_URL environment variable not set. Dupe check notifications disabled.") + asyncio.create_task(update_cache_periodically()) + + +# --- API Endpoints --- + +def check_cache_readiness(): + """Reusable readiness check for API endpoints.""" + if not cache["is_ready"]: + # Be more specific if service is known to be down + if not cache["service_available"]: + raise HTTPException(status_code=503, detail="Service temporarily unavailable due to backend connection issues. Please try again later.") + else: + raise HTTPException(status_code=503, detail="Cache is initializing or data is currently unavailable. 
Please try again shortly.") + + +@app.get("/") +async def root(): + return {"message": "JVC API - Running"} + +@app.get("/api/status") +async def get_status(): + """Returns the current status of the cache and service availability""" + last_updated_iso = cache["last_updated"].isoformat() if cache["last_updated"] else None + return { + "cache_ready": cache["is_ready"], + "sheets_service_available": cache["service_available"], + "last_updated": last_updated_iso, + "cached_items": { + "value_categories": len(cache.get("values", {})), + "user_scammers": len(cache.get("user_scammers", [])), + "server_scammers": len(cache.get("server_scammers", [])), + "dwc_entries": len(cache.get("dwc", [])), + "trusted_entries": len(cache.get("trusted", [])), # Add trusted count + "duped_usernames": len(cache.get("dupes", [])), + }, + "value_change_categories_in_last_cycle": len(cache.get("value_changes", {})) + } + +@app.get("/api/values") +async def get_values(): + """Get all values data from cache""" + check_cache_readiness() + return cache.get("values", {}) + +@app.get("/api/values/{category}") +async def get_category_values(category: str): + """Get values data for a specific category from cache""" + check_cache_readiness() + # Case-insensitive matching for category name + matched_category = next((c for c in cache.get("values", {}).keys() if c.lower() == category.lower()), None) + if not matched_category: + # Check if the category *exists* conceptually even if empty + valid_categories_lower = [c.lower() for c in CATEGORIES] + if category.lower() in valid_categories_lower: + return {category: []} # Return empty list if category is valid but has no items yet + else: + raise HTTPException(status_code=404, detail=f"Category '{category}' not found.") + return {matched_category: cache.get("values", {}).get(matched_category, [])} + + +@app.get("/api/value-changes/{category}") +async def get_category_value_changes(category: str): + """Get detected value changes for a specific category from the last cache update cycle.""" + check_cache_readiness() + # Case-insensitive matching for category name + matched_category = next((c for c in cache.get("value_changes", {}).keys() if c.lower() == category.lower()), None) + if not matched_category: + # Check if the category *exists* conceptually even if empty + valid_categories_lower = [c.lower() for c in CATEGORIES] + if category.lower() in valid_categories_lower: + return {category: []} # Return empty list if category is valid but had no changes + else: + raise HTTPException(status_code=404, detail=f"Category '{category}' not found.") + return {matched_category: cache.get("value_changes", {}).get(matched_category, [])} + +@app.get("/api/value-changes") +async def get_all_value_changes(): + """Get all detected value changes from the last cache update cycle.""" + check_cache_readiness() + return cache.get("value_changes", {}) + +@app.get("/api/scammers") +async def get_scammers(): + """Get all scammer, DWC, and trusted data (users, servers, dwc, trusted) from cache""" + check_cache_readiness() + return { + "users": cache.get("user_scammers", []), + "servers": cache.get("server_scammers", []), + "dwc": cache.get("dwc", []), + "trusted": cache.get("trusted", []) # Include trusted list + } + +@app.get("/api/dupes") +async def get_dupes(): + """Get all duped usernames from cache""" + check_cache_readiness() + return {"usernames": cache.get("dupes", [])} # Return empty list if not ready or empty + + +class UsernameCheck(BaseModel): + username: str + +@app.post("/api/check") +async 
def check_username(data: UsernameCheck): + """Check if a username is duped using cached data and optionally send webhook""" + check_cache_readiness() # Use the standard readiness check + + username_to_check = data.username.strip().lower() + # Ensure dupes list is populated before checking + dupes_list = cache.get("dupes", []) + is_duped = username_to_check in dupes_list + + # Webhook notification for checks resulting in "Not Found" + if not is_duped: + if DUPE_CHECK_WEBHOOK_URL: + async def send_check_webhook(): + try: + # Use a short-lived session for this potentially frequent task + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session: + embed = { + "title": "User Dupe Check - Not Found", + "description": f"Username `{data.username}` was checked against the dupe list but was **not** found.", + "color": 16776960, # Yellow + "timestamp": datetime.now(timezone.utc).isoformat() + } + await send_webhook_notification(session, DUPE_CHECK_WEBHOOK_URL, embed) + except Exception as e: + logger.error(f"Error sending dupe check webhook: {e}") + + asyncio.create_task(send_check_webhook()) # Fire and forget + else: + logger.info("DUPE_CHECK_WEBHOOK_URL not configured. Skipping dupe check notification.") + + return {"username": data.username, "is_duped": is_duped} + + +@app.get("/health") +def health_check(): + """Provides a health status of the API and its cache.""" + now = datetime.now(timezone.utc) + status_detail = {"status": "ok", "last_updated": None, "time_since_update_seconds": None} + + if cache["last_updated"]: + status_detail["last_updated"] = cache["last_updated"].isoformat() + time_since_update = (now - cache["last_updated"]).total_seconds() + status_detail["time_since_update_seconds"] = round(time_since_update) + else: + status_detail["last_updated"] = None + status_detail["time_since_update_seconds"] = None + + + if not cache["is_ready"]: + status_detail["status"] = "initializing" + status_detail["reason"] = "Cache has not been populated yet." + return status_detail + + if not cache["service_available"]: + status_detail["status"] = "degraded" + status_detail["reason"] = "Google Sheets service connection issue detected on last attempt." + return status_detail + + # Check for staleness only if the cache is ready and service *was* available last check + # Allow some grace period (e.g., 3 intervals) + if cache["last_updated"] and time_since_update > CACHE_UPDATE_INTERVAL_SECONDS * 3: + status_detail["status"] = "degraded" + status_detail["reason"] = f"Cache potentially stale (last update > {CACHE_UPDATE_INTERVAL_SECONDS * 3} seconds ago)" + return status_detail + + # If we reach here, status is 'ok' + return status_detail \ No newline at end of file
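
A minimal client sketch for the endpoints defined in this patch, assuming the app is served locally (for example with uvicorn main:app --port 8000). The base URL, the example username, and the use of the requests library are illustrative assumptions, not part of the patch:

import requests

BASE_URL = "http://localhost:8000"  # assumed host/port for a local run

# Health and cache status endpoints from main.py
print(requests.get(f"{BASE_URL}/health", timeout=10).json())
print(requests.get(f"{BASE_URL}/api/status", timeout=10).json())

# Dupe check: POST body matches the UsernameCheck model ({"username": ...})
resp = requests.post(f"{BASE_URL}/api/check", json={"username": "SomePlayer"}, timeout=10)
print(resp.json())  # e.g. {'username': 'SomePlayer', 'is_duped': False}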