import socket
socket.setdefaulttimeout(4000)
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, Any, Dict, List
import aiohttp
import os
from datetime import datetime, timezone
import json
import re
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from dotenv import load_dotenv
import asyncio
import logging

# --- Logging Setup ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = FastAPI()

# --- Configuration ---
load_dotenv()

# CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Consider restricting in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Google Sheets Config
# Spreadsheet containing Scammer and DWC info
SCAMMER_DWC_SPREADSHEET_ID = os.getenv('SCAMMER_DWC_SPREADSHEET_ID', '1sgkhBNGw_r6tBIxvdeXaI0bVmWBeACN4jiw_oDEeXLw')
# Spreadsheet containing Value lists and Dupe list
VALUES_DUPE_SPREADSHEET_ID = os.getenv('VALUES_DUPE_SPREADSHEET_ID', '1Toe07o3P517q8sm9Qb1e5xyFWCuwgskj71IKJwJNfNU')
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Sheet Names and Ranges within SCAMMER_DWC_SPREADSHEET_ID
USER_SCAMMER_SHEET = "User Scammer Files"
USER_SCAMMER_RANGE = "B6:G"
SERVER_SCAMMER_SHEET = "Server Scammer Files"
SERVER_SCAMMER_RANGE = "B6:F"
DWC_SHEET = "DWC Servers / Users"
DWC_RANGE = "B6:G"
TRUSTED_SHEET = "Trusted Users / Servers"
TRUSTED_RANGE = "B6:E"

# Sheet Names and Ranges within VALUES_DUPE_SPREADSHEET_ID
DUPE_LIST_SHEET = "Dupe List"
DUPE_LIST_RANGE = "B2:B"

# Value Categories (Sheet Names)
CATEGORIES = [
    "Vehicles", "Textures", "Colours", "Spoilers",
    "Rims", "Furnitures", "Gun Skins", "Hyperchromes"
]
VALUES_RANGE = 'B6:R'  # Range within each category sheet, including column R for lastUpdated

# Cache Update Interval
CACHE_UPDATE_INTERVAL_SECONDS = 60
# Sheet Update Delay
SHEET_UPDATE_DELAY_SECONDS = 10  # 10 second delay between sheet updates

# Webhook URLs
SCAMMER_WEBHOOK_URL = os.getenv("SCAMMER_WEBHOOK_URL")
VALUE_WEBHOOK_URL = os.getenv("VALUE_WEBHOOK_URL")
DUPE_CHECK_WEBHOOK_URL = os.getenv("DUPE_CHECK_WEBHOOK_URL")

# --- Global Cache ---
cache = {
    "values": {},              # Dict mapping category name to list of items
    "value_changes": {},       # Dict mapping category name to list of recent changes (for API endpoint)
    "user_scammers": [],
    "server_scammers": [],
    "dwc": [],
    "trusted": [],             # Cache key for trusted entries
    "dupes": [],               # List of duped usernames
    "last_updated": None,      # Timestamp of the last successful/partial update
    "is_ready": False,         # Has the cache been populated at least once?
    "service_available": True  # Is the Google Sheets service reachable?
}

# --- Google Sheets Initialization ---
sheets_service = None  # Initialize as None
def quote_sheet_name(name: str) -> str:
    """Adds single quotes around a sheet name if it needs them."""
    if not name:
        return "''"
    # Simple check: quote if the name contains spaces or non-alphanumeric chars (excluding _)
    if not re.match(r"^[a-zA-Z0-9_]+$", name):
        # Escape existing single quotes within the name
        escaped_name = name.replace("'", "''")
        return f"'{escaped_name}'"
    return name
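# Illustrative examples (not from the original source):
#   quote_sheet_name("User Scammer Files") -> "'User Scammer Files'"
#   quote_sheet_name("Vehicles")           -> "Vehicles"
#   quote_sheet_name("Bob's Sheet")        -> "'Bob''s Sheet'"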
def init_google_sheets(scopes=SCOPES):
    """Initialize Google Sheets credentials from environment variable or file."""
    global sheets_service, cache
    try:
        creds_json_str = os.getenv('CREDENTIALS_JSON')
        if creds_json_str:
            logger.info("Attempting to load credentials from CREDENTIALS_JSON environment variable.")
            creds_json = json.loads(creds_json_str)
            creds = ServiceAccountCredentials.from_service_account_info(
                creds_json,
                scopes=scopes
            )
            sheets_service = build('sheets', 'v4', credentials=creds, cache_discovery=False)  # Disable discovery cache
            logger.info("Google Sheets service initialized successfully from ENV VAR.")
            cache["service_available"] = True
            return sheets_service
        else:
            logger.info("CREDENTIALS_JSON environment variable not found. Falling back to file.")
            raise ValueError("CREDENTIALS_JSON not set")  # Trigger fallback explicitly
    except Exception as e:
        logger.warning(f"Error initializing Google Sheets from ENV VAR: {e}. Trying file...")
        try:
            # Fallback to loading credentials from file 'credentials.json'
            creds_file = 'credentials.json'
            if os.path.exists(creds_file):
                logger.info(f"Attempting to load credentials from file '{creds_file}'")
                creds = ServiceAccountCredentials.from_service_account_file(
                    creds_file,
                    scopes=scopes
                )
                sheets_service = build('sheets', 'v4', credentials=creds, cache_discovery=False)
                logger.info("Google Sheets service initialized successfully from file.")
                cache["service_available"] = True
                return sheets_service
            else:
                logger.error(f"Credentials file '{creds_file}' not found.")
                raise FileNotFoundError(f"'{creds_file}' not found")
        except Exception as file_e:
            logger.error(f"Error loading credentials from file: {file_e}")
            logger.critical("Google Sheets service could not be initialized. API will be limited.")
            cache["service_available"] = False
            sheets_service = None
            return None

# Initialize on module load
init_google_sheets()
# --- Helper Functions (Data Extraction & Formatting) ---
def extract_drive_id(url):
    if not url or not isinstance(url, str): return None
    match = re.search(r'https://drive\.google\.com/file/d/([^/]+)', url)
    return match.group(1) if match else None

def convert_to_thumbnail_url(drive_url):
    drive_id = extract_drive_id(drive_url)
    return f"https://drive.google.com/thumbnail?id={drive_id}&sz=w1000" if drive_id else drive_url
def extract_image_url(formula, drive_url=None):
    # Prefer an explicit drive_url if provided
    if drive_url and isinstance(drive_url, str) and 'drive.google.com' in drive_url:
        return convert_to_thumbnail_url(drive_url)
    if not formula or not isinstance(formula, str): return ''
    # Handle direct URLs
    if formula.startswith('http://') or formula.startswith('https://'):
        return formula
    # Handle =IMAGE("...") formulas
    if formula.startswith('=IMAGE('):
        match = re.search(r'=IMAGE\("([^"]+)"', formula)
        if match: return match.group(1)
    return ''  # Return empty string if no valid URL found
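# Illustrative examples (not from the original source):
#   extract_image_url('=IMAGE("https://example.com/icon.png")') -> "https://example.com/icon.png"
#   extract_image_url('https://example.com/icon.png')           -> "https://example.com/icon.png"
#   extract_image_url('not a url')                              -> ""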
def format_currency(value: Any) -> Optional[str]:
    if value is None or str(value).strip() == '': return 'N/A'
    try:
        num_str = str(value).replace('$', '').replace(',', '').strip()
        if not num_str or num_str.lower() == 'n/a': return 'N/A'
        num = float(num_str)
        return f"${num:,.0f}"
    except (ValueError, TypeError):
        # Allow text like "Event", "Unobtainable" etc. to pass through
        if isinstance(value, str) and value.strip() and not re.match(r'^-?[\d,.$]+\$?$', value.strip()):
            return value.strip()  # Return original text if non-numeric-like
        return 'N/A'  # Return N/A for things that look like bad numbers
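# Illustrative examples (not from the original source):
#   format_currency("1234567") -> "$1,234,567"
#   format_currency("$1,500")  -> "$1,500"
#   format_currency("Event")   -> "Event"   (non-numeric text passes through)
#   format_currency("")        -> "N/A"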
def parse_cached_currency(value_str: Optional[str]) -> Optional[float]:
    if value_str is None or str(value_str).strip().lower() == 'n/a':
        return None
    try:
        num_str = str(value_str).replace('$', '').replace(',', '').strip()
        return float(num_str)
    except (ValueError, TypeError):
        return None  # Return None if it's not a parsable number (e.g., "Event")
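# Illustrative examples: parse_cached_currency("$1,500") -> 1500.0, parse_cached_currency("Event") -> None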
def clean_string(value, default='N/A'):
    if value is None: return default
    cleaned = str(value).strip()
    return cleaned if cleaned else default

def clean_string_optional(value):
    if value is None: return None
    cleaned = str(value).strip()
    return cleaned if cleaned and cleaned != '-' else None

def parse_alt_accounts(value):
    if value is None: return []
    raw_string = str(value).strip()
    if not raw_string or raw_string == '-': return []
    return [acc.strip() for acc in raw_string.split(',') if acc.strip()]
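# Illustrative example: parse_alt_accounts("alt1, alt2,  ") -> ["alt1", "alt2"]; parse_alt_accounts("-") -> []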
# --- Roblox API Helpers ---
async def get_roblox_user_id(session: aiohttp.ClientSession, username: str):
    if not username: return None
    url = "https://users.roblox.com/v1/usernames/users"
    payload = {"usernames": [username], "excludeBannedUsers": False}
    try:
        # Increased timeout specifically for Roblox API calls, which can be slow
        async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response:
            if response.status == 200:
                data = await response.json()
                if data and data.get("data") and len(data["data"]) > 0:
                    return data["data"][0].get("id")
            else:
                logger.warning(f"Roblox User API returned status {response.status} for username '{username}'")
            return None
    except asyncio.TimeoutError:
        logger.warning(f"Timeout fetching Roblox User ID for {username}")
        return None
    except aiohttp.ClientError as e:
        logger.warning(f"Network error fetching Roblox User ID for {username}: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected exception fetching Roblox User ID for {username}: {e}", exc_info=True)
        return None

async def get_roblox_avatar_url(session: aiohttp.ClientSession, user_id: int):
    if not user_id: return None
    url = f"https://thumbnails.roblox.com/v1/users/avatar-headshot?userIds={user_id}&size=150x150&format=Png&isCircular=false"
    try:
        # Increased timeout specifically for Roblox API calls
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response:
            if response.status == 200:
                data = await response.json()
                if data and data.get("data") and len(data["data"]) > 0:
                    return data["data"][0].get("imageUrl")
            else:
                logger.warning(f"Roblox Thumbnail API returned status {response.status} for user ID {user_id}")
            return None
    except asyncio.TimeoutError:
        logger.warning(f"Timeout fetching Roblox avatar for User ID {user_id}")
        return None
    except aiohttp.ClientError as e:
        logger.warning(f"Network error fetching Roblox avatar for User ID {user_id}: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected exception fetching Roblox avatar for User ID {user_id}: {e}", exc_info=True)
        return None
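# The two helpers are typically chained: resolve the username to a numeric ID first,
# then fetch the headshot thumbnail for that ID (see fetch_avatar_for_entry_update below).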
# --- Data Processing Functions ---
def process_sheet_data(values):  # For Value Categories
    if not values: return []
    processed_data = []
    for row_idx, row in enumerate(values):  # Sheet rows start at 6 (index 0 here)
        if not row or not any(str(cell).strip() for cell in row if cell is not None): continue
        # Skip header-like rows (e.g., "LEVEL 1 | HYPERCHROMES" in the Value column F, index 4)
        if len(row) > 4 and isinstance(row[4], str) and re.search(r'(LEVEL \d+ \|)|(VALUE)', row[4], re.IGNORECASE):
            # logger.debug(f"Skipping potential header row {row_idx+6}: {row}")
            continue
        # Indices based on B6:R (0-indexed from column B)
        icon_formula = row[0] if len(row) > 0 else ''          # Column B
        name = row[2] if len(row) > 2 else 'N/A'                # Column D
        value_raw = row[4] if len(row) > 4 else 'N/A'           # Column F
        duped_value_raw = row[6] if len(row) > 6 else 'N/A'     # Column H
        market_value_raw = row[8] if len(row) > 8 else 'N/A'    # Column J
        demand = row[10] if len(row) > 10 else 'N/A'            # Column L
        notes = row[12] if len(row) > 12 else ''                # Column N
        drive_url = row[14] if len(row) > 14 else None          # Column P
        last_updated = row[16] if len(row) > 16 else None       # Column R (lastUpdated)
        cleaned_name = clean_string(name)
        # Also skip rows whose name is missing or is clearly a header like "Name"
        if cleaned_name == 'N/A' or cleaned_name.lower() == 'name':
            # logger.debug(f"Skipping row {row_idx+6} due to missing/header name: {row}")
            continue
        processed_item = {
            'icon': extract_image_url(icon_formula, drive_url),
            'name': cleaned_name,
            'value': format_currency(value_raw),
            'dupedValue': format_currency(duped_value_raw),
            'marketValue': format_currency(market_value_raw),
            'demand': clean_string(demand, 'N/A'),
            'notes': clean_string(notes, '')
        }
        # Only set lastUpdated when the sheet provides one; otherwise it is added
        # later, when a value change is detected.
        if last_updated and str(last_updated).strip():
            processed_item['lastUpdated'] = str(last_updated).strip()
        processed_data.append(processed_item)
    return processed_data
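# Illustrative row mapping (the values below are made up, not real sheet data): a row such as
#   ['=IMAGE("https://example.com/car.png")', '', 'Torpedo', '', '1500000', '', '750000',
#    '', '2000000', '', '8/10', '', 'Clean only', '', None, '', '2024-01-01']
# would be processed into:
#   {'icon': 'https://example.com/car.png', 'name': 'Torpedo', 'value': '$1,500,000',
#    'dupedValue': '$750,000', 'marketValue': '$2,000,000', 'demand': '8/10',
#    'notes': 'Clean only', 'lastUpdated': '2024-01-01'}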
def process_user_scammer_data(values):  # For User Scammer Sheet
    if not values: return []
    processed_data = []
    for row in values:  # Expected range like B6:G
        if not row or len(row) < 2: continue
        # Indices based on B6:G (0-indexed from B)
        discord_id = clean_string_optional(row[0]) if len(row) > 0 else None        # Col B
        roblox_username = clean_string_optional(row[1]) if len(row) > 1 else None   # Col C
        # Skip if both identifiers are missing
        if not discord_id and not roblox_username: continue
        # Skip if it looks like a header row
        if str(discord_id).lower() == 'discord id' or str(roblox_username).lower() == 'roblox username':
            continue
        processed_item = {
            'discord_id': discord_id,
            'roblox_username': roblox_username,
            'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A',             # Col D
            'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A',           # Col E
            'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, # Col F
            'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [],       # Col G
            'roblox_avatar_url': None  # Will be filled later
        }
        processed_data.append(processed_item)
    return processed_data
def process_server_scammer_data(values):  # For Server Scammer Sheet
    if not values: return []
    processed_data = []
    for row in values:  # Expected range like B6:F
        if not row or len(row) < 2: continue
        # Indices based on B6:F (0-indexed from B)
        server_id = clean_string_optional(row[0]) if len(row) > 0 else None     # Col B
        server_name = clean_string_optional(row[1]) if len(row) > 1 else None   # Col C
        # Skip if both identifiers are missing
        if not server_id and not server_name: continue
        # Skip if it looks like a header row
        if str(server_id).lower() == 'server id' or str(server_name).lower() == 'server name':
            continue
        processed_item = {
            'server_id': server_id,
            'server_name': server_name,
            'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A',            # Col D
            'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A',          # Col E
            'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None # Col F
        }
        processed_data.append(processed_item)
    return processed_data
def process_dwc_data(values):  # For DWC Sheet
    if not values: return []
    processed_data = []
    for row in values:  # Expected range like B6:G
        if not row or len(row) < 1: continue  # Need at least one ID
        # Indices based on B6:G (0-indexed from B)
        user_id = clean_string_optional(row[0]) if len(row) > 0 else None       # Col B
        server_id = clean_string_optional(row[1]) if len(row) > 1 else None     # Col C
        roblox_user = clean_string_optional(row[2]) if len(row) > 2 else None   # Col D
        # Skip if all identifiers are missing
        if not user_id and not server_id and not roblox_user: continue
        # Skip if it looks like a header row
        if str(user_id).lower() == 'user id' or str(server_id).lower() == 'server id' or str(roblox_user).lower() == 'roblox user':
            continue
        processed_item = {
            'status': 'DWC',
            'discord_user_id': user_id,
            'discord_server_id': server_id,
            'roblox_username': roblox_user,
            'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A',           # Col E
            'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, # Col F
            'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [],       # Col G
            'roblox_avatar_url': None  # Will be filled later
        }
        processed_data.append(processed_item)
    return processed_data
def process_trusted_data(values):  # For Trusted Sheet
    if not values: return []
    processed_data = []
    for row in values:  # Expected range like B6:E
        if not row or len(row) < 1: continue  # Need at least one identifier
        # Indices based on B6:E (0-indexed from B)
        discord_user_id = clean_string_optional(row[0]) if len(row) > 0 else None   # Col B
        # discord_username = clean_string_optional(row[1]) if len(row) > 1 else None  # Col C - not used for matching, kept for potential future use
        discord_server_id = clean_string_optional(row[2]) if len(row) > 2 else None # Col D
        roblox_username = clean_string_optional(row[3]) if len(row) > 3 else None   # Col E
        # Skip if all relevant identifiers are missing
        if not discord_user_id and not discord_server_id and not roblox_username: continue
        # Skip if it looks like a header row (check common header names)
        if (str(discord_user_id).lower() == 'discord user id' or
                str(discord_server_id).lower() == 'discord server id' or
                str(roblox_username).lower() == 'roblox username'):
            continue
        processed_item = {
            'status': 'Trusted',  # Add a status field
            'discord_user_id': discord_user_id,
            'discord_server_id': discord_server_id,
            'roblox_username': roblox_username,
            'roblox_avatar_url': None  # Filled later if roblox_username exists
            # Note: no explanation or evidence is expected for trusted entries in B6:E
        }
        processed_data.append(processed_item)
    return processed_data
def process_dupe_list_data(values):  # For Dupe List Sheet
    if not values: return []
    # Expected range like B2:B
    processed_dupes = []
    for row in values:
        if row and len(row) > 0 and row[0] and isinstance(row[0], str):
            username = row[0].strip().lower()
            # Skip headers and empty strings
            if username and username not in ('username', 'usernames'):
                processed_dupes.append(username)
    return processed_dupes
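# Illustrative example: [['Username'], ['SomePlayer'], ['  '], ['AnotherPlayer']]
# -> ['someplayer', 'anotherplayer'] (lower-cased, header and blank rows dropped)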
# --- Async Fetching Functions ---
async def fetch_batch_ranges_async(spreadsheet_id: str, ranges: List[str], value_render_option: str = 'FORMATTED_VALUE') -> List[Dict]:
    """Async wrapper to fetch multiple ranges using batchGet and return the raw valueRanges."""
    global sheets_service, cache
    if not sheets_service:
        logger.warning(f"Attempted batch fetch from {spreadsheet_id} but Sheets service is unavailable.")
        raise Exception("Google Sheets service not initialized")
    if not ranges:
        logger.warning(f"Batch fetch called with empty ranges for {spreadsheet_id}.")
        return []
    try:
        logger.info(f"Fetching batch ranges from {spreadsheet_id}: {ranges}")
        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(
            None,
            lambda: sheets_service.spreadsheets().values().batchGet(
                spreadsheetId=spreadsheet_id,
                ranges=ranges,
                valueRenderOption=value_render_option,
                majorDimension='ROWS'
            ).execute()
        )
        value_ranges = result.get('valueRanges', [])
        logger.info(f"Successfully fetched batch data for {len(value_ranges)} ranges from {spreadsheet_id}.")
        return value_ranges  # Return the raw list of valueRange objects
    except HttpError as e:
        status_code = e.resp.status
        error_details = {}
        try:
            error_details = json.loads(e.content).get('error', {})
        except json.JSONDecodeError:
            logger.error(f"Failed to parse JSON error content from Google API: {e.content}")
        status = error_details.get('status', f'HTTP_{status_code}')  # Use HTTP status if details missing
        message = error_details.get('message', e._get_reason())  # Fallback message
        logger.error(f"Google API HTTP Error during batch fetch for {spreadsheet_id}: Status={status}, Message={message}")
        if status in ('PERMISSION_DENIED', 'UNAUTHENTICATED') or status_code in (401, 403):
            logger.critical(f"Authentication/Permission Error accessing {spreadsheet_id}. Disabling service checks.")
            cache["service_available"] = False  # Mark service as down
            sheets_service = None  # Reset service to force a re-init attempt
        elif status == 'NOT_FOUND' or status_code == 404:
            logger.error(f"Spreadsheet or Range not found error for {spreadsheet_id}. Ranges: {ranges}. Check IDs and Sheet Names.")
        elif status_code >= 500:  # Server-side errors on Google's end
            logger.warning(f"Google API server error ({status_code}) for {spreadsheet_id}. May be temporary.")
            # Keep service_available as True; a retry might work
        # Other client errors (e.g., 400 Bad Request for an invalid range format) fall through
        raise e  # Re-raise after logging
    except Exception as e:
        logger.error(f"Error during batch fetching from {spreadsheet_id} for ranges {ranges}: {e}", exc_info=True)
        # Could be network issues; timeouts are typically handled by the aiohttp session
        # Optionally mark the service unavailable for persistent non-HTTP errors as well:
        # cache["service_available"] = False
        raise e
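# For reference, batchGet returns one valueRange per requested range, shaped roughly like:
#   [{'range': "'Vehicles'!B6:R", 'majorDimension': 'ROWS', 'values': [[...row cells...], ...]}, ...]
# The 'values' key is omitted for ranges that contain no data, hence the vr.get('values', []) below.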
# --- Webhook Sending ---
async def send_webhook_notification(session: aiohttp.ClientSession, webhook_url: str, embed: Dict):
    """Sends a Discord webhook notification with the provided embed."""
    if not webhook_url:
        # logger.debug("Webhook URL not configured. Skipping notification.")
        return
    if not embed:
        logger.warning("Attempted to send webhook with empty embed.")
        return
    webhook_data = {"embeds": [embed]}
    try:
        # Use a reasonable timeout for webhook posts
        async with session.post(webhook_url, json=webhook_data, timeout=aiohttp.ClientTimeout(total=10)) as response:
            if response.status not in [200, 204]:
                # Log more details on failure
                response_text = await response.text()
                logger.warning(f"Failed to send webhook to {webhook_url[:30]}... (Status: {response.status}): {response_text[:500]}")  # Limit logged response length
            # else:
            #     logger.debug(f"Webhook notification sent successfully to {webhook_url[:30]}...")
    except asyncio.TimeoutError:
        logger.warning(f"Timeout sending webhook to {webhook_url[:30]}...")
    except aiohttp.ClientError as e:
        logger.error(f"Network error sending webhook to {webhook_url[:30]}...: {e}")
    except Exception as e:
        logger.error(f"Unexpected error sending webhook: {e}", exc_info=True)
# --- Background Cache Update Task ---
async def update_cache_periodically():
    """Fetches data, processes it, detects changes/new entries (after the first run), sends webhooks, and updates the cache."""
    global cache
    # Slightly larger overall session timeout for robustness
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=45)) as session:
        while True:
            if not cache["service_available"]:
                logger.info("Attempting to re-initialize Google Sheets service...")
                init_google_sheets()
                if not cache["service_available"]:
                    logger.warning("Google Sheets service still unavailable, skipping cache update cycle.")
                    await asyncio.sleep(CACHE_UPDATE_INTERVAL_SECONDS * 2)  # Wait longer if the service is down
                    continue
                else:
                    logger.info("Google Sheets service re-initialized. Proceeding with cache update.")

            logger.info(f"Starting cache update cycle... (Cache Ready: {cache['is_ready']})")
            start_time = datetime.now(timezone.utc)
            webhook_tasks = []  # Webhook sending tasks queued during this cycle

            # Temporary storage for fetched data
            fetched_values_categories = {}  # { "CategoryName": [items...] }
            new_cache_data = {
                "user_scammers": [],
                "server_scammers": [],
                "dwc": [],
                "trusted": [],
                "dupes": [],
            }
            current_errors = {}  # Track errors for specific fetches/sheets

            try:
                # --- Define Ranges and Processors ---
                scammer_dwc_ranges = [
                    f"{quote_sheet_name(USER_SCAMMER_SHEET)}!{USER_SCAMMER_RANGE}",
                    f"{quote_sheet_name(SERVER_SCAMMER_SHEET)}!{SERVER_SCAMMER_RANGE}",
                    f"{quote_sheet_name(DWC_SHEET)}!{DWC_RANGE}",
                    f"{quote_sheet_name(TRUSTED_SHEET)}!{TRUSTED_RANGE}",
                ]
                scammer_dwc_processor_map = {
                    USER_SCAMMER_SHEET: process_user_scammer_data,
                    SERVER_SCAMMER_SHEET: process_server_scammer_data,
                    DWC_SHEET: process_dwc_data,
                    TRUSTED_SHEET: process_trusted_data,
                }
                scammer_dwc_target_key_map = {
                    USER_SCAMMER_SHEET: "user_scammers",
                    SERVER_SCAMMER_SHEET: "server_scammers",
                    DWC_SHEET: "dwc",
                    TRUSTED_SHEET: "trusted",
                }
                values_dupes_ranges = [f"{quote_sheet_name(DUPE_LIST_SHEET)}!{DUPE_LIST_RANGE}"]
                values_dupes_ranges.extend([f"{quote_sheet_name(cat)}!{VALUES_RANGE}" for cat in CATEGORIES])

                # --- Define Fetch Tasks ---
                fetch_tasks = {
                    "scammer_dwc_batch": fetch_batch_ranges_async(
                        SCAMMER_DWC_SPREADSHEET_ID,
                        scammer_dwc_ranges,
                        value_render_option='FORMATTED_VALUE'
                    ),
                }
                # Execute the scammer/DWC batch first
                results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True)
                task_keys = list(fetch_tasks.keys())

                # Delay between the two spreadsheet fetches
                await asyncio.sleep(SHEET_UPDATE_DELAY_SECONDS)

                # Now fetch the values/dupes batch
                fetch_tasks = {
                    "values_dupes_batch": fetch_batch_ranges_async(
                        VALUES_DUPE_SPREADSHEET_ID,
                        values_dupes_ranges,
                        value_render_option='FORMATTED_VALUE'
                    )
                }
                # Execute the values/dupes batch
                values_results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True)
                task_keys.extend(list(fetch_tasks.keys()))
                results.extend(values_results)

                # --- Process Results ---
                raw_scammer_dwc_results = None
                raw_values_dupes_results = None
                for i, result in enumerate(results):
                    key = task_keys[i]
                    if isinstance(result, Exception):
                        logger.error(f"Failed to fetch batch data for {key}: {result}")
                        current_errors[key] = str(result)
                        # Service availability for failed fetches is handled inside fetch_batch_ranges_async
                    else:
                        if key == "scammer_dwc_batch":
                            raw_scammer_dwc_results = result
                        elif key == "values_dupes_batch":
                            raw_values_dupes_results = result
                # --- Process Scammer/DWC Results ---
                if raw_scammer_dwc_results is not None:
                    logger.info(f"Processing {len(raw_scammer_dwc_results)} valueRanges from Scammer/DWC sheet...")
                    for vr in raw_scammer_dwc_results:
                        range_str = vr.get('range', '')
                        match = re.match(r"^'?([^'!]+)'?!", range_str)
                        if not match:
                            logger.warning(f"Could not extract sheet name from range '{range_str}' in Scammer/DWC response.")
                            continue
                        sheet_name = match.group(1).replace("''", "'")
                        if sheet_name in scammer_dwc_processor_map:
                            processor = scammer_dwc_processor_map[sheet_name]
                            target_key = scammer_dwc_target_key_map[sheet_name]
                            values = vr.get('values', [])
                            try:
                                processed_data = processor(values)
                                new_cache_data[target_key] = processed_data  # Store fetched data temporarily
                                logger.info(f"Processed {len(processed_data)} items for {sheet_name} -> {target_key}")
                            except Exception as e:
                                logger.error(f"Error processing data for {sheet_name} using {processor.__name__}: {e}", exc_info=True)
                                current_errors[f"process_{target_key}"] = str(e)
                        else:
                            logger.warning(f"No processor found for sheet name '{sheet_name}' derived from range '{range_str}' in Scammer/DWC sheet.")
                else:
                    logger.warning("Skipping Scammer/DWC processing due to fetch error.")

                # --- Process Values/Dupes Results ---
                if raw_values_dupes_results is not None:
                    logger.info(f"Processing {len(raw_values_dupes_results)} valueRanges from Values/Dupes sheet...")
                    for vr in raw_values_dupes_results:
                        range_str = vr.get('range', '')
                        match = re.match(r"^'?([^'!]+)'?!", range_str)
                        if not match:
                            logger.warning(f"Could not extract sheet name from range '{range_str}' in Values/Dupes response.")
                            continue
                        sheet_name = match.group(1).replace("''", "'")
                        values = vr.get('values', [])
                        try:
                            if sheet_name == DUPE_LIST_SHEET:
                                processed_data = process_dupe_list_data(values)
                                new_cache_data["dupes"] = processed_data  # Store fetched data temporarily
                                logger.info(f"Processed {len(processed_data)} items for {DUPE_LIST_SHEET} -> dupes")
                            elif sheet_name in CATEGORIES:
                                processed_data = process_sheet_data(values)
                                fetched_values_categories[sheet_name] = processed_data  # Store fetched data temporarily
                                logger.info(f"Processed {len(processed_data)} items for Category: {sheet_name}")
                            else:
                                logger.warning(f"Unrecognized sheet name '{sheet_name}' derived from range '{range_str}' in Values/Dupes sheet.")
                        except Exception as e:
                            target_key = "dupes" if sheet_name == DUPE_LIST_SHEET else f"values_{sheet_name}"
                            logger.error(f"Error processing data for {sheet_name}: {e}", exc_info=True)
                            current_errors[f"process_{target_key}"] = str(e)
                else:
                    logger.warning("Skipping Values/Dupes processing due to fetch error.")

                # --- Fetch Roblox Avatars (for new data, before comparison/webhooks) ---
                if not current_errors.get("scammer_dwc_batch") and \
                   not current_errors.get("process_user_scammers") and \
                   not current_errors.get("process_dwc") and \
                   not current_errors.get("process_trusted"):
                    logger.info("Fetching Roblox avatars for newly processed data...")
                    avatar_tasks = []
                    entries_needing_avatars = new_cache_data.get("user_scammers", []) + new_cache_data.get("dwc", []) + new_cache_data.get("trusted", [])
                    for entry in entries_needing_avatars:
                        if entry.get('roblox_username'):
                            avatar_tasks.append(fetch_avatar_for_entry_update(session, entry))
                    if avatar_tasks:
                        await asyncio.gather(*avatar_tasks)  # Exceptions are logged inside the helper
                        logger.info(f"Finished fetching avatars for {len(avatar_tasks)} potential new entries.")
                else:
                    logger.warning("Skipping avatar fetching due to errors in fetching/processing scammer/dwc/trusted data.")
                # --- Change Detection & Webhook Preparation ---
                # Comparisons always run so detected_value_changes_for_api is populated for the API,
                # but webhooks are only queued once cache["is_ready"] is True.
                current_time = datetime.now(timezone.utc)
                timestamp_iso = current_time.isoformat()
                detected_value_changes_for_api = {}

                # 1. Value Changes Calculation
                if "values" not in cache: cache["values"] = {}  # Ensure the key exists for the comparison logic
                if "values_dupes_batch" not in current_errors and not any(k.startswith("process_values_") for k in current_errors):
                    fields_to_compare = ['value', 'dupedValue', 'marketValue']
                    for category, new_items in fetched_values_categories.items():
                        old_items_dict = {item['name'].lower(): item for item in cache["values"].get(category, [])}  # Lower-case names for robust comparison
                        category_changes_for_api = []
                        for new_item in new_items:
                            item_name = new_item.get('name')
                            if not item_name or item_name == 'N/A': continue
                            item_name_lower = item_name.lower()
                            old_item = old_items_dict.get(item_name_lower)
                            change_detected_for_webhook = False
                            change_info_webhook = {}
                            if old_item:  # Existing item: check for changes
                                for field in fields_to_compare:
                                    old_val_str = old_item.get(field, 'N/A')
                                    new_val_str = new_item.get(field, 'N/A')
                                    # Simple string comparison is sufficient here as they are formatted consistently
                                    if old_val_str != new_val_str:
                                        logger.info(f"Change detected in {category}: {item_name} - {field}: '{old_val_str}' -> '{new_val_str}'")
                                        change_info = {
                                            "item_name": item_name, "field": field,
                                            "old_value": old_val_str if old_val_str is not None else "N/A",
                                            "new_value": new_val_str if new_val_str is not None else "N/A",
                                            "timestamp": timestamp_iso
                                        }
                                        category_changes_for_api.append(change_info)
                                        change_detected_for_webhook = True
                                        change_info_webhook = change_info  # Keep the last change for the potential webhook
                                        # Stamp the item with lastUpdated when a change is detected
                                        new_item['lastUpdated'] = timestamp_iso
                            else:
                                # Brand new item: stamp it with lastUpdated
                                new_item['lastUpdated'] = timestamp_iso
                            # Prepare a webhook only if a change was found AND the cache was already ready
                            if change_detected_for_webhook and cache["is_ready"] and VALUE_WEBHOOK_URL:
                                embed = {
                                    "title": f"Value Update: {item_name} ({category})",
                                    "color": 3447003,  # Blue
                                    "fields": [
                                        {"name": "Field Changed", "value": change_info_webhook['field'], "inline": True},
                                        {"name": "Old Value", "value": f"`{change_info_webhook['old_value']}`", "inline": True},
                                        {"name": "New Value", "value": f"`{change_info_webhook['new_value']}`", "inline": True},
                                        {"name": "Item Notes", "value": new_item.get('notes', 'N/A')[:1020] or 'N/A', "inline": False},  # Limit notes length
                                    ],
                                    "timestamp": timestamp_iso
                                }
                                if new_item.get('icon'):
                                    embed["thumbnail"] = {"url": new_item['icon']}
                                webhook_tasks.append(send_webhook_notification(session, VALUE_WEBHOOK_URL, embed))
                        if category_changes_for_api:
                            detected_value_changes_for_api[category] = category_changes_for_api
                    if cache["is_ready"] and VALUE_WEBHOOK_URL:
                        logger.info(f"Prepared {len(webhook_tasks)} value change webhooks.")
                    elif not VALUE_WEBHOOK_URL:
                        logger.info("VALUE_WEBHOOK_URL not set, skipping value change webhook sending.")
                    elif not cache["is_ready"]:
                        logger.info("Skipping value change webhook sending during initial cache population.")
                else:
                    logger.warning("Skipping value change detection and webhooks due to fetch/processing errors.")
                # 2. New Scammers / DWC (only if the cache is ready)
                if cache["is_ready"] and SCAMMER_WEBHOOK_URL and \
                   "scammer_dwc_batch" not in current_errors and \
                   not any(k.startswith("process_") and k in ["process_user_scammers", "process_server_scammers", "process_dwc"] for k in current_errors):
                    logger.info("Detecting new scammer/DWC entries for webhooks (cache is ready)...")
                    initial_webhook_task_count = len(webhook_tasks)
                    added_new_scammer_webhook = False

                    # --- User Scammers ---
                    # Build keys that are robust to None values
                    def get_user_scammer_key(item):
                        return (item.get('discord_id') or 'none', item.get('roblox_username') or 'none')
                    old_user_keys = set(get_user_scammer_key(item) for item in cache.get("user_scammers", []))
                    for item in new_cache_data.get("user_scammers", []):
                        key = get_user_scammer_key(item)
                        if key not in old_user_keys:
                            logger.info(f"New User Scammer detected: Discord={item.get('discord_id')}, Roblox={item.get('roblox_username')}")
                            embed = {
                                "title": "🚨 New User Scammer Added", "color": 15158332,  # Red
                                "fields": [
                                    {"name": "Discord ID", "value": f"`{item.get('discord_id', 'N/A')}`", "inline": True},
                                    {"name": "Roblox User", "value": f"`{item.get('roblox_username', 'N/A')}`", "inline": True},
                                    {"name": "Scam Type", "value": item.get('scam_type', 'N/A')[:1020] or 'N/A', "inline": False},
                                    {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False},
                                ], "timestamp": timestamp_iso
                            }
                            if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False})
                            if item.get('alt_accounts'): embed["fields"].append({"name": "Alt Accounts", "value": ", ".join([f"`{a}`" for a in item['alt_accounts']])[:1020] or 'N/A', "inline": False})  # Limit length
                            if item.get('roblox_avatar_url'): embed["thumbnail"] = {"url": item['roblox_avatar_url']}
                            webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed))
                            added_new_scammer_webhook = True

                    # --- Server Scammers ---
                    def get_server_scammer_key(item):
                        return (item.get('server_id') or 'none', item.get('server_name') or 'none')
                    old_server_keys = set(get_server_scammer_key(item) for item in cache.get("server_scammers", []))
                    for item in new_cache_data.get("server_scammers", []):
                        key = get_server_scammer_key(item)
                        if key not in old_server_keys:
                            logger.info(f"New Server Scammer detected: ID={item.get('server_id')}, Name={item.get('server_name')}")
                            embed = {
                                "title": "🚨 New Server Scammer Added", "color": 15158332,  # Red
                                "fields": [
                                    {"name": "Server ID", "value": f"`{item.get('server_id', 'N/A')}`", "inline": True},
                                    {"name": "Server Name", "value": f"`{item.get('server_name', 'N/A')}`", "inline": True},
                                    {"name": "Scam Type", "value": item.get('scam_type', 'N/A')[:1020] or 'N/A', "inline": False},
                                    {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False},
                                ], "timestamp": timestamp_iso
                            }
                            if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False})
                            webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed))
                            added_new_scammer_webhook = True

                    # --- DWC Entries ---
                    def get_dwc_key(item):
                        # Use a combination of the available identifiers as the key
                        return (
                            item.get('discord_user_id') or 'none',
                            item.get('discord_server_id') or 'none',
                            item.get('roblox_username') or 'none'
                        )
                    old_dwc_keys = set(get_dwc_key(item) for item in cache.get("dwc", []))
                    for item in new_cache_data.get("dwc", []):
                        key = get_dwc_key(item)
                        if key not in old_dwc_keys:
                            logger.info(f"New DWC Entry detected: User={item.get('discord_user_id')}, Server={item.get('discord_server_id')}, Roblox={item.get('roblox_username')}")
                            embed = {
                                "title": "⚠️ New DWC Entry Added", "color": 15105570,  # Orange/Dark Yellow
                                "fields": [
                                    {"name": "Discord User ID", "value": f"`{item.get('discord_user_id', 'N/A')}`", "inline": True},
                                    {"name": "Discord Server ID", "value": f"`{item.get('discord_server_id', 'N/A')}`", "inline": True},
                                    {"name": "Roblox User", "value": f"`{item.get('roblox_username', 'N/A')}`", "inline": True},
                                    {"name": "Explanation", "value": item.get('explanation', 'N/A')[:1020] or 'N/A', "inline": False},
                                ], "timestamp": timestamp_iso
                            }
                            if item.get('evidence_link'): embed["fields"].append({"name": "Evidence", "value": item['evidence_link'], "inline": False})
                            if item.get('alt_accounts'): embed["fields"].append({"name": "Alt Accounts", "value": ", ".join([f"`{a}`" for a in item['alt_accounts']])[:1020] or 'N/A', "inline": False})
                            if item.get('roblox_avatar_url'): embed["thumbnail"] = {"url": item['roblox_avatar_url']}
                            webhook_tasks.append(send_webhook_notification(session, SCAMMER_WEBHOOK_URL, embed))
                            added_new_scammer_webhook = True

                    if added_new_scammer_webhook:
                        logger.info(f"Prepared {len(webhook_tasks) - initial_webhook_task_count} new scammer/DWC webhooks.")
                elif not cache["is_ready"]:
                    logger.info("Skipping new scammer webhook detection during initial cache population.")
                elif not SCAMMER_WEBHOOK_URL:
                    logger.info("SCAMMER_WEBHOOK_URL not set, skipping new scammer webhook detection.")
                else:  # Errors occurred
                    logger.warning("Skipping new scammer webhook detection due to fetch/processing errors.")
                # --- Send Webhooks Concurrently ---
                if webhook_tasks:
                    logger.info(f"Sending {len(webhook_tasks)} webhook notifications...")
                    await asyncio.gather(*webhook_tasks)
                    logger.info("Finished sending webhook notifications.")
                else:
                    logger.info("No webhooks prepared to send for this cycle.")

                # --- Final Cache Update ---
                update_occurred = False
                # Only set ready on the first *fully successful* run
                can_set_ready = not cache["is_ready"] and not current_errors

                if not current_errors:  # Perfect cycle
                    logger.info("Updating full cache (no errors during fetch or processing).")
                    cache["values"] = fetched_values_categories
                    cache["user_scammers"] = new_cache_data["user_scammers"]
                    cache["server_scammers"] = new_cache_data["server_scammers"]
                    cache["dwc"] = new_cache_data["dwc"]
                    cache["trusted"] = new_cache_data["trusted"]
                    cache["dupes"] = new_cache_data["dupes"]
                    cache["value_changes"] = detected_value_changes_for_api  # Store the detected changes
                    cache["last_updated"] = current_time
                    if can_set_ready:
                        logger.info("Marking cache as ready after initial successful population.")
                        cache["is_ready"] = True
                    cache["service_available"] = True  # Mark as available on success
                    update_occurred = True
                    logger.info("Cache update cycle completed successfully.")
                else:  # Errors occurred; attempt a partial update
                    logger.warning(f"Cache update cycle completed with errors: {current_errors}. Attempting partial update.")
                    partial_update_details = []

                    # Update values only if the values/dupes batch AND its processing succeeded
                    if "values_dupes_batch" not in current_errors and not any(k.startswith("process_values_") for k in current_errors):
                        if cache.get("values") != fetched_values_categories:
                            cache["values"] = fetched_values_categories
                            cache["value_changes"] = detected_value_changes_for_api  # Update changes along with values
                            partial_update_details.append("values")
                            update_occurred = True
                    else:
                        logger.warning("Skipping update for 'values' due to errors.")

                    # Update dupes only if the values/dupes batch AND its processing succeeded
                    if "values_dupes_batch" not in current_errors and "process_dupes" not in current_errors:
                        if cache.get("dupes") != new_cache_data["dupes"]:
                            cache["dupes"] = new_cache_data["dupes"]
                            partial_update_details.append("dupes")
                            update_occurred = True
                    else:
                        logger.warning("Skipping update for 'dupes' due to errors.")

                    # Update scammer/DWC/trusted sections if their batch AND processing succeeded
                    if "scammer_dwc_batch" not in current_errors:
                        for key in ["user_scammers", "server_scammers", "dwc", "trusted"]:
                            process_error_key = f"process_{key}"
                            if process_error_key not in current_errors:
                                if cache.get(key) != new_cache_data[key]:
                                    cache[key] = new_cache_data[key]
                                    partial_update_details.append(key)
                                    update_occurred = True
                            else:
                                logger.warning(f"Skipping update for '{key}' due to processing error.")
                    else:
                        logger.warning("Skipping update for 'user_scammers', 'server_scammers', 'dwc', 'trusted' due to batch fetch error.")

                    if update_occurred:
                        cache["last_updated"] = current_time  # Mark the partial update time
                        # Readiness is only granted after the first fully successful run;
                        # a partial update keeps whatever readiness state already exists.
                        if cache["is_ready"]:
                            logger.info(f"Partially updated cache sections: {', '.join(partial_update_details)}. Cache remains ready.")
                        else:
                            logger.info(f"Partially updated cache sections: {', '.join(partial_update_details)}. Cache remains NOT ready (requires full success on first run).")
                        # Keep service_available based on whether fetch errors occurred
                    else:
                        logger.error(f"Cache update cycle failed, and no parts could be updated based on errors. Errors: {current_errors}")
                        # Cache readiness and service availability remain unchanged

            except Exception as e:
                logger.exception(f"Critical error during cache update cycle: {e}")
                # On a communication error, mark the service for a re-check next cycle
                if isinstance(e, (aiohttp.ClientError, HttpError, asyncio.TimeoutError)):
                    logger.warning("Communication error detected, will re-check service availability next cycle.")
                    # service_available may already have been set to False by fetch_batch_ranges_async

            # --- Wait for the next cycle ---
            end_time = datetime.now(timezone.utc)
            duration = (end_time - start_time).total_seconds()
            wait_time = max(10, CACHE_UPDATE_INTERVAL_SECONDS - duration)  # Ensure at least a 10s wait
            logger.info(f"Cache update cycle duration: {duration:.2f}s. Waiting {wait_time:.2f}s for next cycle.")
            await asyncio.sleep(wait_time)
async def fetch_avatar_for_entry_update(session: aiohttp.ClientSession, entry: dict):
    """Fetches the avatar and updates the provided entry dictionary IN PLACE."""
    roblox_username = entry.get('roblox_username')
    if not roblox_username: return
    new_avatar = None  # Default to None
    try:
        user_id = await get_roblox_user_id(session, roblox_username)
        if user_id:
            new_avatar = await get_roblox_avatar_url(session, user_id)
        # else: user ID not found, keep the avatar as None
    except Exception as e:
        # Log errors but don't stop the main update loop
        logger.warning(f"Failed to fetch avatar for {roblox_username}: {e}")
        # new_avatar stays None on error
    finally:
        # Update the entry dict directly
        entry['roblox_avatar_url'] = new_avatar
# --- FastAPI Startup Event ---
@app.on_event("startup")  # decorator assumed; the background task needs to be registered at startup
async def startup_event():
    """Starts the background cache update task."""
    if not cache["service_available"]:
        logger.warning("Google Sheets service not available at startup. Will attempt re-init in background task.")
    logger.info("Starting background cache update task...")
    # Check for webhook URLs at startup
    if not SCAMMER_WEBHOOK_URL:
        logger.warning("SCAMMER_WEBHOOK_URL environment variable not set. New scammer notifications disabled.")
    if not VALUE_WEBHOOK_URL:
        logger.warning("VALUE_WEBHOOK_URL environment variable not set. Value change notifications disabled.")
    if not DUPE_CHECK_WEBHOOK_URL:
        logger.warning("DUPE_CHECK_WEBHOOK_URL environment variable not set. Dupe check notifications disabled.")
    asyncio.create_task(update_cache_periodically())
# --- API Endpoints ---
def check_cache_readiness():
    """Reusable readiness check for API endpoints."""
    if not cache["is_ready"]:
        # Be more specific if the service is known to be down
        if not cache["service_available"]:
            raise HTTPException(status_code=503, detail="Service temporarily unavailable due to backend connection issues. Please try again later.")
        else:
            raise HTTPException(status_code=503, detail="Cache is initializing or data is currently unavailable. Please try again shortly.")
@app.get("/")  # route path assumed
async def root():
    return {"message": "JVC API - Running"}

@app.get("/api/status")  # route path assumed
async def get_status():
    """Returns the current status of the cache and service availability."""
    last_updated_iso = cache["last_updated"].isoformat() if cache["last_updated"] else None
    return {
        "cache_ready": cache["is_ready"],
        "sheets_service_available": cache["service_available"],
        "last_updated": last_updated_iso,
        "cached_items": {
            "value_categories": len(cache.get("values", {})),
            "user_scammers": len(cache.get("user_scammers", [])),
            "server_scammers": len(cache.get("server_scammers", [])),
            "dwc_entries": len(cache.get("dwc", [])),
            "trusted_entries": len(cache.get("trusted", [])),
            "duped_usernames": len(cache.get("dupes", [])),
        },
        "value_change_categories_in_last_cycle": len(cache.get("value_changes", {}))
    }
@app.get("/api/values")  # route path assumed
async def get_values():
    """Get all values data from the cache."""
    check_cache_readiness()
    return cache.get("values", {})

@app.get("/api/values/{category}")  # route path assumed
async def get_category_values(category: str):
    """Get values data for a specific category from the cache."""
    check_cache_readiness()
    # Case-insensitive matching for the category name
    matched_category = next((c for c in cache.get("values", {}).keys() if c.lower() == category.lower()), None)
    if not matched_category:
        # The category may exist conceptually even if it is empty
        valid_categories_lower = [c.lower() for c in CATEGORIES]
        if category.lower() in valid_categories_lower:
            return {category: []}  # Valid category with no items yet
        else:
            raise HTTPException(status_code=404, detail=f"Category '{category}' not found.")
    return {matched_category: cache.get("values", {}).get(matched_category, [])}

@app.get("/api/value-changes/{category}")  # route path assumed
async def get_category_value_changes(category: str):
    """Get detected value changes for a specific category from the last cache update cycle."""
    check_cache_readiness()
    # Case-insensitive matching for the category name
    matched_category = next((c for c in cache.get("value_changes", {}).keys() if c.lower() == category.lower()), None)
    if not matched_category:
        # The category may exist conceptually even if it had no changes
        valid_categories_lower = [c.lower() for c in CATEGORIES]
        if category.lower() in valid_categories_lower:
            return {category: []}  # Valid category with no recent changes
        else:
            raise HTTPException(status_code=404, detail=f"Category '{category}' not found.")
    return {matched_category: cache.get("value_changes", {}).get(matched_category, [])}

@app.get("/api/value-changes")  # route path assumed
async def get_all_value_changes():
    """Get all detected value changes from the last cache update cycle."""
    check_cache_readiness()
    return cache.get("value_changes", {})

@app.get("/api/scammers")  # route path assumed
async def get_scammers():
    """Get all scammer, DWC, and trusted data (users, servers, dwc, trusted) from the cache."""
    check_cache_readiness()
    return {
        "users": cache.get("user_scammers", []),
        "servers": cache.get("server_scammers", []),
        "dwc": cache.get("dwc", []),
        "trusted": cache.get("trusted", [])
    }

@app.get("/api/dupes")  # route path assumed
async def get_dupes():
    """Get all duped usernames from the cache."""
    check_cache_readiness()
    return {"usernames": cache.get("dupes", [])}
class UsernameCheck(BaseModel):
    username: str

@app.post("/api/check")  # route path assumed
async def check_username(data: UsernameCheck):
    """Check whether a username is duped using cached data and optionally send a webhook."""
    check_cache_readiness()  # Standard readiness check

    username_to_check = data.username.strip().lower()
    dupes_list = cache.get("dupes", [])
    is_duped = username_to_check in dupes_list

    # Webhook notification for checks that come back "not found"
    if not is_duped:
        if DUPE_CHECK_WEBHOOK_URL:
            async def send_check_webhook():
                try:
                    # Use a short-lived session for this potentially frequent task
                    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session:
                        embed = {
                            "title": "User Dupe Check - Not Found",
                            "description": f"Username `{data.username}` was checked against the dupe list but was **not** found.",
                            "color": 16776960,  # Yellow
                            "timestamp": datetime.now(timezone.utc).isoformat()
                        }
                        await send_webhook_notification(session, DUPE_CHECK_WEBHOOK_URL, embed)
                except Exception as e:
                    logger.error(f"Error sending dupe check webhook: {e}")
            asyncio.create_task(send_check_webhook())  # Fire and forget
        else:
            logger.info("DUPE_CHECK_WEBHOOK_URL not configured. Skipping notification.")

    return {"username": data.username, "is_duped": is_duped}
@app.get("/health")  # route path assumed
def health_check():
    """Provides a health status of the API and its cache."""
    now = datetime.now(timezone.utc)
    status_detail = {"status": "ok", "last_updated": None, "time_since_update_seconds": None}
    time_since_update = None
    if cache["last_updated"]:
        status_detail["last_updated"] = cache["last_updated"].isoformat()
        time_since_update = (now - cache["last_updated"]).total_seconds()
        status_detail["time_since_update_seconds"] = round(time_since_update)

    if not cache["is_ready"]:
        status_detail["status"] = "initializing"
        status_detail["reason"] = "Cache has not been populated yet."
        return status_detail

    if not cache["service_available"]:
        status_detail["status"] = "degraded"
        status_detail["reason"] = "Google Sheets service connection issue detected on last attempt."
        return status_detail

    # Check for staleness only if the cache is ready and the service was available on the last check.
    # Allow a grace period of three update intervals.
    if time_since_update is not None and time_since_update > CACHE_UPDATE_INTERVAL_SECONDS * 3:
        status_detail["status"] = "degraded"
        status_detail["reason"] = f"Cache potentially stale (last update > {CACHE_UPDATE_INTERVAL_SECONDS * 3} seconds ago)"
        return status_detail

    # If we reach here, the status is 'ok'
    return status_detail
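# Minimal local-run sketch (not part of the original file): assumes uvicorn is installed and
# that port 7860 (the usual Hugging Face Spaces port) is appropriate for your environment.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)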