# main.py from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import Optional, Any import aiohttp import os from datetime import datetime, timezone # <-- Add timezone import json import re from google.oauth2.service_account import Credentials as ServiceAccountCredentials from googleapiclient.discovery import build from dotenv import load_dotenv import asyncio import logging # <-- Add logging # --- Logging Setup --- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) app = FastAPI() # --- Configuration --- load_dotenv() # CORS app.add_middleware( CORSMiddleware, allow_origins=["*"], # Consider restricting in production allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Google Sheets Config SPREADSHEET_ID = '1sgkhBNGw_r6tBIxvdeXaI0bVmWBeACN4jiw_oDEeXLw' VALUES_SPREADSHEET_ID = '1Toe07o3P517q8sm9Qb1e5xyFWCuwgskj71IKJwJNfNU' SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly'] USER_SCAMMER_SHEET = "User Scammer Files" SERVER_SCAMMER_SHEET = "Server Scammer Files" DWC_SHEET = "DWC Servers / Users" DUPE_LIST_SHEET = "Dupe List" CATEGORIES = [ "Vehicles", "Textures", "Colours", "Spoilers", "Rims", "Furnitures", "Gun Skins", "Hyperchromes" ] # Cache Update Interval CACHE_UPDATE_INTERVAL_SECONDS = 60 # 1 minute # --- Global Cache --- cache = { "values": {}, "value_changes": {}, "user_scammers": [], "server_scammers": [], "dwc": [], "dupes": [], "last_updated": None, "is_ready": False, "service_available": True } # --- Google Sheets Initialization --- sheets_service = None # Initialize as None def init_google_sheets(scopes=SCOPES): """Initialize Google Sheets credentials from environment variable""" global sheets_service, cache # Allow modifying global vars try: creds_json_str = os.getenv('CREDENTIALS_JSON') if not creds_json_str: logger.error("CREDENTIALS_JSON environment variable not found") raise ValueError("CREDENTIALS_JSON environment variable not found") creds_json = json.loads(creds_json_str) creds = ServiceAccountCredentials.from_service_account_info( creds_json, scopes=scopes ) sheets_service = build('sheets', 'v4', credentials=creds) logger.info("Google Sheets service initialized successfully from ENV VAR.") cache["service_available"] = True return sheets_service except Exception as e: logger.error(f"Error initializing Google Sheets from ENV VAR: {e}") # Fallback attempt (optional) try: logger.info("Falling back to loading credentials from file 'credentials.json'") creds = ServiceAccountCredentials.from_service_account_file( 'credentials.json', scopes=scopes ) sheets_service = build('sheets', 'v4', credentials=creds) logger.info("Google Sheets service initialized successfully from file.") cache["service_available"] = True return sheets_service except Exception as file_e: logger.error(f"Error loading credentials from file: {file_e}") logger.critical("Google Sheets service could not be initialized. API will be limited.") cache["service_available"] = False sheets_service = None # Ensure it's None if failed return None # Initialize on module load init_google_sheets() # --- Helper Functions (Mostly unchanged) --- def extract_drive_id(url): if not url or not isinstance(url, str): return None match = re.search(r'https://drive\.google\.com/file/d/([^/]+)', url) return match.group(1) if match else None def convert_to_thumbnail_url(drive_url): drive_id = extract_drive_id(drive_url) return f"https://drive.google.com/thumbnail?id={drive_id}&sz=w1000" if drive_id else drive_url def extract_image_url(formula, drive_url=None): if drive_url and isinstance(drive_url, str) and 'drive.google.com' in drive_url: return convert_to_thumbnail_url(drive_url) if not formula or not isinstance(formula, str): return '' if formula.startswith('=IMAGE('): match = re.search(r'=IMAGE\("([^"]+)"', formula) if match: return match.group(1) return formula def format_currency(value: Any) -> Optional[str]: if value is None or str(value).strip() == '': return 'N/A' try: num_str = str(value).replace('$', '').replace(',', '').strip() if not num_str or num_str.lower() == 'n/a': return 'N/A' num = float(num_str) return f"${num:,.0f}" except (ValueError, TypeError): # Check if it's non-numeric text before returning N/A if isinstance(value, str) and not re.match(r'^-?[\d,.]+\$?$', value.strip()): return value.strip() # Return original text if it doesn't look like a number/currency return 'N/A' # Default to N/A if conversion fails def parse_cached_currency(value_str: Optional[str]) -> Optional[float]: if value_str is None or value_str.lower() == 'n/a': return None try: num_str = value_str.replace('$', '').replace(',', '').strip() return float(num_str) except (ValueError, TypeError): return None # Cannot parse def clean_string(value, default='N/A'): if value is None: return default cleaned = str(value).strip() return cleaned if cleaned else default def clean_string_optional(value): if value is None: return None cleaned = str(value).strip() return cleaned if cleaned and cleaned != '-' else None def parse_alt_accounts(value): if value is None: return [] raw_string = str(value).strip() if not raw_string or raw_string == '-': return [] return [acc.strip() for acc in raw_string.split(',') if acc.strip()] # --- Roblox API Helpers (Unchanged) --- async def get_roblox_user_id(session: aiohttp.ClientSession, username: str): if not username: return None url = "https://users.roblox.com/v1/usernames/users" payload = {"usernames": [username], "excludeBannedUsers": False} try: async with session.post(url, json=payload) as response: if response.status == 200: data = await response.json() if data and data.get("data") and len(data["data"]) > 0: return data["data"][0].get("id") # else: logger.warning(f"Roblox User ID API non-200 status for {username}: {response.status}") # Maybe too noisy return None except asyncio.TimeoutError: logger.warning(f"Timeout fetching Roblox User ID for {username}") return None except aiohttp.ClientError as e: logger.warning(f"Network error fetching Roblox User ID for {username}: {e}") return None except Exception as e: logger.error(f"Unexpected exception fetching Roblox User ID for {username}: {e}") return None async def get_roblox_avatar_url(session: aiohttp.ClientSession, user_id: int): if not user_id: return None url = f"https://thumbnails.roblox.com/v1/users/avatar-headshot?userIds={user_id}&size=150x150&format=Png&isCircular=false" try: async with session.get(url) as response: if response.status == 200: data = await response.json() if data and data.get("data") and len(data["data"]) > 0: return data["data"][0].get("imageUrl") # else: logger.warning(f"Roblox Avatar API non-200 status for User ID {user_id}: {response.status}") # Maybe too noisy return None except asyncio.TimeoutError: logger.warning(f"Timeout fetching Roblox avatar for User ID {user_id}") return None except aiohttp.ClientError as e: logger.warning(f"Network error fetching Roblox avatar for User ID {user_id}: {e}") return None except Exception as e: logger.error(f"Unexpected exception fetching Roblox avatar for User ID {user_id}: {e}") return None # --- Data Processing Functions (Unchanged) --- def process_sheet_data(values): """Process raw sheet data into structured format for values""" if not values: return [] processed_data = [] for row in values: if not row or not any(cell.strip() for cell in row if cell): continue drive_url = row[14] if len(row) > 14 else None filtered_row = [cell for i, cell in enumerate(row) if i % 2 == 0] if len(filtered_row) >= 4 and isinstance(filtered_row[3], str) and re.search(r'LEVEL \d+ \| HYPERCHROMES', filtered_row[3]): continue if len(filtered_row) >= 6: processed_item = { 'icon': extract_image_url(filtered_row[0], drive_url), 'name': clean_string(filtered_row[1], 'N/A') if len(filtered_row) > 1 else 'N/A', 'value': format_currency(filtered_row[2]) if len(filtered_row) > 2 else 'N/A', 'dupedValue': format_currency(filtered_row[3]) if len(filtered_row) > 3 else 'N/A', 'marketValue': format_currency(filtered_row[4]) if len(filtered_row) > 4 else 'N/A', 'demand': clean_string(filtered_row[5], 'N/A') if len(filtered_row) > 5 else 'N/A', 'notes': clean_string(filtered_row[6], '') if len(filtered_row) > 6 else '' } processed_data.append(processed_item) return processed_data def process_user_scammer_data(values): """Process raw user scammer data""" if not values: return [] processed_data = [] for row in values: if not row or len(row) < 2 or not any(clean_string_optional(cell) for cell in row[:2]): continue discord_id = clean_string_optional(row[0]) if len(row) > 0 else None roblox_username = clean_string_optional(row[1]) if len(row) > 1 else None if not discord_id and not roblox_username: continue processed_item = { 'discord_id': discord_id, 'roblox_username': roblox_username, 'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A', 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, 'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [], 'roblox_avatar_url': None # Placeholder } processed_data.append(processed_item) return processed_data def process_server_scammer_data(values): """Process raw server scammer data""" if not values: return [] processed_data = [] for row in values: if not row or len(row) < 2 or not any(clean_string_optional(cell) for cell in row[:2]): continue server_id = clean_string_optional(row[0]) if len(row) > 0 else None server_name = clean_string_optional(row[1]) if len(row) > 1 else None if not server_id and not server_name: continue processed_item = { 'server_id': server_id, 'server_name': server_name, 'scam_type': clean_string(row[2]) if len(row) > 2 else 'N/A', 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None } processed_data.append(processed_item) return processed_data def process_dwc_data(values): """Process raw DWC data""" if not values: return [] processed_data = [] for row in values: if not row or len(row) < 3 or not any(clean_string_optional(cell) for cell in row[:3]): continue user_id = clean_string_optional(row[0]) if len(row) > 0 else None server_id = clean_string_optional(row[1]) if len(row) > 1 else None roblox_user = clean_string_optional(row[2]) if len(row) > 2 else None if not user_id and not server_id and not roblox_user: continue processed_item = { 'status': 'DWC', 'discord_user_id': user_id, 'discord_server_id': server_id, 'roblox_username': roblox_user, 'explanation': clean_string(row[3]) if len(row) > 3 else 'N/A', 'evidence_link': clean_string_optional(row[4]) if len(row) > 4 else None, 'alt_accounts': parse_alt_accounts(row[5]) if len(row) > 5 else [], 'roblox_avatar_url': None # Placeholder } processed_data.append(processed_item) return processed_data def process_dupe_list_data(values): """Process raw dupe list data""" if not values: return [] return [row[0].strip().lower() for row in values if row and row[0] and isinstance(row[0], str) and row[0].strip()] # --- Async Fetching Functions (Used by background task) --- async def fetch_sheet_data_async(sheet_name, range_name, processor, value_render_option='FORMATTED_VALUE', spreadsheet_id=SPREADSHEET_ID): """Async wrapper to fetch and process sheet data""" global sheets_service # Access the initialized service if not sheets_service: logger.warning(f"Attempted to fetch {sheet_name} but Sheets service is unavailable.") raise Exception("Google Sheets service not initialized") # Raise to signal failure in update task try: quoted_sheet_name = f"'{sheet_name}'" if not sheet_name.isalnum() else sheet_name full_range = f"{quoted_sheet_name}!{range_name}" loop = asyncio.get_event_loop() result = await loop.run_in_executor( None, # Default executor lambda: sheets_service.spreadsheets().values().get( spreadsheetId=spreadsheet_id, range=full_range, valueRenderOption=value_render_option ).execute() ) values = result.get('values', []) return processor(values) except Exception as e: logger.error(f"Error fetching/processing {sheet_name} from {spreadsheet_id}: {e}") # Re-raise the exception so the update loop knows this part failed raise e # --- Background Cache Update Task --- async def update_cache_periodically(): """Fetches data from sheets, detects value changes, and updates the cache periodically.""" global cache async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15)) as session: while True: if not cache["service_available"]: logger.warning("Google Sheets service unavailable, skipping cache update cycle.") await asyncio.sleep(CACHE_UPDATE_INTERVAL_SECONDS) continue logger.info("Starting cache update cycle...") start_time = datetime.now(timezone.utc) success = True new_cache_data = { "values": {}, "user_scammers": [], "server_scammers": [], "dwc": [], "dupes": [], } detected_value_changes = {} # Store changes detected *in this cycle* try: # --- Fetch all data concurrently --- fetch_tasks = { "user_scammers": fetch_sheet_data_async(USER_SCAMMER_SHEET, 'B6:G', process_user_scammer_data, spreadsheet_id=SPREADSHEET_ID), "server_scammers": fetch_sheet_data_async(SERVER_SCAMMER_SHEET, 'B6:F', process_server_scammer_data, spreadsheet_id=SPREADSHEET_ID), "dwc": fetch_sheet_data_async(DWC_SHEET, 'B6:G', process_dwc_data, spreadsheet_id=SPREADSHEET_ID), "dupes": fetch_sheet_data_async(DUPE_LIST_SHEET, 'B2:B', process_dupe_list_data, spreadsheet_id=VALUES_SPREADSHEET_ID), # Add tasks for each value category **{f"values_{cat}": fetch_sheet_data_async( cat, 'B6:P', process_sheet_data, value_render_option='FORMULA', spreadsheet_id=VALUES_SPREADSHEET_ID ) for cat in CATEGORIES} } results = await asyncio.gather(*fetch_tasks.values(), return_exceptions=True) task_keys = list(fetch_tasks.keys()) # --- Process results and update temporary cache --- fetched_values = {} current_errors = {} # Track errors for specific keys for i, result in enumerate(results): key = task_keys[i] if isinstance(result, Exception): logger.error(f"Failed to fetch data for {key}: {result}") success = False current_errors[key] = str(result) # Log the error # Decide: keep old data or clear? We'll keep old cache data by not updating below else: if key.startswith("values_"): category_name = key.split("_", 1)[1] fetched_values[category_name] = result elif key in new_cache_data: # Only update if key exists new_cache_data[key] = result else: logger.warning(f"Fetched data for unknown key: {key}") # --- Detect Value Changes --- logger.info("Comparing fetched values with cached values to detect changes...") current_time = datetime.now(timezone.utc) fields_to_compare = ['value', 'dupedValue', 'marketValue'] for category, new_items in fetched_values.items(): if category not in cache["values"]: # Category is new or wasn't cached before cache["values"][category] = [] # Ensure old category exists for comparison logic below # Create a lookup for old items by name for efficient comparison old_items_dict = {item['name']: item for item in cache["values"].get(category, [])} category_changes = [] for new_item in new_items: item_name = new_item['name'] if item_name in old_items_dict: old_item = old_items_dict[item_name] for field in fields_to_compare: old_val_str = old_item.get(field) new_val_str = new_item.get(field) # Normalize "N/A" and potential whitespace before comparison old_norm = old_val_str.strip().lower() if isinstance(old_val_str, str) else old_val_str new_norm = new_val_str.strip().lower() if isinstance(new_val_str, str) else new_val_str if old_norm == 'n/a': old_norm = None if new_norm == 'n/a': new_norm = None # Only record change if values are meaningfully different if old_norm != new_norm: # Try parsing for better comparison (optional but recommended) old_numeric = parse_cached_currency(old_val_str) new_numeric = parse_cached_currency(new_val_str) # Compare numeric if possible, otherwise string representations # (Handles cases like $10 vs $10.00 becoming same numeric 10.0) values_differ = False if old_numeric is not None and new_numeric is not None: if old_numeric != new_numeric: values_differ = True elif old_val_str != new_val_str: # Fallback to string comparison if parsing fails or types differ values_differ = True if values_differ: logger.info(f"Change detected in {category}: {item_name} - {field}: '{old_val_str}' -> '{new_val_str}'") category_changes.append({ "item_name": item_name, "field": field, "old_value": old_val_str if old_val_str is not None else "N/A", "new_value": new_val_str if new_val_str is not None else "N/A", "timestamp": current_time.isoformat() }) if category_changes: detected_value_changes[category] = category_changes # --- Fetch Roblox Avatars (no changes needed here) --- # ... (avatar fetching logic remains the same) # --- Final Cache Update --- # Only update the main cache if the fetch cycle didn't have critical errors # We allow partial updates if only some fetches failed. if not current_errors: # If no errors at all logger.info("Updating full cache.") cache["values"] = fetched_values cache["user_scammers"] = new_cache_data["user_scammers"] cache["server_scammers"] = new_cache_data["server_scammers"] cache["dwc"] = new_cache_data["dwc"] cache["dupes"] = new_cache_data["dupes"] cache["value_changes"] = detected_value_changes # Store the detected changes cache["last_updated"] = current_time cache["is_ready"] = True logger.info(f"Cache update cycle completed successfully.") else: # Update parts that *did* succeed, if any partial_update_occurred = False if fetched_values: # Only update values if *all* value fetches succeeded all_values_fetched = True for cat in CATEGORIES: if f"values_{cat}" in current_errors: all_values_fetched = False break if all_values_fetched: cache["values"] = fetched_values cache["value_changes"] = detected_value_changes # Update changes if values updated partial_update_occurred = True logger.info("Partially updated cache: Values updated.") else: logger.warning("Values cache not updated due to fetch errors in some categories.") # Update other sections if they succeeded for key in ["user_scammers", "server_scammers", "dwc", "dupes"]: if key not in current_errors and new_cache_data.get(key) is not None: cache[key] = new_cache_data[key] partial_update_occurred = True logger.info(f"Partially updated cache: {key} updated.") if partial_update_occurred: cache["last_updated"] = current_time # Mark partial update time cache["is_ready"] = True # Allow access even if partial logger.warning(f"Cache update cycle completed with errors: {current_errors}. Some data might be stale.") else: logger.error(f"Cache update cycle failed completely. No parts updated. Errors: {current_errors}") # Keep cache["is_ready"] as it was. except Exception as e: logger.exception(f"Critical error during cache update cycle: {e}") success = False # Should already be false if exception bubbled up # --- Wait for the next cycle --- end_time = datetime.now(timezone.utc) duration = (end_time - start_time).total_seconds() wait_time = max(0, CACHE_UPDATE_INTERVAL_SECONDS - duration) logger.info(f"Cache update duration: {duration:.2f}s. Waiting {wait_time:.2f}s for next cycle.") await asyncio.sleep(wait_time) # Helper specifically for the background task to update the dict in place async def fetch_avatar_for_entry_update(session: aiohttp.ClientSession, entry: dict): """Fetches avatar and updates the provided entry dictionary.""" roblox_username = entry.get('roblox_username') if not roblox_username: return try: user_id = await get_roblox_user_id(session, roblox_username) if user_id: avatar_url = await get_roblox_avatar_url(session, user_id) entry['roblox_avatar_url'] = avatar_url # Update the dict directly # logger.debug(f"Avatar found for {roblox_username}") # Debug level # else: logger.debug(f"No Roblox user ID found for {roblox_username}") # Debug level except Exception as e: # Log errors but don't stop the main update loop logger.warning(f"Failed to fetch avatar for {roblox_username}: {e}") entry['roblox_avatar_url'] = None # Ensure it's None on error # --- FastAPI Startup Event --- @app.on_event("startup") async def startup_event(): """Starts the background cache update task.""" if cache["service_available"]: logger.info("Starting background cache update task...") asyncio.create_task(update_cache_periodically()) else: logger.warning("Google Sheets service not available. Cache update task will not start.") # --- API Endpoints (Modified to use Cache) --- def check_service_availability(): """Reusable check for API endpoints""" if not cache["service_available"]: raise HTTPException(status_code=503, detail="Google Sheets service unavailable. Cannot fetch data.") if not cache["is_ready"]: raise HTTPException(status_code=503, detail="Cache is not ready yet. Please try again shortly.") @app.get("/") async def root(): return {"message": "JB Vanta API - Running"} @app.get("/api/status") async def get_status(): """Returns the current status of the cache""" return { "cache_ready": cache["is_ready"], "sheets_service_available": cache["service_available"], "last_updated": cache["last_updated"].isoformat() if cache["last_updated"] else None, "cached_items": { "value_categories": len(cache["values"]), "user_scammers": len(cache["user_scammers"]), "server_scammers": len(cache["server_scammers"]), "dwc_entries": len(cache["dwc"]), "duped_usernames": len(cache["dupes"]), } } @app.get("/api/values") async def get_values(): """Get all values data from cache""" check_service_availability() return cache["values"] @app.get("/api/values/{category}") async def get_category_values(category: str): """Get values data for a specific category from cache""" check_service_availability() category = category.capitalize() if category not in CATEGORIES: raise HTTPException(status_code=404, detail=f"Category '{category}' not found or configured.") return {category: cache["values"].get(category, [])} @app.get("/api/value-changes/{category}") async def get_category_value_changes(category: str): """Get detected value changes for a specific category from the last cache update cycle.""" check_service_availability() category = category.capitalize() if category not in CATEGORIES: raise HTTPException(status_code=404, detail=f"Category '{category}' not found or configured.") return {category: cache.get("value_changes", {}).get(category, [])} @app.get("/api/value-changes") async def get_all_value_changes(): """Get all detected value changes from the last cache update cycle.""" check_service_availability() return cache.get("value_changes", {}) @app.get("/api/scammers") async def get_scammers(): """Get all scammer and DWC data (users, servers, dwc) from cache""" check_service_availability() # Data is already fetched and processed (including avatars) by the background task return { "users": cache["user_scammers"], "servers": cache["server_scammers"], "dwc": cache["dwc"] } @app.get("/api/dupes") async def get_dupes(): """Get all duped usernames from cache""" check_service_availability() return {"usernames": cache["dupes"]} class UsernameCheck(BaseModel): username: str @app.post("/api/check") async def check_username(data: UsernameCheck): """Check if a username is duped using cached data and send webhook""" check_service_availability() # Ensure cache is ready before checking username_to_check = data.username.strip().lower() is_duped = username_to_check in cache["dupes"] # Webhook notification logic (remains the same, consider making it non-blocking) if not is_duped: webhook_url = os.getenv("WEBHOOK_URL") if webhook_url: async def send_webhook_notification(): # Wrap in async func try: async with aiohttp.ClientSession() as session: webhook_data = { "content": None, "embeds": [{ "title": "New Dupe Check - Not Found", "description": f"Username `{data.username}` was checked but not found in the dupe database.", "color": 16776960, # Yellow "timestamp": datetime.now(timezone.utc).isoformat() # Use timezone aware }] } async with session.post(webhook_url, json=webhook_data) as response: if response.status not in [200, 204]: logger.warning(f"Failed to send webhook (Status: {response.status}): {await response.text()}") except Exception as e: logger.error(f"Error sending webhook: {e}") # Run the webhook sending in the background so it doesn't delay the API response asyncio.create_task(send_webhook_notification()) else: logger.info("Webhook URL not configured. Skipping notification.") return {"username": data.username, "is_duped": is_duped} # Optional: Add a health check endpoint (simple version) @app.get("/health") def health_check(): # Basic check: is the app running? # More advanced: could check cache['is_ready'] or cache['last_updated'] return {"status": "ok"} # Run with: uvicorn main:app --reload (for development) # For production: uvicorn main:app --host 0.0.0.0 --port 8000 (or your preferred port)