Spaces:

angusfung
/

Kickstarter-prediction-embedding

Sleeping

App Files Community

angusfung committed on Apr 23

Commit

6c3a56c

verified ·

1 Parent(s): 01783ce

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -6

app.py CHANGED Viewed

@@ -1,3 +1,14 @@
 import os
 import json
 import torch
@@ -49,6 +60,18 @@ device = None
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Load resources on startup
     global model, explainer, processor, device
@@ -125,6 +148,12 @@ app.add_middleware(
 @app.get("/")
 async def root():
     return {
         "message": "Kickstarter Success Prediction API",
         "description": "Send a POST request to /predict with campaign data to get a prediction"
@@ -132,6 +161,24 @@ async def root():
 @app.post("/predict")
 async def predict(request: Request):
     try:
         # Parse the incoming JSON data
         logger.info("Received prediction request")
@@ -193,28 +240,114 @@ async def predict(request: Request):
         raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}")
 def preprocess_raw_data(campaign_data):
-    """Preprocess raw data using CampaignProcessor"""
     try:
         # Process the single campaign
         logger.info("Processing campaign with CampaignProcessor...")
-        processed_data = processor.process_campaign(campaign_data, idx=0)
-        # Preserve existing numerical values from input if present
-        for field in NUMERICAL_FIELDS:
             if field in campaign_data:
                 processed_data[field] = campaign_data[field]
                 logger.info(f"Using provided value for {field}: {campaign_data[field]}")
         return processed_data
     except Exception as e:
         logger.error(f"Error preprocessing raw data: {str(e)}", exc_info=True)
         raise Exception(f"Error preprocessing raw data: {str(e)}")
-# Debugging endpoint to check the environment and loaded resources
 @app.get("/debug")
 async def debug():
-    """Endpoint for checking the status of the API and its components"""
     global model, explainer, processor, device
     # Check internet connectivity

+"""
+Kickstarter Success Prediction API
+This module serves as the main FastAPI application for the Kickstarter Success Prediction service.
+It provides endpoints for predicting the success probability of Kickstarter campaigns and
+includes the Longformer embedding in the response for further analysis.
+Author: Angus Fung
+Date: April 2025
+"""
 import os
 import json
 import torch
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """
+    Lifecycle manager for the FastAPI application.
+    This function handles the startup and shutdown of the application,
+    managing resources like model loading and caching directories.
+    Args:
+        app: The FastAPI application instance
+    Yields:
+        None: Control is yielded back to the application while it's running
+    """
     # Load resources on startup
     global model, explainer, processor, device
 @app.get("/")
 async def root():
+    """
+    Root endpoint providing API information.
+    Returns:
+        dict: Basic API information and usage instructions
+    """
     return {
         "message": "Kickstarter Success Prediction API",
         "description": "Send a POST request to /predict with campaign data to get a prediction"
 @app.post("/predict")
 async def predict(request: Request):
+    """
+    Prediction endpoint for Kickstarter campaign success.
+    This endpoint processes campaign data and returns:
+    - Success probability
+    - Predicted outcome (Success/Failure)
+    - SHAP values for feature importance explanation
+    - Longformer embedding of the campaign description
+    Args:
+        request: FastAPI request object containing campaign data as JSON
+    Returns:
+        JSONResponse: Prediction results and explanations
+    Raises:
+        HTTPException: If an error occurs during prediction
+    """
     try:
         # Parse the incoming JSON data
         logger.info("Received prediction request")
         raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}")
 def preprocess_raw_data(campaign_data):
+    """
+    Preprocess raw campaign data using CampaignProcessor.
+    This function transforms raw text and numerical campaign data into
+    the format required by the prediction model, including:
+    - Text embeddings generation for description, blurb, and risks
+    - Logarithmic transformation of monetary values (funding goals, pledged amounts)
+    - Country name standardization (conversion to ISO alpha-2 codes)
+    - Category and country encoding
+    - Extraction and normalization of numerical features
+    Args:
+        campaign_data (dict): Raw campaign data with text and numerical features
+    Returns:
+        dict: Processed data with embeddings and normalized numerical features
+    Raises:
+        Exception: If preprocessing fails
+    """
     try:
         # Process the single campaign
         logger.info("Processing campaign with CampaignProcessor...")
+        # Log country conversion if present
+        if 'raw_country' in campaign_data:
+            country_name = campaign_data.get('raw_country', '')
+            if country_name:
+                logger.info(f"Found country in input data: '{country_name}' (will be converted to ISO alpha-2 code)")
+        # Map field names to the expected structure for the processor
+        # Make a deep copy to avoid modifying the original
+        import copy
+        prepared_data = copy.deepcopy(campaign_data)
+        # Log input values for debugging
+        logger.info(f"Input previous_projects_count: {prepared_data.get('previous_projects_count', 'N/A')}")
+        logger.info(f"Input previous_success_rate: {prepared_data.get('previous_success_rate', 'N/A')}")
+        logger.info(f"Input previous_pledged: {prepared_data.get('previous_pledged', 'N/A')}")
+        logger.info(f"Input previous_funding_goal: {prepared_data.get('previous_funding_goal', 'N/A')}")
+        # Special handling for success rate calculation
+        if 'previous_success_rate' in campaign_data and 'previous_projects_count' in campaign_data:
+            success_rate = float(campaign_data['previous_success_rate'])
+            projects_count = int(campaign_data['previous_projects_count'])
+            # Calculate successful projects from rate and count
+            if projects_count > 0:
+                prepared_data['previous_successful_projects'] = round(success_rate * projects_count)
+                logger.info(f"Calculated previous_successful_projects: {prepared_data['previous_successful_projects']} " +
+                           f"from success rate: {success_rate} and count: {projects_count}")
+        # Now process the prepared data
+        processed_data = processor.process_campaign(prepared_data, idx=0)
+        # SELECTIVE OVERRIDE: Only override non-transformed numeric fields
+        # Fields that should NOT undergo logarithmic transformation
+        non_transformed_fields = [
+            'description_length', 'image_count', 'video_count',
+            'campaign_duration', 'previous_projects_count', 'previous_success_rate'
+        ]
+        # Fields that SHOULD undergo logarithmic transformation
+        transformed_fields = [
+            'funding_goal', 'previous_funding_goal', 'previous_pledged'
+        ]
+        # Override only the non-transformed fields if they exist in input
+        for field in non_transformed_fields:
             if field in campaign_data:
                 processed_data[field] = campaign_data[field]
                 logger.info(f"Using provided value for {field}: {campaign_data[field]}")
+        # For transformed fields, check if the user explicitly wants to bypass transformation
+        for field in transformed_fields:
+            if field in campaign_data and campaign_data.get('bypass_transformation', False):
+                processed_data[field] = campaign_data[field]
+                logger.warning(
+                    f"Bypassing logarithmic transformation for {field} as requested. "
+                    "This may affect model performance."
+                )
+            elif field in campaign_data:
+                # Log that we're keeping the transformed value
+                logger.info(f"Using logarithmically transformed {field} value for better model performance.")
+        # Verify that the previous metrics are set correctly
+        logger.info(f"Final previous_projects_count: {processed_data.get('previous_projects_count', 'N/A')}")
+        logger.info(f"Final previous_success_rate: {processed_data.get('previous_success_rate', 'N/A')}")
+        logger.info(f"Final previous_pledged: {processed_data.get('previous_pledged', 'N/A')}")
+        logger.info(f"Final previous_funding_goal: {processed_data.get('previous_funding_goal', 'N/A')}")
+        logger.info("Preprocessing complete with numerical transformations applied")
         return processed_data
     except Exception as e:
         logger.error(f"Error preprocessing raw data: {str(e)}", exc_info=True)
         raise Exception(f"Error preprocessing raw data: {str(e)}")
 @app.get("/debug")
 async def debug():
+    """
+    Debug endpoint for checking API status and component health.
+    This endpoint provides diagnostic information about the API's status,
+    model loading, connectivity, disk space, and other components.
+    Returns:
+        JSONResponse: Comprehensive diagnostic information
+    """
     global model, explainer, processor, device
     # Check internet connectivity