Spaces:

ssyok
/

AI-Ghibli-Image-Virality-Predictor

Sleeping

App Files Files Community

ssyok committed 19 days ago

Commit

713c6ae

1 Parent(s): 9803403

Change the logic to predict both likes and shares. Previously, likes was used as an input feature, which was not correct.

Browse files

Files changed (22) hide show

Data_Analytics_SHE_Course_Group_Assignment_Machine_Learning.ipynb +0 -0
app.py +211 -137
models/{gradient_boosting.joblib → gradient_boosting_likes.joblib} +2 -2
models/{random_forest.joblib → gradient_boosting_shares.joblib} +2 -2
models/lasso_regression.joblib +0 -0
models/lasso_regression_likes.joblib +3 -0
models/lasso_regression_shares.joblib +3 -0
models/linear_regression.joblib +0 -0
models/linear_regression_likes.joblib +3 -0
models/linear_regression_shares.joblib +3 -0
models/random_forest_likes.joblib +3 -0
models/random_forest_shares.joblib +3 -0
models/ridge_regression.joblib +0 -0
models/ridge_regression_likes.joblib +3 -0
models/ridge_regression_shares.joblib +3 -0
models/scaler.joblib +0 -0
results/likes_model_comparison.csv +6 -0
results/model_comparison.csv +0 -6
results/regression_analysis_results.json +0 -83
results/shares_model_comparison.csv +6 -0
results/virality_analysis_results.json +81 -0
results/virality_summary_report.txt +24 -0

Data_Analytics_SHE_Course_Group_Assignment_Machine_Learning.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -1,72 +1,35 @@
 import gradio as gr
 import pandas as pd
 import numpy as np
 import joblib
 import os
-# ==============================================================================
-# 1. LOAD MODELS AND SCALER (This part runs once when the script starts)
-# ==============================================================================
-# Dictionary to hold the loaded model objects and a list of their names
-all_models = {}
-model_names = [
-    'Linear Regression', 'Ridge Regression', 'Lasso Regression',
-    'Random Forest', 'Gradient Boosting'
-]
-BEST_MODEL_NAME = 'Random Forest' # Define the best model to be highlighted
-try:
-    # Load all the regression models
-    for name in model_names:
-        # Construct the filename, e.g., 'models/random_forest.joblib'
-        filename = f"models/{name.lower().replace(' ', '_')}.joblib"
-        if os.path.exists(filename):
-            all_models[name] = joblib.load(filename)
-        else:
-            raise FileNotFoundError(f"Model file not found: {filename}")
-    # Load the scaler
-    scaler_path = 'models/scaler.joblib'
-    if os.path.exists(scaler_path):
-        scaler = joblib.load(scaler_path)
-    else:
-        raise FileNotFoundError(f"Scaler file not found: {scaler_path}")
-    models_loaded = True
-    print("✅ All models and scaler loaded successfully!")
-    # Get the feature names the model was trained on from the scaler
-    expected_columns = scaler.feature_names_in_
-    print(f"Models expect {len(expected_columns)} features.")
-except Exception as e:
-    print(f"❌ ERROR: Could not load models. {e}")
-    print("Please ensure all '.joblib' files are in the 'models/' directory.")
-    models_loaded = False
-    all_models = {}
-    scaler = None
-    expected_columns = []
-# ==============================================================================
-# 2. PREDICTION FUNCTION
-# ==============================================================================
-def predict_shares_all_models(likes, generation_time, gpu_usage, file_size_kb,
-                              width, height, style_accuracy_score,
-                              is_hand_edited, ethical_concerns_flag,
-                              day_of_week, month, hour, platform):
     """
-    Performs feature engineering, predicts shares using all loaded models,
-    and returns formatted outputs for the Gradio interface.
     """
-    if not models_loaded:
-        error_message = "Models are not loaded. Please check the console for errors."
-        return 0, error_message, error_message
-    # --- Step A: Perform feature engineering ---
     sample_data = {
-        'likes': likes,
         'style_accuracy_score': style_accuracy_score,
         'generation_time': generation_time,
         'gpu_usage': gpu_usage,
@@ -79,125 +42,236 @@ def predict_shares_all_models(likes, generation_time, gpu_usage, file_size_kb,
         'month': month,
         'hour': hour
     }
     sample_data['aspect_ratio'] = width / height if height > 0 else 0
     sample_data['total_pixels'] = width * height
     sample_data['is_square'] = int(width == height)
     sample_data['is_weekend'] = int(day_of_week >= 5)
     for p in ['Twitter', 'TikTok', 'Reddit', 'Instagram']:
         sample_data[f'platform_{p}'] = 1 if platform == p else 0
-    sample_data['engagement_rate'] = likes / (sample_data['total_pixels'] / 1000000 + 1)
-    sample_data['quality_engagement'] = style_accuracy_score * likes / 100
     sample_data['file_density'] = file_size_kb / (sample_data['total_pixels'] / 1000 + 1)
     sample_data['gpu_efficiency'] = generation_time / (gpu_usage + 1)
-    for p in ['Twitter', 'TikTok', 'Reddit', 'Instagram']:
-         sample_data[f'{p.lower()}_likes'] = likes * sample_data[f'platform_{p}']
     sample_data['month_sin'] = np.sin(2 * np.pi * month / 12)
     sample_data['month_cos'] = np.cos(2 * np.pi * month / 12)
     sample_data['day_sin'] = np.sin(2 * np.pi * day_of_week / 7)
     sample_data['day_cos'] = np.cos(2 * np.pi * day_of_week / 7)
-    # --- Step B: Align columns and Scale ---
     sample_df = pd.DataFrame([sample_data])
     sample_df = sample_df.reindex(columns=expected_columns, fill_value=0)
-    sample_scaled = scaler.transform(sample_df)
-    # --- Step C: Predict with all models ---
-    predictions = {}
-    for name, model in all_models.items():
-        pred_value = model.predict(sample_scaled)[0]
-        predictions[name] = max(0, int(pred_value))
-    # --- Step D: Format the outputs for Gradio ---
-    # 1. Get the single best model prediction
-    best_model_prediction = predictions.get(BEST_MODEL_NAME, 0)
-    # 2. Create a Markdown table for all model predictions
-    all_results_df = pd.DataFrame(list(predictions.items()), columns=['Model', 'Predicted Shares'])
-    all_results_df = all_results_df.sort_values('Predicted Shares', ascending=False)
-    all_models_table = all_results_df.to_markdown(index=False)
-    # 3. Create a Markdown table for the engineered features
-    features_df = sample_df.T.reset_index()
-    features_df.columns = ['Feature', 'Value']
-    features_df['Value'] = features_df['Value'].apply(lambda x: f"{x:.4f}" if isinstance(x, float) else x)
-    features_table = features_df.to_markdown(index=False)
-    return best_model_prediction, all_models_table, features_table
-# ==============================================================================
-# 3. GRADIO INTERFACE
-# ==============================================================================
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Image Virality Predictor") as demo:
     gr.Markdown("# 🎨 AI Ghibli Image Virality Predictor")
-    gr.Markdown("Enter image features to get a virality prediction from multiple regression models.")
     with gr.Row():
-        # --- INPUTS COLUMN ---
         with gr.Column(scale=2):
-            gr.Markdown("### 1. Input Features")
-            with gr.Accordion("Core Engagement & Image Metrics", open=True):
-                likes = gr.Slider(minimum=0, maximum=10000, value=500, step=10, label="Likes")
-                style_accuracy_score = gr.Slider(minimum=0, maximum=100, value=85, step=1, label="Style Accuracy Score (%)")
-                width = gr.Slider(minimum=256, maximum=2048, value=1024, step=64, label="Width (px)")
-                height = gr.Slider(minimum=256, maximum=2048, value=1024, step=64, label="Height (px)")
-                file_size_kb = gr.Slider(minimum=100, maximum=5000, value=1500, step=100, label="File Size (KB)")
-            with gr.Accordion("Technical & Posting Details", open=True):
-                generation_time = gr.Slider(minimum=1, maximum=30, value=8, step=0.5, label="Generation Time (s)")
-                gpu_usage = gr.Slider(minimum=10, maximum=100, value=70, step=5, label="GPU Usage (%)")
-                platform = gr.Radio(["Instagram", "Twitter", "TikTok", "Reddit"], label="Platform", value="Instagram")
-                day_of_week = gr.Slider(minimum=0, maximum=6, value=4, step=1, label="Day of Week (0=Mon, 6=Sun)")
-                month = gr.Slider(minimum=1, maximum=12, value=7, step=1, label="Month (1-12)")
-                hour = gr.Slider(minimum=0, maximum=23, value=18, step=1, label="Hour of Day (0-23)")
-                is_hand_edited = gr.Checkbox(label="Was it Hand Edited?", value=False)
-                ethical_concerns_flag = gr.Checkbox(label="Any Ethical Concerns?", value=False)
-            predict_btn = gr.Button("Predict Virality", variant="primary")
-        # --- OUTPUTS COLUMN ---
         with gr.Column(scale=3):
-            gr.Markdown("### 2. Prediction Results")
-            # Highlighted Best Model Output
-            best_model_output = gr.Number(
-                label=f"🏆 Best Model Prediction ({BEST_MODEL_NAME})",
-                interactive=False
-            )
-            # Table for All Model Predictions
-            with gr.Accordion("Comparison of All Models", open=True):
-                all_models_output = gr.Markdown(label="All Model Predictions")
-            # Table for Feature Engineering Details
-            with gr.Accordion("View Engineered Features", open=False):
-                features_output = gr.Markdown(label="Feature Engineering Details")
     # Connect the button to the function
     predict_btn.click(
-        fn=predict_shares_all_models,
         inputs=[
-            likes, generation_time, gpu_usage, file_size_kb,
             width, height, style_accuracy_score,
             is_hand_edited, ethical_concerns_flag,
             day_of_week, month, hour, platform
         ],
         outputs=[
-            best_model_output,
-            all_models_output,
-            features_output
         ]
     )
 # Launch the app
 if __name__ == "__main__":
     if not models_loaded:
         print("\nCannot launch Gradio app because models failed to load.")
     else:
-        demo.launch()

+# Gradio Demo App for Predicting Both Likes and Shares
 import gradio as gr
 import pandas as pd
 import numpy as np
 import joblib
 import os
+# Best models (update based on your results)
+BEST_MODEL_LIKES = 'Random Forest'
+BEST_MODEL_SHARES = 'Random Forest'
+models_loaded = False
+# Global variables for models and scaler
+all_models_likes = {}
+all_models_shares = {}
+model_names = []
+scaler = None
+expected_columns = []
+# Prediction Function for Both Likes and Shares
+def predict_virality_all_models(generation_time, gpu_usage, file_size_kb,
+                               width, height, style_accuracy_score,
+                               is_hand_edited, ethical_concerns_flag,
+                               day_of_week, month, hour, platform):
     """
+    Predicts both likes and shares using all loaded models.
     """
+    global all_models_likes, all_models_shares, model_names, scaler, expected_columns
+    # Create feature dictionary (WITHOUT likes)
     sample_data = {
         'style_accuracy_score': style_accuracy_score,
         'generation_time': generation_time,
         'gpu_usage': gpu_usage,
         'month': month,
         'hour': hour
     }
+    # Perform feature engineering
     sample_data['aspect_ratio'] = width / height if height > 0 else 0
     sample_data['total_pixels'] = width * height
     sample_data['is_square'] = int(width == height)
     sample_data['is_weekend'] = int(day_of_week >= 5)
+    # One-hot encode platform
     for p in ['Twitter', 'TikTok', 'Reddit', 'Instagram']:
         sample_data[f'platform_{p}'] = 1 if platform == p else 0
+    # Technical features
     sample_data['file_density'] = file_size_kb / (sample_data['total_pixels'] / 1000 + 1)
     sample_data['gpu_efficiency'] = generation_time / (gpu_usage + 1)
+    # Temporal cyclical features (continued)
     sample_data['month_sin'] = np.sin(2 * np.pi * month / 12)
     sample_data['month_cos'] = np.cos(2 * np.pi * month / 12)
     sample_data['day_sin'] = np.sin(2 * np.pi * day_of_week / 7)
     sample_data['day_cos'] = np.cos(2 * np.pi * day_of_week / 7)
+    sample_data['hour_sin'] = np.sin(2 * np.pi * hour / 24)
+    sample_data['hour_cos'] = np.cos(2 * np.pi * hour / 24)
+    # Create DataFrame and align columns
     sample_df = pd.DataFrame([sample_data])
     sample_df = sample_df.reindex(columns=expected_columns, fill_value=0)
+    # Scale features
+    try:
+        sample_scaled = scaler.transform(sample_df)
+    except Exception as e:
+        return {}, {}, f"Error during scaling: {e}"
+    # Predict with all models
+    predictions_likes = {}
+    predictions_shares = {}
+    for name in model_names:
+        # Predict likes
+        if name in all_models_likes:
+            pred_likes = all_models_likes[name].predict(sample_scaled)[0]
+            predictions_likes[name] = max(0, int(pred_likes))
+        # Predict shares
+        if name in all_models_shares:
+            pred_shares = all_models_shares[name].predict(sample_scaled)[0]
+            predictions_shares[name] = max(0, int(pred_shares))
+    return predictions_likes, predictions_shares, None
+def load_models():
+    # Load Models for Both Likes and Shares
+    global all_models_likes, all_models_shares, model_names, scaler, expected_columns, models_loaded
+    # Dictionaries to hold the loaded model objects
+    all_models_likes = {}
+    all_models_shares = {}
+    model_names = [
+        'Linear Regression', 'Ridge Regression', 'Lasso Regression',
+        'Random Forest', 'Gradient Boosting'
+    ]
+    try:
+        # Load all the regression models for both targets
+        for name in model_names:
+            # Load likes model
+            filename_likes = os.path.join("models", f"{name.lower().replace(' ', '_')}_likes.joblib")
+            all_models_likes[name] = joblib.load(filename_likes)
+            # Load shares model
+            filename_shares = os.path.join("models", f"{name.lower().replace(' ', '_')}_shares.joblib")
+            all_models_shares[name] = joblib.load(filename_shares)
+            print(f"Loaded: {name} (both likes and shares)")
+        # Load the scaler
+        scaler = joblib.load(os.path.join("models", "scaler.joblib"))
+        print("Loaded: scaler.joblib")
+        # Get the feature names
+        expected_columns = scaler.feature_names_in_
+        print(f"Model expects {len(expected_columns)} features.")
+        models_loaded = True
+        print("\n✅ All models and scaler loaded successfully!")
+    except FileNotFoundError as e:
+        print(f"\n❌ ERROR: Could not find a model file: {e}")
+        print("Please make sure all '.joblib' files are in the 'models/' directory.")
+        models_loaded = False
+def predict_virality_gradio(generation_time, gpu_usage, file_size_kb,
+                           width, height, style_accuracy_score,
+                           is_hand_edited, ethical_concerns_flag,
+                           day_of_week, month, hour, platform):
+    """
+    Gradio wrapper for the prediction function.
+    Returns formatted outputs for both likes and shares.
+    """
+    if not models_loaded:
+        error_msg = "Models are not loaded. Please check the console for errors."
+        return 0, 0, error_msg, error_msg, error_msg
+    # Get predictions
+    likes_preds, shares_preds, error = predict_virality_all_models(
+        generation_time, gpu_usage, file_size_kb,
+        width, height, style_accuracy_score,
+        is_hand_edited, ethical_concerns_flag,
+        day_of_week, month, hour, platform
+    )
+    if error:
+        return 0, 0, error, error, error
+    # Get best model predictions
+    best_likes = likes_preds.get(BEST_MODEL_LIKES, 0)
+    best_shares = shares_preds.get(BEST_MODEL_SHARES, 0)
+    # Create comparison tables
+    likes_df = pd.DataFrame(list(likes_preds.items()), columns=['Model', 'Predicted Likes'])
+    likes_df = likes_df.sort_values('Predicted Likes', ascending=False)
+    likes_table = likes_df.to_markdown(index=False)
+    shares_df = pd.DataFrame(list(shares_preds.items()), columns=['Model', 'Predicted Shares'])
+    shares_df = shares_df.sort_values('Predicted Shares', ascending=False)
+    shares_table = shares_df.to_markdown(index=False)
+    # Create summary statistics
+    summary = f"""
+    ### Prediction Summary
+    **Average Predictions Across All Models:**
+    - Likes: {np.mean(list(likes_preds.values())):.0f}
+    - Shares: {np.mean(list(shares_preds.values())):.0f}
+    """
+    return best_likes, best_shares, likes_table, shares_table, summary
+# Create Gradio interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Image Virality Predictor") as demo:
     gr.Markdown("# 🎨 AI Ghibli Image Virality Predictor")
+    gr.Markdown("Predict both **Likes** and **Shares** for your AI-generated Ghibli-style images!")
     with gr.Row():
+        # Input Column
         with gr.Column(scale=2):
+            gr.Markdown("### 📝 Input Features")
+            with gr.Accordion("Image Properties", open=True):
+                width = gr.Slider(minimum=256, maximum=2048, value=1024, step=64,
+                                 label="Width (px)")
+                height = gr.Slider(minimum=256, maximum=2048, value=1024, step=64,
+                                  label="Height (px)")
+                file_size_kb = gr.Slider(minimum=100, maximum=5000, value=1500, step=100,
+                                        label="File Size (KB)")
+                style_accuracy_score = gr.Slider(minimum=0, maximum=100, value=85, step=1,
+                                               label="Style Accuracy Score (%)")
+            with gr.Accordion("Technical Details", open=True):
+                generation_time = gr.Slider(minimum=1, maximum=30, value=8, step=0.5,
+                                          label="Generation Time (seconds)")
+                gpu_usage = gr.Slider(minimum=10, maximum=100, value=70, step=5,
+                                     label="GPU Usage (%)")
+                is_hand_edited = gr.Checkbox(label="Hand Edited?", value=False)
+                ethical_concerns_flag = gr.Checkbox(label="Ethical Concerns?", value=False)
+            with gr.Accordion("Posting Details", open=True):
+                platform = gr.Radio(["Instagram", "Twitter", "TikTok", "Reddit"],
+                                   label="Platform", value="Instagram")
+                day_of_week = gr.Slider(minimum=0, maximum=6, value=4, step=1,
+                                       label="Day of Week (0=Mon, 6=Sun)")
+                month = gr.Slider(minimum=1, maximum=12, value=7, step=1,
+                                 label="Month (1-12)")
+                hour = gr.Slider(minimum=0, maximum=23, value=18, step=1,
+                                label="Hour of Day (0-23)")
+            predict_btn = gr.Button("🚀 Predict Virality", variant="primary", size="lg")
+        # Output Column
         with gr.Column(scale=3):
+            gr.Markdown("### 📊 Prediction Results")
+            # Main predictions
+            with gr.Row():
+                best_likes_output = gr.Number(
+                    label=f"❤️ Predicted Likes ({BEST_MODEL_LIKES})",
+                    interactive=False
+                )
+                best_shares_output = gr.Number(
+                    label=f"🔄 Predicted Shares ({BEST_MODEL_SHARES})",
+                    interactive=False
+                )
+            # Summary
+            summary_output = gr.Markdown(label="Summary")
+            # Detailed predictions (continued)
+            with gr.Row():
+                with gr.Accordion("All Models - Likes", open=False):
+                    likes_table_output = gr.Markdown(label="Likes Predictions")
+                with gr.Accordion("All Models - Shares", open=False):
+                    shares_table_output = gr.Markdown(label="Shares Predictions")
     # Connect the button to the function
     predict_btn.click(
+        fn=predict_virality_gradio,
         inputs=[
+            generation_time, gpu_usage, file_size_kb,
             width, height, style_accuracy_score,
             is_hand_edited, ethical_concerns_flag,
             day_of_week, month, hour, platform
         ],
         outputs=[
+            best_likes_output,
+            best_shares_output,
+            likes_table_output,
+            shares_table_output,
+            summary_output
         ]
     )
 # Launch the app
 if __name__ == "__main__":
+    load_models()
     if not models_loaded:
         print("\nCannot launch Gradio app because models failed to load.")
     else:
+        demo.launch(
+            # share=True,
+            # debug=True
+        )

models/{gradient_boosting.joblib → gradient_boosting_likes.joblib} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4504743d4614b446377f47be3bbd48450c338a03720dd92bb7ad50e5c43fbec9
-size 349128

 version https://git-lfs.github.com/spec/v1
+oid sha256:a90fbcfd2603f5d9d7737dccf118fbe0340a7f18561a2e33429e16b1b10804c5
+size 382392

models/{random_forest.joblib → gradient_boosting_shares.joblib} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17bdbcb259ca107357017da4d5dfa698d046c4029856b33c79234c1b7467eee2
-size 1989649

 version https://git-lfs.github.com/spec/v1
+oid sha256:14c9e4969342ad4f929267d938f9ed1d15072a067effb231a37e25fe317469ba
+size 369144

models/lasso_regression.joblib DELETED Viewed

Binary file (864 Bytes)

models/lasso_regression_likes.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf7af80e4d8afc3ef86c7917fb6edd207bd84f97d9b57c634904e9b0761c0247
+size 848

models/lasso_regression_shares.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a025a3dd6b2d9f4faa5484294f697dc12b10dc61b4c2d8d37ce85b4050f64ed
+size 848

models/linear_regression.joblib DELETED Viewed

Binary file (1.03 kB)

models/linear_regression_likes.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0ead3f5960d70f01ca3660610ca29863c02cc47f3a4a527f7a026bfec71243c
+size 993

models/linear_regression_shares.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7ce5eaa9cf59e8e851ef5c3a763d7546ba7234fd8dc57de5bc419003d878bbe
+size 993

models/random_forest_likes.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:161deab58e5ccd2cf240526943849d165ba4344058d140c3559aea0ee1ffe3b8
+size 2197297

models/random_forest_shares.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b9de27b910354f61fa63f53f59af4a3d417d26f52d2542c09bad489b7729f6
+size 2189073

models/ridge_regression.joblib DELETED Viewed

Binary file (785 Bytes)

models/ridge_regression_likes.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04682fcede103811ae2e8225c49845fc89ea436816d27cd655e3397c283a54ab
+size 769

models/ridge_regression_shares.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68242a89f0a3b33a9409b856dc3ccc6720b9c11cbb7a19ef305e3c5691e2424d
+size 769

models/scaler.joblib CHANGED Viewed

Binary files a/models/scaler.joblib and b/models/scaler.joblib differ

results/likes_model_comparison.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+Model,R² Score,MAE,RMSE
+Random Forest,-0.039935266835291694,1315.402911707407,1509.824436400549
+Ridge Regression,-0.06857728183915435,1367.631134689801,1530.475089435485
+Lasso Regression,-0.09319254230326957,1374.4757984522898,1548.0023905741184
+Linear Regression,-0.10245590353474165,1377.3233648731018,1554.547191830137
+Gradient Boosting,-0.2384770542071457,1414.7558634489615,1647.6587884958733

results/model_comparison.csv DELETED Viewed

@@ -1,6 +0,0 @@
-Model,R² Score,MAE,RMSE
-Random Forest,-0.08503617106593597,518.7684998150959,593.7506113831865
-Ridge Regression,-0.08676700700361528,528.5892908496174,594.223994396894
-Lasso Regression,-0.08764565917116918,528.573248905534,594.4641611976449
-Linear Regression,-0.09914175984109797,531.4132783380423,597.5975604762958
-Gradient Boosting,-0.2357883805937282,537.630957194942,633.6566769562837

results/regression_analysis_results.json DELETED Viewed

@@ -1,83 +0,0 @@
-{
-  "dataset_info": {
-    "total_samples": 500,
-    "features": 29,
-    "target_mean": 1040.182,
-    "target_median": 1092.0,
-    "target_std": 562.6687383302794
-  },
-  "model_comparison": [
-    {
-      "Model": "Random Forest",
-      "R\u00b2 Score": -0.08503617106593597,
-      "MAE": 518.7684998150959,
-      "RMSE": 593.7506113831865
-    },
-    {
-      "Model": "Ridge Regression",
-      "R\u00b2 Score": -0.08676700700361528,
-      "MAE": 528.5892908496174,
-      "RMSE": 594.223994396894
-    },
-    {
-      "Model": "Lasso Regression",
-      "R\u00b2 Score": -0.08764565917116918,
-      "MAE": 528.573248905534,
-      "RMSE": 594.4641611976449
-    },
-    {
-      "Model": "Linear Regression",
-      "R\u00b2 Score": -0.09914175984109797,
-      "MAE": 531.4132783380423,
-      "RMSE": 597.5975604762958
-    },
-    {
-      "Model": "Gradient Boosting",
-      "R\u00b2 Score": -0.2357883805937282,
-      "MAE": 537.630957194942,
-      "RMSE": 633.6566769562837
-    }
-  ],
-  "feature_correlations": [
-    {
-      "feature": "platform_Twitter",
-      "correlation": -0.11310486794074195
-    },
-    {
-      "feature": "platform_Instagram",
-      "correlation": 0.07096989443791954
-    },
-    {
-      "feature": "total_pixels",
-      "correlation": 0.05340067376167711
-    },
-    {
-      "feature": "width",
-      "correlation": 0.050954190148673084
-    },
-    {
-      "feature": "height",
-      "correlation": 0.050954190148673084
-    },
-    {
-      "feature": "platform_Reddit",
-      "correlation": 0.030824709708669493
-    },
-    {
-      "feature": "likes",
-      "correlation": -0.029318071149881914
-    },
-    {
-      "feature": "is_hand_edited",
-      "correlation": 0.02824023580536551
-    },
-    {
-      "feature": "day_of_week",
-      "correlation": 0.02490306263783807
-    },
-    {
-      "feature": "file_size_kb",
-      "correlation": -0.020748477243945303
-    }
-  ]
-}

results/shares_model_comparison.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+Model,R² Score,MAE,RMSE
+Ridge Regression,-0.07838647409138644,526.23107028139,591.928400457892
+Lasso Regression,-0.07887883281663877,526.2428483005319,592.0635133610481
+Random Forest,-0.08251842386606412,518.1355614972136,593.0613338289508
+Linear Regression,-0.0852709784249106,527.3942652983482,593.8148532374418
+Gradient Boosting,-0.22974105654409227,529.9244912155209,632.104377749423

results/virality_analysis_results.json ADDED Viewed

	@@ -0,0 +1,81 @@

+{
+  "dataset_info": {
+    "total_samples": 500,
+    "features": 27,
+    "likes_mean": 2601.262,
+    "likes_median": 2566.5,
+    "likes_std": 1429.4334981408595,
+    "shares_mean": 1040.182,
+    "shares_median": 1092.0,
+    "shares_std": 562.6687383302794,
+    "likes_shares_correlation": -0.029318071149881914
+  },
+  "likes_model_comparison": [
+    {
+      "Model": "Random Forest",
+      "R\u00b2 Score": -0.039935266835291694,
+      "MAE": 1315.402911707407,
+      "RMSE": 1509.824436400549
+    },
+    {
+      "Model": "Ridge Regression",
+      "R\u00b2 Score": -0.06857728183915435,
+      "MAE": 1367.631134689801,
+      "RMSE": 1530.475089435485
+    },
+    {
+      "Model": "Lasso Regression",
+      "R\u00b2 Score": -0.09319254230326957,
+      "MAE": 1374.4757984522898,
+      "RMSE": 1548.0023905741184
+    },
+    {
+      "Model": "Linear Regression",
+      "R\u00b2 Score": -0.10245590353474165,
+      "MAE": 1377.3233648731018,
+      "RMSE": 1554.547191830137
+    },
+    {
+      "Model": "Gradient Boosting",
+      "R\u00b2 Score": -0.2384770542071457,
+      "MAE": 1414.7558634489615,
+      "RMSE": 1647.6587884958733
+    }
+  ],
+  "shares_model_comparison": [
+    {
+      "Model": "Ridge Regression",
+      "R\u00b2 Score": -0.07838647409138644,
+      "MAE": 526.23107028139,
+      "RMSE": 591.928400457892
+    },
+    {
+      "Model": "Lasso Regression",
+      "R\u00b2 Score": -0.07887883281663877,
+      "MAE": 526.2428483005319,
+      "RMSE": 592.0635133610481
+    },
+    {
+      "Model": "Random Forest",
+      "R\u00b2 Score": -0.08251842386606412,
+      "MAE": 518.1355614972136,
+      "RMSE": 593.0613338289508
+    },
+    {
+      "Model": "Linear Regression",
+      "R\u00b2 Score": -0.0852709784249106,
+      "MAE": 527.3942652983482,
+      "RMSE": 593.8148532374418
+    },
+    {
+      "Model": "Gradient Boosting",
+      "R\u00b2 Score": -0.22974105654409227,
+      "MAE": 529.9244912155209,
+      "RMSE": 632.104377749423
+    }
+  ],
+  "best_models": {
+    "likes": "Random Forest",
+    "shares": "Ridge Regression"
+  }
+}

results/virality_summary_report.txt ADDED Viewed

	@@ -0,0 +1,24 @@

+VIRALITY PREDICTION MODEL SUMMARY
+================================
+Dataset Overview:
+- Total samples: 500
+- Features used: 27
+- Correlation between Likes and Shares: -0.029
+Best Models:
+- Likes Prediction: Random Forest (R² = -0.040)
+- Shares Prediction: Ridge Regression (R² = -0.078)
+Key Insights:
+1. Both likes and shares show similar patterns in terms of model performance
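+2. No model family is clearly better: Random Forest ranks first for likes but third for shares, Gradient Boosting ranks last for both, and every R² score is negative (worse than predicting the mean)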
+3. Technical features (generation time, GPU usage) and temporal features are important predictors
+4. Platform-specific patterns exist and should be considered for optimization
+Recommendations:
+1. Use separate models for likes and shares predictions
+2. Consider ensemble methods for improved accuracy
+3. Regular retraining with new data is recommended
+4. Monitor feature importance to understand changing virality patterns