{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['date', 'target', 'cpim_lag1', 'cpim_lag2', 'cpim_lag3', 'cpih_lag1',\n", " 'cpih_lag2', 'cpih_lag3', 'APC_Finished_Consultant',\n", " 'APC_FCEs_with_a_procedure', 'APC_Percent_FCEs_with_procedure',\n", " 'APC_Ordinary_Episodes', 'APC_Day_Case_Episodes',\n", " 'APC_Day_Case_Episodes_with_proc', 'APC_Percent_Day_Cases_with_proc',\n", " 'APC_Finished_Admission_Episodes', 'APC_Emergency',\n", " 'Outpatient_Total_Appointments', 'Outpatient_Attended_Appointments',\n", " 'Outpatient_Percent_Attended', 'Outpatient_DNA_Appointment',\n", " 'Outpatient_Percent_DNA', 'Outpatient_Follow_Up_Attendance',\n", " 'Outpatient_Attendance_Type_1', 'Outpatient_Attendance_Type_2'],\n", " dtype='object')\n" ] } ], "source": [ "from data_preprocessing import read_cpih, read_hes, get_global_df, get_final_df\n", "cpih_df = read_cpih(\"data/cpih.csv\", medical=False)\n", "cpim_df = read_cpih(\"data/cpih_medical.csv\", medical=True)\n", "hes = read_hes(\"data/HES_M5_OPEN_DATA.csv\")\n", "df = get_global_df(cpih_df, cpim_df, hes)\n", "df = get_final_df(df)\n", "print(df.columns)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "# Ensure the 'date' column is a datetime type and set it as the index\n", "df['date'] = pd.to_datetime(df['date'])\n", "df = df.set_index('date')\n", "\n", "# Ensure the target column (e.g., 'target') is properly defined\n", "target = \"target\" # The column to forecast\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The truth value of a Series is ambiguous. 
Use a.empty, a.bool(), a.item(), a.any() or a.all().", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/var/folders/c7/tz9fbdy52kg7nrm3g0wp_tv00000gn/T/ipykernel_11497/3834418672.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpycaret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime_series\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# Initialize the PyCaret time series experiment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m exp = setup(\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Pass the data without the target column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Pass the target column (series)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mfold\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Number of cross-validation folds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pycaret/time_series/forecasting/functional.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(data, data_func, target, index, ignore_features, numeric_imputation_target, numeric_imputation_exogenous, transform_target, transform_exogenous, fe_target_rr, fe_exogenous, scale_target, 
scale_exogenous, fold_strategy, fold, fh, hyperparameter_split, seasonal_period, ignore_seasonality_test, sp_detection, max_sp_to_consider, remove_harmonics, harmonic_order_method, num_sps_to_use, seasonality_type, point_alpha, coverage, enforce_exogenous, n_jobs, use_gpu, custom_pipeline, html, session_id, system_log, log_experiment, experiment_name, log_plots, log_profile, log_data, verbose, profile, profile_kwargs, fig_kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \"\"\"\n\u001b[1;32m 587\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mexp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_EXPERIMENT_CLASS\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0mset_current_experiment\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 590\u001b[0;31m return exp.setup(\n\u001b[0m\u001b[1;32m 591\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0mdata_func\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata_func\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 593\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pycaret/time_series/forecasting/oop.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, data, data_func, target, index, ignore_features, numeric_imputation_target, numeric_imputation_exogenous, transform_target, transform_exogenous, scale_target, scale_exogenous, fe_target_rr, fe_exogenous, fold_strategy, fold, fh, hyperparameter_split, seasonal_period, ignore_seasonality_test, sp_detection, 
max_sp_to_consider, remove_harmonics, harmonic_order_method, num_sps_to_use, seasonality_type, point_alpha, coverage, enforce_exogenous, n_jobs, use_gpu, custom_pipeline, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, engine, verbose, profile, profile_kwargs, fig_kwargs)\u001b[0m\n\u001b[1;32m 2104\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2105\u001b[0m )\n\u001b[1;32m 2106\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_check_clean_and_set_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2107\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_check_and_clean_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseasonal_period\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseasonal_period\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2108\u001b[0;31m \u001b[0;34m.\u001b[0m\u001b[0m_check_and_set_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2109\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_set_exogenous_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2110\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_check_and_set_forecasting_types\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2111\u001b[0m 
\u001b[0;34m.\u001b[0m\u001b[0m_check_and_set_fh\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfh\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pycaret/time_series/forecasting/oop.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, target)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 453\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Target Column '{target}' is not present in the data.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[0;31m# Check type of target values - must be numeric 
----\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mfinal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mNoReturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1519\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 1520\u001b[0m \u001b[0;34mf\"The truth value of a {type(self).__name__} is ambiguous. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1521\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m )\n", "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." 
] } ], "source": [ "from pycaret.time_series import compare_models, finalize_model, predict_model, setup\n", "\n", "# Initialize the PyCaret time series experiment.\n", "# `target` must be the *name* of a column that is present in `data`; passing the\n", "# Series df[target] makes setup() fail with a ValueError (ambiguous truth value).\n", "exp = setup(\n", "    data=df,  # Full frame: the target column plus the exogenous feature columns\n", "    target=target,  # Name of the column to forecast (a str), not the Series itself\n", "    fold=5,  # Number of cross-validation folds\n", "    session_id=42,  # For reproducibility\n", "    seasonal_period=12,  # If you know the seasonal period, set it\n", "    fh=24,  # Forecast horizon (number of future periods to predict)\n", ")\n", "\n", "# Compare baseline models\n", "best_model = compare_models()\n", "\n", "# Get predictions from the best model\n", "# NOTE(review): df also carries exogenous columns; PyCaret documents that a finalized\n", "# model trained with exogenous variables needs their future values via X= -- confirm on re-run.\n", "final_model = finalize_model(best_model)\n", "future_forecast = predict_model(final_model, fh=24)  # Forecast next 24 periods\n", "print(future_forecast)\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from data_preprocessing import read_cpih, read_hes, get_global_df, get_final_df\n", "cpih_df = read_cpih(\"data/cpih.csv\", medical=False)\n", "cpim_df = read_cpih(\"data/cpih_medical.csv\", medical=True)\n", "hes = read_hes(\"data/HES_M5_OPEN_DATA.csv\")\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datevalue
2172007-02-012.7
2182007-03-012.9
2192007-04-012.7
2202007-05-012.5
2212007-06-012.5
.........
4242024-05-012.8
4252024-06-012.8
4262024-07-013.1
4272024-08-013.1
4282024-09-012.6
\n", "

212 rows × 2 columns

\n", "
" ], "text/plain": [ " date value\n", "217 2007-02-01 2.7\n", "218 2007-03-01 2.9\n", "219 2007-04-01 2.7\n", "220 2007-05-01 2.5\n", "221 2007-06-01 2.5\n", ".. ... ...\n", "424 2024-05-01 2.8\n", "425 2024-06-01 2.8\n", "426 2024-07-01 3.1\n", "427 2024-08-01 3.1\n", "428 2024-09-01 2.6\n", "\n", "[212 rows x 2 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cpih_df[cpih_df[\"date\"] > \"2007-01-01\"]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cpihcpih_medicalAPC_Finished_ConsultantAPC_FCEs_with_a_procedureAPC_Percent_FCEs_with_procedureAPC_Ordinary_EpisodesAPC_Day_Case_EpisodesAPC_Day_Case_Episodes_with_procAPC_Percent_Day_Cases_with_procAPC_Finished_Admission_Episodes...Outpatient_Total_AppointmentsOutpatient_Attended_AppointmentsOutpatient_Percent_AttendedOutpatient_DNA_AppointmentOutpatient_Percent_DNAOutpatient_Follow_Up_AttendanceOutpatient_Attendance_Type_1Outpatient_Attendance_Type_2yearmonth
02.74.811940416504180.548396513543903247170.921045931...509804341609490.804273150.102.3710691232231292170520074
12.54.613071177308270.569060094011083679530.921147940...561432945874020.804690440.102.3103081383498319630720075
22.54.912666547151020.568760093906453599440.921111356...549713844829700.804655440.102.2845551362691311314320076
32.05.113012807321050.568981084031723715850.921141788...574330746574270.804763120.102.2761471419227323036920077
42.05.012746677194570.568842143904533605650.921116986...547579244378880.804594390.102.2568721360489307044920078
..................................................................
2043.06.5185186511029090.6011497977020686555410.931516960...1174963491162870.786656860.062.0552222983520613179620244
2052.86.0191284011328150.5911893237235176744310.931571634...1195552593071370.786829570.062.0267243074579623132420245
2062.86.0180308210601760.5911266246764586286750.931479951...1134519187546740.776510370.062.0021772915717583778220246
2073.16.0192475511150650.5811837777409786822570.921579131...1244734595575500.777145580.062.0337823149970640635320247
2083.15.817797097713290.4311034756762344955600.731461354...1101680884149750.766360810.062.0582462751184566261420248
\n", "

209 rows × 21 columns

\n", "
" ], "text/plain": [ " cpih cpih_medical APC_Finished_Consultant APC_FCEs_with_a_procedure \\\n", "0 2.7 4.8 1194041 650418 \n", "1 2.5 4.6 1307117 730827 \n", "2 2.5 4.9 1266654 715102 \n", "3 2.0 5.1 1301280 732105 \n", "4 2.0 5.0 1274667 719457 \n", ".. ... ... ... ... \n", "204 3.0 6.5 1851865 1102909 \n", "205 2.8 6.0 1912840 1132815 \n", "206 2.8 6.0 1803082 1060176 \n", "207 3.1 6.0 1924755 1115065 \n", "208 3.1 5.8 1779709 771329 \n", "\n", " APC_Percent_FCEs_with_procedure APC_Ordinary_Episodes \\\n", "0 0.54 839651 \n", "1 0.56 906009 \n", "2 0.56 876009 \n", "3 0.56 898108 \n", "4 0.56 884214 \n", ".. ... ... \n", "204 0.60 1149797 \n", "205 0.59 1189323 \n", "206 0.59 1126624 \n", "207 0.58 1183777 \n", "208 0.43 1103475 \n", "\n", " APC_Day_Case_Episodes APC_Day_Case_Episodes_with_proc \\\n", "0 354390 324717 \n", "1 401108 367953 \n", "2 390645 359944 \n", "3 403172 371585 \n", "4 390453 360565 \n", ".. ... ... \n", "204 702068 655541 \n", "205 723517 674431 \n", "206 676458 628675 \n", "207 740978 682257 \n", "208 676234 495560 \n", "\n", " APC_Percent_Day_Cases_with_proc APC_Finished_Admission_Episodes ... \\\n", "0 0.92 1045931 ... \n", "1 0.92 1147940 ... \n", "2 0.92 1111356 ... \n", "3 0.92 1141788 ... \n", "4 0.92 1116986 ... \n", ".. ... ... ... \n", "204 0.93 1516960 ... \n", "205 0.93 1571634 ... \n", "206 0.93 1479951 ... \n", "207 0.92 1579131 ... \n", "208 0.73 1461354 ... \n", "\n", " Outpatient_Total_Appointments Outpatient_Attended_Appointments \\\n", "0 5098043 4160949 \n", "1 5614329 4587402 \n", "2 5497138 4482970 \n", "3 5743307 4657427 \n", "4 5475792 4437888 \n", ".. ... ... \n", "204 11749634 9116287 \n", "205 11955525 9307137 \n", "206 11345191 8754674 \n", "207 12447345 9557550 \n", "208 11016808 8414975 \n", "\n", " Outpatient_Percent_Attended Outpatient_DNA_Appointment \\\n", "0 0.80 427315 \n", "1 0.80 469044 \n", "2 0.80 465544 \n", "3 0.80 476312 \n", "4 0.80 459439 \n", ".. ... ... 
\n", "204 0.78 665686 \n", "205 0.78 682957 \n", "206 0.77 651037 \n", "207 0.77 714558 \n", "208 0.76 636081 \n", "\n", " Outpatient_Percent_DNA Outpatient_Follow_Up_Attendance \\\n", "0 0.10 2.371069 \n", "1 0.10 2.310308 \n", "2 0.10 2.284555 \n", "3 0.10 2.276147 \n", "4 0.10 2.256872 \n", ".. ... ... \n", "204 0.06 2.055222 \n", "205 0.06 2.026724 \n", "206 0.06 2.002177 \n", "207 0.06 2.033782 \n", "208 0.06 2.058246 \n", "\n", " Outpatient_Attendance_Type_1 Outpatient_Attendance_Type_2 year month \n", "0 1232231 2921705 2007 4 \n", "1 1383498 3196307 2007 5 \n", "2 1362691 3113143 2007 6 \n", "3 1419227 3230369 2007 7 \n", "4 1360489 3070449 2007 8 \n", ".. ... ... ... ... \n", "204 2983520 6131796 2024 4 \n", "205 3074579 6231324 2024 5 \n", "206 2915717 5837782 2024 6 \n", "207 3149970 6406353 2024 7 \n", "208 2751184 5662614 2024 8 \n", "\n", "[209 rows x 21 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = get_global_df(cpih_df, cpim_df, hes)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetargetcpim_lag1cpim_lag2cpim_lag3cpih_lag1cpih_lag2cpih_lag3APC_Finished_ConsultantAPC_FCEs_with_a_procedure...APC_Finished_Admission_EpisodesAPC_EmergencyOutpatient_Total_AppointmentsOutpatient_Attended_AppointmentsOutpatient_Percent_AttendedOutpatient_DNA_AppointmentOutpatient_Percent_DNAOutpatient_Follow_Up_AttendanceOutpatient_Attendance_Type_1Outpatient_Attendance_Type_2
02007-07-015.14.94.64.82.52.52.71266654.0715102.0...1111356.0390411.05497138.04482970.00.80465544.00.102.2845551362691.03113143.0
12007-08-015.05.14.94.62.02.52.51301280.0732105.0...1141788.0397923.05743307.04657427.00.80476312.00.102.2761471419227.03230369.0
22007-09-015.25.05.14.92.02.02.51274667.0719457.0...1116986.0391334.05475792.04437888.00.80459439.00.102.2568721360489.03070449.0
32007-10-014.95.25.05.12.02.02.01225860.0691203.0...1079019.0376896.05438116.04401026.00.80460482.00.102.2454151353967.03040218.0
42007-11-014.74.95.25.02.32.02.01354103.0769356.0...1190762.0410475.06144081.05018526.00.80522083.00.102.2682991533201.03477759.0
..................................................................
2012024-04-016.55.95.96.13.83.84.21855318.01071637.0...1529117.0573401.011176694.08710477.00.78625936.00.062.0768172833517.05876176.0
2022024-05-016.06.55.95.93.03.83.81851865.01102909.0...1516960.0557776.011749634.09116287.00.78665686.00.062.0552222983520.06131796.0
2032024-06-016.06.06.55.92.83.03.81912840.01132815.0...1571634.0575755.011955525.09307137.00.78682957.00.062.0267243074579.06231324.0
2042024-07-016.06.06.06.52.82.83.01803082.01060176.0...1479951.0542409.011345191.08754674.00.77651037.00.062.0021772915717.05837782.0
2052024-08-015.86.06.06.03.12.82.81924755.01115065.0...1579131.0561228.012447345.09557550.00.77714558.00.062.0337823149970.06406353.0
\n", "

206 rows × 25 columns

\n", "
" ], "text/plain": [ " date target cpim_lag1 cpim_lag2 cpim_lag3 cpih_lag1 cpih_lag2 \\\n", "0 2007-07-01 5.1 4.9 4.6 4.8 2.5 2.5 \n", "1 2007-08-01 5.0 5.1 4.9 4.6 2.0 2.5 \n", "2 2007-09-01 5.2 5.0 5.1 4.9 2.0 2.0 \n", "3 2007-10-01 4.9 5.2 5.0 5.1 2.0 2.0 \n", "4 2007-11-01 4.7 4.9 5.2 5.0 2.3 2.0 \n", ".. ... ... ... ... ... ... ... \n", "201 2024-04-01 6.5 5.9 5.9 6.1 3.8 3.8 \n", "202 2024-05-01 6.0 6.5 5.9 5.9 3.0 3.8 \n", "203 2024-06-01 6.0 6.0 6.5 5.9 2.8 3.0 \n", "204 2024-07-01 6.0 6.0 6.0 6.5 2.8 2.8 \n", "205 2024-08-01 5.8 6.0 6.0 6.0 3.1 2.8 \n", "\n", " cpih_lag3 APC_Finished_Consultant APC_FCEs_with_a_procedure ... \\\n", "0 2.7 1266654.0 715102.0 ... \n", "1 2.5 1301280.0 732105.0 ... \n", "2 2.5 1274667.0 719457.0 ... \n", "3 2.0 1225860.0 691203.0 ... \n", "4 2.0 1354103.0 769356.0 ... \n", ".. ... ... ... ... \n", "201 4.2 1855318.0 1071637.0 ... \n", "202 3.8 1851865.0 1102909.0 ... \n", "203 3.8 1912840.0 1132815.0 ... \n", "204 3.0 1803082.0 1060176.0 ... \n", "205 2.8 1924755.0 1115065.0 ... \n", "\n", " APC_Finished_Admission_Episodes APC_Emergency \\\n", "0 1111356.0 390411.0 \n", "1 1141788.0 397923.0 \n", "2 1116986.0 391334.0 \n", "3 1079019.0 376896.0 \n", "4 1190762.0 410475.0 \n", ".. ... ... \n", "201 1529117.0 573401.0 \n", "202 1516960.0 557776.0 \n", "203 1571634.0 575755.0 \n", "204 1479951.0 542409.0 \n", "205 1579131.0 561228.0 \n", "\n", " Outpatient_Total_Appointments Outpatient_Attended_Appointments \\\n", "0 5497138.0 4482970.0 \n", "1 5743307.0 4657427.0 \n", "2 5475792.0 4437888.0 \n", "3 5438116.0 4401026.0 \n", "4 6144081.0 5018526.0 \n", ".. ... ... \n", "201 11176694.0 8710477.0 \n", "202 11749634.0 9116287.0 \n", "203 11955525.0 9307137.0 \n", "204 11345191.0 8754674.0 \n", "205 12447345.0 9557550.0 \n", "\n", " Outpatient_Percent_Attended Outpatient_DNA_Appointment \\\n", "0 0.80 465544.0 \n", "1 0.80 476312.0 \n", "2 0.80 459439.0 \n", "3 0.80 460482.0 \n", "4 0.80 522083.0 \n", ".. ... ... 
\n", "201 0.78 625936.0 \n", "202 0.78 665686.0 \n", "203 0.78 682957.0 \n", "204 0.77 651037.0 \n", "205 0.77 714558.0 \n", "\n", " Outpatient_Percent_DNA Outpatient_Follow_Up_Attendance \\\n", "0 0.10 2.284555 \n", "1 0.10 2.276147 \n", "2 0.10 2.256872 \n", "3 0.10 2.245415 \n", "4 0.10 2.268299 \n", ".. ... ... \n", "201 0.06 2.076817 \n", "202 0.06 2.055222 \n", "203 0.06 2.026724 \n", "204 0.06 2.002177 \n", "205 0.06 2.033782 \n", "\n", " Outpatient_Attendance_Type_1 Outpatient_Attendance_Type_2 \n", "0 1362691.0 3113143.0 \n", "1 1419227.0 3230369.0 \n", "2 1360489.0 3070449.0 \n", "3 1353967.0 3040218.0 \n", "4 1533201.0 3477759.0 \n", ".. ... ... \n", "201 2833517.0 5876176.0 \n", "202 2983520.0 6131796.0 \n", "203 3074579.0 6231324.0 \n", "204 2915717.0 5837782.0 \n", "205 3149970.0 6406353.0 \n", "\n", "[206 rows x 25 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "df = get_final_df(df)\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 2 }