diff --git "a/model.ipynb" "b/model.ipynb" --- "a/model.ipynb" +++ "b/model.ipynb" @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 470, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 471, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -145,7 +145,7 @@ "4 NaN Yes " ] }, - "execution_count": 471, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 472, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 473, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -282,7 +282,7 @@ "max 89.000000 6.000000 14.000000" ] }, - "execution_count": 473, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -293,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 474, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -304,7 +304,7 @@ " dtype='object')" ] }, - "execution_count": 474, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -316,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 475, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -325,7 +325,7 @@ "" ] }, - "execution_count": 475, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 476, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -544,7 +544,7 @@ "[1964 rows x 8 columns]" ] }, - "execution_count": 476, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -562,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": 477, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -579,7 +579,7 @@ "dtype: int64" ] }, - "execution_count": 477, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +599,7 @@ }, { 
"cell_type": "code", - "execution_count": 478, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -630,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 479, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -740,7 +740,7 @@ "4 7.528571 Yes " ] }, - "execution_count": 479, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -751,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 480, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -850,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 481, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -960,7 +960,7 @@ "4 7.528571 Yes " ] }, - "execution_count": 481, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -971,7 +971,7 @@ }, { "cell_type": "code", - "execution_count": 482, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -980,7 +980,7 @@ "\" \\n# target encoding due to many classes\\nencoder = TargetEncoder(cols=['Disease Type'], smoothing=0.3)\\ndf['DiseaseTypeEncoded'] = encoder.fit_transform(df['Disease Type'], df['Recovered'])\\n\\nencoder = TargetEncoder(cols=['Treatment Type'], smoothing=0.3)\\ndf['TreatmentTypeEncoded'] = encoder.fit_transform(df['Treatment Type'], df['Recovered']) \"" ] }, - "execution_count": 482, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1010,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 483, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -1020,7 +1020,7 @@ }, { "cell_type": "code", - "execution_count": 484, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1192,7 +1192,7 @@ "4 0 1 0 " ] }, - "execution_count": 484, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1204,7 +1204,7 @@ }, { "cell_type": "code", - "execution_count": 485, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1214,7 +1214,7 @@ }, { "cell_type": "code", - 
"execution_count": 505, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -1229,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 506, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1389,7 +1389,7 @@ "4 0 1 0 " ] }, - "execution_count": 506, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1407,7 +1407,7 @@ }, { "cell_type": "code", - "execution_count": 507, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1416,7 +1416,7 @@ "((5200, 13), (5200,))" ] }, - "execution_count": 507, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1435,13 +1435,13 @@ }, { "cell_type": "code", - "execution_count": 509, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
LogisticRegression(solver='saga')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
LogisticRegression(solver='saga')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LogisticRegression(solver='saga')" ] }, - "execution_count": 509, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1898,7 +1898,7 @@ }, { "cell_type": "code", - "execution_count": 510, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1947,7 +1947,7 @@ }, { "cell_type": "code", - "execution_count": 511, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1974,7 +1974,7 @@ }, { "cell_type": "code", - "execution_count": 512, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -2001,7 +2001,7 @@ }, { "cell_type": "code", - "execution_count": 513, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -2028,7 +2028,7 @@ }, { "cell_type": "code", - "execution_count": 514, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -2040,7 +2040,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -2055,7 +2055,7 @@ }, { "cell_type": "code", - "execution_count": 515, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -2082,20 +2082,231 @@ }, { "cell_type": "code", - "execution_count": 520, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeHospital VisitsLung CapacityGenderSmoking StatusRecoveredDisease Type_AsthmaDisease Type_BronchitisDisease Type_COPDDisease Type_Lung CancerDisease Type_PneumoniaTreatment Type_MedicationTreatment Type_SurgeryTreatment Type_Therapy
071144.49000000100100001
13473.50186501001000010
28041.95000011100100100
34013.50186501001000100
44374.60000011100100010
\n", + "
def prediction(model, age: int, gender: str,
               smoke_status: str, lung_capacity: float,
               disease_type: str, treatment_type: str,
               hospital_visits: int
               ) -> int:
    """Build a single-row feature vector and return the model's prediction.

    Parameters
    ----------
    model
        Any fitted estimator exposing ``predict(array) -> sequence``.
    age, hospital_visits, lung_capacity
        Numeric features, passed through unchanged.
    gender
        Encoded as 1 when exactly ``"Male"``, otherwise 0.
    smoke_status
        Encoded as 1 when exactly ``"Yes"``, otherwise 0.
    disease_type
        One of ``'Asthma'``, ``'Bronchitis'``, ``'COPD'``, ``'Lung Cancer'``,
        ``'Pneumonia'`` (the full column name, e.g. ``'Disease Type_COPD'``,
        is also accepted).
    treatment_type
        One of ``'Medication'``, ``'Surgery'``, ``'Therapy'`` (or the full
        ``'Treatment Type_*'`` column name).

    Returns
    -------
    The first element of ``model.predict(...)`` converted to a plain Python
    scalar via ``.item()``.

    Notes
    -----
    An unrecognised ``disease_type`` / ``treatment_type`` yields an all-zero
    one-hot group rather than raising.
    NOTE(review): the column order below is assumed to match the order of the
    features the model was trained on -- confirm against the training cell.
    """
    disease_labels = ('Asthma', 'Bronchitis', 'COPD', 'Lung Cancer', 'Pneumonia')
    treatment_labels = ('Medication', 'Surgery', 'Therapy')

    def one_hot(value, prefix, labels):
        # Exact match only. The previous substring test (`value in column_name`)
        # lit up several mutually exclusive columns for inputs such as '' or
        # 'Type', producing an invalid feature row.
        return {
            f'{prefix}_{label}': [1 if value in (label, f'{prefix}_{label}') else 0]
            for label in labels
        }

    # Dict insertion order defines the column order of the feature row.
    row = {
        'Age': [age],
        'Hospital Visits': [hospital_visits],
        'Lung Capacity': [lung_capacity],
        'Gender': [1 if gender == "Male" else 0],
        'Smoking Status': [1 if smoke_status == "Yes" else 0],
    }
    row.update(one_hot(disease_type, 'Disease Type', disease_labels))
    row.update(one_hot(treatment_type, 'Treatment Type', treatment_labels))

    features = pd.DataFrame(row).to_numpy()

    # np.asarray lets estimators that return a plain list work with .item().
    predicted = np.asarray(model.predict(features))[0]

    return predicted.item()