{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "Importing the libraries used throughout the notebook" ], "metadata": { "id": "GY0lAyEVygnv" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "OT7h2znxcoLp" }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "markdown", "source": [ "Loading the dataset from the CSV file" ], "metadata": { "id": "9o1rKoKHymIw" } }, { "cell_type": "code", "source": [ "# Relative path: resolved against the kernel's current working directory.\n", "data = pd.read_csv('HeartDisease.csv')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8V_mP798d15d", "outputId": "aeebfaf6-d62b-4bc0-eaa6-bd55fdc6333c", "collapsed": true }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " male age currentSmoker cigsPerDay BPMeds prevalentStroke \\\n", "0 1 39 0 0 0 0 \n", "1 0 46 0 0 0 0 \n", "2 1 48 1 20 0 0 \n", "3 0 61 1 30 0 0 \n", "4 0 46 1 23 0 0 \n", "... ... ... ... ... ... ... \n", "4132 1 68 0 0 0 0 \n", "4133 1 50 1 1 0 0 \n", "4134 1 51 1 43 0 0 \n", "4135 0 44 1 15 0 0 \n", "4136 0 52 0 0 0 0 \n", "\n", " prevalentHyp diabetes BMI TenYearCHD \n", "0 0 0 26.97 0 \n", "1 0 0 28.73 0 \n", "2 0 0 25.34 0 \n", "3 1 0 28.58 1 \n", "4 0 0 23.10 0 \n", "... ... ... ... ... 
\n", "4132 1 0 23.14 1 \n", "4133 1 0 25.97 1 \n", "4134 0 0 19.71 0 \n", "4135 0 0 19.16 0 \n", "4136 0 0 21.47 0 \n", "\n", "[4137 rows x 10 columns]\n", "[]\n" ] } ] }, { "cell_type": "markdown", "source": [ "Keeping the loaded data in a DataFrame called `df`" ], "metadata": { "id": "fFJpTqnZy54q" } }, { "cell_type": "code", "source": [ "# read_csv already returns a DataFrame; this re-wraps it under the name `df` used below.\n", "df = pd.DataFrame(data)" ], "metadata": { "id": "_uL_UiU9eSqS" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Separating the feature columns from the target column (`TenYearCHD`)" ], "metadata": { "id": "9L7gSh_6zrP1" } }, { "cell_type": "code", "source": [ "# Features: every column except the target.\n", "X = df.drop(columns='TenYearCHD')\n", "# Target: the TenYearCHD column.\n", "y = df['TenYearCHD']" ], "metadata": { "id": "2tt1BYjEed0h" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Splitting the dataset into training and testing sets" ], "metadata": { "id": "2PvD1TRizwyr" } }, { "cell_type": "code", "source": [ "# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" ], "metadata": { "id": "H-cuKGVZe2y0" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Defining and training the model" ], "metadata": { "id": "T8SwT0C0z3z6" } }, { "cell_type": "code", "source": [ "# With the default iteration cap (max_iter=100) lbfgs stopped before converging on these\n", "# unscaled features (ConvergenceWarning), so the solver is given more iterations.\n", "model = LogisticRegression(random_state=42, max_iter=1000)\n", "model.fit(X_train, y_train)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 234 }, "id": "ofqGp7tjlu5P", "outputId": "5071185d-e3aa-4fcb-b0b4-18104e2d313c", "collapsed": true }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "LogisticRegression(random_state=42)" ], "text/html": [ "
LogisticRegression(random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(random_state=42)