{ "cells": [
 { "cell_type": "code", "execution_count": 1, "id": "14741086", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sklearn version: 1.0.2\n" ] } ], "source": [
  "import numpy as np\n",
  "import pandas as pd\n",
  "import matplotlib.pyplot as plt\n",
  "import sklearn\n",
  "# Show which scikit-learn version this kernel is running\n",
  "print(f\"sklearn version: {sklearn.__version__}\")\n",
  "\n",
  "from sklearn.model_selection import train_test_split\n",
  "from sklearn.linear_model import LogisticRegression\n",
  "from sklearn import metrics\n",
  "import pickle"
 ] },
 { "cell_type": "code", "execution_count": 2, "id": "96b17451", "metadata": {}, "outputs": [], "source": [
  "# Read the dataset from 'Iris.csv' (path is relative to the working directory)\n",
  "df = pd.read_csv(filepath_or_buffer='Iris.csv')"
 ] },
 { "cell_type": "code", "execution_count": 3, "id": "cefb0143", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdSepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
015.13.51.40.2Iris-setosa
124.93.01.40.2Iris-setosa
234.73.21.30.2Iris-setosa
344.63.11.50.2Iris-setosa
455.03.61.40.2Iris-setosa
\n", "
" ], "text/plain": [ " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n", "0 1 5.1 3.5 1.4 0.2 Iris-setosa\n", "1 2 4.9 3.0 1.4 0.2 Iris-setosa\n", "2 3 4.7 3.2 1.3 0.2 Iris-setosa\n", "3 4 4.6 3.1 1.5 0.2 Iris-setosa\n", "4 5 5.0 3.6 1.4 0.2 Iris-setosa" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Preview the first five rows of the loaded frame\n",
  "df.head(n=5)"
 ] },
 { "cell_type": "code", "execution_count": 4, "id": "f3c67f44", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shape of dataset: (150, 6)\n" ] } ], "source": [
  "# Report the (rows, columns) size of the loaded frame\n",
  "print(f\"Shape of dataset: {df.shape}\")"
 ] },
 { "cell_type": "code", "execution_count": 5, "id": "60e037d4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
Id150.075.50000043.4453681.038.2575.50112.75150.0
SepalLengthCm150.05.8433330.8280664.35.105.806.407.9
SepalWidthCm150.03.0540000.4335942.02.803.003.304.4
PetalLengthCm150.03.7586671.7644201.01.604.355.106.9
PetalWidthCm150.01.1986670.7631610.10.301.301.802.5
\n", "
" ], "text/plain": [ " count mean std min 25% 50% 75% max\n", "Id 150.0 75.500000 43.445368 1.0 38.25 75.50 112.75 150.0\n", "SepalLengthCm 150.0 5.843333 0.828066 4.3 5.10 5.80 6.40 7.9\n", "SepalWidthCm 150.0 3.054000 0.433594 2.0 2.80 3.00 3.30 4.4\n", "PetalLengthCm 150.0 3.758667 1.764420 1.0 1.60 4.35 5.10 6.9\n", "PetalWidthCm 150.0 1.198667 0.763161 0.1 0.30 1.30 1.80 2.5" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Summary statistics, transposed so that each described column becomes one row\n",
  "df.describe().T"
 ] },
 { "cell_type": "code", "execution_count": 6, "id": "60f28e3c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(150, 4)\n", "(150,)\n" ] } ], "source": [
  "# Features: every column except the 'Id' column and the 'Species' target\n",
  "X = df.drop(['Id', 'Species'], axis=1)\n",
  "# Target: the 'Species' label column\n",
  "y = df['Species']\n",
  "print(X.shape)\n",
  "print(y.shape)"
 ] },
 { "cell_type": "code", "execution_count": 7, "id": "d76a6b95", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(90, 4)\n", "(90,)\n", "(60, 4)\n", "(60,)\n" ] } ], "source": [
  "# Hold out 40% of the rows for testing; the fixed random_state keeps the split repeatable\n",
  "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)\n",
  "print(X_train.shape)\n",
  "print(y_train.shape)\n",
  "print(X_test.shape)\n",
  "print(y_test.shape)"
 ] },
 { "cell_type": "code", "execution_count": 8, "id": "b1da053e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9833333333333333\n" ] } ], "source": [
  "# Fit a logistic regression with default settings, then print its accuracy on the held-out rows\n",
  "logreg = LogisticRegression()\n",
  "logreg.fit(X_train, y_train)\n",
  "y_pred = logreg.predict(X_test)\n",
  "print(metrics.accuracy_score(y_test, y_pred))"
 ] },
 { "cell_type": "code", "execution_count": 9, "id": "2d47b3df", "metadata": {}, "outputs": [], "source": [
  "# Persist the fitted model with pickle. The context manager closes the file\n",
  "# handle (flushing the bytes to disk) even if pickling raises, instead of\n",
  "# leaving an unclosed handle for the garbage collector to clean up.\n",
  "filename = 'finalized_model.sav'\n",
  "with open(filename, 'wb') as model_file:\n",
  "    pickle.dump(logreg, model_file)"
 ] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 },
"file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }