{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "14741086",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sklearn version: 1.0.2\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "print(\"sklearn version: \" + sklearn.__version__)\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn import metrics\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "96b17451",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('Iris.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cefb0143",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <th>Species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species\n",
       "0   1            5.1           3.5            1.4           0.2  Iris-setosa\n",
       "1   2            4.9           3.0            1.4           0.2  Iris-setosa\n",
       "2   3            4.7           3.2            1.3           0.2  Iris-setosa\n",
       "3   4            4.6           3.1            1.5           0.2  Iris-setosa\n",
       "4   5            5.0           3.6            1.4           0.2  Iris-setosa"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f3c67f44",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of dataset: (150, 6)\n"
     ]
    }
   ],
   "source": [
    "print(f\"Shape of dataset: {df.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "60e037d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Id</th>\n",
       "      <td>150.0</td>\n",
       "      <td>75.500000</td>\n",
       "      <td>43.445368</td>\n",
       "      <td>1.0</td>\n",
       "      <td>38.25</td>\n",
       "      <td>75.50</td>\n",
       "      <td>112.75</td>\n",
       "      <td>150.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <td>150.0</td>\n",
       "      <td>5.843333</td>\n",
       "      <td>0.828066</td>\n",
       "      <td>4.3</td>\n",
       "      <td>5.10</td>\n",
       "      <td>5.80</td>\n",
       "      <td>6.40</td>\n",
       "      <td>7.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <td>150.0</td>\n",
       "      <td>3.054000</td>\n",
       "      <td>0.433594</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.80</td>\n",
       "      <td>3.00</td>\n",
       "      <td>3.30</td>\n",
       "      <td>4.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <td>150.0</td>\n",
       "      <td>3.758667</td>\n",
       "      <td>1.764420</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.60</td>\n",
       "      <td>4.35</td>\n",
       "      <td>5.10</td>\n",
       "      <td>6.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <td>150.0</td>\n",
       "      <td>1.198667</td>\n",
       "      <td>0.763161</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.30</td>\n",
       "      <td>1.30</td>\n",
       "      <td>1.80</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               count       mean        std  min    25%    50%     75%    max\n",
       "Id             150.0  75.500000  43.445368  1.0  38.25  75.50  112.75  150.0\n",
       "SepalLengthCm  150.0   5.843333   0.828066  4.3   5.10   5.80    6.40    7.9\n",
       "SepalWidthCm   150.0   3.054000   0.433594  2.0   2.80   3.00    3.30    4.4\n",
       "PetalLengthCm  150.0   3.758667   1.764420  1.0   1.60   4.35    5.10    6.9\n",
       "PetalWidthCm   150.0   1.198667   0.763161  0.1   0.30   1.30    1.80    2.5"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe().T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "60f28e3c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(150, 4)\n",
      "(150,)\n"
     ]
    }
   ],
   "source": [
    "X = df.drop(['Id', 'Species'], axis=1)\n",
    "y = df['Species']\n",
    "# print(X.head())\n",
    "print(X.shape)\n",
    "# print(y.head())\n",
    "print(y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d76a6b95",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(90, 4)\n",
      "(90,)\n",
      "(60, 4)\n",
      "(60,)\n"
     ]
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)\n",
    "print(X_train.shape)\n",
    "print(y_train.shape)\n",
    "print(X_test.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b1da053e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9833333333333333\n"
     ]
    }
   ],
   "source": [
    "logreg = LogisticRegression()\n",
    "logreg.fit(X_train, y_train)\n",
    "y_pred = logreg.predict(X_test)\n",
    "print(metrics.accuracy_score(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2d47b3df",
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = 'finalized_model.sav'\n",
    "pickle.dump(logreg, open(filename, 'wb'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}