{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "14741086",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sklearn version: 1.0.2\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import sklearn\n",
"# Show which scikit-learn version this notebook ran with.\n",
"print(f\"sklearn version: {sklearn.__version__}\")\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn import metrics\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "96b17451",
"metadata": {},
"outputs": [],
"source": [
"# Read Iris.csv (path is relative to the working directory) into a DataFrame.\n",
"df = pd.read_csv(\"Iris.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cefb0143",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" SepalLengthCm | \n",
" SepalWidthCm | \n",
" PetalLengthCm | \n",
" PetalWidthCm | \n",
" Species | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 5.1 | \n",
" 3.5 | \n",
" 1.4 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 4.9 | \n",
" 3.0 | \n",
" 1.4 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 4.7 | \n",
" 3.2 | \n",
" 1.3 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 4.6 | \n",
" 3.1 | \n",
" 1.5 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 5.0 | \n",
" 3.6 | \n",
" 1.4 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n",
"0 1 5.1 3.5 1.4 0.2 Iris-setosa\n",
"1 2 4.9 3.0 1.4 0.2 Iris-setosa\n",
"2 3 4.7 3.2 1.3 0.2 Iris-setosa\n",
"3 4 4.6 3.1 1.5 0.2 Iris-setosa\n",
"4 5 5.0 3.6 1.4 0.2 Iris-setosa"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the loaded data.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f3c67f44",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape of dataset: (150, 6)\n"
]
}
],
"source": [
"# (rows, columns) of the raw frame.\n",
"dataset_shape = df.shape\n",
"print(f\"Shape of dataset: {dataset_shape}\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "60e037d4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" mean | \n",
" std | \n",
" min | \n",
" 25% | \n",
" 50% | \n",
" 75% | \n",
" max | \n",
"
\n",
" \n",
" \n",
" \n",
" Id | \n",
" 150.0 | \n",
" 75.500000 | \n",
" 43.445368 | \n",
" 1.0 | \n",
" 38.25 | \n",
" 75.50 | \n",
" 112.75 | \n",
" 150.0 | \n",
"
\n",
" \n",
" SepalLengthCm | \n",
" 150.0 | \n",
" 5.843333 | \n",
" 0.828066 | \n",
" 4.3 | \n",
" 5.10 | \n",
" 5.80 | \n",
" 6.40 | \n",
" 7.9 | \n",
"
\n",
" \n",
" SepalWidthCm | \n",
" 150.0 | \n",
" 3.054000 | \n",
" 0.433594 | \n",
" 2.0 | \n",
" 2.80 | \n",
" 3.00 | \n",
" 3.30 | \n",
" 4.4 | \n",
"
\n",
" \n",
" PetalLengthCm | \n",
" 150.0 | \n",
" 3.758667 | \n",
" 1.764420 | \n",
" 1.0 | \n",
" 1.60 | \n",
" 4.35 | \n",
" 5.10 | \n",
" 6.9 | \n",
"
\n",
" \n",
" PetalWidthCm | \n",
" 150.0 | \n",
" 1.198667 | \n",
" 0.763161 | \n",
" 0.1 | \n",
" 0.30 | \n",
" 1.30 | \n",
" 1.80 | \n",
" 2.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count mean std min 25% 50% 75% max\n",
"Id 150.0 75.500000 43.445368 1.0 38.25 75.50 112.75 150.0\n",
"SepalLengthCm 150.0 5.843333 0.828066 4.3 5.10 5.80 6.40 7.9\n",
"SepalWidthCm 150.0 3.054000 0.433594 2.0 2.80 3.00 3.30 4.4\n",
"PetalLengthCm 150.0 3.758667 1.764420 1.0 1.60 4.35 5.10 6.9\n",
"PetalWidthCm 150.0 1.198667 0.763161 0.1 0.30 1.30 1.80 2.5"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics, transposed so each described column becomes one row.\n",
"df.describe().transpose()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "60f28e3c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(150, 4)\n",
"(150,)\n"
]
}
],
"source": [
"# Features: every column except the row identifier and the label.\n",
"X = df.drop(columns=['Id', 'Species'])\n",
"# Target: the species label.\n",
"y = df['Species']\n",
"print(X.shape, y.shape, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d76a6b95",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(90, 4)\n",
"(90,)\n",
"(60, 4)\n",
"(60,)\n"
]
}
],
"source": [
"# Hold out 40% of the rows for testing; the fixed random_state makes the split repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.4,\n",
"    random_state=5,\n",
")\n",
"print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b1da053e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9833333333333333\n"
]
}
],
"source": [
"# fit() returns the estimator itself, so construction and training can be chained.\n",
"logreg = LogisticRegression().fit(X_train, y_train)\n",
"y_pred = logreg.predict(X_test)\n",
"# Fraction of held-out samples whose predicted label matches the true label.\n",
"accuracy = metrics.accuracy_score(y_test, y_pred)\n",
"print(accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2d47b3df",
"metadata": {},
"outputs": [],
"source": [
"filename = 'finalized_model.sav'\n",
"# Open the file in a with-block so it is closed as soon as the model is written,\n",
"# even if pickling raises.\n",
"with open(filename, 'wb') as model_file:\n",
"    pickle.dump(logreg, model_file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}