Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- Crop_Dataset.csv +0 -0
- Task_1_intellihack.ipynb +446 -0
- app.py +58 -0
- models/base_feature_scaler.joblib +3 -0
- models/decision_tree_model.joblib +3 -0
- models/gradient_boosting_model.joblib +3 -0
- models/knn_model.joblib +3 -0
- models/label_encoder.joblib +3 -0
- models/random_forest_model.joblib +3 -0
- models/svm_model.joblib +3 -0
- requirements.txt +5 -0
Crop_Dataset.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Task_1_intellihack.ipynb
ADDED
@@ -0,0 +1,446 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"colab": {
|
6 |
+
"provenance": []
|
7 |
+
},
|
8 |
+
"kernelspec": {
|
9 |
+
"name": "python3",
|
10 |
+
"display_name": "Python 3"
|
11 |
+
},
|
12 |
+
"language_info": {
|
13 |
+
"name": "python"
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"cells": [
|
17 |
+
{
|
18 |
+
"cell_type": "code",
|
19 |
+
"source": [
|
20 |
+
"!pip install -U scikit-learn"
|
21 |
+
],
|
22 |
+
"metadata": {
|
23 |
+
"colab": {
|
24 |
+
"base_uri": "https://localhost:8080/"
|
25 |
+
},
|
26 |
+
"id": "yBUpTF0liOBf",
|
27 |
+
"outputId": "e71b2db0-5438-400e-891c-53ee35c10e4f"
|
28 |
+
},
|
29 |
+
"execution_count": 3,
|
30 |
+
"outputs": [
|
31 |
+
{
|
32 |
+
"output_type": "stream",
|
33 |
+
"name": "stdout",
|
34 |
+
"text": [
|
35 |
+
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.4.2)\n",
|
36 |
+
"Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.25.2)\n",
|
37 |
+
"Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.11.4)\n",
|
38 |
+
"Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.0)\n",
|
39 |
+
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n"
|
40 |
+
]
|
41 |
+
}
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 4,
|
47 |
+
"metadata": {
|
48 |
+
"colab": {
|
49 |
+
"base_uri": "https://localhost:8080/"
|
50 |
+
},
|
51 |
+
"id": "QhegGo_LT4a_",
|
52 |
+
"outputId": "59e8d839-82e5-41ab-ca92-35721dcd69a0"
|
53 |
+
},
|
54 |
+
"outputs": [
|
55 |
+
{
|
56 |
+
"output_type": "stream",
|
57 |
+
"name": "stdout",
|
58 |
+
"text": [
|
59 |
+
" N P K temperature humidity ph rainfall Total_Nutrients \\\n",
|
60 |
+
"0 90 42 43 20.879744 82.002744 6.502985 202.935536 175 \n",
|
61 |
+
"1 85 58 41 21.770462 80.319644 7.038096 226.655537 184 \n",
|
62 |
+
"2 60 55 44 23.004459 82.320763 7.840207 263.964248 159 \n",
|
63 |
+
"3 74 35 40 26.491096 80.158363 6.980401 242.864034 149 \n",
|
64 |
+
"4 78 42 42 20.130175 81.604873 7.628473 262.717340 162 \n",
|
65 |
+
"\n",
|
66 |
+
" Temperature_Humidity Log_Rainfall Label Label_Encoded \n",
|
67 |
+
"0 1712.196283 5.317804 wheat 0 \n",
|
68 |
+
"1 1748.595734 5.427834 wheat 0 \n",
|
69 |
+
"2 1893.744627 5.579595 wheat 0 \n",
|
70 |
+
"3 2123.482908 5.496611 wheat 0 \n",
|
71 |
+
"4 1642.720357 5.574878 wheat 0 \n",
|
72 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
73 |
+
"RangeIndex: 2200 entries, 0 to 2199\n",
|
74 |
+
"Data columns (total 12 columns):\n",
|
75 |
+
" # Column Non-Null Count Dtype \n",
|
76 |
+
"--- ------ -------------- ----- \n",
|
77 |
+
" 0 N 2200 non-null int64 \n",
|
78 |
+
" 1 P 2200 non-null int64 \n",
|
79 |
+
" 2 K 2200 non-null int64 \n",
|
80 |
+
" 3 temperature 2200 non-null float64\n",
|
81 |
+
" 4 humidity 2200 non-null float64\n",
|
82 |
+
" 5 ph 2200 non-null float64\n",
|
83 |
+
" 6 rainfall 2200 non-null float64\n",
|
84 |
+
" 7 Total_Nutrients 2200 non-null int64 \n",
|
85 |
+
" 8 Temperature_Humidity 2200 non-null float64\n",
|
86 |
+
" 9 Log_Rainfall 2200 non-null float64\n",
|
87 |
+
" 10 Label 2200 non-null object \n",
|
88 |
+
" 11 Label_Encoded 2200 non-null int64 \n",
|
89 |
+
"dtypes: float64(6), int64(5), object(1)\n",
|
90 |
+
"memory usage: 206.4+ KB\n",
|
91 |
+
"None\n"
|
92 |
+
]
|
93 |
+
}
|
94 |
+
],
|
95 |
+
"source": [
|
96 |
+
"import pandas as pd\n",
|
97 |
+
"\n",
|
98 |
+
"# Load the dataset\n",
|
99 |
+
"data = pd.read_csv('/content/Crop_Dataset.csv')\n",
|
100 |
+
"\n",
|
101 |
+
"# Display the first few rows and the data info\n",
|
102 |
+
"print(data.head())\n",
|
103 |
+
"print(data.info())\n"
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"source": [
|
109 |
+
"from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
|
110 |
+
"\n",
|
111 |
+
"# Assuming 'Label' is the column with categorical data\n",
|
112 |
+
"if data['Label'].dtype == 'object':\n",
|
113 |
+
" encoder = LabelEncoder()\n",
|
114 |
+
" data['Label_Encoded'] = encoder.fit_transform(data['Label'])\n",
|
115 |
+
" y = data['Label_Encoded']\n",
|
116 |
+
"else:\n",
|
117 |
+
" y = data['Label']\n",
|
118 |
+
"\n",
|
119 |
+
"# Exclude the label column from numeric operations\n",
|
120 |
+
"numeric_features = data.select_dtypes(include=['int64', 'float64'])\n",
|
121 |
+
"X = numeric_features.drop(['Label_Encoded'], axis=1, errors='ignore')\n",
|
122 |
+
"\n",
|
123 |
+
"# Scaling numeric features\n",
|
124 |
+
"scaler = StandardScaler()\n",
|
125 |
+
"X_scaled = scaler.fit_transform(X)"
|
126 |
+
],
|
127 |
+
"metadata": {
|
128 |
+
"id": "8YDm7cLGVAdC"
|
129 |
+
},
|
130 |
+
"execution_count": 5,
|
131 |
+
"outputs": []
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"cell_type": "code",
|
135 |
+
"source": [
|
136 |
+
"print(X.head())"
|
137 |
+
],
|
138 |
+
"metadata": {
|
139 |
+
"colab": {
|
140 |
+
"base_uri": "https://localhost:8080/"
|
141 |
+
},
|
142 |
+
"id": "bxlFqxemUwVN",
|
143 |
+
"outputId": "8b0006fe-4fe9-4b98-8d8f-f66bdb4c9b0e"
|
144 |
+
},
|
145 |
+
"execution_count": 6,
|
146 |
+
"outputs": [
|
147 |
+
{
|
148 |
+
"output_type": "stream",
|
149 |
+
"name": "stdout",
|
150 |
+
"text": [
|
151 |
+
" N P K temperature humidity ph rainfall Total_Nutrients \\\n",
|
152 |
+
"0 90 42 43 20.879744 82.002744 6.502985 202.935536 175 \n",
|
153 |
+
"1 85 58 41 21.770462 80.319644 7.038096 226.655537 184 \n",
|
154 |
+
"2 60 55 44 23.004459 82.320763 7.840207 263.964248 159 \n",
|
155 |
+
"3 74 35 40 26.491096 80.158363 6.980401 242.864034 149 \n",
|
156 |
+
"4 78 42 42 20.130175 81.604873 7.628473 262.717340 162 \n",
|
157 |
+
"\n",
|
158 |
+
" Temperature_Humidity Log_Rainfall \n",
|
159 |
+
"0 1712.196283 5.317804 \n",
|
160 |
+
"1 1748.595734 5.427834 \n",
|
161 |
+
"2 1893.744627 5.579595 \n",
|
162 |
+
"3 2123.482908 5.496611 \n",
|
163 |
+
"4 1642.720357 5.574878 \n"
|
164 |
+
]
|
165 |
+
}
|
166 |
+
]
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"cell_type": "code",
|
170 |
+
"source": [
|
171 |
+
"print(y.head())\n"
|
172 |
+
],
|
173 |
+
"metadata": {
|
174 |
+
"colab": {
|
175 |
+
"base_uri": "https://localhost:8080/"
|
176 |
+
},
|
177 |
+
"id": "Xfyef1ZHVlv9",
|
178 |
+
"outputId": "1124a98a-7088-4beb-c99f-fc99695bce26"
|
179 |
+
},
|
180 |
+
"execution_count": 7,
|
181 |
+
"outputs": [
|
182 |
+
{
|
183 |
+
"output_type": "stream",
|
184 |
+
"name": "stdout",
|
185 |
+
"text": [
|
186 |
+
"0 21\n",
|
187 |
+
"1 21\n",
|
188 |
+
"2 21\n",
|
189 |
+
"3 21\n",
|
190 |
+
"4 21\n",
|
191 |
+
"Name: Label_Encoded, dtype: int64\n"
|
192 |
+
]
|
193 |
+
}
|
194 |
+
]
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"cell_type": "code",
|
198 |
+
"source": [
|
199 |
+
"from sklearn.model_selection import train_test_split\n",
|
200 |
+
"\n",
|
201 |
+
"# Split the dataset into training and testing sets\n",
|
202 |
+
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n",
|
203 |
+
"X_train, X_test, y_train, y_test\n"
|
204 |
+
],
|
205 |
+
"metadata": {
|
206 |
+
"colab": {
|
207 |
+
"base_uri": "https://localhost:8080/"
|
208 |
+
},
|
209 |
+
"id": "qeet3FQuWMYa",
|
210 |
+
"outputId": "3134ee1c-da4a-49c9-b23a-a2824087bce7"
|
211 |
+
},
|
212 |
+
"execution_count": 8,
|
213 |
+
"outputs": [
|
214 |
+
{
|
215 |
+
"output_type": "execute_result",
|
216 |
+
"data": {
|
217 |
+
"text/plain": [
|
218 |
+
"(array([[-0.90904306, -1.13294593, -0.67439784, ..., -1.31493084,\n",
|
219 |
+
" -0.49027085, 0.24780902],\n",
|
220 |
+
" [-0.36716896, 0.77739624, -0.57565467, ..., -0.21356106,\n",
|
221 |
+
" 0.07991257, -0.46657409],\n",
|
222 |
+
" [-1.17998011, 0.59545889, -0.45716288, ..., -0.58902803,\n",
|
223 |
+
" -0.16692839, -1.2389468 ],\n",
|
224 |
+
" ...,\n",
|
225 |
+
" [-1.07160529, -0.5264881 , -0.33867109, ..., -0.9269483 ,\n",
|
226 |
+
" -0.5842483 , 0.199803 ],\n",
|
227 |
+
" [-1.07160529, 2.14192637, 3.07784228, ..., 2.33961433,\n",
|
228 |
+
" -1.1140468 , -0.41541788],\n",
|
229 |
+
" [-0.50263749, 0.74707335, -0.51640878, ..., -0.25110776,\n",
|
230 |
+
" -0.51417889, -0.93933906]]),\n",
|
231 |
+
" array([[ 1.36682815, -1.10262304, -0.02269297, ..., 0.16190591,\n",
|
232 |
+
" 1.34399451, -2.20354942],\n",
|
233 |
+
" [ 1.28554704, -1.37552907, 0.05630155, ..., 0.06178138,\n",
|
234 |
+
" 0.58762688, -1.07859766],\n",
|
235 |
+
" [ 0.22889255, 0.26190709, 0.01680429, ..., 0.22448374,\n",
|
236 |
+
" 3.13720326, 0.44554626],\n",
|
237 |
+
" ...,\n",
|
238 |
+
" [ 1.90870225, -0.19293629, -0.63490057, ..., 0.39970166,\n",
|
239 |
+
" 0.02516414, -0.38782438],\n",
|
240 |
+
" [ 1.77323373, -0.04132183, -0.57565467, ..., 0.43724835,\n",
|
241 |
+
" -0.17876826, -0.5282515 ],\n",
|
242 |
+
" [-1.23416752, 0.44384444, -0.55590604, ..., -0.73921482,\n",
|
243 |
+
" -1.75019501, 0.99674145]]),\n",
|
244 |
+
" 1656 4\n",
|
245 |
+
" 752 2\n",
|
246 |
+
" 892 12\n",
|
247 |
+
" 1041 7\n",
|
248 |
+
" 1179 3\n",
|
249 |
+
" ..\n",
|
250 |
+
" 1638 4\n",
|
251 |
+
" 1095 7\n",
|
252 |
+
" 1130 3\n",
|
253 |
+
" 1294 9\n",
|
254 |
+
" 860 12\n",
|
255 |
+
" Name: Label_Encoded, Length: 1760, dtype: int64,\n",
|
256 |
+
" 1451 16\n",
|
257 |
+
" 1334 13\n",
|
258 |
+
" 1761 18\n",
|
259 |
+
" 1735 18\n",
|
260 |
+
" 1576 11\n",
|
261 |
+
" ..\n",
|
262 |
+
" 59 21\n",
|
263 |
+
" 71 21\n",
|
264 |
+
" 1908 14\n",
|
265 |
+
" 1958 14\n",
|
266 |
+
" 482 8\n",
|
267 |
+
" Name: Label_Encoded, Length: 440, dtype: int64)"
|
268 |
+
]
|
269 |
+
},
|
270 |
+
"metadata": {},
|
271 |
+
"execution_count": 8
|
272 |
+
}
|
273 |
+
]
|
274 |
+
},
|
275 |
+
{
|
276 |
+
"cell_type": "code",
|
277 |
+
"source": [
|
278 |
+
"from sklearn.tree import DecisionTreeClassifier\n",
|
279 |
+
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
280 |
+
"from sklearn.svm import SVC\n",
|
281 |
+
"from sklearn.neighbors import KNeighborsClassifier\n",
|
282 |
+
"from sklearn.metrics import accuracy_score\n",
|
283 |
+
"import joblib\n",
|
284 |
+
"\n",
|
285 |
+
"\n",
|
286 |
+
"# Define the models\n",
|
287 |
+
"models = {\n",
|
288 |
+
" 'Decision Tree': DecisionTreeClassifier(random_state=42),\n",
|
289 |
+
" 'Random Forest': RandomForestClassifier(random_state=42),\n",
|
290 |
+
" 'SVM': SVC(kernel='rbf', random_state=42),\n",
|
291 |
+
" 'KNN': KNeighborsClassifier(),\n",
|
292 |
+
" 'Gradient Boosting': GradientBoostingClassifier(random_state=42)\n",
|
293 |
+
"}\n",
|
294 |
+
"\n",
|
295 |
+
"# Train each model and evaluate on the training set\n",
|
296 |
+
"train_accuracies = {}\n",
|
297 |
+
"for name, model in models.items():\n",
|
298 |
+
" model.fit(X_train, y_train)\n",
|
299 |
+
" y_train_pred = model.predict(X_train)\n",
|
300 |
+
" train_accuracy = accuracy_score(y_train, y_train_pred)\n",
|
301 |
+
" train_accuracies[name] = train_accuracy\n",
|
302 |
+
" print(f\"{name} training accuracy: {train_accuracy:.4f}\")\n",
|
303 |
+
"\n",
|
304 |
+
" # Save the model\n",
|
305 |
+
" model_filename = f'{name.replace(\" \", \"_\").lower()}_model.joblib'\n",
|
306 |
+
" joblib.dump(model, model_filename)\n",
|
307 |
+
" print(f\"Saved {name} model as {model_filename}\")\n"
|
308 |
+
],
|
309 |
+
"metadata": {
|
310 |
+
"colab": {
|
311 |
+
"base_uri": "https://localhost:8080/"
|
312 |
+
},
|
313 |
+
"id": "TsmaeAEYbj6Y",
|
314 |
+
"outputId": "0b6e493c-9421-4d88-8e97-0591713968e3"
|
315 |
+
},
|
316 |
+
"execution_count": 9,
|
317 |
+
"outputs": [
|
318 |
+
{
|
319 |
+
"output_type": "stream",
|
320 |
+
"name": "stdout",
|
321 |
+
"text": [
|
322 |
+
"Decision Tree training accuracy: 1.0000\n",
|
323 |
+
"Saved Decision Tree model as decision_tree_model.joblib\n",
|
324 |
+
"Random Forest training accuracy: 1.0000\n",
|
325 |
+
"Saved Random Forest model as random_forest_model.joblib\n",
|
326 |
+
"SVM training accuracy: 0.9875\n",
|
327 |
+
"Saved SVM model as svm_model.joblib\n",
|
328 |
+
"KNN training accuracy: 0.9881\n",
|
329 |
+
"Saved KNN model as knn_model.joblib\n",
|
330 |
+
"Gradient Boosting training accuracy: 1.0000\n",
|
331 |
+
"Saved Gradient Boosting model as gradient_boosting_model.joblib\n"
|
332 |
+
]
|
333 |
+
}
|
334 |
+
]
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"cell_type": "code",
|
338 |
+
"source": [
|
339 |
+
"# Example new data for prediction\n",
|
340 |
+
"new_data = [[129,\t43,\t16, 25.5503704,\t77.85055621,\t6.73210948,\t78.58488484,\t188,\t1989.110547,\t4.376824186]] # Adjust these values as necessary\n",
|
341 |
+
"new_data_scaled = scaler.transform(new_data) # Assuming 'scaler' is already fitted and saved/loaded similarly\n",
|
342 |
+
"\n",
|
343 |
+
"# Load models and make predictions\n",
|
344 |
+
"predictions = {}\n",
|
345 |
+
"for name in models.keys():\n",
|
346 |
+
" model_filename = f'{name.replace(\" \", \"_\").lower()}_model.joblib'\n",
|
347 |
+
" loaded_model = joblib.load(model_filename)\n",
|
348 |
+
" prediction = loaded_model.predict(new_data_scaled)\n",
|
349 |
+
" predictions[name] = prediction\n",
|
350 |
+
"\n",
|
351 |
+
" # Assuming you have loaded your LabelEncoder as 'encoder'\n",
|
352 |
+
" decoded_prediction = encoder.inverse_transform(prediction)\n",
|
353 |
+
" print(f\"{name} prediction: {decoded_prediction}\")\n"
|
354 |
+
],
|
355 |
+
"metadata": {
|
356 |
+
"colab": {
|
357 |
+
"base_uri": "https://localhost:8080/"
|
358 |
+
},
|
359 |
+
"id": "448K06w7cT6d",
|
360 |
+
"outputId": "9263c1d3-228a-4e45-95c0-87b5d2f7c0b9"
|
361 |
+
},
|
362 |
+
"execution_count": 10,
|
363 |
+
"outputs": [
|
364 |
+
{
|
365 |
+
"output_type": "stream",
|
366 |
+
"name": "stdout",
|
367 |
+
"text": [
|
368 |
+
"Decision Tree prediction: ['potatoes']\n",
|
369 |
+
"Random Forest prediction: ['potatoes']\n",
|
370 |
+
"SVM prediction: ['potatoes']\n",
|
371 |
+
"KNN prediction: ['potatoes']\n"
|
372 |
+
]
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"output_type": "stream",
|
376 |
+
"name": "stderr",
|
377 |
+
"text": [
|
378 |
+
"/usr/local/lib/python3.10/dist-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n",
|
379 |
+
" warnings.warn(\n"
|
380 |
+
]
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"output_type": "stream",
|
384 |
+
"name": "stdout",
|
385 |
+
"text": [
|
386 |
+
"Gradient Boosting prediction: ['potatoes']\n"
|
387 |
+
]
|
388 |
+
}
|
389 |
+
]
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"cell_type": "code",
|
393 |
+
"source": [
|
394 |
+
"# Save the scaler to a file\n",
|
395 |
+
"joblib.dump(scaler, 'base_feature_scaler.joblib')"
|
396 |
+
],
|
397 |
+
"metadata": {
|
398 |
+
"colab": {
|
399 |
+
"base_uri": "https://localhost:8080/"
|
400 |
+
},
|
401 |
+
"id": "ByINvSM1gSHN",
|
402 |
+
"outputId": "6bbfe644-762c-4502-a07d-05dbcf26fd4b"
|
403 |
+
},
|
404 |
+
"execution_count": 11,
|
405 |
+
"outputs": [
|
406 |
+
{
|
407 |
+
"output_type": "execute_result",
|
408 |
+
"data": {
|
409 |
+
"text/plain": [
|
410 |
+
"['base_feature_scaler.joblib']"
|
411 |
+
]
|
412 |
+
},
|
413 |
+
"metadata": {},
|
414 |
+
"execution_count": 11
|
415 |
+
}
|
416 |
+
]
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"cell_type": "code",
|
420 |
+
"source": [
|
421 |
+
"# Save the LabelEncoder to a file\n",
|
422 |
+
"joblib.dump(encoder, 'label_encoder.joblib')"
|
423 |
+
],
|
424 |
+
"metadata": {
|
425 |
+
"colab": {
|
426 |
+
"base_uri": "https://localhost:8080/"
|
427 |
+
},
|
428 |
+
"id": "c9Uu7lsPgSk7",
|
429 |
+
"outputId": "d3bade81-25b7-47a6-e9c0-c14d2ea39339"
|
430 |
+
},
|
431 |
+
"execution_count": 12,
|
432 |
+
"outputs": [
|
433 |
+
{
|
434 |
+
"output_type": "execute_result",
|
435 |
+
"data": {
|
436 |
+
"text/plain": [
|
437 |
+
"['label_encoder.joblib']"
|
438 |
+
]
|
439 |
+
},
|
440 |
+
"metadata": {},
|
441 |
+
"execution_count": 12
|
442 |
+
}
|
443 |
+
]
|
444 |
+
}
|
445 |
+
]
|
446 |
+
}
|
app.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit app: recommend suitable crops from soil and climate inputs.

Loads a fitted StandardScaler, five trained classifiers and a LabelEncoder
from the ``models/`` directory, collects ten feature values from the
sidebar, scales them, and displays each model's decoded crop prediction.
"""

import joblib
import pandas as pd
import streamlit as st

# Feature names in the exact order the scaler/models were trained with
# (matches the training dataframe columns shown in the training notebook).
# Passing a DataFrame with these column names — instead of a bare ndarray —
# avoids sklearn's "X does not have valid feature names" UserWarning that
# appeared during training-time predictions.
FEATURE_COLUMNS = [
    'N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall',
    'Total_Nutrients', 'Temperature_Humidity', 'Log_Rainfall',
]

# Load the fitted scaler, the trained models, and the label encoder.
scaler = joblib.load('models/base_feature_scaler.joblib')
models = {
    'Decision Tree': joblib.load('models/decision_tree_model.joblib'),
    'Random Forest': joblib.load('models/random_forest_model.joblib'),
    'SVM': joblib.load('models/svm_model.joblib'),
    'KNN': joblib.load('models/knn_model.joblib'),
    'Gradient Boosting': joblib.load('models/gradient_boosting_model.joblib'),
}
encoder = joblib.load('models/label_encoder.joblib')

# Page layout and user guidance.
st.title('Crop Recommendation System')
st.markdown("""
This application provides recommendations for the most suitable crops based on environmental and soil conditions.
Please adjust the input values in the sidebar to reflect your local conditions. Here's a brief explanation of each input:

- **Nitrogen (N), Phosphorus (P), Potassium (K):** Essential nutrients required by crops. Values should be in kg/ha.
- **Temperature (C):** The average temperature of the area in degrees Celsius.
- **Humidity (%):** Average relative humidity in percentage.
- **pH Level:** Soil acidity or alkalinity on a scale from 0 to 14.
- **Rainfall (mm):** Annual rainfall in millimeters.
- **Total Nutrients:** Sum of all nutrient inputs.
- **Temperature Humidity Index:** A combined index of temperature and humidity.
- **Log Rainfall:** The logarithmic value of rainfall, providing a transformed perspective of rainfall data.

Each model will provide a prediction based on these inputs.
""")

# Sidebar input fields for all 10 features.
with st.sidebar:
    st.header('Input Features')
    N = st.number_input('Nitrogen (N)', min_value=0, max_value=200, value=50, help="Enter the amount of Nitrogen in the soil")
    P = st.number_input('Phosphorus (P)', min_value=0, max_value=200, value=40, help="Enter the amount of Phosphorus in the soil")
    K = st.number_input('Potassium (K)', min_value=0, max_value=200, value=30, help="Enter the amount of Potassium in the soil")
    with st.expander("Advanced Environmental Settings"):
        temperature = st.slider('Temperature (C)', -10.0, 50.0, 25.0)
        humidity = st.slider('Humidity (%)', 0.0, 100.0, 80.0)
        ph = st.slider('pH Level', 0.0, 14.0, 6.5)
        rainfall = st.slider('Rainfall (mm)', 0.0, 400.0, 100.0)
        total_nutrients = st.number_input('Total Nutrients', min_value=0, max_value=500, value=150)
        temperature_humidity = st.slider('Temperature Humidity Index', 0.0, 5000.0, 1500.0)
        log_rainfall = st.slider('Log Rainfall', 0.0, 10.0, 5.0)

# Assemble the single feature row under the training-time column names,
# then apply the fitted scaler so inputs match the models' training scale.
features = pd.DataFrame(
    [[N, P, K, temperature, humidity, ph, rainfall, total_nutrients,
      temperature_humidity, log_rainfall]],
    columns=FEATURE_COLUMNS,
)
features_scaled = scaler.transform(features)

# Show each model's prediction, decoded back to the crop name.
st.header('Predictions')
for name, model in models.items():
    prediction = model.predict(features_scaled)
    crop = encoder.inverse_transform(prediction)[0]  # class index -> crop label
    st.write(f'{name} predicts: {crop}')
|
models/base_feature_scaler.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5462e69f94a707cd3cda8b66a1f693afc64a9be192f7d3feca8f7ee64dfcd643
|
3 |
+
size 1303
|
models/decision_tree_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4166c8b4063e7c545aef5a958868d54bae0aef0f901a1c7c3f71b8ee2da5b88
|
3 |
+
size 18505
|
models/gradient_boosting_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aee64ca5191be5cd39c0021153add91351dc7718756e1a319db3ac4f93546fc7
|
3 |
+
size 3947837
|
models/knn_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51f069da3144eccfd9b2a78312fc1bb1af2d8d8f5bc5db06537efcdfa27fff93
|
3 |
+
size 323478
|
models/label_encoder.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8107a00fca8f49e7858b6301572734f45ffe132064db908c8d9ca1fd25d6de6c
|
3 |
+
size 843
|
models/random_forest_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5c73d908a14e26bb64f71f22b1715945d5735229a25230957ee869017e82f1e
|
3 |
+
size 2704185
|
models/svm_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b78b197c8d1ea2e83eb937ea9b26c622d9e248fa9f6fb4ee5f2d82d539013ad6
|
3 |
+
size 376307
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
pandas
|
3 |
+
scikit-learn
|
4 |
+
streamlit
|
5 |
+
joblib
|