In [3]:
!pip install -U scikit-learn



In [4]:
import pandas as pd

# Load the dataset
data = pd.read_csv('/content/Crop_Dataset.csv')

# Display the first few rows and the data info
print(data.head())
print(data.info())


    N   P   K  temperature   humidity        ph    rainfall  Total_Nutrients  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536              175   
1  85  58  41    21.770462  80.319644  7.038096  226.655537              184   
2  60  55  44    23.004459  82.320763  7.840207  263.964248              159   
3  74  35  40    26.491096  80.158363  6.980401  242.864034              149   
4  78  42  42    20.130175  81.604873  7.628473  262.717340              162   

   Temperature_Humidity  Log_Rainfall  Label  Label_Encoded  
0           1712.196283      5.317804  wheat              0  
1           1748.595734      5.427834  wheat              0  
2           1893.744627      5.579595  wheat              0  
3           2123.482908      5.496611  wheat              0  
4           1642.720357      5.574878  wheat              0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 12 columns):
 #   Column                Non-Null Coun

In [5]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Assuming 'Label' is the column with categorical data
if data['Label'].dtype == 'object':
    encoder = LabelEncoder()
    data['Label_Encoded'] = encoder.fit_transform(data['Label'])
    y = data['Label_Encoded']
else:
    y = data['Label']

# Exclude the label column from numeric operations
numeric_features = data.select_dtypes(include=['int64', 'float64'])
X = numeric_features.drop(['Label_Encoded'], axis=1, errors='ignore')

# Scaling numeric features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
print(X.head())

    N   P   K  temperature   humidity        ph    rainfall  Total_Nutrients  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536              175   
1  85  58  41    21.770462  80.319644  7.038096  226.655537              184   
2  60  55  44    23.004459  82.320763  7.840207  263.964248              159   
3  74  35  40    26.491096  80.158363  6.980401  242.864034              149   
4  78  42  42    20.130175  81.604873  7.628473  262.717340              162   

   Temperature_Humidity  Log_Rainfall  
0           1712.196283      5.317804  
1           1748.595734      5.427834  
2           1893.744627      5.579595  
3           2123.482908      5.496611  
4           1642.720357      5.574878  


In [7]:
print(y.head())


0    21
1    21
2    21
3    21
4    21
Name: Label_Encoded, dtype: int64


In [8]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test


(array([[-0.90904306, -1.13294593, -0.67439784, ..., -1.31493084,
         -0.49027085,  0.24780902],
        [-0.36716896,  0.77739624, -0.57565467, ..., -0.21356106,
          0.07991257, -0.46657409],
        [-1.17998011,  0.59545889, -0.45716288, ..., -0.58902803,
         -0.16692839, -1.2389468 ],
        ...,
        [-1.07160529, -0.5264881 , -0.33867109, ..., -0.9269483 ,
         -0.5842483 ,  0.199803  ],
        [-1.07160529,  2.14192637,  3.07784228, ...,  2.33961433,
         -1.1140468 , -0.41541788],
        [-0.50263749,  0.74707335, -0.51640878, ..., -0.25110776,
         -0.51417889, -0.93933906]]),
 array([[ 1.36682815, -1.10262304, -0.02269297, ...,  0.16190591,
          1.34399451, -2.20354942],
        [ 1.28554704, -1.37552907,  0.05630155, ...,  0.06178138,
          0.58762688, -1.07859766],
        [ 0.22889255,  0.26190709,  0.01680429, ...,  0.22448374,
          3.13720326,  0.44554626],
        ...,
        [ 1.90870225, -0.19293629, -0.63490057, ...,  

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import joblib


# Define the models
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC(kernel='rbf', random_state=42),
    'KNN': KNeighborsClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

# Train each model and evaluate on the training set
train_accuracies = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    train_accuracy = accuracy_score(y_train, y_train_pred)
    train_accuracies[name] = train_accuracy
    print(f"{name} training accuracy: {train_accuracy:.4f}")

    # Save the model
    model_filename = f'{name.replace(" ", "_").lower()}_model.joblib'
    joblib.dump(model, model_filename)
    print(f"Saved {name} model as {model_filename}")


Decision Tree training accuracy: 1.0000
Saved Decision Tree model as decision_tree_model.joblib
Random Forest training accuracy: 1.0000
Saved Random Forest model as random_forest_model.joblib
SVM training accuracy: 0.9875
Saved SVM model as svm_model.joblib
KNN training accuracy: 0.9881
Saved KNN model as knn_model.joblib
Gradient Boosting training accuracy: 1.0000
Saved Gradient Boosting model as gradient_boosting_model.joblib


In [10]:
# Example new data for prediction
new_data = [[129,	43,	16, 25.5503704,	77.85055621,	6.73210948,	78.58488484,	188,	1989.110547,	4.376824186]]  # Adjust these values as necessary
new_data_scaled = scaler.transform(new_data)  # Assuming 'scaler' is already fitted and saved/loaded similarly

# Load models and make predictions
predictions = {}
for name in models.keys():
    model_filename = f'{name.replace(" ", "_").lower()}_model.joblib'
    loaded_model = joblib.load(model_filename)
    prediction = loaded_model.predict(new_data_scaled)
    predictions[name] = prediction

    # Assuming you have loaded your LabelEncoder as 'encoder'
    decoded_prediction = encoder.inverse_transform(prediction)
    print(f"{name} prediction: {decoded_prediction}")


Decision Tree prediction: ['potatoes']
Random Forest prediction: ['potatoes']
SVM prediction: ['potatoes']
KNN prediction: ['potatoes']




Gradient Boosting prediction: ['potatoes']


In [11]:
# Save the scaler to a file
joblib.dump(scaler, 'base_feature_scaler.joblib')

['base_feature_scaler.joblib']

In [12]:
# Save the LabelEncoder to a file
joblib.dump(encoder, 'label_encoder.joblib')

['label_encoder.joblib']