Spaces:

bacancydataprophets
/

Insurance_Claiming_Automation

Sleeping

App Files Files Community

Insurance_Claiming_Automation / train.py

kothariyashhh

Upload 7 files

60434a7 verified about 1 year ago

raw

history blame contribute delete

2.35 kB

	# train.py - trains the insurance-claim classifier and saves it with joblib.

	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn import tree
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	import joblib

	class InsuranceClaimModelTrainer:
	    """Train and persist a decision-tree pipeline for insurance-claim prediction.

	    The pipeline standard-scales the numerical columns, one-hot encodes the
	    categorical columns and feeds the result to a ``DecisionTreeClassifier``.

	    Attributes:
	        data_path: Location of the CSV file passed to ``pandas.read_csv``.
	        model_path: File the fitted pipeline is written to by ``train_model``.
	        model: The fitted ``Pipeline`` once ``train_model`` has run, else ``None``.
	    """

	    # Column holding the label to predict.
	    TARGET_COLUMN = 'insuranceclaim'
	    # Columns routed through StandardScaler.
	    NUMERICAL_FEATURES = ('age', 'bmi', 'children', 'charges')
	    # Columns routed through OneHotEncoder.
	    CATEGORICAL_FEATURES = ('sex', 'smoker', 'region')
	    # Default output location, kept for backward compatibility.
	    DEFAULT_MODEL_PATH = 'model/insurance_claim_prediction_model.joblib'

	    def __init__(self, data_path, model_path=DEFAULT_MODEL_PATH):
	        """Store the input and output locations; no I/O happens here.

	        Args:
	            data_path: CSV file containing the features and the target column.
	            model_path: Destination file for the serialized pipeline.
	        """
	        self.data_path = data_path
	        self.model_path = model_path
	        self.model = None

	    def load_data(self):
	        """Read the CSV and split it into features and target.

	        Returns:
	            A ``(X, y)`` tuple where ``X`` is the frame without the target
	            column and ``y`` is the target column itself.

	        Raises:
	            KeyError: If the target column is missing from the CSV.
	        """
	        df = pd.read_csv(self.data_path)
	        X = df.drop(columns=[self.TARGET_COLUMN])
	        y = df[self.TARGET_COLUMN]
	        return X, y

	    def preprocess_data(self, X):
	        """Build the (unfitted) column-wise preprocessing step.

	        Args:
	            X: Feature frame. Currently unused; the column lists are fixed by
	                the class constants. Kept so existing callers keep working.

	        Returns:
	            A ``ColumnTransformer`` that scales the numerical columns and
	            one-hot encodes the categorical columns.
	        """
	        numerical_transformer = StandardScaler()
	        categorical_transformer = OneHotEncoder(handle_unknown='ignore', drop='first')

	        preprocessor = ColumnTransformer(
	            transformers=[
	                ('num', numerical_transformer, list(self.NUMERICAL_FEATURES)),
	                ('cat', categorical_transformer, list(self.CATEGORICAL_FEATURES)),
	            ])

	        return preprocessor

	    def train_model(self):
	        """Fit the pipeline on an 80/20 split, report accuracy and save it.

	        The fitted pipeline (preprocessing and classifier together) is stored
	        on ``self.model`` and written to ``self.model_path`` with joblib. The
	        parent directory of ``self.model_path`` is created when missing.
	        """
	        # Local import so this block does not depend on file-level imports.
	        import os

	        X, y = self.load_data()
	        preprocessor = self.preprocess_data(X)

	        self.model = Pipeline(steps=[
	            ('preprocessor', preprocessor),
	            ('classifier', tree.DecisionTreeClassifier(random_state=42)),
	        ])

	        # Fixed seeds keep both the split and the fitted tree reproducible.
	        X_train, X_test, y_train, y_test = train_test_split(
	            X, y, test_size=0.2, random_state=10)

	        self.model.fit(X_train, y_train)

	        # Use the held-out rows, which were previously split off and ignored.
	        accuracy = self.model.score(X_test, y_test)
	        print(f"Hold-out accuracy: {accuracy:.4f}")

	        # joblib.dump does not create missing directories; without this a
	        # fresh checkout fails only after the whole training run has finished.
	        output_dir = os.path.dirname(self.model_path)
	        if output_dir:
	            os.makedirs(output_dir, exist_ok=True)

	        joblib.dump(self.model, self.model_path)
	        print("Model trained and saved successfully!")

	if __name__ == "__main__":
	    # Script entry point: train on the bundled dataset and save the model.
	    InsuranceClaimModelTrainer('dataset/insurance2.csv').train_model()