Spaces:

Keytaro
/

InsuranceChargePredictor

Sleeping

App Files Files Community

InsuranceChargePredictor / app.py

Keytaro

renew

1b5e7ca 10 months ago

raw

history blame contribute delete

3.28 kB

	# Import the libraries
	import os
	import uuid
	import joblib
	import json

	import gradio as gr
	import pandas as pd

	from huggingface_hub import CommitScheduler
	from pathlib import Path


	# Run the training script placed in the same directory as app.py.
	# train.py is expected to train a linear regression model and persist it
	# under the filename 'model.joblib', which is loaded right after this step.
	import subprocess
	import sys

	try:
	    # Launch train.py with the interpreter that is running this app so the
	    # child process sees the same installed packages; a bare 'python' looked
	    # up on PATH may point at a different (or missing) interpreter.
	    # sys.executable can be empty/None in unusual embeddings, hence the
	    # fallback to the original behaviour.
	    result = subprocess.run(
	        [sys.executable or 'python', 'train.py'],
	        check=True,
	        capture_output=True,
	        text=True,
	    )
	except subprocess.CalledProcessError as e:
	    # check=True raises when train.py exits with a non-zero status; show its
	    # captured stderr and abort start-up because no fresh model is available.
	    print(f"Error occurred:{e.stderr}")
	    # sys.exit instead of the site-provided exit(), which is meant for the
	    # interactive shell and is not guaranteed to exist in every runtime.
	    sys.exit(1)
	else:
	    print(f"Training done.{result.stdout}")

	# Load the model that the training step wrote to 'model.joblib'
	insurance_charge_predictor = joblib.load('model.joblib')

	# Logging setup: create the local log folder (if it is missing) and pick a
	# log file inside it whose name contains a random UUID.
	log_folder = Path("logs/")
	log_folder.mkdir(parents=True, exist_ok=True)
	log_file = log_folder / f"data_{uuid.uuid4()}.json"

	# Background scheduler that pushes the contents of the log folder to the
	# "data" directory of the Hub dataset repo named below.
	# NOTE(review): `every=2` is the interval between pushes, in minutes per the
	# huggingface_hub documentation - confirm against the installed version.
	scheduler = CommitScheduler(
	    repo_id="Keytaro/insurance-charge-mlops-logs",
	    repo_type="dataset",
	    folder_path=log_folder,
	    path_in_repo="data",
	    every=2,
	)

	# Prediction handler: takes the feature values, converts them to a DataFrame
	# and makes a prediction using the loaded model. The function runs when
	# 'Submit' is clicked or when an API request is made.
	def predict_insurance_charge(age, bmi, children, sex, smoker, region):
	    """Predict the insurance charge for one person and log the request.

	    Args:
	        age: Value for the 'age' feature column.
	        bmi: Value for the 'bmi' feature column.
	        children: Value for the 'children' feature column.
	        sex: Value for the 'sex' feature column.
	        smoker: Value for the 'smoker' feature column.
	        region: Value for the 'region' feature column.

	    Returns:
	        The first (and only) element of the model's prediction for the
	        single-row input, as a built-in ``float``.
	    """
	    # Single-row frame; the column names are the ones passed to the model.
	    input_data = pd.DataFrame(
	        [[age, bmi, children, sex, smoker, region]],
	        columns=['age', 'bmi', 'children', 'sex', 'smoker', 'region'],
	    )

	    # Convert the model output to a plain float once. json.dumps only accepts
	    # NumPy scalars that happen to subclass float (float64); converting here
	    # keeps logging working for other numeric dtypes as well and hands the
	    # caller an ordinary Python number.
	    predicted_charge = float(insurance_charge_predictor.predict(input_data)[0])

	    record = {
	        'age': age,
	        'bmi': bmi,
	        'children': children,
	        'sex': sex,
	        'smoker': smoker,
	        'region': region,
	        'prediction': predicted_charge,
	    }

	    # Hold the commit scheduler's lock while appending so the scheduler does
	    # not access the log file in the middle of a write. Each request becomes
	    # one JSON object on its own line.
	    with scheduler.lock:
	        with log_file.open("a", encoding="utf-8") as f:
	            f.write(json.dumps(record) + "\n")

	    return predicted_charge



	# Input widgets, listed in the same order as the parameters of
	# predict_insurance_charge: three numeric fields followed by three dropdowns.
	inputs = [
	    gr.Number(label="Age (Number)"),
	    gr.Number(label="BMI (Number)"),
	    gr.Number(label="The number of children (Number)"),
	    gr.Dropdown(label="Sex", choices=["male", "female"]),
	    gr.Dropdown(label="Smoker", choices=["yes", "no"]),
	    gr.Dropdown(
	        label="Region",
	        choices=["northeast", "northwest", "southeast", "southwest"],
	    ),
	]

	# Single numeric output showing the value returned by the prediction handler
	output = gr.Number(label="Predicted Insurance Charge")

	# Build the Gradio interface around the prediction handler.
	# NOTE(review): the meaning of allow_flagging="auto" and concurrency_limit=8
	# depends on the installed Gradio version - check the gr.Interface docs.
	demo = gr.Interface(
	    fn=predict_insurance_charge,
	    inputs=inputs,
	    outputs=output,
	    title="HealthyLife Insurance Charge Predictor",
	    description="This API allows you to predict the insurance charges based on personal health data.",
	    allow_flagging="auto",
	    concurrency_limit=8,
	)

	# Turn on Gradio's request queue, then start the app without a public share link
	demo.queue()
	demo.launch(share=False)