Spaces:

Henok21
/

Economic_Inequality_Loader

Sleeping

App Files Files Community

Economic_Inequality_Loader / app.py

Henok21

correcting path

b86ddaa over 1 year ago

raw

history blame

2.94 kB

	# For app development
	from fastapi import FastAPI, File, UploadFile
	from typing import Annotated
	from fastapi import FastAPI, Form, Depends
	# Import the Response class from fastapi
	from fastapi import Response
	import pandas as pd
	import uvicorn
	from pydantic import BaseModel
	# For data frame
	import pandas as pd
	# For loading pipeline
	import joblib
	# For controlling warnings
	import warnings
	warnings.filterwarnings('ignore')
	# For system functions
	from io import BytesIO
	import os

	# Pipeline loading: deserialize the saved pipeline once, at import time,
	# so the request handler can reuse the same object.
	with open("catboost_pipeline_4.job", "rb") as pipeline_file:
	    pipe = joblib.load(pipeline_file)
	# Echo the loaded pipeline to stdout.
	print(pipe)

	# Instantiate the FastAPI application object.
	# The title is assembled with implicit string concatenation instead of a
	# backslash continuation inside the literal: with a backslash, the leading
	# whitespace of the continuation line becomes part of the title text.
	app = FastAPI(
	    title=(
	        "Machine Learning Classification API for predicting "
	        "Income limit as Above Limit / Below Limit"
	    )
	)

	# Feature columns selected from the uploaded CSV before prediction.
	_FEATURE_COLUMNS = [
	    'age', 'gender', 'education', 'class', 'marital_status', 'race',
	    'is_hispanic', 'employment_commitment', 'employment_stat',
	    'wage_per_hour', 'working_week_per_year', 'industry_code',
	    'industry_code_main', 'occupation_code', 'occupation_code_main',
	    'total_employed', 'household_stat', 'household_summary', 'vet_benefit',
	    'tax_status', 'gains', 'losses', 'stocks_status', 'citizenship',
	    'mig_year', 'country_of_birth_own', 'country_of_birth_father',
	    'country_of_birth_mother', 'importance_of_record',
	]

	# Maps the argmax index of a probability row to the label written to the output.
	# NOTE(review): assumes index 0 of predict_proba is the "below limit" class --
	# confirm against the class order of the loaded pipeline.
	_LABELS = {0: "Below Limit", 1: "Above Limit"}


	def _build_result_frame(record_ids, probabilities):
	    """Combine record ids with a label and confidence for every row.

	    Args:
	        record_ids: The "ID" column of the uploaded data (a pandas Series).
	        probabilities: The value returned by ``pipe.predict_proba``;
	            presumably a 2-D array with one row per record and one column
	            per class -- verify against the pipeline.

	    Returns:
	        A DataFrame with the columns ``ID``, ``predicted_label`` and
	        ``confidence_score`` (a percentage string such as ``"87.35%"``).
	    """
	    predicted_classes = probabilities.argmax(axis=-1)
	    # One confidence per row: the highest class probability of that row.
	    # (Formatting only the first row's value would repeat it for every record.)
	    confidences = probabilities.max(axis=-1)
	    return pd.DataFrame({
	        "ID": record_ids.to_numpy(),
	        "predicted_label": [_LABELS[cls] for cls in predicted_classes],
	        "confidence_score": [
	            f"{round(float(score) * 100, 2)}%" for score in confidences
	        ],
	    })


	@app.post("/predict")
	async def predict(file: UploadFile = File(...)):
	    """Predict the income class for every row of an uploaded CSV file.

	    The upload must contain an ``ID`` column and every column listed in
	    ``_FEATURE_COLUMNS``. The ``class`` column is renamed to ``class_``
	    before the frame is passed to the pipeline.

	    Args:
	        file: The uploaded CSV file.

	    Returns:
	        On success, a ``text/csv`` response with the columns ``ID``,
	        ``predicted_label`` and ``confidence_score``. On failure, a dict
	        ``{"error": <message>}``; no exception propagates to the caller.
	    """
	    try:
	        content = await file.read()
	        df = pd.read_csv(BytesIO(content))

	        # Report missing input columns by name instead of a bare KeyError text.
	        required = ["ID", *_FEATURE_COLUMNS]
	        missing = [col for col in required if col not in df.columns]
	        if missing:
	            return {"error": f"Missing required columns: {missing}"}

	        # Keep only the necessary columns and rename column 'class'.
	        features = df[_FEATURE_COLUMNS].rename(columns={'class': 'class_'})

	        probabilities = pipe.predict_proba(features)
	        df_final = _build_result_frame(df["ID"], probabilities)

	        # Return the result as a CSV string with the Response class.
	        df_csv = df_final.to_csv(index=False)
	        return Response(content=df_csv, media_type="text/csv")

	    except Exception as e:
	        # Endpoint boundary: return an error message if something goes wrong.
	        return {"error": str(e)}

	if __name__ == "__main__":
	    # With reload=True uvicorn needs the app as an import string
	    # ("module:attribute"). Derive the module name from this file's name so
	    # the string always points at the module that defines `app`, instead of
	    # a hard-coded "main" that only resolves if the file is called main.py.
	    module_name = os.path.splitext(os.path.basename(__file__))[0]
	    uvicorn.run(f"{module_name}:app", reload=True)