# Spaces: Sleeping
# For app development | |
from fastapi import FastAPI, File, UploadFile | |
from typing import Annotated | |
from fastapi import FastAPI, Form, Depends | |
# Import the Response class from fastapi | |
from fastapi import Response | |
import pandas as pd | |
import uvicorn | |
from pydantic import BaseModel | |
# For data frame | |
import pandas as pd | |
# For loading pipeline | |
import joblib | |
# For controlling warnings | |
import warnings | |
warnings.filterwarnings('ignore') | |
# For system functions | |
from io import BytesIO | |
import os | |
# Pipeline loading: the serialized pipeline is read once, when the module is
# imported, and kept in the module-level name ``pipe``.
with open("catboost_pipeline_4.job", "rb") as pipeline_file:
    pipe = joblib.load(pipeline_file)
# Echo the loaded object to stdout.
print(pipe)
# Instantiate the FastAPI application object.
app = FastAPI(
    title=(
        "Machine Learning Classification API for predicting "
        "Income limit as Above Limit / Below Limit"
    ),
)
async def predict(file: UploadFile = File(...)):
    """Score an uploaded CSV and return one prediction per row as CSV.

    The upload is parsed with pandas, reduced to the feature columns listed
    in ``feature_columns``, and scored with ``pipe.predict_proba``.

    Args:
        file: Uploaded CSV file. It must contain an ``ID`` column and every
            column named in ``feature_columns``.

    Returns:
        A ``text/csv`` ``Response`` with the columns ``ID``,
        ``predicted_label`` ("Below Limit" or "Above Limit") and
        ``confidence_score`` (the row's highest class probability, formatted
        as a percentage string rounded to two decimals). If any step raises,
        a dict of the form ``{"error": "<message>"}`` is returned instead of
        propagating the exception.
    """
    # NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    # this function in the reviewed source -- confirm it is registered on
    # ``app``; otherwise the handler is never reachable.
    feature_columns = [
        'age', 'gender', 'education', 'class', 'marital_status', 'race',
        'is_hispanic', 'employment_commitment', 'employment_stat',
        'wage_per_hour', 'working_week_per_year', 'industry_code',
        'industry_code_main', 'occupation_code', 'occupation_code_main',
        'total_employed', 'household_stat', 'household_summary', 'vet_benefit',
        'tax_status', 'gains', 'losses', 'stocks_status', 'citizenship',
        'mig_year', 'country_of_birth_own', 'country_of_birth_father',
        'country_of_birth_mother', 'importance_of_record',
    ]
    label_names = {0: "Below Limit", 1: "Above Limit"}

    try:
        content = await file.read()
        uploaded = pd.read_csv(BytesIO(content))

        # Keep the identifiers aside; only the feature columns are scored.
        record_ids = uploaded["ID"].copy()
        features = uploaded[feature_columns].copy()

        # The pipeline is given this column under the name ``class_``
        # (presumably the name it was fitted with -- verify against training).
        features.rename(columns={'class': 'class_'}, inplace=True)

        probabilities = pipe.predict_proba(features)
        predicted_classes = probabilities.argmax(axis=-1)
        row_confidences = probabilities.max(axis=-1)

        result = pd.DataFrame({"ID": record_ids})
        # An unexpected class index raises KeyError here and is reported
        # through the error payload below.
        result["predicted_label"] = [
            label_names[class_index] for class_index in predicted_classes
        ]
        # Format every row's own winning probability; sharing one value
        # across the frame would misreport all rows but the first.
        result["confidence_score"] = [
            f"{round(score * 100, 2)}%" for score in row_confidences
        ]

        # Serialize without the index so the CSV holds only the three columns.
        return Response(content=result.to_csv(index=False), media_type="text/csv")
    except Exception as e:
        # Top-level boundary: report the failure message to the client
        # rather than letting the exception escape the handler.
        return {"error": str(e)}
# Script entry point: start uvicorn only when this file is executed directly.
if __name__ == "__main__":
    # The app is passed as the import string "main:app" with reload enabled.
    uvicorn.run("main:app", reload=True)