File size: 2,942 Bytes
9de1c7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b86ddaa
9de1c7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# For app development
from fastapi import FastAPI, File, UploadFile
from typing import Annotated
from fastapi import FastAPI, Form, Depends
# Import the Response class from fastapi
from fastapi import Response
import pandas as pd
import uvicorn
from pydantic import BaseModel
# For data frame
import pandas as pd
# For loading pipeline
import joblib
# For controlling warnings
import warnings
warnings.filterwarnings('ignore')
# For system functions
from io import BytesIO
import os

# Pipeline loading: deserialize the saved pipeline once at import time so that
# every request reuses the same object.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; this path must only ever point at a trusted artifact.
with open("catboost_pipeline_4.job", "rb") as f:
    pipe = joblib.load(f)
print(pipe)

# Instantiating the FastAPI object. The title is built with implicit string
# concatenation: a backslash continuation inside the literal would embed the
# next line's indentation in the title shown in the generated docs.
app = FastAPI(
    title=(
        "Machine Learning Classification API for predicting "
        "Income limit as Above Limit / Below Limit"
    )
)
  
# Feature columns selected from the uploaded CSV and handed to the pipeline.
_FEATURE_COLUMNS = [
    'age', 'gender', 'education', 'class', 'marital_status', 'race',
    'is_hispanic', 'employment_commitment', 'employment_stat',
    'wage_per_hour', 'working_week_per_year', 'industry_code',
    'industry_code_main', 'occupation_code', 'occupation_code_main',
    'total_employed', 'household_stat', 'household_summary', 'vet_benefit',
    'tax_status', 'gains', 'losses', 'stocks_status', 'citizenship',
    'mig_year', 'country_of_birth_own', 'country_of_birth_father',
    'country_of_birth_mother', 'importance_of_record',
]

# Maps the argmax column index of predict_proba to the label sent to the client.
# NOTE(review): presumes the pipeline orders its classes as 0 = below the
# limit and 1 = above the limit -- confirm against the training code.
_LABELS = {0: "Below Limit", 1: "Above Limit"}


def _build_result(ids, probabilities):
    """Assemble the response table from record IDs and class probabilities.

    Args:
        ids: The "ID" column of the uploaded data, one entry per row.
        probabilities: Output of ``predict_proba``; one row per record and
            one column per class.

    Returns:
        A DataFrame with the columns ``ID``, ``predicted_label`` and
        ``confidence_score``.
    """
    result = pd.DataFrame({"ID": ids})
    result["predicted_label"] = [
        _LABELS[class_index] for class_index in probabilities.argmax(axis=-1)
    ]
    # Format the confidence of each row individually; indexing only the first
    # element would stamp the first row's confidence onto every record.
    result["confidence_score"] = [
        f"{round(score * 100, 2)}%" for score in probabilities.max(axis=-1)
    ]
    return result


@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Classify every row of an uploaded CSV file.

    The upload must contain an ``ID`` column plus every column listed in
    ``_FEATURE_COLUMNS``. The ``class`` column is renamed to ``class_``
    before the frame is passed to the pipeline.

    Args:
        file: The uploaded CSV file.

    Returns:
        On success, a ``text/csv`` response with the columns ``ID``,
        ``predicted_label`` and ``confidence_score`` (the highest class
        probability as a percentage rounded to two decimals). On failure, a
        dict of the form ``{"error": <message>}``.
    """
    try:
        content = await file.read()
        df = pd.read_csv(BytesIO(content))

        # Report absent columns by name instead of surfacing a bare KeyError.
        missing = [
            name for name in ["ID", *_FEATURE_COLUMNS] if name not in df.columns
        ]
        if missing:
            return {"error": f"Missing required column(s): {', '.join(missing)}"}

        # Copying necessary columns
        record_ids = df["ID"].copy()
        # Renaming column class
        features = df[_FEATURE_COLUMNS].rename(columns={'class': 'class_'})

        # Predicting...
        probabilities = pipe.predict_proba(features)
        result = _build_result(record_ids, probabilities)

        # Return the CSV string with the Response class
        return Response(content=result.to_csv(index=False), media_type="text/csv")

    except Exception as e:
        # Kept as a catch-all so clients always receive the {"error": ...}
        # payload they already handle when anything goes wrong.
        return {"error": str(e)}

# Launch uvicorn with auto-reload when this file is executed as a script.
# NOTE(review): the "main:app" import string presumes this module is saved as
# main.py -- confirm.
if __name__ == "__main__":
    uvicorn.run(app="main:app", reload=True)