Spaces:
Sleeping
Sleeping
adding application
Browse files
main.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# For app development
|
2 |
+
from fastapi import FastAPI, File, UploadFile
|
3 |
+
from typing import Annotated
|
4 |
+
from fastapi import FastAPI, Form, Depends
|
5 |
+
# Import the Response class from fastapi
|
6 |
+
from fastapi import Response
|
7 |
+
import pandas as pd
|
8 |
+
import uvicorn
|
9 |
+
from pydantic import BaseModel
|
10 |
+
# For data frame
|
11 |
+
import pandas as pd
|
12 |
+
# For loading pipeline
|
13 |
+
import joblib
|
14 |
+
# For controlling warnings
|
15 |
+
import warnings
|
16 |
+
warnings.filterwarnings('ignore')
|
17 |
+
# For system functions
|
18 |
+
from io import BytesIO
|
19 |
+
import os
|
20 |
+
|
21 |
+
# Pipeline loading
# The path is assembled with os.path.join so the separators are correct on
# every OS; a literal "..\\..\\data\\..." string only resolves on Windows,
# because on POSIX systems a backslash is an ordinary filename character.
# The path stays relative to the current working directory.
_PIPELINE_PATH = os.path.join("..", "..", "data", "catboost_pipeline_4.job")
with open(_PIPELINE_PATH, "rb") as f:
    pipe = joblib.load(f)
print(pipe)

# Instantiating the FastAPI object
# Implicit string concatenation keeps the title on two source lines without a
# backslash continuation, which would embed any leading indentation of the
# second line into the title text.
app = FastAPI(
    title=(
        "Machine Learning Classification API for predicting "
        "Income limit as Above Limit / Below Limit"
    )
)
|
29 |
+
|
30 |
+
@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Score an uploaded CSV with the loaded pipeline and return the results as CSV.

    The upload must contain an ``ID`` column plus every feature column listed
    in ``columns`` below. The ``class`` column is renamed to ``class_`` before
    the frame is passed to ``pipe.predict_proba``.

    Args:
        file: The CSV file sent as multipart form data.

    Returns:
        On success, a ``text/csv`` ``Response`` with one row per input row and
        the columns ``ID``, ``predicted_label`` and ``confidence_score``.
        On any failure, a ``{"error": <message>}`` dict.
    """
    try:
        content = await file.read()
        df = pd.read_csv(BytesIO(content))

        columns = [
            'age', 'gender', 'education', 'class', 'marital_status', 'race',
            'is_hispanic', 'employment_commitment', 'employment_stat',
            'wage_per_hour', 'working_week_per_year', 'industry_code',
            'industry_code_main', 'occupation_code', 'occupation_code_main',
            'total_employed', 'household_stat', 'household_summary', 'vet_benefit',
            'tax_status', 'gains', 'losses', 'stocks_status', 'citizenship',
            'mig_year', 'country_of_birth_own', 'country_of_birth_father',
            'country_of_birth_mother', 'importance_of_record',
        ]

        # Fail with a readable message instead of a bare KeyError such as "'ID'".
        missing = [name for name in ["ID", *columns] if name not in df.columns]
        if missing:
            raise ValueError(f"Missing required column(s): {', '.join(missing)}")

        # Copying necessary columns
        record_ids = df["ID"].copy()

        # Renaming column class
        features = df[columns].rename(columns={'class': 'class_'})

        # Predicting...
        probabilities = pipe.predict_proba(features)

        # The most probable class index per row, mapped to its label.
        mapping = {0: "Below Limit", 1: "Above Limit"}
        labels = [mapping[index] for index in probabilities.argmax(axis=-1)]

        # Calculating confidence score
        # One score per row: the highest class probability of that row.
        # (Formatting only the first row's score would stamp the same value
        # onto every row of the output.)
        confidences = [
            f"{round((score * 100), 2)}%" for score in probabilities.max(axis=-1)
        ]

        # Assemble the output frame; the lists are positional and have one
        # entry per input row, matching ``record_ids``.
        df_final = pd.DataFrame(
            {
                "ID": record_ids,
                "predicted_label": labels,
                "confidence_score": confidences,
            }
        )

        # Convert the dataframe to a CSV string
        df_csv = df_final.to_csv(index=False)

        # Return the CSV string with the response class
        return Response(content=df_csv, media_type="text/csv")

    except Exception as e:
        # Endpoint boundary: report the failure to the client as a JSON body
        # rather than letting the exception propagate.
        return {"error": str(e)}
|
87 |
+
|
88 |
+
# Script entry point: when this file is executed directly, start uvicorn
# serving the "main:app" import string with reload enabled.
if __name__ == "__main__":
    uvicorn.run("main:app", reload=True)
|