sanbatte committed on
Commit
18a33a3
·
1 Parent(s): 4a0ff0f

Add application file

Browse files
Files changed (6) hide show
  1. Dockerfile +14 -0
  2. data/dataTest.csv +0 -0
  3. data/lightgbm_deuda.pkl +0 -0
  4. main.py +71 -0
  5. requirements.txt +12 -0
  6. utils.py +23 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
# You will also find guides on how best to write your Dockerfile.

FROM python:3.9

WORKDIR /code

# Copy only the requirements file first so the dependency layer is cached
# independently of application-code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Hugging Face Spaces expects the app to listen on port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
data/dataTest.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/lightgbm_deuda.pkl ADDED
Binary file (341 kB). View file
 
main.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from os import getenv
import pandas as pd
import joblib
import numpy as np
from utils import prepare_data

app = FastAPI()

# Load the LightGBM model that was persisted with joblib.
with open("data/lightgbm_deuda.pkl", "rb") as file:
    model = joblib.load(file)

# Load the scoring dataset; prepare_data() drops bookkeeping columns and
# indexes the frame by invoiceId so /predict can look rows up directly.
df = pd.read_csv("data/dataTest.csv")
df = prepare_data(df)
class PredictionRequest(BaseModel):
    """Request payload for the /predict endpoint.

    Validated by pydantic before the endpoint body runs.
    """

    # Invoice identifiers to score; each must exist in the dataset index.
    invoiceId: list[int]
    # Country code; the endpoint only accepts "CL" and "MX".
    country: str
class PredictionResponse(BaseModel):
    """One scored invoice: the id sent by the caller and its model score."""

    # Identifier echoed back from the request.
    invoiceId: int
    # Model output for this invoice.
    prediction: float
@app.post("/predict")
def predict(request: PredictionRequest):
    """Score the requested invoices and return one prediction per invoiceId.

    Raises HTTP 400 when an invoiceId is not in the dataset or the country
    code is not supported.
    """
    # Guard: every requested invoiceId must exist in the prepared DataFrame.
    invalid_ids = set(request.invoiceId) - set(df.index)
    if invalid_ids:
        raise HTTPException(
            status_code=400, detail=f"Invalid invoiceId(s): {invalid_ids}"
        )

    # Guard: only Chilean and Mexican invoices are supported.
    if request.country not in ("CL", "MX"):
        raise HTTPException(
            status_code=400, detail=f"Invalid country code: {request.country}"
        )

    # Select the feature rows for the requested invoices, preserving the
    # order in which the caller listed them.
    features = df.loc[request.invoiceId]

    # Run the model over the selected rows.
    scores = model.predict(features)

    # Pair each invoiceId with its score, coercing to plain floats so the
    # response is JSON-serializable.
    results = []
    for invoice_id, score in zip(request.invoiceId, scores):
        results.append({"invoiceId": invoice_id, "prediction": float(score)})
    return results
65
+
66
+
if __name__ == "__main__":
    # Local development entry point; inside Docker the server is started by
    # the image's CMD instead.
    import uvicorn

    print("building")
    # uvicorn only honors reload=True when given an import string, not an
    # app object; port 7860 matches the Dockerfile CMD so local runs and
    # container runs behave the same.
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.108.0
2
+ joblib==1.3.2
3
+ lightgbm==4.2.0
4
+ numpy==1.26.3
5
+ pandas==2.1.4
6
+ pydantic==2.5.3
7
+ pydantic_core==2.14.6
8
+ scikit-learn==1.3.2
9
+ scipy==1.11.4
10
+ uvicorn==0.25.0
11
+
12
+
utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
def prepare_data(df: pd.DataFrame = None) -> pd.DataFrame:
    """Prepare the raw invoice dataset for model scoring.

    Drops bookkeeping/identifier columns, indexes rows by ``invoiceId`` and
    keeps only the feature columns, in the order the model expects.

    Args:
        df: Raw dataframe as read from ``data/dataTest.csv``.

    Returns:
        A copy of the dataframe indexed by ``invoiceId`` containing only the
        model's feature columns.

    Raises:
        ValueError: If ``df`` is None (the default was previously allowed to
            fall through to an opaque ``AttributeError``).
        KeyError: If any expected column is missing from ``df``.
    """
    if df is None:
        raise ValueError("prepare_data requires a DataFrame, got None")

    # One drop call instead of two sequential ones: these columns are not
    # used as model features (identifiers and CSV export artifacts).
    df = df.drop(columns=["Unnamed: 0", "overdueDays", "businessId", "payerId"])
    df = df.set_index("invoiceId")

    # Keep the feature columns in the exact order the model was trained on.
    feature_columns = [
        "receiptAmount",
        "relationDays",
        "relationRecurrence",
        "issuerInvoicesAmount",
        "issuerCancelledInvoices",
        "activityDaysPayer",
        "clients12Months",
    ]
    return df[feature_columns].copy()