Spaces:

sanbatte
/

finance_api

Running

App Files Files Community

sanbatte committed on Jan 11, 2024

Commit

18a33a3

1 Parent(s): 4a0ff0f

Add application file

Browse files

Files changed (6) hide show

Dockerfile +14 -0
data/dataTest.csv +0 -0
data/lightgbm_deuda.pkl +0 -0
main.py +71 -0
requirements.txt +12 -0
utils.py +23 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
+# You will also find guides there on how best to write your Dockerfile.
+FROM python:3.9
+WORKDIR /code
+# Copy only the requirements file first, so the dependency-install step below
+# can be cached separately from the application source.
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Copy the rest of the build context into the working directory (/code).
+COPY . .
+# Serve the `app` object from main.py with uvicorn on all interfaces, port 7860.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

data/dataTest.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/lightgbm_deuda.pkl ADDED Viewed

Binary file (341 kB). View file

main.py ADDED Viewed

	@@ -0,0 +1,71 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from os import getenv
+import pandas as pd
+import joblib
+# import pickle
+import numpy as np
+from utils import prepare_data
+app = FastAPI()
+# Load the serialized model from the .pkl file once, at import time.
+# NOTE(review): the file name suggests a LightGBM model (an earlier comment
+# here said XGBoost) — confirm against the training code.
+with open("data/lightgbm_deuda.pkl", "rb") as file:
+    model = joblib.load(file)
+# Load the invoice data from the CSV file and pass it through prepare_data
+# (imported from utils) before it is used by the endpoint below.
+df = pd.read_csv("data/dataTest.csv")
+df = prepare_data(df)
+class PredictionRequest(BaseModel):
+    # Request body accepted by the /predict endpoint.
+    # invoiceId: ids to score; predict() rejects ids absent from df's index.
+    invoiceId: list[int]
+    # country: country code; predict() accepts only "CL" or "MX".
+    country: str
+class PredictionResponse(BaseModel):
+    # Shape of a single prediction item; these are the same two keys as the
+    # dicts that predict() builds for each scored invoice.
+    invoiceId: int
+    prediction: float
+@app.post("/predict")
+def predict(request: PredictionRequest):
+    # Verificar que los invoiceId enviados estén en el DataFrame
+    invalid_ids = set(request.invoiceId) - set(df.index)
+    if invalid_ids:
+        raise HTTPException(
+            status_code=400, detail=f"Invalid invoiceId(s): {invalid_ids}"
+        )
+    if request.country not in ["CL", "MX"]:
+        raise HTTPException(
+            status_code=400, detail=f"Invalid country code: {request.country}"
+        )
+    # Filtrar el DataFrame para obtener solo las filas correspondientes a los invoiceId enviados
+    prediction_data = df.loc[request.invoiceId]
+    # Realizar la predicción con el modelo
+    predictions = model.predict(prediction_data)
+    # Crear la respuesta
+    response_data = [
+        {"invoiceId": invoice_id, "prediction": float(prediction)}
+        for invoice_id, prediction in zip(request.invoiceId, predictions)
+    ]
+    return response_data
+if __name__ == "__main__":
+    import uvicorn
+    print("building")
+    uvicorn.run(app, host="0.0.0.0", reload=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+fastapi==0.108.0
+joblib==1.3.2
+lightgbm==4.2.0
+numpy==1.26.3
+pandas==2.1.4
+pydantic==2.5.3
+pydantic_core==2.14.6
+scikit-learn==1.3.2
+scipy==1.11.4
+uvicorn==0.25.0

utils.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import pandas as pd
+def prepare_data(df: pd.DataFrame = None) -> pd.DataFrame:
+    """
+    Prepare data.
+    """
+    # Assuming no additional preprocessing is required for this example
+    df = df.drop(["Unnamed: 0", "overdueDays"], axis=1)
+    df = df.drop(["businessId", "payerId"], axis=1)
+    df = df.set_index("invoiceId")
+    df = df[
+        [
+            "receiptAmount",
+            "relationDays",
+            "relationRecurrence",
+            "issuerInvoicesAmount",
+            "issuerCancelledInvoices",
+            "activityDaysPayer",
+            "clients12Months",
+        ]
+    ]
+    return df.copy()