Spaces:
Sleeping
Sleeping
Commit
·
a6bbf63
1
Parent(s):
daa5938
feat: added all the files
Browse files
- Dockerfile +18 -0
- __init__.py +0 -0
- components/__init__.py +0 -0
- components/model_ingestion.py +14 -0
- components/model_loader.py +13 -0
- components/pipeline_preparer.py +11 -0
- components/predictor.py +12 -0
- dvc.yaml +17 -0
- main.py +38 -0
- requirements.txt +0 -0
- utils/__init__.py +0 -0
- utils/commons.py +3 -0
Dockerfile
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.9

# Run as a non-root user (required by Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the source tree so the pip layers
# are cached across source-only edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Install DVC if needed (uncomment if using DVC in container)
# Installed here (before COPY .) for the same layer-caching reason.
RUN pip install --no-cache-dir dvc

COPY --chown=user . /app

CMD ["dvc","repro"]
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
__init__.py
ADDED
File without changes
|
components/__init__.py
ADDED
File without changes
|
components/model_ingestion.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from utils.commons import HUGGINGFACE_MODEL_PATH, MODEL_SAVE_PATH
import os


class ModelIngestor:
    """Downloads the pretrained tokenizer and model and caches them locally."""

    @staticmethod
    def download_model():
        """Download the tokenizer and model from the Hugging Face Hub and
        save both under MODEL_SAVE_PATH."""
        # exist_ok=True replaces the original check-then-create pattern,
        # which is race-prone and fails if the dir appears between the
        # exists() check and makedirs().
        os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

        RobertaTokenizerFast.from_pretrained(HUGGINGFACE_MODEL_PATH).save_pretrained(MODEL_SAVE_PATH)
        RobertaForSequenceClassification.from_pretrained(HUGGINGFACE_MODEL_PATH).save_pretrained(MODEL_SAVE_PATH)
        print(f"Model saved to {MODEL_SAVE_PATH}")
components/model_loader.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from utils.commons import MODEL_SAVE_PATH


class ModelLoader:
    """Loads the tokenizer and model from the local save directory."""

    def __init__(self):
        # Both populated by load_model(); None until then.
        self.tokenizer = None
        self.model = None

    def load_model(self):
        """Load the tokenizer and model from MODEL_SAVE_PATH and return them
        as a (tokenizer, model) pair."""
        tokenizer = RobertaTokenizerFast.from_pretrained(MODEL_SAVE_PATH)
        model = RobertaForSequenceClassification.from_pretrained(MODEL_SAVE_PATH)
        self.tokenizer, self.model = tokenizer, model
        return self.tokenizer, self.model
components/pipeline_preparer.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import TextClassificationPipeline


class PipelinePreparer:
    """Builds a scoring pipeline from a tokenizer/model pair."""

    @staticmethod
    def prepare_pipeline(tokenizer, model):
        """Create text classification pipeline"""
        # NOTE(review): return_all_scores is deprecated in recent
        # transformers releases in favour of top_k=None; migrating would
        # change the result nesting that Predictor indexes into —
        # confirm the pinned transformers version before switching.
        return TextClassificationPipeline(
            tokenizer=tokenizer,
            model=model,
            return_all_scores=True
        )
components/predictor.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Predictor:
    """Wraps a text-classification pipeline and reports the top-scoring label."""

    def __init__(self, pipeline):
        # Callable mapping text -> [[{"label": ..., "score": ...}, ...]].
        self.pipeline = pipeline

    def predict(self, text):
        """Run the pipeline on *text* and return the best label and score."""
        candidates = self.pipeline(text)[0]
        # Keep the first candidate with the highest score (same tie-breaking
        # as max() with a key function).
        top = candidates[0]
        for candidate in candidates[1:]:
            if candidate["score"] > top["score"]:
                top = candidate
        return {
            "predicted_intent": top["label"],
            "confidence": top["score"]
        }
|
dvc.yaml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
stages:
  model_ingestion:
    cmd: python -c "from components.model_ingestion import ModelIngestor; ModelIngestor.download_model()"
    deps:
      - components/model_ingestion.py
      - utils/commons.py
    outs:
      - models/

  serve_model:
    cmd: python main.py
    deps:
      - main.py
      # models/ is the output of model_ingestion; declaring it here makes
      # DVC run ingestion before serving (otherwise the stages have no
      # ordering and the server can start without a saved model).
      - models/
      - components/
      - utils/
    outs:
      # NOTE(review): nothing in main.py visibly writes logs/ — confirm
      # this directory is actually produced, or `dvc repro` will fail
      # when it tries to track a missing output.
      - logs/
main.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI
from pydantic import BaseModel
from components.model_loader import ModelLoader
from components.pipeline_preparer import PipelinePreparer
from components.predictor import Predictor
import uvicorn

app = FastAPI()


class PredictionRequest(BaseModel):
    # Sentence to classify.
    sentence: str


@app.on_event("startup")
async def startup_event():
    """Load the model once at startup and stash a Predictor on app.state."""
    print("Initializing model...")
    try:
        # Model initialization
        tokenizer, model = ModelLoader().load_model()
        app.state.predictor = Predictor(
            PipelinePreparer.prepare_pipeline(tokenizer, model)
        )
        print("Model initialized successfully")
    except Exception as e:
        # Keep the service up; /predict reports the missing model.
        print(f"Error initializing model: {e}")
        app.state.predictor = None


@app.get("/")
def health_check():
    """Liveness probe."""
    return {"Message": "Service is healthy", "Status": "OK"}


@app.post("/predict")
def predict(request: PredictionRequest):
    """Classify the request sentence, or report that the model is missing."""
    predictor = app.state.predictor
    if not predictor:
        return {"error": "Model not initialized"}
    return predictor.predict(request.sentence)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt
ADDED
File without changes
|
utils/__init__.py
ADDED
File without changes
|
utils/commons.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
# Commonly used variables

# Hugging Face Hub repo id the tokenizer/model are downloaded from.
HUGGINGFACE_MODEL_PATH = "bespin-global/klue-roberta-small-3i4k-intent-classification"
# Local directory where the tokenizer and model weights are saved/loaded.
MODEL_SAVE_PATH = "./models"