Spaces · Commit d1e319b
Parent(s): a711866

create a new file

Files changed:
- Dockerfile             +22 -0
- main.py                +13 -0
- models/__init__.py      +0 -0
- models/train_model.py  +30 -0
- requirements.txt        +6 -0
- routers/__init__.py     +0 -0
- routers/predict.py     +13 -0
- schema/__init__.py      +0 -0
- schema/schemas.py       +7 -0
- service/__init__.py     +0 -0
- service/classifier.py  +23 -0
Dockerfile
ADDED
@@ -0,0 +1,22 @@
+FROM python:3.9
+
+WORKDIR /app
+COPY . /app
+
+ENV HF_HOME=/app/.cache
+
+RUN mkdir -p /app/.cache/huggingface/hub && \
+    chmod -R 777 /app/.cache && \
+    chmod -R 777 /app/.cache/huggingface
+
+
+
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+EXPOSE 7860
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py
ADDED
@@ -0,0 +1,13 @@
+from fastapi import FastAPI
+from routers import predict
+
+app = FastAPI(title="Logistic Regression API")
+
+# Include API router
+app.include_router(predict.router)
+
+@app.get("/")
+def home():
+    return {"message": "Welcome to the Logistic Regression API"}
+
+# Run using: uvicorn main:app --reload
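A quick local smoke test of this app, sketched with FastAPI's TestClient; it assumes the pickle files that routers/predict.py loads at import time (models/logistic_regression.pkl and models/vectorizer.pkl) already exist, and that the test client dependency (httpx) is installed, since neither ships in this commit's requirements.txt.

# Smoke test: the root endpoint should return the welcome message.
from fastapi.testclient import TestClient

from main import app

client = TestClient(app)
response = client.get("/")
print(response.status_code)  # 200
print(response.json())       # {"message": "Welcome to the Logistic Regression API"}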
models/__init__.py
ADDED
File without changes
models/train_model.py
ADDED
@@ -0,0 +1,30 @@
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+import pickle
+
+# Load dataset
+file_path = "data/sms_process_data_main.xlsx"
+df = pd.read_excel(file_path)
+
+# Prepare training data
+X_train, X_test, y_train, y_test = train_test_split(df['MessageText'], df['label'], test_size=0.2, random_state=42)
+
+# Convert text into numerical vectors
+vectorizer = TfidfVectorizer()
+X_train_vec = vectorizer.fit_transform(X_train)
+X_test_vec = vectorizer.transform(X_test)
+
+# Train model
+model = LogisticRegression(max_iter=1000)
+model.fit(X_train_vec, y_train)
+
+# Save model and vectorizer
+with open("models/logistic_regression.pkl", "wb") as model_file:
+    pickle.dump(model, model_file)
+
+with open("models/vectorizer.pkl", "wb") as vec_file:
+    pickle.dump(vectorizer, vec_file)
+
+print("Model trained and saved successfully!")
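The script trains and saves but does not evaluate; a sketch of an offline evaluation step follows. It assumes the saved pickles and the data file data/sms_process_data_main.xlsx (which is not part of this commit) are available, and recreates the same held-out split via random_state=42.

# Offline evaluation sketch; not part of the committed script.
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

with open("models/logistic_regression.pkl", "rb") as f:
    model = pickle.load(f)
with open("models/vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)

df = pd.read_excel("data/sms_process_data_main.xlsx")
# Same split parameters as train_model.py, so X_test/y_test match the training run.
_, X_test, _, y_test = train_test_split(
    df["MessageText"], df["label"], test_size=0.2, random_state=42
)
print("Test accuracy:", accuracy_score(y_test, model.predict(vectorizer.transform(X_test))))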
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+fastapi
+uvicorn
+scikit-learn
+pandas
+sentence-transformers
+openpyxl
routers/__init__.py
ADDED
File without changes
routers/predict.py
ADDED
@@ -0,0 +1,13 @@
+from fastapi import APIRouter
+from schema.schemas import PredictionInput, PredictionOutput
+from service.classifier import load_model, predict
+
+router = APIRouter(prefix="/predict", tags=["Prediction"])
+
+# Load the model once
+model, vectorizer = load_model()
+
+@router.post("/", response_model=PredictionOutput)
+def make_prediction(input_data: PredictionInput):
+    prediction = predict(input_data.text, model, vectorizer)
+    return {"prediction": prediction}
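Once the server is running (uvicorn main:app --port 7860, or the Docker image), the endpoint can be exercised as sketched below; requests is not listed in requirements.txt, and the example text and returned label are illustrative only.

# Call the prediction endpoint on a locally running server.
import requests

resp = requests.post(
    "http://localhost:7860/predict/",
    json={"text": "Congratulations! You have won a free prize, reply to claim."},
)
print(resp.json())  # {"prediction": "<label from the dataset's label column>"}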
schema/__init__.py
ADDED
File without changes
schema/schemas.py
ADDED
@@ -0,0 +1,7 @@
+from pydantic import BaseModel
+
+class PredictionInput(BaseModel):
+    text: str
+
+class PredictionOutput(BaseModel):
+    prediction: str
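A quick check of how these Pydantic models validate payloads; the field values below are invented.

from pydantic import ValidationError
from schema.schemas import PredictionInput, PredictionOutput

print(PredictionInput(text="hello"))              # ok: text is a string
print(PredictionOutput(prediction="some-label"))  # ok: prediction is a string

try:
    PredictionInput()  # missing required "text" field
except ValidationError as err:
    print(err)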
service/__init__.py
ADDED
File without changes
service/classifier.py
ADDED
@@ -0,0 +1,23 @@
+import pickle
+import os
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+
+MODEL_PATH = "models/logistic_regression.pkl"
+VECTORIZER_PATH = "models/vectorizer.pkl"
+
+def load_model():
+    """Load trained model and vectorizer from disk."""
+    if os.path.exists(MODEL_PATH) and os.path.exists(VECTORIZER_PATH):
+        with open(MODEL_PATH, "rb") as model_file, open(VECTORIZER_PATH, "rb") as vec_file:
+            model = pickle.load(model_file)
+            vectorizer = pickle.load(vec_file)
+        return model, vectorizer
+    else:
+        raise FileNotFoundError("Model or vectorizer not found!")
+
+def predict(text, model, vectorizer):
+    """Make predictions using the trained model."""
+    text_vectorized = vectorizer.transform([text])
+    prediction = model.predict(text_vectorized)[0]
+    return prediction
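The service layer can also be used on its own, outside FastAPI, as sketched below; it assumes the pickles produced by models/train_model.py are present under models/, and the example message is made up.

# Direct use of the classifier service, without the API layer.
from service.classifier import load_model, predict

model, vectorizer = load_model()
label = predict("Are we still meeting for lunch today?", model, vectorizer)
print(label)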