yamunasivan committed on
Commit
d1e319b
·
1 Parent(s): a711866

create a new file

Browse files
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image that serves the FastAPI app with uvicorn on port 7860.
FROM python:3.9

# Create the account named by the --chown flags below. Nothing else in this
# file defines a user called "user", and COPY --chown needs the name to
# resolve inside the image.
RUN useradd -m -u 1000 user

WORKDIR /app

# Hugging Face cache location, kept inside the application directory.
ENV HF_HOME=/app/.cache

# Install dependencies before copying the source tree so this layer is
# reused when only application code changes. One install is enough.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# A single recursive chmod on /app/.cache already covers huggingface/hub.
RUN mkdir -p /app/.cache/huggingface/hub && \
    chmod -R 777 /app/.cache

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from routers import predict
3
+
4
# ASGI application object; uvicorn is pointed at it as "main:app".
app = FastAPI(title="Logistic Regression API")

# Mount the prediction routes exposed by the routers.predict module.
app.include_router(predict.router)


@app.get("/")
def home():
    """Return a static welcome message for the root path."""
    welcome = {"message": "Welcome to the Logistic Regression API"}
    return welcome
12
+
13
+ # Run using: uvicorn main:app --reload
models/__init__.py ADDED
File without changes
models/train_model.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Train the text classifier and save the model and TF-IDF vectorizer.

Run as a script from the project root, e.g. ``python models/train_model.py``;
all paths below are relative to the current working directory.
"""
import os
import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

DATA_PATH = "data/sms_process_data_main.xlsx"
MODEL_PATH = "models/logistic_regression.pkl"
VECTORIZER_PATH = "models/vectorizer.pkl"


def main():
    """Fit TF-IDF + logistic regression, report test accuracy, save both.

    Reads the ``MessageText`` and ``label`` columns from ``DATA_PATH``,
    holds out 20% of the rows for evaluation, and pickles the fitted model
    and vectorizer to ``MODEL_PATH`` and ``VECTORIZER_PATH``.
    """
    # Load dataset
    df = pd.read_excel(DATA_PATH)

    # Rows missing a message or a label cannot be vectorized or fitted.
    df = df.dropna(subset=["MessageText", "label"])
    # Spreadsheet cells may come back as numbers; the vectorizer needs text.
    messages = df["MessageText"].astype(str)

    # Prepare training data
    X_train, X_test, y_train, y_test = train_test_split(
        messages, df["label"], test_size=0.2, random_state=42
    )

    # Convert text into numerical vectors
    vectorizer = TfidfVectorizer()
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    # Train model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_vec, y_train)

    # Put the held-out split to use so a bad model is visible before saving.
    accuracy = model.score(X_test_vec, y_test)
    print(f"Test accuracy: {accuracy:.4f}")

    # Save model and vectorizer
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    with open(MODEL_PATH, "wb") as model_file:
        pickle.dump(model, model_file)

    with open(VECTORIZER_PATH, "wb") as vec_file:
        pickle.dump(vectorizer, vec_file)

    print("Model trained and saved successfully!")


# Guard the entry point so importing this module does not retrain the model.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ scikit-learn
4
+ pandas
5
+ sentence-transformers
6
+ openpyxl
routers/__init__.py ADDED
File without changes
routers/predict.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from schema.schemas import PredictionInput, PredictionOutput
3
+ from service.classifier import load_model, predict
4
+
5
router = APIRouter(prefix="/predict", tags=["Prediction"])

# Load the model once, at import time, so every request reuses the same
# model and vectorizer objects.
model, vectorizer = load_model()


@router.post("/", response_model=PredictionOutput)
def make_prediction(input_data: PredictionInput):
    """Classify the submitted text and return the predicted label.

    Args:
        input_data: Request body whose ``text`` field is the text to classify.

    Returns:
        A dict with a single ``prediction`` key holding the label as a string.
    """
    prediction = predict(input_data.text, model, vectorizer)
    # PredictionOutput declares ``prediction`` as str. Cast explicitly so a
    # non-string label (e.g. a numeric class) still passes response validation.
    return {"prediction": str(prediction)}
schema/__init__.py ADDED
File without changes
schema/schemas.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class PredictionInput(BaseModel):
    # Request payload: the raw text to be classified.
    text: str
5
+
6
class PredictionOutput(BaseModel):
    # Response payload: the predicted label as a string.
    prediction: str
service/__init__.py ADDED
File without changes
service/classifier.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import os
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.linear_model import LogisticRegression
5
+
6
# Both paths are relative to the process's current working directory.
MODEL_PATH = "models/logistic_regression.pkl"
VECTORIZER_PATH = "models/vectorizer.pkl"


def load_model():
    """Load the trained model and vectorizer from disk.

    Returns:
        A ``(model, vectorizer)`` tuple unpickled from ``MODEL_PATH`` and
        ``VECTORIZER_PATH``.

    Raises:
        FileNotFoundError: If either file is missing. The underlying OS
            error is chained as the cause so the missing path is visible.
    """
    # NOTE(security): pickle.load executes code embedded in the file. Only
    # load artifacts produced by this project's own training script.
    try:
        # Open directly rather than checking existence first, so there is no
        # window between the check and the open.
        with open(MODEL_PATH, "rb") as model_file, open(VECTORIZER_PATH, "rb") as vec_file:
            model = pickle.load(model_file)
            vectorizer = pickle.load(vec_file)
    except FileNotFoundError as err:
        raise FileNotFoundError("Model or vectorizer not found!") from err
    return model, vectorizer
18
+
19
def predict(text, model, vectorizer):
    """Predict the label for a single piece of text.

    Args:
        text: The text to classify.
        model: Fitted estimator exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.

    Returns:
        The predicted label as a built-in ``str``.
    """
    # The vectorizer works on a collection of documents, so wrap the one text.
    text_vectorized = vectorizer.transform([text])
    prediction = model.predict(text_vectorized)[0]
    # Convert to a plain str so the value always fits the response schema
    # (``prediction: str``), whatever type the label has.
    return str(prediction)
23
+ return prediction