Spaces:
Sleeping
Sleeping
Add initial implementation of text classification API with Docker support
Browse files- Dockerfile +16 -0
- app.py +56 -0
- dto.py +11 -0
- packges.txt +9 -0
- textclassifier.py +13 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
2 |
+
# you will also find guides on how best to write your Dockerfile
|
3 |
+
|
4 |
+
FROM python:3.9
|
5 |
+
|
6 |
+
RUN useradd -m -u 1000 user
|
7 |
+
USER user
|
8 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
9 |
+
|
10 |
+
WORKDIR /app
|
11 |
+
|
12 |
+
COPY --chown=user ./packges.txt packges.txt
|
13 |
+
RUN pip install --no-cache-dir --upgrade -r packges.txt
|
14 |
+
|
15 |
+
COPY --chown=user . /app
|
16 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.responses import JSONResponse
|
3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
4 |
+
from pydantic import BaseModel
|
5 |
+
import tensorflow as tf
|
6 |
+
import joblib
|
7 |
+
import numpy as np
|
8 |
+
from huggingface_hub import hf_hub_download
|
9 |
+
import tensorflow as tf
|
10 |
+
import joblib
|
11 |
+
|
12 |
+
# Download the trained model and its tokenizer from the Hugging Face Hub,
# then load both once at import time so every request reuses them.
model_path = hf_hub_download(repo_id="rio3210/amharic-hate-speech-using-rnn-bidirectional", filename="amharic_hate_speech_rnn_model.keras")
tokenizer_path = hf_hub_download(repo_id="rio3210/amharic-hate-speech-using-rnn-bidirectional", filename="tokenizer.joblib")

# The /textclassify route reads the module-level names `keras_model` and
# `tokenizer`; downloading the files alone does not define them, so the
# first request would fail with NameError.
keras_model = tf.keras.models.load_model(model_path)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts from a repository you trust.
tokenizer = joblib.load(tokenizer_path)
|
15 |
+
|
16 |
+
# Define the FastAPI application
|
17 |
+
app = FastAPI()
|
18 |
+
|
19 |
+
# Setup CORS
|
20 |
+
app.add_middleware(
|
21 |
+
CORSMiddleware,
|
22 |
+
allow_origins=["*"],
|
23 |
+
allow_credentials=True,
|
24 |
+
allow_methods=["*"],
|
25 |
+
allow_headers=["*"],
|
26 |
+
)
|
27 |
+
|
28 |
+
# Define the request body schema
|
29 |
+
class ClassifyRequest(BaseModel):
    """Request body schema: a single required string field named ``text``."""

    text: str
|
31 |
+
|
32 |
+
# Preprocessing function
|
33 |
+
def preprocess_text(text: str, tokenizer, max_length: int = 100):
    """Convert one raw string into a padded batch of token sequences.

    Args:
        text: The input string; it is wrapped in a one-element list before
            being handed to ``tokenizer.texts_to_sequences``.
        tokenizer: Object exposing ``texts_to_sequences``.
        max_length: Length each sequence is padded or truncated to.

    Returns:
        The result of ``pad_sequences``: sequences padded and truncated at
        the end ("post") to ``max_length``.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    return tf.keras.preprocessing.sequence.pad_sequences(
        token_ids,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )
|
39 |
+
|
40 |
+
# Classification route
|
41 |
+
@app.post("/textclassify")
def classify_text(request_body: ClassifyRequest):
    """Classify the submitted text as "Hate" or "Free".

    The text is tokenized and padded by ``preprocess_text``, scored by the
    module-level ``keras_model``, and labelled "Hate" when the score is
    strictly greater than 0.5, otherwise "Free".

    Args:
        request_body: Parsed request payload carrying the ``text`` to score.

    Returns:
        A JSON response with ``label`` and ``confidence`` (the raw model
        score as a Python float), sent with HTTP status 201.
    """
    processed_text = preprocess_text(request_body.text, tokenizer)
    prediction = keras_model.predict(processed_text)

    # Extract one Python float explicitly: calling float() on an array with
    # ndim > 0 is deprecated since NumPy 1.25.
    # Assumes the model emits a single score for the single input row --
    # TODO confirm against the model's output layer.
    score = float(np.asarray(prediction).reshape(-1)[0])
    label = "Hate" if score > 0.5 else "Free"

    # NOTE(review): 201 is kept so existing clients see the same status,
    # although this route does not create a resource -- confirm whether 200
    # is intended.
    response = {"label": label, "confidence": score}
    return JSONResponse(content=response, status_code=201)
|
52 |
+
|
53 |
+
# Root route
|
54 |
+
@app.get("/")
def home():
    """Return a fixed greeting payload for the root route."""
    greeting = {"hello": "world"}
    return greeting
|
dto.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime
|
2 |
+
from typing import List
|
3 |
+
from typing import Optional
|
4 |
+
|
5 |
+
from pydantic import BaseModel
|
6 |
+
from pydantic import Field
|
7 |
+
|
8 |
+
|
9 |
+
class ClassifyRequest(BaseModel):
    """Text classification request model."""

    # NOTE(review): the example string below looks mis-encoded (possibly
    # non-Latin text decoded with the wrong charset) -- restore it from the
    # original source if so.
    text: str = Field(..., description="The text you want to classify", examples=["αα£α± α ααα α΅αα«α ααα α¨α¦ααα± ααα₯α΅ α ααα«α ..."])
|
packges.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
nltk
|
2 |
+
numpy
|
3 |
+
pandas
|
4 |
+
fastapi
|
5 |
+
uvicorn[standard]
|
6 |
+
scikit-learn
|
7 |
+
tensorflow
|
8 |
+
huggingface_hub
|
9 |
+
joblib
|
textclassifier.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class TextClassifier:
    """Thin wrapper around a callable classification model."""

    def __init__(self, model) -> None:
        # `model` is later invoked as model(text) and its result indexed with [0].
        self.text_classifier = model

    def classify(self, text):
        """Run the wrapped model on *text* and return the first item of its output.

        The selected item is also printed to stdout before being returned.
        """
        outputs = self.text_classifier(text)
        first = outputs[0]
        print("response from hugging \n", first)
        return first
|
12 |
+
|
13 |
+
|