rio3210 committed on
Commit
01ad777
·
1 Parent(s): 82e0d9e

Add initial implementation of text classification API with Docker support

Files changed (5)
  1. Dockerfile +16 -0
  2. app.py +56 -0
  3. dto.py +11 -0
  4. packges.txt +9 -0
  5. textclassifier.py +13 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # You will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./packges.txt packges.txt
+ RUN pip install --no-cache-dir --upgrade -r packges.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
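Outside of Spaces, the same image can be tried locally; a typical invocation (the `textclassify-api` tag is just an illustrative name) is `docker build -t textclassify-api .` followed by `docker run -p 7860:7860 textclassify-api`, after which the API listens on http://localhost:7860.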
app.py ADDED
@@ -0,0 +1,57 @@
+ from fastapi import FastAPI
+ from fastapi.responses import JSONResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ import tensorflow as tf
+ import joblib
+ from huggingface_hub import hf_hub_download
+
+ # Download the model and tokenizer files from the Hugging Face Hub
+ model_path = hf_hub_download(repo_id="rio3210/amharic-hate-speech-using-rnn-bidirectional", filename="amharic_hate_speech_rnn_model.keras")
+ tokenizer_path = hf_hub_download(repo_id="rio3210/amharic-hate-speech-using-rnn-bidirectional", filename="tokenizer.joblib")
+
+ # Load the Keras model and the fitted tokenizer
+ keras_model = tf.keras.models.load_model(model_path)
+ tokenizer = joblib.load(tokenizer_path)
+
+ # Define the FastAPI application
+ app = FastAPI()
+
+ # Set up CORS so the API can be called from any origin
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Define the request body schema
+ class ClassifyRequest(BaseModel):
+     text: str
+
+ # Preprocessing: tokenize the text and pad it to the model's input length
+ def preprocess_text(text: str, tokenizer, max_length: int = 100):
+     sequences = tokenizer.texts_to_sequences([text])  # Tokenize the input text
+     padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
+         sequences, maxlen=max_length, padding="post", truncating="post"
+     )
+     return padded_sequences
+
+ # Classification route
+ @app.post("/textclassify")
+ def classify_text(request_body: ClassifyRequest):
+     text = request_body.text
+     processed_text = preprocess_text(text, tokenizer)  # Preprocess the input text
+     prediction = keras_model.predict(processed_text)  # Predict using the Keras model
+     confidence = float(prediction[0][0])  # Single sigmoid output, as the 0.5 threshold implies
+     label = "Hate" if confidence > 0.5 else "Free"  # Threshold-based classification
+
+     # Return the result
+     response = {"label": label, "confidence": confidence}
+     return JSONResponse(content=response, status_code=200)
+
+ # Root route
+ @app.get("/")
+ def home():
+     return {"hello": "world"}
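Once the container is up, the new endpoint can be exercised with a short client sketch like the one below (the `requests` library is an assumption here; it is not listed in packges.txt):

```python
import requests  # assumed client-side dependency; not part of packges.txt

# Send an Amharic sentence to the classification endpoint
resp = requests.post(
    "http://localhost:7860/textclassify",
    json={"text": "α‹ˆαŒ£α‰± አክሎም α‰΅αŒαˆ«α‹­ ክልል αŠ¨αŒ¦αˆ­αŠα‰± αˆ›αŒαˆ₯ቡ αŠ αŠ•αƒαˆ«α‹Š ..."},
)
print(resp.json())  # -> {"label": "Hate" or "Free", "confidence": <float>}
```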
dto.py ADDED
@@ -0,0 +1,7 @@
+ from pydantic import BaseModel
+ from pydantic import Field
+
+
+ class ClassifyRequest(BaseModel):
+     """Text classification request model."""
+     text: str = Field(..., description="The text you want to classify", examples=["α‹ˆαŒ£α‰± አክሎም α‰΅αŒαˆ«α‹­ ክልል αŠ¨αŒ¦αˆ­αŠα‰± αˆ›αŒαˆ₯ቡ αŠ αŠ•αƒαˆ«α‹Š ..."])
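As a quick sanity check, the model can be instantiated directly (a minimal sketch, assuming pydantic v2, which the `examples=` keyword implies):

```python
from dto import ClassifyRequest

# Validate a payload against the schema; invalid input raises ValidationError
req = ClassifyRequest(text="α‹ˆαŒ£α‰± አክሎም ...")
print(req.model_dump())  # {'text': 'α‹ˆαŒ£α‰± አክሎም ...'}
```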
packges.txt ADDED
@@ -0,0 +1,9 @@
+ nltk
+ numpy
+ pandas
+ fastapi
+ uvicorn[standard]
+ scikit-learn
+ tensorflow
+ huggingface_hub
+ joblib
textclassifier.py ADDED
@@ -0,0 +1,9 @@
+ class TextClassifier:
+
+     def __init__(self, model) -> None:
+         self.text_classifier = model
+
+     def classify(self, text):
+         data = self.text_classifier(text)[0]
+         print("response from Hugging Face:\n", data)
+         return data
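TextClassifier is a thin wrapper around any callable that returns a list of predictions; the `[0]` indexing in classify() matches the list-of-dicts output of a transformers text-classification pipeline. A sketch of that pairing (transformers is not in packges.txt and the model id is hypothetical):

```python
from transformers import pipeline  # assumed dependency; not listed in packges.txt

from textclassifier import TextClassifier

# A text-classification pipeline returns a list of {"label", "score"} dicts,
# so self.text_classifier(text)[0] yields the top prediction
model = pipeline("text-classification", model="some-org/some-amharic-model")  # hypothetical model id
classifier = TextClassifier(model)
result = classifier.classify("α‹ˆαŒ£α‰± አክሎም ...")  # -> {"label": ..., "score": ...}
```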