Enhance API with bulk prediction and model metadata; improve error handling and text processing
app.py
CHANGED
@@ -1,15 +1,36 @@
-from fastapi import FastAPI
-from
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+from transformers import pipeline, AutoTokenizer
 import re
 
-
+# Load the model and tokenizer
+model_name = "JungleLee/bert-toxic-comment-classification"
+pipe = pipeline("text-classification", model=model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+# Model metadata
+model_info = {
+    "name": "BERT for Toxic Comment Classification",
+    "description": "A fine-tuned BERT model that detects toxic content in text",
+    "labels": ["toxic", "non-toxic"],
+    "max_sequence_length": tokenizer.model_max_length,
+    "author": "JungleLee"
+}
 
 app = FastAPI(
     title="Hopeline - AI Inference API",
-    description="API for detecting toxic comments",
-    version="0.
+    description="API for detecting toxic comments using a BERT-based model",
+    version="0.2"
 )
 
+class TextRequest(BaseModel):
+    text: str
+
+class BulkTextRequest(BaseModel):
+    texts: List[str]
+    threshold: Optional[float] = 0.5
+
 def preprocess_text(text: str) -> str:
     # Remove special characters and extra whitespace
     text = re.sub(r'[^\w\s]', '', text)

@@ -23,17 +44,70 @@ def preprocess_text(text: str) -> str:
 async def welcome():
     return "Welcome to Hopeline - AI Inference API"
 
+@app.get("/model-info")
+async def get_model_info():
+    return model_info
+
 @app.post('/predict')
-async def predict_post(
-
-
-        return {"error": "No text provided"}
+async def predict_post(request: TextRequest):
+    if not request.text:
+        raise HTTPException(status_code=400, detail="No text provided")
 
     # Preprocess text
-    processed_text = preprocess_text(text)
+    processed_text = preprocess_text(request.text)
+
+    # Check token length and truncate if needed
+    tokens = tokenizer.tokenize(processed_text)
+    if len(tokens) > tokenizer.model_max_length - 2:  # -2 for special tokens
+        tokens = tokens[:tokenizer.model_max_length - 2]
+        processed_text = tokenizer.convert_tokens_to_string(tokens)
 
     # Get prediction
-    prediction = pipe(processed_text)
-
+    prediction = pipe(processed_text)[0]
+
+    return {
+        "text": request.text,
+        "label": prediction["label"],
+        "score": prediction["score"],
+        "is_toxic": prediction["label"] == "toxic"
+    }
+
+@app.post('/predict-bulk')
+async def predict_bulk(request: BulkTextRequest):
+    if not request.texts:
+        raise HTTPException(status_code=400, detail="No texts provided")
+
+    results = []
+
+    for text in request.texts:
+        # Preprocess text
+        processed_text = preprocess_text(text)
+
+        # Check token length and truncate if needed
+        tokens = tokenizer.tokenize(processed_text)
+        if len(tokens) > tokenizer.model_max_length - 2:
+            tokens = tokens[:tokenizer.model_max_length - 2]
+            processed_text = tokenizer.convert_tokens_to_string(tokens)
+
+        # Get prediction
+        prediction = pipe(processed_text)[0]
+
+        results.append({
+            "text": text,
+            "label": prediction["label"],
+            "score": prediction["score"],
+            "is_toxic": prediction["label"] == "toxic",
+            "exceeds_threshold": prediction["score"] > request.threshold if prediction["label"] == "toxic" else False
+        })
+
+    return {
+        "results": results,
+        "summary": {
+            "total": len(results),
+            "toxic_count": sum(1 for r in results if r["is_toxic"]),
+            "non_toxic_count": sum(1 for r in results if not r["is_toxic"]),
+            "threshold_exceeded_count": sum(1 for r in results if r["exceeds_threshold"])
+        }
+    }
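For reference, a minimal client sketch (not part of this commit) showing how the updated endpoints could be exercised. It assumes the app is served locally, for example with "uvicorn app:app --port 8000"; the base URL is a placeholder, and the sample texts and threshold are illustrative.

import requests

BASE_URL = "http://localhost:8000"  # placeholder; adjust to wherever the Space is served

# Single prediction: POST /predict with a JSON body matching TextRequest
resp = requests.post(f"{BASE_URL}/predict", json={"text": "Have a nice day"})
print(resp.json())  # {"text": ..., "label": ..., "score": ..., "is_toxic": ...}

# Bulk prediction: POST /predict-bulk with a list of texts and an optional threshold
resp = requests.post(
    f"{BASE_URL}/predict-bulk",
    json={"texts": ["Have a nice day", "You are awful"], "threshold": 0.8},
)
print(resp.json()["summary"])  # total, toxic_count, non_toxic_count, threshold_exceeded_count

# Model metadata: GET /model-info
print(requests.get(f"{BASE_URL}/model-info").json())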