|
import os |
|
|
|
# Redirect the Hugging Face model cache to a writable location; must be set
# before `transformers` is imported so the library picks it up.
# NOTE(review): presumably for a read-only container filesystem (e.g. HF Spaces,
# given port 7860 below) — confirm. TRANSFORMERS_CACHE is deprecated in newer
# transformers releases in favor of HF_HOME.
os.environ["TRANSFORMERS_CACHE"] = "/tmp"
|
|
|
from flask import Flask, request, jsonify |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
|
|
# Flask application serving the /predict endpoint defined below.
app = Flask(__name__)



# Binary formality classifier; downloaded from the Hugging Face hub on first
# run (network + disk I/O at import time), cached under /tmp (see env var above).
model_name = "s-nlp/roberta-base-formality-ranker"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Loaded once at module level and shared by all requests (inference only —
# no gradients are computed; see the torch.no_grad() block in the handler).
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
def fuzzy_formality(score, threshold=0.75):
    """Convert a raw formality probability into formal/informal percentages.

    Applies a piecewise-quadratic easing around *threshold*: scores below it
    are eased toward 0 and scores above it toward 1, with the threshold itself
    mapping to an even 50/50 split.

    Args:
        score: Formality probability; values outside [0, 1] are clamped.
        threshold: Pivot point mapped to 50% formal; must be in (0, 1).

    Returns:
        dict with "formal_percent", "informal_percent" (ints summing from the
        same weight, rounded independently), and a human-readable
        "classification" string.
    """
    # Clamp so out-of-range inputs cannot produce percentages outside 0-100
    # (the quadratic pieces are only meaningful on [0, 1]).
    score = min(max(score, 0.0), 1.0)

    if score < threshold:
        # Quadratic ease-in: maps [0, threshold) onto [0, 0.5).
        formal_weight = 0.5 * (score / threshold) ** 2
    else:
        # Mirrored quadratic ease-out: maps [threshold, 1] onto [0.5, 1].
        formal_weight = 1 - 0.5 * ((1 - score) / (1 - threshold)) ** 2

    informal_weight = 1 - formal_weight

    formal_percent = round(formal_weight * 100)
    informal_percent = round(informal_weight * 100)

    return {
        "formal_percent": formal_percent,
        "informal_percent": informal_percent,
        "classification": f"Your speech is {formal_percent}% formal and {informal_percent}% informal."
    }
|
|
|
@app.route("/predict", methods=["POST"])
def predict_formality():
    """Classify the formality of a piece of text.

    Expects a JSON body of the form {"text": "..."} and responds with the raw
    formality probability plus the fuzzy percentages from fuzzy_formality().

    Returns:
        200 with {"text", "formality_score", "formal_percent",
        "informal_percent", "classification"} on success;
        400 with {"error": ...} when the body is not JSON or "text" is
        missing, empty, or not a string.
    """
    # get_json(silent=True) returns None instead of raising when the body is
    # missing or not valid JSON, so malformed requests get our JSON 400 rather
    # than a framework-generated error page.
    payload = request.get_json(silent=True) or {}
    text = payload.get("text")
    if not isinstance(text, str) or not text:
        return jsonify({"error": "Text input is required"}), 400

    # Single-sequence batch: no padding needed (nothing to align against);
    # truncation guards against inputs longer than the model's max length.
    encoding = tokenizer(
        text, add_special_tokens=True, truncation=True, return_tensors="pt"
    )

    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        output = model(**encoding)

    # Softmax over the two classes; index 1 is taken as the "formal"
    # probability (NOTE(review): per this model's label order — confirm
    # against the model card's id2label mapping).
    softmax_scores = output.logits.softmax(dim=1)
    formality_score = softmax_scores[:, 1].item()

    result = fuzzy_formality(formality_score)

    return jsonify({
        "text": text,
        "formality_score": round(formality_score, 3),
        **result
    })
|
|
|
|
|
if __name__ == "__main__":

    # Dev-server entry point; binds all interfaces on 7860 (the conventional
    # Hugging Face Spaces port). Use a production WSGI server for deployment.
    app.run(host="0.0.0.0", port=7860)
|
|