Ehrii's picture
Update main.py
7eced3d
raw
history blame
1.78 kB
import os
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
from langdetect import detect, DetectorFactory
# Ensure consistent language detection results
DetectorFactory.seed = 0
# Set Hugging Face cache directory to a writable location
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)
app = FastAPI()
# Load the original tokenizer from the base model
original_tokenizer = AutoTokenizer.from_pretrained("tabularisai/multilingual-sentiment-analysis")
# Load the fine-tuned model and pass the tokenizer explicitly
multilingual_model = pipeline(
"sentiment-analysis",
model="Ehrii/sentiment",
tokenizer=original_tokenizer
)
# English model remains unchanged
english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
class SentimentRequest(BaseModel):
text: str
class SentimentResponse(BaseModel):
original_text: str
language_detected: str
sentiment: str
confidence_score: float
def detect_language(text):
try:
return detect(text)
except Exception:
return "unknown"
@app.get("/")
def home():
return {"message": "Sentiment Analysis API is running!"}
@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
text = request.text
language = detect_language(text)
# Choose the appropriate model based on language
if language == "en":
result = english_model(text)
else:
result = multilingual_model(text)
return SentimentResponse(
original_text=text,
language_detected=language,
sentiment=result[0]["label"].lower(),
confidence_score=result[0]["score"],
)