"""Sentiment analysis API.

Exposes a FastAPI application that detects the language of the submitted
text and classifies its sentiment, using an English-specific model for
English input and a multilingual model for everything else.
"""

import logging
import os

# Point the Hugging Face cache at a writable location. This has to happen
# BEFORE transformers (and, through it, huggingface_hub) is imported, because
# those libraries resolve their cache paths from HF_HOME at import time.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from fastapi import FastAPI  # noqa: E402
from langdetect import DetectorFactory, LangDetectException, detect  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoTokenizer, pipeline  # noqa: E402

logger = logging.getLogger(__name__)

# Ensure consistent language detection results.
DetectorFactory.seed = 0

app = FastAPI()

# Load the original tokenizer from the base model.
original_tokenizer = AutoTokenizer.from_pretrained(
    "tabularisai/multilingual-sentiment-analysis"
)

hf_token = os.getenv("HF_TOKEN")

# Load the fine-tuned model and pass the tokenizer explicitly.
multilingual_model = pipeline(
    "sentiment-analysis",
    model="Ehrii/sentiment",
    tokenizer=original_tokenizer,
    token=hf_token,
)

# English model remains unchanged.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Maps generic or differently-cased model labels onto canonical names.
# Lookups are performed on the upper-cased label.
LABEL_MAP = {
    "LABEL_0": "negative",
    "LABEL_1": "positive",
    "LABEL_2": "neutral",
    "0": "negative",
    "1": "positive",
    "2": "neutral",
    "NEGATIVE": "negative",
    "POSITIVE": "positive",
    "NEUTRAL": "neutral",
}


class SentimentRequest(BaseModel):
    """Request body for the ``/analyze/`` endpoint."""

    text: str


class SentimentResponse(BaseModel):
    """Response body returned by the ``/analyze/`` endpoint."""

    original_text: str
    language_detected: str
    sentiment: str
    confidence_score: float


def detect_language(text: str) -> str:
    """Return the language code detected for *text*, or ``"unknown"``.

    ``"unknown"`` is returned when langdetect cannot determine a language
    (for example for empty text or text without usable features).
    """
    try:
        return detect(text)
    except LangDetectException:
        return "unknown"


def _normalize_label(raw_label: str) -> str:
    """Translate a raw model label into a lower-case sentiment name.

    Labels present in ``LABEL_MAP`` are mapped to their canonical name; any
    other label is passed through lower-cased.
    """
    upper_label = raw_label.upper()
    return LABEL_MAP.get(upper_label, upper_label).lower()


@app.get("/")
def home():
    """Health-check endpoint confirming that the API is up."""
    return {"message": "Sentiment Analysis API is running!"}


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Classify the sentiment of the submitted text.

    The text's language is detected first; English text is routed to the
    English model and everything else (including undetectable text) to the
    multilingual model.

    Returns a ``SentimentResponse`` carrying the original text, the detected
    language, the normalized sentiment label and the model's score.
    """
    text = request.text
    language = detect_language(text)

    # Choose the appropriate model based on language.
    model = english_model if language == "en" else multilingual_model

    # Truncate over-long input to the model's maximum sequence length instead
    # of letting the model fail on it.
    result = model(text, truncation=True)
    logger.debug("Model Output: %s", result)

    top_prediction = result[0]
    return SentimentResponse(
        original_text=text,
        language_detected=language,
        # Usually "positive", "negative" or "neutral"; labels missing from
        # LABEL_MAP are passed through lower-cased.
        sentiment=_normalize_label(top_prediction["label"]),
        confidence_score=top_prediction["score"],
    )