# Spaces: Running (page-status text captured along with this file; not part of the program)
import os

# Set the Hugging Face cache directory to a writable location.
# This must happen BEFORE `transformers` is imported: the Hugging Face
# libraries read HF_HOME while they are being imported, so exporting it
# afterwards leaves the cache at its default location.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from fastapi import FastAPI  # noqa: E402
from langdetect import DetectorFactory, detect  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoTokenizer, pipeline  # noqa: E402

# Ensure consistent language detection results across calls.
DetectorFactory.seed = 0

app = FastAPI()

# The models below are created once at import time and reused by every call.

# Load the original tokenizer from the base model.
original_tokenizer = AutoTokenizer.from_pretrained(
    "tabularisai/multilingual-sentiment-analysis"
)

# Load the fine-tuned model and pass the tokenizer explicitly.
multilingual_model = pipeline(
    "sentiment-analysis",
    model="Ehrii/sentiment",
    tokenizer=original_tokenizer,
)

# The English model is used unchanged.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)
# Request payload for sentiment analysis: a single piece of text.
# (Documented with comments rather than a class docstring so the generated
# schema for this model is left exactly as it was.)
class SentimentRequest(BaseModel):
    # The text whose sentiment should be analysed.
    text: str
# Result payload built by analyze_sentiment().
# (Documented with comments rather than a class docstring so the generated
# schema for this model is left exactly as it was.)
class SentimentResponse(BaseModel):
    # The input text, echoed back unchanged.
    original_text: str
    # Value returned by detect_language(): a detected code, or "unknown".
    language_detected: str
    # Lower-cased label reported by the selected model.
    sentiment: str
    # Score the selected model attached to that label.
    confidence_score: float
def detect_language(text: str) -> str:
    """Return the language code langdetect reports for *text*.

    Detection is best-effort: if ``detect`` raises for any reason, the
    string ``"unknown"`` is returned instead of propagating the error.
    """
    try:
        language_code = detect(text)
    except Exception:
        # Deliberate catch-all so a detection failure never aborts the caller.
        return "unknown"
    return language_code
# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered on `app` somewhere, otherwise it is never served.
def home() -> dict[str, str]:
    """Return a static status message indicating the API is up."""
    status = {"message": "Sentiment Analysis API is running!"}
    return status
# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered on `app` somewhere, otherwise it is never served.
def analyze_sentiment(request: SentimentRequest) -> SentimentResponse:
    """Classify the sentiment of ``request.text``.

    The text's language is detected first. English text (``"en"``) is sent
    to ``english_model``; everything else, including ``"unknown"``, is sent
    to ``multilingual_model``.

    Args:
        request: Payload carrying the text to analyse.

    Returns:
        A ``SentimentResponse`` with the original text, the detected
        language, the lower-cased model label and the model's score.
    """
    text = request.text
    language = detect_language(text)

    # Choose the appropriate model based on language.
    classifier = english_model if language == "en" else multilingual_model

    # truncation=True is forwarded to the tokenizer so that input longer than
    # the model's maximum sequence length is cut to fit instead of making the
    # pipeline raise.
    prediction = classifier(text, truncation=True)[0]

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=prediction["label"].lower(),
        confidence_score=prediction["score"],
    )