File size: 1,869 Bytes
95fc527
c43a80c
20712aa
4d93fa1
25b797a
20712aa
25b797a
 
 
c43a80c
8aec507
53fd703
1294d13
95fc527
20712aa
 
4d93fa1
 
 
1294d13
4d93fa1
 
c43a80c
 
4d93fa1
 
c43a80c
1294d13
4d93fa1
 
20712aa
 
 
 
 
b147674
 
 
 
 
25b797a
b147674
25b797a
1294d13
b147674
20712aa
 
 
 
 
b147674
 
 
 
25b797a
4d93fa1
 
 
25b797a
b147674
 
 
25b797a
 
b147674
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
from langdetect import detect, DetectorFactory

# Pin langdetect's seed so repeated detection of the same text gives the
# same answer.
DetectorFactory.seed = 0

# Route both Hugging Face cache variables to one writable directory and make
# sure that directory exists before any model is downloaded.
# NOTE(review): this runs after `transformers` has already been imported at
# the top of the file; confirm the library still honours variables set this
# late, otherwise this assignment needs to move ahead of that import.
os.environ.update(
    dict.fromkeys(("HF_HOME", "TRANSFORMERS_CACHE"), "/tmp/huggingface")
)
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

# Application object; the route handlers further down register on it.
app = FastAPI()

# Hugging Face Hub identifiers of the two sentiment checkpoints.
english_model_name = "siebert/sentiment-roberta-large-english"
multilingual_model_name = "johndoee/sentiment"

# Multilingual pipeline: its tokenizer is loaded explicitly and handed to the
# pipeline. Built once at import time and reused by every request.
multilingual_tokenizer = AutoTokenizer.from_pretrained(multilingual_model_name)
multilingual_model = pipeline(
    task="sentiment-analysis",
    model=multilingual_model_name,
    tokenizer=multilingual_tokenizer,
)

# English pipeline: only the model name is given, so the pipeline resolves
# its own tokenizer. Also built once at import time.
english_model = pipeline(task="sentiment-analysis", model=english_model_name)

# Request body for POST /analyze/.
class SentimentRequest(BaseModel):
    # Text whose sentiment should be classified.
    text: str

# Response body returned by POST /analyze/.
class SentimentResponse(BaseModel):
    # The request text, echoed back unchanged.
    original_text: str
    # Language code produced by detect_language(); "unknown" when detection
    # raised.
    language_detected: str
    # Label of the first pipeline result, lower-cased.
    sentiment: str
    # Score the pipeline reported for that label.
    confidence_score: float

def detect_language(text):
    """Return the language code langdetect reports for *text*.

    Detection is best-effort: if ``detect`` raises for any reason, the
    string ``"unknown"`` is returned instead of propagating the error.
    """
    try:
        language_code = detect(text)
    except Exception:
        # Deliberate catch-all: a detection failure must not fail the request.
        language_code = "unknown"
    return language_code

@app.get("/")
def home():
    # Root endpoint: returns a fixed status payload so callers can verify
    # that the service is up.
    status = {"message": "Sentiment Analysis API is running!"}
    return status

@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Classify the sentiment of the submitted text.

    The language of the text is detected first. English text is scored by
    the English model; everything else, including text whose language could
    not be determined, is scored by the multilingual model. The response
    echoes the text and reports the detected language, the lower-cased label
    and its score.
    """
    text = request.text
    language = detect_language(text)

    # Choose the appropriate model based on detected language
    model = english_model if language == "en" else multilingual_model

    # The request text has no length limit. Ask the tokenizer to truncate so
    # that an over-long text is cut down to the tokenizer's configured
    # maximum length instead of failing inside the model.
    result = model(text, truncation=True)

    # A single input string yields a one-element list; take its only entry.
    top_prediction = result[0]

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=top_prediction["label"].lower(),
        confidence_score=top_prediction["score"],
    )