import torch
import numpy as np
from transformers import BertModel, AutoTokenizer, BertForSequenceClassification
from model_services.model import IndoBertEcommerceReview, IndoBertCNNEcommerceReview, IndoBertLSTMEcommerceReview
import streamlit as st


# Shared module state: the tokenizer, the base encoder, and the three
# fine-tuned classifiers. All start unset and are populated by the
# model-loading block below; ready_status records whether that succeeded.
ready_status = False
bert = tokenizer = None
indobert_model = indobertcnn_model = indobertlstm_model = None


with st.status("Loading models...", expanded=True, state='running') as status:
    try:
        # Base encoder/classifier and the shared tokenizer.
        bertSequence = BertForSequenceClassification.from_pretrained(
            "indobenchmark/indobert-base-p1",
            num_labels=3,
            problem_type="multi_label_classification",
        )
        bert = BertModel.from_pretrained("indobenchmark/indobert-base-p1")
        tokenizer = AutoTokenizer.from_pretrained("fahrendrakhoirul/indobert-finetuned-ecommerce-reviews")

        # Fine-tuned multi-label review classifiers built on the bases above.
        indobert_model = IndoBertEcommerceReview.from_pretrained(
            "fahrendrakhoirul/indobert-finetuned-ecommerce-reviews", bert=bertSequence)
        st.write("IndoBERT model loaded")
        indobertcnn_model = IndoBertCNNEcommerceReview.from_pretrained(
            "fahrendrakhoirul/indobert-cnn-finetuned-ecommerce-reviews", bert=bert)
        st.write("IndoBERT-CNN model loaded")
        indobertlstm_model = IndoBertLSTMEcommerceReview.from_pretrained(
            "fahrendrakhoirul/indobert-lstm-finetuned-ecommerce-reviews", bert=bert)
        st.write("IndoBERT-LSTM model loaded")
    except Exception as e:
        # Without this handler any download/load failure escaped the block,
        # so the status.error branch below was unreachable.
        st.write(f"Model loading failed: {e}")

    # BUG FIX: the original condition `a and b and c != None` only compared
    # the LAST model to None (precedence); check all three explicitly.
    ready_status = all(
        m is not None
        for m in (indobert_model, indobertcnn_model, indobertlstm_model)
    )
    if ready_status:
        status.update(label="Models loaded successfully", expanded=False)
        status.success("Models loaded successfully", icon="✅")
    else:
        status.error("Failed to load models")


# def init():
#     global ready_status, bert, tokenizer, indobert_model, indobertcnn_model, indobertlstm_model
#     try:
#         # Load the base model and tokenizer
#         bert = BertModel.from_pretrained("indobenchmark/indobert-base-p1")
#         tokenizer = AutoTokenizer.from_pretrained("fahrendrakhoirul/indobert-finetuned-ecommerce-reviews")
        
#         # Load custom models
#         indobert_model = IndoBertEcommerceReview.from_pretrained("fahrendrakhoirul/indobert-finetuned-ecommerce-reviews", bert=bert)
#         print("IndoBERT model loaded")
#         indobertcnn_model = IndoBertCNNEcommerceReview.from_pretrained("fahrendrakhoirul/indobert-cnn-finetuned-ecommerce-reviews", bert=bert)
#         print("IndoBERT-CNN model loaded")
#         indobertlstm_model = IndoBertLSTMEcommerceReview.from_pretrained("fahrendrakhoirul/indobert-lstm-finetuned-ecommerce-reviews", bert=bert)
#         print("IndoBERT-LSTM model loaded")
#         ready_status = True
#         return True
#     except Exception as e:
#         print(f"Failed to initialize models: {e}")
#         ready_status = False
#         return False

def predict(text: str, model_name: str):
    """Score one review with the selected classifier.

    Args:
        text: A single review string.
        model_name: One of "IndoBERT", "IndoBERT-CNN", or
            "IndoBERT-LSTM (Best)" (the UI option strings).

    Returns:
        A 1-D numpy array of three sigmoid scores; get_label() maps them to
        the Product / Customer Service / Shipping-Delivery aspects.

    Raises:
        ValueError: If model_name is not one of the known options.
    """
    models = {
        "IndoBERT": indobert_model,
        "IndoBERT-CNN": indobertcnn_model,
        "IndoBERT-LSTM (Best)": indobertlstm_model,
    }
    try:
        model = models[model_name]
    except KeyError:
        # Previously an unknown name fell through with model=None and crashed
        # later with an opaque AttributeError; fail fast and clearly instead.
        raise ValueError(f"Unknown model name: {model_name!r}") from None
    token_result = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(
            input_ids=token_result["input_ids"],
            attention_mask=token_result["attention_mask"],
        )
        preds = torch.sigmoid(logits).detach().cpu().numpy()[0]
    return preds

def get_label(preds):
    """Return the aspect names whose sigmoid score clears the 0.6 cutoff.

    preds is indexed positionally: [Product, Customer Service,
    Shipping/Delivery].
    """
    names = ["Product", "Customer Service", "Shipping/Delivery"]
    picked = []
    for idx, name in enumerate(names):
        if preds[idx] > 0.6:
            picked.append(name)
    return picked

def get_result(reviews: list[str], model_name: str):
    """Classify every review with the chosen model.

    Returns one dict per review holding the original text, the raw sigmoid
    scores from predict(), and the thresholded labels from get_label().
    """
    def _classify(text: str) -> dict:
        # One review -> scores -> thresholded aspect labels.
        scores = predict(text, model_name)
        return {
            "review": text,
            "predicted_score": scores,
            "predicted_labels": get_label(scores),
        }

    return [_classify(text) for text in reviews]