app.py
CHANGED
@@ -2,8 +2,9 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import BertTokenizer, BertModel
|
4 |
import torch.nn.functional as F
|
|
|
5 |
|
6 |
-
# Load model
|
7 |
model_name = "shobrunjb/mtl-indoBERT-product-review"
|
8 |
tokenizer = BertTokenizer.from_pretrained(model_name)
|
9 |
|
@@ -25,34 +26,38 @@ class IndoBERTMultiTaskClassifier(torch.nn.Module):
|
|
25 |
|
26 |
return logits_task1, logits_task2
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
36 |
|
37 |
# Define label mappings
|
38 |
label_mapping_task1 = ["trusted", "fake", "non"] # Adjust with your task1 labels
|
39 |
label_mapping_task2 = ["positive", "negative", "neutral"] # Adjust with your task2 labels
|
40 |
|
41 |
def classify(text):
|
42 |
-
#
|
43 |
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
|
44 |
input_ids = inputs['input_ids']
|
45 |
attention_mask = inputs['attention_mask']
|
46 |
|
47 |
-
#
|
48 |
with torch.no_grad():
|
49 |
logits_task1, logits_task2 = model(input_ids, attention_mask)
|
50 |
|
51 |
-
# Softmax
|
52 |
probs_task1 = F.softmax(logits_task1, dim=1).cpu().numpy()
|
53 |
probs_task2 = F.softmax(logits_task2, dim=1).cpu().numpy()
|
54 |
|
55 |
-
#
|
56 |
pred_task1 = label_mapping_task1[probs_task1.argmax()]
|
57 |
pred_task2 = label_mapping_task2[probs_task2.argmax()]
|
58 |
|
@@ -61,8 +66,8 @@ def classify(text):
|
|
61 |
# Gradio Interface
|
62 |
iface = gr.Interface(fn=classify,
|
63 |
inputs="text",
|
64 |
-
outputs=[gr.
|
65 |
-
gr.
|
66 |
title="Multitask IndoBERT: Fake Review & Sentiment Classification",
|
67 |
description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.")
|
68 |
|
|
|
2 |
import torch
|
3 |
from transformers import BertTokenizer, BertModel
|
4 |
import torch.nn.functional as F
|
5 |
+
import os
|
6 |
|
7 |
+
# Hugging Face model identifier. It is used here to load the tokenizer only;
# the same name is passed again as bert_model_name when the classifier is
# constructed further down, and the fine-tuned weights are read separately
# from a local checkpoint file.
model_name = "shobrunjb/mtl-indoBERT-product-review"
tokenizer = BertTokenizer.from_pretrained(model_name)
|
10 |
|
|
|
26 |
|
27 |
return logits_task1, logits_task2
|
28 |
|
29 |
+
# Restore the fine-tuned multi-task classifier from a local checkpoint.
# The path is relative, so it is resolved against the current working
# directory; fail fast with a clear message when the file is absent.
model_path = "pytorch_model.bin"
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file '{model_path}' not found. Please ensure the file is available.")

model = IndoBERTMultiTaskClassifier(
    bert_model_name=model_name,
    num_labels_task1=3,  # Adjust with your task1 classes
    num_labels_task2=3,  # Adjust with your task2 classes
)
# map_location pins every tensor to the CPU so the checkpoint also loads on
# machines without a GPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
# Put the module in evaluation mode before it is used for predictions.
model.eval()
|
41 |
|
42 |
# Define label mappings
# The position of a name in each list is its class index: classify() selects
# an entry using the argmax of the corresponding task's softmax output.
label_mapping_task1 = [  # Adjust with your task1 labels
    "trusted",
    "fake",
    "non",
]
label_mapping_task2 = [  # Adjust with your task2 labels
    "positive",
    "negative",
    "neutral",
]
|
45 |
|
46 |
def classify(text):
|
47 |
+
# Tokenize input text
|
48 |
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
|
49 |
input_ids = inputs['input_ids']
|
50 |
attention_mask = inputs['attention_mask']
|
51 |
|
52 |
+
# Prediction with model
|
53 |
with torch.no_grad():
|
54 |
logits_task1, logits_task2 = model(input_ids, attention_mask)
|
55 |
|
56 |
+
# Softmax to get probabilities
|
57 |
probs_task1 = F.softmax(logits_task1, dim=1).cpu().numpy()
|
58 |
probs_task2 = F.softmax(logits_task2, dim=1).cpu().numpy()
|
59 |
|
60 |
+
# Predict label with highest probability
|
61 |
pred_task1 = label_mapping_task1[probs_task1.argmax()]
|
62 |
pred_task2 = label_mapping_task2[probs_task2.argmax()]
|
63 |
|
|
|
66 |
# Gradio Interface
# A single free-text input is routed to classify(); the two Label components
# display the fake-review verdict and the sentiment respectively.
# NOTE(review): classify() is expected to return one value per output
# component, in this order - its return statement is outside this view.
iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs=[
        gr.Label(label="Fake Review Detection"),
        gr.Label(label="Sentiment Classification"),
    ],
    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
)
|
73 |
|