File size: 3,110 Bytes
246135d
 
 
 
 
 
 
612a64b
 
6c66e7f
0121888
6c66e7f
246135d
 
612a64b
246135d
 
 
 
 
 
 
612a64b
246135d
 
 
 
 
 
 
612a64b
246135d
 
 
 
 
 
 
612a64b
53fd1b9
246135d
 
 
612a64b
246135d
b5c3986
 
246135d
 
612a64b
246135d
b5c3986
612a64b
246135d
 
 
 
 
 
b5c3986
246135d
612a64b
246135d
 
 
 
 
 
612a64b
246135d
 
 
612a64b
246135d
 
612a64b
246135d
 
 
 
 
 
 
 
9ee936a
 
 
790f5bc
9ee936a
 
246135d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import nltk
import numpy as np
from nltk.stem.porter import PorterStemmer
import json
import torch
import torch.nn as nn
import random
import gradio as gr

# Download the 'punkt' and 'punkt_tab' NLTK resources at import time.
nltk.download('punkt')
nltk.download('punkt_tab')

# Load the intents dataset. The encoding is pinned to UTF-8 so the file is
# decoded the same way regardless of the platform's default locale encoding.
with open('dataset.json', 'r', encoding='utf-8') as file:
    dataset = json.load(file)

class NeuralNetwork(nn.Module):
    """Feed-forward network: three linear layers with ReLU after the first two.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of both hidden layers.
        num_classes: Number of scores produced per input.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # The attribute names become the state_dict key prefixes, so they
        # must stay as they are for saved weights to load.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw output scores for ``x`` (no softmax is applied)."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

# Everything runs on the CPU; the same device object is used both to map the
# checkpoint tensors and to place the model.
device = torch.device('cpu')

# NOTE: torch.load is pickle-based, so only load checkpoints from a trusted
# source.
data = torch.load('model_chatbot.pth', map_location=device)

# The checkpoint is a dict holding the network dimensions, the vocabulary,
# the list of intent tags and the trained weights.
input_size, hidden_size, output_size = (
    data["input_size"],
    data["hidden_size"],
    data["output_size"],
)
all_words, tags = data["all_words"], data["tags"]
model_state = data["model_state"]

# Rebuild the network with the stored dimensions, restore its weights and
# switch it to evaluation mode.
model = NeuralNetwork(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

# Helpers for tokenization, stemming, and bag-of-words encoding.
# A single Porter stemmer instance is created here and shared by stem().
stemmer = PorterStemmer()

def tokenize(sentence):
    """Split ``sentence`` into tokens with NLTK's ``word_tokenize``."""
    tokens = nltk.word_tokenize(sentence)
    return tokens

def stem(word):
    """Return the Porter stem of ``word`` after lower-casing it."""
    lowered = word.lower()
    return stemmer.stem(lowered)

def bag_of_words(tokenized_sentence, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Iterable of raw tokens; each one is passed
            through ``stem`` before matching.
        all_words: Sequence of vocabulary words. Position ``i`` of the
            result corresponds to ``all_words[i]``.

    Returns:
        A 1-D ``float32`` NumPy array of length ``len(all_words)`` holding
        1.0 where the vocabulary word equals one of the stemmed tokens and
        0.0 elsewhere.
    """
    # A set makes each membership test O(1) instead of rescanning the token
    # list once per vocabulary word; the resulting vector is the same.
    stemmed_tokens = {stem(token) for token in tokenized_sentence}

    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, word in enumerate(all_words):
        if word in stemmed_tokens:
            bag[idx] = 1.0

    return bag

# Prediction function
def predict(sentence):
    """Return a chatbot reply for ``sentence``.

    The sentence is tokenized, encoded as a bag-of-words vector over
    ``all_words`` and passed through ``model``. If the softmax probability
    of the highest-scoring tag exceeds 0.75, a random entry from the
    matching intent's ``'response'`` list in ``dataset`` is returned.

    Args:
        sentence: The user's question as a string.

    Returns:
        A response string. The fallback message is returned when the
        prediction is not confident enough, and also when no intent in the
        dataset carries the predicted tag (previously that case fell off the
        end of the loop and returned ``None``).
    """
    fallback = "Saya tidak mengerti pertanyaan Anda."

    tokens = tokenize(sentence)
    features = bag_of_words(tokens, all_words)
    # The model expects a batch dimension: shape (1, vocabulary size).
    features = features.reshape(1, features.shape[0])
    features = torch.from_numpy(features).to(device)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        output = model(features)
        _, predicted = torch.max(output, dim=1)
        probs = torch.softmax(output, dim=1)

    index = predicted.item()
    tag = tags[index]
    confidence = probs[0][index].item()

    if confidence > 0.75:
        for intent in dataset['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['response'])

    # Reached on low confidence, or when no intent matched the predicted tag.
    return fallback

# Build the Gradio interface: a single text box feeds predict(), and the
# returned string is rendered as Markdown.
# NOTE(review): the description text contains a duplicated word
# ("bisa hanya bisa"); it is left untouched here because it is user-facing
# runtime text -- consider fixing it separately.
iface = gr.Interface(
    fn=predict, 
    inputs=gr.Textbox(placeholder="Silahkan masukkan pertanyaan...."), 
    outputs="markdown", 
    title="Chatbot Hukum Pajak",
    description="Selamat Datang di chatbot pajak.\n\nChatbot ini bisa hanya bisa menjawab beberapa pertanyaan dasar tentang hukum pajak dari pasal 21, pasal 22, pasal 23, dan pasal 24.\n\nContoh pertanyaan seperti berikut:\n\n1. Apa itu pajak?\n\n2. Apa itu pajak penghasilan?\n\n3. Bisakah kamu jelaskan apa itu pajak penghasilan berdasarkan pasal 22?\n\n4. dan lain-lain.\n\nJika ingin melihat bagaimana chatbot ini dilatih, kalian bisa melihat dataset sederhana yang saya buat:\n\nhttps://huggingface.co/chandra10/chatbot_pajak/resolve/main/dataset.json\n\nModel chatbot ini belum sempurna. Jadi, silahkan beri saran di community")

# Launch the interface; share=True asks Gradio for a public share link.
iface.launch(share=True)