File size: 2,745 Bytes
d3830cc
 
 
056dc2d
d3830cc
 
 
e542c59
 
 
 
 
 
 
 
98186a7
e542c59
 
 
 
 
aea35b3
e542c59
 
 
 
faf7c96
e542c59
 
 
98186a7
e542c59
 
 
19a55cd
e542c59
 
 
 
 
 
 
 
 
 
 
bb7d89b
 
056dc2d
813b46e
056dc2d
e542c59
813b46e
e542c59
 
19a55cd
 
 
e542c59
 
 
 
 
 
 
 
 
813b46e
 
19a55cd
 
 
 
 
e542c59
19a55cd
e542c59
19a55cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""
python interactive.py
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline
import gradio as gr

# Module-level state: the default checkpoint is loaded when this module is
# imported/executed, before any request is served.
MODEL_NAME = 'momo/KcBERT-base_Hate_speech_Privacy_Detection'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels= 15,  # matches the 15 label names assigned in predict()
    problem_type="multi_label_classification"
)

# Holds the currently loaded checkpoint. predict() reads the tokenizer/model
# from here, and change_model_name() overwrites the entries when a different
# model name is requested.
MODEL_BUF = {
    "name": MODEL_NAME,
    "tokenizer": tokenizer,
    "model": model,
}

def change_model_name(name):
    """Load checkpoint ``name`` and make it the active entry in ``MODEL_BUF``.

    Args:
        name: Model identifier passed unchanged to
            ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForSequenceClassification.from_pretrained``.

    Raises:
        Any exception raised by either ``from_pretrained`` call. In that
        case ``MODEL_BUF`` is left exactly as it was.
    """
    # Load both artifacts BEFORE touching MODEL_BUF. If the name were stored
    # first and a load then failed, the buffer would advertise the new name
    # while still holding the old tokenizer/model, and predict() would skip
    # the reload next time and silently answer with the wrong model.
    new_tokenizer = AutoTokenizer.from_pretrained(name)
    new_model = AutoModelForSequenceClassification.from_pretrained(name)

    MODEL_BUF["name"] = name
    MODEL_BUF["tokenizer"] = new_tokenizer
    MODEL_BUF["model"] = new_model

def predict(model_name, text):
    """Score ``text`` against the 15 hate-speech / personal-information labels.

    Args:
        model_name: Name of the checkpoint to use. When it differs from the
            one currently held in ``MODEL_BUF`` that checkpoint is loaded
            first. A falsy value (``None`` or ``""``, e.g. from an unset
            dropdown) keeps the currently loaded model.
        text: The sentence to classify.

    Returns:
        A list of strings, one per label, in the order the pipeline returns
        them. Each entry is ``"<label>: <score>"`` followed by a tab, where
        the score is the sigmoid output formatted to four decimals.
    """
    # Only reload when a different, non-empty model name was requested.
    if model_name and model_name != MODEL_BUF["name"]:
        change_model_name(model_name)

    tokenizer = MODEL_BUF["tokenizer"]
    model = MODEL_BUF["model"]

    # Label names in class-index order (Korean; this file must stay UTF-8).
    unsmile_labels = [
        "여성/가족", "남성", "성소수자", "인종/국적", "연령", "지역", "종교",
        "기타 혐오", "악플/욕설", "clean",
        "이름", "전화번호", "주소", "계좌번호", "주민번호",
    ]

    # Attach readable label names so the pipeline reports them.
    model.config.id2label = dict(enumerate(unsmile_labels))
    model.config.label2id = {label: i for i, label in enumerate(unsmile_labels)}

    pipe = TextClassificationPipeline(
        model=model,
        tokenizer=tokenizer,
        return_all_scores=True,
        function_to_apply='sigmoid',
    )

    # Run inference once and reuse the result for both logging and output.
    scores = pipe(text)[0]
    print(scores)

    # Each entry is a {'label': ..., 'score': ...} dict, so it has to be
    # formatted explicitly; concatenating the dict with a string raises
    # TypeError.
    return [f"{entry['label']}: {entry['score']:.4f}\t" for entry in scores]

if __name__ == '__main__':
    # Sample inputs offered in the UI (Korean; this file must stay UTF-8):
    # a street address, a phone number, and an annoyed exclamation.
    exam1 = '경기도 성남시 수정구 태평3동은 우리 동네야!'
    exam2 = '내 핸드폰 번호는 010-3930-8237 이야!'
    exam3 = '아 젠장 너무 짜증난다'

    model_name_list = [
        'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
        "momo/KcBERT-base_Hate_speech_Privacy_Detection",
    ]

    # Build the Gradio app: a model dropdown plus a text box feeding predict().
    app = gr.Interface(
        fn=predict,
        inputs=[
            # Preselect the checkpoint that is already loaded, so the first
            # submit neither arrives without a model name nor forces an
            # immediate reload of a different checkpoint.
            gr.inputs.Dropdown(
                model_name_list,
                default=MODEL_BUF["name"],
                label="Model Name",
            ),
            'text',
        ],
        outputs='text',
        examples=[
            [MODEL_BUF["name"], exam1],
            [MODEL_BUF["name"], exam2],
            [MODEL_BUF["name"], exam3],
        ],
        title="한국어 혐오표현, 개인정보 판별기 (Korean Hate Speech and Privacy Detection)",
        description="Korean Hate Speech and Privacy Detection. \t 15개 label Detection: 여성/가족, 남성, 성소수자, 인종/국적, 연령, 지역, 종교, 기타 혐오, 악플/욕설, clean, name, number, address, bank, person",
    )
    app.launch()