Update app.py
app.py CHANGED
@@ -5,14 +5,18 @@ import os
 import re
 import emoji
 
-MODEL_NAME = "NeuroSpaceX/ruSpamNS"
 TOKEN = os.getenv("HF_TOKEN")
 
-
-
+models = {
+    "ruSpamNS_v13": "NeuroSpaceX/ruSpamNS_v13",
+    "ruSpamNS_big": "NeuroSpaceX/ruSpamNS_big",
+    "ruSpamNS_small": "NeuroSpaceX/ruSpamNS_small"
+}
+
+tokenizers = {name: AutoTokenizer.from_pretrained(path, use_auth_token=TOKEN) for name, path in models.items()}
+models = {name: AutoModelForSequenceClassification.from_pretrained(path, use_auth_token=TOKEN).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu')) for name, path in models.items()}
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model.to(device)
 
 def clean_text(text):
     text = emoji.replace_emoji(text, replace='')
@@ -23,18 +27,18 @@ def clean_text(text):
     return text
 
 def classify_text(text, model_choice):
-
-
-    model = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=TOKEN)
-    model.to(device)
+    tokenizer = tokenizers[model_choice]
+    model = models[model_choice]
 
     message = clean_text(text)
     encoding = tokenizer(message, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
     input_ids = encoding['input_ids'].to(device)
     attention_mask = encoding['attention_mask'].to(device)
+
     with torch.no_grad():
         outputs = model(input_ids, attention_mask=attention_mask).logits
         prediction = torch.sigmoid(outputs).cpu().numpy()[0][0]
+
     label = "СПАМ" if prediction >= 0.5 else "НЕ СПАМ"
     return f"{label} (вероятность: {prediction*100:.2f}%)"
 
@@ -42,7 +46,7 @@ iface = gr.Interface(
     fn=classify_text,
     inputs=[
         gr.Textbox(lines=3, placeholder="Введите текст..."),
-        gr.Radio(["
+        gr.Radio(["ruSpamNS_v13", "ruSpamNS_big", "ruSpamNS_small"], label="Выберите модель")
     ],
     outputs="text",
     title="ruSpamNS - Проверка на спам",