NeuroSpaceX committed on
Commit
67d2259
·
verified ·
1 Parent(s): a9b4211

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -5,14 +5,18 @@ import os
5
  import re
6
  import emoji
7
 
8
- MODEL_NAME = "NeuroSpaceX/ruSpamNS"
9
  TOKEN = os.getenv("HF_TOKEN")
10
 
11
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=TOKEN)
12
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, use_auth_token=TOKEN)
 
 
 
 
 
 
13
 
14
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
- model.to(device)
16
 
17
  def clean_text(text):
18
  text = emoji.replace_emoji(text, replace='')
@@ -23,18 +27,18 @@ def clean_text(text):
23
  return text
24
 
25
  def classify_text(text, model_choice):
26
- model_name = f"NeuroSpaceX/ruSpamNS_{model_choice}"
27
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=TOKEN)
28
- model = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=TOKEN)
29
- model.to(device)
30
 
31
  message = clean_text(text)
32
  encoding = tokenizer(message, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
33
  input_ids = encoding['input_ids'].to(device)
34
  attention_mask = encoding['attention_mask'].to(device)
 
35
  with torch.no_grad():
36
  outputs = model(input_ids, attention_mask=attention_mask).logits
37
  prediction = torch.sigmoid(outputs).cpu().numpy()[0][0]
 
38
  label = "СПАМ" if prediction >= 0.5 else "НЕ СПАМ"
39
  return f"{label} (вероятность: {prediction*100:.2f}%)"
40
 
@@ -42,7 +46,7 @@ iface = gr.Interface(
42
  fn=classify_text,
43
  inputs=[
44
  gr.Textbox(lines=3, placeholder="Введите текст..."),
45
- gr.Radio(["small", "big"], label="Выберите модель")
46
  ],
47
  outputs="text",
48
  title="ruSpamNS - Проверка на спам",
 
5
  import re
6
  import emoji
7
 
 
8
  TOKEN = os.getenv("HF_TOKEN")
9
 
10
+ models = {
11
+ "ruSpamNS_v13": "NeuroSpaceX/ruSpamNS_v13",
12
+ "ruSpamNS_big": "NeuroSpaceX/ruSpamNS_big",
13
+ "ruSpamNS_small": "NeuroSpaceX/ruSpamNS_small"
14
+ }
15
+
16
+ tokenizers = {name: AutoTokenizer.from_pretrained(path, use_auth_token=TOKEN) for name, path in models.items()}
17
+ models = {name: AutoModelForSequenceClassification.from_pretrained(path, use_auth_token=TOKEN).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu')) for name, path in models.items()}
18
 
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
20
 
21
  def clean_text(text):
22
  text = emoji.replace_emoji(text, replace='')
 
27
  return text
28
 
29
  def classify_text(text, model_choice):
30
+ tokenizer = tokenizers[model_choice]
31
+ model = models[model_choice]
 
 
32
 
33
  message = clean_text(text)
34
  encoding = tokenizer(message, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
35
  input_ids = encoding['input_ids'].to(device)
36
  attention_mask = encoding['attention_mask'].to(device)
37
+
38
  with torch.no_grad():
39
  outputs = model(input_ids, attention_mask=attention_mask).logits
40
  prediction = torch.sigmoid(outputs).cpu().numpy()[0][0]
41
+
42
  label = "СПАМ" if prediction >= 0.5 else "НЕ СПАМ"
43
  return f"{label} (вероятность: {prediction*100:.2f}%)"
44
 
 
46
  fn=classify_text,
47
  inputs=[
48
  gr.Textbox(lines=3, placeholder="Введите текст..."),
49
+ gr.Radio(["ruSpamNS_v13", "ruSpamNS_big", "ruSpamNS_small"], label="Выберите модель")
50
  ],
51
  outputs="text",
52
  title="ruSpamNS - Проверка на спам",