Spaces:

NeuroSpaceX
/

ForModelTest

Sleeping

NeuroSpaceX committed on Mar 21

Commit

a9b4211

verified ·

1 Parent(s): b7a5d14

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import re
 import emoji
-MODEL_NAME = "NeuroSpaceX/ruSpamNS_test"
 TOKEN = os.getenv("HF_TOKEN")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=TOKEN)
@@ -15,24 +15,19 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model.to(device)
 def clean_text(text):
-    # Удаляем эмодзи
     text = emoji.replace_emoji(text, replace='')
-    # Удаляем цифры и символы, кроме букв и пробела
     text = re.sub(r'[^a-zA-Zа-яА-ЯёЁ ]', '', text, flags=re.UNICODE)
-    # Приводим текст в нижний регистр
     text = text.lower()
-    # Делаем первую букву заглавной
     text = text.capitalize()
-    # Убираем лишние пробелы
     text = re.sub(r'\s+', ' ', text).strip()
     return text
-def classify_text(text):
     message = clean_text(text)
     encoding = tokenizer(message, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
     input_ids = encoding['input_ids'].to(device)
@@ -45,10 +40,13 @@ def classify_text(text):
 iface = gr.Interface(
     fn=classify_text,
-    inputs=gr.Textbox(lines=3, placeholder="Введите текст..."),
     outputs="text",
     title="ruSpamNS - Проверка на спам",
     description="Введите текст, чтобы проверить, является ли он спамом."
 )
-iface.launch()

 import re
 import emoji
+MODEL_NAME = "NeuroSpaceX/ruSpamNS"
 TOKEN = os.getenv("HF_TOKEN")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=TOKEN)
 model.to(device)
 def clean_text(text):
     """Normalize a raw message before it is passed to the tokenizer.

     Strips emoji, drops every character that is not a Latin or Cyrillic
     letter or a plain space, lowercases the text, capitalizes it, and
     finally collapses whitespace runs and trims both ends.

     Args:
         text: The raw input string.

     Returns:
         The cleaned string.
     """
     # Replace every emoji with the empty string.
     text = emoji.replace_emoji(text, replace='')
     # Delete anything that is not a-z, A-Z, а-я, А-Я, ё/Ё or a literal space.
     # This also deletes digits, punctuation, tabs and newlines.
     # NOTE(review): words separated only by a tab/newline get glued together
     # here -- confirm that this is intended.
     text = re.sub(r'[^a-zA-Zа-яА-ЯёЁ ]', '', text, flags=re.UNICODE)
     text = text.lower()
     # capitalize() upper-cases the first character and lower-cases the rest.
     # NOTE(review): this runs before strip(), so if the string starts with a
     # space at this point the first letter stays lowercase -- confirm intent.
     text = text.capitalize()
     # Collapse whitespace runs to a single space and trim the ends.
     text = re.sub(r'\s+', ' ', text).strip()
     return text
+def classify_text(text, model_choice):
+    model_name = f"NeuroSpaceX/ruSpamNS_{model_choice}"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=TOKEN)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=TOKEN)
+    model.to(device)
     message = clean_text(text)
     encoding = tokenizer(message, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
     input_ids = encoding['input_ids'].to(device)
 iface = gr.Interface(
     fn=classify_text,
+    inputs=[
+        gr.Textbox(lines=3, placeholder="Введите текст..."),
+        gr.Radio(["small", "big"], label="Выберите модель")
+    ],
     outputs="text",
     title="ruSpamNS - Проверка на спам",
     description="Введите текст, чтобы проверить, является ли он спамом."
 )
+iface.launch()