mihalykiss committed on
Commit
a08a74d
·
1 Parent(s): b55fefb
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -2,8 +2,9 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
 
5
- model_path = "modernbert.bin"
6
- huggingface_model_url = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
 
7
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
8
 
9
  tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
@@ -16,6 +17,11 @@ model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/Modern
16
  model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
17
  model_2.to(device).eval()
18
 
 
 
 
 
 
19
  label_mapping = {
20
  0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
21
  6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
@@ -31,18 +37,24 @@ label_mapping = {
31
 
32
  def classify_text(text):
33
  if not text.strip():
34
- return "----"
 
 
 
 
35
 
36
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
37
 
38
  with torch.no_grad():
39
  logits_1 = model_1(**inputs).logits
40
  logits_2 = model_2(**inputs).logits
 
41
 
42
  softmax_1 = torch.softmax(logits_1, dim=1)
43
  softmax_2 = torch.softmax(logits_2, dim=1)
 
44
 
45
- averaged_probabilities = (softmax_1 + softmax_2) / 2
46
  probabilities = averaged_probabilities[0]
47
 
48
  ai_probs = probabilities.clone()
@@ -73,8 +85,8 @@ title = "AI Text Detector"
73
 
74
  description = """
75
 
76
- This tool uses the **ModernBERT** model to identify whether a given text was written by a human or generated by artificial intelligence (AI).
77
 
 
78
  <br>
79
 
80
  <div style="line-height: 1.8;">
@@ -200,11 +212,10 @@ with iface:
200
  text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
201
  result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
202
  text_input.change(classify_text, inputs=text_input, outputs=result_output)
203
- with gr.Tab("AI Text Examples"):
204
  gr.Examples(AI_texts, inputs=text_input)
205
- with gr.Tab("Human Text Examples"):
206
  gr.Examples(Human_texts, inputs=text_input)
207
  gr.Markdown(bottom_text, elem_id="bottom_text")
208
 
209
  iface.launch(share=True)
210
-
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
 
5
+ model1 = "modernbert.bin"
6
+ model2 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
7
+ model3 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
8
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
9
 
10
  tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
 
17
  model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
18
  model_2.to(device).eval()
19
 
20
+ model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
21
+ model_3.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
22
+ model_3.to(device).eval()
23
+
24
+
25
  label_mapping = {
26
  0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
27
  6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
 
37
 
38
  def classify_text(text):
39
  if not text.strip():
40
+ result_message = (
41
+ f"----"
42
+ f"Results will appear here..."
43
+ )
44
+ return results_message
45
 
46
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
47
 
48
  with torch.no_grad():
49
  logits_1 = model_1(**inputs).logits
50
  logits_2 = model_2(**inputs).logits
51
+ logits_3 = model_3(**inputs).logits
52
 
53
  softmax_1 = torch.softmax(logits_1, dim=1)
54
  softmax_2 = torch.softmax(logits_2, dim=1)
55
+ softmax_3 = torch.softmax(logits_3, dim=1)
56
 
57
+ averaged_probabilities = (softmax_1 + softmax_2 + softmax_3) / 3
58
  probabilities = averaged_probabilities[0]
59
 
60
  ai_probs = probabilities.clone()
 
85
 
86
  description = """
87
 
 
88
 
89
+ This tool uses the ModernBERT model to identify whether a given text was written by a human or generated by artificial intelligence (AI). It works with a soft voting ensemble using three models, combining their outputs to improve the accuracy.
90
  <br>
91
 
92
  <div style="line-height: 1.8;">
 
212
  text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
213
  result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
214
  text_input.change(classify_text, inputs=text_input, outputs=result_output)
215
+ with gr.Tab("AI text examples"):
216
  gr.Examples(AI_texts, inputs=text_input)
217
+ with gr.Tab("Human text examples"):
218
  gr.Examples(Human_texts, inputs=text_input)
219
  gr.Markdown(bottom_text, elem_id="bottom_text")
220
 
221
  iface.launch(share=True)