Spaces:
Running
Running
Commit
·
a08a74d
1
Parent(s):
b55fefb
examples
Browse files
app.py
CHANGED
@@ -2,8 +2,9 @@ import gradio as gr
|
|
2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
3 |
import torch
|
4 |
|
5 |
-
|
6 |
-
|
|
|
7 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
8 |
|
9 |
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
@@ -16,6 +17,11 @@ model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/Modern
|
|
16 |
model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
|
17 |
model_2.to(device).eval()
|
18 |
|
|
|
|
|
|
|
|
|
|
|
19 |
label_mapping = {
|
20 |
0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
|
21 |
6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
|
@@ -31,18 +37,24 @@ label_mapping = {
|
|
31 |
|
32 |
def classify_text(text):
|
33 |
if not text.strip():
|
34 |
-
|
|
|
|
|
|
|
|
|
35 |
|
36 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
|
37 |
|
38 |
with torch.no_grad():
|
39 |
logits_1 = model_1(**inputs).logits
|
40 |
logits_2 = model_2(**inputs).logits
|
|
|
41 |
|
42 |
softmax_1 = torch.softmax(logits_1, dim=1)
|
43 |
softmax_2 = torch.softmax(logits_2, dim=1)
|
|
|
44 |
|
45 |
-
averaged_probabilities = (softmax_1 + softmax_2) /
|
46 |
probabilities = averaged_probabilities[0]
|
47 |
|
48 |
ai_probs = probabilities.clone()
|
@@ -73,8 +85,8 @@ title = "AI Text Detector"
|
|
73 |
|
74 |
description = """
|
75 |
|
76 |
-
This tool uses the **ModernBERT** model to identify whether a given text was written by a human or generated by artificial intelligence (AI).
|
77 |
|
|
|
78 |
<br>
|
79 |
|
80 |
<div style="line-height: 1.8;">
|
@@ -200,11 +212,10 @@ with iface:
|
|
200 |
text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
|
201 |
result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
|
202 |
text_input.change(classify_text, inputs=text_input, outputs=result_output)
|
203 |
-
with gr.Tab("AI
|
204 |
gr.Examples(AI_texts, inputs=text_input)
|
205 |
-
with gr.Tab("Human
|
206 |
gr.Examples(Human_texts, inputs=text_input)
|
207 |
gr.Markdown(bottom_text, elem_id="bottom_text")
|
208 |
|
209 |
iface.launch(share=True)
|
210 |
-
|
|
|
2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
3 |
import torch
|
4 |
|
5 |
+
model1 = "modernbert.bin"
|
6 |
+
model2 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
|
7 |
+
model3 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
|
8 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
9 |
|
10 |
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
|
|
17 |
model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
|
18 |
model_2.to(device).eval()
|
19 |
|
20 |
+
model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
|
21 |
+
model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3, map_location=device))
|
22 |
+
model_3.to(device).eval()
|
23 |
+
|
24 |
+
|
25 |
label_mapping = {
|
26 |
0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
|
27 |
6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
|
|
|
37 |
|
38 |
def classify_text(text):
|
39 |
if not text.strip():
|
40 |
+
result_message = (
|
41 |
+
f"----"
|
42 |
+
f"Results will appear here..."
|
43 |
+
)
|
44 |
+
return result_message
|
45 |
|
46 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
|
47 |
|
48 |
with torch.no_grad():
|
49 |
logits_1 = model_1(**inputs).logits
|
50 |
logits_2 = model_2(**inputs).logits
|
51 |
+
logits_3 = model_3(**inputs).logits
|
52 |
|
53 |
softmax_1 = torch.softmax(logits_1, dim=1)
|
54 |
softmax_2 = torch.softmax(logits_2, dim=1)
|
55 |
+
softmax_3 = torch.softmax(logits_3, dim=1)
|
56 |
|
57 |
+
averaged_probabilities = (softmax_1 + softmax_2 + softmax_3) / 3
|
58 |
probabilities = averaged_probabilities[0]
|
59 |
|
60 |
ai_probs = probabilities.clone()
|
|
|
85 |
|
86 |
description = """
|
87 |
|
|
|
88 |
|
89 |
+
This tool uses the ModernBERT model to identify whether a given text was written by a human or generated by artificial intelligence (AI). It works with a soft voting ensemble using three models, combining their outputs to improve the accuracy.
|
90 |
<br>
|
91 |
|
92 |
<div style="line-height: 1.8;">
|
|
|
212 |
text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
|
213 |
result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
|
214 |
text_input.change(classify_text, inputs=text_input, outputs=result_output)
|
215 |
+
with gr.Tab("AI text examples"):
|
216 |
gr.Examples(AI_texts, inputs=text_input)
|
217 |
+
with gr.Tab("Human text examples"):
|
218 |
gr.Examples(Human_texts, inputs=text_input)
|
219 |
gr.Markdown(bottom_text, elem_id="bottom_text")
|
220 |
|
221 |
iface.launch(share=True)
|
|