Advik committed
Commit 256e9ec · 1 Parent(s): ee138cb
Files changed (2)
  1. app.py +31 -14
  2. software.py +11 -58
app.py CHANGED
@@ -1,26 +1,20 @@
 import gradio as gr
 import numpy as np
+import os
 import pandas as pd
 from software import Software
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
-software = None
 theme = gr.Theme.from_hub("gstaff/xkcd")
 
-def warmup():
-    global software
-    print("[DivEye] Warming up models...")
-    software = Software()
-    print("[DivEye] Models are ready.")
-
 def detect_ai_text(text):
-    global software
     if software is None:
-        warmup()
-        return " Models not loaded yet. Please wait for a few moments.", 0.0, pd.DataFrame({
-            "Source": ["AI Generated", "Human Written"],
-            "Probability (%)": [0, 0]
-        })
-
+        return "❗ Model not loaded. We require a GPU to run DivEye.", 0.0, pd.DataFrame({
+            "Source": ["AI Generated", "Human Written"],
+            "Probability (%)": [0, 0]
+        })
+
     text = text.strip()
     if not text or len(text.split()) < 15:
         return (
@@ -50,6 +44,29 @@ def detect_ai_text(text):
 
     return message, round(ai_prob, 3), bar_data
 
+# Token from environment variable
+token = os.getenv("HF_TOKEN")
+
+if not torch.cuda.is_available():
+    print("[DivEye] CUDA not available. Running on CPU.")
+    DESCRIPTION = "This demo requires a GPU to run efficiently. Please use a machine with CUDA support."
+
+# Load the models and tokenizers
+if torch.cuda.is_available():
+    model_name_div = "tiiuae/falcon-7b"
+    model_name_bi = "google/gemma-1.1-2b-it"
+
+    div_model = AutoModelForCausalLM.from_pretrained(model_name_div, torch_dtype=torch.float16, device_map="cuda:0", use_auth_token=token)
+    div_tokenizer = AutoTokenizer.from_pretrained(model_name_div, use_fast=False, trust_remote_code=True, use_auth_token=token)
+
+    bi_model = AutoModelForCausalLM.from_pretrained(model_name_bi, torch_dtype=torch.float16, device_map="cuda:1", use_auth_token=token)
+    bi_tokenizer = AutoTokenizer.from_pretrained(model_name_bi, use_fast=False, trust_remote_code=True, use_auth_token=token)
+
+    div_model.eval()
+    bi_model.eval()
+
+    software = Software(div_model, div_tokenizer, bi_model, bi_tokenizer, div_model.device, bi_model.device)
+
 # Gradio app setup
 with gr.Blocks(title="DivEye") as demo:
     gr.HTML("""
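
Note on the new app.py: `software` is now assigned only inside the `if torch.cuda.is_available():` branch, and the old module-level `software = None` was removed, so on a CPU-only machine the `if software is None:` check in detect_ai_text would raise a NameError before the "Model not loaded" message could be returned. A minimal defensive sketch (an assumption, not part of this commit):

import torch

# Assumed fallback: define `software` up front so detect_ai_text can
# degrade gracefully when CUDA is absent instead of raising NameError.
software = None

if torch.cuda.is_available():
    # Load Falcon-7B / Gemma and construct Software exactly as the commit does.
    pass
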
software.py CHANGED
@@ -15,7 +15,7 @@ import os
 class Diversity:
     def __init__(self, model, tokenizer, device):
         self.tokenizer = tokenizer
-        self.model = model.to(device)
+        self.model = model
         self.device = device
 
     def compute_log_likelihoods(self, text):
@@ -56,7 +56,7 @@ class BiScope:
     def __init__(self, model, tokenizer, device):
         self.COMPLETION_PROMPT_ONLY = "Complete the following text: "
         self.tokenizer = tokenizer
-        self.model = model.to(device)
+        self.model = model
         self.device = device
 
     def compute_fce_loss(self, logits, targets, text_slice):
@@ -97,70 +97,23 @@
 
 
 class Software:
-    def __init__(self):
-        self.token = os.getenv("HF_TOKEN")
-        self.device_div = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        self.device_bi = self.device_div
+    def __init__(self, div_model, div_tokenizer, bi_model, bi_tokenizer, device_div="cuda", device_bi="cuda"):
+        self.div_model = div_model
+        self.div_tokenizer = div_tokenizer
+        self.bi_model = bi_model
+        self.bi_tokenizer = bi_tokenizer
 
-        self.div_model = None
-        self.div_tokenizer = None
-        self.bi_model = None
-        self.bi_tokenizer = None
+        self.device_div = device_div
+        self.device_bi = device_bi
 
         self.model_path = Path(__file__).parent / "model.json"
 
         self.model = xgb.XGBClassifier()
         self.model.load_model(self.model_path)
-
-    def _load_div_models(self):
-        if self.div_model is None or self.div_tokenizer is None:
-            self.div_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b", use_fast=False, trust_remote_code=True, use_auth_token=self.token)
-            self.div_model = AutoModelForCausalLM.from_pretrained(
-                "tiiuae/falcon-7b",
-                device_map="cuda",
-                torch_dtype=torch.float16,
-                trust_remote_code=True,
-                use_auth_token=self.token
-            )
-            self.div_model.to(self.device_div)
-
-    def _load_bi_models(self):
-        if self.bi_model is None or self.bi_tokenizer is None:
-            self.bi_tokenizer = AutoTokenizer.from_pretrained("google/gemma-1.1-2b-it", use_fast=False, trust_remote_code=True, use_auth_token=self.token)
-            self.bi_model = AutoModelForCausalLM.from_pretrained(
-                "google/gemma-1.1-2b-it",
-                device_map="cuda",
-                torch_dtype=torch.float16,
-                trust_remote_code=True,
-                use_auth_token=self.token
-            )
-            self.bi_model.to(self.device_bi)
-
-    def load_data(self, jsonl_path):
-        ids, texts = [], []
-        with open(jsonl_path, 'r') as f:
-            for line in f:
-                obj = json.loads(line)
-                ids.append(obj["id"])
-                texts.append(obj["text"])
-        return ids, texts
 
     @spaces.GPU
-    def evaluate(self, text):
-        self._load_div_models()
-        self._load_bi_models()
-
-        # Load models to GPUs.
-        device_div = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        if torch.cuda.device_count() > 1:
-            device_bi = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
-
-        if not next(self.div_model.parameters()).is_cuda:
-            self.div_model = self.div_model.to(device_div)
-        if not next(self.bi_model.parameters()).is_cuda:
-            self.bi_model = self.bi_model.to(device_bi)
-
-        diveye = Diversity(self.div_model, self.div_tokenizer, device_div)
+    def evaluate(self, text):
+        diveye = Diversity(self.div_model, self.div_tokenizer, self.device_div)
         biscope = BiScope(self.bi_model, self.bi_tokenizer, self.device_bi)
 
         diveye_features = diveye.compute_features(text)
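
Taken together, the software.py changes replace lazy, in-method loading with constructor injection: app.py loads Falcon-7B and Gemma once at startup and passes them in, which is why the `.to(device)` calls in Diversity and BiScope are dropped (with `device_map` set, the weights are already placed on the right GPUs). A minimal usage sketch of the refactored class (the return shape of `evaluate` is not visible in this diff, so the last line is illustrative):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from software import Software

# Same checkpoints as the commit; auth-token arguments omitted for brevity.
div_model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b", torch_dtype=torch.float16, device_map="cuda:0")
div_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b", use_fast=False)
bi_model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-1.1-2b-it", torch_dtype=torch.float16, device_map="cuda:1")
bi_tokenizer = AutoTokenizer.from_pretrained("google/gemma-1.1-2b-it", use_fast=False)

software = Software(div_model, div_tokenizer, bi_model, bi_tokenizer,
                    div_model.device, bi_model.device)
result = software.evaluate("Text to score for AI authorship.")  # return value not shown in the diff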