idkash1 committed on
Commit
7fbf405
·
verified ·
1 Parent(s): b8e3567

Update human_text_detect.py

Browse files
Files changed (1) hide show
  1. human_text_detect.py +6 -17
human_text_detect.py CHANGED
@@ -87,7 +87,7 @@ def detect_human_text(model_name, topic, text):
87
  df_null = df_null[df_null.num > 1]
88
 
89
  # Get survival function
90
- print('Get survival function')
91
  pval_functions = get_survival_function(df_null, G=43)
92
 
93
  min_tokens_per_sentence = 10
@@ -95,30 +95,19 @@ def detect_human_text(model_name, topic, text):
95
 
96
  cache_dir = f"/tmp/cacheHuggingface/{model_name}"
97
 
98
- print('Create dir')
99
- # Use a writable directory inside the Hugging Face Space
100
- # os.makedirs("/tmp/cacheHuggingface/PHI2", exist_ok=True)
101
- # os.makedirs("/tmp/cacheHuggingface/GPT2XL", exist_ok=True)
102
-
103
  # Init model
104
  print('Init tokenizer')
105
  lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
106
  tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir)
107
-
108
- print("Before saved tokenizer files in:", cache_dir)
109
- print(os.listdir(cache_dir))
110
 
111
- print("Save tokenizer")
112
- tokenizer.save_pretrained(cache_dir)
113
-
114
- print("Checking saved tokenizer files in:", cache_dir)
115
- print(os.listdir(cache_dir))
116
 
117
  print('Init model')
118
- model = AutoModelForCausalLM.from_pretrained(lm_name) #, cache_dir=cache_dir
119
 
120
- print("Save model")
121
- model.save_pretrained(cache_dir)
122
 
123
  print('Init PerplexityEvaluator')
124
  sentence_detector = PerplexityEvaluator(model, tokenizer)
 
87
  df_null = df_null[df_null.num > 1]
88
 
89
  # Get survival function
90
+ print('Get survival function')
91
  pval_functions = get_survival_function(df_null, G=43)
92
 
93
  min_tokens_per_sentence = 10
 
95
 
96
  cache_dir = f"/tmp/cacheHuggingface/{model_name}"
97
 
 
 
 
 
 
98
  # Init model
99
  print('Init tokenizer')
100
  lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
101
  tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir)
 
 
 
102
 
103
+ # print("Save tokenizer")
104
+ # tokenizer.save_pretrained(cache_dir)
 
 
 
105
 
106
  print('Init model')
107
+ model = AutoModelForCausalLM.from_pretrained(lm_name, cache_dir=cache_dir)
108
 
109
+ # print("Save model")
110
+ # model.save_pretrained(cache_dir)
111
 
112
  print('Init PerplexityEvaluator')
113
  sentence_detector = PerplexityEvaluator(model, tokenizer)