idkash1 committed
Commit 0c743a6 (verified) · 1 parent: 42a5a6d

Update human_text_detect.py

Files changed (1): human_text_detect.py (+8, -7)
human_text_detect.py CHANGED
@@ -93,21 +93,22 @@ def detect_human_text(model_name, topic, text):
     min_tokens_per_sentence = 10
     max_tokens_per_sentence = 100
 
-    cache_dir = f"/tmp/cacheHuggingface/{model_name}"
+    cache_dir_tokenizer = f"/data/cacheHuggingface/{model_name}/tokenizer"
 
     # Init model
     print('Init tokenizer')
     lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
-    tokenizer = AutoTokenizer.from_pretrained(lm_name) #, cache_dir=cache_dir
+    tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir_tokenizer)
 
-    # print("Save tokenizer")
-    # tokenizer.save_pretrained(cache_dir)
+    print("Save tokenizer")
+    tokenizer.save_pretrained(cache_dir_tokenizer)
 
     print('Init model')
-    model = AutoModelForCausalLM.from_pretrained(lm_name) #, cache_dir=cache_dir
+    cache_dir_model = f"/data/cacheHuggingface/{model_name}/model"
+    model = AutoModelForCausalLM.from_pretrained(lm_name, cache_dir=cache_dir_model)
 
-    # print("Save model")
-    # model.save_pretrained(cache_dir)
+    print("Save model")
+    model.save_pretrained(cache_dir_model)
 
     print('Init PerplexityEvaluator')
     sentence_detector = PerplexityEvaluator(model, tokenizer)
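
The change replaces the ephemeral /tmp cache with per-model directories under /data/cacheHuggingface and persists both artifacts with save_pretrained(), so the gpt2-xl or microsoft/phi-2 weights only need to be fetched once. Below is a minimal sketch of how a later run could reuse those saved copies, assuming the /data paths survive restarts; the helper name load_cached and the directory-existence check are illustrative, not part of this commit.

# Possible follow-up (not in this commit): load the tokenizer and model straight
# from the directories written by save_pretrained(), falling back to the Hub only
# if the local copies are missing. The helper name `load_cached` is hypothetical;
# the paths mirror the ones introduced in the diff above.
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_cached(model_name):
    lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
    cache_dir_tokenizer = f"/data/cacheHuggingface/{model_name}/tokenizer"
    cache_dir_model = f"/data/cacheHuggingface/{model_name}/model"

    # Prefer the locally saved copies when they exist; otherwise download from the Hub.
    tokenizer_src = cache_dir_tokenizer if os.path.isdir(cache_dir_tokenizer) else lm_name
    model_src = cache_dir_model if os.path.isdir(cache_dir_model) else lm_name

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_src)
    model = AutoModelForCausalLM.from_pretrained(model_src)
    return tokenizer, model

Note that cache_dir= in from_pretrained() only controls where Hub downloads are stored, while save_pretrained() writes a flat, directly loadable copy of the same files; the sketch above loads from the latter.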