idkash1 committed · verified
Commit 327ae61 · Parent(s): e135674

Update human_text_detect.py

Files changed (1):
  1. human_text_detect.py (+27, -0)
human_text_detect.py CHANGED
@@ -11,6 +11,8 @@ from src.fit_survival_function import fit_per_length_survival_function
 from glob import glob
 import spacy
 import re
+import os
+from transformers.utils import logging


 logging.basicConfig(level=logging.INFO)
@@ -96,12 +98,37 @@ def detect_human_text(model_name, topic, text):
 min_tokens_per_sentence = 10
 max_tokens_per_sentence = 100

+####
+cache_dir = "/cache/huggingface"
+# Check if the directory exists and is writable
+print(f"Cache directory exists: {os.path.exists(cache_dir)}")
+print(f"Cache directory is writable: {os.access(cache_dir, os.W_OK)}")
+
+# List contents of the directory
+print("Contents of cache directory before loading model:")
+os.system(f"ls -lah {cache_dir}")
+###
+
 # Init model
 print('Init model')
 lm_name = 'gpt2-xl' if model_name == 'GPT2XL' else 'microsoft/phi-2'
 cache_dir = "/cache/huggingface"
 tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir)
 model = AutoModelForCausalLM.from_pretrained(lm_name, cache_dir=cache_dir)
+
+###
+print("Contents of cache directory after loading model:")
+os.system(f"ls -lah {cache_dir}")
+
+logging.set_verbosity_info()
+
+print(f"Current HF_HOME: {os.getenv('HF_HOME')}")
+print(f"Current TRANSFORMERS_CACHE: {os.getenv('TRANSFORMERS_CACHE')}")
+
+# Check where the tokenizer and model are actually downloaded
+print(f"Tokenizer saved at: {tokenizer.save_pretrained(cache_dir)}")
+print(f"Model saved at: {model.save_pretrained(cache_dir)}")
+###

 print('Init PerplexityEvaluator')
 sentence_detector = PerplexityEvaluator(model, tokenizer)
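
Note: the cache diagnostics added by this commit can be grouped into a single helper so the same report is printed before and after the model load. The sketch below is illustrative only and is not part of the repository: the function name report_hf_cache is made up, and subprocess.run is used in place of os.system as a judgment call; the checks themselves (os.path.exists, os.access, ls -lah, the HF_HOME and TRANSFORMERS_CACHE environment variables, set_verbosity_info) mirror the lines added above.

# Sketch only: consolidates the commit's cache-inspection prints into one
# hypothetical helper (report_hf_cache is not part of human_text_detect.py).
import os
import subprocess

from transformers.utils import logging as hf_logging


def report_hf_cache(cache_dir: str = "/cache/huggingface", label: str = "") -> None:
    """Print existence, writability, contents, and cache-related env vars for cache_dir."""
    print(f"--- cache report: {label} ---")
    print(f"Cache directory exists: {os.path.exists(cache_dir)}")
    print(f"Cache directory is writable: {os.access(cache_dir, os.W_OK)}")

    # List the directory contents; subprocess.run with an argument list avoids
    # shell interpolation of cache_dir (os.system in the commit does the same job).
    if os.path.isdir(cache_dir):
        subprocess.run(["ls", "-lah", cache_dir], check=False)

    print(f"Current HF_HOME: {os.getenv('HF_HOME')}")
    print(f"Current TRANSFORMERS_CACHE: {os.getenv('TRANSFORMERS_CACHE')}")


if __name__ == "__main__":
    # Raise transformers' verbosity so download/cache activity is logged,
    # as the commit does with logging.set_verbosity_info().
    hf_logging.set_verbosity_info()
    report_hf_cache(label="before loading model")
    # ... AutoTokenizer.from_pretrained / AutoModelForCausalLM.from_pretrained here ...
    report_hf_cache(label="after loading model")

Importing transformers.utils.logging under an alias (here hf_logging) also avoids shadowing the standard-library logging module that the file's earlier logging.basicConfig(level=logging.INFO) call relies on.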