idkash1 committed on
Commit
60ea201
·
verified ·
1 Parent(s): e2a8742

Update human_text_detect.py

Browse files
Files changed (1) hide show
  1. human_text_detect.py +5 -22
human_text_detect.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
6
  import pickle
7
  from src.DetectLM import DetectLM
8
  from src.PerplexityEvaluator import PerplexityEvaluator
9
- from src.PrepareArticles import PrepareArticles #Idan
10
  from src.fit_survival_function import fit_per_length_survival_function
11
  from glob import glob
12
  import spacy
@@ -96,16 +96,7 @@ def detect_human_text(model_name, topic, text):
96
  min_tokens_per_sentence = 10
97
  max_tokens_per_sentence = 100
98
 
99
- ####
100
- cache_dir = "/cache/huggingface"
101
- # Check if the directory exists and is writable
102
- print(f"Cache directory exists: {os.path.exists(cache_dir)}")
103
- print(f"Cache directory is writable: {os.access(cache_dir, os.W_OK)}")
104
-
105
- # List contents of the directory
106
- print("Contents of cache directory before loading model:")
107
- os.system(f"ls -lah {cache_dir}")
108
- ###
109
 
110
  # Init model
111
  print('Init model')
@@ -114,17 +105,9 @@ def detect_human_text(model_name, topic, text):
114
  tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir)
115
  model = AutoModelForCausalLM.from_pretrained(lm_name, cache_dir=cache_dir)
116
 
117
- ###
118
- print("Contents of cache directory after loading model:")
119
- os.system(f"ls -lah {cache_dir}")
120
-
121
- print(f"Current HF_HOME: {os.getenv('HF_HOME')}")
122
- print(f"Current TRANSFORMERS_CACHE: {os.getenv('TRANSFORMERS_CACHE')}")
123
-
124
- # Check where the tokenizer and model are actually downloaded
125
- print(f"Tokenizer saved at: {tokenizer.save_pretrained(cache_dir)}")
126
- print(f"Model saved at: {model.save_pretrained(cache_dir)}")
127
- ###
128
 
129
  print('Init PerplexityEvaluator')
130
  sentence_detector = PerplexityEvaluator(model, tokenizer)
 
6
  import pickle
7
  from src.DetectLM import DetectLM
8
  from src.PerplexityEvaluator import PerplexityEvaluator
9
+ from src.PrepareArticles import PrepareArticles
10
  from src.fit_survival_function import fit_per_length_survival_function
11
  from glob import glob
12
  import spacy
 
96
  min_tokens_per_sentence = 10
97
  max_tokens_per_sentence = 100
98
 
99
+ cache_dir = f"/cache/huggingface/{model_name}"
 
 
 
 
 
 
 
 
 
100
 
101
  # Init model
102
  print('Init model')
 
105
  tokenizer = AutoTokenizer.from_pretrained(lm_name, cache_dir=cache_dir)
106
  model = AutoModelForCausalLM.from_pretrained(lm_name, cache_dir=cache_dir)
107
 
108
+ print("Save model")
109
+ tokenizer.save_pretrained(cache_dir)
110
+ model.save_pretrained(cache_dir)
 
 
 
 
 
 
 
 
111
 
112
  print('Init PerplexityEvaluator')
113
  sentence_detector = PerplexityEvaluator(model, tokenizer)