ozayezerceli committed on
Commit
e8b11eb
1 Parent(s): faaad26

Update src/submission/check_validity.py

Browse files
Files changed (1) hide show
  1. src/submission/check_validity.py +11 -7
src/submission/check_validity.py CHANGED
@@ -32,20 +32,24 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
32
  return True, ""
33
 
34
  def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
35
- """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
36
  try:
37
  config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
38
  if test_tokenizer:
39
- try:
40
- tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
41
- except ValueError as e:
 
 
 
 
 
 
42
  return (
43
  False,
44
- f"uses a tokenizer which is not in a transformers release: {e}",
45
  None
46
  )
47
- except Exception as e:
48
- return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
49
  return True, None, config
50
 
51
  except ValueError:
 
32
  return True, ""
33
 
34
  def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
35
+ """Makes sure the model is on the hub, and uses a valid configuration (in the latest transformers version)"""
36
  try:
37
  config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
38
  if test_tokenizer:
39
+ tokenizer_config = get_tokenizer_config(model_name)
40
+ if tokenizer_config is not None:
41
+ tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
42
+ else:
43
+ tokenizer_class_candidate = config.tokenizer_class
44
+
45
+
46
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
47
+ if tokenizer_class is None:
48
  return (
49
  False,
50
+ f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
51
  None
52
  )
 
 
53
  return True, None, config
54
 
55
  except ValueError: