ffreemt committed on
Commit
2321903
·
1 Parent(s): 0b6d9b3
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -23,28 +23,35 @@ except Exception: # pylint: disable=broad-except
23
  logger.warning("Windows, cant run time.tzset()")
24
 
25
  model = None
26
- gc.collect()
27
 
28
  logger.info("start")
29
  has_cuda = torch.cuda.is_available()
30
 
 
 
31
  if has_cuda:
32
  model = AutoModelForCausalLM.from_pretrained(
33
  "model", # loc
34
- # device_map="auto",
35
  torch_dtype=torch.bfloat16, # pylint: disable=no-member
36
  load_in_8bit=True,
37
  trust_remote_code=True,
38
  # use_ram_optimized_load=False,
39
  # offload_folder="offload_folder",
40
- ).cuda()
41
  else:
42
- # model = AutoModel.from_pretrained(model_name, trust_remote_code=True).float()
43
- model = AutoModelForCausalLM.from_pretrained(
44
- model_name, trust_remote_code=True
45
- ).float()
 
 
 
 
 
46
 
47
- model = model.eval()
48
 
49
  rich.print(f"{model=}")
50
 
 
23
  logger.warning("Windows, cant run time.tzset()")
24
 
25
  model = None
26
+ gc.collect() # for interactive testing
27
 
28
  logger.info("start")
29
  has_cuda = torch.cuda.is_available()
30
 
31
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)
32
+
33
  if has_cuda:
34
  model = AutoModelForCausalLM.from_pretrained(
35
  "model", # loc
36
+ device_map="auto",
37
  torch_dtype=torch.bfloat16, # pylint: disable=no-member
38
  load_in_8bit=True,
39
  trust_remote_code=True,
40
  # use_ram_optimized_load=False,
41
  # offload_folder="offload_folder",
42
+ ) # .cuda()
43
  else:
44
+ try:
45
+ # model = AutoModel.from_pretrained(model_name, trust_remote_code=True).float()
46
+ model = AutoModelForCausalLM.from_pretrained(
47
+ model_name, trust_remote_code=True
48
+ ).float()
49
+ except Exception as exc:
50
+ logger.error(exc)
51
+ logger.warning("Doesnt seem to load for CPU...")
52
+ raise SystemExit(1) from exc
53
 
54
+ model = model.eval()
55
 
56
  rich.print(f"{model=}")
57