harishnair04 committed on
Commit
60b705d
1 Parent(s): 349753a

feat: faster cpu inference

Browse files
Files changed (1) hide show
  1. app.py +1 -0
app.py CHANGED
@@ -44,6 +44,7 @@ model_id = "harishnair04/Gemma-medtr-2b-sft"
44
  # gemma_model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
45
  tokenizer = AutoTokenizer.from_pretrained(model_id)
46
  gemma_model = AutoModelForCausalLM.from_pretrained(model_id)
 
47
 
48
  tokenizer.pad_token_id = tokenizer.eos_token_id
49
 
 
44
  # gemma_model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
45
  tokenizer = AutoTokenizer.from_pretrained(model_id)
46
  gemma_model = AutoModelForCausalLM.from_pretrained(model_id)
47
+ gemma_model.to_bettertransformer()
48
 
49
  tokenizer.pad_token_id = tokenizer.eos_token_id
50