Blancior committed on
Commit
4496835
·
verified ·
1 Parent(s): cf0fd37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -4,15 +4,18 @@ import torch
4
 
5
def load_model():
    """Load the Llama-2-13B-chat GPTQ checkpoint and its tokenizer.

    Returns:
        tuple: ``(model, tokenizer)`` ready for inference.
    """
    repo_id = "TheBloke/Llama-2-13B-chat-GPTQ"
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    # device_map="auto" lets transformers place layers on the available
    # device(s); trust_remote_code is required by some quantized repos.
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        device_map="auto",
        trust_remote_code=True,
        revision="main",
    )
    return model, tokenizer
15
 
 
 
16
  def generate_response(prompt, max_length=100):
17
  try:
18
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
4
 
5
def load_model():
    """Load the GPTQ-quantized Llama-2-13B-chat model and its tokenizer.

    The checkpoint is already quantized (GPTQ): transformers reads the
    quantization settings from the repository's own config. Passing a plain
    dict as ``quantization_config`` is invalid (a ``QuantizationConfig``
    object is expected), and stacking bitsandbytes 4-bit quantization on top
    of GPTQ weights raises a ValueError — so no quantization argument is
    passed here.

    Returns:
        tuple: ``(model, tokenizer)`` ready for inference.
    """
    model_name = "TheBloke/Llama-2-13B-chat-GPTQ"
    # use_fast=True selects the Rust-backed tokenizer for faster encoding.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",          # spread layers across available devices
        trust_remote_code=True,
        revision="main",
    )
    return model, tokenizer
16
 
17
+ # The rest of the code remains unchanged
18
+
19
  def generate_response(prompt, max_length=100):
20
  try:
21
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)