Sergidev committed (verified) · Commit c2c6f60 · Parent: 4fd6546

Update app.py

Files changed (1): app.py +3 -2
app.py CHANGED
@@ -18,7 +18,7 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_id = "meta-llama/Llama-3.1-8B"
+model_id = "meta-llama/Llama-3.2-1B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -26,6 +26,7 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.float16,
     load_in_8bit=True,
 )
+
 model.eval()
 @spaces.GPU(duration=90)
 def generate(
@@ -36,7 +37,7 @@ def generate(
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    prompt = f"[INST] {message} [/INST]"
+    prompt = f"{message}"
     input_ids = tokenizer.encode(prompt, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
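
For reference, a minimal sketch of how the touched region of app.py plausibly reads after this commit. Only the lines visible in the diff hunks are confirmed; the imports, the generate parameters above the top_k hunk (message, max_new_tokens, temperature, top_p and their defaults), the device_map argument, and the streaming body of generate are assumptions, filled in with the common TextIteratorStreamer pattern used in Hugging Face Space demos:

import os
from threading import Thread
from typing import Iterator

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_id = "meta-llama/Llama-3.2-1B"  # this commit: swapped in for Llama-3.1-8B
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # assumption: this line is outside the shown diff context
    torch_dtype=torch.float16,
    load_in_8bit=True,  # 8-bit quantized weights via bitsandbytes
)

model.eval()
@spaces.GPU(duration=90)
def generate(
    message: str,  # assumption: parameters above this hunk are not shown in the diff
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    # This commit drops the Llama-2-style "[INST] ... [/INST]" wrapper and
    # passes the raw message through, matching the switch to a base model.
    prompt = f"{message}"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens if the prompt is too long.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Assumption: the rest of the function body is not in the diff; this is
    # the usual pattern for token-by-token streaming from a background thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

One design note the diff implies: "[INST] ... [/INST]" is the Llama 2 chat template, while meta-llama/Llama-3.2-1B is a base (non-instruction-tuned) checkpoint, so removing the wrapper is consistent with the model swap in the same commit.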