ranamhamoud committed
Commit 64868e1 · verified · 1 Parent(s): b5bcfdd

Update app.py

Files changed (1):
  app.py  +14 -11
app.py CHANGED
@@ -36,10 +36,10 @@ if torch.cuda.is_available():
     )
     model_id = "meta-llama/Llama-2-7b-chat-hf"
     base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto",quantization_config=bnb_config)
-    model = PeftModel.from_pretrained(
-        base_model,"ranamhamoud/storytell")
+    model = PeftModel.from_pretrained(model,"ranamhamoud/storytell")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
+    tokenizer.pad_token = tokenizer.eos_token
+
 
 def make_prompt(entry):
     return f"### Human: YOUR INSTRUCTION HERE: {entry} ### Assistant:"
@@ -49,17 +49,20 @@ def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
-    # temperature: float = 0.6,
-    # top_p: float = 0.9,
-    # top_k: int = 50,
-    repetition_penalty: float = 1.2,
+    temperature: float = 0.1,  # Lower -> less random
+    top_p: float = 0.1,  # Lower -> less random, considering only the top 10% of tokens at each step
+    top_k: int = 1,  # Least random, only the most likely next token is considered
+    repetition_penalty: float = 1.0,  # No repetition penalty
 ) -> Iterator[str]:
     conversation = []
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": make_prompt(message)})
 
-    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+    enc = tokenizer(make_prompt(message), return_tensors="pt", padding=True, truncation=True)
+
+
+    input_ids = enc.input_ids
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
@@ -71,9 +74,9 @@ def generate(
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
-        # top_p=top_p,
-        # top_k=top_k,
-        # temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
     )
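
For context, and not part of the commit itself: below is a minimal sketch of how the updated loading and generation path could be exercised end to end, assuming the rest of app.py follows the usual TextIteratorStreamer-in-a-thread pattern that the streamer= and MAX_INPUT_TOKEN_LENGTH references imply. The BitsAndBytesConfig values and the generate_once helper are illustrative assumptions, not code from this repository. One caveat: the new line passes model as the first argument to PeftModel.from_pretrained even though model is not defined at that point; the sketch passes base_model, which is the object the adapter wraps.

from threading import Thread

import torch
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

model_id = "meta-llama/Llama-2-7b-chat-hf"

# Hypothetical 4-bit config; the real bnb_config is defined above the first hunk.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", quantization_config=bnb_config
)
# Attach the fine-tuned adapter to the quantized base model.
model = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Llama 2's tokenizer has no pad token by default

def make_prompt(entry):
    return f"### Human: YOUR INSTRUCTION HERE: {entry} ### Assistant:"

def generate_once(message: str, max_new_tokens: int = 1024) -> str:
    # Tokenize the formatted prompt directly, as the new code does.
    enc = tokenizer(make_prompt(message), return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids.to(base_model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.1,             # new default: keep only the top 10% probability mass
        top_k=1,               # new default: effectively greedy decoding
        temperature=0.1,       # new default: near-deterministic
        num_beams=1,
        repetition_penalty=1.0,
    )
    # Run generation on a background thread and collect the streamed text.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    return "".join(streamer)

With top_k=1 the sampler is effectively greedy, so the new temperature and top_p defaults only come into play if top_k is later raised.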