chaseharmon committed
Commit 19b4114 · 1 Parent(s): f9019ad

Files changed (1):
  1. app.py +6 -8

app.py CHANGED
@@ -4,7 +4,6 @@ import torch
 
 base_model_name = "chaseharmon/Rap-Mistral-Big"
 
-
 @st.cache_resource
 def load_model():
     nf4_config = BitsAndBytesConfig(
@@ -27,19 +26,16 @@ def load_model():
 @st.cache_resource
 def load_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-
     tokenizer.pad_token = tokenizer.eos_token
     tokenizer.padding_side = "right"
-
     return tokenizer
 
 def build_prompt(question):
-    prompt=f"[INST] {question} [/INST] "
-    return prompt
+    prompt = f"[INST] {question} [/INST] "
+    return prompt
 
 model = load_model()
 model.eval()
-
 tokenizer = load_tokenizer()
 
 st.title("Rap Verse Generation V1 Demo")
@@ -52,6 +48,9 @@ display_placeholder = st.empty()
 prompt_placeholder.write("Ask Rap-Mistral Something")
 display_placeholder.write("")
 
+# Temperature slider
+temperature = st.slider('Temperature', min_value=0.0, max_value=1.0, value=0.5, step=0.01)
+
 question = st.chat_input("Write a verse in the style of Lupe Fiasco")
 if question:
     display_placeholder.write("Loading...")
@@ -59,7 +58,7 @@ if question:
     prompt = build_prompt(question)
     inputs = tokenizer(prompt, return_tensors="pt")
     model_inputs = inputs.to('cuda')
-    generated_ids = model.generate(**model_inputs, max_new_tokens=300, do_sample=True, pad_token_id=tokenizer.eos_token_id)
+    generated_ids = model.generate(**model_inputs, max_new_tokens=300, do_sample=True, pad_token_id=tokenizer.eos_token_id, temperature=temperature)
     response = tokenizer.batch_decode(generated_ids)[0]
     end_of_inst = response.find("[/INST]") + len("[/INST]")
     if end_of_inst > -1:
@@ -68,4 +67,3 @@ if question:
         actual_response = response
     actual_response = actual_response.replace("\n", " \n")
     display_placeholder.write(actual_response)
-
 
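For context on the knob this commit exposes: in sampled decoding, temperature divides the logits before the softmax, so values well below 1.0 concentrate probability mass on the likeliest tokens, while 1.0 leaves the model's distribution unchanged. A minimal, self-contained sketch of the effect (plain PyTorch; the logits values are made up for illustration):

    import torch

    # Hypothetical next-token logits, chosen only to show the rescaling.
    logits = torch.tensor([2.0, 1.0, 0.5])

    for temperature in (0.1, 0.5, 1.0):
        # The same rescaling sampling-based decoding applies before drawing a token.
        probs = torch.softmax(logits / temperature, dim=-1)
        print(temperature, [round(p, 3) for p in probs.tolist()])

    # Lower temperature -> sharper distribution (more predictable verses);
    # at 1.0 the probabilities are just the softmax of the raw logits.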
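One caveat worth flagging on the new slider: min_value=0.0 lets the user select temperature == 0.0, and recent transformers releases reject a non-positive temperature when do_sample=True. A defensive variant of the added line (a sketch, not part of this commit; the 0.01 floor is an arbitrary choice):

    # Hypothetical guard: keep the lower bound strictly positive so that
    # model.generate never receives temperature == 0.0 alongside do_sample=True.
    temperature = st.slider('Temperature', min_value=0.01, max_value=1.0, value=0.5, step=0.01)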
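A side note on the unchanged parsing lines shown as context in the last hunk: str.find returns -1 when "[/INST]" is absent, and because len("[/INST]") is added before the comparison, end_of_inst is at least 6 and the end_of_inst > -1 check is always true, so the fallback assignment can never run. The usual pattern tests the raw index first (a hypothetical fix, not part of this commit):

    marker = "[/INST]"
    idx = response.find(marker)
    if idx > -1:
        # Marker found: keep only the generated text after the instruction.
        actual_response = response[idx + len(marker):]
    else:
        # Marker genuinely missing: fall back to the full decoded string.
        actual_response = response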