Hadiboo committed
Commit 24ea58e · verified · 1 parent: b619e1c

Update app.py

Files changed (1)
  1. app.py +13 -4
app.py CHANGED
@@ -1,24 +1,33 @@
 import streamlit as st
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from accelerate import Accelerator  # Import Accelerator from the accelerate library
 
 def main():
     st.title("Chatbot with Hugging Face Model")
 
     model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+
+    # Create an Accelerator instance
+    accelerator = Accelerator()
+
+    # Use the Accelerator for initializing the model
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map=accelerator.device)
 
     user_input = st.text_input("User Input:", "What is your favourite condiment?")
 
     if st.button("Generate Response"):
         messages = [
             {"role": "user", "content": user_input},
-            {"role": "assistant", "content": "Placeholder assistant message"}  # You can modify this as needed
+            {"role": "assistant", "content": "Placeholder assistant message"}
         ]
 
-        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(accelerator.device)
+
+        # Use the Accelerator for generating outputs
+        with accelerator.device:
+            outputs = model.generate(inputs, max_new_tokens=20)
 
-        outputs = model.generate(inputs, max_new_tokens=20)
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
         st.text_area("Assistant's Response:", response)
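
For context, a minimal sketch of the device-placement pattern the updated code relies on. This assumes accelerate is installed and PyTorch 2.0 or newer, where a torch.device instance works as a context manager that sets the default device:

import torch
from accelerate import Accelerator

# Accelerator picks the best available device (CUDA GPU, MPS, or CPU)
accelerator = Accelerator()
print(accelerator.device)  # e.g. cuda:0 on a single-GPU machine

# In PyTorch >= 2.0, entering a torch.device sets the default device,
# so tensors created inside the block land on it automatically
with accelerator.device:
    x = torch.zeros(2, 3)

assert x.device.type == accelerator.device.type

Note that device_map=accelerator.device places the entire model on that single device, unlike the previous device_map="auto", which can shard a large model such as Mixtral-8x7B across multiple GPUs and CPU RAM.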