CamiloVega committed
Commit e308af0 · verified · 1 parent: 45fe324

Update app.py

Files changed (1): app.py (+21 -18)
app.py CHANGED
@@ -14,7 +14,7 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Define the model name
-model_name = "microsoft/phi-2"
+model_name = "meta-llama/Llama-2-7b-hf"
 
 try:
     logger.info("Starting model initialization...")
@@ -32,19 +32,22 @@ try:
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        token=True  # You'll need to set your HF token here
     )
+    tokenizer.pad_token = tokenizer.eos_token
     logger.info("Tokenizer loaded successfully")
 
-    # Load model
+    # Load model with device map
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        trust_remote_code=True
+        trust_remote_code=True,
+        token=True,  # You'll need to set your HF token here
+        device_map="auto",
+        load_in_8bit=True
     )
-    if device == "cuda":
-        model = model.to(device)
     logger.info("Model loaded successfully")
 
     # Create pipeline
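Two notes on the new loading code. Setting `tokenizer.pad_token = tokenizer.eos_token` is needed because the Llama 2 tokenizer ships without a pad token. And `load_in_8bit=True` requires the bitsandbytes package and a CUDA device; recent transformers releases route 8-bit loading through a BitsAndBytesConfig object instead of the bare flag. A hedged equivalent of the call above under that newer API:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Equivalent 8-bit load expressed with the config object; on versions
# that still accept load_in_8bit=True the behavior should match.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=quant_config,
    device_map="auto",  # let accelerate place the weights
    token=True,         # reuse the cached Hugging Face credentials
)
```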
@@ -58,7 +61,7 @@ try:
         temperature=0.7,
         top_p=0.9,
         repetition_penalty=1.1,
-        device=0 if device == "cuda" else -1
+        device_map="auto"
     )
     logger.info("Pipeline created successfully")
 
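A caveat on the pipeline change: since the model was already loaded with device_map="auto", accelerate has placed its weights, and depending on the transformers version passing device or device_map to pipeline again is either redundant or rejected. A sketch that simply reuses the model's existing placement, assuming the task is "text-generation" (max_new_tokens is an illustrative value, not from this commit):

```python
from transformers import pipeline

# The model is already dispatched by accelerate, so no device argument
# is passed; the pipeline reuses the model's current placement.
model_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
)
```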
@@ -69,8 +72,9 @@ except Exception as e:
 # Configure system message
 system_message = """You are AQuaBot, an AI assistant aware of environmental impact.
 You help users with any topic while raising awareness about water consumption
-in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
-equivalent to the daily consumption of a city of 10,000 people?"""
+in AI. Did you know that training large language models like Llama 2 can consume
+substantial amounts of water due to the cooling requirements of data centers?
+Let's work together while being mindful of our environmental impact."""
 
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
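These constants feed calculate_water_consumption, whose body sits outside this diff. A hypothetical reconstruction of how such a helper could use them; the "input"/"output" key names and the token-counting approach are assumptions, not the app's actual code:

```python
def calculate_water_consumption(text, is_input=True):
    # Hypothetical sketch: count tokens with the loaded tokenizer and
    # scale by a per-token water figure; app.py's real implementation
    # is not shown in this diff.
    n_tokens = len(tokenizer.encode(text))
    return n_tokens * WATER_PER_TOKEN["input" if is_input else "output"]
```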
@@ -110,13 +114,13 @@ def generate_response(user_input, chat_history):
     input_water_consumption = calculate_water_consumption(user_input, True)
     total_water_consumption += input_water_consumption
 
-    # Create prompt
+    # Create prompt with Llama 2 chat format
    conversation_history = ""
     if chat_history:
         for message in chat_history:
-            conversation_history += f"User: {message[0]}\nAssistant: {message[1]}\n"
+            conversation_history += f"[INST] {message[0]} [/INST] {message[1]} "
 
-    prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"
+    prompt = f"{system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
 
     logger.info("Generating model response...")
     outputs = model_gen(
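One caution on the new prompt format: the [INST] ... [/INST] tags are the Llama 2 chat convention, but the model loaded above, meta-llama/Llama-2-7b-hf, is the base variant, which was not fine-tuned on those tags. The canonical chat format documented by Meta also wraps the system prompt in <<SYS>> markers; for comparison, a single-turn prompt for the -chat-hf variants looks like this:

```python
# Canonical single-turn Llama 2 chat prompt (for the -chat-hf variants);
# the leading <s> BOS token is omitted because the tokenizer adds it.
prompt = (
    f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n"
    f"{user_input} [/INST]"
)
```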
@@ -167,8 +171,8 @@ try:
     <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
         <h1 style="color: #2d333a;">AQuaBot</h1>
         <p style="color: #4a5568;">
-            Welcome to AQuaBot - An AI assistant that helps raise awareness about water
-            consumption in language models.
+            Welcome to AQuaBot - An AI assistant powered by Llama 2 that helps raise awareness
+            about water consumption in language models.
         </p>
     </div>
     """)
@@ -207,10 +211,9 @@ try:
     </div>
     <div style="border-top: 1px solid #ddd; padding-top: 15px;">
         <p style="color: #666; font-size: 14px;">
-            <strong>Important note:</strong> This application uses Microsoft's Phi-2 model
-            instead of GPT-3 for availability and cost reasons. However,
-            the water consumption calculations per token (input/output) are based on the
-            conclusions from the cited paper.
+            <strong>Important note:</strong> This application uses Meta's Llama 2 (7B parameters) model.
+            The water consumption calculations per token (input/output) are based on the
+            general conclusions from the cited paper about large language models.
         </p>
     </div>
 </div>
 