Spaces:

CamiloVega
/

aQuaBot

Sleeping

App Files Files Community

aQuaBot / app.py

CamiloVega

Update app.py

485dd71 verified 8 months ago

raw

history blame

9.25 kB

	import spaces
	from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
	import gradio as gr
	import torch
	import logging
	import sys
	from accelerate import infer_auto_device_map, init_empty_weights

	# Configure logging
	logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s - %(levelname)s - %(message)s'
	)
	logger = logging.getLogger(__name__)

	# Define the model name
	model_name = "microsoft/phi-2"

	try:
	logger.info("Starting model initialization...")

	# Check CUDA availability
	device = "cuda" if torch.cuda.is_available() else "cpu"
	logger.info(f"Using device: {device}")

	# Configure PyTorch settings
	if device == "cuda":
	torch.backends.cuda.matmul.allow_tf32 = True
	torch.backends.cudnn.allow_tf32 = True

	# Load tokenizer
	logger.info("Loading tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(
	model_name,
	trust_remote_code=True
	)
	logger.info("Tokenizer loaded successfully")

	# Load model
	logger.info("Loading model...")
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	trust_remote_code=True
	)
	if device == "cuda":
	model = model.to(device)
	logger.info("Model loaded successfully")

	# Create pipeline
	logger.info("Creating generation pipeline...")
	model_gen = pipeline(
	"text-generation",
	model=model,
	tokenizer=tokenizer,
	max_new_tokens=256,
	do_sample=True,
	temperature=0.7,
	top_p=0.9,
	repetition_penalty=1.1,
	device=0 if device == "cuda" else -1
	)
	logger.info("Pipeline created successfully")

	except Exception as e:
	logger.error(f"Error during initialization: {str(e)}")
	raise

	# Configure system message
	system_message = """You are AQuaBot, an AI assistant aware of environmental impact.
	You help users with any topic while raising awareness about water consumption
	in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
	equivalent to the daily consumption of a city of 10,000 people?"""

	# Constants for water consumption calculation
	WATER_PER_TOKEN = {
	"input_training": 0.0000309,
	"output_training": 0.0000309,
	"input_inference": 0.05,
	"output_inference": 0.05
	}

	# Initialize variables
	total_water_consumption = 0

	def calculate_tokens(text):
	try:
	return len(tokenizer.encode(text))
	except Exception as e:
	logger.error(f"Error calculating tokens: {str(e)}")
	return len(text.split()) + len(text) // 4 # Fallback to approximation

	def calculate_water_consumption(text, is_input=True):
	tokens = calculate_tokens(text)
	if is_input:
	return tokens * (WATER_PER_TOKEN["input_training"] + WATER_PER_TOKEN["input_inference"])
	return tokens * (WATER_PER_TOKEN["output_training"] + WATER_PER_TOKEN["output_inference"])

	def format_message(role, content):
	return {"role": role, "content": content}

	@spaces.GPU(duration=60)
	@torch.inference_mode()
	def generate_response(user_input, chat_history):
	try:
	logger.info("Generating response for user input...")
	global total_water_consumption

	# Calculate water consumption for input
	input_water_consumption = calculate_water_consumption(user_input, True)
	total_water_consumption += input_water_consumption

	# Create prompt
	conversation_history = ""
	if chat_history:
	for message in chat_history:
	conversation_history += f"User: {message[0]}\nAssistant: {message[1]}\n"

	prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"

	logger.info("Generating model response...")
	outputs = model_gen(
	prompt,
	max_new_tokens=256,
	return_full_text=False,
	pad_token_id=tokenizer.eos_token_id,
	)
	logger.info("Model response generated successfully")

	assistant_response = outputs[0]['generated_text'].strip()

	# Calculate water consumption for output
	output_water_consumption = calculate_water_consumption(assistant_response, False)
	total_water_consumption += output_water_consumption

	# Update chat history with the new formatted messages
	chat_history.append([user_input, assistant_response])

	# Prepare water consumption message
	water_message = f"""
	<div style="position: fixed; top: 20px; right: 20px;
	background-color: white; padding: 15px;
	border: 2px solid #ff0000; border-radius: 10px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	<div style="color: #ff0000; font-size: 24px; font-weight: bold;">
	💧 {total_water_consumption:.4f} ml
	</div>
	<div style="color: #666; font-size: 14px;">
	Water Consumed
	</div>
	</div>
	"""

	return chat_history, water_message

	except Exception as e:
	logger.error(f"Error in generate_response: {str(e)}")
	error_message = f"An error occurred: {str(e)}"
	chat_history.append([user_input, error_message])
	return chat_history, show_water

	# Create Gradio interface
	try:
	logger.info("Creating Gradio interface...")
	with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
	gr.HTML("""
	<div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
	<h1 style="color: #2d333a;">AQuaBot</h1>
	<p style="color: #4a5568;">
	Welcome to AQuaBot - An AI assistant that helps raise awareness about water
	consumption in language models.
	</p>
	</div>
	""")

	chatbot = gr.Chatbot()
	message = gr.Textbox(
	placeholder="Type your message here...",
	show_label=False
	)
	show_water = gr.HTML(f"""
	<div style="position: fixed; top: 20px; right: 20px;
	background-color: white; padding: 15px;
	border: 2px solid #ff0000; border-radius: 10px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	<div style="color: #ff0000; font-size: 24px; font-weight: bold;">
	💧 0.0000 ml
	</div>
	<div style="color: #666; font-size: 14px;">
	Water Consumed
	</div>
	</div>
	""")
	clear = gr.Button("Clear Chat")

	# Add footer with citation and disclaimer
	gr.HTML("""
	<div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
	background-color: #f8f9fa; border-radius: 10px;">
	<div style="margin-bottom: 15px;">
	<p style="color: #666; font-size: 14px; font-style: italic;">
	Water consumption calculations are based on the study:<br>
	Li, P. et al. (2023). Making AI Less Thirsty: Uncovering and Addressing the Secret Water
	Footprint of AI Models. ArXiv Preprint,
	<a href="https://arxiv.org/abs/2304.03271" target="_blank">https://arxiv.org/abs/2304.03271</a>
	</p>
	</div>
	<div style="border-top: 1px solid #ddd; padding-top: 15px;">
	<p style="color: #666; font-size: 14px;">
	<strong>Important note:</strong> This application uses Microsoft's Phi-2 model
	instead of GPT-3 for availability and cost reasons. However,
	the water consumption calculations per token (input/output) are based on the
	conclusions from the cited paper.
	</p>
	</div>
	</div>
	""")

	def submit(user_input, chat_history):
	return generate_response(user_input, chat_history)

	# Configure event handlers
	message.submit(submit, [message, chatbot], [chatbot, show_water])
	clear.click(
	lambda: ([], f"""
	<div style="position: fixed; top: 20px; right: 20px;
	background-color: white; padding: 15px;
	border: 2px solid #ff0000; border-radius: 10px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	<div style="color: #ff0000; font-size: 24px; font-weight: bold;">
	💧 0.0000 ml
	</div>
	<div style="color: #666; font-size: 14px;">
	Water Consumed
	</div>
	</div>
	"""),
	None,
	[chatbot, show_water]
	)

	logger.info("Gradio interface created successfully")

	# Launch the application
	logger.info("Launching application...")
	demo.launch()

	except Exception as e:
	logger.error(f"Error in Gradio interface creation: {str(e)}")
	raise