# Hugging Face Space by tomaszki, commit 39de0ac:
# "Switch model from llama to a small 1.5B Qwen"
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import plotly.express as px
# Hugging Face model id; a small 1.5B model keeps download size and VRAM needs modest.
model_name = 'Qwen/Qwen2-1.5B'
# Prefer GPU when available; all tensors below are moved to this device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
@st.cache_resource
def load_model():
    """Load the causal LM once per Streamlit server process and place it on `device`.

    Uses bfloat16 weights to halve memory, and authenticates against the Hub
    with the `hf_token` Streamlit secret.
    """
    lm = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        token=st.secrets['hf_token'],
    )
    return lm.to(device)
@st.cache_resource
def load_tokenizer():
    """Load the tokenizer matching `model_name`, cached for the server's lifetime.

    Authenticates against the Hub with the `hf_token` Streamlit secret.
    """
    tok = AutoTokenizer.from_pretrained(model_name, token=st.secrets['hf_token'])
    return tok
@torch.no_grad()
@st.cache_data()
def get_attention_weights_and_tokens(text):
    """Tokenize `text`, run one forward pass, and return attention maps plus tokens.

    Args:
        text: raw input string typed by the user.

    Returns:
        attentions: list (one entry per layer) of float32 CPU tensors shaped
            (batch=1, heads, seq, seq) — the downstream code indexes `[0]` and
            reduces over the head axis.
        tokens: list of decoded token strings aligned with the attention axes.
    """
    tokenized = tokenizer(text, return_tensors='pt')
    tokens = [tokenizer.decode(token) for token in tokenized.input_ids[0]]
    tokenized = tokenized.to(device)
    output = model(**tokenized, output_attentions=True)
    # Cast bf16 -> float32 (plotly/numpy cannot handle bfloat16) and move to
    # CPU *before* returning: st.cache_data pickles the return value, and
    # pickling CUDA tensors either fails or ties the cache to the GPU.
    # Callers' later `.cpu()` calls remain harmless no-ops.
    attentions = [attention.to(torch.float32).cpu() for attention in output.attentions]
    return attentions, tokens
# --- Streamlit app body: re-runs top-to-bottom on every user interaction. ---

# Materialize the cached singletons (cheap after the first run).
model = load_model()
tokenizer = load_tokenizer()

st.title('Attention visualizer')
text = st.text_area('Write your text here and see attention weights.')

# Slider is 1-based for the user; subtract 1 for the 0-based layer index.
layer = st.slider(
    'Which layer do you want to see?',
    min_value=1,
    max_value=model.config.num_hidden_layers
) - 1

# 'Average' means mean over all heads; numeric options are 1-based head ids.
head = st.select_slider(
    'Which head do you want to see?',
    options = ['Average'] + list(range(1, model.config.num_attention_heads + 1))
)

if text:
    attentions, tokens = get_attention_weights_and_tokens(text)
    # attentions[layer] is (batch, heads, seq, seq): take batch 0, then either
    # average across the head axis or select the requested (1-based) head.
    if head == 'Average':
        weights = attentions[layer].cpu()[0].mean(dim=0)
    else:
        weights = attentions[layer].cpu()[0][head - 1]
    fig = px.imshow(
        weights,
    )
    # Replace raw index ticks with the decoded token strings on both axes.
    fig.update_layout(xaxis={
        'ticktext': tokens,
        'tickvals': list(range(len(tokens))),
    }, yaxis={
        'ticktext': tokens,
        'tickvals': list(range(len(tokens))),
    },
    height=800,
    )
    st.plotly_chart(fig)