# Summarizer / app.py
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import is_flash_attn_2_available
from transformers import BitsAndBytesConfig
import pandas as pd
import os
import torch
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
# MODEL CONFIG
model_id = 'google/gemma-2b-it'
HF_TOKEN = os.environ['HF_TOKEN']
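# Gemma is a gated model, so loading it requires an authenticated token; on a
# Hugging Face Space this is typically provided as a repository secret named
# HF_TOKEN.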
@st.cache_resource
def load_model(model_id):
    # CHOOSE DEVICE: prefer Apple's MPS backend when available, else CPU.
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(device)
    if device.type == 'cpu':
        print('Warning! No GPU available')

    # IMPORT MODEL
    print(model_id)
    quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                             bnb_4bit_compute_dtype=torch.float16)
    # if is_flash_attn_2_available() and (torch.cuda.get_device_capability(0)[0] >= 8):
    #     attn_implementation = "flash_attention_2"
    # else:
    #     attn_implementation = "sdpa"
    # print(f"[INFO] Using attention implementation: {attn_implementation}")

    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, token=HF_TOKEN)
    llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id,
                                                     token=HF_TOKEN,
                                                     torch_dtype=torch.float16,
                                                     # quantization_config=quantization_config,
                                                     # attn_implementation=attn_implementation,  # which attention version to use
                                                     low_cpu_mem_usage=False)  # use full memory
    llm_model.to(device)
    return llm_model, tokenizer, device
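# A minimal sketch of the quantized path (not active here: bitsandbytes 4-bit
# loading assumes a CUDA GPU, while this app targets MPS/CPU). To enable it,
# pass the config instead of a fixed float16 dtype:
#
#   llm_model = AutoModelForCausalLM.from_pretrained(
#       model_id,
#       token=HF_TOKEN,
#       quantization_config=quantization_config,
#   )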
# Create a text element and let the reader know the model is loading.
model_load_state = st.text('Loading model...')
llm_model, tokenizer, device = load_model(model_id)
# Notify the reader that the model was successfully loaded.
model_load_state.text('Loading model...done!')
# INFERENCE
def prompt_formatter(reviews, type_of_doc):
    return f"""You are a summarization bot.
You will receive {type_of_doc} and you will summarize what was said in the input.
{type_of_doc} are listed below under inputs.
Inputs: {reviews}
Answer :
"""
def mirror_mirror(inputs, prompt_formatter, tokenizer, type_of_doc):
    prompt = prompt_formatter(inputs, type_of_doc)
    model_inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = llm_model.generate(**model_inputs,
                                 temperature=0.3,
                                 do_sample=True,
                                 max_new_tokens=275)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip the prompt from the decoded sequence so only the summary remains.
    return prompt, output_text.replace(prompt, '')
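# Scoring idea used below: pad the candidate summary and the input to the same
# length, treat the resulting token-id matrices as sparse row vectors, and take
# their cosine similarity as a rough lexical-overlap score. A self-contained
# sketch of the same computation on two hypothetical strings:
#
#   ids = tokenizer(["a candidate summary", "the original input"],
#                   return_tensors="pt", padding=True).input_ids.numpy()
#   score = cosine_similarity(sparse.csr_matrix(ids))[0, 1]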
def summarization(example : str, type_of_doc : str, results_df : pd.DataFrame = pd.DataFrame()) -> pd.DataFrame :
    print(type_of_doc)
    # INFERENCE
    results = []
    for cnt in range(0, 5):
        print(cnt)
        prompt, result = mirror_mirror(example, prompt_formatter, tokenizer, type_of_doc)
        # Compare the candidate summary and the input as padded token-id vectors.
        list_temp = [result, example]
        tokenized = tokenizer(list_temp, return_tensors="pt", padding=True)
        A = sparse.csr_matrix(tokenized.input_ids.numpy())
        score = cosine_similarity(A)[0, 1]
        if score > 0.1:
            # Accept the first candidate similar enough to the input.
            fin_result = result
            max_score = score
            break
        results.append(result)

    # tokenize the candidates and the example together
    try:
        fin_result
    except NameError:
        # If no candidate passed the threshold, use the best of the available
        # results. Append the example so everything is tokenized together and
        # padded to the same length.
        results.append(example)
        tokenized = tokenizer(results, return_tensors="pt", padding=True)
        A = sparse.csr_matrix(tokenized.input_ids.numpy())
        # Calculate the cosine similarity of each pair, then keep only the
        # "candidate vs example" column (the example is the last row) and drop
        # the example's similarity with itself.
        scores = cosine_similarity(A)[:-1, -1]
        # The final result is the candidate with the greatest cosine score.
        fin_result = results[np.argmax(scores)]
        max_score = scores.max()

    # save the final result and its attributes
    row = pd.DataFrame({'model' : model_id, 'prompt' : prompt, 'reviews' : example,
                        'summarization' : fin_result, 'score' : [max_score]})
    results_df = pd.concat([results_df, row], ignore_index=True)
    return results_df
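# Hypothetical usage, returning a one-row DataFrame with the chosen summary:
#
#   df = summarization("Great app, but the timer sometimes lags.", "review")
#   print(df.summarization[0], df.score[0])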
# Default text shown in the input box.
default_value = "I am a summarization bot! Let me summarize your reading for you!"
st.title("Mirror, mirror, on the cloud, what do Clockify users say aloud?")
st.subheader("--Clockify review summarizer--")
inputs = st.text_area("Your text", default_value, height = 275)
type_of_doc = st.text_area("Type of text", 'text', height = 25)
button = st.button('Summon the summarizer!')
result = ''
score = ''
if button :
    results_df = summarization(inputs, type_of_doc)
    # only one input, so read the single result row
    result = results_df.summarization[0]
    score = results_df.score[0]

outputs = st.text_area("Summarized text", result)
score = st.text_area("Cosine similarity score", score)
# max_length = st.sidebar.slider("Max Length", min_value = 10, max_value=30)
# temperature = st.sidebar.slider("Temperature", value = 1.0, min_value = 0.0, max_value=1.0, step=0.05)
# top_k = st.sidebar.slider("Top-k", min_value = 0, max_value=5, value = 0)
# top_p = st.sidebar.slider("Top-p", min_value = 0.0, max_value=1.0, step = 0.05, value = 0.9)
# num_return_sequences = st.sidebar.number_input('Number of Return Sequences', min_value=1, max_value=5, value=1, step=1)