Spaces:

isimorfizam
/

Summarizer

Sleeping

File size: 6,404 Bytes

30bf6ab

import streamlit as st

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import is_flash_attn_2_available
from transformers import BitsAndBytesConfig
import pandas as pd
import os
import torch
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse


# MODEL SELECTION AND CREDENTIALS

# Hugging Face Hub identifier of the checkpoint to load.
model_id = 'google/gemma-2b-it'
# Hub access token taken from the environment; a missing HF_TOKEN raises KeyError here.
HF_TOKEN = os.environ['HF_TOKEN']

@st.cache_resource
def load_model(model_id):
    """Load the causal language model and its tokenizer onto the best available device.

    The function is wrapped in ``st.cache_resource`` so Streamlit reruns reuse
    the already-loaded objects instead of loading the checkpoint again.

    Args:
        model_id: Hugging Face Hub identifier of the checkpoint to load.

    Returns:
        A tuple ``(llm_model, tokenizer, device)`` where ``device`` is the
        ``torch.device`` the model was moved to.
    """
    # Prefer CUDA, then Apple MPS, then CPU. Probing only MPS would leave a
    # CUDA GPU unused while still printing the "no GPU" warning.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    print(device)

    if device.type == 'cpu':
        print('Warning! No GPU available')

    # IMPORT MODEL

    print(model_id)

    # Quantization and a custom attention implementation are not configured:
    # the model is loaded in half precision with the library's default attention.
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, token=HF_TOKEN)

    llm_model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        token=HF_TOKEN,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=False,  # use full memory
    )
    llm_model.to(device)
    return llm_model, tokenizer, device

# Show a placeholder text element while the model is loading.
model_load_state = st.text('Loading model...')
# Load the model, tokenizer and device (load_model is wrapped in st.cache_resource).
llm_model, tokenizer, device = load_model(model_id)
# Update the placeholder once the model has been loaded.
model_load_state.text('Loading model...done!')

# INFERENCE
# def prompt_formatter(reviews, type_of_doc):
#     return f"""You are a summarization bot.
#     You will receive {type_of_doc} and you will extract all relevant information from {type_of_doc} and return one paragraph in which you will summarize what was said.
#     {type_of_doc} are listed below under inputs.
#     Inputs: {reviews}
#     Answer :
#     """
def prompt_formatter(reviews, type_of_doc):
    """Build the summarization prompt sent to the model.

    Args:
        reviews: The text to summarize; interpolated after ``Inputs:``.
        type_of_doc: Name of the kind of document, used in the instructions.

    Returns:
        The prompt string, ending with an ``Answer :`` cue line followed by
        a newline and four spaces of indentation.
    """
    prompt_lines = (
        "You are a summarization bot.",
        f"    You will receive {type_of_doc} and you will summarize what was said in the input.",
        f"    {type_of_doc} are listed below under inputs.",
        f"    Inputs: {reviews}",
        "    Answer :",
        "    ",
    )
    return "\n".join(prompt_lines)
def mirror_mirror(inputs, prompt_formatter, tokenizer, type_of_doc):
    """Generate a summary of ``inputs`` with the module-level ``llm_model``.

    Args:
        inputs: Text to summarize; passed to ``prompt_formatter``.
        prompt_formatter: Callable ``(inputs, type_of_doc) -> str`` that builds the prompt.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        type_of_doc: Description of the document kind, forwarded to ``prompt_formatter``.

    Returns:
        A tuple ``(prompt, generated_text)`` where ``generated_text`` holds
        only the text produced after the prompt.
    """
    prompt = prompt_formatter(inputs, type_of_doc)
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = llm_model.generate(**encoded,
                                 temperature=0.3,
                                 do_sample=True,
                                 max_new_tokens=275)
    # A causal LM returns the prompt tokens followed by the new tokens. Drop
    # the prompt by token count instead of string-replacing the decoded text:
    # the replace silently leaves the prompt in place whenever decoding does
    # not reproduce it character-for-character.
    prompt_length = encoded["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    output_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return prompt, output_text



def _similarity_to_reference(candidates, reference):
    """Return the cosine similarity of each candidate's token ids to the reference's.

    Candidates and reference are tokenized together in one padded batch so
    that all token-id vectors have the same length.
    """
    batch = tokenizer(candidates + [reference], return_tensors="pt", padding=True)
    id_matrix = sparse.csr_matrix(batch.input_ids.numpy())
    # The reference is the last row/column; [:-1] drops its similarity to
    # itself (always 1.0) so it can never win the comparison.
    return cosine_similarity(id_matrix)[:-1, -1]


def summarization(example : list[str], type_of_doc : str, results_df : pd.DataFrame = pd.DataFrame(),
                  *, max_attempts : int = 5, min_score : float = 0.1) -> pd.DataFrame :
    """Summarize ``example`` and append the outcome as one row to ``results_df``.

    Up to ``max_attempts`` summaries are sampled. The first one whose score
    exceeds ``min_score`` is accepted immediately; if none does, the
    highest-scoring attempt is used. The score is the cosine similarity
    between the padded token-id vectors of the summary and of ``example``.

    Args:
        example: Text to summarize (the Streamlit caller in this file passes a
            single string).
        type_of_doc: Description of the document kind, inserted into the prompt.
        results_df: Frame to extend. It is never modified in place, so the
            shared default instance stays empty.
        max_attempts: Maximum number of generations to try.
        min_score: Score a summary must exceed to be accepted early.

    Returns:
        A new DataFrame: ``results_df`` plus one row with the columns
        ``model``, ``prompt``, ``reviews``, ``summarization`` and ``score``.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    print(type_of_doc)
    # INFERENCE
    candidates = []
    fin_result = None
    max_score = None
    for cnt in range(max_attempts):
        print(cnt)
        prompt, result = mirror_mirror(example, prompt_formatter, tokenizer, type_of_doc)
        score = _similarity_to_reference([result], example)[0]

        if score > min_score:
            fin_result, max_score = result, score
            break

        candidates.append(result)

    if fin_result is None:
        # No attempt cleared the threshold: fall back to the best attempt.
        # All candidates are scored in one batch together with the example
        # (joint tokenization is needed so padding yields equal lengths).
        scores = _similarity_to_reference(candidates, example)
        best = int(np.argmax(scores))
        fin_result = candidates[best]
        max_score = scores[best]

    # save final result and its attributes
    row = pd.DataFrame({'model' : model_id, 'prompt' : prompt, 'reviews' : example, 'summarization' : fin_result, 'score' : [max_score] })
    return pd.concat([results_df, row], ignore_index = True)




# Streamlit page: the widgets below render in the order they are called.
# Default text shown in the input box before the user types anything.
default_value = "I am a summarization bot! Let me summarize your reading for you!"
st.title("Mirror, mirror, on the cloud, what do Clockify users say aloud?")
st.subheader("--Clockify review summarizer--")

 

# Text to summarize and a short description of what kind of text it is.
inputs = st.text_area("Your text", default_value, height = 275)
# NOTE(review): recent Streamlit releases reportedly reject text_area heights below 68 px — confirm against the pinned version.
type_of_doc = st.text_area("Type of text", 'text', height = 25)
button = st.button('Summon the summarizer!')
# Output fields stay empty until the button has been pressed.
result = ''
score = ''
if  button :
    results_df = summarization(inputs,type_of_doc)
    # only one input, so the single result row is at index 0
    result = results_df.summarization[0]
    score = results_df.score[0]

# Display the summary and its similarity score (empty strings before the first run).
outputs = st.text_area("Summarized text", result)
score = st.text_area("Cosine similarity score", score)
# max_length = st.sidebar.slider("Max Length", min_value = 10, max_value=30)
# temperature = st.sidebar.slider("Temperature", value = 1.0, min_value = 0.0, max_value=1.0, step=0.05)
# top_k = st.sidebar.slider("Top-k", min_value = 0, max_value=5, value = 0)
# top_p = st.sidebar.slider("Top-p", min_value = 0.0, max_value=1.0, step = 0.05, value = 0.9)
# num_return_sequences = st.sidebar.number_input('Number of Return Sequences', min_value=1, max_value=5, value=1, step=1)