# # Load in packages

# +
import os
from datetime import datetime
from typing import TypeVar

# Need to overwrite version of gradio present in Huggingface spaces as it
# doesn't have like buttons/avatars (Oct 2023). The pin must run BEFORE gradio
# is first imported; installing after the import leaves the already-loaded
# version in use for the lifetime of the process.
#os.system("pip uninstall -y gradio")
os.system("pip install gradio==3.50.0")

import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, pipeline

# Alternative model sources
import ctransformers

import chatfuncs.chatfuncs as chatf

PandasDataFrame = TypeVar('pd.core.frame.DataFrame')

# Disable cuda devices if necessary
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Label used in the UI for the (currently disabled) GGUF model.
MISTRAL_MODEL_TYPE = "Mistral Open Orca (larger, slow)"
MISTRAL_UNAVAILABLE_MESSAGE = "Mistral Open Orca summaries currently not working. Sorry!"

# UI label -> Hugging Face checkpoint for the summarisation pipelines.
HF_CHECKPOINTS = {
    "flan-t5-large-stacked-samsum": "stacked-summaries/flan-t5-large-stacked-samsum-1024",
    "flan-t5-small-stacked-samsum": "stacked-summaries/flan-t5-small-stacked-samsum-1024",
}


def create_hf_model(model_name):
    """Build a Hugging Face summarisation pipeline for a checkpoint.

    Args:
        model_name: Hugging Face checkpoint identifier.

    Returns:
        Tuple ``(summariser, tokenizer, model_name)`` where ``summariser`` is
        the ``"summarization"`` pipeline and ``tokenizer`` is the tokenizer it
        uses, created with ``model_max_length=chatf.context_length``.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        model_name, model_max_length=chatf.context_length
    )
    summariser = pipeline("summarization", model=model_name, tokenizer=tokenizer)

    return summariser, tokenizer, model_name


def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
    """Load the requested summariser and publish it on the ``chatf`` module.

    Args:
        model_type: One of the UI labels: a key of ``HF_CHECKPOINTS`` or
            ``MISTRAL_MODEL_TYPE``.
        gpu_layers: Number of model layers to send to the GPU; only applied
            to the run configs for the Mistral model.
        gpu_config: GPU run config; defaults to ``chatf.gpu_config``.
        cpu_config: CPU run config; defaults to ``chatf.cpu_config``.
        torch_device: Device name; defaults to ``chatf.torch_device``.

    Returns:
        Tuple ``(model_type, load_confirmation, model_type)`` matching the
        three Gradio outputs (model state, load status text, current model).
        For Hugging Face models ``model_type`` is the checkpoint name returned
        by ``create_hf_model``, not the UI label that was passed in.

    Raises:
        ValueError: If ``model_type`` is not a known model label.
    """
    print("Loading model ", model_type)

    # Default values inside the function
    if gpu_config is None:
        gpu_config = chatf.gpu_config
    if cpu_config is None:
        cpu_config = chatf.cpu_config
    if torch_device is None:
        torch_device = chatf.torch_device

    if model_type == MISTRAL_MODEL_TYPE:
        # The GPU config is always updated; the CPU config only when not on cuda.
        gpu_config.update_gpu(gpu_layers)
        if torch_device != "cuda":
            cpu_config.update_gpu(gpu_layers)

        # NOTE(review): this reports cpu_config even when running on cuda -
        # confirm which config should be shown here.
        print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")

        print(vars(gpu_config))
        print(vars(cpu_config))

        # Loading the GGUF model through ctransformers is currently disabled,
        # so empty placeholders are published instead of a real model.
        tokenizer = []
        summariser = []
    elif model_type in HF_CHECKPOINTS:
        # Huggingface chat model. create_hf_model hands back the checkpoint
        # name, which replaces the UI label from here on.
        summariser, tokenizer, model_type = create_hf_model(
            model_name=HF_CHECKPOINTS[model_type]
        )
    else:
        raise ValueError("Unknown model type: " + str(model_type))

    chatf.model = summariser
    chatf.tokenizer = tokenizer
    chatf.model_type = model_type

    load_confirmation = "Finished loading model: " + model_type

    print(load_confirmation)
    return model_type, load_confirmation, model_type


# Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
#model_type = "Mistral Open Orca (larger, slow)"
#load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

model_type = "flan-t5-large-stacked-samsum"
load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

model_type = "flan-t5-small-stacked-samsum"
load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

today = datetime.now().strftime("%d%m%Y")
today_rev = datetime.now().strftime("%Y%m%d")


def summarise_text(text, text_df, length_slider, in_colnames, model_type):
    """Summarise pasted text, or one column of an uploaded CSV file.

    Args:
        text: Text pasted into the UI; used when no file is uploaded.
        text_df: Uploaded file object from ``gr.File``, or ``None``.
        length_slider: Maximum summary length, passed to the pipeline as
            ``max_length``.
        in_colnames: Array from the column-name ``gr.Dataframe``; its first
            cell names the CSV column to summarise. Ignored without a file.
        model_type: Name of the currently loaded model.

    Returns:
        Tuple ``(first_summary, output_name)``: the first summary as a string
        and the name of the CSV file holding every original/summary pair.
    """
    if text_df is None:
        column_name = "text"
        in_text_df = pd.DataFrame({column_name: [text]})
    else:
        # NOTE(review): assumes the gr.File value exposes the temp file path
        # as `.name` (gradio 3.x default) - confirm.
        in_text_df = pd.read_csv(text_df.name, delimiter=",", low_memory=False, encoding='cp1252')
        column_name = in_colnames.tolist()[0][0]

    texts = list(in_text_df[column_name])

    if model_type == MISTRAL_MODEL_TYPE:
        length = str(length_slider)

        prompt = """<|im_start|>system
You are an AI assistant that follows instruction extremely well. Help as much as you can.
<|im_start|>user
Summarise the following text in less than {length} words.
Text: {text}
Answer:<|im_end|>"""

        formatted_string = prompt.format(length=length, text=text)
        print(formatted_string)

        #summarised_text = chatf.model(formatted_string, max_new_tokens=length_slider)
        # One message per input row, so that file input does not crash.
        summarised_text_out = [MISTRAL_UNAVAILABLE_MESSAGE] * len(texts)
    else:
        summarised_text = chatf.model(texts, max_length=length_slider)
        # The pipeline returns one {'summary_text': ...} dict per input text.
        summarised_text_out = [d['summary_text'] for d in summarised_text]

    output_name = "summarise_output_" + today_rev + ".csv"
    output_df = pd.DataFrame({"Original text": in_text_df[column_name],
                              "Summarised text": summarised_text_out})

    summarised_text_out_str = str(output_df["Summarised text"][0])

    output_df.to_csv(output_name, index=False)

    return summarised_text_out_str, output_name


# ## Gradio app - summarise

block = gr.Blocks(theme = gr.themes.Base())

with block:

    model_type_state = gr.State(model_type)

    gr.Markdown(
    """
    # Text summariser
    Enter open text below to get a summary. You can copy and paste text directly, or upload a file and specify the column that you want to summarise. Note that summarisation with Mistral Open Orca is still in development and does not currently work.
    """)

    with gr.Tab("Summariser"):
        current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)

        with gr.Accordion("Paste open text", open = False):
            in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)

        with gr.Accordion("Summarise open text from a file", open = False):
            in_text_df = gr.File(label="Input text from file")
            in_colnames = gr.Dataframe(label="Write the column name for the open text to summarise",
                                       type="numpy", row_count=(1,"fixed"), col_count = (1,"fixed"),
                                       headers=["Open text column name"])

        with gr.Row():
            summarise_btn = gr.Button("Summarise")
            length_slider = gr.Slider(minimum = 30, maximum = 200, value = 100, step = 10, label = "Maximum length of summary")

        with gr.Row():
            output_single_text = gr.Textbox(label="Output example (first example in dataset)")
            output_file = gr.File(label="Output file")

    with gr.Tab("Advanced features"):
        with gr.Row():
            model_choice = gr.Radio(label="Choose a summariser model", value="flan-t5-small-stacked-samsum", choices = ["flan-t5-small-stacked-samsum", "flan-t5-large-stacked-samsum", "Mistral Open Orca (larger, slow)"])
            change_model_button = gr.Button(value="Load model", scale=0)
        with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=5, step = 1, visible=True)

        load_text = gr.Text(label="Load status")

    # Swap the active model and refresh the state, status and model name boxes.
    change_model_button.click(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model])

    # Run the summariser on the pasted text or the uploaded file.
    summarise_btn.click(fn=summarise_text, inputs=[in_text, in_text_df, length_slider, in_colnames, model_type_state],
                        outputs=[output_single_text, output_file], api_name="summarise_single_text")

block.queue(concurrency_count=1).launch()
# -