# -*- coding: utf-8 -*-
# file: app.py
# time: 18:37 23/09/2023
# author: Amir Khan
# github: https://github.com/Amir22010

# Gradio demo: aspect-based sentiment generation for hospital reviews using
# fine-tuned allenai/tk-instruct-base-def-pos models.

import os
import numpy as np
import ast
import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Fine-tuned English ABSA model (tokenizer + seq2seq generator).
try:
    tokenizer_english = AutoTokenizer.from_pretrained(
        "amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
        cache_dir=os.getcwd(),
    )
    double_english_generator = AutoModelForSeq2SeqLM.from_pretrained(
        "amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
        cache_dir=os.getcwd(),
    )
except Exception as e:
    print(e)
    print("english model load error")

# Fine-tuned multilingual ABSA model.
try:
    tokenizer_multilingual = AutoTokenizer.from_pretrained(
        "amir22010/PyABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
        cache_dir=os.getcwd(),
    )
    double_multilingual_generator = AutoModelForSeq2SeqLM.from_pretrained(
        "amir22010/PyABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
        cache_dir=os.getcwd(),
    )
except Exception as e:
    print(e)
    print("multilingual model load error")

# Fine-tuned KeyBERT-based ABSA model.
try:
    tokenizer_keybert = AutoTokenizer.from_pretrained(
        "amir22010/KeyBert_ABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model"
    )
    double_keybert_generator = AutoModelForSeq2SeqLM.from_pretrained(
        "amir22010/KeyBert_ABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model"
    )
except Exception as e:
    print(e)
    print("keybert model load error")


def perform_asde_inference(text, dataset, model_id):
    # With no input text, sample a random example from the model's validation set
    # so the ground-truth aspects/sentiments can be shown next to the prediction.
    if not text:
        if model_id == "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('pyabsa_english.csv')  # validation dataset
        elif model_id == "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('pyabsa_multilingual.csv')  # validation dataset
        elif model_id == "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('keybert_valid.csv')  # validation dataset
        random_i = np.random.randint(low=0, high=df.shape[0], size=(1,)).flat[0]
        selected_df = df.iloc[random_i]
        text = selected_df['clean_text']
        true_aspect = selected_df['actual_aspects']
        true_sentiment = selected_df['actual_sentiments']
        true_doubles = pd.DataFrame(
            list(map(list, zip(ast.literal_eval(true_aspect), ast.literal_eval(true_sentiment)))),
            columns=['Aspect', 'Sentiment'],
        )
    else:
        # User-supplied text has no ground-truth annotations.
        true_doubles = pd.DataFrame([["NA", "NA"]], columns=['Aspect', 'Sentiment'])

    # Instruction prompt in the tk-instruct "definition + positive examples" format.
    bos_instruction = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
Positive example 1-
input: this hospital has a good team of doctors who will take care of all your needs brilliantly.
output: doctors:positive
Positive example 2-
input: Arthur as Irv at ham hospital ran an Nagar , Madurai has a doctor who engages you in a conversation and tries to take your mind off the pain and he has trained the staff to do so as well.
output: doctor:positive, staff:positive
Now complete the following example-
input: """
    delim_instruct = ''
    eos_instruct = ' \noutput:'

    # Build the full prompt and run the selected fine-tuned generator.
    if model_id == "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_english(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_english_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_english.decode(output[0], skip_special_tokens=True)
    elif model_id == "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_multilingual(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_multilingual_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_multilingual.decode(output[0], skip_special_tokens=True)
    elif model_id == "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_keybert(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_keybert_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_keybert.decode(output[0], skip_special_tokens=True)

    # The model is expected to emit "aspect:sentiment, aspect:sentiment, ...".
    pred_asp = [i.split(':')[0] for i in model_generated.split(',')]
    pred_sent = [i.split(':')[1] for i in model_generated.split(',')]
    pred_doubles = pd.DataFrame(list(map(list, zip(pred_asp, pred_sent))), columns=['Aspect', 'Sentiment'])
    return pred_doubles, true_doubles, text, model_generated


def run_demo(text, dataset, model_id):
    # Thin wrapper so the Gradio callback never raises; errors are printed to the log.
    try:
        return inference(text, dataset, model_id)
    except Exception as e:
        print(e)


def inference(text, dataset, model_id):
    return perform_asde_inference(text, dataset, model_id)
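

# Illustrative sketch (not part of the original app): a small helper showing how the
# inference pipeline above can be exercised without the Gradio UI. The helper name
# `_smoke_test` and the example review sentence are hypothetical; the function is
# defined but never called, so it does not run when the Space starts.
def _smoke_test():
    preds_df, true_df, used_text, raw_output = perform_asde_inference(
        "the nurses were kind but the billing counter was very slow",
        "HospitalReviews",
        "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
    )
    print(used_text)   # the review that was scored
    print(raw_output)  # raw "aspect:sentiment, aspect:sentiment" string from the model
    print(preds_df)    # parsed DataFrame with 'Aspect' and 'Sentiment' columns
    return preds_df, true_df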


if __name__ == "__main__":
    demo = gr.Blocks()
    with demo:
        with gr.Row():
            with gr.Column():
                gr.Markdown("# Hospital Review Aspect Sentiment Generation")
        with gr.Row():
            with gr.Column():
                asde_input_sentence = gr.Textbox(
                    placeholder="Leave this box blank and choose a dataset to get a random example...",
                    label="Example:",
                )
                gr.Markdown(
                    "You can find the code and dataset at [MTech Thesis Project 2023](https://github.com/Amir22010/MTP_Thesis_Project_2023/tree/main)"
                )
                asde_dataset_ids = gr.Radio(
                    choices=["HospitalReviews"],
                    value="HospitalReviews",
                    label="Datasets",
                )
                asde_model_ids = gr.Radio(
                    choices=[
                        "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                        "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                        "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                    ],
                    value="PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                    label="Fine-tuned Models on Hospital Review custom data",
                )
                asde_inference_button = gr.Button("Let's go!")
                asde_output_text = gr.TextArea(label="Example:")
                asde_model_output_generated_sentence = gr.Textbox(
                    placeholder="Text Generated...",
                    label="Model Prediction Text Generated:",
                )
                asde_output_pred_df = gr.DataFrame(label="Predicted Aspect & Sentiment:")
                asde_output_true_df = gr.DataFrame(label="Original Aspect & Sentiment:")

                asde_inference_button.click(
                    fn=run_demo,
                    inputs=[asde_input_sentence, asde_dataset_ids, asde_model_ids],
                    outputs=[
                        asde_output_pred_df,
                        asde_output_true_df,
                        asde_output_text,
                        asde_model_output_generated_sentence,
                    ],
                )
        gr.Markdown("""### Author: [Amir Khan](https://github.com/Amir22010)""")
    demo.launch()