# Spaces status (page-scrape residue, not part of app.py): Runtime error
# -*- coding: utf-8 -*-
# file: app.py
# time: 18:37 23/09/2023
# author: Amir Khan
# github: https://github.com/Amir22010
import os | |
import numpy as np | |
import ast | |
import gradio as gr | |
import pandas as pd | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Load the three fine-tuned seq2seq checkpoints when the module is imported.
# Every checkpoint is attempted independently: a failure is printed and the
# next checkpoint is still tried. Names that were not assigned before the
# failure stay undefined.

_ENGLISH_REPO = "amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model"
try:
    # Downloaded files are cached in the current working directory.
    _english_kwargs = {"cache_dir": os.getcwd()}
    tokenizer_english = AutoTokenizer.from_pretrained(_ENGLISH_REPO, **_english_kwargs)
    double_english_generator = AutoModelForSeq2SeqLM.from_pretrained(_ENGLISH_REPO, **_english_kwargs)
except Exception as load_error:
    print(load_error)
    print("english model load error")

_MULTILINGUAL_REPO = "amir22010/PyABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model"
try:
    # Downloaded files are cached in the current working directory.
    _multilingual_kwargs = {"cache_dir": os.getcwd()}
    tokenizer_multilingual = AutoTokenizer.from_pretrained(_MULTILINGUAL_REPO, **_multilingual_kwargs)
    double_multilingual_generator = AutoModelForSeq2SeqLM.from_pretrained(_MULTILINGUAL_REPO, **_multilingual_kwargs)
except Exception as load_error:
    print(load_error)
    print("multilingual model load error")

_KEYBERT_REPO = "amir22010/KeyBert_ABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model"
try:
    # No cache_dir is passed for this checkpoint, so the library default applies.
    tokenizer_keybert = AutoTokenizer.from_pretrained(_KEYBERT_REPO)
    double_keybert_generator = AutoModelForSeq2SeqLM.from_pretrained(_KEYBERT_REPO)
except Exception as load_error:
    print(load_error)
    print("keybert model load error")
_ENGLISH_MODEL_ID = "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
_MULTILINGUAL_MODEL_ID = "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
_KEYBERT_MODEL_ID = "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model"

# Validation CSV used to draw a random example when no text is supplied,
# keyed by the model id selected in the UI.
_VALIDATION_CSV_BY_MODEL = {
    _ENGLISH_MODEL_ID: "pyabsa_english.csv",
    _MULTILINGUAL_MODEL_ID: "pyabsa_multilingual.csv",
    _KEYBERT_MODEL_ID: "keybert_valid.csv",
}

_PAIR_COLUMNS = ["Aspect", "Sentiment"]

# Instruction prefix placed before the review text; the review is followed by
# _EOS_INSTRUCTION so the model continues after "output:".
_BOS_INSTRUCTION = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
Positive example 1-
input: this hospital has a good team of doctors who will take care of all your needs brilliantly.
output: doctors:positive
Positive example 2-
input: Arthur as Irv at ham hospital ran an Nagar , Madurai has a doctor who engages you in a conversation and tries to take your mind off the pain and he has trained the staff to do so as well.
output: doctor:positive, staff:positive
Now complete the following example-
input: """
_DELIM_INSTRUCTION = ''
_EOS_INSTRUCTION = ' \noutput:'


def _select_model(model_id):
    """Return the ``(tokenizer, generator)`` pair registered for *model_id*."""
    # The module-level names are resolved only for the requested model, so a
    # checkpoint that failed to load does not break the other two.
    if model_id == _ENGLISH_MODEL_ID:
        return tokenizer_english, double_english_generator
    if model_id == _MULTILINGUAL_MODEL_ID:
        return tokenizer_multilingual, double_multilingual_generator
    if model_id == _KEYBERT_MODEL_ID:
        return tokenizer_keybert, double_keybert_generator
    raise ValueError(f"Unknown model id: {model_id!r}")


def _parse_aspect_sentiment_pairs(generated):
    """Split ``"aspect:sentiment, aspect:sentiment"`` text into ``[aspect, sentiment]`` rows."""
    rows = []
    for chunk in generated.split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        # partition() never raises: an item without ':' keeps its text as the
        # aspect and gets the "NA" placeholder as sentiment.
        aspect, _, sentiment = chunk.partition(':')
        rows.append([aspect.strip(), sentiment.strip() or "NA"])
    return rows


def perform_asde_inference(text, dataset, model_id):
    """Generate aspect/sentiment pairs for a hospital review.

    Args:
        text: Review to analyse. When empty, a random row is drawn from the
            validation CSV that belongs to ``model_id`` and its
            ``clean_text`` column is used instead.
        dataset: Dataset name chosen in the UI. It is accepted for interface
            compatibility and is not used by this function.
        model_id: One of the three model ids offered in the UI.

    Returns:
        A 4-tuple ``(pred_doubles, true_doubles, text, model_generated)``:
        the predicted pairs as a DataFrame with columns ``Aspect`` and
        ``Sentiment``, the reference pairs in the same layout (a single
        ``NA``/``NA`` row when the caller supplied the text), the text that
        was analysed, and the raw decoded model output.

    Raises:
        ValueError: If ``model_id`` is not a known model id, or the
            validation CSV for it has no rows.
    """
    # Reject unknown ids up front instead of failing later on an unbound local.
    if model_id not in _VALIDATION_CSV_BY_MODEL:
        raise ValueError(f"Unknown model id: {model_id!r}")

    if not text:
        csv_path = _VALIDATION_CSV_BY_MODEL[model_id]
        df = pd.read_csv(csv_path)  # validation dataset
        if df.empty:
            raise ValueError(f"Validation dataset {csv_path!r} has no rows")
        row = df.iloc[np.random.randint(df.shape[0])]
        text = row['clean_text']
        # The aspect and sentiment columns hold Python list literals.
        true_aspects = ast.literal_eval(row['actual_aspects'])
        true_sentiments = ast.literal_eval(row['actual_sentiments'])
        true_rows = [[aspect, sentiment] for aspect, sentiment in zip(true_aspects, true_sentiments)]
        true_doubles = pd.DataFrame(true_rows, columns=_PAIR_COLUMNS)
    else:
        true_doubles = pd.DataFrame([["NA", "NA"]], columns=_PAIR_COLUMNS)

    tokenizer, generator = _select_model(model_id)
    prompt = _BOS_INSTRUCTION + text + _DELIM_INSTRUCTION + _EOS_INSTRUCTION
    tokenized_text = tokenizer(prompt, return_tensors="pt")
    output = generator.generate(tokenized_text.input_ids, max_length=512)
    model_generated = tokenizer.decode(output[0], skip_special_tokens=True)

    pred_doubles = pd.DataFrame(
        _parse_aspect_sentiment_pairs(model_generated),
        columns=_PAIR_COLUMNS,
    )
    return pred_doubles, true_doubles, text, model_generated
def run_demo(text, dataset, model_id):
    """Gradio click handler: run inference and report failures to the user.

    Args:
        text: Review text from the input box (may be empty).
        dataset: Dataset name selected in the UI.
        model_id: Model id selected in the UI.

    Returns:
        Whatever ``inference`` returns for the given arguments.

    Raises:
        gr.Error: If inference fails for any reason; the message carries the
            type and text of the underlying exception.
    """
    try:
        return inference(text, dataset, model_id)
    except Exception as e:
        # Keep a server-side log line, then re-raise as gr.Error. Falling
        # through here would return None, which gives the bound output
        # components nothing to display and hides the cause from the user.
        print(e)
        raise gr.Error(f"{type(e).__name__}: {e}") from e
def inference(text, dataset, model_id):
    """Pass the request unchanged to ``perform_asde_inference`` and return its result."""
    result = perform_asde_inference(text, dataset, model_id)
    return result
# Script entry point: build the Gradio Blocks UI and launch it.
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# whitespace-flattened text; confirm it against the deployed app.
if __name__ == "__main__":
    demo = gr.Blocks()
    with demo:
        with gr.Row():
            with gr.Column():
                # Page title.
                gr.Markdown(
                    "# <p align='center'>Hospital Review Aspect Sentiment Generation</p>"
                )
                with gr.Row():
                    with gr.Column():
                        # Free-text review; an empty value makes the
                        # inference function pick a random validation example.
                        asde_input_sentence = gr.Textbox(
                            placeholder="Leave this box blank and choose a dataset will give you a random example...",
                            label="Example:",
                        )
                        gr.Markdown(
                            "You can find code and dataset at [MTech Thesis Project 2023](https://github.com/Amir22010/MTP_Thesis_Project_2023/tree/main)"
                        )
                        # Only one dataset is offered.
                        asde_dataset_ids = gr.Radio(
                            choices=[
                                "HospitalReviews"
                            ],
                            value="HospitalReviews",
                            label="Datasets",
                        )
                        # These strings are compared verbatim against the
                        # model ids checked in perform_asde_inference.
                        asde_model_ids = gr.Radio(
                            choices=[
                                "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                                "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                                "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
                            ],
                            value="PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                            label="Fine-tuned Models on Hospital Review custom data",
                        )
                        asde_inference_button = gr.Button("Let's go!")
                        # Output widgets, filled by the click handler below.
                        asde_output_text = gr.TextArea(label="Example:")
                        asde_model_output_generated_sentence = gr.Textbox(
                            placeholder="Text Generated...",
                            label="Model Prediction Text Generated:",
                        )
                        asde_output_pred_df = gr.DataFrame(
                            label="Predicted Aspect & Sentiment:"
                        )
                        asde_output_true_df = gr.DataFrame(
                            label="Original Aspect & Sentiment:"
                        )
                        # The outputs are listed in the order of the tuple
                        # returned by perform_asde_inference: predicted pairs,
                        # reference pairs, analysed text, raw generated text.
                        asde_inference_button.click(
                            fn=run_demo,
                            inputs=[
                                asde_input_sentence,
                                asde_dataset_ids,
                                asde_model_ids
                            ],
                            outputs=[
                                asde_output_pred_df,
                                asde_output_true_df,
                                asde_output_text,
                                asde_model_output_generated_sentence
                            ],
                        )
        # Footer with author credit.
        gr.Markdown(
            """### Author: [Amir Khan](https://github.com/Amir22010)
"""
        )
    demo.launch()