Spaces:
Runtime error
Runtime error
File size: 8,431 Bytes
3cf2fd6 9d0cab3 743168f 3cf2fd6 60b4a84 f02efab 3cf2fd6 3f46e22 3cf2fd6 60b4a84 f02efab 3cf2fd6 3f46e22 f02efab 3cf2fd6 3f46e22 3cf2fd6 abccac8 3f46e22 7938b36 3cf2fd6 fca3fde 46b1b94 ac52f8e 3cf2fd6 c8e5dba 3cf2fd6 3f46e22 b667066 3f46e22 3cf2fd6 3f46e22 8df7942 3cf2fd6 8df7942 3cf2fd6 8a48d2c 9a91428 8a48d2c 7938b36 8a48d2c c700516 8a48d2c 3cf2fd6 9a91428 3cf2fd6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
# -*- coding: utf-8 -*-
# file: app.py
# time: 18:37 23/09/2023
# author: Amir Khan
# github: https://github.com/Amir22010
import os
import numpy as np
import ast
import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
def _load_generator(repo_id, label, **from_pretrained_kwargs):
    """Best-effort load of a tokenizer / seq2seq model pair from the HF Hub.

    Parameters:
        repo_id: Hub repository id passed to ``from_pretrained``.
        label: short name used in the error message (e.g. "english").
        **from_pretrained_kwargs: forwarded to both ``from_pretrained`` calls
            (e.g. ``cache_dir``).

    Returns:
        ``(tokenizer, model)`` on success, ``(None, None)`` on failure.
        Failures are printed, not raised, mirroring the original demo's
        print-and-continue behavior — but the globals are now always bound
        (previously a failed load left the names undefined, causing a
        NameError far from the cause at inference time).
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(repo_id, **from_pretrained_kwargs)
        model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, **from_pretrained_kwargs)
        return tokenizer, model
    except Exception as e:
        print(e)
        print(f"{label} model load error")
        return None, None


# Load the three fine-tuned generators; any of them may end up (None, None).
tokenizer_english, double_english_generator = _load_generator(
    "amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
    "english",
    cache_dir=os.getcwd(),
)
tokenizer_multilingual, double_multilingual_generator = _load_generator(
    "amir22010/PyABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
    "multilingual",
    cache_dir=os.getcwd(),
)
# NOTE(review): the keybert repo was loaded without cache_dir in the original;
# preserved as-is.
tokenizer_keybert, double_keybert_generator = _load_generator(
    "amir22010/KeyBert_ABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model",
    "keybert",
)
def perform_asde_inference(text, dataset, model_id):
    """Generate aspect/sentiment pairs for a hospital review.

    Parameters:
        text: review text. When empty/falsy, a random row is sampled from the
            selected model's validation CSV and used as the input instead,
            along with its gold annotations.
        dataset: dataset radio value (not used by the computation).
        model_id: one of the three fine-tuned model identifiers shown in
            the UI radio.

    Returns:
        ``(pred_doubles, true_doubles, text, model_generated)`` — predicted
        and ground-truth DataFrames with columns ``['Aspect', 'Sentiment']``,
        the (possibly sampled) input text, and the raw decoded model output.

    Raises:
        ValueError: if *model_id* is not a known identifier (the original
            code instead failed later with a NameError on an unbound local).
    """
    english_id = "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
    multilingual_id = "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
    keybert_id = "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
    # Per-model validation dataset used when no text is supplied.
    validation_csv = {
        english_id: 'pyabsa_english.csv',
        multilingual_id: 'pyabsa_multilingual.csv',
        keybert_id: 'keybert_valid.csv',
    }
    if model_id not in validation_csv:
        raise ValueError(f"unknown model_id: {model_id!r}")

    if not text:
        # No text supplied: sample a random validation example so the demo
        # always has something to show, and keep its gold annotations.
        df = pd.read_csv(validation_csv[model_id])
        random_i = np.random.randint(low=0, high=df.shape[0], size=(1,)).flat[0]
        selected_df = df.iloc[random_i]
        text = selected_df['clean_text']
        # Gold aspects/sentiments are stored as stringified Python lists.
        true_doubles = pd.DataFrame(
            list(map(list, zip(ast.literal_eval(selected_df['actual_aspects']),
                               ast.literal_eval(selected_df['actual_sentiments'])))),
            columns=['Aspect', 'Sentiment'])
    else:
        # User-provided text carries no gold labels.
        true_doubles = pd.DataFrame([["NA", "NA"]], columns=['Aspect', 'Sentiment'])

    # Instruction prompt (tk-instruct style); kept byte-identical to the
    # prompt the models were fine-tuned with.
    bos_instruction = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
Positive example 1-
input: this hospital has a good team of doctors who will take care of all your needs brilliantly.
output: doctors:positive
Positive example 2-
input: Arthur as Irv at ham hospital ran an Nagar , Madurai has a doctor who engages you in a conversation and tries to take your mind off the pain and he has trained the staff to do so as well.
output: doctor:positive, staff:positive
Now complete the following example-
input: """
    delim_instruct = ''
    eos_instruct = ' \noutput:'
    prompt = bos_instruction + text + delim_instruct + eos_instruct

    # Select the tokenizer/model pair for the requested id; the generation
    # steps themselves were identical across the three original branches.
    if model_id == english_id:
        tok, generator = tokenizer_english, double_english_generator
    elif model_id == multilingual_id:
        tok, generator = tokenizer_multilingual, double_multilingual_generator
    else:
        tok, generator = tokenizer_keybert, double_keybert_generator

    tokenized_text = tok(prompt, return_tensors="pt")
    output = generator.generate(tokenized_text.input_ids, max_length=512)
    model_generated = tok.decode(output[0], skip_special_tokens=True)

    # Parse "aspect:sentiment, aspect:sentiment, ..." in a single pass.
    # A chunk without ':' no longer raises IndexError (originally it did);
    # its sentiment defaults to 'none', matching the noaspectterm:none scheme.
    pairs = []
    for chunk in model_generated.split(','):
        parts = chunk.split(':')
        pairs.append([parts[0], parts[1] if len(parts) > 1 else 'none'])
    pred_doubles = pd.DataFrame(pairs, columns=['Aspect', 'Sentiment'])

    return pred_doubles, true_doubles, text, model_generated
def run_demo(text, dataset, model_id):
    """Click handler for the Gradio button.

    Delegates to inference(); any failure is printed to stdout and None is
    returned so the UI call does not raise.
    """
    try:
        result = inference(text, dataset, model_id)
    except Exception as err:
        print(err)
        return None
    return result
def inference(text, dataset, model_id):
    """Forward the UI inputs straight through to perform_asde_inference."""
    request = (text, dataset, model_id)
    return perform_asde_inference(*request)
if __name__ == "__main__":
    # Assemble and launch the Gradio UI.
    # NOTE(review): the source paste lost all indentation, so the nesting of
    # the layout context managers below is reconstructed — a single title row
    # followed by one column holding all inputs and outputs. Confirm against
    # the original file if the layout matters.
    demo = gr.Blocks()
    with demo:
        with gr.Row():
            with gr.Column():
                # Page title.
                gr.Markdown(
                    "# <p align='center'>Hospital Review Aspect Sentiment Generation</p>"
                )
        with gr.Row():
            with gr.Column():
                # Free-text input; when left blank a random validation
                # example is drawn by perform_asde_inference.
                asde_input_sentence = gr.Textbox(
                    placeholder="Leave this box blank and choose a dataset will give you a random example...",
                    label="Example:",
                )
                gr.Markdown(
                    "You can find code and dataset at [MTech Thesis Project 2023](https://github.com/Amir22010/MTP_Thesis_Project_2023/tree/main)"
                )
                # Only one dataset is offered; kept as a radio group.
                asde_dataset_ids = gr.Radio(
                    choices=[
                        "HospitalReviews"
                    ],
                    value="HospitalReviews",
                    label="Datasets",
                )
                # Model selector — these ids must match the branches checked
                # inside perform_asde_inference.
                asde_model_ids = gr.Radio(
                    choices=[
                        "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                        "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                        "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
                    ],
                    value="PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                    label="Fine-tuned Models on Hospital Review custom data",
                )
                asde_inference_button = gr.Button("Let's go!")
                # Output widgets: the (possibly sampled) example text, the raw
                # generated string, and predicted vs. gold aspect/sentiment tables.
                asde_output_text = gr.TextArea(label="Example:")
                asde_model_output_generated_sentence = gr.Textbox(
                    placeholder="Text Generated...",
                    label="Model Prediction Text Generated:",
                )
                asde_output_pred_df = gr.DataFrame(
                    label="Predicted Aspect & Sentiment:"
                )
                asde_output_true_df = gr.DataFrame(
                    label="Original Aspect & Sentiment:"
                )
        # Wire the button. The outputs order matches perform_asde_inference's
        # return tuple: (pred_df, true_df, text, model_generated).
        asde_inference_button.click(
            fn=run_demo,
            inputs=[
                asde_input_sentence,
                asde_dataset_ids,
                asde_model_ids
            ],
            outputs=[
                asde_output_pred_df,
                asde_output_true_df,
                asde_output_text,
                asde_model_output_generated_sentence
            ],
        )
        gr.Markdown(
            """### Author: [Amir Khan](https://github.com/Amir22010)
"""
        )
    demo.launch()