File size: 7,854 Bytes
3cf2fd6
 
 
 
 
 
743168f
3cf2fd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abccac8
 
3cf2fd6
 
fca3fde
1718e55
 
 
fca3fde
 
3cf2fd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8e5dba
3cf2fd6
 
 
 
 
 
 
 
 
 
8df7942
 
3cf2fd6
8df7942
 
 
 
 
c8e5dba
3cf2fd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a48d2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c700516
8a48d2c
 
 
 
 
 
 
 
3cf2fd6
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# -*- coding: utf-8 -*-
# file: app.py
# time: 18:37 23/09/2023
# author: Amir Khan
# github: https://github.com/Amir22010

import ast
import traceback

import gradio as gr
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

try:
    # Eagerly load the fine-tuned English ABSA tokenizer/model at import time
    # so the first UI request does not pay the download/initialisation cost.
    tokenizer_english = AutoTokenizer.from_pretrained("amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model")
    double_english_generator = AutoModelForSeq2SeqLM.from_pretrained("amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model")
except Exception as e:  # was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt and hid the cause
    print(f"english model load error: {e}")
'''
try:
    tokenizer_multilingual = AutoTokenizer.from_pretrained("amir22010/layoutxlm-xfund-ja")
    double_multilingual_generator = AutoModelForSeq2SeqLM.from_pretrained("amir22010/layoutxlm-xfund-ja")
except:
    print("multilingual model load error")

try:
    tokenizer_keybert = AutoTokenizer.from_pretrained("amir22010/layoutxlm-xfund-ja")
    double_keybert_generator = AutoModelForSeq2SeqLM.from_pretrained("amir22010/layoutxlm-xfund-ja")
except:
    print("keybert model load error")

'''
def perform_asde_inference(text, dataset, model_id):
    """Generate aspect/sentiment pairs for a hospital review.

    Parameters
    ----------
    text : str
        Review text. When empty, a random row of the model's validation CSV
        is used instead and its gold labels are returned alongside.
    dataset : str
        Dataset radio selection (currently unused by the inference path).
    model_id : str
        Which fine-tuned model to run.

    Returns
    -------
    tuple
        (predicted pairs DataFrame, gold pairs DataFrame,
         the text actually used, raw generated string).

    Raises
    ------
    ValueError
        If ``model_id`` is not a supported model.
    """
    # Record the "no user input" condition BEFORE overwriting `text` with a
    # random example; the original code re-tested `not text` after the
    # overwrite, so the gold labels were never actually returned.
    use_random_example = not text
    true_aspect = true_sentiment = None

    if use_random_example:
        if model_id == "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('pyabsa_english.csv')  # validation dataset
        else:
            # Previously fell through to an unbound `df` (NameError);
            # fail with an explicit message instead.
            raise ValueError(f"No validation dataset for model_id {model_id!r}")
        random_i = np.random.randint(low=0, high=df.shape[0], size=(1,)).flat[0]
        selected_df = df.iloc[random_i]
        text = selected_df['clean_text']
        true_aspect = selected_df['actual_aspects']
        true_sentiment = selected_df['actual_sentiments']

    # Instruction-tuned (tk-instruct style) prompt: task definition plus two
    # positive demonstrations, then the example to complete.
    bos_instruction = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
        Positive example 1-
        input: this hospital has a good team of doctors who will take care of all your needs brilliantly.
        output: doctors:positive
        Positive example 2- 
        input: Arthur as Irv at ham hospital ran an Nagar , Madurai has a doctor who engages you in a conversation and tries to take your mind off the pain and he has trained the staff to do so as well.
        output: doctor:positive, staff:positive
        Now complete the following example-
        input: """
    delim_instruct = ''
    eos_instruct = ' \noutput:'

    if model_id == "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_english(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_english_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_english.decode(output[0], skip_special_tokens=True)
    else:
        # Multilingual / KeyBERT variants are disabled (their loaders are
        # commented out at module level); previously this path crashed later
        # with an unbound `model_generated`.
        raise ValueError(f"Unsupported model_id {model_id!r}")

    # Expected generation format: "aspect:sentiment, aspect:sentiment, ...".
    # str.partition never raises, so a malformed chunk without ':' yields an
    # empty sentiment instead of the original IndexError.
    pred_asp = []
    pred_sent = []
    for chunk in model_generated.split(','):
        aspect, _, sentiment = chunk.partition(':')
        pred_asp.append(aspect)
        pred_sent.append(sentiment)

    pred_doubles = pd.DataFrame(list(zip(pred_asp, pred_sent)), columns=['Aspect', 'Sentiment'])

    if use_random_example:
        # Gold labels are stored in the CSV as stringified Python lists.
        true_doubles = pd.DataFrame(
            list(zip(ast.literal_eval(true_aspect), ast.literal_eval(true_sentiment))),
            columns=['Aspect', 'Sentiment'],
        )
    else:
        true_doubles = pd.DataFrame([["", ""]], columns=['Aspect', 'Sentiment'])

    return pred_doubles, true_doubles, text, model_generated

def run_demo(text, dataset, model_id):
    """Gradio click handler: delegate to :func:`inference`.

    On failure, print the full traceback (the original printed only
    ``str(e)`` and returned ``None``, which then made Gradio fail again while
    unpacking the four outputs) and re-raise so the UI surfaces the real
    error.
    """
    try:
        return inference(text, dataset, model_id)
    except Exception:
        traceback.print_exc()
        raise

def inference(text, dataset, model_id):
    """Thin indirection layer over :func:`perform_asde_inference`."""
    result = perform_asde_inference(text, dataset, model_id)
    return result

if __name__ == "__main__":
    # Build the Gradio UI: one column holding the input textbox, the dataset
    # and model selectors, a run button, and four output components.
    demo = gr.Blocks()
    with demo:
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    "# <p align='center'>Hospital Review Aspect Sentiment Generation</p>"
                )
                with gr.Row():
                    with gr.Column():
                        # Free-text review input; when left blank, the backend
                        # picks a random validation example instead.
                        asde_input_sentence = gr.Textbox(
                            placeholder="Leave this box blank and choose a dataset will give you a random example...",
                            label="Example:",
                        )
                        gr.Markdown(
                            "You can find code and dataset at [MTech Thesis Project](https://github.com/Amir22010)"
                        )
                        # Dataset selector (a single option today).
                        asde_dataset_ids = gr.Radio(
                            choices=[
                                "HospitalReviews"
                            ],
                            value="HospitalReviews",
                            label="Datasets",
                        )

                        # Model selector; the multilingual/KeyBERT variants
                        # stay commented out until their loaders are enabled.
                        asde_model_ids = gr.Radio(
                            choices=[
                                "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model", 
                                # "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model", 
                                # "PyABSA_Hospital_KeyBert_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
                            ],
                            value="PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                            label="Fine-tuned Models on Hospital Review custom data",
                        )

                        asde_inference_button = gr.Button("Let's go!")

                        # Echo of the text that was actually scored (useful
                        # when a random example was substituted).
                        asde_output_text = gr.TextArea(label="Example:")

                        asde_model_output_generated_sentence = gr.Textbox(
                            placeholder="Text Generated...",
                            label="Model Prediction Text Generated:",
                        )
                        asde_output_pred_df = gr.DataFrame(
                            label="Predicted Aspect & Sentiment:"
                        )
                        asde_output_true_df = gr.DataFrame(
                            label="Original Aspect & Sentiment:"
                        )

                        # `outputs` order must match the 4-tuple returned by
                        # perform_asde_inference:
                        # (pred_df, true_df, text, model_generated).
                        asde_inference_button.click(
                            fn=run_demo,
                            inputs=[
                                asde_input_sentence,
                                asde_dataset_ids,
                                asde_model_ids
                            ],
                            outputs=[
                                asde_output_pred_df,
                                asde_output_true_df,
                                asde_output_text,
                                asde_model_output_generated_sentence
                            ],
                        )
        gr.Markdown(
            """### GitHub Repo: [MTech Thesis Project](https://github.com/Amir22010)
            ### Author: [Amir Khan](https://github.com/Amir22010)
            """
        )

    demo.launch()