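"""Gradio demo for multilingual aspect-based sentiment analysis with PyABSA.

The script downloads the ABSA datasets bundled with PyABSA, loads a pretrained
multilingual aspect-term-extraction (ATEPC) checkpoint, and serves a small web
UI that extracts aspect terms and predicts their sentiment for user-supplied
(or randomly sampled) text.
"""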
import os
import random
import shutil

import gradio as gr
import pandas as pd

from pyabsa import download_all_available_datasets, AspectTermExtraction as ATEPC, TaskCodeOption
from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset

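# Remove stale local copies of the datasets so the fresh downloads below are used.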
if os.path.exists("integrated_datasets"):
    shutil.rmtree("integrated_datasets")
if os.path.exists("source_datasets.backup"):
    shutil.rmtree("source_datasets.backup")

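# Fetch every dataset bundled with PyABSA; they supply the random example sentences.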
download_all_available_datasets()

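# Map each dataset's display name to its dataset descriptor.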
dataset_items = {dataset.name: dataset for dataset in ATEPC.ATEPCDatasetList()}

def get_example(dataset):
    task = TaskCodeOption.Aspect_Polarity_Classification
    dataset_files = detect_infer_dataset(dataset_items[dataset], task)
    # detect_infer_dataset may return a single path or a list of paths.
    if isinstance(dataset_files, str):
        dataset_files = [dataset_files]

    lines = []
    for f in dataset_files:
        print('loading: {}'.format(f))
        with open(f, 'r', encoding='utf-8') as fin:
            lines.extend(fin.readlines())

    # Strip the "$LABEL$ ..." suffix and the aspect boundary markers so only
    # the plain sentence remains.
    lines = [line[:line.find('$LABEL$')].replace('[B-ASP]', '').replace('[E-ASP]', '').strip()
             for line in lines]
    # Deduplicate while preserving the original order.
    return sorted(set(lines), key=lines.index)


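# Pre-load the example sentences of every dataset so the UI can sample from them.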
dataset_dict = {dataset.name: get_example(dataset.name) for dataset in ATEPC.ATEPCDatasetList()}
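# Load the pretrained multilingual ATEPC checkpoint (fetched automatically if not cached).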
aspect_extractor = ATEPC.AspectExtractor(checkpoint='multilingual')


def perform_inference(text, dataset):
    # Fall back to a random example sentence when no text is entered.
    if not text:
        text = random.choice(dataset_dict[dataset])

    result = aspect_extractor.predict(text, pred_sentiment=True)

    # Tabulate the extracted aspects with their predicted sentiment,
    # rounded confidence, and token positions.
    result = pd.DataFrame({
        'aspect': result['aspect'],
        'sentiment': result['sentiment'],
        'confidence': [round(x, 4) for x in result['confidence']],
        'position': result['position'],
    })
    return result, text


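# Assemble the Gradio UI: input textbox and dataset picker on the left,
# the echoed example text and the prediction table on the right.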
demo = gr.Blocks()

with demo:
    gr.Markdown("# <p align='center'>Multilingual Aspect-based Sentiment Analysis !</p>")
    gr.Markdown("""### Repo: [PyABSA V2](https://github.com/yangheng95/PyABSA)
                ### Author: [Heng Yang](https://github.com/yangheng95) (杨恒)
                [![Downloads](https://pepy.tech/badge/pyabsa)](https://pepy.tech/project/pyabsa) 
                [![Downloads](https://pepy.tech/badge/pyabsa/month)](https://pepy.tech/project/pyabsa)
                """
                )
    gr.Markdown("Your input text should be no more than 80 words, that's the longest text we used in trainer. However, you can try longer text in self-trainer ")
    gr.Markdown("**You don't need to split each Chinese (Korean, etc.) token as the provided, just input the natural language text.**")
    with gr.Row():
        with gr.Column():
            input_sentence = gr.Textbox(placeholder='Leave this box blank and choose a dataset to get a random example...', label="Input:")
            gr.Markdown("You can find the datasets at [github.com/yangheng95/ABSADatasets](https://github.com/yangheng95/ABSADatasets/tree/v1.2/datasets/text_classification)")
            dataset_ids = gr.Radio(choices=[dataset.name for dataset in ATEPC.ATEPCDatasetList()[:-1]], value='Laptop14', label="Datasets")
            inference_button = gr.Button("Let's go!")
            gr.Markdown("There is a [demo](https://huggingface.co/spaces/yangheng/PyABSA-ATEPC-Chinese) specialized for the Chinese langauge")
            gr.Markdown("This demo support many other language as well, you can try and explore the results of other languages by yourself.")

        with gr.Column():
            output_text = gr.TextArea(label="Example:")
            output_df = gr.DataFrame(label="Prediction Results:")

        inference_button.click(fn=perform_inference,
                               inputs=[input_sentence, dataset_ids],
                               outputs=[output_df, output_text])

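# launch() serves the app locally; for a public link you could, for example,
# pass share=True (a standard Gradio option, not used in the original script).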
demo.launch()