import os
import random
import gradio as gr
import pandas as pd
import requests
import shutil
from pyabsa import download_all_available_datasets, AspectTermExtraction as ATEPC, TaskCodeOption
from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset
# Remove any stale local copies so fresh datasets are downloaded below
if os.path.exists("integrated_datasets"):
    shutil.rmtree("integrated_datasets")
if os.path.exists("source_datasets.backup"):
    shutil.rmtree("source_datasets.backup")

download_all_available_datasets()
dataset_items = {dataset.name: dataset for dataset in ATEPC.ATEPCDatasetList()}
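# A sketch of the mapping built above, assuming the stock PyABSA dataset list
# (names like 'Laptop14' are illustrative; the exact entries depend on the
# installed PyABSA version):
# dataset_items == {'Laptop14': DatasetItem('Laptop14'), 'Restaurant14': ..., ...}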
def get_example(dataset):
    task = TaskCodeOption.Aspect_Polarity_Classification
    dataset_file = detect_infer_dataset(dataset_items[dataset], task)

    # Note: only the first detected file is used, since the function returns inside the loop
    for fname in dataset_file:
        lines = []
        if isinstance(fname, str):
            fname = [fname]
        for f in fname:
            print('loading: {}'.format(f))
            with open(f, 'r', encoding='utf-8') as fin:
                lines.extend(fin.readlines())
        # Drop the gold label and the aspect boundary tags, keeping only the plain sentence
        for i in range(len(lines)):
            lines[i] = lines[i][:lines[i].find('$LABEL$')].replace('[B-ASP]', '').replace('[E-ASP]', '').strip()
        # Deduplicate while preserving the original line order
        return sorted(set(lines), key=lines.index)
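# A sketch of what get_example does to a single line, assuming the usual ATEPC
# inference format (the sentence below is made up for illustration):
#   in:  'The [B-ASP]battery[E-ASP] life is great $LABEL$ Positive'
#   out: 'The battery life is great'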
dataset_dict = {dataset.name: get_example(dataset.name) for dataset in ATEPC.ATEPCDatasetList()}
aspect_extractor = ATEPC.AspectExtractor(checkpoint='multilingual')
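# Minimal usage sketch of the extractor outside the Gradio app (hypothetical
# input text; the keys mirror those accessed in perform_inference below):
#   result = aspect_extractor.predict('The food was tasty but the waiter was rude.',
#                                     pred_sentiment=True)
#   result['aspect']     # e.g. ['food', 'waiter']
#   result['sentiment']  # e.g. ['Positive', 'Negative']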
def perform_inference(text, dataset):
    # Fall back to a random example from the selected dataset when the box is empty
    if not text:
        text = dataset_dict[dataset][random.randint(0, len(dataset_dict[dataset]) - 1)]

    result = aspect_extractor.predict(text, pred_sentiment=True)

    # One row per extracted aspect
    result = pd.DataFrame({
        'aspect': result['aspect'],
        'sentiment': result['sentiment'],
        # 'probability': result[0]['probs'],
        'confidence': [round(x, 4) for x in result['confidence']],
        'position': result['position']
    })
    return result, '{}'.format(text)
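# For the hypothetical sentence above, the returned DataFrame would look
# roughly like this (values are illustrative, not real model output):
#        aspect sentiment  confidence position
#   0      food  Positive      0.9876     [1]
#   1    waiter  Negative      0.9543     [6]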
demo = gr.Blocks()

with demo:
    gr.Markdown("# <p align='center'>Multilingual Aspect-based Sentiment Analysis!</p>")
    gr.Markdown("""### Repo: [PyABSA V2](https://github.com/yangheng95/PyABSA)
    ### Author: [Heng Yang](https://github.com/yangheng95) (杨恒)
    [![Downloads](https://pepy.tech/badge/pyabsa)](https://pepy.tech/project/pyabsa)
    [![Downloads](https://pepy.tech/badge/pyabsa/month)](https://pepy.tech/project/pyabsa)
"""
)
gr.Markdown("Your input text should be no more than 80 words, that's the longest text we used in trainer. However, you can try longer text in self-trainer ")
gr.Markdown("**You don't need to split each Chinese (Korean, etc.) token as the provided, just input the natural language text.**")
    output_dfs = []
    with gr.Row():
        with gr.Column():
            input_sentence = gr.Textbox(placeholder='Leave this box blank and choose a dataset to get a random example...', label="Example:")
            gr.Markdown("You can find the datasets at [github.com/yangheng95/ABSADatasets](https://github.com/yangheng95/ABSADatasets/tree/v1.2/datasets/text_classification)")
            dataset_ids = gr.Radio(choices=[dataset.name for dataset in ATEPC.ATEPCDatasetList()[:-1]], value='Laptop14', label="Datasets")
            inference_button = gr.Button("Let's go!")
            gr.Markdown("There is a [demo](https://huggingface.co/spaces/yangheng/PyABSA-ATEPC-Chinese) specialized for the Chinese language.")
            gr.Markdown("This demo supports many other languages as well; feel free to explore the results for other languages yourself.")
        with gr.Column():
            output_text = gr.TextArea(label="Example:")
            output_df = gr.DataFrame(label="Prediction Results:")
            output_dfs.append(output_df)
    inference_button.click(fn=perform_inference,
                           inputs=[input_sentence, dataset_ids],
                           outputs=[output_df, output_text])
demo.launch()