import gradio as gr
import pandas as pd
import torch

from model import load_model
from utils import save_dataframe_to_file, tokenize_Df

# Markdown blurb passed to the interface as its description.
with open("./description.md", mode="r", encoding="utf-8") as description_handle:
    description_text = description_handle.read()

# Sample input used as the initial value of the text box.
with open("./input_demo.txt", mode="r", encoding="utf-8") as demo_handle:
    demo = demo_handle.read()

_COLUMNS = ['prompt', 'response']
_ONE_PHASE_MODEL = "One-phase Fine-tuned BERT"
_NO_INPUT_MESSAGE = (
    "No valid input detected. Please check your input and ensure it follows the expected format."
)
_UNSUPPORTED_MESSAGE = "One-phase Fine-tuned BERT model does not support Appropriateness task."
_BOTH_INPUTS_PREFIX = "Detected both text and file input. Prioritizing file input. "


def _upload_path(file):
    """Return the filesystem path of an upload given as a string or an object with ``.name``."""
    if isinstance(file, str):
        return file
    return str(getattr(file, "name", file))


def _read_table(path):
    """Load a CSV/XLSX upload as a prompt/response frame, or None if it is not two columns wide."""
    frame = pd.read_csv(path) if path.lower().endswith('.csv') else pd.read_excel(path)
    if list(frame.columns) == _COLUMNS:
        return frame
    if frame.shape[1] != len(_COLUMNS):
        return None
    # No recognised header row: pandas used the first data row as column
    # labels, so put it back as the first record.
    records = [list(frame.columns)] + frame.values.tolist()
    return pd.DataFrame(records, columns=_COLUMNS)


def _parse_text_rows(input_text):
    """Split pasted text into [prompt, response] pairs, dropping an optional header line."""
    pairs = []
    for line in input_text.strip().split("\n"):
        # Only the first comma separates prompt from response; lines
        # without a comma are skipped.
        parts = line.split(",", maxsplit=1)
        if len(parts) == 2:
            pairs.append(parts)
    if pairs and pairs[0] == _COLUMNS:
        pairs = pairs[1:]
    return pairs


def _score_and_export(frame, task_name, model_name, pooling_method):
    """Add an 'evaluation' column of model scores to ``frame`` and save it as CSV."""
    # load_model is invoked on every request; any caching is up to that helper.
    loaded_net = load_model(model_name, pooling_method)
    example = tokenize_Df(frame)
    with torch.no_grad():
        score = loaded_net(example)

    if model_name == _ONE_PHASE_MODEL:
        frame['evaluation'] = score.numpy()
    else:
        # Index 0 is used for the Creativity task and index 1 otherwise.
        frame['evaluation'] = score[0].numpy() if task_name == 'Creativity' else score[1].numpy()
    return save_dataframe_to_file(frame, file_format="csv")


def process_data(task_name, model_name, pooling_method, input_text=None, file=None):
    """Score prompt/response pairs supplied as pasted text or as an uploaded file.

    An uploaded file takes priority over pasted text. Text is parsed one pair
    per line, split on the first comma; a leading ``prompt,response`` header
    line is dropped.

    Args:
        task_name: "Creativity" or "Appropriateness".
        model_name: Model identifier forwarded to ``load_model``.
        pooling_method: Pooling option forwarded to ``load_model``.
        input_text: Pasted text, or None / blank when absent.
        file: Uploaded ``.csv`` / ``.xlsx`` file as a path string or an object
            exposing its path as ``.name``; None when absent.

    Returns:
        A tuple ``(message, dataframe, file_output)``. On any validation
        failure ``dataframe`` is empty and ``file_output`` is None; otherwise
        ``dataframe`` holds the pairs plus an ``evaluation`` column and
        ``file_output`` is whatever ``save_dataframe_to_file`` returned.
    """
    has_text = input_text is not None and input_text.strip() != ""
    if file is None and not has_text:
        return _NO_INPUT_MESSAGE, pd.DataFrame(), None

    unsupported = task_name == "Appropriateness" and model_name == _ONE_PHASE_MODEL

    if file is not None:
        # Only mention the ignored text when some text was actually entered.
        prefix = _BOTH_INPUTS_PREFIX if has_text else ""
        source = "from uploaded file"
        path = _upload_path(file)
        if not path.lower().endswith(('.csv', '.xlsx')):
            return prefix + "File format must be xlsx or csv.", pd.DataFrame(), None
        if unsupported:
            return prefix + _UNSUPPORTED_MESSAGE, pd.DataFrame(), None
        frame = _read_table(path)
        if frame is None:
            message = prefix + "Uploaded file must contain exactly two columns: prompt and response."
            return message, pd.DataFrame(), None
    else:
        prefix = ""
        source = "of text"
        if unsupported:
            return _UNSUPPORTED_MESSAGE, pd.DataFrame(), None
        pairs = _parse_text_rows(input_text)
        if not pairs:
            # Nothing parseable: no line contained a comma, or only a header was given.
            return _NO_INPUT_MESSAGE, pd.DataFrame(), None
        frame = pd.DataFrame(pairs, columns=_COLUMNS)

    file_output = _score_and_export(frame, task_name, model_name, pooling_method)
    output = (
        f"{prefix}Processed {len(frame)} rows {source} using task: {task_name}, "
        f"model: {model_name}, pooling: {pooling_method}."
    )
    return output, frame, file_output

# --- Input widgets: order must match the positional parameters of process_data ---
task_dropdown = gr.Dropdown(
    choices=["Creativity", "Appropriateness"],
    value="Appropriateness",
    label="Task Name",
)

model_dropdown = gr.Dropdown(
    choices=["One-phase Fine-tuned BERT", "Two-phase Fine-tuned BERT"],
    value="Two-phase Fine-tuned BERT",
    label="Model Name",
)

pooling_dropdown = gr.Dropdown(
    choices=["mean", "cls"],
    value="cls",
    label="Pooling",
)

# The text box starts out filled with the demo input read at start-up.
text_input = gr.Textbox(
    value=demo,
    lines=10,
    label="Text Input",
)

file_input = gr.File(
    file_types=[".csv", ".xlsx"],
    type="filepath",
    label="Input File",
)

# --- Output widgets: one per element of the tuple returned by process_data ---
output_box = gr.Textbox(interactive=False, lines=5, label="Output")

dataframe_output = gr.Dataframe(interactive=False, label="DataFrame")

file_output = gr.File(interactive=False, label="Output File")

# --- Assemble the interface and start serving it ---
interface = gr.Interface(
    title="TwoPhaseBERT-CreativityAutoEvaluation",
    description=description_text,
    theme=gr.themes.Soft(),
    css=(
        ".file-download {display: none !important;}\n"
        "h1 {text-align: center;}"
    ),
    fn=process_data,
    inputs=[task_dropdown, model_dropdown, pooling_dropdown, text_input, file_input],
    outputs=[output_box, dataframe_output, file_output],
)

interface.launch()