import torch
import gradio as gr
import pandas as pd
from utils import save_dataframe_to_file, tokenize_Df
from model import load_model
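# Project-local helpers: tokenize_Df builds model inputs from a prompt/response
# DataFrame, save_dataframe_to_file writes the scored results to a downloadable
# file, and load_model returns the selected fine-tuned BERT scorer.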

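# Static UI assets: the Markdown description rendered in the interface and the
# demo input pre-filled into the text box.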
with open("./description.md", "r", encoding="utf-8") as file:
    description_text = file.read()

with open("./input_demo.txt", "r", encoding="utf-8") as file:
    demo = file.read()

def process_data(task_name, model_name, pooling_method, input_text=None, file=None):
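    """Score prompt/response pairs for the selected task.

    Input comes from the uploaded CSV/XLSX file when one is provided, otherwise
    from the text box (one "prompt,response" pair per line). Returns a status
    message, a DataFrame of the scored pairs (with an added 'evaluation' column;
    empty on error), and a downloadable results file (None when nothing was scored).
    """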
    output = ""
    dataframe_output = pd.DataFrame()
    file_output = None

    # Case 1: neither file nor input_text was provided
    if file is None and (input_text is None or input_text.strip() == ""):
        output = "No valid input detected. Please check your input and ensure it follows the expected format."

    # Case 2: both file and input_text were provided; the file takes priority
    elif file is not None and input_text is not None:
        output = "Detected both text and file input. Prioritizing file input."
        # Check the file type
        if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
            output += " File format must be xlsx or csv."
        elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
            output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
        else:
            # Read the uploaded file
            df = pd.read_csv(file) if file.name.endswith('.csv') else pd.read_excel(file)
            # Check whether the header row is "prompt" and "response"
            if list(df.columns) == ['prompt', 'response']:
                dataframe_output = df
            else:
                df_values = [list(df.columns)] + df.values.tolist()
                dataframe_output = pd.DataFrame(df_values, columns=['prompt', 'response'])

            # Run the model
            loaded_net = load_model(model_name, pooling_method)
            example = tokenize_Df(dataframe_output)
            with torch.no_grad():  
                score = loaded_net(example)

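            # The two-phase model returns a pair of scores (index 0: Creativity,
            # index 1: Appropriateness); the one-phase model returns a single score tensor.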
            if model_name == "One-phase Fine-tuned BERT":
                dataframe_output['evaluation'] = score.numpy()
            else: 
                dataframe_output['evaluation'] = score[0].numpy() if task_name=='Creativity' else score[1].numpy()
            file_output = save_dataframe_to_file(dataframe_output, file_format="csv") 
            output += f" Processed {len(dataframe_output)} rows from uploaded file using task: {task_name}, model: {model_name}, pooling: {pooling_method}."

    # Case 3: only a file was provided
    elif file is not None:
        # Check the file type
        if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
            output = "File format must be xlsx or csv."
        elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
            output = " One-phase Fine-tuned BERT model does not support Appropriateness task."            
        else:
            # Read the uploaded file
            df = pd.read_csv(file) if file.name.endswith('.csv') else pd.read_excel(file)

            # Check whether the header row is "prompt" and "response"
            if list(df.columns) == ['prompt', 'response']:
                dataframe_output = df
            else:
                df_values = [list(df.columns)] + df.values.tolist()
                dataframe_output = pd.DataFrame(df_values, columns=['prompt', 'response'])
            
            # Run the model
            loaded_net = load_model(model_name, pooling_method)
            example = tokenize_Df(dataframe_output)
            with torch.no_grad():
                score = loaded_net(example)
            
            if model_name == "One-phase Fine-tuned BERT":
                dataframe_output['evaluation'] = score.numpy()
            else: 
                dataframe_output['evaluation'] = score[0].numpy() if task_name=='Creativity' else score[1].numpy()
            file_output = save_dataframe_to_file(dataframe_output, file_format="csv") 
            output = f"Processed {len(dataframe_output)} rows from uploaded file using task: {task_name}, model: {model_name}, pooling: {pooling_method}."

    # Case 4: only input_text was provided
    elif input_text is not None:
        if task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
            output = "One-phase Fine-tuned BERT model does not support Appropriateness task."
        else:
            lines = input_text.strip().split("\n")
            rows = []
            for line in lines:
                try:
                    split_line = line.split(",", maxsplit=1)
                    if len(split_line) == 2:
                        rows.append(split_line)
                except Exception as e:
                    output = f"Error processing line: {line}"
                    break
            
            if output == "":
                dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response']) if rows[0] == ['prompt', 'response'] else pd.DataFrame(rows, columns=['prompt', 'response'])
                
                # Run the model
                loaded_net = load_model(model_name, pooling_method)
                example = tokenize_Df(dataframe_output)
                with torch.no_grad(): 
                    score = loaded_net(example)

                if model_name == "One-phase Fine-tuned BERT":
                    dataframe_output['evaluation'] = score.numpy()
                else: 
                    dataframe_output['evaluation'] = score[0].numpy() if task_name=='Creativity' else score[1].numpy()
                file_output = save_dataframe_to_file(dataframe_output, file_format="csv") 
                output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."

    return output, dataframe_output, file_output

## Input components
task_dropdown = gr.Dropdown(
                label="Task Name", 
                choices=["Creativity", "Appropriateness"], 
                value="Appropriateness")

model_dropdown = gr.Dropdown(
                label="Model Name", 
                choices=[
                    "One-phase Fine-tuned BERT",
                    "Two-phase Fine-tuned BERT"], 
                value="Two-phase Fine-tuned BERT")

pooling_dropdown = gr.Dropdown(
                label="Pooling", 
                choices=["mean", "cls"], 
                value="cls")

text_input = gr.Textbox(
                label="Text Input", 
                lines=10, 
                value=demo)

file_input = gr.File(
                label="Input File", 
                type="filepath", 
                file_types=[".csv", ".xlsx"])

## Output components
output_box = gr.Textbox(label="Output", lines=5, interactive=False)

dataframe_output = gr.Dataframe(label="DataFrame", interactive=False)

file_output = gr.File(label="Output File", interactive=False)

# Build the Gradio interface
interface = gr.Interface(
    fn=process_data,
    inputs=[task_dropdown, model_dropdown, pooling_dropdown, text_input, file_input],
    outputs=[output_box, dataframe_output, file_output],
    css=(""".file-download {display: none !important;}
            h1 {text-align: center;}"""),
    title="TwoPhaseLLMs-CreativityAutoEvaluation",
    description=description_text,
    theme=gr.themes.Soft(),
)

# Launch the interface
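# (launch() serves the app on a local URL by default; share=True would also create a temporary public link)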
interface.launch()