Spaces:

Siyunb323
/

Two-phase_Fine-tuned_BERT-CreativityAutoEvaluation

Sleeping

App Files Files Community

Siyunb323 committed 29 days ago

Commit

06ae167

1 Parent(s): 9b18daa

delete models

Browse files

Files changed (6) hide show

__pycache__/utils.cpython-39.pyc +0 -0
app.py +27 -33
model.py +137 -0
model/OnePhase_BERT_cls.pth +0 -3
model/TwoPhase_BERT_cls.pth +0 -3
utils.py +11 -0

__pycache__/utils.cpython-39.pyc ADDED Viewed

Binary file (578 Bytes). View file

app.py CHANGED Viewed

@@ -1,16 +1,6 @@
-import os
 import gradio as gr
 import pandas as pd
-import tempfile
-def save_dataframe_to_file(dataframe, file_format="csv"):
-    temp_dir = tempfile.gettempdir()  # 获取系统临时目录
-    file_path = os.path.join(temp_dir, f"output.{file_format}")
-    if file_format == "csv":
-        dataframe.to_csv(file_path, index=False)
-    elif file_format == "xlsx":
-        dataframe.to_excel(file_path, index=False)
-    return file_path
 with open("./description.md", "r", encoding="utf-8") as file:
     description_text = file.read()
@@ -18,13 +8,10 @@ with open("./description.md", "r", encoding="utf-8") as file:
 with open("./input_demo.txt", "r", encoding="utf-8") as file:
     demo = file.read()
-# 定义处理函数
-import pandas as pd
 def process_data(task_name, model_name, pooling_method, input_text=None, file=None):
     output = ""
     dataframe_output = pd.DataFrame()
-    file_output = pd.DataFrame()
     # 情况 1: file 和 input_text 都为 None
     if file is None and (input_text is None or input_text.strip() == ""):
@@ -36,6 +23,8 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
         # 检查文件类型
         if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
             output += " File format must be xlsx or csv."
         else:
             # 读取文件
             if file.name.endswith('.csv'):
@@ -55,6 +44,8 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
         # 检查文件类型
         if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
             output = "File format must be xlsx or csv."
         else:
             # 读取文件
             if file.name.endswith('.csv'):
@@ -72,24 +63,27 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
     # 情况 4: 只有 input_text
     elif input_text is not None:
-        lines = input_text.strip().split("\n")
-        rows = []
-        for line in lines:
-            try:
-                split_line = line.split(",", maxsplit=1)
-                if len(split_line) == 2:
-                    rows.append(split_line)
-            except Exception as e:
-                output = f"Error processing line: {line}"
-                break
-        if output == "":
-            if rows[0] == ['prompt', 'response']:
-                dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response'])
-            else:
-                dataframe_output = pd.DataFrame(rows, columns=['prompt', 'response'])
-            file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
-            output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
     return output, dataframe_output, file_output

 import gradio as gr
 import pandas as pd
+from utils import save_dataframe_to_file
 with open("./description.md", "r", encoding="utf-8") as file:
     description_text = file.read()
 with open("./input_demo.txt", "r", encoding="utf-8") as file:
     demo = file.read()
 def process_data(task_name, model_name, pooling_method, input_text=None, file=None):
     output = ""
     dataframe_output = pd.DataFrame()
+    file_output = None
     # 情况 1: file 和 input_text 都为 None
     if file is None and (input_text is None or input_text.strip() == ""):
         # 检查文件类型
         if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
             output += " File format must be xlsx or csv."
+        elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
+            output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
         else:
             # 读取文件
             if file.name.endswith('.csv'):
         # 检查文件类型
         if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
             output = "File format must be xlsx or csv."
+        elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
+            output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
         else:
             # 读取文件
             if file.name.endswith('.csv'):
     # 情况 4: 只有 input_text
     elif input_text is not None:
+        if task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
+            output = " One-phase Fine-tuned BERT model does not support Appropriateness task."
+        else:
+            lines = input_text.strip().split("\n")
+            rows = []
+            for line in lines:
+                try:
+                    split_line = line.split(",", maxsplit=1)
+                    if len(split_line) == 2:
+                        rows.append(split_line)
+                except Exception as e:
+                    output = f"Error processing line: {line}"
+                    break
+            if output == "":
+                if rows[0] == ['prompt', 'response']:
+                    dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response'])
+                else:
+                    dataframe_output = pd.DataFrame(rows, columns=['prompt', 'response'])
+                file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
+                output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
     return output, dataframe_output, file_output

model.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import torch
+from torch import nn
class BERTregressor(nn.Module):
    """Single-output regression head on top of a BERT-style encoder.

    The encoder representation (first-token hidden state or the encoder's
    pooled output) is fed through a small feed-forward stack that emits one
    value per row, which is then squeezed to drop the trailing dimension.

    Args:
        bert: Encoder module. It is called with the keyword arguments
            ``input_ids``, ``token_type_ids``, ``attention_mask`` and
            ``output_attentions`` and its result must expose
            ``last_hidden_state``, ``pooler_output`` and ``attentions``.
        hidden_size: Width of the representation fed to the head.
        num_linear: Number of linear layers in the head; must be 1 or 2.
        dropout: Dropout probability used inside the head.
        o_type: ``'cls'`` to use ``last_hidden_state[:, 0, :]`` or
            ``'pooler'`` to use ``pooler_output``.
        t_type: Task tag. Only ``'C'`` changes behaviour: together with
            ``use_sigmoid`` the output is rescaled as ``4 * x + 1``.
            (Presumably ``'C'`` stands for the creativity task -- confirm.)
        use_sigmoid: Append a ``Sigmoid`` layer to the head.

    Raises:
        ValueError: If ``num_linear`` or ``o_type`` is not a supported value.
    """

    _OUTPUT_TYPES = ('cls', 'pooler')

    def __init__(self, bert, hidden_size=768, num_linear=1, dropout=0.1,
                 o_type='cls', t_type='C', use_sigmoid=False):
        # Fail fast with a clear message instead of an UnboundLocalError
        # later on (``layers`` here, ``output`` in ``forward``).
        if num_linear not in (1, 2):
            raise ValueError(
                f"num_linear must be 1 or 2, got {num_linear!r}")
        if o_type not in self._OUTPUT_TYPES:
            raise ValueError(
                f"o_type must be 'cls' or 'pooler', got {o_type!r}")

        super(BERTregressor, self).__init__()
        self.encoder = bert
        self.o_type = o_type
        self.t_type = t_type
        self.sigmoid = use_sigmoid

        # NOTE: the position of each layer inside the Sequential determines
        # the state_dict keys, so the layout must stay as it is for saved
        # checkpoints to keep loading.
        if num_linear == 2:
            layers = [nn.Linear(hidden_size, 128),
                      nn.ReLU(),
                      nn.Dropout(dropout),
                      nn.Linear(128, 1)]
        else:
            layers = [nn.Dropout(dropout),
                      nn.Linear(hidden_size, 1)]
        if use_sigmoid:
            layers.append(nn.Sigmoid())
        self.output = nn.Sequential(*layers)

    def forward(self, inputs, return_attention=False):
        """Score a batch of tokenised inputs.

        Args:
            inputs: Mapping providing ``input_ids``, ``token_type_ids`` and
                ``attention_mask``.
            return_attention: Also ask the encoder for attentions and return
                them alongside the prediction.

        Returns:
            The prediction tensor, or ``(prediction, attentions)`` when
            ``return_attention`` is true.

        Raises:
            ValueError: If ``self.o_type`` was changed to an unsupported
                value after construction.
        """
        X = {'input_ids': inputs['input_ids'],
             'token_type_ids': inputs['token_type_ids'],
             'attention_mask': inputs['attention_mask'],
             'output_attentions': return_attention}
        encoded_X = self.encoder(**X)

        if self.o_type == 'cls':
            features = encoded_X.last_hidden_state[:, 0, :]
        elif self.o_type == 'pooler':
            features = encoded_X.pooler_output
        else:
            raise ValueError(
                f"o_type must be 'cls' or 'pooler', got {self.o_type!r}")

        output = self.output(features).squeeze(-1)
        # A sigmoid output in [0, 1] is mapped onto [1, 5] for task 'C'.
        if self.sigmoid and self.t_type == 'C':
            output = 4 * output + 1
        return output if not return_attention else (output, encoded_X.attentions)
class Effectiveness(nn.Module):
    """Feed-forward head that maps a feature vector to a single score.

    Args:
        num_layers: ``2`` builds Linear -> ReLU -> Dropout -> Linear; any
            other value builds ReLU -> Dropout -> Linear.
        hidden_size: Width of the incoming feature vector.
        use_sigmoid: Append a ``Sigmoid`` layer and rescale the result to
            the range [1, 5] in ``forward``.
        dropout: Dropout probability used inside the head.
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
        super().__init__(**kwargs)
        self.sigmoid = use_sigmoid

        # The layer order fixes the indices used in the state_dict keys.
        if num_layers == 2:
            stack = [nn.Linear(hidden_size, 128), nn.ReLU(),
                     nn.Dropout(dropout), nn.Linear(128, 1)]
        else:
            stack = [nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden_size, 1)]

        # The sigmoid is only added on request.
        if use_sigmoid:
            stack += [nn.Sigmoid()]
        self.output = nn.Sequential(*stack)

    def forward(self, X):
        """Return one score per row of ``X`` with the last dimension squeezed."""
        scores = self.output(X).squeeze(-1)
        # With the sigmoid enabled, stretch [0, 1] onto [1, 5].
        return 4 * scores + 1 if self.sigmoid else scores
class Creativity(nn.Module):
    """Feed-forward head that turns a feature vector into one scalar score.

    Args:
        num_layers: When equal to ``2`` the head is
            Linear -> ReLU -> Dropout -> Linear; otherwise it is
            ReLU -> Dropout -> Linear.
        hidden_size: Size of the last dimension of the input features.
        use_sigmoid: Finish the head with a ``Sigmoid`` and rescale the
            output to the range [1, 5].
        dropout: Dropout probability used inside the head.
        **kwargs: Passed through to ``nn.Module.__init__``.
    """

    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
        super().__init__(**kwargs)
        self.sigmoid = use_sigmoid

        modules = []
        if num_layers == 2:
            modules.append(nn.Linear(hidden_size, 128))
            modules.append(nn.ReLU())
            modules.append(nn.Dropout(dropout))
            modules.append(nn.Linear(128, 1))
        else:
            modules.append(nn.ReLU())
            modules.append(nn.Dropout(dropout))
            modules.append(nn.Linear(hidden_size, 1))
        # Only add the Sigmoid layer when requested.
        if use_sigmoid:
            modules.append(nn.Sigmoid())
        self.output = nn.Sequential(*modules)

    def forward(self, X):
        """Score ``X`` and drop the trailing singleton dimension."""
        raw = self.output(X).squeeze(-1)
        if not self.sigmoid:
            return raw
        # Sigmoid output lies in [0, 1]; shift and scale it to [1, 5].
        return 4 * raw + 1
class BERT2Phase(nn.Module):
    """Encoder with two scoring heads sharing one representation.

    The same encoder output is passed to an ``Effectiveness`` head and a
    ``Creativity`` head, and both predictions are returned.

    Args:
        bert: Encoder module. It is called with the keyword arguments
            ``input_ids``, ``token_type_ids``, ``attention_mask`` and
            ``output_attentions`` and its result must expose
            ``last_hidden_state``, ``pooler_output`` and ``attentions``.
        hidden_size: Width of the representation fed to both heads.
        type: ``'cls'`` to use ``last_hidden_state[:, 0, :]`` or
            ``'pooler'`` to use ``pooler_output``. (The name shadows the
            builtin but is kept because callers may pass it by keyword.)
        num_linear: Passed to both heads as their layer count.
        dropout: Dropout probability passed to both heads.
        use_sigmoid: Passed to both heads.

    Raises:
        ValueError: If ``type`` is not ``'cls'`` or ``'pooler'``.
    """

    def __init__(self, bert, hidden_size=768, type='cls',
                 num_linear=1, dropout=0.1, use_sigmoid=False):
        # Reject unknown pooling modes up front; otherwise ``forward`` would
        # fail later with an UnboundLocalError on ``e_pred`` / ``c_pred``.
        if type not in ('cls', 'pooler'):
            raise ValueError(
                f"type must be 'cls' or 'pooler', got {type!r}")

        super(BERT2Phase, self).__init__()
        self.encoder = bert
        self.type = type
        self.sigmoid = use_sigmoid
        self.effectiveness = Effectiveness(num_linear, hidden_size, use_sigmoid, dropout)
        self.creativity = Creativity(num_linear, hidden_size, use_sigmoid, dropout)

    def forward(self, inputs, return_attention=False):
        """Run the encoder once and score the result with both heads.

        Args:
            inputs: Mapping providing ``input_ids``, ``token_type_ids`` and
                ``attention_mask``.
            return_attention: Also ask the encoder for attentions and return
                them as a third element.

        Returns:
            ``(c_pred, e_pred)`` -- creativity first, effectiveness second --
            or ``(c_pred, e_pred, attentions)`` when ``return_attention`` is
            true.

        Raises:
            ValueError: If ``self.type`` was changed to an unsupported value
                after construction.
        """
        X = {'input_ids': inputs['input_ids'],
             'token_type_ids': inputs['token_type_ids'],
             'attention_mask': inputs['attention_mask'],
             'output_attentions': return_attention}
        encoded_X = self.encoder(**X)

        if self.type == 'cls':
            features = encoded_X.last_hidden_state[:, 0, :]
        elif self.type == 'pooler':
            features = encoded_X.pooler_output
        else:
            raise ValueError(
                f"type must be 'cls' or 'pooler', got {self.type!r}")

        e_pred = self.effectiveness(features)
        c_pred = self.creativity(features)
        return (c_pred, e_pred) if not return_attention else (c_pred, e_pred, encoded_X.attentions)

model/OnePhase_BERT_cls.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5ae3966f5420a322992c46fb2c69325cf034152015c0e447f35d1ee273e08366
-size 442557021

model/TwoPhase_BERT_cls.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:632ec5b262616d688fd9e655699d4e921f4d1e26f77465a86da0755ee215e260
-size 442561133

utils.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import os
+import tempfile
def save_dataframe_to_file(dataframe, file_format="csv"):
    """Write *dataframe* to ``output.<file_format>`` in the system temp dir.

    Args:
        dataframe: Object providing ``to_csv`` / ``to_excel`` (a pandas
            DataFrame in this app -- confirm against callers).
        file_format: ``"csv"`` or ``"xlsx"``.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If ``file_format`` is not supported. Previously nothing
            was written in that case, yet a path was still returned.
    """
    if file_format not in ("csv", "xlsx"):
        raise ValueError(
            f"Unsupported file format: {file_format!r} (expected 'csv' or 'xlsx')")

    # NOTE: the file name is fixed per format, so each call overwrites the
    # file written by the previous call with the same format.
    temp_dir = tempfile.gettempdir()  # system temporary directory
    file_path = os.path.join(temp_dir, f"output.{file_format}")
    if file_format == "csv":
        dataframe.to_csv(file_path, index=False)
    else:
        dataframe.to_excel(file_path, index=False)
    return file_path