Siyunb323 committed
Commit 06ae167 · 1 Parent(s): 9b18daa

delete models

__pycache__/utils.cpython-39.pyc ADDED
Binary file (578 Bytes).
 
app.py CHANGED
@@ -1,16 +1,6 @@
-import os
 import gradio as gr
 import pandas as pd
-import tempfile
-
-def save_dataframe_to_file(dataframe, file_format="csv"):
-    temp_dir = tempfile.gettempdir()  # get the system temporary directory
-    file_path = os.path.join(temp_dir, f"output.{file_format}")
-    if file_format == "csv":
-        dataframe.to_csv(file_path, index=False)
-    elif file_format == "xlsx":
-        dataframe.to_excel(file_path, index=False)
-    return file_path
+from utils import save_dataframe_to_file
 
 with open("./description.md", "r", encoding="utf-8") as file:
     description_text = file.read()
@@ -18,13 +8,10 @@ with open("./description.md", "r", encoding="utf-8") as file:
 with open("./input_demo.txt", "r", encoding="utf-8") as file:
     demo = file.read()
 
-# define the processing function
-import pandas as pd
-
 def process_data(task_name, model_name, pooling_method, input_text=None, file=None):
     output = ""
     dataframe_output = pd.DataFrame()
-    file_output = pd.DataFrame()
+    file_output = None
 
     # Case 1: both file and input_text are empty
     if file is None and (input_text is None or input_text.strip() == ""):
@@ -36,6 +23,8 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
         # Check the file type
         if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
             output += " File format must be xlsx or csv."
+        elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
+            output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
         else:
             # Read the file
             if file.name.endswith('.csv'):
@@ -55,6 +44,8 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
         # Check the file type
         if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
             output = "File format must be xlsx or csv."
+        elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
+            output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
        else:
             # Read the file
             if file.name.endswith('.csv'):
@@ -72,24 +63,27 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
 
     # Case 4: only input_text is provided
     elif input_text is not None:
-        lines = input_text.strip().split("\n")
-        rows = []
-        for line in lines:
-            try:
-                split_line = line.split(",", maxsplit=1)
-                if len(split_line) == 2:
-                    rows.append(split_line)
-            except Exception as e:
-                output = f"Error processing line: {line}"
-                break
-
-        if output == "":
-            if rows[0] == ['prompt', 'response']:
-                dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response'])
-            else:
-                dataframe_output = pd.DataFrame(rows, columns=['prompt', 'response'])
-            file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
-            output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
+        if task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
+            output = " One-phase Fine-tuned BERT model does not support Appropriateness task."
+        else:
+            lines = input_text.strip().split("\n")
+            rows = []
+            for line in lines:
+                try:
+                    split_line = line.split(",", maxsplit=1)
+                    if len(split_line) == 2:
+                        rows.append(split_line)
+                except Exception as e:
+                    output = f"Error processing line: {line}"
+                    break
+
+            if output == "":
+                if rows[0] == ['prompt', 'response']:
+                    dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response'])
+                else:
+                    dataframe_output = pd.DataFrame(rows, columns=['prompt', 'response'])
+                file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
+                output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
 
     return output, dataframe_output, file_output
 
 
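For context, the text branch of process_data expects one "prompt,response" pair per line, with an optional header row, and splits on the first comma only so that responses may contain commas. A minimal standalone illustration of that parsing (the sample text is made up and is not part of the commit):

import pandas as pd

input_text = "prompt,response\nWrite a slogan,Buy fresh, buy local"
rows = [line.split(",", maxsplit=1) for line in input_text.strip().split("\n")]
if rows and rows[0] == ["prompt", "response"]:
    rows = rows[1:]  # drop the header row, as process_data does
df = pd.DataFrame(rows, columns=["prompt", "response"])
print(df)  # one row: prompt "Write a slogan", response "Buy fresh, buy local"
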
model.py ADDED
@@ -0,0 +1,137 @@
+import torch
+from torch import nn
+
+class BERTregressor(nn.Module):
+    def __init__(self, bert, hidden_size=768, num_linear=1, dropout=0.1,
+                 o_type='cls', t_type='C', use_sigmoid=False):
+
+        super(BERTregressor, self).__init__()
+        self.encoder = bert
+        self.o_type = o_type
+        self.t_type = t_type
+        self.sigmoid = use_sigmoid
+
+        if num_linear == 2:
+            layers = [nn.Linear(hidden_size, 128),
+                      nn.ReLU(),
+                      nn.Dropout(dropout),
+                      nn.Linear(128, 1)]
+        elif num_linear == 1:
+            layers = [nn.Dropout(dropout),
+                      nn.Linear(hidden_size, 1)]
+
+        if use_sigmoid:
+            layers.append(nn.Sigmoid())
+
+        self.output = nn.Sequential(*layers)
+
+    def forward(self, inputs, return_attention=False):
+
+        X = {'input_ids': inputs['input_ids'],
+             'token_type_ids': inputs['token_type_ids'],
+             'attention_mask': inputs['attention_mask'],
+             'output_attentions': return_attention}
+        encoded_X = self.encoder(**X)
+        if self.o_type == 'cls':
+            output = self.output(encoded_X.last_hidden_state[:, 0, :])
+        elif self.o_type == 'pooler':
+            output = self.output(encoded_X.pooler_output)
+
+        output = 4 * output.squeeze(-1) + 1 if self.sigmoid and self.t_type == 'C' else output.squeeze(-1)
+
+        return output if not return_attention else (output, encoded_X.attentions)
+
+class Effectiveness(nn.Module):
+    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
+        super(Effectiveness, self).__init__(**kwargs)
+        self.sigmoid = use_sigmoid
+
+        if num_layers == 2:
+            layers = [
+                nn.Linear(hidden_size, 128),
+                nn.ReLU(),
+                nn.Dropout(dropout),
+                nn.Linear(128, 1)
+            ]
+        else:
+            layers = [
+                nn.ReLU(),
+                nn.Dropout(dropout),
+                nn.Linear(hidden_size, 1)
+            ]
+
+        if use_sigmoid:
+            layers.append(nn.Sigmoid())  # add the Sigmoid layer only when needed
+
+        self.output = nn.Sequential(*layers)
+
+    def forward(self, X):
+        output = self.output(X)
+
+        # If a Sigmoid layer is used, rescale the output to [1, 5]
+        if self.sigmoid:
+            return 4 * output.squeeze(-1) + 1
+        else:
+            return output.squeeze(-1)
+
+class Creativity(nn.Module):
+    """Regression head for the creativity score."""
+    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
+        super(Creativity, self).__init__(**kwargs)
+        self.sigmoid = use_sigmoid
+
+        if num_layers == 2:
+            layers = [
+                nn.Linear(hidden_size, 128),
+                nn.ReLU(),
+                nn.Dropout(dropout),
+                nn.Linear(128, 1)
+            ]
+        else:
+            layers = [
+                nn.ReLU(),
+                nn.Dropout(dropout),
+                nn.Linear(hidden_size, 1)
+            ]
+
+        if use_sigmoid:
+            layers.append(nn.Sigmoid())  # add the Sigmoid layer only when needed
+
+        self.output = nn.Sequential(*layers)
+
+    def forward(self, X):
+        output = self.output(X)
+
+        # If a Sigmoid layer is used, rescale the output to [1, 5]
+        if self.sigmoid:
+            return 4 * output.squeeze(-1) + 1
+        else:
+            return output.squeeze(-1)
+
+class BERT2Phase(nn.Module):
+    def __init__(self, bert, hidden_size=768, type='cls',
+                 num_linear=1, dropout=0.1, use_sigmoid=False):
+
+        super(BERT2Phase, self).__init__()
+        self.encoder = bert
+        self.type = type
+        self.sigmoid = use_sigmoid
+
+        self.effectiveness = Effectiveness(num_linear, hidden_size, use_sigmoid, dropout)
+        self.creativity = Creativity(num_linear, hidden_size, use_sigmoid, dropout)
+
+    def forward(self, inputs, return_attention=False):
+        X = {'input_ids': inputs['input_ids'],
+             'token_type_ids': inputs['token_type_ids'],
+             'attention_mask': inputs['attention_mask'],
+             'output_attentions': return_attention}
+        encoded_X = self.encoder(**X)
+
+        if self.type == 'cls':
+            e_pred = self.effectiveness(encoded_X.last_hidden_state[:, 0, :])
+            c_pred = self.creativity(encoded_X.last_hidden_state[:, 0, :])
+        elif self.type == 'pooler':
+            e_pred = self.effectiveness(encoded_X.pooler_output)
+            c_pred = self.creativity(encoded_X.pooler_output)
+
+        return (c_pred, e_pred) if not return_attention else (c_pred, e_pred, encoded_X.attentions)
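
model.py defines a one-phase regressor (a single score head on top of BERT) and a two-phase variant with separate creativity and effectiveness heads. A minimal sketch of how they might be driven, assuming the encoder is a Hugging Face BertModel with bert-base-uncased tokenization (both assumptions; the fine-tuned checkpoints themselves were removed in this commit):

# Hypothetical usage sketch; model/tokenizer names are assumptions, not part of this commit.
import torch
from transformers import BertModel, BertTokenizer
from model import BERTregressor, BERT2Phase

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
encoder = BertModel.from_pretrained("bert-base-uncased")

one_phase = BERTregressor(encoder, o_type='cls', use_sigmoid=True)  # single score, roughly in [1, 5]
two_phase = BERT2Phase(encoder, type='cls', use_sigmoid=True)       # (creativity, effectiveness) pair

inputs = tokenizer("prompt text", "response text", return_tensors="pt",
                   padding=True, truncation=True)

with torch.no_grad():
    score = one_phase(inputs)                      # tensor of shape (batch,)
    creativity, effectiveness = two_phase(inputs)
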
model/OnePhase_BERT_cls.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5ae3966f5420a322992c46fb2c69325cf034152015c0e447f35d1ee273e08366
-size 442557021

model/TwoPhase_BERT_cls.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:632ec5b262616d688fd9e655699d4e921f4d1e26f77465a86da0755ee215e260
-size 442561133

utils.py ADDED
@@ -0,0 +1,11 @@
+import os
+import tempfile
+
+def save_dataframe_to_file(dataframe, file_format="csv"):
+    temp_dir = tempfile.gettempdir()  # get the system temporary directory
+    file_path = os.path.join(temp_dir, f"output.{file_format}")
+    if file_format == "csv":
+        dataframe.to_csv(file_path, index=False)
+    elif file_format == "xlsx":
+        dataframe.to_excel(file_path, index=False)
+    return file_path
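
A quick usage sketch for the new helper (the sample frame is illustrative; xlsx output additionally requires an Excel writer such as openpyxl):

import pandas as pd
from utils import save_dataframe_to_file

df = pd.DataFrame({"prompt": ["Write a slogan"], "response": ["Fresh, fast, local"]})
path = save_dataframe_to_file(df, file_format="csv")
print(path)  # e.g. /tmp/output.csv; the exact directory depends on the OS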