Siyunb323 committed on
Commit
e1af10a
·
1 Parent(s): 06ae167
__pycache__/model.cpython-39.pyc ADDED
Binary file (4.79 kB). View file
 
__pycache__/utils.cpython-39.pyc CHANGED
Binary files a/__pycache__/utils.cpython-39.pyc and b/__pycache__/utils.cpython-39.pyc differ
 
app.py CHANGED
@@ -1,6 +1,8 @@
 
1
  import gradio as gr
2
  import pandas as pd
3
- from utils import save_dataframe_to_file
 
4
 
5
  with open("./description.md", "r", encoding="utf-8") as file:
6
  description_text = file.read()
@@ -27,17 +29,26 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
27
  output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
28
  else:
29
  # 读取文件
30
- if file.name.endswith('.csv'):
31
- df = pd.read_csv(file)
32
- else:
33
- df = pd.read_excel(file)
34
  # 检查第一行是否为 "prompt" 和 "response"
35
  if list(df.columns) == ['prompt', 'response']:
36
  dataframe_output = df
37
  else:
38
  df_values = [list(df.columns)] + df.values.tolist()
39
  dataframe_output = pd.DataFrame(df_values, columns=['prompt', 'response'])
 
 
 
 
 
 
 
 
 
 
 
40
  file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
 
41
 
42
  # 情况 3: 只有 file
43
  elif file is not None:
@@ -45,26 +56,35 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
45
  if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
46
  output = "File format must be xlsx or csv."
47
  elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
48
- output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
49
  else:
50
  # 读取文件
51
- if file.name.endswith('.csv'):
52
- df = pd.read_csv(file)
53
- else:
54
- df = pd.read_excel(file)
55
  # 检查第一行是否为 "prompt" 和 "response"
56
  if list(df.columns) == ['prompt', 'response']:
57
  dataframe_output = df
58
  else:
59
  df_values = [list(df.columns)] + df.values.tolist()
60
  dataframe_output = pd.DataFrame(df_values, columns=['prompt', 'response'])
 
 
 
 
 
 
 
 
 
 
 
61
  file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
62
  output = f"Processed {len(dataframe_output)} rows from uploaded file using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
63
 
64
  # 情况 4: 只有 input_text
65
  elif input_text is not None:
66
  if task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
67
- output = " One-phase Fine-tuned BERT model does not support Appropriateness task."
68
  else:
69
  lines = input_text.strip().split("\n")
70
  rows = []
@@ -78,10 +98,18 @@ def process_data(task_name, model_name, pooling_method, input_text=None, file=No
78
  break
79
 
80
  if output == "":
81
- if rows[0] == ['prompt', 'response']:
82
- dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response'])
83
- else:
84
- dataframe_output = pd.DataFrame(rows, columns=['prompt', 'response'])
 
 
 
 
 
 
 
 
85
  file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
86
  output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
87
 
 
1
+ import torch
2
  import gradio as gr
3
  import pandas as pd
4
+ from utils import save_dataframe_to_file, tokenize_Df
5
+ from model import load_model
6
 
7
  with open("./description.md", "r", encoding="utf-8") as file:
8
  description_text = file.read()
 
29
  output += " One-phase Fine-tuned BERT model does not support Appropriateness task."
30
  else:
31
  # 读取文件
32
+ df = pd.read_csv(file) if file.name.endswith('.csv') else pd.read_excel(file)
 
 
 
33
  # 检查第一行是否为 "prompt" 和 "response"
34
  if list(df.columns) == ['prompt', 'response']:
35
  dataframe_output = df
36
  else:
37
  df_values = [list(df.columns)] + df.values.tolist()
38
  dataframe_output = pd.DataFrame(df_values, columns=['prompt', 'response'])
39
+
40
+ # model 运行
41
+ loaded_net = load_model(model_name, pooling_method)
42
+ example = tokenize_Df(dataframe_output)
43
+ with torch.no_grad():
44
+ score = loaded_net(example)
45
+
46
+ if model_name == "One-phase Fine-tuned BERT":
47
+ dataframe_output['evaluation'] = score.numpy()
48
+ else:
49
+ dataframe_output['evaluation'] = score[0].numpy() if task_name=='Creativity' else score[1].numpy()
50
  file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
51
+ output += f" Processed {len(dataframe_output)} rows from uploaded file using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
52
 
53
  # 情况 3: 只有 file
54
  elif file is not None:
 
56
  if not (file.name.endswith('.csv') or file.name.endswith('.xlsx')):
57
  output = "File format must be xlsx or csv."
58
  elif task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
59
+ output = " One-phase Fine-tuned BERT model does not support Appropriateness task."
60
  else:
61
  # 读取文件
62
+ df = pd.read_csv(file) if file.name.endswith('.csv') else pd.read_excel(file)
63
+
 
 
64
  # 检查第一行是否为 "prompt" 和 "response"
65
  if list(df.columns) == ['prompt', 'response']:
66
  dataframe_output = df
67
  else:
68
  df_values = [list(df.columns)] + df.values.tolist()
69
  dataframe_output = pd.DataFrame(df_values, columns=['prompt', 'response'])
70
+
71
+ # model 运行
72
+ loaded_net = load_model(model_name, pooling_method)
73
+ example = tokenize_Df(dataframe_output)
74
+ with torch.no_grad():
75
+ score = loaded_net(example)
76
+
77
+ if model_name == "One-phase Fine-tuned BERT":
78
+ dataframe_output['evaluation'] = score.numpy()
79
+ else:
80
+ dataframe_output['evaluation'] = score[0].numpy() if task_name=='Creativity' else score[1].numpy()
81
  file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
82
  output = f"Processed {len(dataframe_output)} rows from uploaded file using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
83
 
84
  # 情况 4: 只有 input_text
85
  elif input_text is not None:
86
  if task_name == "Appropriateness" and model_name == "One-phase Fine-tuned BERT":
87
+ output = "One-phase Fine-tuned BERT model does not support Appropriateness task."
88
  else:
89
  lines = input_text.strip().split("\n")
90
  rows = []
 
98
  break
99
 
100
  if output == "":
101
+ dataframe_output = pd.DataFrame(rows[1:], columns=['prompt', 'response']) if rows[0] == ['prompt', 'response'] else pd.DataFrame(rows, columns=['prompt', 'response'])
102
+
103
+ # model 运行
104
+ loaded_net = load_model(model_name, pooling_method)
105
+ example = tokenize_Df(dataframe_output)
106
+ with torch.no_grad():
107
+ score = loaded_net(example)
108
+
109
+ if model_name == "One-phase Fine-tuned BERT":
110
+ dataframe_output['evaluation'] = score.numpy()
111
+ else:
112
+ dataframe_output['evaluation'] = score[0].numpy() if task_name=='Creativity' else score[1].numpy()
113
  file_output = save_dataframe_to_file(dataframe_output, file_format="csv")
114
  output = f"Processed {len(dataframe_output)} rows of text using task: {task_name}, model: {model_name}, pooling: {pooling_method}."
115
 
model.py CHANGED
@@ -1,5 +1,12 @@
 
1
  import torch
2
  from torch import nn
 
 
 
 
 
 
3
 
4
  class BERTregressor(nn.Module):
5
  def __init__(self, bert, hidden_size=768, num_linear=1, dropout=0.1,
@@ -134,4 +141,17 @@ class BERT2Phase(nn.Module):
134
  e_pred = self.effectiveness(encoded_X.pooler_output)
135
  c_pred = self.creativity(encoded_X.pooler_output)
136
 
137
- return (c_pred, e_pred) if not return_attention else (c_pred, e_pred, encoded_X.attentions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import torch
3
  from torch import nn
4
+ from transformers import AutoModel
5
+ from huggingface_hub import hf_hub_download
6
+
7
+ token=os.getenv("HF_TOKEN")
8
+ repo_id = "Siyunb323/CreativityEvaluation"
9
+ model = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
10
 
11
  class BERTregressor(nn.Module):
12
  def __init__(self, bert, hidden_size=768, num_linear=1, dropout=0.1,
 
141
  e_pred = self.effectiveness(encoded_X.pooler_output)
142
  c_pred = self.creativity(encoded_X.pooler_output)
143
 
144
+ return (c_pred, e_pred) if not return_attention else (c_pred, e_pred, encoded_X.attentions)
145
+
146
def load_model(model_name, pooling_method):
    """Build the requested regressor and load its fine-tuned weights.

    Args:
        model_name: Either "One-phase Fine-tuned BERT" or
            "Two-phase Fine-tuned BERT".
        pooling_method: Pooling variant. It is used verbatim in the checkpoint
            file name; any value other than 'cls' is handed to the network
            as 'pooler'.

    Returns:
        The network with its state dict loaded, switched to eval mode.

    Raises:
        ValueError: If ``model_name`` is not one of the two supported names.
    """
    pooling = pooling_method if pooling_method == 'cls' else 'pooler'

    # NOTE(review): both wrappers are built around the same module-level
    # `model`; presumably they keep a reference to it, so loading a state dict
    # would also overwrite that shared encoder -- confirm before caching the
    # returned networks across calls.
    if model_name == "One-phase Fine-tuned BERT":
        loaded_net = BERTregressor(model, hidden_size=768, num_linear=1,
                                   dropout=0.1, o_type=pooling, t_type='C',
                                   use_sigmoid=True)
        filename = f"model/OnePhase_BERT_{pooling_method}.pth"
    elif model_name == "Two-phase Fine-tuned BERT":
        loaded_net = BERT2Phase(model, hidden_size=768, num_linear=1,
                                dropout=0.1, type=pooling, use_sigmoid=True)
        filename = f"model/TwoPhase_BERT_{pooling_method}.pth"
    else:
        # Previously an unknown name fell through and crashed later with an
        # UnboundLocalError on `filename`; fail early with a clear message.
        raise ValueError(f"Unsupported model_name: {model_name!r}")

    model_path = hf_hub_download(repo_id=repo_id, filename=filename,
                                 use_auth_token=token)
    # Load onto CPU so checkpoints saved from a GPU still work on CPU-only
    # hosts; callers convert the outputs with .numpy(), which needs CPU tensors.
    state_dict = torch.load(model_path, map_location="cpu")
    loaded_net.load_state_dict(state_dict)
    loaded_net.eval()
    return loaded_net
utils.py CHANGED
@@ -1,5 +1,9 @@
1
  import os
2
  import tempfile
 
 
 
 
3
 
4
  def save_dataframe_to_file(dataframe, file_format="csv"):
5
  temp_dir = tempfile.gettempdir() # 获取系统临时目录
@@ -8,4 +12,11 @@ def save_dataframe_to_file(dataframe, file_format="csv"):
8
  dataframe.to_csv(file_path, index=False)
9
  elif file_format == "xlsx":
10
  dataframe.to_excel(file_path, index=False)
11
- return file_path
 
 
 
 
 
 
 
 
1
  import os
2
  import tempfile
3
+ import fugashi
4
+ import unidic_lite
5
+ from transformers import AutoTokenizer
6
+ tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
7
 
8
  def save_dataframe_to_file(dataframe, file_format="csv"):
9
  temp_dir = tempfile.gettempdir() # 获取系统临时目录
 
12
  dataframe.to_csv(file_path, index=False)
13
  elif file_format == "xlsx":
14
  dataframe.to_excel(file_path, index=False)
15
+ return file_path
16
+
17
def _cell_to_text(value):
    """Return *value* as a string, mapping None and float NaN to ''."""
    # `value != value` is only true for NaN; the isinstance guard keeps the
    # comparison away from non-float objects.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def tokenize_Df(examples, max_length=60):
    """Tokenize prompt/response pairs with the module-level tokenizer.

    Args:
        examples: Mapping-like object (e.g. a DataFrame) exposing iterable
            'prompt' and 'response' entries of equal length.
        max_length: Length every encoded pair is padded or truncated to.
            Defaults to 60, the value that was previously hard-coded.

    Returns:
        Whatever the tokenizer returns for the paired inputs, requested as
        PyTorch tensors (``return_tensors="pt"``).
    """
    # Cells read from CSV/Excel can be numbers or NaN; the tokenizer is
    # expected to accept only strings, so coerce every cell to text first.
    prompts = [_cell_to_text(cell) for cell in examples['prompt']]
    responses = [_cell_to_text(cell) for cell in examples['response']]
    return tokenizer(prompts, responses,
                     return_tensors="pt",
                     padding='max_length',
                     max_length=max_length,
                     truncation='longest_first')