MCK-02 committed on
Commit
52963c7
·
1 Parent(s): f12d18c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -29
app.py CHANGED
@@ -64,16 +64,16 @@ bertscore = load_metric('bertscore')
64
  MAX_INPUT_LENGTH = 256
65
  MAX_TARGET_LENGTH = 128
66
 
67
- """
68
  def preprocess_function(examples):
69
-
70
  Preprocess entries of the given dataset
71
 
72
  Params:
73
  examples (Dataset): dataset to be preprocessed
74
  Returns:
75
  model_inputs (BatchEncoding): tokenized dataset entries
76
-
77
  inputs, targets = [], []
78
  for i in range(len(examples['question'])):
79
  inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
@@ -86,7 +86,7 @@ def preprocess_function(examples):
86
  model_inputs['labels'] = labels['input_ids']
87
 
88
  return model_inputs
89
- """
90
 
91
 
92
  def flatten_list(l):
@@ -190,9 +190,6 @@ def get_predictions_labels(model, dataloader, tokenizer):
190
  return predictions, labels
191
 
192
 
193
-
194
-
195
-
196
  def load_data():
197
  df = pd.DataFrame(columns=['Model', 'Dataset', 'SacreBLEU', 'ROUGE-2', 'METEOR', 'BERTScore', 'Accuracy', 'Weighted F1', 'Macro F1'])
198
  for ds in all_datasets:
@@ -200,28 +197,6 @@ def load_data():
200
  model = AutoModelForSeq2SeqLM.from_pretrained(get_model(ds))
201
  tokenizer = AutoTokenizer.from_pretrained(get_tokenizer(ds))
202
 
203
- def preprocess_function(examples):
204
- """
205
- Preprocess entries of the given dataset
206
-
207
- Params:
208
- examples (Dataset): dataset to be preprocessed
209
- Returns:
210
- model_inputs (BatchEncoding): tokenized dataset entries
211
- """
212
- inputs, targets = [], []
213
- for i in range(len(examples['question'])):
214
- inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
215
- targets.append(f"{examples['verification_feedback'][i]} Feedback: {examples['answer_feedback'][i]}")
216
-
217
- # apply tokenization to inputs and labels
218
- model_inputs = tokenizer(inputs, max_length=MAX_INPUT_LENGTH, padding='max_length', truncation=True)
219
- labels = tokenizer(text_target=targets, max_length=MAX_TARGET_LENGTH, padding='max_length', truncation=True)
220
-
221
- model_inputs['labels'] = labels['input_ids']
222
-
223
- return model_inputs
224
-
225
  processed_dataset = split.map(
226
  preprocess_function,
227
  batched=True,
 
64
  MAX_INPUT_LENGTH = 256
65
  MAX_TARGET_LENGTH = 128
66
 
67
+
68
  def preprocess_function(examples):
69
+ """
70
  Preprocess entries of the given dataset
71
 
72
  Params:
73
  examples (Dataset): dataset to be preprocessed
74
  Returns:
75
  model_inputs (BatchEncoding): tokenized dataset entries
76
+ """
77
  inputs, targets = [], []
78
  for i in range(len(examples['question'])):
79
  inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
 
86
  model_inputs['labels'] = labels['input_ids']
87
 
88
  return model_inputs
89
+
90
 
91
 
92
  def flatten_list(l):
 
190
  return predictions, labels
191
 
192
 
 
 
 
193
  def load_data():
194
  df = pd.DataFrame(columns=['Model', 'Dataset', 'SacreBLEU', 'ROUGE-2', 'METEOR', 'BERTScore', 'Accuracy', 'Weighted F1', 'Macro F1'])
195
  for ds in all_datasets:
 
197
  model = AutoModelForSeq2SeqLM.from_pretrained(get_model(ds))
198
  tokenizer = AutoTokenizer.from_pretrained(get_tokenizer(ds))
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  processed_dataset = split.map(
201
  preprocess_function,
202
  batched=True,