Spaces:

Short-Answer-Feedback
/

Leaderboard

Sleeping

App Files Files Community

MCK-02 committed on Jan 26, 2023

Commit

52963c7

1 Parent(s): f12d18c

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -29

app.py CHANGED Viewed

@@ -64,16 +64,16 @@ bertscore = load_metric('bertscore')
 MAX_INPUT_LENGTH = 256
 MAX_TARGET_LENGTH = 128
-"""
 def preprocess_function(examples):
     Preprocess entries of the given dataset
     Params:
         examples (Dataset): dataset to be preprocessed
     Returns:
         model_inputs (BatchEncoding): tokenized dataset entries
     inputs, targets = [], []
     for i in range(len(examples['question'])):
         inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
@@ -86,7 +86,7 @@ def preprocess_function(examples):
     model_inputs['labels'] = labels['input_ids']
     return model_inputs
-"""
 def flatten_list(l):
@@ -190,9 +190,6 @@ def get_predictions_labels(model, dataloader, tokenizer):
     return predictions, labels
 def load_data():
     df = pd.DataFrame(columns=['Model', 'Dataset', 'SacreBLEU', 'ROUGE-2', 'METEOR', 'BERTScore', 'Accuracy', 'Weighted F1', 'Macro F1'])
     for ds in all_datasets:
@@ -200,28 +197,6 @@ def load_data():
         model = AutoModelForSeq2SeqLM.from_pretrained(get_model(ds))
         tokenizer = AutoTokenizer.from_pretrained(get_tokenizer(ds))
-        def preprocess_function(examples):
-            """
-            Preprocess entries of the given dataset
-            Params:
-            examples (Dataset): dataset to be preprocessed
-            Returns:
-            model_inputs (BatchEncoding): tokenized dataset entries
-            """
-            inputs, targets = [], []
-            for i in range(len(examples['question'])):
-                inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
-                targets.append(f"{examples['verification_feedback'][i]} Feedback: {examples['answer_feedback'][i]}")
-            # apply tokenization to inputs and labels
-            model_inputs = tokenizer(inputs, max_length=MAX_INPUT_LENGTH, padding='max_length', truncation=True)
-            labels = tokenizer(text_target=targets, max_length=MAX_TARGET_LENGTH, padding='max_length', truncation=True)
-            model_inputs['labels'] = labels['input_ids']
-            return model_inputs
         processed_dataset = split.map(
             preprocess_function,
             batched=True,

 MAX_INPUT_LENGTH = 256
 MAX_TARGET_LENGTH = 128
 def preprocess_function(examples):
+    """
     Preprocess entries of the given dataset
     Params:
         examples (Dataset): dataset to be preprocessed
     Returns:
         model_inputs (BatchEncoding): tokenized dataset entries
+    """
     inputs, targets = [], []
     for i in range(len(examples['question'])):
         inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
     model_inputs['labels'] = labels['input_ids']
     return model_inputs
 def flatten_list(l):
     return predictions, labels
 def load_data():
     df = pd.DataFrame(columns=['Model', 'Dataset', 'SacreBLEU', 'ROUGE-2', 'METEOR', 'BERTScore', 'Accuracy', 'Weighted F1', 'Macro F1'])
     for ds in all_datasets:
         model = AutoModelForSeq2SeqLM.from_pretrained(get_model(ds))
         tokenizer = AutoTokenizer.from_pretrained(get_tokenizer(ds))
         processed_dataset = split.map(
             preprocess_function,
             batched=True,