File size: 1,890 Bytes
49079cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import re
def answer_cleansing_zero_shot(dataset, pred, must_choice=False):
    """Extract the final answer token from a raw zero-shot prediction string.

    The extraction rule depends on ``dataset``:

    * ``"commonsense-mc"``: first occurrence of one of the letters A-E.
    * ``"arithmetic"``: first occurrence of one of the letters A-D when
      ``must_choice`` is true; otherwise the first number found after
      removing commas (optional leading minus sign, optional decimal part).
    * ``"commonsense-verify"`` / ``"symbolic-coin"``: first standalone
      ``yes`` or ``no`` token of the lower-cased text.
    * ``"symbolic-letter"``: the whole text with double quotes, single
      quotes, periods and whitespace removed.

    Args:
        dataset: Name of the task family; must be exactly one of the names
            listed above.
        pred: Raw prediction text.
        must_choice: For ``"arithmetic"`` only, extract a choice letter
            instead of a number.

    Returns:
        The extracted answer as a string, or ``""`` when no candidate is
        found. A single trailing period is removed from the answer.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    pred = pred.strip()

    # Compare names with ``==``: ``x in ("name")`` tests against a plain
    # string (substring match), so "" or "mc" would wrongly select a branch.
    if dataset == "commonsense-mc":
        candidates = re.findall(r"A|B|C|D|E", pred)
    elif dataset == "arithmetic":
        if must_choice:
            candidates = re.findall(r"A|B|C|D", pred)
        else:
            # Drop thousands separators so "1,234" is read as one number.
            candidates = re.findall(r"-?\d+\.?\d*", pred.replace(",", ""))
    elif dataset in ("commonsense-verify", "symbolic-coin"):
        # Turn punctuation and whitespace into spaces so that "yes"/"no"
        # become standalone tokens.
        tokens = re.sub(r"[\"'.:,\s]", " ", pred.lower()).split(" ")
        candidates = [token for token in tokens if token in ("yes", "no")]
    elif dataset == "symbolic-letter":
        candidates = [re.sub(r"[\"'.\s]", "", pred)]
    else:
        raise ValueError("dataset is not properly defined ...")

    # If there is no candidate in the list, the answer is empty.
    if not candidates:
        return ""

    # Choose the first candidate.
    answer = candidates[0]

    # (For arithmetic tasks) the number pattern can capture a sentence-ending
    # period, e.g. "42."; drop it.
    if answer.endswith("."):
        answer = answer[:-1]
    return answer
def type_cleasing(type):
    """Return the first known task-type name that appears in ``type``.

    The recognised names are ``arithmetic``, ``commonsense-mc``,
    ``commonsense-verify``, ``symbolic-coin`` and ``symbolic-letter``.

    Args:
        type: Text to search for a task-type name.

    Returns:
        The earliest matching name in the text, or ``"UNDEFINED"`` when
        none of the names occurs.
    """
    found = re.search(
        r'arithmetic|commonsense-mc|commonsense-verify|symbolic-coin|symbolic-letter',
        type,
    )
    return found.group(0) if found else "UNDEFINED"
def entity_cleansing(ent):
    """Split a raw entity string into a list of trimmed, non-empty entities.

    Commas, newlines, periods and hyphens (together with any whitespace
    around the hyphen) are all treated as separators.

    Args:
        ent: Raw text containing the entities.

    Returns:
        A list of entity strings with surrounding whitespace removed and
        empty entries dropped.
    """
    # Normalise every separator to a comma so a single split suffices.
    normalized = re.sub(r"\n|\s*-\s*|\.", ",", ent)
    # Strip before filtering so whitespace-only pieces (e.g. "a, ,b" or a
    # trailing ". ") do not survive as empty strings in the result.
    pieces = (piece.strip() for piece in normalized.split(","))
    return [piece for piece in pieces if piece]
def knowledge_cleansing(knowledge):
    """Tidy a knowledge string: drop a leading "No, " and flatten whitespace.

    Args:
        knowledge: Raw knowledge text.

    Returns:
        The text with surrounding whitespace stripped, one leading
        ``"No, "`` removed if present, and every remaining whitespace
        character (newline, tab, ...) replaced by a single space character.
    """
    prefix = "No, "
    cleaned = knowledge.strip()
    if cleaned.startswith(prefix):
        # Remove only the leading prefix; a regex substitution here would
        # also delete every later "No, " inside the text.
        cleaned = cleaned[len(prefix):]
    # One-for-one replacement: consecutive whitespace is not collapsed.
    return re.sub(r"\s", " ", cleaned)
|