File size: 1,918 Bytes
477fa2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
    Usage: python scripts/task_identification_generator.py
"""

import json
import random
# Seed the module-level RNG so repeated runs draw the same sequence.
random.seed(42)

# Alphabet the examples draw from: the 26 lowercase ASCII letters, in order.
SYMBOLS = [chr(code) for code in range(ord("a"), ord("z") + 1)]
# Separator written between an example's input and its label.
DELIMETER = "->"
# Header line placed above the solved exemplars in every prompt.
INSTRUCTION = 'Figure out the pattern in the below examples, and then answer with just "foo" or "bar".'
# Eval name; also used as the data sub-directory for the samples file.
TASK_NAME = "pattern_identification"


def generate_example():
    """Build one random (symbol, symbol list, label) example.

    A query symbol is drawn from SYMBOLS, then half of SYMBOLS is sampled
    without replacement. The label is "foo" when the query symbol appears
    in the sampled list and "bar" otherwise.

    Returns:
        A tuple ``(target_symbol, symbol_list, target)``.
    """
    # Keep the draw order (choice first, then sample): swapping the two
    # calls would change the sequence produced by the seeded RNG.
    query = random.choice(SYMBOLS)
    pool = random.sample(SYMBOLS, len(SYMBOLS) // 2)
    if query in pool:
        label = "foo"
    else:
        label = "bar"
    return query, pool, label


def generate_exemplars_str(num_exemplars: int = 8):
    """Render INSTRUCTION followed by ``num_exemplars`` solved examples.

    Each example becomes one line of the form
    ``(symbol, [symbols]) <DELIMETER> label`` with every single quote
    removed; the lines are joined with newlines beneath INSTRUCTION.

    Args:
        num_exemplars: Number of solved examples to generate.

    Returns:
        The instruction and exemplar lines as a single string.
    """
    lines = [INSTRUCTION]
    for _ in range(num_exemplars):
        symbol, symbols, label = generate_example()
        rendered = f"({symbol}, {symbols}) {DELIMETER} {label}"
        # Formatting the list puts quotes around each symbol; strip them.
        lines.append(rendered.replace("'", ""))
    return "\n".join(lines)


def generate_eval_examples(num_eval_examples: int = 250):
    """Generate evaluation prompts and their expected answers.

    Every prompt is a freshly generated exemplar block (instruction plus
    solved examples) followed by one unsolved query line that ends with
    DELIMETER. Single quotes are removed from the whole prompt.

    Args:
        num_eval_examples: Number of prompts to generate.

    Returns:
        A pair ``(prompts, targets)`` of equal-length lists, where
        ``targets[i]`` is the label ("foo" or "bar") for ``prompts[i]``.
    """
    # Draw all queries before building any prompt; interleaving the two
    # steps would consume the RNG in a different order.
    queries = [generate_example() for _ in range(num_eval_examples)]

    prompts = []
    targets = []
    for symbol, symbols, label in queries:
        header = generate_exemplars_str()
        prompt = f"{header}\n({symbol}, {symbols}) {DELIMETER}"
        prompts.append(prompt.replace("'", ""))
        targets.append(label)
    return prompts, targets


if __name__ == "__main__":
    # Script entry point: generate the eval samples and write them as JSONL,
    # one chat-formatted sample per line.
    # Imported locally because only the script entry point needs it.
    import os

    eval_examples_str, targets = generate_eval_examples()
    # Relative path, resolved against the current working directory.
    output_path = f"evals/registry/data/{TASK_NAME}/samples.v0.jsonl"
    # The task's data directory may not exist yet; create it so the open()
    # below does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Explicit encoding keeps the output independent of the platform default.
    with open(output_path, "w", encoding="utf-8") as writer:
        for eval_example_str, target in zip(eval_examples_str, targets):
            d = {
                "input": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": eval_example_str},
                ],
                "ideal": target,
            }
            writer.write(json.dumps(d) + "\n")
    print(f"{len(eval_examples_str)} lines written to {output_path}.")