File size: 1,492 Bytes
246d201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# This file was used to create the Hugging Face dataset from the exercism/python
# GitHub repo.
# Refer to: https://github.com/exercism/python/tree/main/exercises/practice

import os
from pathlib import Path

from datasets import Dataset

# Build one dataset row per exercise directory found under ``practice/`` and
# publish the resulting table to the Hugging Face Hub.
#
# Columns:
#   instance_id   - position of the exercise in the sorted directory listing
#   instance_name - directory name with hyphens replaced by underscores
#   instruction   - concatenated markdown from the exercise's ``.docs`` folder
#   signature     - contents of ``<instance_name>.py``
#   test          - contents of ``<instance_name>_test.py``

# Only directories are exercises; a stray file in ``practice/`` (for example
# an OS metadata file) would otherwise make the reads below fail. Sorting
# keeps the instance ids stable across runs.
tests = sorted(
    entry
    for entry in os.listdir('practice/')
    if (Path('practice/') / entry).is_dir()
)
dataset = {
    'instance_id': [],
    'instance_name': [],
    'instruction': [],
    'signature': [],
    'test': [],
}

for i, test in enumerate(tests):
    testdir = Path(f'practice/{test}/')
    # Directory names use hyphens, while the Python files inside them use
    # underscores.
    module_name = testdir.name.replace('-', '_')

    # The prompt is assembled in this order: optional introduction, required
    # instructions, optional appendix. A missing ``instructions.md`` is an
    # error and is allowed to raise.
    docs_dir = testdir / '.docs'
    doc_parts = []
    for doc_name, required in (
        ('introduction.md', False),
        ('instructions.md', True),
        ('instructions.append.md', False),
    ):
        doc_path = docs_dir / doc_name
        if required or doc_path.exists():
            # Explicit encoding: do not depend on the platform's locale
            # default when reading the repository's text files.
            doc_parts.append(doc_path.read_text(encoding='utf-8'))
    instructions = ''.join(doc_parts)

    signature_file = testdir / f'{module_name}.py'
    test_file = testdir / f'{module_name}_test.py'

    dataset['instance_id'].append(i)
    dataset['instance_name'].append(module_name)
    dataset['instruction'].append(instructions)
    dataset['signature'].append(signature_file.read_text(encoding='utf-8'))
    dataset['test'].append(test_file.read_text(encoding='utf-8'))

ds = Dataset.from_dict(dataset)

ds.push_to_hub('RajMaheshwari/Exercism-Python')