File size: 2,781 Bytes
bae4131
6454c0e
 
bae4131
 
3d76e98
bae4131
 
 
 
 
 
 
6454c0e
 
5289522
bae4131
 
3d76e98
bae4131
 
 
 
 
 
 
 
 
 
 
3d76e98
 
 
 
 
 
6454c0e
 
 
cf69537
 
bae4131
6454c0e
 
cf69537
bae4131
6454c0e
3d76e98
6454c0e
 
 
 
 
 
bae4131
6454c0e
bae4131
6454c0e
 
 
bae4131
6454c0e
3d76e98
6454c0e
3d76e98
 
bae4131
3d76e98
 
bae4131
6454c0e
 
3d76e98
6454c0e
 
bae4131
 
 
6454c0e
bae4131
 
6454c0e
 
 
1e75b2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import yaml
from yourbench_space.utils import CONFIG_PATH

def generate_base_config(
    hf_org,
    hf_dataset_name,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    private_dataset,
):
    """Assemble the default YourBench pipeline configuration as a YAML string.

    Secrets are emitted as environment-variable placeholders ("$HF_TOKEN",
    "$MODEL_API_KEY") rather than literal values; note that the
    ``model_api_key`` argument itself is not embedded in the output.
    Insertion order of the keys is preserved in the rendered YAML
    (``sort_keys=False``).
    """
    # Hugging Face Hub settings used when publishing the dataset.
    hub_settings = {
        "token": "$HF_TOKEN",
        "private": private_dataset,
        "hf_organization": hf_org,
        "hf_dataset_name": hf_dataset_name,
    }

    # Single model entry shared by every pipeline role below.
    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": "$MODEL_API_KEY",
        "max_concurrent_requests": max_concurrent_requests,
    }

    # The same model is assigned to every role of the pipeline.
    role_names = (
        "ingestion",
        "summarization",
        "single_shot_question_generation",
        "multi_hop_question_generation",
        "answer_generation",
        "judge_answers",
    )
    roles = {role: [model_name] for role in role_names}

    # Per-stage pipeline settings; every stage is enabled by default.
    pipeline_stages = {
        "ingestion": {
            "source_documents_dir": "/app/uploaded_files",
            "output_dir": "/app/ingested",
            "run": True,
        },
        "upload_ingest_to_hub": {
            "source_documents_dir": "/app/ingested",
            "run": True,
        },
        "summarization": {"run": True},
        "chunking": {
            "chunking_configuration": {
                "l_min_tokens": 64,
                "l_max_tokens": 128,
                "tau_threshold": 0.3,
                "h_min": 2,
                "h_max": 4,
            },
            "run": True,
        },
        "single_shot_question_generation": {
            "diversification_seed": "24 year old adult",
            "run": True,
        },
        "multi_hop_question_generation": {"run": True},
        "answer_generation": {
            "question_type": "single_shot",
            "run": True,
            "strategies": [
                {"name": "zeroshot", "prompt": "ZEROSHOT_QA_USER_PROMPT", "model_name": model_name},
                {"name": "gold", "prompt": "GOLD_QA_USER_PROMPT", "model_name": model_name},
            ],
        },
        "judge_answers": {
            "run": True,
            "comparing_strategies": [["zeroshot", "gold"]],
            "chunk_column_index": 0,
            "random_seed": 42,
        },
    }

    return yaml.dump(
        {
            "hf_configuration": hub_settings,
            "model_list": [model_entry],
            "model_roles": roles,
            "pipeline": pipeline_stages,
        },
        sort_keys=False,
    )

def save_config(yaml_text):
    """Write *yaml_text* to ``CONFIG_PATH`` and return a status message.

    The file is written with an explicit UTF-8 encoding: the YAML payload
    (and this function's own return string) can contain non-ASCII
    characters, and relying on the platform's default locale encoding
    would raise ``UnicodeEncodeError`` or corrupt the config on systems
    where that default is not UTF-8 (e.g. Windows cp1252).
    """
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"