Commit 64407ae (verified), committed by Kqte
Parent: 1da62dd

Upload 12 files
Nebula-baseline/NeBuLa-org_adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+     "alpha_pattern": {},
+     "auto_mapping": null,
+     "base_model_name_or_path": "/tmpdir/chaturve/llama-3-8b-hf/",
+     "bias": "none",
+     "fan_in_fan_out": false,
+     "inference_mode": true,
+     "init_lora_weights": true,
+     "layers_pattern": null,
+     "layers_to_transform": null,
+     "loftq_config": {},
+     "lora_alpha": 16,
+     "lora_dropout": 0.1,
+     "megatron_config": null,
+     "megatron_core": "megatron.core",
+     "modules_to_save": null,
+     "peft_type": "LORA",
+     "r": 64,
+     "rank_pattern": {},
+     "revision": null,
+     "target_modules": [
+         "q_proj",
+         "v_proj"
+     ],
+     "task_type": "CAUSAL_LM"
+ }
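
For reference, these are standard PEFT `LoraConfig` fields: rank `r=64`, scaling `lora_alpha=16`, dropout 0.1, with LoRA applied to the `q_proj`/`v_proj` attention projections. A minimal sketch for inspecting the config programmatically; the local path is an assumption, and the JSON is read by hand because the file is not named `adapter_config.json`, which PEFT's `from_pretrained` helpers expect:

```python
import json

from peft import LoraConfig

# Assumed local path to the file uploaded in this commit.
with open("Nebula-baseline/NeBuLa-org_adapter_config.json") as fh:
    raw = json.load(fh)

raw.pop("peft_type", None)  # LoraConfig sets this field itself
config = LoraConfig(**raw)
print(config.r, config.lora_alpha)    # 64 16
print(sorted(config.target_modules))  # ['q_proj', 'v_proj']
```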
Nebula-baseline/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff3d5d873f8094e23565ca85b675aca9b86457c03bafb339967ffd49eabe50c0
+ size 109069176
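
This file is a Git LFS pointer, not the weights themselves: `oid` is the SHA-256 of the actual safetensors blob and `size` its length in bytes. A minimal sketch for verifying a local copy against the pointer, assuming the real file has been fetched (e.g. with `git lfs pull`) to the path below:

```python
import hashlib
import os

# Values copied from the LFS pointer above.
EXPECTED_OID = "ff3d5d873f8094e23565ca85b675aca9b86457c03bafb339967ffd49eabe50c0"
EXPECTED_SIZE = 109069176

path = "Nebula-baseline/adapter_model.safetensors"  # assumed local path
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "hash mismatch"
print("local file matches the LFS pointer")
```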
Nebula-synth/NeBuLa-synth_adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+     "alpha_pattern": {},
+     "auto_mapping": null,
+     "base_model_name_or_path": "/tmpdir/chaturve/llama-3-8b-hf/",
+     "bias": "none",
+     "fan_in_fan_out": false,
+     "inference_mode": true,
+     "init_lora_weights": true,
+     "layers_pattern": null,
+     "layers_to_transform": null,
+     "loftq_config": {},
+     "lora_alpha": 16,
+     "lora_dropout": 0.1,
+     "megatron_config": null,
+     "megatron_core": "megatron.core",
+     "modules_to_save": null,
+     "peft_type": "LORA",
+     "r": 64,
+     "rank_pattern": {},
+     "revision": null,
+     "target_modules": [
+         "q_proj",
+         "v_proj"
+     ],
+     "task_type": "CAUSAL_LM"
+ }
Nebula-synth/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ead40404c9439f63060938a58d0f406a56977775099a6723af2e41e36247dd8e
+ size 109069176
README.md ADDED
@@ -0,0 +1,25 @@
+ ---
+ pipeline_tag: other
+ tags:
+ - minecraft
+ - action prediction
+ language:
+ - en
+ ---
+ 
+ # Nebula: A Minecraft Neural Builder
+ 
+ Nebula is an LLM (Llama3-8B) finetuned on the Minecraft action prediction task (https://aclanthology.org/2020.acl-main.232/).
+ 
+ ## Model Details
+ We provide two variants of the model: Nebula-baseline, which was trained on the Minecraft dataset alone, and Nebula-synth, which was trained on
+ level-1 and level-2 synthetic data along with the Minecraft dataset. Both models achieve similar net-action F1 scores on the Minecraft test set.
+ 
+ ### Model Description
+ 
+ - **Language(s) (NLP):** English
+ - **Finetuned from model:** Llama3-8B
+ 
+ ### Model Sources
+ 
+ **Paper:** https://arxiv.org/abs/2406.18164
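
A minimal inference sketch for either variant, assuming local paths for the Llama3-8B base weights and for an adapter directory whose config has been renamed to `adapter_config.json` (the name PEFT expects); the prompt template mirrors the `merge.py` script included in this upload, with `<dialogue>` as a placeholder:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "/path/to/meta-llama3-8b", torch_dtype=torch.float16, device_map="auto"
)
# Swap in the Nebula-synth directory to use the synthetic-data variant.
model = PeftModel.from_pretrained(base, "/path/to/Nebula-baseline", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("/path/to/meta-llama3-8b")

# Prompt template used at training time (see merge.py); <dialogue> is a placeholder.
prompt = ("<|begin_of_text|>Predict the action sequence (AS) for the "
          "Minecraft excerpt:\n <dialogue>\n ### AS:")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_length=4096, do_sample=False)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```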
data/data_actseq-test-new.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/data_actseq-train-new.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/data_actseq-val-new.csv ADDED
The diff for this file is too large to render. See raw diff
 
merge.py ADDED
@@ -0,0 +1,59 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from datasets import load_dataset
+ from peft import PeftModel
+ 
+ device_map = "auto"
+ 
+ # Load the base model in fp16 and attach the LoRA adapter.
+ model = AutoModelForCausalLM.from_pretrained(
+     "/path/to/meta-llama3-8b",
+     # low_cpu_mem_usage=True,
+     return_dict=True,
+     torch_dtype=torch.float16,
+     device_map=device_map,
+ )
+ model = PeftModel.from_pretrained(model, "/path/to/llama3-8b-adapter", device_map=device_map)
+ # Fold the adapter weights into the base model so inference runs without PEFT overhead.
+ model = model.merge_and_unload()
+ 
+ tokenizer = AutoTokenizer.from_pretrained("/path/to/meta-llama3-8b", trust_remote_code=True)
+ # Llama 3 ships without a pad token; pick an id distinct from EOS so padding
+ # is never confused with end-of-sequence.
+ tokenizer.pad_token_id = tokenizer.eos_token_id + 1
+ model.config.pad_token_id = tokenizer.pad_token_id
+ 
+ # Greedy decoding, capped at a 4096-token context.
+ pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=4096, do_sample=False)
+ print("Padding side:", tokenizer.padding_side)
+ 
+ val_dataset = load_dataset("csv", data_files={"val": "/path/to/actseq-val-new.csv"})["val"]
+ test_dataset = load_dataset("csv", data_files={"test": "/path/to/actseq-test-new.csv"})["test"]
+ 
+ 
+ def formatting_prompts_func(example):
+     """Wrap each dialogue in the prompt template used at training time."""
+     output_texts = []
+     for dialogue in example["dial_with_actions"]:
+         text = f"<|begin_of_text|>Predict the action sequence (AS) for the Minecraft excerpt:\n {dialogue}\n ### AS:"
+         output_texts.append(text)
+     return output_texts
+ 
+ 
+ val_texts = formatting_prompts_func(val_dataset)
+ test_texts = formatting_prompts_func(test_dataset)
+ 
+ print("Val Length:", len(val_texts))
+ print("Test Length:", len(test_texts))
+ 
+ 
+ def generate_to_file(texts, out_path):
+     """Run the pipeline over each prompt and write generations to out_path."""
+     with open(out_path, "w") as f:
+         for text in texts:
+             print(text)
+             print(pipe(text)[0]["generated_text"], file=f)
+ 
+ 
+ generate_to_file(val_texts, "/path/to/val-output-file")
+ generate_to_file(test_texts, "/path/to/test-output-file")
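
As written, the script merges the adapter into the base model (`merge_and_unload()`), runs greedy decoding (`do_sample=False`) over the validation and test splits, and writes the generations to the two output files; the `/path/to` placeholders must be edited to match the local setup before running.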
requirements.txt ADDED
@@ -0,0 +1,90 @@
+ accelerate==0.27.2
+ aiohttp==3.9.3
+ aiosignal==1.3.1
+ async-timeout==4.0.3
+ attrs==23.2.0
+ bitsandbytes==0.41.3
+ brotlipy==0.7.0
+ certifi==2020.6.20
+ cffi==1.14.3
+ chardet==3.0.4
+ conda-package-handling==1.7.2
+ cryptography==3.2.1
+ datasets==2.17.1
+ dill==0.3.8
+ docstring-parser==0.15
+ filelock==3.13.1
+ frozenlist==1.4.1
+ fsspec==2023.10.0
+ huggingface-hub==0.20.3
+ idna==2.10
+ Jinja2==3.1.3
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.0.5
+ multiprocess==0.70.16
+ networkx==3.1
+ numpy==1.24.4
+ nvidia-cublas-cu11==11.11.3.6
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu11==11.8.87
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu11==11.8.89
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu11==11.8.89
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu11==8.7.0.84
+ nvidia-cudnn-cu12==8.9.2.26
+ nvidia-cufft-cu11==10.9.0.58
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu11==10.3.0.86
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu11==11.4.1.48
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu11==11.7.5.86
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu11==2.19.3
+ nvidia-nccl-cu12==2.19.3
+ nvidia-nvjitlink-cu12==12.3.101
+ nvidia-nvtx-cu11==11.8.86
+ nvidia-nvtx-cu12==12.1.105
+ packaging==23.2
+ pandas==2.0.3
+ peft==0.7.1
+ pillow==10.2.0
+ pip==20.2.4
+ psutil==5.9.8
+ pyarrow==15.0.0
+ pyarrow-hotfix==0.6
+ pycosat==0.6.3
+ pycparser==2.20
+ pygments==2.17.2
+ pyOpenSSL==19.1.0
+ PySocks==1.7.1
+ python-dateutil==2.8.2
+ pytz==2024.1
+ PyYAML==6.0.1
+ regex==2023.12.25
+ requests==2.24.0
+ rich==13.7.0
+ ruamel-yaml==0.15.87
+ safetensors==0.4.2
+ scipy==1.10.1
+ setuptools==50.3.1.post20201107
+ shtab==1.7.0
+ six==1.15.0
+ sympy==1.12
+ tokenizers==0.15.2
+ tqdm==4.66.2
+ transformers==4.38.1
+ triton==2.2.0
+ trl==0.7.11
+ typing-extensions==4.9.0
+ tyro==0.7.3
+ tzdata==2024.1
+ urllib3==1.25.11
+ wheel==0.35.1
+ xxhash==3.4.1
+ yarl==1.9.4
synth-data/synth-data_level-one-synth-data.csv ADDED
The diff for this file is too large to render. See raw diff
 
synth-data/synth-data_level-two-synth-data.csv ADDED
The diff for this file is too large to render. See raw diff