Upload 12 files
- Nebula-baseline/NeBuLa-org_adapter_config.json +26 -0
- Nebula-baseline/adapter_model.safetensors +3 -0
- Nebula-synth/NeBuLa-synth_adapter_config.json +26 -0
- Nebula-synth/adapter_model.safetensors +3 -0
- README.md +25 -0
- data/data_actseq-test-new.csv +0 -0
- data/data_actseq-train-new.csv +0 -0
- data/data_actseq-val-new.csv +0 -0
- merge.py +56 -0
- requirements.txt +90 -0
- synth-data/synth-data_level-one-synth-data.csv +0 -0
- synth-data/synth-data_level-two-synth-data.csv +0 -0
Nebula-baseline/NeBuLa-org_adapter_config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "/tmpdir/chaturve/llama-3-8b-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
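For readers reproducing the setup, the JSON above corresponds to the following peft `LoraConfig` (a minimal sketch, assuming peft 0.7.1 as pinned in requirements.txt; only the LoRA-relevant fields are shown, and the base-model path is machine-specific):

```python
# Minimal sketch: a peft LoraConfig equivalent to the
# NeBuLa-org adapter_config.json above.
from peft import LoraConfig

config = LoraConfig(
    r=64,                                  # "r" in the JSON: LoRA rank
    lora_alpha=16,                         # scaling factor
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "v_proj"],   # only attention q/v projections are adapted
    task_type="CAUSAL_LM",
)
```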
Nebula-baseline/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff3d5d873f8094e23565ca85b675aca9b86457c03bafb339967ffd49eabe50c0
+size 109069176
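The weights are stored via Git LFS, so the file checked into the repo is just this three-line pointer; the `oid`/`size` pair can be used to confirm that a download resolved the real ~109 MB safetensors file rather than the pointer itself. A minimal verification sketch (the local path is hypothetical):

```python
# Minimal sketch: verify a downloaded adapter_model.safetensors
# against the sha256 and size recorded in the LFS pointer above.
import hashlib
import os

path = "Nebula-baseline/adapter_model.safetensors"  # hypothetical local path
expected_oid = "ff3d5d873f8094e23565ca85b675aca9b86457c03bafb339967ffd49eabe50c0"
expected_size = 109069176

assert os.path.getsize(path) == expected_size, "size mismatch: got the LFS pointer only?"
with open(path, "rb") as fh:
    digest = hashlib.sha256(fh.read()).hexdigest()
assert digest == expected_oid, "sha256 mismatch"
```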
Nebula-synth/NeBuLa-synth_adapter_config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "/tmpdir/chaturve/llama-3-8b-hf/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
Nebula-synth/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ead40404c9439f63060938a58d0f406a56977775099a6723af2e41e36247dd8e
+size 109069176
README.md
ADDED
@@ -0,0 +1,25 @@
+---
+pipeline_tag: other
+tags:
+- minecraft
+- action prediction
+language:
+- en
+---
+
+# Nebula: A Minecraft Neural Builder
+
+Nebula is an LLM (Llama3-8B) finetuned on the Minecraft action prediction task (https://aclanthology.org/2020.acl-main.232/).
+
+## Model Details
+We provide two variants of the model: Nebula-baseline, which was trained on the Minecraft dataset; and Nebula-synth, which was trained on level-1
+and level-2 synthetic data along with the Minecraft dataset. Both models achieve a similar net-action F1 score on the Minecraft test set.
+
+### Model Description
+
+- **Language(s) (NLP):** English
+- **Finetuned from model:** Llama3-8B
+
+### Model Sources
+
+**Paper:** https://arxiv.org/abs/2406.18164
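The adapters can also be applied at load time without baking them into the base weights (merge.py below takes the merging route instead). A minimal inference sketch, assuming a local Llama-3-8B checkpoint; paths are placeholders, and note that peft looks for a file named `adapter_config.json`, while the configs in this repo ship as `NeBuLa-*_adapter_config.json`, so a rename or copy may be needed first:

```python
# Minimal sketch: run a Nebula adapter on top of the base model
# without merging. Paths are placeholders.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "/path/to/meta-llama3-8b", torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "Nebula-baseline")  # or "Nebula-synth"
tokenizer = AutoTokenizer.from_pretrained("/path/to/meta-llama3-8b")

# Prompt format taken from formatting_prompts_func in merge.py;
# "..." stands in for a real dialogue excerpt.
prompt = "<|begin_of_text|>Predict the action sequence (AS) for the Minecraft excerpt:\n ...\n ### AS:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_length=4096, do_sample=False)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```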
data/data_actseq-test-new.csv
ADDED
The diff for this file is too large to render. See raw diff.

data/data_actseq-train-new.csv
ADDED
The diff for this file is too large to render. See raw diff.

data/data_actseq-val-new.csv
ADDED
The diff for this file is too large to render. See raw diff.
merge.py
ADDED
@@ -0,0 +1,56 @@
+import torch
+import json
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from datasets import load_dataset
+from peft import LoraConfig, PeftModel
+
+device_map = "auto"
+model = AutoModelForCausalLM.from_pretrained(
+    "/path/to/meta-llama3-8b",
+    #low_cpu_mem_usage=True,
+    return_dict=True,
+    torch_dtype=torch.float16,
+    device_map=device_map,
+)
+
+model = PeftModel.from_pretrained(model, "/path/to/llama3-8b-adapter", device_map=device_map)
+model = model.merge_and_unload()
+
+tokenizer = AutoTokenizer.from_pretrained("/path/to/meta-llama3-8b", trust_remote_code=True)
+tokenizer.pad_token_id = tokenizer.eos_token_id + 1
+model.config.pad_token_id = tokenizer.pad_token_id
+pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=4096, do_sample=False)
+print("Padding side:", tokenizer.padding_side)
+val_dataset = load_dataset("csv", data_files={'val': '/path/to/actseq-val-new.csv'})["val"]
+test_dataset = load_dataset("csv", data_files={'test': '/path/to/actseq-test-new.csv'})["test"]
+
+
+def formatting_prompts_func(example):
+    output_texts = []
+    for i in range(len(example['dial_with_actions'])):
+        text = f"<|begin_of_text|>Predict the action sequence (AS) for the Minecraft excerpt:\n {example['dial_with_actions'][i]}\n ### AS:"
+        output_texts.append(text)
+    return output_texts
+
+
+val_texts = formatting_prompts_func(val_dataset)
+test_texts = formatting_prompts_func(test_dataset)
+
+print("Val Length:", len(val_texts))
+print("Test Length:", len(test_texts))
+
+f = open("/path/to/val-output-file", "w")
+
+for text in val_texts:
+    print(text)
+    print(pipe(text)[0]["generated_text"], file=f)
+
+f.close()
+
+f = open("/path/to/test-output-file", "w")
+
+for text in test_texts:
+    print(text)
+    print(pipe(text)[0]["generated_text"], file=f)
+
+f.close()
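Merging via `merge_and_unload()` folds the LoRA deltas into the base weights, so generation runs at plain-model speed with no adapter overhead. Because every prompt built by `formatting_prompts_func` ends with the `### AS:` marker, the model's predicted action sequence can be recovered from each generated line by splitting on that marker. A small post-processing sketch (the helper name is ours, not part of the repo):

```python
# Hypothetical helper (not part of merge.py): recover the predicted
# action sequence from a generated text, using the "### AS:" marker
# that terminates every prompt.
def extract_action_sequence(generated_text: str) -> str:
    return generated_text.split("### AS:")[-1].strip()
```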
requirements.txt
ADDED
@@ -0,0 +1,90 @@
+accelerate==0.27.2
+aiohttp==3.9.3
+aiosignal==1.3.1
+async-timeout==4.0.3
+attrs==23.2.0
+bitsandbytes==0.41.3
+brotlipy==0.7.0
+certifi==2020.6.20
+cffi==1.14.3
+chardet==3.0.4
+conda-package-handling==1.7.2
+cryptography==3.2.1
+datasets==2.17.1
+dill==0.3.8
+docstring-parser==0.15
+filelock==3.13.1
+frozenlist==1.4.1
+fsspec==2023.10.0
+huggingface-hub==0.20.3
+idna==2.10
+Jinja2==3.1.3
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+networkx==3.1
+numpy==1.24.4
+nvidia-cublas-cu11==11.11.3.6
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu11==11.8.87
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu11==11.8.89
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu11==11.8.89
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu11==8.7.0.84
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu11==10.9.0.58
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu11==10.3.0.86
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu11==11.4.1.48
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu11==11.7.5.86
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu11==2.19.3
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu11==11.8.86
+nvidia-nvtx-cu12==12.1.105
+packaging==23.2
+pandas==2.0.3
+peft==0.7.1
+pillow==10.2.0
+pip==20.2.4
+psutil==5.9.8
+pyarrow==15.0.0
+pyarrow-hotfix==0.6
+pycosat==0.6.3
+pycparser==2.20
+pygments==2.17.2
+pyOpenSSL==19.1.0
+PySocks==1.7.1
+python-dateutil==2.8.2
+pytz==2024.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.24.0
+rich==13.7.0
+ruamel-yaml==0.15.87
+safetensors==0.4.2
+scipy==1.10.1
+setuptools==50.3.1.post20201107
+shtab==1.7.0
+six==1.15.0
+sympy==1.12
+tokenizers==0.15.2
+tqdm==4.66.2
+transformers==4.38.1
+triton==2.2.0
+trl==0.7.11
+typing-extensions==4.9.0
+tyro==0.7.3
+tzdata==2024.1
+urllib3==1.25.11
+wheel==0.35.1
+xxhash==3.4.1
+yarl==1.9.4
synth-data/synth-data_level-one-synth-data.csv
ADDED
The diff for this file is too large to render. See raw diff.

synth-data/synth-data_level-two-synth-data.csv
ADDED
The diff for this file is too large to render. See raw diff.