Upload 5 files

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,3 +1,20 @@
----
-license: mit
----

+---
+license: mit
+---
+# iVideoGPT (Fine-tuned to BAIR Robot Pushing, 64x64 resolution, action-free)
+This is the fine-tuned model introduced in the paper [iVideoGPT: Interactive VideoGPTs are Scalable World Models](https://arxiv.org/abs/2405.15223).
+See https://github.com/thuml/iVideoGPT for examples of how to use this model.
+## Citation
+```bibtex
+@inproceedings{wu2024ivideogpt,
+    title={iVideoGPT: Interactive VideoGPTs are Scalable World Models},
+    author={Jialong Wu and Shaofeng Yin and Ningya Feng and Xu He and Dong Li and Jianye Hao and Mingsheng Long},
+    booktitle={Advances in Neural Information Processing Systems},
+    year={2024}
+}
+```

tokenizer/config.json ADDED Viewed

+{
+  "_class_name": "CompressiveVQModel",
+  "_diffusers_version": "0.27.0.dev0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512
+  ],
+  "context_length": 1,
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 64,
+  "layers_per_block": 2,
+  "lookup_from_codebook": true,
+  "max_att_resolution": 16,
+  "mid_block_add_attention": false,
+  "norm_num_groups": 32,
+  "norm_type": "group",
+  "num_dyn_embeddings": 8192,
+  "num_vq_embeddings": 8192,
+  "out_channels": 3,
+  "patch_size": 4,
+  "resolution": 64,
+  "sample_size": 32,
+  "scaling_factor": 0.18215,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "vq_embed_dim": null
+}

tokenizer/diffusion_pytorch_model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f29ac860b2ab88552d504d94e1aac0c39dcee45ea90856a7f44dc1a7e46dd1ec
+size 455374152

transformer/config.json ADDED Viewed

+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.1,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "max_length": 1024,
+  "max_position_embeddings": 1024,
+  "model_type": "llama",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 16386
+}

transformer/model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:af8b961b52aa99963380d4c3672ef146fd7b614a12d57d85ba8da00165a6fa66
+size 553749536