Spaces:

1x-technologies
/

1X_World_Model_Challenge_Home

Running

File size: 14,691 Bytes

992d47a
29924b3
4b7e1bc
4cedfff
 
3aac702
7a19191
e530d5a
0a7536d
e530d5a
4cd31b6
 
e530d5a
 
 
 
 
 
 
 
 
4cd31b6
e530d5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cd31b6
e530d5a
 
4cd31b6
e530d5a
4cd31b6
c5472a2
06b53c4
e530d5a
 
4cd31b6
 
c5472a2
06b53c4
 
e530d5a
4cd31b6
 
e530d5a
87626f0
e530d5a
 
4cd31b6
 
e530d5a
87626f0
e530d5a
 
4cd31b6
 
e530d5a
 
 
 
 
 
4cd31b6
e530d5a
 
4b7e1bc
4cd31b6
03997d7
4cd31b6
 
 
e584ed3
4cd31b6
 
 
71cede3
4cd31b6
e584ed3
4cd31b6
ca1fd84
 
4cd31b6
 
 
 
 
 
 
 
ca1fd84
 
4cd31b6
 
 
 
 
 
 
 
 
4b7e1bc
4cd31b6
 
03753e2
4cd31b6
4b7e1bc
4cd31b6
 
04aac6e
e584ed3
4cd31b6
80e3dd0
4cd31b6
4b7e1bc
4cd31b6
 
 
 
 
e584ed3
4cd31b6
 
c708305
4cd31b6
 
 
 
 
 
 
 
 
 
 
 
ea818e2
 
 
 
4cd31b6
f3b18f0
4cd31b6
 
 
abbf6a7
4cd31b6
 
 
 
 
 
ea818e2
 
 
 
 
 
 
4cd31b6
 
 
 
ea818e2
 
 
 
 
 
e584ed3
4cd31b6
 
 
 
8a2f0fa
4cd31b6
308e1d3
4cd31b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea818e2
4cd31b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea818e2
 
4cd31b6
ea818e2
4cd31b6
ea818e2
4cd31b6
ea818e2
4cd31b6
ea818e2
 
4cd31b6
 
 
 
 
 
e584ed3
4cd31b6
e584ed3
4cd31b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86065a1
4cd31b6
 
 
 
e584ed3
4cd31b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea818e2

import streamlit as st
import pandas as pd
import streamlit.components.v1 as components
import glob 
import os
import random
 
def resources_section():
    """Render the "Additional Resources" section as a row of link cards.

    Emits a heading, a short intro line, and one block of static HTML/CSS
    (rendered through ``st.markdown`` with ``unsafe_allow_html=True``) that
    lays out one card per external resource in a wrapping flexbox. The HTML
    is a fixed literal; no external input is interpolated into it.
    """
    st.markdown("## Additional Resources")
    st.write("For more information and updates, explore our blog posts or join our community channels:")

    # Each card is itself the <a class="resource-card"> element, so the link
    # reset (no underline, inherited color) has to target `a.resource-card`.
    # The descendant selector `.resource-card a` is kept alongside it so the
    # rule still applies if a card ever wraps a nested link.
    # NOTE(review): the Discord href looks like a placeholder invite URL —
    # confirm the real invite before relying on that card.
    resources_html = """
    <style>
    .resource-cards {
      display: flex;
      flex-wrap: wrap;
      gap: 20px;
      margin-top: 10px;
    }
    .resource-card {
      background-color: #1f1f1f; /* Dark card background */
      border: 1px solid #333333;
      border-radius: 6px;
      width: 220px;
      padding: 12px;
      text-align: center;
      transition: box-shadow 0.2s ease;
    }
    .resource-card:hover {
      box-shadow: 0 4px 8px rgba(255, 255, 255, 0.15);
    }
    .resource-card h4 {
      margin: 0;
      color: #ffffff;
    }
    .resource-card p {
      color: #cccccc;
      margin: 6px 0 0 0;
      font-size: 14px;
    }
    a.resource-card,
    .resource-card a {
      text-decoration: none;
      color: inherit; /* Inherits the surrounding text color */
    }
    </style>

    <div class="resource-cards">
      <!-- Phase 1 Blog Post -->
      <a href="https://www.1x.tech/discover/1x-world-model" target="_blank" class="resource-card">
        <h4>Phase 1 Blog Post</h4>
        <p>World Model Challenge Launch</p>
      </a>

      <!-- Phase 2 Blog Post -->
      <a href="https://www.1x.tech/discover/1x-world-model-sampling-challenge" target="_blank" class="resource-card">
        <h4>Phase 2 Blog Post</h4>
        <p>Challenge Updates</p>
      </a>

      <!-- GitHub -->
      <a href="https://github.com/1x-technologies/world-model-challenge" target="_blank" class="resource-card">
        <h4>GitHub Repository</h4>
        <p>View code and issues</p>
      </a>

      <!-- Hugging Face -->
      <a href="https://huggingface.co/1x-technologies" target="_blank" class="resource-card">
        <h4>Hugging Face Repository</h4>
        <p>Datasets and Models</p>
      </a>

      <!-- Discord -->
      <a href="https://discord.gg/your-invite" target="_blank" class="resource-card">
        <h4>Discord Channel</h4>
        <p>Join the discussion</p>
      </a>
    </div>
    """

    st.markdown(resources_html, unsafe_allow_html=True)

def scoring_section():
    """Render the "Scoring" section of the page.

    Output order: section heading, a one-line summary, a two-column points
    breakdown (one column per challenge), a collapsible tie-breaker note,
    and a closing paragraph about the leaderboards.
    """
    st.markdown("## Scoring")
    st.write(
        "Scores combine points from both the Compression and Sampling Challenges. Final rankings are based on total points."
    )

    st.markdown("### Points Breakdown")

    # Both challenges award the same points per placement, so a single
    # markdown list is rendered under each column heading.
    placement_points = """
            - **1st Place**: 10 points  
            - **2nd Place**: 7 points  
            - **3rd Place**: 5 points
            """
    challenge_names = ("Compression", "Sampling")
    for column, challenge in zip(st.columns(2), challenge_names):
        with column:
            st.markdown(f'<h3 style="margin-left:15px;">{challenge}</h3>', unsafe_allow_html=True)
            st.markdown(placement_points)

    # The tie-breaker rule lives in an expander to keep the section compact.
    tie_breaker_text = (
        "The overall winner will be the team with the highest total points. "
        "In the event of a tie, the team with the highest score on the Sampling challenge will be declared the winner."
    )
    with st.expander("Tie-Breakers"):
        st.write(tie_breaker_text)

    leaderboard_text = (
        "The leaderboard, which shows the total points across the challenges, will go live in March. "
        "Additionally, both challenges—**Compression** and **Sampling**—will have its own leaderboard on their "
        "respective Hugging Face submission servers."
    )
    st.write(leaderboard_text)

def _render_gif_gallery(gif_folder, per_row=4, max_gifs=16):
    """Show up to ``max_gifs`` randomly ordered GIFs from ``gif_folder`` in a grid.

    Args:
        gif_folder: Directory searched (non-recursively) for ``*.gif`` files.
        per_row: Maximum number of GIFs placed side by side in one row.
        max_gifs: Upper bound on the number of GIFs displayed.

    Rows are built only from files that actually exist, so a folder holding
    fewer than ``max_gifs`` files (or none at all) never requests a row of
    zero columns from ``st.columns``.
    """
    gif_paths = glob.glob(os.path.join(gif_folder, "*.gif"))
    # Shuffle so the displayed selection varies between script runs.
    random.shuffle(gif_paths)
    selected = gif_paths[:max_gifs]

    for start in range(0, len(selected), per_row):
        row_gifs = selected[start:start + per_row]
        cols = st.columns(len(row_gifs))
        for col, gif_path in zip(cols, row_gifs):
            col.image(gif_path, use_container_width=True)


def _render_faqs():
    """Render the FAQ list, one expander per question, in a fixed order."""
    faq_entries = (
        (
            "Do I have to participate in both challenges?",
            "No, you may choose to participate in one challenge. However, participating in both challenges may improve your overall ranking.",
        ),
        (
            "Can I work in a team?",
            "Yes, team submissions are welcome.",
        ),
        (
            "What are the submission deadlines?",
            "Deadlines for challenges soon to be announced.",
        ),
        (
            "Are there constraints on model size or computational resources for submissions?",
            "There are no strict limits on model size, parameter count, or inference time for any challenge.",
        ),
        (
            "Can we fine-tune pre-trained models, and how do we disclose this in our submission?",
            "Yes, fine-tuning pre-trained models is allowed as long as they are publicly available and not trained on private datasets.",
        ),
        (
            "What preprocessing steps are applied to the raw data, and can we apply our own?",
            "The raw data in the `world_model_raw_data` dataset includes unprocessed 512x512 MP4 video logs as collected from the EVE Android. You are free to apply your own preprocessing techniques—such as frame resizing, color normalization, etc.",
        ),
        (
            "How is the Cosmos tokenizer used in the Tokenized Data dataset, and can we use a different tokenizer?",
            "The `world_model_tokenized_data` dataset uses NVIDIA’s Discrete Video 8x8x8 Cosmos Tokenizer to convert raw 256x256 video into tokens. For the Compression Challenge, this tokenizer is mandatory for a consistent benchmark. Alternative tokenizers are permitted for the Sampling Challenge.",
        ),
        (
            "What metrics are used to evaluate the Sampling Challenge submissions?",
            "Submissions are evaluated by comparing the predicted frame (2 seconds ahead) to the ground-truth frame using Peak Signal-to-Noise Ratio (PSNR).",
        ),
        (
            "Can we use generative models like diffusion models or GANs for the Sampling Challenge?",
            "Yes, you are welcome to use generative models such as diffusion models, GANs, or autoregressive approaches for the Sampling Challenge, as long as they adhere to the rules (e.g., no use of actual future frames during inference). The challenge evaluates the quality of the predicted frame, not the method used, so feel free to experiment with cutting-edge techniques to achieve plausible and accurate predictions.",
        ),
    )
    for question, answer in faq_entries:
        with st.expander(question):
            st.write(answer)


def main():
    """Build the 1X World Model Challenge landing page.

    Sections are rendered top to bottom in this order: welcome, motivation,
    challenge descriptions (with links to the submission servers), datasets
    plus a GIF gallery, scoring, rules, notes for earlier participants,
    FAQs, and additional resources.
    """
    # Page configuration is issued before any other Streamlit call.
    st.set_page_config(page_title="1X World Model Challenge")

    st.title("1X World Model Challenge")
    st.markdown("## Welcome")
    st.write(
        "Welcome to the 1X World Model Challenge. This platform hosts two challenges—Compression and Sampling—focused on advancing research in world models for robotics."
    )
    st.write(
        "In partnership with OpenDriveLab, we are launching this challenge as part of the [Autonomous Grand Challenge 2025](https://opendrivelab.com/challenge2025/), held in conjunction with the CVPR 2025 (confirmed) and ICCV 2025 (tentative) workshops."
    )

    st.markdown("---")

    st.markdown("## Motivation")
    st.write(
        "Real-world robotics faces a fundamental challenge: environments are dynamic and change over time, "
        "making consistent evaluation of policy performance difficult. Driven by recent advances in video generation, **world models** offer a solution by "
        "learning to simulate complex real-world interactions from image tokens paired with action data. We believe these learned simulators will enable "
        "robust evaluation and iterative improvement of robot policies without the constraints of a physical testbed."
    )
    st.image(
        "assets/model_performance_over_time.webp",
        caption="An example T-shirt folding model we trained that degrades in performance over the course of 50 days.",
        use_container_width=True
    )
    st.markdown("---")

    st.markdown("## The Challenges")

    st.write(
        "The 1X World Model Challenge is focused on predicting future first-person observations of the [EVE Android](https://www.1x.tech/eve) with the ultimate goal of solving policy evaluation."
    )

    st.markdown("#### Compression Challenge")
    st.write(
        "In the Compression Challenge, your task is to train a model to compress our robots logs effectively while preserving the critical details needed to understand and predict future interactions. Success in this challenge is measured by the loss of your model—the lower the loss, the better your model captures the complexities of real-world robot behavior."
    )
    # Link to the live submission server for the Compression Challenge.
    st.markdown(
        "**The Compression Challenge server is now live!** "
        "Submit your models here: "
        "[1X World Model Challenge – Compression Server](https://huggingface.co/spaces/1x-technologies/1X_World_Model_Challenge_Compression)"
    )

    st.markdown("#### Sampling Challenge")
    st.write(
        "In the Sampling Challenge, your task is to predict a future video frame two seconds in the future given a short clip of robot interactions. The goal is to produce a coherent and plausible continuation of the video, which accurately reflects the dynamics of the scene. Your submission will be judged on how closely it matches the actual frame."
    )
    # Link to the live submission server for the Sampling Challenge.
    st.markdown(
        "**The Sampling Challenge server is now live!** "
        "Submit your models here: "
        "[1X World Model Challenge – Sampling Server](https://huggingface.co/spaces/1x-technologies/1X_World_Model_Challenge_Sampling)"
    )

    st.markdown("---")

    st.markdown("## Datasets")
    st.write(
        "We provide two datasets for the challenge:\n\n"
        "**Raw Data:** The [world_model_raw_data](https://huggingface.co/datasets/1x-technologies/world_model_raw_data) dataset "
        "provides video logs and state sequences gathered from diverse real-world scenarios. "
        "This dataset is split into 100 shards—each containing a 512x512 MP4 video, a segment index mapping, and state arrays—"
        "and is licensed under CC-BY-NC-SA 4.0.\n\n"
        "**Tokenized Data:** The [world_model_tokenized_data](https://huggingface.co/datasets/1x-technologies/world_model_tokenized_data) dataset "
        "tokenizes the raw video sequences generated using the NVIDIA Cosmos Tokenizer. This compact representation of the raw data "
        "is optimal for the compression challenge and is released under the Apache 2.0 license.\n\n"
    )

    # Sample clips: up to 16 GIFs in random order, 4 per row.
    _render_gif_gallery("assets/v1.0", per_row=4, max_gifs=16)

    st.markdown("---")
    scoring_section()

    st.markdown("---")
    st.markdown("## Rules")
    st.markdown(
        """
    **General Guidelines:**
    - The use of publicly available datasets and pretrained weights is allowed. The use of private datasets or pretrained weights is prohibited.
    - You may use future actions to guide your frame predictions, but you must not use any actual future frames during inference.
    - There is no limit on the inference time for any of the challenges.
    - Naive nearest-neighbor retrieval combined with seeking ahead to the next frames from the training set may yield reasonable performance but is not permitted in solutions.

    **Submissions:**
    - All submissions must be reproducible. Please expect to include code, configuration files, and any necessary instructions to replicate your results.
    - The leaderboard will display results on a public test set. However, the final winner will be determined based on performance on a private test set.

    **Eligibility:**
    - Prizes cannot be awarded to individuals in U.S. sanctioned countries. We reserve the right to withhold prizes if a submission violates the spirit of the challenge.
        """,
        unsafe_allow_html=True
    )


    st.markdown("## Already Started Working on These Challenges?")
    # NOTE(review): the contact address in the last paragraph below reads like
    # an e-mail obfuscation placeholder rather than a real address — confirm.
    st.write(
        """
        Before partnering with OpenDriveLab to launch the World Model Challenge at CVPR 2025 and (tentatively) ICCV 2025, we had already released the Compression and Sampling challenges publicly, offering $10K for each. However, due to the rapid progress in the field, we are updating the challenge setups.

        For the Compression Challenge, we initially used the MAGVIT spatial tokenizer to compare participant losses. We are now switching to the highly performant Cosmos spatial-temporal tokenizer as our new standard.

        For the Sampling Challenge, the requirement has changed from predicting a frame 0.5 seconds in the future to predicting a frame 2 seconds ahead to emphasize action controllability.

        Additionally, a previous rule prohibited the use of future actions to condition frame predictions; this restriction has been lifted, and using future actions is now allowed.

        Please note that we will continue to honor solutions for the Compression Challenge $10K award submitted to [[email protected]](mailto:[email protected]) using the MAGVIT tokenizer—if you achieve a loss below 8.0 on our held-out test set—for six months from March 1, 2025. However, these solutions will not be eligible for the CVPR or ICCV competitions.
        """
    )

    st.markdown("---")

    st.markdown("## FAQs")
    _render_faqs()

    st.markdown("---")

    resources_section()

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()