{ "builder_name": "parquet", "citation": "", "config_name": "default", "dataset_name": "pathfinder_arxiv_data", "dataset_size": 4065510154, "description": "", "download_checksums": { "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00000-of-00008.parquet": { "num_bytes": 406754152, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00001-of-00008.parquet": { "num_bytes": 405109745, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00002-of-00008.parquet": { "num_bytes": 405466052, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00003-of-00008.parquet": { "num_bytes": 406784839, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00004-of-00008.parquet": { "num_bytes": 404752067, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00005-of-00008.parquet": { "num_bytes": 404624503, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00006-of-00008.parquet": { "num_bytes": 392634525, "checksum": null }, "hf://datasets/kiyer/pathfinder_arxiv_data@1a8eaa7eef5a503386a1487e20f13bedba605245/data/train-00007-of-00008.parquet": { "num_bytes": 397101125, "checksum": null } }, "download_size": 3223227008, "features": { "ads_id": { "dtype": "string", "_type": "Value" }, "arxiv_id": { "dtype": "string", "_type": "Value" }, "title": { "dtype": "string", "_type": "Value" }, "abstract": { "dtype": "string", "_type": "Value" }, "embed": { "feature": { "dtype": "float32", "_type": "Value" }, "_type": "Sequence" }, "umap_x": { "dtype": "float32", "_type": "Value" }, "umap_y": { "dtype": "float32", "_type": "Value" }, "date": { "dtype": "date32", "_type": "Value" }, "cites": { "dtype": "int64", "_type": "Value" }, "bibcode": { "dtype": "string", "_type": "Value" }, "keywords": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "ads_keywords": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "read_count": { "dtype": "int64", "_type": "Value" }, "doi": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "authors": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "aff": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "cite_bibcodes": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" }, "ref_bibcodes": { "feature": { "dtype": "string", "_type": "Value" }, "_type": "Sequence" } }, "homepage": "", "license": "", "size_in_bytes": 7288737162, "splits": { "train": { "name": "train", "num_bytes": 4065510154, "num_examples": 352194, "shard_lengths": [ 43000, 43025, 43025, 43024, 44024, 44024, 46024, 44024, 2024 ], "dataset_name": "pathfinder_arxiv_data" } }, "version": { "version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0 } }