charlieoneill's picture
Upload astro_paper_metadata/train/dataset_info.json with huggingface_hub
8ca8ccd verified
{
"builder_name": "parquet",
"citation": "",
"config_name": "default",
"dataset_name": "astro_paper_corpus",
"dataset_size": 4128813829,
"description": "",
"download_checksums": {
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00000-of-00009.parquet": {
"num_bytes": 240072323,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00001-of-00009.parquet": {
"num_bytes": 235851056,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00002-of-00009.parquet": {
"num_bytes": 236413937,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00003-of-00009.parquet": {
"num_bytes": 237728419,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00004-of-00009.parquet": {
"num_bytes": 236710419,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00005-of-00009.parquet": {
"num_bytes": 239567004,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00006-of-00009.parquet": {
"num_bytes": 234863979,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00007-of-00009.parquet": {
"num_bytes": 232662046,
"checksum": null
},
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00008-of-00009.parquet": {
"num_bytes": 237444927,
"checksum": null
}
},
"download_size": 2131314110,
"features": {
"id": {
"dtype": "string",
"_type": "Value"
},
"author": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"bibcode": {
"dtype": "string",
"_type": "Value"
},
"title": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"citation_count": {
"dtype": "int64",
"_type": "Value"
},
"aff": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"citation": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"database": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"read_count": {
"dtype": "int64",
"_type": "Value"
},
"keyword": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"reference": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"doi": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"subfolder": {
"dtype": "string",
"_type": "Value"
},
"filename": {
"dtype": "string",
"_type": "Value"
},
"introduction": {
"dtype": "string",
"_type": "Value"
},
"conclusions": {
"dtype": "string",
"_type": "Value"
},
"year": {
"dtype": "int64",
"_type": "Value"
},
"month": {
"dtype": "int64",
"_type": "Value"
},
"arxiv_id": {
"dtype": "string",
"_type": "Value"
},
"abstract": {
"dtype": "string",
"_type": "Value"
},
"failed_ids": {
"dtype": "bool",
"_type": "Value"
},
"keyword_search": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"umap_x": {
"dtype": "float32",
"_type": "Value"
},
"umap_y": {
"dtype": "float32",
"_type": "Value"
},
"clust_id": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": "",
"size_in_bytes": 6260127939,
"splits": {
"train": {
"name": "train",
"num_bytes": 4128813829,
"num_examples": 271544,
"shard_lengths": [
33172,
33172,
33172,
33172,
33172,
33171,
34171,
34171,
4171
],
"dataset_name": "astro_paper_corpus"
}
},
"version": {
"version_str": "0.0.0",
"major": 0,
"minor": 0,
"patch": 0
}
}