|
{ |
|
"builder_name": "parquet", |
|
"citation": "", |
|
"config_name": "default", |
|
"dataset_name": "astro_paper_corpus", |
|
"dataset_size": 4128813829, |
|
"description": "", |
|
"download_checksums": { |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00000-of-00009.parquet": { |
|
"num_bytes": 240072323, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00001-of-00009.parquet": { |
|
"num_bytes": 235851056, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00002-of-00009.parquet": { |
|
"num_bytes": 236413937, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00003-of-00009.parquet": { |
|
"num_bytes": 237728419, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00004-of-00009.parquet": { |
|
"num_bytes": 236710419, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00005-of-00009.parquet": { |
|
"num_bytes": 239567004, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00006-of-00009.parquet": { |
|
"num_bytes": 234863979, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00007-of-00009.parquet": { |
|
"num_bytes": 232662046, |
|
"checksum": null |
|
}, |
|
"hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00008-of-00009.parquet": { |
|
"num_bytes": 237444927, |
|
"checksum": null |
|
} |
|
}, |
|
"download_size": 2131314110, |
|
"features": { |
|
"id": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"author": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"bibcode": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"title": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"citation_count": { |
|
"dtype": "int64", |
|
"_type": "Value" |
|
}, |
|
"aff": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"citation": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"database": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"read_count": { |
|
"dtype": "int64", |
|
"_type": "Value" |
|
}, |
|
"keyword": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"reference": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"doi": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"subfolder": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"filename": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"introduction": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"conclusions": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"year": { |
|
"dtype": "int64", |
|
"_type": "Value" |
|
}, |
|
"month": { |
|
"dtype": "int64", |
|
"_type": "Value" |
|
}, |
|
"arxiv_id": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"abstract": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"failed_ids": { |
|
"dtype": "bool", |
|
"_type": "Value" |
|
}, |
|
"keyword_search": { |
|
"feature": { |
|
"dtype": "string", |
|
"_type": "Value" |
|
}, |
|
"_type": "Sequence" |
|
}, |
|
"umap_x": { |
|
"dtype": "float32", |
|
"_type": "Value" |
|
}, |
|
"umap_y": { |
|
"dtype": "float32", |
|
"_type": "Value" |
|
}, |
|
"clust_id": { |
|
"dtype": "int64", |
|
"_type": "Value" |
|
} |
|
}, |
|
"homepage": "", |
|
"license": "", |
|
"size_in_bytes": 6260127939, |
|
"splits": { |
|
"train": { |
|
"name": "train", |
|
"num_bytes": 4128813829, |
|
"num_examples": 271544, |
|
"shard_lengths": [ |
|
33172, |
|
33172, |
|
33172, |
|
33172, |
|
33172, |
|
33171, |
|
34171, |
|
34171, |
|
4171 |
|
], |
|
"dataset_name": "astro_paper_corpus" |
|
} |
|
}, |
|
"version": { |
|
"version_str": "0.0.0", |
|
"major": 0, |
|
"minor": 0, |
|
"patch": 0 |
|
} |
|
} |