{ "builder_name": "imdb", "citation": "@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},\n title = {Learning Word Vectors for Sentiment Analysis},\n booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n month = {June},\n year = {2011},\n address = {Portland, Oregon, USA},\n publisher = {Association for Computational Linguistics},\n pages = {142--150},\n url = {http://www.aclweb.org/anthology/P11-1015}\n}\n", "config_name": "plain_text", "dataset_size": 133190302, "description": "Large Movie Review Dataset.\nThis is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.", "download_checksums": { "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz": { "num_bytes": 84125825, "checksum": "c40f74a18d3b61f90feba1e17730e0d38e8b97c05fde7008942e91923d1658fe" } }, "download_size": 84125825, "features": { "text": { "dtype": "string", "_type": "Value" }, "docid": { "dtype": "int64", "_type": "Value" } }, "homepage": "http://ai.stanford.edu/~amaas/data/sentiment/", "license": "", "size_in_bytes": 217316127, "splits": { "train": { "name": "train", "num_bytes": 33432823, "num_examples": 25000, "dataset_name": "imdb" }, "test": { "name": "test", "num_bytes": 32650685, "num_examples": 25000, "dataset_name": "imdb" }, "unsupervised": { "name": "unsupervised", "num_bytes": 67106794, "num_examples": 50000, "dataset_name": "imdb" } }, "task_templates": [], "version": { "version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0 } }