File size: 641 Bytes
11d3b20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from datasets import load_dataset
import pandas as pd

# Export the IMDB train and test splits as CSV files with string sentiment
# labels ('NEGATIVE' / 'POSITIVE') in place of the integer label column.

# Sampling stride: every `divider`-th row of each split is kept (1 keeps all).
divider = 1
# Nominal number of rows per exported split; used only to build file names.
# NOTE(review): 25000 is presumably the row count of each IMDB split - confirm.
data_size = 25000 // divider
# Nominal per-label row count that appears twice in each output file name.
# NOTE(review): this is derived arithmetically, not counted from the data; for
# divider > 1 the strided sample may not contain exactly this many rows per
# label - verify before relying on the file names.
case_size = data_size // 2

dataset = load_dataset("imdb")


def _prepare_split(split, step):
    """Return every ``step``-th row of ``split`` with string sentiment labels.

    Args:
        split: One split of the loaded dataset (anything ``pd.DataFrame``
            accepts) containing a ``label`` column.
        step: Row stride applied with ``iloc[::step]``.

    Returns:
        A new, independent DataFrame whose ``label`` column holds
        ``'NEGATIVE'`` where the original value equals 0 and ``'POSITIVE'``
        for every other value.
    """
    # .copy() detaches the strided slice from its parent frame so the column
    # assignment below is not a chained assignment on a view (which makes
    # pandas emit SettingWithCopyWarning).
    frame = pd.DataFrame(split).iloc[::step, :].copy()
    frame['label'] = frame['label'].apply(
        lambda x: 'NEGATIVE' if x == 0 else 'POSITIVE'
    )
    return frame


train_df = _prepare_split(dataset['train'], divider)
test_df = _prepare_split(dataset['test'], divider)

# index=False: the positional index left over from striding is not exported.
train_df.to_csv(f'imdb_train_{case_size}_{case_size}.csv', index=False)
test_df.to_csv(f'imdb_test_{case_size}_{case_size}.csv', index=False)