S-Dreamer committed on
Commit
7eb356c
·
verified ·
1 Parent(s): 872b9ea

Create data_loader.py

Browse files
Files changed (1) hide show
  1. data_loader.py +18 -0
data_loader.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dask.dataframe as dd
2
+ import polars as pl
3
+
4
+ # Function to read parquet files using Dask
5
def read_dask_data(split="train"):
    """Read one split of the Codeforces Python submissions dataset with Dask.

    Args:
        split: Name of the split to load, either ``"train"`` (the default,
            matching the previous hard-coded behavior) or ``"test"``.

    Returns:
        The object returned by ``dd.read_parquet`` for the Parquet file(s)
        of the selected split.

    Raises:
        ValueError: If ``split`` is not one of the known split names.
    """
    base_url = "hf://datasets/MatrixStudio/Codeforces-Python-Submissions/"
    splits = {'train': 'data/train-*.parquet', 'test': 'data/test-00000-of-00001.parquet'}
    # Validate before any I/O so a typo fails fast with a clear message
    # instead of a bare KeyError.
    if split not in splits:
        raise ValueError(
            f"Unknown split {split!r}; expected one of {sorted(splits)}"
        )
    return dd.read_parquet(base_url + splits[split])
9
+
10
+ # Function to read parquet files using Polars
11
def read_polars_data():
    """Load the python-reasoning-dataset train Parquet file via Polars.

    Returns:
        The object returned by ``pl.read_parquet`` for the single train
        shard under the ``sdiazlor/python-reasoning-dataset`` dataset path.
    """
    source = 'hf://datasets/sdiazlor/python-reasoning-dataset/data/train-00000-of-00001.parquet'
    return pl.read_parquet(source)
14
+
15
+ # Function to read parquet files using Dask for another dataset
16
def read_another_dask_data():
    """Load the python-github-code train shards via Dask.

    Returns:
        The object returned by ``dd.read_parquet`` for every file matching
        the ``train-*-of-*.parquet`` glob under the
        ``angie-chen55/python-github-code`` dataset path.
    """
    source = "hf://datasets/angie-chen55/python-github-code/data/train-*-of-*.parquet"
    return dd.read_parquet(source)