Spaces:
Runtime error
Runtime error
Create data_loader.py
Browse files- data_loader.py +18 -0
data_loader.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dask.dataframe as dd
|
2 |
+
import polars as pl
|
3 |
+
|
4 |
+
# Function to read parquet files using Dask
|
5 |
+
def read_dask_data(split: str = "train") -> "dd.DataFrame":
    """Read one split of the Codeforces-Python-Submissions dataset with Dask.

    Args:
        split: Name of the split to read, either ``"train"`` (the default,
            matching the previous hard-coded behaviour) or ``"test"``.

    Returns:
        The result of ``dd.read_parquet`` for the parquet file(s) of the
        requested split on the Hugging Face Hub.

    Raises:
        ValueError: If ``split`` is not one of the known split names.
    """
    splits = {'train': 'data/train-*.parquet', 'test': 'data/test-00000-of-00001.parquet'}
    # Validate up front so a typo fails with a clear message instead of a
    # bare KeyError or a confusing remote "file not found" error.
    if split not in splits:
        raise ValueError(
            f"Unknown split {split!r}; expected one of {sorted(splits)}"
        )
    return dd.read_parquet("hf://datasets/MatrixStudio/Codeforces-Python-Submissions/" + splits[split])
|
9 |
+
|
10 |
+
# Function to read parquet files using Polars
|
11 |
+
def read_polars_data() -> "pl.DataFrame":
    """Load the python-reasoning-dataset train parquet file with Polars.

    Returns:
        The result of ``pl.read_parquet`` for the single train shard hosted
        on the Hugging Face Hub.
    """
    source = 'hf://datasets/sdiazlor/python-reasoning-dataset/data/train-00000-of-00001.parquet'
    return pl.read_parquet(source)
|
14 |
+
|
15 |
+
# Function to read parquet files using Dask for another dataset
|
16 |
+
def read_another_dask_data() -> "dd.DataFrame":
    """Read the python-github-code train shards with Dask.

    Returns:
        The result of ``dd.read_parquet`` for every parquet file matching
        the train shard glob on the Hugging Face Hub.
    """
    shard_glob = "hf://datasets/angie-chen55/python-github-code/data/train-*-of-*.parquet"
    return dd.read_parquet(shard_glob)
|