# File size: 762 Bytes
# Revision: 7eb356c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import dask.dataframe as dd
import polars as pl

# Function to read parquet files using Dask
def read_dask_data(split: str = "train") -> "dd.DataFrame":
    """Read one split of the Codeforces Python submissions dataset with Dask.

    Args:
        split: Name of the split to load, either ``"train"`` (the default,
            matching the previous hard-coded behavior) or ``"test"``.

    Returns:
        The value returned by ``dd.read_parquet`` for the selected split's
        parquet path under the ``hf://`` dataset URL.

    Raises:
        ValueError: If ``split`` is not one of the known split names.
    """
    splits = {'train': 'data/train-*.parquet', 'test': 'data/test-00000-of-00001.parquet'}
    # Validate up front so a typo yields a clear message instead of a bare
    # KeyError, and so no remote access is attempted for an unknown split.
    if split not in splits:
        raise ValueError(
            f"Unknown split {split!r}; expected one of {sorted(splits)}"
        )
    return dd.read_parquet(
        "hf://datasets/MatrixStudio/Codeforces-Python-Submissions/" + splits[split]
    )

# Function to read parquet files using Polars
def read_polars_data():
    """Read the python-reasoning-dataset train parquet file with Polars.

    Returns:
        The value returned by ``pl.read_parquet`` for the dataset's single
        train shard.
    """
    source = 'hf://datasets/sdiazlor/python-reasoning-dataset/data/train-00000-of-00001.parquet'
    return pl.read_parquet(source)

# Function to read parquet files using Dask for another dataset
def read_another_dask_data():
    """Read the python-github-code train parquet shards with Dask.

    Returns:
        The value returned by ``dd.read_parquet`` for the glob pattern that
        matches every ``train-*-of-*.parquet`` file of the dataset.
    """
    shard_glob = "hf://datasets/angie-chen55/python-github-code/data/train-*-of-*.parquet"
    return dd.read_parquet(shard_glob)