File size: 1,069 Bytes
bd65e34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from pathlib import Path
import polars as pl


def build_labels(label_file: Path, fps: int = 3):
    df = pl.read_parquet(label_file)
    highlights = df.select(
        "vid_id",
        frame=pl.int_ranges(
            pl.col("start").cast(pl.Duration).dt.seconds() * fps,
            pl.col("stop").cast(pl.Duration).dt.seconds() * fps,
        ),
        label=pl.lit(1),
    ).explode("frame")

    dfs = []
    for vid in df["vid_id"].unique():
        frames = len(list(Path(str(vid)).glob("*.jpg")))
        dfs.append(
            pl.DataFrame({"vid_id": [vid] * frames, "frame": np.arange(1, frames + 1)})
        )

    labeled_df = pl.concat(dfs)
    labeled_df = labeled_df.join(
        highlights, on=["vid_id", "frame"], how="left"
    ).fill_null(0)
    labeled_df = labeled_df.with_columns(
        path=pl.concat_str(
            [
                pl.col("vid_id").cast(pl.Utf8) + "/img",
                pl.col("frame").cast(pl.Utf8) + ".jpg",
            ]
        )
    )
    labeled_df = labeled_df.sort("vid_id", "frame")
    labeled_df.head(2)