lunde's picture
Initial commit
bd65e34
from pathlib import Path
import polars as pl
def build_labels(label_file: Path, fps: int = 3) -> "pl.DataFrame":
    """Build a per-frame highlight label table for the videos in ``label_file``.

    The parquet file is read with polars and must provide the columns
    ``vid_id``, ``start`` and ``stop``. ``start``/``stop`` are cast to
    ``pl.Duration`` and converted to whole seconds, then multiplied by ``fps``
    to obtain the half-open frame range ``[start * fps, stop * fps)`` that is
    labelled ``1``. Every other frame of the video is labelled ``0``.

    The number of frames per video is taken from the count of ``*.jpg`` files
    in the directory named ``str(vid_id)`` (resolved against the current
    working directory).

    Args:
        label_file: Path to the parquet file holding the highlight intervals.
        fps: Number of extracted frames per second of video.

    Returns:
        A DataFrame sorted by ``vid_id`` and ``frame`` with the columns
        ``vid_id``, ``frame`` (1-based, one row per image found on disk),
        ``label`` (1 inside a highlight, otherwise 0) and ``path``
        (``"<vid_id>/img<frame>.jpg"``).
    """
    df = pl.read_parquet(label_file)

    # One row per highlighted frame.
    # NOTE(review): later polars releases renamed `dt.seconds()` to
    # `dt.total_seconds()`; kept as-is to match the version this file targets.
    # NOTE(review): highlight frames start at `start * fps` (possibly 0) while
    # on-disk frames are numbered from 1 -- confirm the intended offset.
    highlights = (
        df.select(
            "vid_id",
            frame=pl.int_ranges(
                pl.col("start").cast(pl.Duration).dt.seconds() * fps,
                pl.col("stop").cast(pl.Duration).dt.seconds() * fps,
            ),
            label=pl.lit(1),
        )
        .explode("frame")
        # Empty ranges explode into a null frame; they can never match a frame.
        .drop_nulls("frame")
        # Overlapping intervals would otherwise duplicate rows in the join.
        .unique()
    )

    # Count the extracted images of every video without materialising the
    # directory listing.
    vid_ids = df["vid_id"].unique()
    frame_counts = pl.Series(
        "n_frames",
        [sum(1 for _ in Path(str(vid)).glob("*.jpg")) for vid in vid_ids],
        dtype=pl.Int64,
    )

    # One row per existing frame (1-based). Videos without any image are
    # dropped up front because exploding an empty list would yield a null row.
    all_frames = (
        pl.DataFrame({"vid_id": vid_ids, "n_frames": frame_counts})
        .filter(pl.col("n_frames") > 0)
        .select("vid_id", frame=pl.int_ranges(1, pl.col("n_frames") + 1))
        .explode("frame")
    )

    labeled_df = (
        all_frames.join(highlights, on=["vid_id", "frame"], how="left")
        # Frames that matched no highlight interval are negatives.
        .with_columns(pl.col("label").fill_null(0))
        .with_columns(
            path=pl.concat_str(
                [
                    pl.col("vid_id").cast(pl.Utf8) + "/img",
                    pl.col("frame").cast(pl.Utf8) + ".jpg",
                ]
            )
        )
        .sort("vid_id", "frame")
    )
    return labeled_df