lunde committed on
Commit bd65e34 · 0 Parent(s):

Initial commit


initial commit

downscaling and initial labeling

WIP

WIP

fix: make modules work better

WIP: refactor

Create FUNDING.yml

wip: present

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

wip

fix: deps lock

bump

add solara

WIP

fix: more options

fix: devcontainer

fix: simplify app

fix: improve devcontainer

WIP

WIP

WIP

WIP

WIP

feat: finished app

fix: inference more than 2k frames

WIP: finalizing and prettifying

Swap tab order

fix: empty file/model dir

fix: allow model downloads from r2

fix: add out_folder to download

fix: add end_time

WIP

wip

WIP

fix: env

WIP

fix: make Dockerfile work by default

fix: pre-commit

fix: improve UI looks and clarify divider, minor UX improvements too

fix: minor UI change

WIP: Adding plotly callback to enable interactive time selection

wip

fix: update

.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "build": {
+     "dockerfile": "../Dockerfile"
+   },
+   "postCreateCommand": "conda init",
+   "postAttachCommand": "conda activate highlights",
+   "features": {
+     "ghcr.io/devcontainers/features/git": {},
+     "ghcr.io/devcontainers-contrib/features/apt-get-packages": {},
+     "ghcr.io/devcontainers-contrib/features/ffmpeg-apt-get:1": {},
+     "ghcr.io/devcontainers-contrib/features/rclone:1": {},
+     "ghcr.io/devcontainers/features/github-cli:1": {}
+   },
+   "forwardPorts": [
+     8765
+   ],
+   "customizations": {
+     // Configure properties specific to VS Code.
+     "vscode": {
+       // Add the IDs of extensions you want installed when the container is created.
+       "extensions": [
+         "ms-azuretools.vscode-docker",
+         "ms-python.python",
+         "ms-python.black-formatter"
+       ]
+     }
+   }
+ }
.dockerignore ADDED
@@ -0,0 +1,9 @@
+ converted/
+ downloaded/
+ lightning_logs/
+ mlruns/
+ tmp/
+ highlights/
+ out/
+ Dockerfile
+ env.yml
.github/FUNDING.yml ADDED
@@ -0,0 +1,13 @@
+ # These are supported funding model platforms
+ 
+ github: [londogard] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+ patreon: # Replace with a single Patreon username
+ open_collective: # Replace with a single Open Collective username
+ ko_fi: # Replace with a single Ko-fi username
+ tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+ community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+ liberapay: # Replace with a single Liberapay username
+ issuehunt: # Replace with a single IssueHunt username
+ otechie: # Replace with a single Otechie username
+ lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
+ custom: [https://www.buymeacoffee.com/hlondogard] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
.gitignore ADDED
@@ -0,0 +1,180 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ 
+ # C extensions
+ *.so
+ 
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ 
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+ 
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+ 
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+ 
+ # Translations
+ *.mo
+ *.pot
+ 
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+ 
+ # Flask stuff:
+ instance/
+ .webassets-cache
+ 
+ # Scrapy stuff:
+ .scrapy
+ 
+ # Sphinx documentation
+ docs/_build/
+ 
+ # PyBuilder
+ .pybuilder/
+ target/
+ 
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ 
+ # IPython
+ profile_default/
+ ipython_config.py
+ 
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+ 
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+ 
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ 
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+ 
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+ 
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+ 
+ # SageMath parsed files
+ *.sage.py
+ 
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ 
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+ 
+ # Rope project settings
+ .ropeproject
+ 
+ # mkdocs documentation
+ /site
+ 
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ 
+ # Pyre type checker
+ .pyre/
+ 
+ # pytype static type analyzer
+ .pytype/
+ 
+ # Cython debug symbols
+ cython_debug/
+ 
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ 
+ frames/
+ bauss.mkv
+ .DS_Store
+ *.mkv
+ 
+ lightning_logs
+ *.keras
+ *.ckpt
+ 
+ **/*.jpg
+ rclone.conf
+ 
+ quarto*
+ highlights/*.mp4
+ mlruns/
+ downloaded/
+ *.mp3
+ *.mp4
+ llamafile-server-0.1-llava-v1.5-7b-q4
.gitpod.yml ADDED
@@ -0,0 +1,12 @@
+ image: gitpod/workspace-python-3.11
+ 
+ tasks:
+   - init: |
+       pip install -U lightning twitch-dl polars mlflow dagshub mlflow "pydantic<2.0.0" torch torchvision
+       sudo apt install rclone
+ 
+ ports:
+   - port: 3000
+     onOpen: open-preview
+     name: Website
+     description: Website Preview
.pre-commit-config.yaml ADDED
@@ -0,0 +1,19 @@
+ # See https://pre-commit.com for more information
+ # See https://pre-commit.com/hooks.html for more hooks
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v3.2.0
+     hooks:
+       - id: trailing-whitespace
+       - id: end-of-file-fixer
+       - id: check-yaml
+       - id: check-added-large-files
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     # Ruff version.
+     rev: v0.1.13
+     hooks:
+       # Run the linter.
+       - id: ruff
+         args: [ --fix ]
+       # Run the formatter.
+       - id: ruff-format
.vscode/settings.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "[python]": {
+     "editor.defaultFormatter": "ms-python.black-formatter"
+   },
+   "python.analysis.typeCheckingMode": "basic",
+   "python.analysis.autoImportCompletions": true,
+   "python.analysis.packageIndexDepths": [
+     {
+       "name": "",
+       "depth": 5
+     }
+   ],
+   "python.analysis.diagnosticSeverityOverrides": {
+     "reportPrivateImportUsage": "none"
+   },
+ }
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM mcr.microsoft.com/vscode/devcontainers/miniconda:latest
+ 
+ COPY env.yml .
+ RUN conda env create -f env.yml
+ RUN conda clean -a -y
+ 
+ EXPOSE 8765
+ 
+ COPY . /app
+ WORKDIR /app
+ 
+ RUN conda init
+ RUN echo "source activate highlights" > ~/.bashrc
+ ENV PATH /opt/conda/envs/highlights/bin:$PATH
+ 
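+ # Serve the Solara app on 0.0.0.0 so the container's forwarded port (8765) is reachable.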
+ CMD solara run sol_app.py --host=0.0.0.0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+ 
+ Copyright (c) 2023 Hampus Londögård
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
LolHighlight.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,23 @@
+ # lol_highlight_detection
+ League of Legends Highlight Detection
+ 
+ ## Running project
+ 
+ 1. Use the `devcontainer`; this should be automatic
+ 2. Use `conda`/`mamba` and install `env.yml`, simple enough!
+ 3. `solara run sol_app.py`
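+ 
+ A minimal sketch of options 2 and 3, assuming `conda` (or `mamba`) is on your PATH; the env name `highlights` comes from `env.yml`:
+ 
+ ```sh
+ conda env create -f env.yml
+ conda activate highlights
+ solara run sol_app.py
+ ```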
+ 
+ ## Presentation
+ 
+ slides.google.com
+ 
+ ## Resources:
+ 
+ 1. Fast.AI - https://docs.fast.ai/tutorial.image_sequence.html
+ 2. TIMM/HF - https://github.com/huggingface/pytorch-image-models
+ 3. HF/ViT - see `video_classification.ipynb`
+ 4. VideoMAE - https://huggingface.co/docs/transformers/model_doc/videomae
+ 5. Keras - https://keras.io/examples/vision/video_classification/ (keras-core??)
+ 6. TF - https://www.tensorflow.org/tutorials/video/video_classification
+ 7. Papers - https://paperswithcode.com/search?q_meta=&q_type=&q=videomae
+ 8. Hiera - https://github.com/facebookresearch/hiera
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,40 @@
+ from pathlib import Path
+ import shutil
+ import streamlit as st
+ import r2
+ from streamlit_app import page_inference
+ 
+ from streamlit_app.page_download import download_convert_persist
+ 
+ 
+ def sidebar():
+     with st.sidebar:
+         r2_config = st.file_uploader("Upload R2 Config")
+         if r2_config is not None:
+             # UploadedFile holds bytes, so write in binary mode.
+             with open("rclone.conf", "wb") as f:
+                 f.write(r2_config.getvalue())
+ 
+ 
+ def download_if_missing():
+     Path("ckpts/timm").mkdir(exist_ok=True, parents=True)
+     if len(list(Path("ckpts/timm").glob("*"))) == 0:
+         with st.spinner("Download model"):
+             r2.download("models/ckpts/timm/tf_efficientnet_b3.aa_in1k.ckpt")
+             shutil.move("tf_efficientnet_b3.aa_in1k.ckpt", "ckpts/timm")
+ 
+ 
+ def main():
+     sidebar()
+     st.header("League of Legends Highlight Extractor")
+     download_if_missing()
+     mode = st.selectbox(
+         "Select Mode", ["Inference", "Download, Convert and Persist Twitch Clips"]
+     )
+ 
+     if mode == "Download, Convert and Persist Twitch Clips":
+         download_convert_persist()
+     else:
+         page_inference.inference_page()
+ 
+ 
+ if __name__ == "__main__":
+     main()
data_utils/frame_datamodule.py ADDED
@@ -0,0 +1,46 @@
+ import lightning as L
+ import numpy as np
+ from torch.utils.data import DataLoader, Subset, Dataset
+ 
+ from data_utils.splitter import chunk_splitter
+ 
+ 
+ class FrameDataModule(L.LightningDataModule):
+     def __init__(
+         self,
+         dataset: Dataset,
+         batch_size: int = 32,
+         chunk_size_for_splitting: int = 3 * 30,
+         num_workers: int = 2,
+         pin_memory: bool = False,
+     ):
+         super().__init__()
+         self.dataset = dataset
+         self.batch_size = batch_size
+         self.num_workers = num_workers
+         self.pin_memory = pin_memory
+         self.chunk_size_for_splitting = chunk_size_for_splitting
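+         # Split into contiguous chunks (90 frames by default, i.e. ~30 s at
+         # 3 fps) so near-identical neighbouring frames never straddle train/val.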
+         split = chunk_splitter(
+             len(dataset), chunk_size=self.chunk_size_for_splitting, split=0.15  # type: ignore
+         )
+         val_indices = np.where(split)[0]
+         train_indices = np.where(split == 0)[0]
+         self.ds_train = Subset(self.dataset, train_indices)  # type: ignore
+         self.ds_val = Subset(self.dataset, val_indices)  # type: ignore
+ 
+     def train_dataloader(self):
+         return DataLoader(
+             self.ds_train,
+             shuffle=True,
+             batch_size=self.batch_size,
+             num_workers=self.num_workers,
+             pin_memory=self.pin_memory,
+         )
+ 
+     def val_dataloader(self):
+         return DataLoader(
+             self.ds_val,
+             batch_size=self.batch_size,
+             num_workers=self.num_workers,
+             pin_memory=self.pin_memory,
+         )
data_utils/frame_dataset.py ADDED
@@ -0,0 +1,50 @@
+ from torchvision.transforms import Compose
+ import torch
+ from torch.utils.data import Dataset
+ from torchvision.datasets.folder import default_loader
+ import polars as pl
+ 
+ 
+ class FrameDataset(Dataset):
+     def __init__(
+         self,
+         df: pl.DataFrame,
+         augments: Compose,
+         frames_per_clip: int,
+         stride: int | None = None,
+         is_train: bool = True,
+     ):
+         super().__init__()
+         self.paths = df["path"].to_list()
+         self.is_train = is_train
+         if is_train:
+             self.y = torch.tensor(df["label"].to_numpy())
+         self.frames_per_clip = frames_per_clip
+         self.augments = augments
+         self.stride = stride or frames_per_clip
+ 
+     def __len__(self):
+         return len(self.paths) // self.stride
+ 
+     def __getitem__(self, idx):
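+         # Each item is a window of frames_per_clip frames taken every stride
+         # frames; with frames_per_clip == 1 a single augmented frame is
+         # returned, otherwise a stacked (frames, C, H, W) tensor. In training
+         # mode the window's label is the max label of its frames.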
+         start = idx * self.stride
+         stop = start + self.frames_per_clip
+         if stop - start <= 1:
+             path = self.paths[start]
+             frames_tr = self._open_augment_img(path)
+             if self.is_train:
+                 y = self.y[start]
+         else:
+             frames = [self._open_augment_img(path) for path in self.paths[start:stop]]
+             frames_tr = torch.stack(frames)
+             if self.is_train:
+                 y = self.y[start:stop].max()
+         if self.is_train:
+             return frames_tr, y
+         else:
+             return frames_tr
+ 
+     def _open_augment_img(self, path):
+         img = default_loader(path)
+         img = self.augments(img)
+         return img
data_utils/rebalancing.py ADDED
@@ -0,0 +1,13 @@
+ import polars as pl
+ 
+ 
+ def balance_labels(df: pl.DataFrame, fraction: float = 0.5) -> pl.DataFrame:
+     prev_num_labels, prev_total = df["label"].sum(), len(df)
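+     # Down-sample the majority (non-highlight) class: randomly pick a fraction
+     # of label-0 rows and anti-join them away on the unique frame path.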
+     to_remove = df.filter(pl.col("label") == 0).sample(fraction=fraction)
+     df = df.join(to_remove, on="path", how="anti")
+ 
+     print(
+         f"Previously {prev_num_labels / prev_total:.1%} highlights, now {df['label'].sum() / len(df):.1%}"
+     )
+ 
+     return df
data_utils/splitter.py ADDED
@@ -0,0 +1,15 @@
+ import numpy as np
+ from sklearn.model_selection import train_test_split
+ 
+ 
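+ # Returns a 0/1 mask over all indices where 1 marks validation frames; whole
+ # chunks are assigned to validation so temporally adjacent frames cannot
+ # leak across the split.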
+ def chunk_splitter(total_size: int, chunk_size: int, split: int | float) -> np.ndarray:
+     _, val_idxs = train_test_split(
+         np.arange(total_size // chunk_size), test_size=split, random_state=42
+     )  # ignoring final unsized chunk
+     is_valid = np.zeros(total_size, dtype="int")
+ 
+     for index in val_idxs:
+         index *= chunk_size
+         is_valid[index : index + chunk_size] = 1
+ 
+     return is_valid
env.yml ADDED
@@ -0,0 +1,24 @@
+ name: highlights
+ channels:
+   - conda-forge
+   - pytorch
+ dependencies:
+   - python=3.11
+   - lightning
+   - polars
+   - mlflow
+   - pydantic<2.0.0
+   - solara
+   - pytorch
+   - torchvision
+   - transformers
+   - moviepy
+   - plotly
+   - streamlit
+   - rclone
+   - shap
+   - timm
+   - pre-commit
+   - pip:
+       - requests_html
+       - twitch-dl
highights_nb.ipynb ADDED
@@ -0,0 +1,63 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "!pip install solara"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "application/vnd.jupyter.widget-view+json": {
+        "model_id": "a7070c5e0d31446cb0096d9a0abe044d",
+        "version_major": 2,
+        "version_minor": 0
+       },
+       "text/html": [
+        "Cannot show widget. You probably want to rerun the code cell above (<i>Click in the code cell, and press Shift+Enter <kbd>⇧</kbd>+<kbd>↩</kbd></i>)."
+       ],
+       "text/plain": [
+        "Cannot show ipywidgets in text"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "from sol_app import Page\n",
+     "\n",
+     "Page()"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Python 3",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.6"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
highlights.tsv ADDED
@@ -0,0 +1,264 @@
+ vid_id	start	stop
+ 1863051677	00:11:56	00:12:20
+ 1863051677	00:12:33	00:13:10
+ 1863051677	00:14:20	00:14:32
+ 1863051677	00:15:21	00:15:32
+ 1863051677	00:16:30	00:16:47
+ 1863051677	00:17:45	00:18:00
+ 1863051677	00:22:04	00:22:15
+ 1863051677	00:22:20	00:22:36
+ 1863051677	00:23:00	00:23:22
+ 1863051677	00:25:15	00:25:36
+ 1863051677	00:25:37	00:26:25
+ 1863051677	00:27:08	00:27:16
+ 1863051677	00:28:00	00:28:22
+ 1863051677	00:30:08	00:30:45
+ 1863051677	00:31:00	00:31:20
+ 1863051677	00:54:15	00:54:34
+ 1863051677	00:55:26	00:55:50
+ 1863051677	00:57:17	00:57:32
+ 1863051677	00:58:38	00:58:54
+ 1863051677	00:58:34	00:59:44
+ 1863051677	01:00:28	01:00:43
+ 1863051677	01:01:15	01:01:30
+ 1863051677	01:02:55	01:03:28
+ 1863051677	01:05:20	01:05:42
+ 1863051677	01:08:20	01:09:22
+ 1863051677	01:10:15	01:10:55
+ 1863051677	01:11:20	01:11:38
+ 1863051677	01:13:28	01:14:00
+ 1863051677	01:15:07	01:15:48
+ 1863051677	01:16:58	01:17:38
+ 1863051677	01:17:53	01:18:24
+ 1863051677	01:19:32	01:20:00
+ 1863051677	01:33:00	01:34:00
+ 1863051677	01:35:05	01:35:27
+ 1863051677	01:36:40	01:36:57
+ 1863051677	01:38:37	01:39:00
+ 1863051677	01:37:55	01:40:22
+ 1863051677	01:41:40	01:41:55
+ 1863051677	01:42:40	01:43:06
+ 1863051677	01:43:20	01:43:29
+ 1863051677	01:43:36	01:43:45
+ 1863051677	01:45:48	01:46:19
+ 1863051677	01:47:03	01:47:22
+ 1863051677	01:50:00	01:50:08
+ 1863051677	01:51:35	01:52:10
+ 1863051677	01:53:45	01:53:55
+ 1863051677	01:54:40	01:55:23
+ 1863051677	01:56:25	01:56:48
+ 1863051677	01:57:28	01:57:38
+ 1863051677	01:59:15	01:59:26
+ 1863051677	02:14:07	02:14:22
+ 1863051677	02:16:05	02:16:15
+ 1863051677	02:17:55	02:18:05
+ 1863051677	02:18:47	02:18:57
+ 1863051677	02:19:53	02:20:02
+ 1863051677	02:24:20	02:24:30
+ 1863051677	02:25:30	02:25:55
+ 1863051677	02:27:55	02:28:03
+ 1863051677	02:31:23	02:31:30
+ 1863051677	02:33:47	02:34:05
+ 1863051677	02:35:55	02:36:07
+ 1863051677	02:37:35	02:37:43
+ 1863051677	02:38:45	02:38:55
+ 1863051677	02:48:25	02:48:55
+ 1863051677	02:50:30	02:50:47
+ 1863051677	02:51:18	02:51:45
+ 1863051677	02:52:15	02:52:43
+ 1863051677	02:54:25	02:54:40
+ 1863051677	02:55:40	02:55:57
+ 1863051677	02:58:08	02:58:14
+ 1863051677	02:59:15	03:00:00
+ 1863051677	03:02:00	03:02:08
+ 1863051677	03:02:55	03:03:05
+ 1863051677	03:03:16	03:03:52
+ 1863051677	03:04:20	03:04:58
+ 1863051677	03:06:50	03:07:21
+ 1863051677	03:09:08	03:09:40
+ 1863051677	03:10:17	03:11:07
+ 1863051677	03:21:04	03:21:17
+ 1863051677	03:21:55	03:22:08
+ 1863051677	03:22:42	03:22:49
+ 1863051677	03:24:05	03:24:20
+ 1863051677	03:26:34	03:26:45
+ 1863051677	03:29:20	03:29:28
+ 1863051677	03:30:31	03:30:38
+ 1863051677	03:32:20	03:32:33
+ 1863051677	03:34:40	03:34:54
+ 1863051677	03:36:15	03:36:25
+ 1863051677	03:37:20	03:37:29
+ 1863051677	03:38:22	03:38:56
+ 1863051677	03:39:15	03:39:27
+ 1863051677	03:39:35	03:39:43
+ 1863051677	03:39:47	03:39:53
+ 1863051677	03:41:28	03:41:42
+ 1863051677	03:43:22	03:43:41
+ 1863051677	03:44:49	03:45:01
+ 1863051677	03:46:42	03:47:07
+ 1863051677	03:49:30	03:49:46
+ 1863051677	03:50:42	03:51:02
+ 1863051677	03:53:15	03:53:25
+ 1863051677	04:06:45	04:07:09
+ 1863051677	04:08:12	04:08:27
+ 1863051677	04:09:20	04:09:28
+ 1863051677	04:10:20	04:10:47
+ 1863051677	04:11:55	04:12:05
+ 1863051677	04:12:55	04:13:08
+ 1863051677	04:13:17	04:13:23
+ 1863051677	04:14:15	04:14:30
+ 1863051677	04:15:38	04:15:45
+ 1863051677	04:17:40	04:17:47
+ 1863051677	04:18:33	04:18:40
+ 1863051677	04:19:26	04:19:37
+ 1863051677	04:20:13	04:20:21
+ 1863051677	04:21:57	04:22:13
+ 1863051677	04:22:20	04:22:31
+ 1863051677	04:22:50	04:22:58
+ 1863051677	04:24:35	04:24:45
+ 1863051677	04:26:03	04:26:16
+ 1863051677	04:27:03	04:27:10
+ 1863051677	04:27:26	04:27:42
+ 1863051677	04:29:45	04:30:16
+ 1863051677	04:31:27	04:31:45
+ 1863051677	04:32:58	04:33:10
+ 1863051677	04:33:36	04:33:47
+ 1863051677	04:34:22	04:34:28
+ 1863051677	04:35:55	04:36:17
+ 1863051677	04:38:40	04:39:04
+ 1863051677	04:46:39	04:46:46
+ 1863051677	04:49:19	04:49:28
+ 1863051677	04:51:20	04:51:35
+ 1863051677	04:52:37	04:52:48
+ 1863051677	04:54:20	04:54:39
+ 1863051677	04:56:18	04:56:23
+ 1863051677	04:57:30	04:57:48
+ 1863051677	04:59:05	04:59:21
+ 1863051677	05:01:10	05:01:25
+ 1863051677	05:02:59	05:03:20
+ 1863051677	05:04:35	05:05:04
+ 1863051677	05:05:52	05:05:58
+ 1863051677	05:06:50	05:07:13
+ 1863051677	05:07:37	05:07:42
+ 1863051677	05:08:35	05:08:42
+ 1863051677	05:10:01	05:10:10
+ 1863051677	05:10:18	05:10:23
+ 1863051677	05:11:55	05:12:03
+ 1863051677	05:13:52	05:14:03
+ 1863051677	05:14:47	05:14:56
+ 1863051677	05:15:56	05:16:18
+ 1863051677	05:19:48	05:20:06
+ 1863051677	05:21:43	05:22:03
+ 1863051677	05:24:39	05:25:11
+ 1863051677	05:39:37	05:39:49
+ 1863051677	05:42:48	05:42:52
+ 1863051677	05:45:25	05:45:37
+ 1863051677	05:46:41	05:46:59
+ 1863051677	05:47:34	05:47:38
+ 1863051677	05:48:11	05:48:20
+ 1863051677	05:50:41	05:50:51
+ 1863051677	05:52:13	05:52:18
+ 1863051677	05:52:59	05:53:16
+ 1863051677	05:54:17	05:54:40
+ 1863051677	05:57:13	05:57:29
+ 1863051677	05:58:45	05:59:00
+ 1863051677	05:59:28	05:59:35
+ 1863051677	06:00:45	06:00:55
+ 1863051677	06:02:00	06:02:08
+ 1863051677	06:03:38	06:03:48
+ 1863051677	06:04:47	06:05:05
+ 1886367077	00:06:45	00:07:00
+ 1886367077	00:09:20	00:09:32
+ 1886367077	00:09:47	00:09:53
+ 1886367077	00:10:43	00:10:51
+ 1886367077	00:12:12	00:12:18
+ 1886367077	00:13:14	00:13:19
+ 1886367077	00:14:07	00:14:25
+ 1886367077	00:16:52	00:17:03
+ 1886367077	00:19:00	00:19:04
+ 1886367077	00:20:10	00:20:20
+ 1886367077	00:21:35	00:21:45
+ 1886367077	00:22:56	00:23:10
+ 1886367077	00:24:19	00:24:34
+ 1886367077	00:26:24	00:26:42
+ 1886367077	00:27:55	00:28:05
+ 1886367077	00:28:57	00:29:25
+ 1886367077	00:30:40	00:31:04
+ 1886367077	00:31:09	00:31:15
+ 1886367077	00:32:33	00:32:48
+ 1886367077	00:34:06	00:34:21
+ 1886367077	00:35:30	00:35:45
+ 1886367077	00:38:45	00:39:10
+ 1886367077	00:48:40	00:49:00
+ 1886367077	00:50:38	00:50:56
+ 1886367077	00:52:02	00:52:24
+ 1886367077	00:56:30	00:56:53
+ 1886367077	00:58:48	00:59:15
+ 1886367077	01:01:10	01:01:40
+ 1886367077	01:03:03	01:03:40
+ 1886367077	01:05:53	01:06:10
+ 1886367077	01:07:17	01:07:42
+ 1886367077	01:08:20	01:08:55
+ 1886367077	01:09:20	01:09:41
+ 1886367077	01:11:42	01:12:27
+ 1863359610	00:20:15	00:20:25
+ 1863359610	00:23:12	00:23:20
+ 1863359610	00:24:10	00:24:21
+ 1863359610	00:25:08	00:25:25
+ 1863359610	00:27:35	00:27:45
+ 1863359610	00:28:44	00:29:00
+ 1863359610	00:29:39	00:30:15
+ 1863359610	00:31:16	00:31:25
+ 1863359610	00:33:44	00:33:54
+ 1863359610	00:37:05	00:37:14
+ 1863359610	00:39:00	00:39:10
+ 1863359610	00:40:35	00:41:15
+ 1863359610	00:41:56	00:42:06
+ 1863359610	00:54:36	00:54:42
+ 1863359610	00:55:37	00:55:51
+ 1863359610	00:57:35	00:57:42
+ 1863359610	00:59:06	00:59:15
+ 1863359610	01:01:25	01:01:31
+ 1863359610	01:02:22	01:02:40
+ 1863359610	01:03:25	01:03:35
+ 1863359610	01:05:34	01:05:40
+ 1863359610	01:07:23	01:07:41
+ 1863359610	01:08:04	01:08:22
+ 1863359610	01:09:48	01:10:03
+ 1863359610	01:11:15	01:11:31
+ 1863359610	01:11:55	01:12:22
+ 1863359610	01:14:48	01:15:15
+ 1863359610	01:16:20	01:16:32
+ 1863359610	01:17:22	01:17:32
+ 1863359610	01:18:25	01:18:46
+ 1863359610	01:19:33	01:20:07
+ 1863359610	01:27:31	01:27:47
+ 1863359610	01:30:40	01:30:46
+ 1863359610	01:31:16	01:31:36
+ 1863359610	01:32:32	01:33:22
+ 1863359610	01:34:02	01:34:10
+ 1863359610	01:34:48	01:35:00
+ 1863359610	01:48:09	01:48:18
+ 1863359610	01:50:12	01:50:25
+ 1863359610	01:55:32	01:55:50
+ 1863359610	01:58:58	01:59:16
+ 1863359610	02:00:58	02:01:10
+ 1863359610	02:04:10	02:04:28
+ 1863359610	02:06:22	02:06:32
+ 1863359610	02:07:27	02:07:32
+ 1863359610	02:08:04	02:08:17
+ 1863359610	02:11:17	02:11:39
+ 1863359610	02:11:57	02:12:03
+ 1863359610	02:14:14	02:14:35
+ 1863359610	02:16:40	02:16:52
+ 1863359610	02:17:45	02:18:10
+ 1863359610	02:33:28	02:33:47
+ 1863359610	02:35:15	02:35:29
+ 1863359610	02:37:15	02:37:42
+ 1863359610	02:38:51	02:39:03
+ 1863359610	02:40:21	02:40:31
+ 1863359610	02:41:51	02:42:10
+ 1863359610	02:42:34	02:42:47
+ 1863359610	02:44:06	02:44:36
+ 1863359610	02:48:02	02:48:25
+ 1863359610	02:48:48	02:48:58
inference.py ADDED
@@ -0,0 +1,58 @@
+ from pathlib import Path
+ import numpy as np
+ from torch.utils.data import DataLoader
+ import polars as pl
+ import lightning as L
+ from data_utils.frame_dataset import FrameDataset
+ import torch
+ 
+ from models.lightning_wrapper import LightningWrapper
+ 
+ 
+ def run_inference(
+     model_path: Path,
+     image_folder: Path,
+     aggregate_duration: int = 30,
+     fps: int = 3,
+ ) -> pl.DataFrame:
+     model = LightningWrapper.load_from_checkpoint(model_path)
+     trainer = L.Trainer()
+ 
+     paths = list(image_folder.rglob("*.jpg"))
+     df = pl.DataFrame(
+         {"path": paths, "frame": [int(p.stem.removeprefix("img")) for p in paths]}
+     ).sort("frame")
+ 
+     ds = FrameDataset(df, model.get_transforms(is_training=False), 1, is_train=False)
+     dls = DataLoader(ds, batch_size=32, num_workers=2, pin_memory=True)
+ 
+     preds_list: list[torch.Tensor] = trainer.predict(model, dataloaders=dls)  # type: ignore
+     preds = torch.cat(preds_list)
+     pred_class = torch.argmax(preds, dim=1)
+     preds_class = np.repeat(pred_class.numpy(), ds.frames_per_clip)
+ 
+     df = df.with_columns(preds=pl.Series(preds_class))
+ 
37
+ pl.sum("preds")
38
+ )
39
+ seconds = pl.col("frame")
40
+ df_g = (
41
+ df_g.with_columns(pl.col("frame") * aggregate_duration)
42
+ .with_columns(
43
+ hour=seconds // (60 * 60), minute=(seconds // 60) % 60, second=seconds % 60
44
+ )
45
+ .with_columns(
46
+ timestamp=pl.datetime(
47
+ year=2023,
48
+ month=12,
49
+ day=10,
50
+ hour=pl.col("hour"),
51
+ minute="minute",
52
+ second="second",
53
+ )
54
+ )
55
+ .sort("timestamp")
56
+ )
57
+
58
+ return df_g
ingest.py ADDED
@@ -0,0 +1,49 @@
+ import subprocess
+ from pathlib import Path
+ 
+ 
+ def download_twitch_stream(TWITCH_ID: str, end_time: str | None = None):
+     out_path = Path(f"downloaded/{TWITCH_ID}.mp4")
+     out_path.parent.mkdir(exist_ok=True, parents=True)
+     if out_path.exists():
+         print(f"Already downloaded {TWITCH_ID}")
+         return
+ 
+     end_time = ["-e", end_time] if end_time is not None else []
+     subprocess.Popen(
+         [
+             "twitch-dl",
+             "download",
+             TWITCH_ID,
+             "-q",
+             "720p60",
+             *end_time,
+             "--output",
+             str(out_path),
+         ],
+     ).communicate()
+     return True
+ 
+ 
+ def vid_to_frames(TWITCH_ID: str, use_cuda: bool = True, frames: int = 3):
+     in_path = Path(f"downloaded/{TWITCH_ID}.mp4")
+     out_path = Path(f"converted/{TWITCH_ID}")
+     if out_path.exists():
+         print(f"Already converted {TWITCH_ID} to frames")
+         return
+     out_path.mkdir(parents=True, exist_ok=True)
+ 
+     use_cuda = ["-hwaccel", "cuda"] if use_cuda else []
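+     # Sample `frames` frames per second and write numbered JPEGs
+     # (img1.jpg, img2.jpg, ...) at quality 25; these are the frames that the
+     # dataset and inference code read back.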
+     subprocess.Popen(
+         [
+             "ffmpeg",
+             *use_cuda,
+             "-i",
+             str(in_path),
+             "-vf",
+             f"fps={frames}",
+             "-q:v",
+             "25",
+             f"{out_path}/img%d.jpg",
+         ],
+     ).communicate()
models/lightning_wrapper.py ADDED
@@ -0,0 +1,55 @@
+ import torch
+ import torch.nn.functional as F
+ import lightning as L
+ import torchmetrics
+ import timm
+ 
+ 
+ class LightningWrapper(L.LightningModule):
+     def __init__(self, timm_model: str, num_classes: int, learning_rate: float = 1e-3):
+         super().__init__()
+         self.timm_model = timm_model
+         self.lr = learning_rate
+         self.model = timm.create_model(
+             self.timm_model, pretrained=True, num_classes=num_classes
+         )
+         self.save_hyperparameters(ignore=["model"])
+ 
+         metrics = torchmetrics.MetricCollection(
+             {
+                 "accuracy": torchmetrics.Accuracy(
+                     task="multiclass", num_classes=self.model.num_classes
+                 )
+             }
+         )
+ 
+         self.train_metrics = metrics.clone(prefix="train_")
+         self.val_metrics = metrics.clone(prefix="val_")
+ 
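+     # Resolve the preprocessing (input size, interpolation, normalization)
+     # the pretrained timm backbone expects, as train or eval transforms.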
+     def get_transforms(self, is_training: bool):
+         data_config = timm.data.resolve_model_data_config(self.timm_model)
+         return timm.data.create_transform(**data_config, is_training=is_training)
+ 
+     def forward(self, x):
+         return self.model(x)
+ 
+     def training_step(self, batch, batch_idx):
+         x, y = batch
+         logits = self(x)
+         loss = F.cross_entropy(logits, y)
+         self.log("train_loss", loss)
+         self.train_metrics(logits, y)
+         self.log_dict(self.train_metrics, prog_bar=True)
+         return loss
+ 
+     def validation_step(self, batch, batch_idx):
+         x, y = batch
+         logits = self(x)
+         loss = F.cross_entropy(logits, y)
+         self.log("val_loss", loss)
+         self.val_metrics(logits, y)
+         self.log_dict(self.val_metrics, prog_bar=True)
+ 
+     def configure_optimizers(self):
+         optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
+         return optimizer
models/resnet.py ADDED
@@ -0,0 +1,13 @@
+ import torch.nn as nn
+ from torchvision.models import ResNet
+ 
+ 
+ class ResNetClassifier(nn.Module):
+     def __init__(self, model: ResNet, num_classes: int = 2):
+         super().__init__()
+         self.num_classes = num_classes
+         self.model = model
+         self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
+ 
+     def forward(self, x):
+         return self.model(x)
models/rnn.py ADDED
@@ -0,0 +1,34 @@
+ import torch
+ import torch.nn as nn
+ from torchvision.models import ResNet
+ 
+ 
+ class RNNClassifier(nn.Module):
+     def __init__(self, model: ResNet, num_classes: int = 2):
+         super().__init__()
+         self.num_classes = num_classes
+         self.feature_extractor = model  # repeat thrice
+         self.feature_extractor.fc = nn.Linear(512, 512)  # New fc layer
+         self.rnn = nn.LSTM(
+             input_size=512, hidden_size=256, num_layers=1, batch_first=True
+         )
+         self.classifier = nn.Linear(256, num_classes)
+ 
+     def forward(self, x):
+         features = []
+ 
+         # Pass each frame through ResNet sequentially
+         for i in range(x.shape[1]):
+             frame_feat = self.feature_extractor(x[:, i])
+             features.append(frame_feat)
+ 
+         x = torch.reshape(torch.stack(features), [x.shape[0], x.shape[1], -1])
+ 
+         # Apply RNN
+         out, _ = self.rnn(x)
+         out = out[:, -1, :]
+ 
+         # Classify
+         out = self.classifier(out)
+ 
+         return out
r2.py ADDED
@@ -0,0 +1,66 @@
+ from pathlib import Path
+ import subprocess
+ 
+ 
+ def compress(TWITCH_ID: str) -> str:
+     file = f"{TWITCH_ID}.tar.lz4"
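+     # -I lz4 pipes the tar stream through lz4, matching the .tar.lz4 name
+     # (assumes the lz4 binary is on PATH).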
+ subprocess.Popen(["tar", "-clvf", file, TWITCH_ID]).communicate()
8
+
9
+ return file
10
+
11
+
12
+ def upload(file: str, prefix: str = "frames/"):
13
+ subprocess.Popen(
14
+ [
15
+ "rclone",
16
+ "--config",
17
+ "rclone.conf",
18
+ "copy",
19
+ file,
20
+ f"r2:lol-highlights-eu/{prefix}/",
21
+ ]
22
+ ).communicate()
23
+
24
+
25
+ def download(file: str, out_folder: str = "."):
26
+ if not Path(file).exists():
27
+ print(
28
+ subprocess.Popen(
29
+ [
30
+ "rclone",
31
+ "--config",
32
+ "rclone.conf",
33
+ "copy",
34
+ f"r2:lol-highlights-eu/{file}",
35
+ out_folder,
36
+ ]
37
+ ).communicate()
38
+ )
39
+ return file
40
+
41
+
42
+ def list_files(directory: str) -> list[str]:
43
+ out, _ = subprocess.Popen(
44
+ [
45
+ "rclone",
46
+ "--config",
47
+ "rclone.conf",
48
+ "ls",
49
+ "--exclude",
50
+ "*.jpg",
51
+ f"r2:lol-highlights-eu/{directory}",
52
+ ],
53
+ stdout=subprocess.PIPE,
54
+ ).communicate()
55
+ out = [x.strip().split(" ")[-1] for x in out.decode("utf-8").split("\n") if len(x)]
56
+ return out
57
+
58
+
59
+ def decompress(file: str):
60
+ subprocess.Popen(["tar", "-xvf", file]).communicate()
61
+
62
+
63
+ def download_frames_and_unpack(filename: str):
64
+ download(f"frames/{filename}")
65
+ decompress(filename)
66
+ Path(filename).unlink()
sol_app.py ADDED
@@ -0,0 +1,87 @@
+ from pathlib import Path
+ import solara
+ import solara.lab
+ from solara_app import folders, sol_utils
+ from solara_app.mini_components.simple import Progress
+ from solara_app.page_download import DownloadConvertPersist
+ from solara_app.page_inference import Inference
+ from solara_app.page_models import DownloadModels
+ 
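+ # Zephyr-style chat template used by TinyLlama-1.1B-Chat: system and user
+ # turns are closed with </s>, and the prompt ends at <|assistant|> so the
+ # model generates only the reply (the title).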
+ PROMPT = """<|system|>
11
+ You are a chatbot who help write successful titles for Youtube videos of League of Legends Highlights from TheBaus that are generated using AI!</s>
12
+ <|user|>
13
+ Write me a title that fits a video of TheBaus who wins games as Sion even through having a lot of deaths - good deaths.</s>
14
+ <|assistant|>"""
15
+
16
+
17
+ @solara.component
18
+ def SidebarUpload(selected_page: solara.Reactive[str]):
19
+ with solara.Sidebar():
20
+ solara.Title("League of Legend Highlight Extractor")
21
+ if Path("rclone.conf").exists():
22
+ solara.Success("rclone.conf uploaded.")
23
+ else:
24
+ dump_file = sol_utils.persist_uploaded_file("rclone.conf")
25
+ solara.FileDrop(label="Drop R2 Config", lazy=False, on_file=dump_file)
26
+ solara.Error("Upload rclone.conf first!")
27
+ solara.Select(
28
+ "Select Page",
29
+ [
30
+ "Inference",
31
+ "Download, Convert and Persist Twitch Clips",
32
+ "Download Model(s)",
33
+ "Generate Video Title",
34
+ ],
35
+ value=selected_page,
36
+ )
37
+
38
+
39
+ @solara.component
40
+ def Page():
41
+ folders.create_default_folders()
42
+
43
+ selected_page = solara.use_reactive("Inference")
44
+ SidebarUpload(selected_page)
45
+
46
+ if not Path("rclone.conf").exists():
47
+ solara.Error("Upload rclone.conf first!")
48
+ else:
49
+ if selected_page.value == "Inference":
50
+ Inference()
51
+ elif selected_page.value == "Download, Convert and Persist Twitch Clips":
52
+ DownloadConvertPersist()
53
+ elif selected_page.value == "Download Model(s)":
54
+ DownloadModels()
55
+ elif selected_page.value == "Generate Video Title":
56
+ solara.Markdown(
57
+ """
58
+ ## Title Generator
59
+
60
+ Generate a title using a Large Language Model (**LLM**).
61
+ """
62
+ )
63
+ solara.InputText(
64
+ "What should title be based on?",
65
+ "TheBaus is a famous streamer who usually plays Sion, this highlight sections show-cases both (good) deaths and wins!",
66
+ )
67
+ from transformers import pipeline
68
+
69
+ title = solara.use_reactive(None)
70
+ clicks = solara.use_reactive(0)
71
+
72
+ def gen_title():
73
+ pipe = pipeline(
74
+ "text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
75
+ )
76
+ out = pipe(PROMPT)
77
+ title.value = out[0]["generated_text"].replace(PROMPT, "")
78
+
79
+ solara.Button("Generate!", on_click=lambda: clicks.set(clicks.value + 1))
80
+
81
+ if clicks.value > 0:
82
+ res = solara.use_thread(gen_title)
83
+ if res.state == solara.ResultState.RUNNING:
84
+ Progress("Running...")
85
+ if title.value:
86
+ solara.Markdown("Title:")
87
+ solara.Text(title.value)
solara_app/__init__.py ADDED
File without changes
solara_app/css.py ADDED
@@ -0,0 +1,2 @@
+ JUSTIFY_CENTER = {"justify-content": "center"}
+ ALIGN_CENTER = {"align-items": "center"}
solara_app/folders.py ADDED
@@ -0,0 +1,14 @@
+ from pathlib import Path
+ 
+ 
+ TMP = Path("tmp")
+ OUT = Path("out")
+ DOWNLOADED = Path("downloaded")
+ CONVERTED = Path("converted")
+ CHECKPOINTS = Path("ckpts")
+ _ALL_PATHS = [TMP, OUT, DOWNLOADED, CONVERTED, CHECKPOINTS]
+ 
+ 
+ def create_default_folders():
+     for path in _ALL_PATHS:
+         path.mkdir(parents=True, exist_ok=True)
solara_app/infer.py ADDED
@@ -0,0 +1,23 @@
+ from pathlib import Path
+ import solara
+ import polars as pl
+ 
+ from inference import run_inference
+ from utils.movie_clips import build_video
+ 
+ 
+ @solara.memoize
+ def solara_run_inference(
+     model_path: Path,
+     image_folder: Path,
+     aggregate_duration: int = 30,
+     fps: int = 3,
+ ) -> pl.DataFrame:
+     return run_inference(model_path, image_folder, aggregate_duration, fps)
+ 
+ 
+ @solara.memoize(key=lambda _, _2, highlight_vid: highlight_vid)
+ def convert_vid(
+     file_name: str | Path, time_dict: list[dict[str, str]], highlight_vid: Path
+ ):
+     return build_video(file_name, time_dict, highlight_vid)
solara_app/mini_components/c_inference.py ADDED
@@ -0,0 +1,41 @@
+ import solara
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+ import torch
+ 
+ # Use NVIDIA's hardware encoder when CUDA is available.
+ CODEC = {"codec": "h264_nvenc"} if torch.cuda.is_available() else {}
+ 
+ 
+ @solara.memoize(
+     key=lambda _, disabled, file_name, cache_key: f"{disabled}_{file_name}{cache_key}"
+ )
+ def write_full_video(
+     start_stop: list[dict[str, str]], disabled: dict, file_name: str, cache_key: str
+ ) -> str:
+     vid_clip = VideoFileClip(f"downloaded/{file_name}.mp4")
+     clips = []
+     for i, tstamp in enumerate(start_stop):
+         if disabled.get(i):
+             continue
+         clips.append(vid_clip.subclip(tstamp["start"], tstamp["end"]))
+ 
+     # Concatenate the video clips with transitions
+     final_clip = concatenate_videoclips(clips)
+ 
+     # Write the final concatenated movie to a file
+     file = vid_clip.filename.replace("downloaded", "out")
+ 
+     final_clip.write_videofile(file, **CODEC)
+ 
+     return file
+ 
+ 
+ @solara.memoize
+ def write_video(start: str, stop: str, id: int, file_name: str) -> str:
+     vid_clip = VideoFileClip(f"downloaded/{file_name}.mp4")
+ 
+     clip = vid_clip.subclip(start, stop)
+     file = f"tmp/{file_name}_{start}_{stop}_{id}.mp4"
+ 
+     clip.write_videofile(file, **CODEC)
+ 
+     return file
solara_app/mini_components/simple.py ADDED
@@ -0,0 +1,35 @@
+ from typing import Any
+ import solara
+ from ipywidgets import Video as iPyVideo
+ 
+ 
+ @solara.component()
+ def Progress(msg: str):
+     with solara.Column(align="center", style={"justify-content": "center"}):
+         solara.SpinnerSolara()
+         solara.Markdown(msg)
+ 
+ 
+ @solara.component()
+ def ProgressDynamic(
+     msg: str,
+     result: solara.Result[Any],
+     dynamic_progress: solara.Reactive[str | int | float] | None = None,
+ ):
+     if result.state == solara.ResultState.RUNNING:
+         Progress(msg)
+         if dynamic_progress is not None:
+             progress = dynamic_progress.value
+             match progress:
+                 case int():
+                     solara.ProgressLinear(progress)
+                 case float():
+                     solara.ProgressLinear(int(progress * 100))
+                 case str():
+                     solara.Markdown(progress)
+ 
+ 
+ @solara.component
+ def Video(file_name: str, width: int = 500, autoplay: bool = False, loop: bool = False):
+     vid = iPyVideo.from_file(file_name, width=width, autoplay=autoplay, loop=loop)
+     solara.display(vid)
solara_app/page_download.py ADDED
@@ -0,0 +1,33 @@
+ import solara
+ import torch
+ import ingest
+ from solara_app.mini_components.simple import Progress
+ 
+ 
+ @solara.component()
+ def DownloadConvertPersist():
+     twitch_id = solara.use_reactive("")
+     is_downloading, set_downloading = solara.use_state(False)
+     status, set_status = solara.use_state("")
+     end_time = solara.use_reactive(None)
+ 
+     def start_download():
+         set_downloading(True)
+         set_status("")
+         ingest.download_twitch_stream(twitch_id.value, end_time=end_time.value)
+         set_status("Converting to frames...")
+         ingest.vid_to_frames(twitch_id.value, use_cuda=torch.cuda.is_available())
+ 
+         set_status("Download completed")
+         set_downloading(False)
+ 
+     solara.InputText("Select Twitch ID", twitch_id, disabled=is_downloading)
+     solara.InputText("End Time (hh:mm:ss)", end_time)
+ 
+     solara.Markdown(f"You Selected {twitch_id.value}")
+     solara.Button("Download", start_download, disabled=is_downloading)
+ 
+     if is_downloading:
+         Progress("Downloading...")
+ 
+     solara.Text(status)
solara_app/page_inference.py ADDED
@@ -0,0 +1,164 @@
+ import datetime
+ from pathlib import Path
+ import polars as pl
+ import solara
+ 
+ from solara_app import sol_utils
+ from solara_app.css import ALIGN_CENTER, JUSTIFY_CENTER
+ from solara_app.folders import CHECKPOINTS, CONVERTED
+ from solara_app.infer import solara_run_inference
+ from solara_app.mini_components.c_inference import write_full_video, write_video
+ from solara_app.mini_components.simple import Progress, ProgressDynamic, Video
+ from utils import time_slice
+ 
+ 
+ def false() -> bool:
+     return False
+ 
+ 
+ @solara.component
+ def DfSelectComponent(df: pl.DataFrame, file: str):
+     extend_forward = solara.use_reactive({})
+     extend_backward = solara.use_reactive({})
+     disabled = solara.use_reactive({})
+     selected_vid, set_selected_vid = solara.use_state(0)
+     cut_off = solara.use_reactive(5)
+     start_stop: solara.Reactive[list[datetime.datetime]] = solara.use_reactive(
+         [df["timestamp"].min(), df["timestamp"].max()]
+     )  # type: ignore
+     clicks, set_clicks = solara.use_state(0)
+ 
+     with solara.Card(
+         "Highlight Selection & Editing",
+         "Select highlight threshold, remove or expand clips",
+     ):
+         sol_utils.CutOffChartSelection(cut_off, start_stop, df)
+         df = df.filter(
+             pl.col("timestamp").is_between(start_stop.value[0], start_stop.value[1])
+         )
+ 
+         time_df = time_slice.create_start_end_time(
+             df, cut_off.value, extend_forward.value, extend_backward.value
+         )
+ 
+         time_dict = time_df.select(pl.all().dt.strftime("%H:%M:%S")).to_dicts()
+         time_dict = solara.use_reactive(time_dict)
+         file_name = f"{file.replace('converted', 'downloaded')}.mp4"
+ 
+         if len(time_dict.value) == 0:
+             solara.Warning("No Highlights available...")
+             return
+ 
+         tstamp = time_dict.value[selected_vid]
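+         # Render only the currently selected clip, in a background thread, so
+         # browsing between highlights stays responsive.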
+         res = write_video.use_thread(
+             tstamp["start"],
+             tstamp["end"],
+             selected_vid,
+             Path(file_name).stem,
+         )
+ 
+         ProgressDynamic("Building Clip...", res)
+ 
+         # TODO: extract into component.
+         if res.state == solara.ResultState.FINISHED:
+             with solara.Row(style={**JUSTIFY_CENTER, **ALIGN_CENTER}):
+                 solara.Button(
+                     "<",
+                     disabled=selected_vid == 0,
+                     on_click=lambda: set_selected_vid(selected_vid - 1),
+                 )
+                 Video(res.value)
+                 solara.Button(
+                     ">",
+                     disabled=selected_vid == (len(time_dict.value) - 1),
+                     on_click=lambda: set_selected_vid(selected_vid + 1),
+                 )
+ 
+             with solara.Column(style=JUSTIFY_CENTER):
+                 with solara.Row(style=JUSTIFY_CENTER):
+                     solara.InputInt(
+                         "Expand Leftwards (s)",
+                         value=extend_backward.value.get(selected_vid, 0),
+                         on_value=lambda v: extend_backward.set(
+                             {**extend_backward.value, selected_vid: v}
+                         ),
+                     )
+                     solara.InputInt(
+                         "Expand Rightwards (s)",
+                         value=extend_forward.value.get(selected_vid, 0),
+                         on_value=lambda v: extend_forward.set(
+                             {**extend_forward.value, selected_vid: v}
+                         ),
+                     )
+ 
+                 def disable_vid(vid: int):
+                     return lambda: disabled.set(
+                         {**disabled.value, vid: not disabled.value.get(vid)}
+                     )
+ 
+                 solara.Button(
+                     (
+                         "✅ Add Video"
+                         if disabled.value.get(selected_vid)
+                         else "❌ Remove Video"
+                     ),
+                     on_click=disable_vid(selected_vid),
+                     style={"width": "25%"},
+                 )
+ 
+     with solara.Card("Full Video", "Build the full video!"):
+         solara.Button(
+             "Build Full Video",
+             color="primary",
+             on_click=lambda: set_clicks(clicks + 1),
+         )
+ 
+         if clicks > 0:
+             res_full = write_full_video.use_thread(
+                 time_dict.value,
+                 disabled.value,
+                 Path(file_name).stem,
+                 str(time_dict),
+             )
+             if res_full.state == solara.ResultState.RUNNING:
+                 Progress("Building Full Clip...")
+             elif res_full.state == solara.ResultState.FINISHED:
+                 solara.FileDownload(
+                     lambda: open(res_full.value, "rb"), Path(res_full.value).name
+                 )
+ 
+ 
+ @solara.component
+ def ShowDfComponent(model: str, file: str):
+     df = solara_run_inference.use_thread(
+         Path(model),
+         Path(file),
+         aggregate_duration=10,
+     )
+ 
+     if df.state == solara.ResultState.RUNNING:
+         Progress("Running...")
+     elif df.state == solara.ResultState.FINISHED and df.value is not None:
+         DfSelectComponent(df.value, file)
+ 
+ 
+ @solara.component()
+ def Inference():
+     files = [str(p) for p in CONVERTED.glob("*") if p.is_dir()]
+     models = [str(p) for p in CHECKPOINTS.rglob("*.ckpt")]
+     file = solara.use_reactive(files[0] if len(files) else None)
+     model = solara.use_reactive(models[0] if len(models) else None)
+ 
+     if model.value is None or file.value is None:
+         return solara.Markdown(
+             "**It's required to at least download one stream and have one model available!**"
+         )
+ 
+     clicked = solara.use_reactive(False)
+ 
+     sol_utils.ModelFileSelectComponent(file, model, clicked)
+ 
+     if clicked.value:
+         ShowDfComponent(model.value, file.value)
+     else:
+         solara.Markdown("**Start running to get further. 🚀**")
solara_app/page_models.py ADDED
@@ -0,0 +1,32 @@
+ from pathlib import Path
+ import solara
+ 
+ import r2
+ from solara_app.folders import CHECKPOINTS
+ 
+ from solara_app.mini_components.simple import Progress
+ 
+ 
+ @solara.component
+ def DownloadModels():
+     models = solara.use_thread(lambda: r2.list_files("models"))
+     selected_models: solara.Reactive[list[str]] = solara.use_reactive([])
+ 
+     if models.state == solara.ResultState.FINISHED:
+         unavailable_models: list[str] = [
+             m for m in (models.value or []) if not Path(m).exists()
+         ]
+         solara.SelectMultiple(
+             "Select model(s) to download",
+             selected_models,
+             unavailable_models,  # type: ignore
+         )
+ 
+         for m in selected_models.value:
+             output = solara.use_thread(
+                 lambda: r2.download(f"models/{m}", out_folder=CHECKPOINTS)
+             )
+             if output.state == solara.ResultState.RUNNING:
+                 Progress(f"Downloading {m}...")
+             elif output.state == solara.ResultState.FINISHED:
+                 solara.Success(f"Downloaded {output.value}", icon=True)
solara_app/sol_utils.py ADDED
@@ -0,0 +1,86 @@
+ import datetime
+ from typing import Any, Callable
+ from solara.components.file_drop import FileInfo
+ import solara
+ import polars as pl
+ import plotly.express as px
+ from dateutil import parser
+ from solara_app.folders import CHECKPOINTS, CONVERTED
+ 
+ 
+ def persist_uploaded_file(
+     filename: str, key: str = "data"
+ ) -> Callable[[FileInfo], None]:
+     def func(data: FileInfo) -> None:
+         with open(filename, "wb") as f:
+             f.write(data[key])
+ 
+     return func
+ 
+ 
+ @solara.component
+ def ModelFileSelectComponent(
+     file: solara.Reactive[str],
+     model: solara.Reactive[str],
+     clicked: solara.Reactive[bool],
+ ):
+     files = [str(p) for p in CONVERTED.glob("*") if p.is_dir()]
+     models = [str(p) for p in CHECKPOINTS.rglob("*.ckpt")]
+     _clicked = solara.use_reactive(clicked)
+     with solara.Card("Select Video/Model"):
+         with solara.Columns():
+             solara.Select(
+                 "Select File",
+                 values=files,
+                 value=file,
+             )
+             solara.Select(
+                 "Select Model",
+                 values=models,
+                 value=model,
+             )
+         solara.Button(
+             "Run Inference!",
+             color="primary",
+             on_click=lambda: _clicked.set(True),
+         )
+ 
+ 
+ @solara.component
+ def CutOffChartSelection(
+     cut_off: solara.Reactive[int],
+     start_stop: solara.Reactive[list[datetime.datetime]],
+     df: pl.DataFrame,
+ ):
+     div = solara.Column()
+ 
+     solara.SliderInt(
+         "Highlight Y-Cutoff",
+         cut_off,
+         min=df["preds"].min() + 1,
+         max=df["preds"].max(),
+         thumb_label="always",
+         tick_labels="end_points",
+     )
+     with div:
+         fig = px.line(
+             df, x="timestamp", y="preds", line_shape="hv", range_x=start_stop.value
+         )
+         fig.add_hline(y=cut_off.value, line_color="red")
+ 
+         def update_vals(relayout_dict: dict[str, Any] | None):
+             if relayout_dict is not None:
+                 layout = relayout_dict["relayout_data"]
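+                 # Plotly reports a drag-zoom as separate "xaxis.range[0]"/"[1]"
+                 # keys, while other relayout events send one "xaxis.range" pair.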
+ if "xaxis.range[0]" in layout:
75
+ start_stop.value = [
76
+ parser.parse(layout["xaxis.range[0]"], ignoretz=True),
77
+ parser.parse(layout["xaxis.range[1]"], ignoretz=True),
78
+ ]
79
+ else:
80
+ xaxis_range = layout["xaxis.range"]
81
+ start_stop.value = [
82
+ parser.parse(xaxis_range[0], ignoretz=True),
83
+ parser.parse(xaxis_range[1], ignoretz=True),
84
+ ]
85
+
86
+ solara.FigurePlotly(fig, on_relayout=update_vals)
streamlit_app/__init__.py ADDED
File without changes
streamlit_app/explainer.py ADDED
@@ -0,0 +1,24 @@
+ import shap
+ 
+ 
+ def explain(predict, images):
+     topk = 4
+     batch_size = 50
+     n_evals = 10000
+ 
+     # define a masker that is used to mask out partitions of the input image.
+     masker_blur = shap.maskers.Image("blur(128,128)", images[0].shape)
+ 
+     # create an explainer with the model's predict function and image masker
+     explainer = shap.Explainer(
+         predict, masker_blur, output_names=["Nothing", "Highlight"]
+     )
+ 
+     # explain one image using n_evals evaluations of the underlying model
+     # to estimate the SHAP values
+     shap_values = explainer(
+         images[1:2],
+         max_evals=n_evals,
+         batch_size=batch_size,
+         outputs=shap.Explanation.argsort.flip[:topk],
+     )
+     return shap_values
streamlit_app/page_download.py ADDED
@@ -0,0 +1,54 @@
+ import json
+ from pathlib import Path
+ import subprocess
+ import streamlit as st
+ import ingest
+ from utils import kick_dl
+ 
+ 
+ def download_convert_persist():
+     service = st.radio("Streaming service", ["Twitch", "Kick"])
+     if service == "Twitch":
+         twitch_id = st.text_input("Enter Twitch ID")
+         st.write(f"You Selected {twitch_id}")
+ 
+         if st.button("Download"):
+             with st.spinner():
+                 st.write("Downloading...")
+                 ingest.download_twitch_stream(twitch_id)
+                 st.write("Converting...")
+                 ingest.vid_to_frames(twitch_id, use_cuda=False)
+             st.success("Downloaded!")
+ 
+     elif service == "Kick":
+         kick_id = st.text_input("Enter Kick ID")
+         name = st.text_input("Nickname of video")
+         kick_id = Path(kick_id).name
+         API_PATH = "https://kick.com/api/v1/video/"
+         st.write(f"Open [this]({API_PATH}{kick_id}) and copy text into the box below.")
+         json_data = st.text_input("Copy and paste here.")
+         if len(json_data):
+             json_data = json.loads(json_data)["source"]
+ 
+         if st.button("Download"):
+             with st.spinner():
+                 st.write("Downloading...")
+                 if not Path(f"converted/{name}").exists():
+                     # Wait for ffmpeg to finish before converting to frames.
+                     subprocess.Popen(
+                         [
+                             "ffmpeg",
+                             "-i",
+                             json_data,
+                             "-vcodec",
+                             "copy",
+                             "-acodec",
+                             "copy",
+                             f"downloaded/{name}.mp4",
+                         ]
+                     ).communicate()
+ 
+                     st.write("Converting...")
+                     ingest.vid_to_frames(name, use_cuda=False)
+                     Path(f"downloaded/{name}.mp4").unlink()
+ 
+             st.success("Downloaded!")
streamlit_app/page_inference.py ADDED
@@ -0,0 +1,73 @@
+ import datetime
+ from pathlib import Path
+ import inference
+ 
+ import plotly.express as px
+ import streamlit as st
+ import polars as pl
+ from utils import time_slice
+ from utils.movie_clips import build_video, get_vid_path
+ 
+ 
+ @st.cache_data
+ def st_run_inference(
+     model_path: Path,
+     image_folder: Path,
+     aggregate_duration: int = 30,
+     fps: int = 3,
+ ) -> pl.DataFrame:
+     return inference.run_inference(model_path, image_folder, aggregate_duration, fps)
+ 
+ 
+ def inference_page():
+     with st.form("random"):
+         selected_file = st.selectbox(
+             "Select File", [str(p) for p in Path("converted").glob("*") if p.is_dir()]
+         )
+         selected_model = st.selectbox(
+             "Select Model", [str(p) for p in Path("ckpts").rglob("*.ckpt")]
+         )
+         st.form_submit_button("Extract Highlights!")
+ 
+     df_out = st_run_inference(
+         Path(selected_model),
+         Path(selected_file),
+         aggregate_duration=10,
+     )
+     chart_container = st.container()
+     cut_off = st.slider(
+         "Y-Cutoff Highlight",
+         min_value=df_out["preds"].min() + 1,
+         max_value=df_out["preds"].max() + 1,
+     )
+     with st.expander("Advanced Options"):
+         st.write("None available right now.")
+ 
+     fig = px.line(df_out, x="timestamp", y="preds", line_shape="hv")
+     fig.add_hline(cut_off, line_color="red", line_dash="dash")
+     with chart_container:
+         st.plotly_chart(fig)
+ 
+     df = time_slice.create_start_end_time(df_out, cut_off)
+     times_dict = time_slice.merge_overlaps_into_dict(df)
+     # event: datetime.time = st.select_slider(
+     #     "Validate event", options=[x["start"] for x in times_dict]
+     # )
+ 
+     highlight_vid = get_vid_path(
+         f"{selected_file.replace('converted', 'downloaded')}.mp4",
+         times_dict,
+         Path("highlights"),
+     )
+ 
+     if st.button("Create highlight Video"):
+         with st.spinner("Creating video..."):
+             build_video(
+                 f"{selected_file.replace('converted', 'downloaded')}.mp4",
+                 times_dict,
+                 highlight_vid,
+             )
+ 
+     if highlight_vid.exists():
+         st.video(str(highlight_vid))
+         st.info("Right Click to Download", icon="ℹ️")
utils.py ADDED
@@ -0,0 +1,36 @@
+ from pathlib import Path
+ import numpy as np
+ import polars as pl
+ 
+ 
+ def build_labels(label_file: Path, fps: int = 3):
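+     # Expand each labeled (start, stop) interval into one row per frame at
+     # the given fps, then left-join onto the full frame index so unmatched
+     # frames get label 0 (not a highlight).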
+     df = pl.read_parquet(label_file)
+     highlights = df.select(
+         "vid_id",
+         frame=pl.int_ranges(
+             pl.col("start").cast(pl.Duration).dt.seconds() * fps,
+             pl.col("stop").cast(pl.Duration).dt.seconds() * fps,
+         ),
+         label=pl.lit(1),
+     ).explode("frame")
+ 
+     dfs = []
+     for vid in df["vid_id"].unique():
+         frames = len(list(Path(str(vid)).glob("*.jpg")))
+         dfs.append(
+             pl.DataFrame({"vid_id": [vid] * frames, "frame": np.arange(1, frames + 1)})
+         )
+ 
+     labeled_df = pl.concat(dfs)
+     labeled_df = labeled_df.join(
+         highlights, on=["vid_id", "frame"], how="left"
+     ).fill_null(0)
+     labeled_df = labeled_df.with_columns(
+         path=pl.concat_str(
+             [
+                 pl.col("vid_id").cast(pl.Utf8) + "/img",
+                 pl.col("frame").cast(pl.Utf8) + ".jpg",
+             ]
+         )
+     )
+     labeled_df = labeled_df.sort("vid_id", "frame")
+     return labeled_df
utils/__init__.py ADDED
File without changes
utils/kick_dl.py ADDED
@@ -0,0 +1,11 @@
+ import subprocess
+ import requests
+ 
+ API_PATH = "https://kick.com/api/v1/video/"
+ 
+ 
+ def _get_source_url(url: str) -> str | None:
+     # The API exposes the HLS source under "source" (see streamlit_app/page_download.py);
+     # the broadcast ID is the last path segment of the Kick URL.
+     video_id = url.rstrip("/").rsplit("/", 1)[-1]
+     return requests.get(f"{API_PATH}{video_id}").json().get("source")
+ 
+ 
+ def download(url: str, output_path: str) -> None:
+     source_url = _get_source_url(url)
+     if not source_url:
+         raise Exception("could not find a source url for given broadcast")
+ 
+     # List form avoids shell injection via the URL.
+     subprocess.run(
+         ["ffmpeg", "-i", source_url, "-vcodec", "copy", "-acodec", "copy", output_path],
+         check=True,
+     )
utils/movie_clips.py ADDED
@@ -0,0 +1,34 @@
+ from pathlib import Path
+ from typing import Dict, List
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+ 
+ 
+ def get_vid_path(
+     orig_vid: str | Path, timestamps: List[Dict[str, str]], out: Path
+ ) -> Path:
+     out.mkdir(parents=True, exist_ok=True)
+     vid_name = Path(orig_vid).name
+     out_path = out / (vid_name + f"_{hash(str(timestamps))}.mp4")
+ 
+     return out_path
+ 
+ 
+ def build_video(orig_vid: str | Path, timestamps: List[Dict[str, str]], out_path: Path):
+     # timestamps = [{"start": "00:01:23", "end": "00:02:45"}]
+     if out_path.exists():
+         return out_path
+ 
+     video_clips = []
+     video = VideoFileClip(str(orig_vid))
+ 
+     # Extract video clips for each timestamp event
+     for timestamp in timestamps:
+         clip = video.subclip(timestamp["start"], timestamp["end"])
+         video_clips.append(clip)
+ 
+     # Concatenate the video clips with transitions
+     final_clip = concatenate_videoclips(video_clips)
+ 
+     # Write the final concatenated movie to a file
+     final_clip.write_videofile(str(out_path))
+     return out_path
utils/time_slice.py ADDED
@@ -0,0 +1,45 @@
+ import datetime
+ import polars as pl
+ 
+ SECONDS_10 = pl.duration(seconds=10)
+ RANDOM_DATE = pl.date(2023, 1, 1).dt
+ 
+ 
+ def create_start_end_time(
+     df: pl.DataFrame, cut_off: int, forward: dict | None = None, backward: dict | None = None
+ ) -> pl.DataFrame:
+     # Default to no per-clip extension (the Streamlit page calls this with
+     # only df and cut_off).
+     forward, backward = forward or {}, backward or {}
+     df = df.filter(pl.col("preds") >= cut_off).select(
+         start=pl.col("timestamp"),
+         end=pl.col("timestamp") + pl.duration(seconds=10),
+     )
+     if len(df) == 0:
+         return df
+ 
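+     # Merge windows that touch (one window's end equals the next one's start)
+     # into a single clip, then apply the per-clip backward/forward extensions.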
+     new_data = df[0].to_dicts()
+     for row in df[1:].to_dicts():
+         if new_data[-1]["end"] == row["start"]:
+             new_data[-1]["end"] = row["end"]
+         else:
+             new_data.append(row)
+     new_data = [
+         {
+             "start": d["start"] - datetime.timedelta(seconds=backward.get(i, 0)),
+             "end": d["end"] + datetime.timedelta(seconds=forward.get(i, 0)),
+         }
+         for i, d in enumerate(new_data)
+     ]
+     return pl.DataFrame(new_data)
+ 
+ 
+ def merge_overlaps_into_dict(df: pl.DataFrame):
+     if len(df) == 0:
+         return []
+ 
+     data = df.cast(pl.Time).cast(pl.Utf8).to_dicts()
+     new_data = [data[0]]
+     for row in data[1:]:
+         if new_data[-1]["end"] == row["start"]:
+             new_data[-1]["end"] = row["end"]
+         else:
+             new_data.append(row)
+     return new_data