lunde committed on
Commit bd65e34 · 0 Parent(s):

Initial commit


initial commit

downscaling and initial labeling

WIP

WIP

fix: make modules work better

WIP: refactor

Create FUNDING.yml

wip: present

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

WIP

wip

fix: deps lock

bump

add solara

WIP

fix: more options

fix: devcontainer

fix: simplify app

fix: improve devcontainer

WIP

WIP

WIP

WIP

WIP

feat: finished app

fix: inference more than 2k frames

WIP: finalizing and prettifying

Swap tab order

fix: empty file/model dir

fix: allow model downloads from r2

fix: add out_folder to download

fix: add end_time

WIP

wip

WIP

fix: env

WIP

fix: make Dockerfile work by default

fix: pre-commit

fix: improve UI looks and clarify divider, minor UX improvements too

fix: minor UI change

WIP: Adding plotly callback to enable interactive time selection

wip

fix: update

.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "build": {
+     "dockerfile": "../Dockerfile"
+   },
+   "postCreateCommand": "conda init",
+   "postAttachCommand": "conda activate highlights",
+   "features": {
+     "ghcr.io/devcontainers/features/git": {},
+     "ghcr.io/devcontainers-contrib/features/apt-get-packages": {},
+     "ghcr.io/devcontainers-contrib/features/ffmpeg-apt-get:1": {},
+     "ghcr.io/devcontainers-contrib/features/rclone:1": {},
+     "ghcr.io/devcontainers/features/github-cli:1": {}
+   },
+   "forwardPorts": [
+     8765
+   ],
+   "customizations": {
+     // Configure properties specific to VS Code.
+     "vscode": {
+       // Add the IDs of extensions you want installed when the container is created.
+       "extensions": [
+         "ms-azuretools.vscode-docker",
+         "ms-python.python",
+         "ms-python.black-formatter"
+       ]
+     }
+   }
+ }
.dockerignore ADDED
@@ -0,0 +1,9 @@
+ converted/
+ downloaded/
+ lightning_logs/
+ mlruns/
+ tmp/
+ highlights/
+ out/
+ Dockerfile
+ env.yml
.github/FUNDING.yml ADDED
@@ -0,0 +1,13 @@
+ # These are supported funding model platforms
+ 
+ github: [londogard] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+ patreon: # Replace with a single Patreon username
+ open_collective: # Replace with a single Open Collective username
+ ko_fi: # Replace with a single Ko-fi username
+ tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+ community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+ liberapay: # Replace with a single Liberapay username
+ issuehunt: # Replace with a single IssueHunt username
+ otechie: # Replace with a single Otechie username
+ lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
+ custom: [https://www.buymeacoffee.com/hlondogard] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
.gitignore ADDED
@@ -0,0 +1,180 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ 
+ # C extensions
+ *.so
+ 
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ 
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+ 
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+ 
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+ 
+ # Translations
+ *.mo
+ *.pot
+ 
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+ 
+ # Flask stuff:
+ instance/
+ .webassets-cache
+ 
+ # Scrapy stuff:
+ .scrapy
+ 
+ # Sphinx documentation
+ docs/_build/
+ 
+ # PyBuilder
+ .pybuilder/
+ target/
+ 
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ 
+ # IPython
+ profile_default/
+ ipython_config.py
+ 
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+ 
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+ 
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ 
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+ 
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+ 
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+ 
+ # SageMath parsed files
+ *.sage.py
+ 
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ 
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+ 
+ # Rope project settings
+ .ropeproject
+ 
+ # mkdocs documentation
+ /site
+ 
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ 
+ # Pyre type checker
+ .pyre/
+ 
+ # pytype static type analyzer
+ .pytype/
+ 
+ # Cython debug symbols
+ cython_debug/
+ 
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ 
+ frames/
+ bauss.mkv
+ .DS_Store
+ *.mkv
+ 
+ lightning_logs
+ *.keras
+ *.ckpt
+ 
+ **/*.jpg
+ rclone.conf
+ 
+ quarto*
+ highlights/*.mp4
+ mlruns/
+ downloaded/
+ *.mp3
+ *.mp4
+ llamafile-server-0.1-llava-v1.5-7b-q4
.gitpod.yml ADDED
@@ -0,0 +1,12 @@
+ image: gitpod/workspace-python-3.11
+ 
+ tasks:
+   - init: |
+       pip install -U lightning twitch-dl polars mlflow dagshub mlflow "pydantic<2.0.0" torch torchvision
+       sudo apt install rclone
+ 
+ ports:
+   - port: 3000
+     onOpen: open-preview
+     name: Website
+     description: Website Preview
.pre-commit-config.yaml ADDED
@@ -0,0 +1,19 @@
+ # See https://pre-commit.com for more information
+ # See https://pre-commit.com/hooks.html for more hooks
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v3.2.0
+     hooks:
+       - id: trailing-whitespace
+       - id: end-of-file-fixer
+       - id: check-yaml
+       - id: check-added-large-files
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     # Ruff version.
+     rev: v0.1.13
+     hooks:
+       # Run the linter.
+       - id: ruff
+         args: [ --fix ]
+       # Run the formatter.
+       - id: ruff-format
.vscode/settings.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "[python]": {
+     "editor.defaultFormatter": "ms-python.black-formatter"
+   },
+   "python.analysis.typeCheckingMode": "basic",
+   "python.analysis.autoImportCompletions": true,
+   "python.analysis.packageIndexDepths": [
+     {
+       "name": "",
+       "depth": 5
+     }
+   ],
+   "python.analysis.diagnosticSeverityOverrides": {
+     "reportPrivateImportUsage": "none"
+   },
+ }
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM mcr.microsoft.com/vscode/devcontainers/miniconda:latest
+ 
+ COPY env.yml .
+ RUN conda env create -f env.yml
+ RUN conda clean -a -y
+ 
+ EXPOSE 8765
+ 
+ COPY . /app
+ WORKDIR /app
+ 
+ RUN conda init
+ RUN echo "source activate highlights" > ~/.bashrc
+ ENV PATH /opt/conda/envs/highlights/bin:$PATH
+ 
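+ # Serve the Solara app on 0.0.0.0 so the container's forwarded port (8765) is reachable.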
+ CMD solara run sol_app.py --host=0.0.0.0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+ 
+ Copyright (c) 2023 Hampus Londögård
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
LolHighlight.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,23 @@
+ # lol_highlight_detection
+ League of Legends Highlight Detection
+ 
+ ## Running project
+ 
+ 1. Use the `devcontainer`; this should be automatic
+ 2. Use `conda`/`mamba` and install `env.yml`, simple enough!
+ 3. `solara run sol_app.py`
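+ 
+ A minimal sketch of options 2 and 3, assuming `conda` (or `mamba`) is on your PATH; the env name `highlights` comes from `env.yml`:
+ 
+ ```sh
+ conda env create -f env.yml
+ conda activate highlights
+ solara run sol_app.py
+ ```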
+ 
+ ## Presentation
+ 
+ slides.google.com
+ 
+ ## Resources:
+ 
+ 1. Fast.AI - https://docs.fast.ai/tutorial.image_sequence.html
+ 2. TIMM/HF - https://github.com/huggingface/pytorch-image-models
+ 3. HF/ViT - see `video_classification.ipynb`
+ 4. VideoMAE - https://huggingface.co/docs/transformers/model_doc/videomae
+ 5. Keras - https://keras.io/examples/vision/video_classification/ (keras-core??)
+ 6. TF - https://www.tensorflow.org/tutorials/video/video_classification
+ 7. Papers - https://paperswithcode.com/search?q_meta=&q_type=&q=videomae
+ 8. Hiera - https://github.com/facebookresearch/hiera
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,40 @@
+ from pathlib import Path
+ import shutil
+ import streamlit as st
+ import r2
+ from streamlit_app import page_inference
+ 
+ from streamlit_app.page_download import download_convert_persist
+ 
+ 
+ def sidebar():
+     with st.sidebar:
+         r2_config = st.file_uploader("Upload R2 Config")
+         if r2_config is not None:
+             # UploadedFile holds bytes, so write in binary mode.
+             with open("rclone.conf", "wb") as f:
+                 f.write(r2_config.getvalue())
+ 
+ 
+ def download_if_missing():
+     Path("ckpts/timm").mkdir(exist_ok=True, parents=True)
+     if len(list(Path("ckpts/timm").glob("*"))) == 0:
+         with st.spinner("Download model"):
+             r2.download("models/ckpts/timm/tf_efficientnet_b3.aa_in1k.ckpt")
+             shutil.move("tf_efficientnet_b3.aa_in1k.ckpt", "ckpts/timm")
+ 
+ 
+ def main():
+     sidebar()
+     st.header("League of Legends Highlight Extractor")
+     download_if_missing()
+     mode = st.selectbox(
+         "Select Mode", ["Inference", "Download, Convert and Persist Twitch Clips"]
+     )
+ 
+     if mode == "Download, Convert and Persist Twitch Clips":
+         download_convert_persist()
+     else:
+         page_inference.inference_page()
+ 
+ 
+ if __name__ == "__main__":
+     main()
data_utils/frame_datamodule.py ADDED
@@ -0,0 +1,46 @@
+ import lightning as L
+ import numpy as np
+ from torch.utils.data import DataLoader, Subset, Dataset
+ 
+ from data_utils.splitter import chunk_splitter
+ 
+ 
+ class FrameDataModule(L.LightningDataModule):
+     def __init__(
+         self,
+         dataset: Dataset,
+         batch_size: int = 32,
+         chunk_size_for_splitting: int = 3 * 30,
+         num_workers: int = 2,
+         pin_memory: bool = False,
+     ):
+         super().__init__()
+         self.dataset = dataset
+         self.batch_size = batch_size
+         self.num_workers = num_workers
+         self.pin_memory = pin_memory
+         self.chunk_size_for_splitting = chunk_size_for_splitting
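+         # Split into contiguous chunks (90 frames by default, i.e. ~30 s at
+         # 3 fps) so near-identical neighbouring frames never straddle train/val.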
+         split = chunk_splitter(
+             len(dataset), chunk_size=self.chunk_size_for_splitting, split=0.15  # type: ignore
+         )
+         val_indices = np.where(split)[0]
+         train_indices = np.where(split == 0)[0]
+         self.ds_train = Subset(self.dataset, train_indices)  # type: ignore
+         self.ds_val = Subset(self.dataset, val_indices)  # type: ignore
+ 
+     def train_dataloader(self):
+         return DataLoader(
+             self.ds_train,
+             shuffle=True,
+             batch_size=self.batch_size,
+             num_workers=self.num_workers,
+             pin_memory=self.pin_memory,
+         )
+ 
+     def val_dataloader(self):
+         return DataLoader(
+             self.ds_val,
+             batch_size=self.batch_size,
+             num_workers=self.num_workers,
+             pin_memory=self.pin_memory,
+         )
data_utils/frame_dataset.py ADDED
@@ -0,0 +1,50 @@
+ from torchvision.transforms import Compose
+ import torch
+ from torch.utils.data import Dataset
+ from torchvision.datasets.folder import default_loader
+ import polars as pl
+ 
+ 
+ class FrameDataset(Dataset):
+     def __init__(
+         self,
+         df: pl.DataFrame,
+         augments: Compose,
+         frames_per_clip: int,
+         stride: int | None = None,
+         is_train: bool = True,
+     ):
+         super().__init__()
+         self.paths = df["path"].to_list()
+         self.is_train = is_train
+         if is_train:
+             self.y = torch.tensor(df["label"].to_numpy())
+         self.frames_per_clip = frames_per_clip
+         self.augments = augments
+         self.stride = stride or frames_per_clip
+ 
+     def __len__(self):
+         return len(self.paths) // self.stride
+ 
+     def __getitem__(self, idx):
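+         # Each item is a window of frames_per_clip frames taken every stride
+         # frames; with frames_per_clip == 1 a single augmented frame is
+         # returned, otherwise a stacked (frames, C, H, W) tensor. In training
+         # mode the window's label is the max label of its frames.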
+         start = idx * self.stride
+         stop = start + self.frames_per_clip
+         if stop - start <= 1:
+             path = self.paths[start]
+             frames_tr = self._open_augment_img(path)
+             if self.is_train:
+                 y = self.y[start]
+         else:
+             frames = [self._open_augment_img(path) for path in self.paths[start:stop]]
+             frames_tr = torch.stack(frames)
+             if self.is_train:
+                 y = self.y[start:stop].max()
+         if self.is_train:
+             return frames_tr, y
+         else:
+             return frames_tr
+ 
+     def _open_augment_img(self, path):
+         img = default_loader(path)
+         img = self.augments(img)
+         return img
data_utils/rebalancing.py ADDED
@@ -0,0 +1,13 @@
+ import polars as pl
+ 
+ 
+ def balance_labels(df: pl.DataFrame, fraction: float = 0.5) -> pl.DataFrame:
+     prev_num_labels, prev_total = df["label"].sum(), len(df)
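+     # Down-sample the majority (non-highlight) class: randomly pick a fraction
+     # of label-0 rows and anti-join them away on the unique frame path.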
+     to_remove = df.filter(pl.col("label") == 0).sample(fraction=fraction)
+     df = df.join(to_remove, on="path", how="anti")
+ 
+     print(
+         f"Previously {prev_num_labels / prev_total:.1%} highlights, now {df['label'].sum() / len(df):.1%}"
+     )
+ 
+     return df
data_utils/splitter.py ADDED
@@ -0,0 +1,15 @@
+ import numpy as np
+ from sklearn.model_selection import train_test_split
+ 
+ 
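+ # Returns a 0/1 mask over all indices where 1 marks validation frames; whole
+ # chunks are assigned to validation so temporally adjacent frames cannot
+ # leak across the split.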
+ def chunk_splitter(total_size: int, chunk_size: int, split: int | float) -> np.ndarray:
+     _, val_idxs = train_test_split(
+         np.arange(total_size // chunk_size), test_size=split, random_state=42
+     )  # ignoring final unsized chunk
+     is_valid = np.zeros(total_size, dtype="int")
+ 
+     for index in val_idxs:
+         index *= chunk_size
+         is_valid[index : index + chunk_size] = 1
+ 
+     return is_valid
env.yml ADDED
@@ -0,0 +1,24 @@
+ name: highlights
+ channels:
+   - conda-forge
+   - pytorch
+ dependencies:
+   - python=3.11
+   - lightning
+   - polars
+   - mlflow
+   - pydantic<2.0.0
+   - solara
+   - pytorch
+   - torchvision
+   - transformers
+   - moviepy
+   - plotly
+   - streamlit
+   - rclone
+   - shap
+   - timm
+   - pre-commit
+   - pip:
+       - requests_html
+       - twitch-dl
highights_nb.ipynb ADDED
@@ -0,0 +1,63 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "!pip install solara"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "application/vnd.jupyter.widget-view+json": {
+        "model_id": "a7070c5e0d31446cb0096d9a0abe044d",
+        "version_major": 2,
+        "version_minor": 0
+       },
+       "text/html": [
+        "Cannot show widget. You probably want to rerun the code cell above (<i>Click in the code cell, and press Shift+Enter <kbd>⇧</kbd>+<kbd>↩</kbd></i>)."
+       ],
+       "text/plain": [
+        "Cannot show ipywidgets in text"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "from sol_app import Page\n",
+     "\n",
+     "Page()"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Python 3",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.6"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
highlights.tsv ADDED
@@ -0,0 +1,264 @@
+ vid_id	start	stop
+ 1863051677	00:11:56	00:12:20
+ 1863051677	00:12:33	00:13:10
+ 1863051677	00:14:20	00:14:32
+ 1863051677	00:15:21	00:15:32
+ 1863051677	00:16:30	00:16:47
+ 1863051677	00:17:45	00:18:00
+ 1863051677	00:22:04	00:22:15
+ 1863051677	00:22:20	00:22:36
+ 1863051677	00:23:00	00:23:22
+ 1863051677	00:25:15	00:25:36
+ 1863051677	00:25:37	00:26:25
+ 1863051677	00:27:08	00:27:16
+ 1863051677	00:28:00	00:28:22
+ 1863051677	00:30:08	00:30:45
+ 1863051677	00:31:00	00:31:20
+ 1863051677	00:54:15	00:54:34
+ 1863051677	00:55:26	00:55:50
+ 1863051677	00:57:17	00:57:32
+ 1863051677	00:58:38	00:58:54
+ 1863051677	00:58:34	00:59:44
+ 1863051677	01:00:28	01:00:43
+ 1863051677	01:01:15	01:01:30
+ 1863051677	01:02:55	01:03:28
+ 1863051677	01:05:20	01:05:42
+ 1863051677	01:08:20	01:09:22
+ 1863051677	01:10:15	01:10:55
+ 1863051677	01:11:20	01:11:38
+ 1863051677	01:13:28	01:14:00
+ 1863051677	01:15:07	01:15:48
+ 1863051677	01:16:58	01:17:38
+ 1863051677	01:17:53	01:18:24
+ 1863051677	01:19:32	01:20:00
+ 1863051677	01:33:00	01:34:00
+ 1863051677	01:35:05	01:35:27
+ 1863051677	01:36:40	01:36:57
+ 1863051677	01:38:37	01:39:00
+ 1863051677	01:37:55	01:40:22
+ 1863051677	01:41:40	01:41:55
+ 1863051677	01:42:40	01:43:06
+ 1863051677	01:43:20	01:43:29
+ 1863051677	01:43:36	01:43:45
+ 1863051677	01:45:48	01:46:19
+ 1863051677	01:47:03	01:47:22
+ 1863051677	01:50:00	01:50:08
+ 1863051677	01:51:35	01:52:10
+ 1863051677	01:53:45	01:53:55
+ 1863051677	01:54:40	01:55:23
+ 1863051677	01:56:25	01:56:48
+ 1863051677	01:57:28	01:57:38
+ 1863051677	01:59:15	01:59:26
+ 1863051677	02:14:07	02:14:22
+ 1863051677	02:16:05	02:16:15
+ 1863051677	02:17:55	02:18:05
+ 1863051677	02:18:47	02:18:57
+ 1863051677	02:19:53	02:20:02
+ 1863051677	02:24:20	02:24:30
+ 1863051677	02:25:30	02:25:55
+ 1863051677	02:27:55	02:28:03
+ 1863051677	02:31:23	02:31:30
+ 1863051677	02:33:47	02:34:05
+ 1863051677	02:35:55	02:36:07
+ 1863051677	02:37:35	02:37:43
+ 1863051677	02:38:45	02:38:55
+ 1863051677	02:48:25	02:48:55
+ 1863051677	02:50:30	02:50:47
+ 1863051677	02:51:18	02:51:45
+ 1863051677	02:52:15	02:52:43
+ 1863051677	02:54:25	02:54:40
+ 1863051677	02:55:40	02:55:57
+ 1863051677	02:58:08	02:58:14
+ 1863051677	02:59:15	03:00:00
+ 1863051677	03:02:00	03:02:08
+ 1863051677	03:02:55	03:03:05
+ 1863051677	03:03:16	03:03:52
+ 1863051677	03:04:20	03:04:58
+ 1863051677	03:06:50	03:07:21
+ 1863051677	03:09:08	03:09:40
+ 1863051677	03:10:17	03:11:07
+ 1863051677	03:21:04	03:21:17
+ 1863051677	03:21:55	03:22:08
+ 1863051677	03:22:42	03:22:49
+ 1863051677	03:24:05	03:24:20
+ 1863051677	03:26:34	03:26:45
+ 1863051677	03:29:20	03:29:28
+ 1863051677	03:30:31	03:30:38
+ 1863051677	03:32:20	03:32:33
+ 1863051677	03:34:40	03:34:54
+ 1863051677	03:36:15	03:36:25
+ 1863051677	03:37:20	03:37:29
+ 1863051677	03:38:22	03:38:56
+ 1863051677	03:39:15	03:39:27
+ 1863051677	03:39:35	03:39:43
+ 1863051677	03:39:47	03:39:53
+ 1863051677	03:41:28	03:41:42
+ 1863051677	03:43:22	03:43:41
+ 1863051677	03:44:49	03:45:01
+ 1863051677	03:46:42	03:47:07
+ 1863051677	03:49:30	03:49:46
+ 1863051677	03:50:42	03:51:02
+ 1863051677	03:53:15	03:53:25
+ 1863051677	04:06:45	04:07:09
+ 1863051677	04:08:12	04:08:27
+ 1863051677	04:09:20	04:09:28
+ 1863051677	04:10:20	04:10:47
+ 1863051677	04:11:55	04:12:05
+ 1863051677	04:12:55	04:13:08
+ 1863051677	04:13:17	04:13:23
+ 1863051677	04:14:15	04:14:30
+ 1863051677	04:15:38	04:15:45
+ 1863051677	04:17:40	04:17:47
+ 1863051677	04:18:33	04:18:40
+ 1863051677	04:19:26	04:19:37
+ 1863051677	04:20:13	04:20:21
+ 1863051677	04:21:57	04:22:13
+ 1863051677	04:22:20	04:22:31
+ 1863051677	04:22:50	04:22:58
+ 1863051677	04:24:35	04:24:45
+ 1863051677	04:26:03	04:26:16
+ 1863051677	04:27:03	04:27:10
+ 1863051677	04:27:26	04:27:42
+ 1863051677	04:29:45	04:30:16
+ 1863051677	04:31:27	04:31:45
+ 1863051677	04:32:58	04:33:10
+ 1863051677	04:33:36	04:33:47
+ 1863051677	04:34:22	04:34:28
+ 1863051677	04:35:55	04:36:17
+ 1863051677	04:38:40	04:39:04
+ 1863051677	04:46:39	04:46:46
+ 1863051677	04:49:19	04:49:28
+ 1863051677	04:51:20	04:51:35
+ 1863051677	04:52:37	04:52:48
+ 1863051677	04:54:20	04:54:39
+ 1863051677	04:56:18	04:56:23
+ 1863051677	04:57:30	04:57:48
+ 1863051677	04:59:05	04:59:21
+ 1863051677	05:01:10	05:01:25
+ 1863051677	05:02:59	05:03:20
+ 1863051677	05:04:35	05:05:04
+ 1863051677	05:05:52	05:05:58
+ 1863051677	05:06:50	05:07:13
+ 1863051677	05:07:37	05:07:42
+ 1863051677	05:08:35	05:08:42
+ 1863051677	05:10:01	05:10:10
+ 1863051677	05:10:18	05:10:23
+ 1863051677	05:11:55	05:12:03
+ 1863051677	05:13:52	05:14:03
+ 1863051677	05:14:47	05:14:56
+ 1863051677	05:15:56	05:16:18
+ 1863051677	05:19:48	05:20:06
+ 1863051677	05:21:43	05:22:03
+ 1863051677	05:24:39	05:25:11
+ 1863051677	05:39:37	05:39:49
+ 1863051677	05:42:48	05:42:52
+ 1863051677	05:45:25	05:45:37
+ 1863051677	05:46:41	05:46:59
+ 1863051677	05:47:34	05:47:38
+ 1863051677	05:48:11	05:48:20
+ 1863051677	05:50:41	05:50:51
+ 1863051677	05:52:13	05:52:18
+ 1863051677	05:52:59	05:53:16
+ 1863051677	05:54:17	05:54:40
+ 1863051677	05:57:13	05:57:29
+ 1863051677	05:58:45	05:59:00
+ 1863051677	05:59:28	05:59:35
+ 1863051677	06:00:45	06:00:55
+ 1863051677	06:02:00	06:02:08
+ 1863051677	06:03:38	06:03:48
+ 1863051677	06:04:47	06:05:05
+ 1886367077	00:06:45	00:07:00
+ 1886367077	00:09:20	00:09:32
+ 1886367077	00:09:47	00:09:53
+ 1886367077	00:10:43	00:10:51
+ 1886367077	00:12:12	00:12:18
+ 1886367077	00:13:14	00:13:19
+ 1886367077	00:14:07	00:14:25
+ 1886367077	00:16:52	00:17:03
+ 1886367077	00:19:00	00:19:04
+ 1886367077	00:20:10	00:20:20
+ 1886367077	00:21:35	00:21:45
+ 1886367077	00:22:56	00:23:10
+ 1886367077	00:24:19	00:24:34
+ 1886367077	00:26:24	00:26:42
+ 1886367077	00:27:55	00:28:05
+ 1886367077	00:28:57	00:29:25
+ 1886367077	00:30:40	00:31:04
+ 1886367077	00:31:09	00:31:15
+ 1886367077	00:32:33	00:32:48
+ 1886367077	00:34:06	00:34:21
+ 1886367077	00:35:30	00:35:45
+ 1886367077	00:38:45	00:39:10
+ 1886367077	00:48:40	00:49:00
+ 1886367077	00:50:38	00:50:56
+ 1886367077	00:52:02	00:52:24
+ 1886367077	00:56:30	00:56:53
+ 1886367077	00:58:48	00:59:15
+ 1886367077	01:01:10	01:01:40
+ 1886367077	01:03:03	01:03:40
+ 1886367077	01:05:53	01:06:10
+ 1886367077	01:07:17	01:07:42
+ 1886367077	01:08:20	01:08:55
+ 1886367077	01:09:20	01:09:41
+ 1886367077	01:11:42	01:12:27
+ 1863359610	00:20:15	00:20:25
+ 1863359610	00:23:12	00:23:20
+ 1863359610	00:24:10	00:24:21
+ 1863359610	00:25:08	00:25:25
+ 1863359610	00:27:35	00:27:45
+ 1863359610	00:28:44	00:29:00
+ 1863359610	00:29:39	00:30:15
+ 1863359610	00:31:16	00:31:25
+ 1863359610	00:33:44	00:33:54
+ 1863359610	00:37:05	00:37:14
+ 1863359610	00:39:00	00:39:10
+ 1863359610	00:40:35	00:41:15
+ 1863359610	00:41:56	00:42:06
+ 1863359610	00:54:36	00:54:42
+ 1863359610	00:55:37	00:55:51
+ 1863359610	00:57:35	00:57:42
+ 1863359610	00:59:06	00:59:15
+ 1863359610	01:01:25	01:01:31
+ 1863359610	01:02:22	01:02:40
+ 1863359610	01:03:25	01:03:35
+ 1863359610	01:05:34	01:05:40
+ 1863359610	01:07:23	01:07:41
+ 1863359610	01:08:04	01:08:22
+ 1863359610	01:09:48	01:10:03
+ 1863359610	01:11:15	01:11:31
+ 1863359610	01:11:55	01:12:22
+ 1863359610	01:14:48	01:15:15
+ 1863359610	01:16:20	01:16:32
+ 1863359610	01:17:22	01:17:32
+ 1863359610	01:18:25	01:18:46
+ 1863359610	01:19:33	01:20:07
+ 1863359610	01:27:31	01:27:47
+ 1863359610	01:30:40	01:30:46
+ 1863359610	01:31:16	01:31:36
+ 1863359610	01:32:32	01:33:22
+ 1863359610	01:34:02	01:34:10
+ 1863359610	01:34:48	01:35:00
+ 1863359610	01:48:09	01:48:18
+ 1863359610	01:50:12	01:50:25
+ 1863359610	01:55:32	01:55:50
+ 1863359610	01:58:58	01:59:16
+ 1863359610	02:00:58	02:01:10
+ 1863359610	02:04:10	02:04:28
+ 1863359610	02:06:22	02:06:32
+ 1863359610	02:07:27	02:07:32
+ 1863359610	02:08:04	02:08:17
+ 1863359610	02:11:17	02:11:39
+ 1863359610	02:11:57	02:12:03
+ 1863359610	02:14:14	02:14:35
+ 1863359610	02:16:40	02:16:52
+ 1863359610	02:17:45	02:18:10
+ 1863359610	02:33:28	02:33:47
+ 1863359610	02:35:15	02:35:29
+ 1863359610	02:37:15	02:37:42
+ 1863359610	02:38:51	02:39:03
+ 1863359610	02:40:21	02:40:31
+ 1863359610	02:41:51	02:42:10
+ 1863359610	02:42:34	02:42:47
+ 1863359610	02:44:06	02:44:36
+ 1863359610	02:48:02	02:48:25
+ 1863359610	02:48:48	02:48:58
inference.py ADDED
@@ -0,0 +1,58 @@
+ from pathlib import Path
+ import numpy as np
+ from torch.utils.data import DataLoader
+ import polars as pl
+ import lightning as L
+ from data_utils.frame_dataset import FrameDataset
+ import torch
+ 
+ from models.lightning_wrapper import LightningWrapper
+ 
+ 
+ def run_inference(
+     model_path: Path,
+     image_folder: Path,
+     aggregate_duration: int = 30,
+     fps: int = 3,
+ ) -> pl.DataFrame:
+     model = LightningWrapper.load_from_checkpoint(model_path)
+     trainer = L.Trainer()
+ 
+     paths = list(image_folder.rglob("*.jpg"))
+     df = pl.DataFrame(
+         {"path": paths, "frame": [int(p.stem.removeprefix("img")) for p in paths]}
+     ).sort("frame")
+ 
+     ds = FrameDataset(df, model.get_transforms(is_training=False), 1, is_train=False)
+     dls = DataLoader(ds, batch_size=32, num_workers=2, pin_memory=True)
+ 
+     preds_list: list[torch.Tensor] = trainer.predict(model, dataloaders=dls)  # type: ignore
+     preds = torch.cat(preds_list)
+     pred_class = torch.argmax(preds, dim=1)
+     preds_class = np.repeat(pred_class.numpy(), ds.frames_per_clip)
+ 
+     df = df.with_columns(preds=pl.Series(preds_class))
+ 
37
+ pl.sum("preds")
38
+ )
39
+ seconds = pl.col("frame")
40
+ df_g = (
41
+ df_g.with_columns(pl.col("frame") * aggregate_duration)
42
+ .with_columns(
43
+ hour=seconds // (60 * 60), minute=(seconds // 60) % 60, second=seconds % 60
44
+ )
45
+ .with_columns(
46
+ timestamp=pl.datetime(
47
+ year=2023,
48
+ month=12,
49
+ day=10,
50
+ hour=pl.col("hour"),
51
+ minute="minute",
52
+ second="second",
53
+ )
54
+ )
55
+ .sort("timestamp")
56
+ )
57
+
58
+ return df_g
ingest.py ADDED
@@ -0,0 +1,49 @@
+ import subprocess
+ from pathlib import Path
+ 
+ 
+ def download_twitch_stream(TWITCH_ID: str, end_time: str | None = None):
+     out_path = Path(f"downloaded/{TWITCH_ID}.mp4")
+     out_path.parent.mkdir(exist_ok=True, parents=True)
+     if out_path.exists():
+         print(f"Already downloaded {TWITCH_ID}")
+         return
+ 
+     end_time = ["-e", end_time] if end_time is not None else []
+     subprocess.Popen(
+         [
+             "twitch-dl",
+             "download",
+             TWITCH_ID,
+             "-q",
+             "720p60",
+             *end_time,
+             "--output",
+             str(out_path),
+         ],
+     ).communicate()
+     return True
+ 
+ 
+ def vid_to_frames(TWITCH_ID: str, use_cuda: bool = True, frames: int = 3):
+     in_path = Path(f"downloaded/{TWITCH_ID}.mp4")
+     out_path = Path(f"converted/{TWITCH_ID}")
+     if out_path.exists():
+         print(f"Already converted {TWITCH_ID} to frames")
+         return
+     out_path.mkdir(parents=True, exist_ok=True)
+ 
+     use_cuda = ["-hwaccel", "cuda"] if use_cuda else []
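+     # Sample `frames` frames per second and write numbered JPEGs
+     # (img1.jpg, img2.jpg, ...) at quality 25; these are the frames that the
+     # dataset and inference code read back.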
+     subprocess.Popen(
+         [
+             "ffmpeg",
+             *use_cuda,
+             "-i",
+             str(in_path),
+             "-vf",
+             f"fps={frames}",
+             "-q:v",
+             "25",
+             f"{out_path}/img%d.jpg",
+         ],
+     ).communicate()
models/lightning_wrapper.py ADDED
@@ -0,0 +1,55 @@
+ import torch
+ import torch.nn.functional as F
+ import lightning as L
+ import torchmetrics
+ import timm
+ 
+ 
+ class LightningWrapper(L.LightningModule):
+     def __init__(self, timm_model: str, num_classes: int, learning_rate: float = 1e-3):
+         super().__init__()
+         self.timm_model = timm_model
+         self.lr = learning_rate
+         self.model = timm.create_model(
+             self.timm_model, pretrained=True, num_classes=num_classes
+         )
+         self.save_hyperparameters(ignore=["model"])
+ 
+         metrics = torchmetrics.MetricCollection(
+             {
+                 "accuracy": torchmetrics.Accuracy(
+                     task="multiclass", num_classes=self.model.num_classes
+                 )
+             }
+         )
+ 
+         self.train_metrics = metrics.clone(prefix="train_")
+         self.val_metrics = metrics.clone(prefix="val_")
+ 
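+     # Resolve the preprocessing (input size, interpolation, normalization)
+     # the pretrained timm backbone expects, as train or eval transforms.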
+     def get_transforms(self, is_training: bool):
+         data_config = timm.data.resolve_model_data_config(self.timm_model)
+         return timm.data.create_transform(**data_config, is_training=is_training)
+ 
+     def forward(self, x):
+         return self.model(x)
+ 
+     def training_step(self, batch, batch_idx):
+         x, y = batch
+         logits = self(x)
+         loss = F.cross_entropy(logits, y)
+         self.log("train_loss", loss)
+         self.train_metrics(logits, y)
+         self.log_dict(self.train_metrics, prog_bar=True)
+         return loss
+ 
+     def validation_step(self, batch, batch_idx):
+         x, y = batch
+         logits = self(x)
+         loss = F.cross_entropy(logits, y)
+         self.log("val_loss", loss)
+         self.val_metrics(logits, y)
+         self.log_dict(self.val_metrics, prog_bar=True)
+ 
+     def configure_optimizers(self):
+         optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
+         return optimizer
models/resnet.py ADDED
@@ -0,0 +1,13 @@
+ import torch.nn as nn
+ from torchvision.models import ResNet
+ 
+ 
+ class ResNetClassifier(nn.Module):
+     def __init__(self, model: ResNet, num_classes: int = 2):
+         super().__init__()
+         self.num_classes = num_classes
+         self.model = model
+         self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
+ 
+     def forward(self, x):
+         return self.model(x)
models/rnn.py ADDED
@@ -0,0 +1,34 @@
+ import torch
+ import torch.nn as nn
+ from torchvision.models import ResNet
+ 
+ 
+ class RNNClassifier(nn.Module):
+     def __init__(self, model: ResNet, num_classes: int = 2):
+         super().__init__()
+         self.num_classes = num_classes
+         self.feature_extractor = model  # repeat thrice
+         self.feature_extractor.fc = nn.Linear(512, 512)  # New fc layer
+         self.rnn = nn.LSTM(
+             input_size=512, hidden_size=256, num_layers=1, batch_first=True
+         )
+         self.classifier = nn.Linear(256, num_classes)
+ 
+     def forward(self, x):
+         features = []
+ 
+         # Pass each frame through ResNet sequentially
+         for i in range(x.shape[1]):
+             frame_feat = self.feature_extractor(x[:, i])
+             features.append(frame_feat)
+ 
+         x = torch.reshape(torch.stack(features), [x.shape[0], x.shape[1], -1])
+ 
+         # Apply RNN
+         out, _ = self.rnn(x)
+         out = out[:, -1, :]
+ 
+         # Classify
+         out = self.classifier(out)
+ 
+         return out
r2.py ADDED
@@ -0,0 +1,66 @@
+ from pathlib import Path
+ import subprocess
+ 
+ 
+ def compress(TWITCH_ID: str) -> str:
+     file = f"{TWITCH_ID}.tar.lz4"
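+     # -I lz4 pipes the tar stream through lz4, matching the .tar.lz4 name
+     # (assumes the lz4 binary is on PATH).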
+ subprocess.Popen(["tar", "-clvf", file, TWITCH_ID]).communicate()
8
+
9
+ return file
10
+
11
+
12
+ def upload(file: str, prefix: str = "frames/"):
13
+ subprocess.Popen(
14
+ [
15
+ "rclone",
16
+ "--config",
17
+ "rclone.conf",
18
+ "copy",
19
+ file,
20
+ f"r2:lol-highlights-eu/{prefix}/",
21
+ ]
22
+ ).communicate()
23
+
24
+
25
+ def download(file: str, out_folder: str = "."):
26
+ if not Path(file).exists():
27
+ print(
28
+ subprocess.Popen(
29
+ [
30
+ "rclone",
31
+ "--config",
32
+ "rclone.conf",
33
+ "copy",
34
+ f"r2:lol-highlights-eu/{file}",
35
+ out_folder,
36
+ ]
37
+ ).communicate()
38
+ )
39
+ return file
40
+
41
+
42
+ def list_files(directory: str) -> list[str]:
43
+ out, _ = subprocess.Popen(
44
+ [
45
+ "rclone",
46
+ "--config",
47
+ "rclone.conf",
48
+ "ls",
49
+ "--exclude",
50
+ "*.jpg",
51
+ f"r2:lol-highlights-eu/{directory}",
52
+ ],
53
+ stdout=subprocess.PIPE,
54
+ ).communicate()
55
+ out = [x.strip().split(" ")[-1] for x in out.decode("utf-8").split("\n") if len(x)]
56
+ return out
57
+
58
+
59
+ def decompress(file: str):
60
+ subprocess.Popen(["tar", "-xvf", file]).communicate()
61
+
62
+
63
+ def download_frames_and_unpack(filename: str):
64
+ download(f"frames/{filename}")
65
+ decompress(filename)
66
+ Path(filename).unlink()
sol_app.py ADDED
@@ -0,0 +1,87 @@
+ from pathlib import Path
+ import solara
+ import solara.lab
+ from solara_app import folders, sol_utils
+ from solara_app.mini_components.simple import Progress
+ from solara_app.page_download import DownloadConvertPersist
+ from solara_app.page_inference import Inference
+ from solara_app.page_models import DownloadModels
+ 
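+ # Zephyr-style chat template used by TinyLlama-1.1B-Chat: system and user
+ # turns are closed with </s>, and the prompt ends at <|assistant|> so the
+ # model generates only the reply (the title).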
+ PROMPT = """<|system|>
11
+ You are a chatbot who help write successful titles for Youtube videos of League of Legends Highlights from TheBaus that are generated using AI!</s>
12
+ <|user|>
13
+ Write me a title that fits a video of TheBaus who wins games as Sion even through having a lot of deaths - good deaths.</s>
14
+ <|assistant|>"""
15
+
16
+
17
+ @solara.component
18
+ def SidebarUpload(selected_page: solara.Reactive[str]):
19
+ with solara.Sidebar():
20
+ solara.Title("League of Legend Highlight Extractor")
21
+ if Path("rclone.conf").exists():
22
+ solara.Success("rclone.conf uploaded.")
23
+ else:
24
+ dump_file = sol_utils.persist_uploaded_file("rclone.conf")
25
+ solara.FileDrop(label="Drop R2 Config", lazy=False, on_file=dump_file)
26
+ solara.Error("Upload rclone.conf first!")
27
+ solara.Select(
28
+ "Select Page",
29
+ [
30
+ "Inference",
31
+ "Download, Convert and Persist Twitch Clips",
32
+ "Download Model(s)",
33
+ "Generate Video Title",
34
+ ],
35
+ value=selected_page,
36
+ )
37
+
38
+
39
+ @solara.component
40
+ def Page():
41
+ folders.create_default_folders()
42
+
43
+ selected_page = solara.use_reactive("Inference")
44
+ SidebarUpload(selected_page)
45
+
46
+ if not Path("rclone.conf").exists():
47
+ solara.Error("Upload rclone.conf first!")
48
+ else:
49
+ if selected_page.value == "Inference":
50
+ Inference()
51
+ elif selected_page.value == "Download, Convert and Persist Twitch Clips":
52
+ DownloadConvertPersist()
53
+ elif selected_page.value == "Download Model(s)":
54
+ DownloadModels()
55
+ elif selected_page.value == "Generate Video Title":
56
+ solara.Markdown(
57
+ """
58
+ ## Title Generator
59
+
60
+ Generate a title using a Large Language Model (**LLM**).
61
+ """
62
+ )
63
+ solara.InputText(
64
+ "What should title be based on?",
65
+ "TheBaus is a famous streamer who usually plays Sion, this highlight sections show-cases both (good) deaths and wins!",
66
+ )
67
+ from transformers import pipeline
68
+
69
+ title = solara.use_reactive(None)
70
+ clicks = solara.use_reactive(0)
71
+
72
+ def gen_title():
73
+ pipe = pipeline(
74
+ "text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
75
+ )
76
+ out = pipe(PROMPT)
77
+ title.value = out[0]["generated_text"].replace(PROMPT, "")
78
+
79
+ solara.Button("Generate!", on_click=lambda: clicks.set(clicks.value + 1))
80
+
81
+ if clicks.value > 0:
82
+ res = solara.use_thread(gen_title)
83
+ if res.state == solara.ResultState.RUNNING:
84
+ Progress("Running...")
85
+ if title.value:
86
+ solara.Markdown("Title:")
87
+ solara.Text(title.value)
solara_app/__init__.py ADDED
File without changes
solara_app/css.py ADDED
@@ -0,0 +1,2 @@
+ JUSTIFY_CENTER = {"justify-content": "center"}
+ ALIGN_CENTER = {"align-items": "center"}
solara_app/folders.py ADDED
@@ -0,0 +1,14 @@
+ from pathlib import Path
+ 
+ 
+ TMP = Path("tmp")
+ OUT = Path("out")
+ DOWNLOADED = Path("downloaded")
+ CONVERTED = Path("converted")
+ CHECKPOINTS = Path("ckpts")
+ _ALL_PATHS = [TMP, OUT, DOWNLOADED, CONVERTED, CHECKPOINTS]
+ 
+ 
+ def create_default_folders():
+     for path in _ALL_PATHS:
+         path.mkdir(parents=True, exist_ok=True)
solara_app/infer.py ADDED
@@ -0,0 +1,23 @@
+ from pathlib import Path
+ import solara
+ import polars as pl
+ 
+ from inference import run_inference
+ from utils.movie_clips import build_video
+ 
+ 
+ @solara.memoize
+ def solara_run_inference(
+     model_path: Path,
+     image_folder: Path,
+     aggregate_duration: int = 30,
+     fps: int = 3,
+ ) -> pl.DataFrame:
+     return run_inference(model_path, image_folder, aggregate_duration, fps)
+ 
+ 
+ @solara.memoize(key=lambda _, _2, highlight_vid: highlight_vid)
+ def convert_vid(
+     file_name: str | Path, time_dict: list[dict[str, str]], highlight_vid: Path
+ ):
+     return build_video(file_name, time_dict, highlight_vid)
solara_app/mini_components/c_inference.py ADDED
@@ -0,0 +1,41 @@
+ import solara
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+ import torch
+ 
+ # Use NVIDIA's hardware encoder when CUDA is available.
+ CODEC = {"codec": "h264_nvenc"} if torch.cuda.is_available() else {}
+ 
+ 
+ @solara.memoize(
+     key=lambda _, disabled, file_name, cache_key: f"{disabled}_{file_name}{cache_key}"
+ )
+ def write_full_video(
+     start_stop: list[dict[str, str]], disabled: dict, file_name: str, cache_key: str
+ ) -> str:
+     vid_clip = VideoFileClip(f"downloaded/{file_name}.mp4")
+     clips = []
+     for i, tstamp in enumerate(start_stop):
+         if disabled.get(i):
+             continue
+         clips.append(vid_clip.subclip(tstamp["start"], tstamp["end"]))
+ 
+     # Concatenate the video clips with transitions
+     final_clip = concatenate_videoclips(clips)
+ 
+     # Write the final concatenated movie to a file
+     file = vid_clip.filename.replace("downloaded", "out")
+ 
+     final_clip.write_videofile(file, **CODEC)
+ 
+     return file
+ 
+ 
+ @solara.memoize
+ def write_video(start: str, stop: str, id: int, file_name: str) -> str:
+     vid_clip = VideoFileClip(f"downloaded/{file_name}.mp4")
+ 
+     clip = vid_clip.subclip(start, stop)
+     file = f"tmp/{file_name}_{start}_{stop}_{id}.mp4"
+ 
+     clip.write_videofile(file, **CODEC)
+ 
+     return file
solara_app/mini_components/simple.py ADDED
@@ -0,0 +1,35 @@
+ from typing import Any
+ import solara
+ from ipywidgets import Video as iPyVideo
+ 
+ 
+ @solara.component()
+ def Progress(msg: str):
+     with solara.Column(align="center", style={"justify-content": "center"}):
+         solara.SpinnerSolara()
+         solara.Markdown(msg)
+ 
+ 
+ @solara.component()
+ def ProgressDynamic(
+     msg: str,
+     result: solara.Result[Any],
+     dynamic_progress: solara.Reactive[str | int | float] | None = None,
+ ):
+     if result.state == solara.ResultState.RUNNING:
+         Progress(msg)
+         if dynamic_progress is not None:
+             progress = dynamic_progress.value
+             match progress:
+                 case int():
+                     solara.ProgressLinear(progress)
+                 case float():
+                     solara.ProgressLinear(int(progress * 100))
+                 case str():
+                     solara.Markdown(progress)
+ 
+ 
+ @solara.component
+ def Video(file_name: str, width: int = 500, autoplay: bool = False, loop: bool = False):
+     vid = iPyVideo.from_file(file_name, width=width, autoplay=autoplay, loop=loop)
+     solara.display(vid)
solara_app/page_download.py ADDED
@@ -0,0 +1,33 @@
+ import solara
+ import torch
+ import ingest
+ from solara_app.mini_components.simple import Progress
+ 
+ 
+ @solara.component()
+ def DownloadConvertPersist():
+     twitch_id = solara.use_reactive("")
+     is_downloading, set_downloading = solara.use_state(False)
+     status, set_status = solara.use_state("")
+     end_time = solara.use_reactive(None)
+ 
+     def start_download():
+         set_downloading(True)
+         set_status("")
+         ingest.download_twitch_stream(twitch_id.value, end_time=end_time.value)
+         set_status("Converting to frames...")
+         ingest.vid_to_frames(twitch_id.value, use_cuda=torch.cuda.is_available())
+ 
+         set_status("Download completed")
+         set_downloading(False)
+ 
+     solara.InputText("Select Twitch ID", twitch_id, disabled=is_downloading)
+     solara.InputText("End Time (hh:mm:ss)", end_time)
+ 
+     solara.Markdown(f"You Selected {twitch_id.value}")
+     solara.Button("Download", start_download, disabled=is_downloading)
+ 
+     if is_downloading:
+         Progress("Downloading...")
+ 
+     solara.Text(status)
solara_app/page_inference.py ADDED
@@ -0,0 +1,164 @@
+ import datetime
+ from pathlib import Path
+ import polars as pl
+ import solara
+ 
+ from solara_app import sol_utils
+ from solara_app.css import ALIGN_CENTER, JUSTIFY_CENTER
+ from solara_app.folders import CHECKPOINTS, CONVERTED
+ from solara_app.infer import solara_run_inference
+ from solara_app.mini_components.c_inference import write_full_video, write_video
+ from solara_app.mini_components.simple import Progress, ProgressDynamic, Video
+ from utils import time_slice
+ 
+ 
+ def false() -> bool:
+     return False
+ 
+ 
+ @solara.component
+ def DfSelectComponent(df: pl.DataFrame, file: str):
+     extend_forward = solara.use_reactive({})
+     extend_backward = solara.use_reactive({})
+     disabled = solara.use_reactive({})
+     selected_vid, set_selected_vid = solara.use_state(0)
+     cut_off = solara.use_reactive(5)
+     start_stop: solara.Reactive[list[datetime.datetime]] = solara.use_reactive(
+         [df["timestamp"].min(), df["timestamp"].max()]
+     )  # type: ignore
+     clicks, set_clicks = solara.use_state(0)
+ 
+     with solara.Card(
+         "Highlight Selection & Editing",
+         "Select highlight threshold, remove or expand clips",
+     ):
+         sol_utils.CutOffChartSelection(cut_off, start_stop, df)
+         df = df.filter(
+             pl.col("timestamp").is_between(start_stop.value[0], start_stop.value[1])
+         )
+ 
+         time_df = time_slice.create_start_end_time(
+             df, cut_off.value, extend_forward.value, extend_backward.value
+         )
+ 
+         time_dict = time_df.select(pl.all().dt.strftime("%H:%M:%S")).to_dicts()
+         time_dict = solara.use_reactive(time_dict)
+         file_name = f"{file.replace('converted', 'downloaded')}.mp4"
+ 
+         if len(time_dict.value) == 0:
+             solara.Warning("No Highlights available...")
+             return
+ 
+         tstamp = time_dict.value[selected_vid]
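+         # Render only the currently selected clip, in a background thread, so
+         # browsing between highlights stays responsive.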
+         res = write_video.use_thread(
+             tstamp["start"],
+             tstamp["end"],
+             selected_vid,
+             Path(file_name).stem,
+         )
+ 
+         ProgressDynamic("Building Clip...", res)
+ 
+         # TODO: extract into component.
+         if res.state == solara.ResultState.FINISHED:
+             with solara.Row(style={**JUSTIFY_CENTER, **ALIGN_CENTER}):
+                 solara.Button(
+                     "<",
+                     disabled=selected_vid == 0,
+                     on_click=lambda: set_selected_vid(selected_vid - 1),
+                 )
+                 Video(res.value)
+                 solara.Button(
+                     ">",
+                     disabled=selected_vid == (len(time_dict.value) - 1),
+                     on_click=lambda: set_selected_vid(selected_vid + 1),
+                 )
+ 
+             with solara.Column(style=JUSTIFY_CENTER):
+                 with solara.Row(style=JUSTIFY_CENTER):
+                     solara.InputInt(
+                         "Expand Leftwards (s)",
+                         value=extend_backward.value.get(selected_vid, 0),
+                         on_value=lambda v: extend_backward.set(
+                             {**extend_backward.value, selected_vid: v}
+                         ),
+                     )
+                     solara.InputInt(
+                         "Expand Rightwards (s)",
+                         value=extend_forward.value.get(selected_vid, 0),
+                         on_value=lambda v: extend_forward.set(
+                             {**extend_forward.value, selected_vid: v}
+                         ),
+                     )
+ 
+                 def disable_vid(vid: int):
+                     return lambda: disabled.set(
+                         {**disabled.value, vid: not disabled.value.get(vid)}
+                     )
+ 
+                 solara.Button(
+                     (
+                         "✅ Add Video"
+                         if disabled.value.get(selected_vid)
+                         else "❌ Remove Video"
+                     ),
+                     on_click=disable_vid(selected_vid),
+                     style={"width": "25%"},
+                 )
+ 
+     with solara.Card("Full Video", "Build the full video!"):
+         solara.Button(
+             "Build Full Video",
+             color="primary",
+             on_click=lambda: set_clicks(clicks + 1),
+         )
+ 
+         if clicks > 0:
+             res_full = write_full_video.use_thread(
+                 time_dict.value,
+                 disabled.value,
+                 Path(file_name).stem,
+                 str(time_dict),
+             )
+             if res_full.state == solara.ResultState.RUNNING:
+                 Progress("Building Full Clip...")
+             elif res_full.state == solara.ResultState.FINISHED:
+                 solara.FileDownload(
+                     lambda: open(res_full.value, "rb"), Path(res_full.value).name
+                 )
+ 
+ 
+ @solara.component
+ def ShowDfComponent(model: str, file: str):
+     df = solara_run_inference.use_thread(
+         Path(model),
+         Path(file),
+         aggregate_duration=10,
+     )
+ 
+     if df.state == solara.ResultState.RUNNING:
+         Progress("Running...")
+     elif df.state == solara.ResultState.FINISHED and df.value is not None:
+         DfSelectComponent(df.value, file)
+ 
+ 
+ @solara.component()
+ def Inference():
+     files = [str(p) for p in CONVERTED.glob("*") if p.is_dir()]
+     models = [str(p) for p in CHECKPOINTS.rglob("*.ckpt")]
+     file = solara.use_reactive(files[0] if len(files) else None)
+     model = solara.use_reactive(models[0] if len(models) else None)
+ 
+     if model.value is None or file.value is None:
+         return solara.Markdown(
+             "**It's required to at least download one stream and have one model available!**"
+         )
+ 
+     clicked = solara.use_reactive(False)
+ 
+     sol_utils.ModelFileSelectComponent(file, model, clicked)
+ 
+     if clicked.value:
+         ShowDfComponent(model.value, file.value)
+     else:
+         solara.Markdown("**Start running to get further. 🚀**")
solara_app/page_models.py ADDED
@@ -0,0 +1,32 @@
+ from pathlib import Path
+ import solara
+ 
+ import r2
+ from solara_app.folders import CHECKPOINTS
+ 
+ from solara_app.mini_components.simple import Progress
+ 
+ 
+ @solara.component
+ def DownloadModels():
+     models = solara.use_thread(lambda: r2.list_files("models"))
+     selected_models: solara.Reactive[list[str]] = solara.use_reactive([])
+ 
+     if models.state == solara.ResultState.FINISHED:
+         unavailable_models: list[str] = [
+             m for m in (models.value or []) if not Path(m).exists()
+         ]
+         solara.SelectMultiple(
+             "Select model(s) to download",
+             selected_models,
+             unavailable_models,  # type: ignore
+         )
+ 
+         for m in selected_models.value:
+             output = solara.use_thread(
+                 lambda: r2.download(f"models/{m}", out_folder=CHECKPOINTS)
+             )
+             if output.state == solara.ResultState.RUNNING:
+                 Progress(f"Downloading {m}...")
+             elif output.state == solara.ResultState.FINISHED:
+                 solara.Success(f"Downloaded {output.value}", icon=True)
solara_app/sol_utils.py ADDED
@@ -0,0 +1,86 @@
+ import datetime
+ from typing import Any, Callable
+ from solara.components.file_drop import FileInfo
+ import solara
+ import polars as pl
+ import plotly.express as px
+ from dateutil import parser
+ from solara_app.folders import CHECKPOINTS, CONVERTED
+ 
+ 
+ def persist_uploaded_file(
+     filename: str, key: str = "data"
+ ) -> Callable[[FileInfo], None]:
+     def func(data: FileInfo) -> None:
+         with open(filename, "wb") as f:
+             f.write(data[key])
+ 
+     return func
+ 
+ 
+ @solara.component
+ def ModelFileSelectComponent(
+     file: solara.Reactive[str],
+     model: solara.Reactive[str],
+     clicked: solara.Reactive[bool],
+ ):
+     files = [str(p) for p in CONVERTED.glob("*") if p.is_dir()]
+     models = [str(p) for p in CHECKPOINTS.rglob("*.ckpt")]
+     _clicked = solara.use_reactive(clicked)
+     with solara.Card("Select Video/Model"):
+         with solara.Columns():
+             solara.Select(
+                 "Select File",
+                 values=files,
+                 value=file,
+             )
+             solara.Select(
+                 "Select Model",
+                 values=models,
+                 value=model,
+             )
+         solara.Button(
+             "Run Inference!",
+             color="primary",
+             on_click=lambda: _clicked.set(True),
+         )
+ 
+ 
+ @solara.component
+ def CutOffChartSelection(
+     cut_off: solara.Reactive[int],
+     start_stop: solara.Reactive[list[datetime.datetime]],
+     df: pl.DataFrame,
+ ):
+     div = solara.Column()
+ 
+     solara.SliderInt(
+         "Highlight Y-Cutoff",
+         cut_off,
+         min=df["preds"].min() + 1,
+         max=df["preds"].max(),
+         thumb_label="always",
+         tick_labels="end_points",
+     )
+     with div:
+         fig = px.line(
+             df, x="timestamp", y="preds", line_shape="hv", range_x=start_stop.value
+         )
+         fig.add_hline(y=cut_off.value, line_color="red")
+ 
+         def update_vals(relayout_dict: dict[str, Any] | None):
+             if relayout_dict is not None:
+                 layout = relayout_dict["relayout_data"]
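+                 # Plotly reports a drag-zoom as separate "xaxis.range[0]"/"[1]"
+                 # keys, while other relayout events send one "xaxis.range" pair.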
+ if "xaxis.range[0]" in layout:
75
+ start_stop.value = [
76
+ parser.parse(layout["xaxis.range[0]"], ignoretz=True),
77
+ parser.parse(layout["xaxis.range[1]"], ignoretz=True),
78
+ ]
79
+ else:
80
+ xaxis_range = layout["xaxis.range"]
81
+ start_stop.value = [
82
+ parser.parse(xaxis_range[0], ignoretz=True),
83
+ parser.parse(xaxis_range[1], ignoretz=True),
84
+ ]
85
+
86
+ solara.FigurePlotly(fig, on_relayout=update_vals)
streamlit_app/__init__.py ADDED
File without changes
streamlit_app/explainer.py ADDED
@@ -0,0 +1,24 @@
+ import shap
+ 
+ 
+ def explain(predict, images):
+     topk = 4
+     batch_size = 50
+     n_evals = 10000
+ 
+     # define a masker that is used to mask out partitions of the input image.
+     masker_blur = shap.maskers.Image("blur(128,128)", images[0].shape)
+ 
+     # create an explainer with the model's predict function and image masker
+     explainer = shap.Explainer(
+         predict, masker_blur, output_names=["Nothing", "Highlight"]
+     )
+ 
+     # explain one image using n_evals evaluations of the underlying model
+     # to estimate the SHAP values
+     shap_values = explainer(
+         images[1:2],
+         max_evals=n_evals,
+         batch_size=batch_size,
+         outputs=shap.Explanation.argsort.flip[:topk],
+     )
+     return shap_values
streamlit_app/page_download.py ADDED
@@ -0,0 +1,54 @@
+ import json
+ from pathlib import Path
+ import subprocess
+ import streamlit as st
+ import ingest
+ from utils import kick_dl
+ 
+ 
+ def download_convert_persist():
+     service = st.radio("Streaming service", ["Twitch", "Kick"])
+     if service == "Twitch":
+         twitch_id = st.text_input("Enter Twitch ID")
+         st.write(f"You Selected {twitch_id}")
+ 
+         if st.button("Download"):
+             with st.spinner():
+                 st.write("Downloading...")
+                 ingest.download_twitch_stream(twitch_id)
+                 st.write("Converting...")
+                 ingest.vid_to_frames(twitch_id, use_cuda=False)
+             st.success("Downloaded!")
+ 
+     elif service == "Kick":
+         kick_id = st.text_input("Enter Kick ID")
+         name = st.text_input("Nickname of video")
+         kick_id = Path(kick_id).name
+         API_PATH = "https://kick.com/api/v1/video/"
+         st.write(f"Open [this]({API_PATH}{kick_id}) and copy text into the box below.")
+         json_data = st.text_input("Copy and paste here.")
+         if len(json_data):
+             json_data = json.loads(json_data)["source"]
+ 
+         if st.button("Download"):
+             with st.spinner():
+                 st.write("Downloading...")
+                 if not Path(f"converted/{name}").exists():
+                     # Wait for ffmpeg to finish before converting to frames.
+                     subprocess.Popen(
+                         [
+                             "ffmpeg",
+                             "-i",
+                             json_data,
+                             "-vcodec",
+                             "copy",
+                             "-acodec",
+                             "copy",
+                             f"downloaded/{name}.mp4",
+                         ]
+                     ).communicate()
+ 
+                     st.write("Converting...")
+                     ingest.vid_to_frames(name, use_cuda=False)
+                     Path(f"downloaded/{name}.mp4").unlink()
+ 
+             st.success("Downloaded!")
streamlit_app/page_inference.py ADDED
@@ -0,0 +1,73 @@
+ import datetime
+ from pathlib import Path
+ import inference
+ 
+ import plotly.express as px
+ import streamlit as st
+ import polars as pl
+ from utils import time_slice
+ from utils.movie_clips import build_video, get_vid_path
+ 
+ 
+ @st.cache_data
+ def st_run_inference(
+     model_path: Path,
+     image_folder: Path,
+     aggregate_duration: int = 30,
+     fps: int = 3,
+ ) -> pl.DataFrame:
+     return inference.run_inference(model_path, image_folder, aggregate_duration, fps)
+ 
+ 
+ def inference_page():
+     with st.form("random"):
+         selected_file = st.selectbox(
+             "Select File", [str(p) for p in Path("converted").glob("*") if p.is_dir()]
+         )
+         selected_model = st.selectbox(
+             "Select Model", [str(p) for p in Path("ckpts").rglob("*.ckpt")]
+         )
+         st.form_submit_button("Extract Highlights!")
+ 
+     df_out = st_run_inference(
+         Path(selected_model),
+         Path(selected_file),
+         aggregate_duration=10,
+     )
+     chart_container = st.container()
+     cut_off = st.slider(
+         "Y-Cutoff Highlight",
+         min_value=df_out["preds"].min() + 1,
+         max_value=df_out["preds"].max() + 1,
+     )
+     with st.expander("Advanced Options"):
+         st.write("None available right now.")
+ 
+     fig = px.line(df_out, x="timestamp", y="preds", line_shape="hv")
+     fig.add_hline(cut_off, line_color="red", line_dash="dash")
+     with chart_container:
+         st.plotly_chart(fig)
+ 
+     df = time_slice.create_start_end_time(df_out, cut_off)
+     times_dict = time_slice.merge_overlaps_into_dict(df)
+     # event: datetime.time = st.select_slider(
+     #     "Validate event", options=[x["start"] for x in times_dict]
+     # )
+ 
+     highlight_vid = get_vid_path(
+         f"{selected_file.replace('converted', 'downloaded')}.mp4",
+         times_dict,
+         Path("highlights"),
+     )
+ 
+     if st.button("Create highlight Video"):
+         with st.spinner("Creating video..."):
+             build_video(
+                 f"{selected_file.replace('converted', 'downloaded')}.mp4",
+                 times_dict,
+                 highlight_vid,
+             )
+ 
+     if highlight_vid.exists():
+         st.video(str(highlight_vid))
+         st.info("Right Click to Download", icon="ℹ️")
utils.py ADDED
@@ -0,0 +1,36 @@
+ from pathlib import Path
+ import numpy as np
+ import polars as pl
+ 
+ 
+ def build_labels(label_file: Path, fps: int = 3):
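+     # Expand each labeled (start, stop) interval into one row per frame at
+     # the given fps, then left-join onto the full frame index so unmatched
+     # frames get label 0 (not a highlight).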
+     df = pl.read_parquet(label_file)
+     highlights = df.select(
+         "vid_id",
+         frame=pl.int_ranges(
+             pl.col("start").cast(pl.Duration).dt.seconds() * fps,
+             pl.col("stop").cast(pl.Duration).dt.seconds() * fps,
+         ),
+         label=pl.lit(1),
+     ).explode("frame")
+ 
+     dfs = []
+     for vid in df["vid_id"].unique():
+         frames = len(list(Path(str(vid)).glob("*.jpg")))
+         dfs.append(
+             pl.DataFrame({"vid_id": [vid] * frames, "frame": np.arange(1, frames + 1)})
+         )
+ 
+     labeled_df = pl.concat(dfs)
+     labeled_df = labeled_df.join(
+         highlights, on=["vid_id", "frame"], how="left"
+     ).fill_null(0)
+     labeled_df = labeled_df.with_columns(
+         path=pl.concat_str(
+             [
+                 pl.col("vid_id").cast(pl.Utf8) + "/img",
+                 pl.col("frame").cast(pl.Utf8) + ".jpg",
+             ]
+         )
+     )
+     labeled_df = labeled_df.sort("vid_id", "frame")
+     return labeled_df
utils/__init__.py ADDED
File without changes
utils/kick_dl.py ADDED
@@ -0,0 +1,11 @@
+ import subprocess
+ import requests
+ 
+ API_PATH = "https://kick.com/api/v1/video/"
+ 
+ 
+ def _get_source_url(url: str) -> str | None:
+     # The API exposes the HLS source under "source" (see streamlit_app/page_download.py);
+     # the broadcast ID is the last path segment of the Kick URL.
+     video_id = url.rstrip("/").rsplit("/", 1)[-1]
+     return requests.get(f"{API_PATH}{video_id}").json().get("source")
+ 
+ 
+ def download(url: str, output_path: str) -> None:
+     source_url = _get_source_url(url)
+     if not source_url:
+         raise Exception("could not find a source url for given broadcast")
+ 
+     # List form avoids shell injection via the URL.
+     subprocess.run(
+         ["ffmpeg", "-i", source_url, "-vcodec", "copy", "-acodec", "copy", output_path],
+         check=True,
+     )
utils/movie_clips.py ADDED
@@ -0,0 +1,34 @@
+ from pathlib import Path
+ from typing import Dict, List
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+ 
+ 
+ def get_vid_path(
+     orig_vid: str | Path, timestamps: List[Dict[str, str]], out: Path
+ ) -> Path:
+     out.mkdir(parents=True, exist_ok=True)
+     vid_name = Path(orig_vid).name
+     out_path = out / (vid_name + f"_{hash(str(timestamps))}.mp4")
+ 
+     return out_path
+ 
+ 
+ def build_video(orig_vid: str | Path, timestamps: List[Dict[str, str]], out_path: Path):
+     # timestamps = [{"start": "00:01:23", "end": "00:02:45"}]
+     if out_path.exists():
+         return out_path
+ 
+     video_clips = []
+     video = VideoFileClip(str(orig_vid))
+ 
+     # Extract video clips for each timestamp event
+     for timestamp in timestamps:
+         clip = video.subclip(timestamp["start"], timestamp["end"])
+         video_clips.append(clip)
+ 
+     # Concatenate the video clips with transitions
+     final_clip = concatenate_videoclips(video_clips)
+ 
+     # Write the final concatenated movie to a file
+     final_clip.write_videofile(str(out_path))
+     return out_path
utils/time_slice.py ADDED
@@ -0,0 +1,45 @@
+ import datetime
+ import polars as pl
+ 
+ SECONDS_10 = pl.duration(seconds=10)
+ RANDOM_DATE = pl.date(2023, 1, 1).dt
+ 
+ 
+ def create_start_end_time(
+     df: pl.DataFrame, cut_off: int, forward: dict | None = None, backward: dict | None = None
+ ) -> pl.DataFrame:
+     # Default to no per-clip extension (the Streamlit page calls this with
+     # only df and cut_off).
+     forward, backward = forward or {}, backward or {}
+     df = df.filter(pl.col("preds") >= cut_off).select(
+         start=pl.col("timestamp"),
+         end=pl.col("timestamp") + pl.duration(seconds=10),
+     )
+     if len(df) == 0:
+         return df
+ 
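+     # Merge windows that touch (one window's end equals the next one's start)
+     # into a single clip, then apply the per-clip backward/forward extensions.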
+     new_data = df[0].to_dicts()
+     for row in df[1:].to_dicts():
+         if new_data[-1]["end"] == row["start"]:
+             new_data[-1]["end"] = row["end"]
+         else:
+             new_data.append(row)
+     new_data = [
+         {
+             "start": d["start"] - datetime.timedelta(seconds=backward.get(i, 0)),
+             "end": d["end"] + datetime.timedelta(seconds=forward.get(i, 0)),
+         }
+         for i, d in enumerate(new_data)
+     ]
+     return pl.DataFrame(new_data)
+ 
+ 
+ def merge_overlaps_into_dict(df: pl.DataFrame):
+     if len(df) == 0:
+         return []
+ 
+     data = df.cast(pl.Time).cast(pl.Utf8).to_dicts()
+     new_data = [data[0]]
+     for row in data[1:]:
+         if new_data[-1]["end"] == row["start"]:
+             new_data[-1]["end"] = row["end"]
+         else:
+             new_data.append(row)
+     return new_data