File size: 3,890 Bytes
a57c6eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from typing import Union, List
import logging

import h5py
import numpy as np

from .emb import MediaMapEmb

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# Names exported by a star-import of this module.
__all__ = ["H5pyMediaMapEmb", "save_value_with_h5py"]


def save_value_with_h5py(
    path: str,
    value: Union[np.ndarray, None],
    key: str,
    idx: Union[int, List[int], None] = None,
    dtype=None,
    shape=None,
    overwrite: bool = False,
):
    """Write ``value`` into the dataset ``key`` of the HDF5 file at ``path``.

    The file is opened in append mode (``"a"``) and closed again before
    returning. The dataset is created when it does not exist yet.

    An existing dataset is deleted and recreated when ``overwrite`` is true,
    or when its shape differs from the expected dataset shape. The expected
    shape is ``shape`` when given; otherwise it is ``value.shape`` for a
    whole-dataset write (``idx is None``). For a partial write without an
    explicit ``shape`` the full dataset shape is unknown, so an existing
    dataset is kept as is. The shape check is skipped for datasets whose
    dtype is the h5py variable-length string dtype.

    Args:
        path: Path of the HDF5 file.
        value: Data to write. It is handed to h5py unchanged. When it is
            ``None``, both ``dtype`` and ``shape`` must be supplied.
        key: Name of the dataset inside the file.
        idx: Selection inside the dataset to write to. ``None`` writes the
            whole dataset (``[...]``).
        dtype: Dtype used when the dataset is created. Defaults to
            ``value.dtype``.
        shape: Shape used when the dataset is created. Defaults to
            ``value.shape``.
        overwrite: Delete an existing dataset before writing.
    """
    with h5py.File(path, "a") as f:
        if dtype is None:
            dtype = value.dtype

        # Shape the dataset is expected to have. It must not be derived from
        # ``value`` for partial writes: there ``value`` is only a slice of the
        # dataset, and comparing against it would recreate (and thereby wipe)
        # the dataset on every call.
        if shape is not None:
            # ``shape`` may be a bare int; normalise so it compares as a tuple.
            expected_shape = (int(shape),) if np.ndim(shape) == 0 else tuple(shape)
        elif idx is None:
            expected_shape = value.shape
        else:
            expected_shape = None

        if shape is None:
            shape = value.shape

        if key in f:
            existing = f[key]
            is_vlen_str = existing.dtype == h5py.special_dtype(vlen=str)
            shape_mismatch = (
                not is_vlen_str
                and expected_shape is not None
                and existing.shape != expected_shape
            )
            if overwrite or shape_mismatch:
                del f[key]

        if key not in f:
            f.create_dataset(key, shape=shape, dtype=dtype)

        if idx is None:
            f[key][...] = value
        else:
            f[key][idx] = value


class H5pyMediaMapEmb(MediaMapEmb):
    """``MediaMapEmb`` implementation that keeps its data in an h5py file.

    The file handle is opened in ``__init__`` and stays open until
    ``close()`` is called. The instance can also be used as a context
    manager, which closes the handle on exit::

        with H5pyMediaMapEmb(path) as emb:
            emb.set_value(["clips", "overall_algo"], array)
    """

    def __init__(self, path: str) -> None:
        """
        OfflineEmb = {
            "overall_algo": Emb,  # embedding of the whole file
            # multi-dimensional embeddings of the whole file
            "theme": np.array,  # theme
            "emotion_algo":  np.array,  # emotion
            "semantic_algo":  np.array,  # semantics

            "clips_overall_algo":  np.array, n_clip x clip_emb
            "clips_emotion_algo":  np.array, n_clip x clip_emb
            "clips_semantic_algo":  np.array, n_clip x clip_emb
            "clips_theme_algo":  np.array, n_clip x clip_emb

            "scenes_overall_algo":  np.array, n_scenes x scene_emb
            "scenes_emotion_algo":  np.array, n_scenes x scene_emb
            "scenes_semantic_algo":  np.array, n_scenes x scene_emb
            "scenes_theme_algo":  np.array, n_scenes x scene_emb
            # Segments may come from transition splitting, MusicStage, etc.;
            # clips are currently the transition-split segments.
            # If stage/paragraph segmentation is needed later, a "stage"
            # field can be added at the same level as clips.

            "frames_overall_algo":  np.array, n_frames x frame_emb
            "frames_emotion_algo":  np.array, n_frames x frame_emb
            "frames_semantic_algo":  np.array, n_frames x frame_emb
            "frames_theme_algo":  np.array, n_frames x frame_emb
            "frames_objs_algo": {
                "frame_id_algo": {  #
                    "overall_algo":  np.array, n_objs x obj_emb
                    "emotion_algo":  np.array, n_objs x obj_emb
                    "semantic_algo":  np.array, n_objs x obj_emb
                    "theme_algo":  np.array, n_objs x obj_emb
                }
            }
            "roles_algo": {
                "roleid": np.array, n x obj_emb
            }
        }

        Args:
            path (str): path of the HDF5 storage file
        """
        super().__init__(path)
        # Mode "a": read/write, creating the file when it does not exist.
        self.f = h5py.File(path, "a")

    def __enter__(self):
        """Return the instance itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file handle; exceptions from the ``with`` body propagate."""
        self.close()

    def _keys_index(self, key):
        """Build the "/"-joined dataset name for ``key``.

        Args:
            key: A single key, or a list/tuple of keys forming a path.
                ``None`` entries are dropped; the rest are converted with
                ``str``.

        Returns:
            str: The remaining entries joined with ``"/"``.
        """
        # Tuples are accepted alongside lists; wrapping a tuple as a single
        # key would stringify the whole tuple into one bogus name.
        if not isinstance(key, (list, tuple)):
            key = [key]
        return "/".join(str(part) for part in key if part is not None)

    def get_value(self, key, idx=None) -> np.ndarray:
        """Read the dataset addressed by ``key`` into a numpy array.

        Args:
            key: Key or list/tuple of keys, resolved by ``_keys_index``.
            idx: Optional selection applied to the dataset before reading.
                ``None`` reads the whole dataset.

        Returns:
            np.ndarray: The data read from the file.
        """
        new_key = self._keys_index(key)
        if idx is None:
            data = np.array(self.f[new_key])
        else:
            data = np.array(self.f[new_key][idx])
        return data

    def set_value(self, key, value, idx=None) -> None:
        """Write ``value`` into the dataset addressed by ``key``.

        When the dataset does not exist it is created with ``value.shape``
        and ``value.dtype`` -- also when ``idx`` is given, so a dataset that
        is filled through ``idx`` takes the shape of the first value written.

        Args:
            key: Key or list/tuple of keys, resolved by ``_keys_index``.
            value: Array to write; must expose ``shape`` and ``dtype`` when
                the dataset has to be created.
            idx: Optional selection to write to. ``None`` writes the whole
                dataset (``[...]``).
        """
        new_key = self._keys_index(key)
        if new_key not in self.f:
            self.f.create_dataset(new_key, shape=value.shape, dtype=value.dtype)
        if idx is None:
            self.f[new_key][...] = value
        else:
            self.f[new_key][idx] = value

    def close(self):
        """Close the underlying h5py file handle."""
        self.f.close()


class H5pyMediaMapEmbProxy(H5pyMediaMapEmb):
    """Subclass of ``H5pyMediaMapEmb`` that adds no attributes or methods."""