|
|
|
from abc import ABCMeta, abstractmethod |
|
|
|
|
|
class BaseStorageBackend(metaclass=ABCMeta):
    """Abstract interface shared by every storage backend.

    Concrete backends must override two methods: ``get()``, which reads a
    file as a byte stream, and ``get_text()``, which reads a file as text.
    """

    @abstractmethod
    def get(self, filepath):
        """Read ``filepath`` as a byte stream (to be overridden)."""

    @abstractmethod
    def get_text(self, filepath):
        """Read ``filepath`` as text (to be overridden)."""
|
|
|
|
|
class MemcachedBackend(BaseStorageBackend):
    """Storage backend that fetches values through a memcached client.

    Args:
        server_list_cfg (str): Config file for memcached server list.
        client_cfg (str): Config file for memcached client.
        sys_path (str | None): Additional path to be appended to `sys.path`
            before the ``mc`` module is imported. Default: None.
    """

    def __init__(self, server_list_cfg, client_cfg, sys_path=None):
        # Extend the import search path first so that ``mc`` can be found
        # in a non-standard location.
        if sys_path is not None:
            import sys
            sys.path.append(sys_path)

        try:
            import mc
        except ImportError:
            raise ImportError(
                'Please install memcached to enable MemcachedBackend.')

        self.server_list_cfg, self.client_cfg = server_list_cfg, client_cfg
        self._client = mc.MemcachedClient.GetInstance(
            self.server_list_cfg, self.client_cfg)

        # Buffer object handed to the client on every ``get()`` call.
        self._mc_buffer = mc.pyvector()

    def get(self, filepath):
        """Fetch the value stored under ``filepath``.

        Args:
            filepath (str | obj:`Path`): Key to look up; converted to ``str``.

        Returns:
            The result of ``mc.ConvertBuffer`` applied to the buffer the
            client filled for this key.
        """
        key = str(filepath)
        import mc
        self._client.Get(key, self._mc_buffer)
        return mc.ConvertBuffer(self._mc_buffer)

    def get_text(self, filepath):
        """Text reading is not supported by this backend."""
        raise NotImplementedError
|
|
|
|
|
class HardDiskBackend(BaseStorageBackend):
    """Storage backend that reads files straight from the local disk."""

    def get(self, filepath):
        """Return the full content of ``filepath`` as bytes.

        Args:
            filepath (str | obj:`Path`): File to read; converted to ``str``.
        """
        with open(str(filepath), 'rb') as stream:
            return stream.read()

    def get_text(self, filepath):
        """Return the full content of ``filepath`` as text.

        The file is opened in text mode without an explicit encoding.

        Args:
            filepath (str | obj:`Path`): File to read; converted to ``str``.
        """
        with open(str(filepath), 'r') as stream:
            return stream.read()
|
|
|
|
|
class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_paths (str | obj:`Path` | list | tuple): Lmdb database path, or a
            sequence of paths. Every entry is converted to ``str``.
        client_keys (str | list[str]): Lmdb client keys, one per database
            path. Default: 'default'.
        readonly (bool, optional): Lmdb environment parameter. If True,
            disallow any write operations. Default: True.
        lock (bool, optional): Lmdb environment parameter. If False, when
            concurrent access occurs, do not lock the database. Default: False.
        readahead (bool, optional): Lmdb environment parameter. If False,
            disable the OS filesystem readahead mechanism, which may improve
            random read performance when a database is larger than RAM.
            Default: False.
        **kwargs: Extra keyword arguments forwarded to ``lmdb.open``. If
            ``map_size`` is not given, it defaults to
            ``8 * 1024 * 10485760`` bytes (80 GiB).

    Attributes:
        db_paths (list[str]): Lmdb database paths.
        _client (dict): Maps each client key to its opened lmdb env.

    Raises:
        ImportError: If the ``lmdb`` package is not installed.
        TypeError: If ``db_paths`` is not a path or a list/tuple of paths.
        AssertionError: If ``client_keys`` and ``db_paths`` differ in length.
    """

    def __init__(self,
                 db_paths,
                 client_keys='default',
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        try:
            import lmdb
        except ImportError as err:
            raise ImportError(
                'Please install lmdb to enable LmdbBackend.') from err
        import os

        if isinstance(client_keys, str):
            client_keys = [client_keys]

        if isinstance(db_paths, (list, tuple)):
            self.db_paths = [str(v) for v in db_paths]
        elif isinstance(db_paths, (str, os.PathLike)):
            self.db_paths = [str(db_paths)]
        else:
            # Fail with a clear message instead of an AttributeError on the
            # never-assigned ``self.db_paths`` further down.
            raise TypeError(
                'db_paths should be a path or a list/tuple of paths, '
                f'but received {type(db_paths).__name__}.')
        assert len(client_keys) == len(self.db_paths), (
            'client_keys and db_paths should have the same length, '
            f'but received {len(client_keys)} and {len(self.db_paths)}.')

        # Use the default map size only when the caller did not supply one;
        # passing it both explicitly and through **kwargs would raise
        # "got multiple values for keyword argument 'map_size'".
        kwargs.setdefault('map_size', 8 * 1024 * 10485760)

        self._client = {}
        for client, path in zip(client_keys, self.db_paths):
            self._client[client] = lmdb.open(
                path,
                readonly=readonly,
                lock=lock,
                readahead=readahead,
                **kwargs)

    def get(self, filepath, client_key):
        """Get values according to the filepath from one lmdb named client_key.

        Args:
            filepath (str | obj:`Path`): Here, filepath is the lmdb key. It is
                converted to ``str`` and encoded as ASCII for the lookup.
            client_key (str): Used for distinguishing different lmdb envs.

        Returns:
            The value returned by ``txn.get()`` for the encoded key.
        """
        filepath = str(filepath)
        assert client_key in self._client, (f'client_key {client_key} is not '
                                            'in lmdb clients.')
        client = self._client[client_key]
        with client.begin(write=False) as txn:
            value_buf = txn.get(filepath.encode('ascii'))
        return value_buf

    def get_text(self, filepath):
        """Text reading is not supported by this backend."""
        raise NotImplementedError
|
|
|
|
|
class FileClient:
    """Front-end that reads files through a selectable storage backend.

    The backend is chosen by name at construction time and instantiated with
    the remaining keyword arguments; ``get()`` and ``get_text()`` are then
    delegated to that backend object.

    Attributes:
        backend (str): The storage backend type. Options are "disk",
            "memcached" and "lmdb".
        client (:obj:`BaseStorageBackend`): The backend object.
    """

    # Backend name -> backend class.
    _backends = {
        'disk': HardDiskBackend,
        'memcached': MemcachedBackend,
        'lmdb': LmdbBackend,
    }

    def __init__(self, backend='disk', **kwargs):
        known = backend in self._backends
        if not known:
            raise ValueError(
                f'Backend {backend} is not supported. Currently supported ones'
                f' are {list(self._backends.keys())}')

        backend_cls = self._backends[backend]
        self.backend = backend
        self.client = backend_cls(**kwargs)

    def get(self, filepath, client_key='default'):
        """Read ``filepath`` through the backend.

        Args:
            filepath: Path (or lmdb key) handed to the backend.
            client_key (str): Forwarded to the backend only when the
                backend is "lmdb"; ignored otherwise. Default: 'default'.

        Returns:
            Whatever the backend's ``get()`` returns.
        """
        if self.backend != 'lmdb':
            return self.client.get(filepath)
        # The lmdb backend additionally needs to know which env to query.
        return self.client.get(filepath, client_key)

    def get_text(self, filepath):
        """Read ``filepath`` as text via the backend's ``get_text()``."""
        backend_client = self.client
        return backend_client.get_text(filepath)
|
|