# sHORTgpt / shortGPT/config/asset_db.py
# wiydarrr's picture
# Upload folder using huggingface_hub
# 5f685fd verified
import base64
import re
import shutil
import time
from datetime import datetime
from pathlib import Path
import enum
import pandas as pd
from shortGPT.audio.audio_utils import downloadYoutubeAudio, get_asset_duration
from shortGPT.database.db_document import TinyMongoDocument
# File suffixes (lower-case, leading dot included) used to classify files
# found on disk into an asset type.
AUDIO_EXTENSIONS = {
    ".aac",
    ".flac",
    ".m4a",
    ".mp3",
    ".ogg",
    ".opus",
    ".wav",
    ".wma",
}
IMAGE_EXTENSIONS = {
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".svg",
    ".webp",
}
VIDEO_EXTENSIONS = {
    ".avi",
    ".flv",
    ".m4v",
    ".mkv",
    ".mov",
    ".mp4",
    ".webm",
    ".wmv",
}

# Locations of the asset database files, relative to the working directory.
# The template is copied to ASSETS_DB_PATH when the latter does not exist yet.
ASSETS_DB_PATH = '.database/asset_db.json'
TEMPLATE_ASSETS_DB_PATH = '.database/template_asset_db.json'
@enum.unique
class AssetType(enum.Enum):
    """
    Kinds of asset that can be stored in the asset database.

    The string value of each member is what gets written to the ``type``
    field of an asset record.
    """

    VIDEO = "video"
    AUDIO = "audio"
    IMAGE = "image"
    BACKGROUND_MUSIC = "background music"
    BACKGROUND_VIDEO = "background video"
    OTHER = "other"
class AssetDatabase:
    """
    Class for managing assets, both local and remote.

    The class provides methods to add, remove, get and sync assets and is
    used purely through classmethods. It uses a MongoDB-like database to
    store information about the assets, held in two class attributes:

    * ``local_assets``  -- records with a ``path`` to a file on disk.
    * ``remote_assets`` -- records with a ``url``.

    Records written by this class also carry a ``type`` (an ``AssetType``
    value) and a ``ts`` timestamp; ``duration`` and ``remote_url`` are added
    lazily when a duration or a YouTube link is first requested.

    NOTE: the statements in the class body run once, when the class is
    defined (i.e. at import time): the database file is created from the
    template if missing, both documents are opened, and the default
    'subscribe animation' remote asset is seeded if absent.
    """

    # Format of the "ts" field stored with every asset record.
    _TS_FORMAT = "%Y-%m-%d %H:%M:%S"
    # Columns built by get_df(); "ts" is only used for sorting and is dropped.
    _DF_COLUMNS = ['name', 'type', 'link', 'source', 'ts']

    # First run: start from the template database shipped with the project.
    if not Path(ASSETS_DB_PATH).exists():
        shutil.copy(TEMPLATE_ASSETS_DB_PATH, ASSETS_DB_PATH)

    local_assets = TinyMongoDocument("asset_db", "asset_collection", "local_assets", create=True)
    remote_assets = TinyMongoDocument("asset_db", "asset_collection", "remote_assets", create=True)

    # Seed the default remote asset when the database does not have it yet.
    if not remote_assets._get('subscribe animation'):
        remote_assets._save({
            'subscribe animation': {
                "type": AssetType.VIDEO.value,
                "url": "https://www.youtube.com/watch?v=72WhUT0OM98",
                "ts": datetime.now().strftime(_TS_FORMAT)
            }
        })

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @classmethod
    def asset_exists(cls, name: str) -> bool:
        """
        Tell whether an asset is registered, locally or remotely.
        Args:
            name (str): Name of the asset.
        Returns:
            bool: True if ``name`` is a key of either asset document.
        """
        return name in cls.local_assets._get() or name in cls.remote_assets._get()

    @classmethod
    def add_local_asset(cls, name: str, asset_type: AssetType, path: str):
        """
        Register (or overwrite) a local asset.
        Args:
            name (str): Name of the asset.
            asset_type (AssetType): Kind of asset; its ``value`` is stored.
            path (str): Path to the asset file.
        """
        cls.local_assets._save({
            name: {
                "type": asset_type.value,
                "path": path,
                "ts": cls._now_ts()
            }
        })

    @classmethod
    def add_remote_asset(cls, name: str, asset_type: AssetType, url: str):
        """
        Register (or overwrite) a remote asset.
        Args:
            name (str): Name of the asset.
            asset_type (AssetType): Kind of asset; its ``value`` is stored.
            url (str): URL of the asset.
        """
        cls.remote_assets._save({
            name: {
                "type": asset_type.value,
                "url": url,
                "ts": cls._now_ts()
            }
        })

    @classmethod
    def remove_asset(cls, name: str):
        """
        Remove an asset from the database.

        Local assets take precedence: when ``name`` exists locally only the
        local record is removed (see ``_remove_local_asset``).
        Args:
            name (str): Name of the asset.
        Raises:
            ValueError: If no local or remote asset has that name.
        """
        if name in cls.local_assets._get():
            cls._remove_local_asset(name)
        elif name in cls.remote_assets._get():
            cls.remote_assets._delete(name)
        else:
            raise ValueError(f"Asset '{name}' does not exist in the database.")

    @classmethod
    def get_df(cls, source=None) -> pd.DataFrame:
        """
        List the registered assets as a DataFrame, most recently used first.
        Args:
            source (str | None): ``None`` for every asset, ``'local'`` for
                local assets only, ``'youtube'`` for remote assets only
                (all remote assets, including those labelled 'internet').
                Any other value selects nothing.
        Returns:
            pd.DataFrame: Columns ``name``, ``type``, ``link`` and ``source``
            ('local', 'youtube' or 'internet'). The columns are present even
            when there are no rows.
        """
        rows = []
        if source is None or source == 'local':
            rows.extend(
                {
                    'name': name,
                    'type': asset['type'],
                    'link': asset['path'],
                    'source': 'local',
                    'ts': asset.get('ts'),
                }
                for name, asset in cls.local_assets._get().items()
            )
        if source is None or source == 'youtube':
            rows.extend(
                {
                    'name': name,
                    'type': asset['type'],
                    'link': asset['url'],
                    'source': 'youtube' if 'youtube' in asset['url'] else 'internet',
                    'ts': asset.get('ts'),
                }
                for name, asset in cls.remote_assets._get().items()
            )
        # Passing the columns explicitly keeps the frame's shape stable when
        # there are no rows, so callers can always index df['name'] etc.
        df = pd.DataFrame(rows, columns=cls._DF_COLUMNS)
        if not df.empty:
            df = df.sort_values(by='ts', ascending=False)
        return df.drop(columns='ts')

    @classmethod
    def sync_local_assets(cls):
        """
        Loads all files found under the ``public`` folder (recursively) into
        the database, skipping files whose path is already registered.
        """
        known_paths = {asset['path'] for asset in cls.local_assets._get().values()}
        for path in Path('public').rglob('*'):
            if path.is_file() and str(path) not in known_paths:
                cls._add_local_asset_from_path(path)

    @classmethod
    def get_asset_link(cls, key: str) -> str:
        """
        Get the link to an asset.

        Looking an asset up also refreshes its timestamp. Local assets take
        precedence over remote ones with the same name.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Link to the asset.
        Raises:
            ValueError: If no local or remote asset has that name.
        """
        if key in cls.local_assets._get():
            return cls._update_local_asset_timestamp_and_get_link(key)
        elif key in cls.remote_assets._get():
            return cls._get_remote_asset_link(key)
        else:
            raise ValueError(f"Asset '{key}' does not exist in the database.")

    @classmethod
    def get_asset_duration(cls, key: str) -> str:
        """
        Get the duration of an asset.

        Looking an asset up also refreshes its timestamp. Local assets take
        precedence over remote ones with the same name.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Duration of the asset, as stored in / computed for its
            record. NOTE(review): the value comes from the imported
            ``get_asset_duration`` helper and may not actually be a str;
            it is None for local assets that are not audio/video/music.
        Raises:
            ValueError: If no local or remote asset has that name.
        """
        if key in cls.local_assets._get():
            return cls._get_local_asset_duration(key)
        elif key in cls.remote_assets._get():
            return cls._get_remote_asset_duration(key)
        else:
            raise ValueError(f"Asset '{key}' does not exist in the database.")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @classmethod
    def _now_ts(cls) -> str:
        """Return the current local time formatted for the ``ts`` field."""
        return datetime.now().strftime(cls._TS_FORMAT)

    @classmethod
    def _touch(cls, store, key: str) -> dict:
        """
        Refresh the timestamp of a record and persist it.
        Args:
            store: The asset document (``local_assets`` or ``remote_assets``).
            key (str): Name of the asset.
        Returns:
            dict: The record as read from ``store``, with ``ts`` updated.
        """
        asset = store._get(key)
        asset['ts'] = cls._now_ts()
        store._save({key: asset})
        return asset

    @classmethod
    def _remove_local_asset(cls, name: str):
        """
        Remove a local asset from the database.

        Records that contain a ``required`` field are left untouched (neither
        the file nor the record is removed). Otherwise the file is deleted
        from disk (a missing file is reported and ignored) and the record is
        removed.
        Args:
            name (str): Name of the asset.
        """
        asset = cls.local_assets._get(name)
        if 'required' not in asset:
            try:
                Path(asset['path']).unlink()
            except FileNotFoundError as e:
                print(f"File not found: {e}")
            cls.local_assets._delete(name)

    @classmethod
    def _add_local_asset_from_path(cls, path: Path):
        """
        Add a local asset to the database from a file path.

        The asset is named after the file stem and typed from its extension.
        Args:
            path (Path): Path to the asset.
        """
        # The extension sets are lower-case; normalise so that e.g. ".MP3"
        # or ".PNG" is classified correctly instead of falling to OTHER.
        file_ext = path.suffix.lower()
        if file_ext in AUDIO_EXTENSIONS:
            asset_type = AssetType.AUDIO
        elif file_ext in IMAGE_EXTENSIONS:
            asset_type = AssetType.IMAGE
        elif file_ext in VIDEO_EXTENSIONS:
            asset_type = AssetType.VIDEO
        else:
            asset_type = AssetType.OTHER
        cls.local_assets._save({
            path.stem: {
                "path": str(path),
                "type": asset_type.value,
                "ts": cls._now_ts()
            }
        })

    @classmethod
    def _update_local_asset_timestamp_and_get_link(cls, key: str) -> str:
        """
        Update the timestamp of a local asset and get its link.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Path stored for the asset.
        """
        return cls._touch(cls.local_assets, key)['path']

    @classmethod
    def _get_remote_asset_link(cls, key: str) -> str:
        """
        Get the link to a remote asset, refreshing its timestamp.

        URLs containing 'youtube' are resolved through
        ``_get_youtube_asset_link``; any other URL is returned as stored.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Link to the asset.
        """
        asset = cls._touch(cls.remote_assets, key)
        if 'youtube' in asset['url']:
            return cls._get_youtube_asset_link(key, asset)
        return asset['url']

    @classmethod
    def _get_local_asset_duration(cls, key: str) -> str:
        """
        Get the duration of a local asset, refreshing its timestamp.

        The stored duration is used when present; otherwise it is computed
        via ``_update_local_asset_duration``.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Duration of the asset (None when the asset type is not
            audio/video/music and no duration is stored).
        """
        asset = cls._touch(cls.local_assets, key)
        if 'duration' not in asset:
            _, duration = cls._update_local_asset_duration(key)
            return duration
        return asset['duration']

    @classmethod
    def _get_remote_asset_duration(cls, key: str) -> str:
        """
        Get the duration of a remote asset, refreshing its timestamp.

        The stored duration is used when present; otherwise it is fetched
        via ``_update_youtube_asset_duration``.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Duration of the asset.
        """
        asset = cls._touch(cls.remote_assets, key)
        if 'duration' in asset:
            return asset['duration']
        _, duration = cls._update_youtube_asset_duration(key)
        return duration

    @classmethod
    def _update_local_asset_duration(cls, key: str) -> tuple:
        """
        Compute and store the duration of a local asset.

        Only assets whose type mentions 'audio', 'video' or 'music' are
        measured; for the others the record is saved unchanged.
        Args:
            key (str): Name of the asset.
        Returns:
            tuple: ``(path, duration)`` where ``path`` is the asset path as a
            string and ``duration`` is None for non-media assets.
        """
        asset = cls.local_assets._get(key)
        path = Path(asset['path'])
        if any(t in asset['type'] for t in ('audio', 'video', 'music')):
            # This is the module-level get_asset_duration imported from
            # audio_utils, not the classmethod of the same name: method
            # bodies do not resolve names through the class scope.
            _, duration = get_asset_duration(str(path))
            asset['duration'] = duration
        else:
            duration = None
        cls.local_assets._save({key: asset})
        return str(path), duration

    @classmethod
    def _update_youtube_asset_duration(cls, key: str) -> tuple:
        """
        Resolve a remote asset's media URL and duration and store both.

        The resolved URL is stored base64-encoded under ``remote_url``.
        Args:
            key (str): Name of the asset.
        Returns:
            tuple: ``(remote_url, duration)`` as returned by the imported
            ``get_asset_duration`` helper.
        """
        asset = cls.remote_assets._get(key)
        youtube_url = asset['url']
        remote_url, duration = get_asset_duration(youtube_url, isVideo="video" in asset['type'])
        asset.update({
            "remote_url": base64.b64encode(remote_url.encode()).decode('utf-8'),
            "duration": duration,
        })
        cls.remote_assets._save({key: asset})
        return remote_url, duration

    @classmethod
    def _get_youtube_asset_link(cls, key: str, asset: dict) -> str:
        """
        Get the link to a Youtube asset.

        Audio/music assets are downloaded to ``public/<key>.wav`` and
        registered as a local 'audio' asset under the same name. For other
        types the cached ``remote_url`` is reused while its ``expire=``
        timestamp is more than 30 minutes away; otherwise the URL is
        resolved again.
        Args:
            key (str): Name of the asset.
            asset (dict): Asset data. It is not modified.
        Returns:
            str: Local file path (audio/music) or direct media URL.
        """
        if any(t in asset['type'] for t in ('audio', 'music')):
            local_audio_file, duration = downloadYoutubeAudio(asset['url'], f"public/{key}.wav")
            cls.local_assets._save({
                key: {
                    'path': local_audio_file,
                    'duration': duration,
                    'type': 'audio',
                    'ts': cls._now_ts()
                }
            })
            return local_audio_file
        if 'remote_url' in asset:
            # Decode into a local variable: writing the decoded URL back into
            # the caller's dict would leave a record that no longer matches
            # the base64 form stored in the database.
            cached_url = base64.b64decode(asset['remote_url']).decode('utf-8')
            expire_match = re.search(r"expire=(\d+)", cached_url)
            # Require at least 30 minutes (1800 s) of remaining validity.
            not_expired = expire_match and int(expire_match.group(1)) > time.time() + 1800
            if not_expired and 'duration' in asset:
                return cached_url
        remote_url, _ = cls._update_youtube_asset_duration(key)
        return remote_url