#!/usr/local/bin/python3
# coding: utf-8

# ytdlbot - tasks.py
# 12/29/21 14:57
#

__author__ = "Benny "

import asyncio
import logging
import os
import pathlib
import re
import shutil
import subprocess
import tempfile
import threading
import time
import traceback
import typing
from typing import Any
from urllib.parse import quote_plus

import filetype
import psutil
import pyrogram.errors
import requests
from apscheduler.schedulers.background import BackgroundScheduler
from celery import Celery
from celery.worker.control import Panel
from pyrogram import Client, enums, idle, types

from channel import Channel
from client_init import create_app
from config import (
    ARCHIVE_ID,
    BROKER,
    ENABLE_CELERY,
    ENABLE_VIP,
    OWNER,
    RATE_LIMIT,
    RCLONE_PATH,
    TMPFILE_PATH,
    WORKERS,
)
from constant import BotText
from database import Redis
from downloader import edit_text, tqdm_progress, upload_hook, ytdl_download
from limit import Payment
from utils import (
    apply_log_formatter,
    auto_restart,
    customize_logger,
    get_metadata,
    get_revision,
    sizeof_fmt,
)

customize_logger(["pyrogram.client", "pyrogram.session.session", "pyrogram.connection.connection"])
apply_log_formatter()
bot_text = BotText()
logging.getLogger("apscheduler.executors.default").propagate = False

app = Celery("tasks", broker=BROKER)
bot = create_app("tasks")
channel = Channel()

# (connect, read) timeout in seconds for direct HTTP downloads, so a stalled
# server cannot block a worker forever.
_HTTP_TIMEOUT = (10, 60)

# Size reported in the caption when the media object exposes no ``file_size``.
# The original spelled this as (2 << 2) + ((2 << 2) + 1) + (2 << 5); the value
# is unchanged. NOTE(review): the reason for this particular number is not
# visible in this file - confirm.
_UNKNOWN_FILE_SIZE = 81


def retrieve_message(chat_id: int, message_id: int) -> types.Message | Any:
    """Fetch a message through the module-level ``bot`` client.

    This should only be called by celery tasks. If the first attempt raises
    ``ConnectionError`` the bot is started and the fetch is retried once.
    """
    try:
        return bot.get_messages(chat_id, message_id)
    except ConnectionError as e:
        logging.critical("BOT IS NOT STARTED YET: %s", e)
        bot.start()
        return bot.get_messages(chat_id, message_id)


@app.task(rate_limit=f"{RATE_LIMIT}/m")
def ytdl_download_task(chat_id: int, message_id: int, url: str):
    """Celery task: look the bot message up again and run the yt-dlp download."""
    logging.info("YouTube celery tasks started for %s", url)
    bot_msg = retrieve_message(chat_id, message_id)
    ytdl_normal_download(bot, bot_msg, url)
    logging.info("YouTube celery tasks ended.")


@app.task()
def audio_task(chat_id: int, message_id: int):
    """Celery task: look the bot message up again and run the audio conversion."""
    logging.info("Audio celery tasks started for %s-%s", chat_id, message_id)
    bot_msg = retrieve_message(chat_id, message_id)
    normal_audio(bot, bot_msg)
    logging.info("Audio celery tasks ended.")


@app.task()
def direct_download_task(chat_id: int, message_id: int, url: str):
    """Celery task: look the bot message up again and run the direct download."""
    logging.info("Direct download celery tasks started for %s", url)
    bot_msg = retrieve_message(chat_id, message_id)
    direct_normal_download(bot, bot_msg, url)
    logging.info("Direct download celery tasks ended.")


def get_unique_clink(original_url: str, user_id: int):
    """Build the cache key for ``original_url`` as seen by ``user_id``.

    The key is the canonical link plus the user's settings, because different
    users may have different resolution settings. When the settings are too
    short to fill the template the bare canonical link is returned.
    """
    payment = Payment()
    settings = payment.get_user_settings(user_id)
    clink = channel.extract_canonical_link(original_url)
    try:
        # different user may have different resolution settings
        unique = "{}?p={}{}".format(clink, *settings[1:])
    except IndexError:
        unique = clink
    return unique


def forward_video(client, bot_msg: types.Message | Any, url: str, cached_fid: str):
    """Send a previously cached file id and refresh its caption.

    :param client: client used for the upload
    :param bot_msg: status message that is edited on success
    :param url: url the cached file belongs to
    :param cached_fid: cached file id handed to ``upload_processor``
    :return: always ``True``
    """
    res_msg = upload_processor(client, bot_msg, url, cached_fid)
    obj = res_msg.document or res_msg.video or res_msg.audio or res_msg.animation or res_msg.photo

    # Regenerate the caption from the media object that was actually sent.
    caption, _ = gen_cap(bot_msg, url, obj)
    res_msg.edit_text(caption, reply_markup=gen_video_markup())
    bot_msg.edit_text("Download success!✅")
    return True


def ytdl_download_entrance(client: Client, bot_msg: types.Message, url: str, mode=None):
    """Entry point for a yt-dlp download request.

    A cache hit is answered by re-sending the cached file. Otherwise the
    download is queued on celery or run inline, depending on ``ENABLE_CELERY``
    and the effective mode. Any exception is logged and shown to the user by
    editing ``bot_msg``.

    :param client: client passed from main (Local node and forward mode); in
        celery mode the task uses the module-level ``bot`` instead
    :param bot_msg: status message of this request
    :param url: url to download
    :param mode: optional override; falls back to the last user setting
    """
    payment = Payment()
    redis = Redis()
    chat_id = bot_msg.chat.id
    unique = get_unique_clink(url, chat_id)
    cached_fid = redis.get_send_cache(unique)

    try:
        if cached_fid:
            forward_video(client, bot_msg, url, cached_fid)
            redis.update_metrics("cache_hit")
            return
        redis.update_metrics("cache_miss")
        mode = mode or payment.get_user_settings(chat_id)[-1]
        if ENABLE_CELERY and mode in [None, "Celery"]:
            ytdl_download_task.delay(chat_id, bot_msg.id, url)
        else:
            ytdl_normal_download(client, bot_msg, url)
    except Exception as e:
        logging.error("Failed to download %s, error: %s", url, e)
        bot_msg.edit_text(f"Download failed!❌\n\n`{traceback.format_exc()[0:4000]}`", disable_web_page_preview=True)


def direct_download_entrance(client: Client, bot_msg: typing.Union[types.Message, typing.Coroutine], url: str):
    """Entry point for a direct (plain HTTP) download request.

    NOTE: dispatching to ``direct_download_task`` was already disabled in the
    original code, so the download runs inline whether or not ``ENABLE_CELERY``
    is set.
    """
    direct_normal_download(client, bot_msg, url)


def audio_entrance(client: Client, bot_msg: types.Message):
    """Entry point for audio conversion: queue on celery when enabled, else run inline."""
    if ENABLE_CELERY:
        audio_task.delay(bot_msg.chat.id, bot_msg.id)
    else:
        normal_audio(client, bot_msg)


def _declared_length(raw: typing.Optional[str]) -> int:
    """Parse a ``content-length`` header value; return 0 when absent or malformed."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        return 0


def _safe_download_name(disposition: typing.Optional[str], final_url: str, url: str) -> str:
    """Derive a local file name that stays inside the download directory.

    Preference order: the ``filename=`` value of the content-disposition
    header, the last path segment of the final URL, then the url-quoted
    request url.
    """
    found = re.findall("filename=(.+)", disposition or "")
    if found:
        # The greedy pattern also captures trailing header parameters and the
        # surrounding quotes; keep only the bare value.
        raw = found[0].split(";")[0].strip().strip("\"'")
    else:
        raw = (final_url or "").rsplit("/")[-1]

    # The name is server-controlled: keep only the final path component so
    # values such as "../../x" cannot escape the temporary directory.
    name = os.path.basename(raw.replace("\\", "/"))
    if name in ("", ".", ".."):
        name = quote_plus(url)
    return name


def direct_normal_download(client: Client, bot_msg: typing.Union[types.Message, typing.Coroutine], url: str):
    """Download ``url`` over HTTP and send the file to the chat as a document.

    Progress is reported by editing ``bot_msg``. When the request itself fails
    the error is shown in ``bot_msg`` and the function returns without sending
    anything.

    :param client: client used to send the document
    :param bot_msg: status message of this request
    :param url: url to fetch
    """
    chat_id = bot_msg.chat.id
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.3987.149 Safari/537.36"
    }

    try:
        req = requests.get(url, headers=headers, stream=True, timeout=_HTTP_TIMEOUT)
    except Exception as e:
        bot_msg.edit_text(f"Download failed!❌\n\n```{e}```", disable_web_page_preview=True)
        return

    # ``with req`` releases the streamed connection even if the upload fails.
    with req:
        # The two headers are read independently: a missing content-length no
        # longer prevents the content-disposition file name from being used.
        length = _declared_length(req.headers.get("content-length"))
        filename = _safe_download_name(req.headers.get("content-disposition"), req.url, url)

        with tempfile.TemporaryDirectory(prefix="ytdl-", dir=TMPFILE_PATH) as f:
            filepath = os.path.join(f, filename)
            # consume the req.content
            downloaded = 0
            # Open once: cheaper than reopening per chunk, and the file exists
            # even when the body is empty.
            with open(filepath, "wb") as fp:
                for chunk in req.iter_content(1024 * 1024):
                    text = tqdm_progress("Downloading...", length, downloaded)
                    edit_text(bot_msg, text)
                    fp.write(chunk)
                    downloaded += len(chunk)
            logging.info("Downloaded file %s", filename)

            st_size = os.stat(filepath).st_size
            client.send_chat_action(chat_id, enums.ChatAction.UPLOAD_DOCUMENT)
            client.send_document(
                bot_msg.chat.id,
                filepath,
                caption=f"filesize: {sizeof_fmt(st_size)}",
                progress=upload_hook,
                progress_args=(bot_msg,),
            )
            bot_msg.edit_text("Download success!✅")


def normal_audio(client: Client, bot_msg: typing.Union[types.Message, typing.Coroutine]):
    """Download the audio track for the url in ``bot_msg.caption`` and send it.

    The first ``http(s)://`` match in the caption is used as the source url;
    ``IndexError`` is raised when the caption contains none.

    :param client: client used to send chat actions and audio files
    :param bot_msg: message whose caption contains the original url
    """
    chat_id = bot_msg.chat.id
    status_msg: typing.Union[types.Message, typing.Coroutine] = bot_msg.reply_text(
        "Converting to audio...please wait patiently", quote=True
    )
    orig_url: str = re.findall(r"https?://.*", bot_msg.caption)[0]

    with tempfile.TemporaryDirectory(prefix="ytdl-", dir=TMPFILE_PATH) as tmp:
        client.send_chat_action(chat_id, enums.ChatAction.RECORD_AUDIO)
        # just try to download the audio using yt-dlp
        filepath = ytdl_download(orig_url, tmp, status_msg, hijack="bestaudio[ext=m4a]")
        status_msg.edit_text("Sending audio now...")
        client.send_chat_action(chat_id, enums.ChatAction.UPLOAD_AUDIO)
        for f in filepath:
            client.send_audio(chat_id, f)
        status_msg.edit_text("✅ Conversion complete.")
        Redis().update_metrics("audio_success")


def ytdl_normal_download(client: Client, bot_msg: types.Message | typing.Any, url: str):
    """
    This function is called by celery task or directly by bot

    The download directory is removed when the function exits, including when
    the download or the upload raises.

    :param client: bot client, either from main or bot(celery)
    :param bot_msg: bot message
    :param url: url to download
    """
    chat_id = bot_msg.chat.id
    with tempfile.TemporaryDirectory(prefix="ytdl-", dir=TMPFILE_PATH) as temp_path:
        video_paths = ytdl_download(url, temp_path, bot_msg)
        logging.info("Download complete.")
        client.send_chat_action(chat_id, enums.ChatAction.UPLOAD_DOCUMENT)
        bot_msg.edit_text("Download complete. Sending now...")
        try:
            upload_processor(client, bot_msg, url, video_paths)
        except pyrogram.errors.Flood as e:
            logging.critical("FloodWait from Telegram: %s", e)
            # ``e.value`` is the wait in seconds (it is what gets slept on
            # below); the message previously interpolated the exception itself.
            client.send_message(
                chat_id,
                f"I'm being rate limited by Telegram. Your video will come after {e.value} seconds. Please wait patiently.",
            )
            client.send_message(OWNER, f"CRITICAL INFO: {e}")
            time.sleep(e.value)
            upload_processor(client, bot_msg, url, video_paths)

        bot_msg.edit_text("Download success!✅")

        # setup rclone environment var to back up the downloaded file
        if RCLONE_PATH:
            for item in os.listdir(temp_path):
                logging.info("Copying %s to %s", item, RCLONE_PATH)
                shutil.copy(os.path.join(temp_path, item), RCLONE_PATH)


def generate_input_media(file_paths: list, cap: str) -> list:
    """Wrap each path in the pyrogram ``InputMedia*`` type matching its MIME type.

    Files whose type cannot be guessed are sent as documents. The caption is
    attached to the first item only; an empty ``file_paths`` yields ``[]``.
    """
    input_media = []
    for path in file_paths:
        # guess_mime returns None for unrecognised content; treat as "unknown".
        mime = filetype.guess_mime(path) or ""
        if "video" in mime:
            input_media.append(pyrogram.types.InputMediaVideo(media=path))
        elif "image" in mime:
            input_media.append(pyrogram.types.InputMediaPhoto(media=path))
        elif "audio" in mime:
            input_media.append(pyrogram.types.InputMediaAudio(media=path))
        else:
            input_media.append(pyrogram.types.InputMediaDocument(media=path))

    if input_media:
        input_media[0].caption = cap
    return input_media


def upload_processor(client: Client, bot_msg: types.Message, url: str, vp_or_fid: str | list):
    """Send downloaded files (or a cached file id) to the chat and cache the result.

    :param client: client used for the upload
    :param bot_msg: status message; also passed to the upload progress hook
    :param url: source url, used for the caption and the cache key
    :param vp_or_fid: if is str, it's a file id; else it's a list of paths
    :return: the sent message (the first one for a media group)
    """
    redis = Redis()
    payment = Payment()
    chat_id = bot_msg.chat.id
    markup = gen_video_markup()

    if isinstance(vp_or_fid, list) and len(vp_or_fid) > 1:
        # just generate the first for simplicity, send as media group(2-20)
        cap, meta = gen_cap(bot_msg, url, vp_or_fid[0])
        res_msg: list["types.Message"] | Any = client.send_media_group(chat_id, generate_input_media(vp_or_fid, cap))
        # TODO no cache for now
        return res_msg[0]

    if isinstance(vp_or_fid, list) and len(vp_or_fid) == 1:
        # normal download, just contains one file in video_paths
        vp_or_fid = vp_or_fid[0]
    # otherwise: just a file id as string
    cap, meta = gen_cap(bot_msg, url, vp_or_fid)

    settings = payment.get_user_settings(chat_id)
    # Fresh files go to the archive chat first and are forwarded to the user
    # at the end of this function.
    if ARCHIVE_ID and isinstance(vp_or_fid, pathlib.Path):
        chat_id = ARCHIVE_ID

    if settings[2] == "document":
        logging.info("Sending as document")
        try:
            # send as document could be sent as video even if it's a document
            res_msg = client.send_document(
                chat_id,
                vp_or_fid,
                caption=cap,
                progress=upload_hook,
                progress_args=(bot_msg,),
                reply_markup=markup,
                thumb=meta["thumb"],
                force_document=True,
            )
        except ValueError:
            logging.error("Retry to send as video")
            res_msg = client.send_video(
                chat_id,
                vp_or_fid,
                supports_streaming=True,
                caption=cap,
                progress=upload_hook,
                progress_args=(bot_msg,),
                reply_markup=markup,
                **meta,
            )
    elif settings[2] == "audio":
        logging.info("Sending as audio")
        res_msg = client.send_audio(
            chat_id,
            vp_or_fid,
            caption=cap,
            progress=upload_hook,
            progress_args=(bot_msg,),
        )
    else:
        # settings==video
        logging.info("Sending as video")
        try:
            res_msg = client.send_video(
                chat_id,
                vp_or_fid,
                supports_streaming=True,
                caption=cap,
                progress=upload_hook,
                progress_args=(bot_msg,),
                reply_markup=markup,
                **meta,
            )
        except Exception:
            # try to send as animation, photo
            try:
                logging.warning("Retry to send as animation")
                res_msg = client.send_animation(
                    chat_id,
                    vp_or_fid,
                    caption=cap,
                    progress=upload_hook,
                    progress_args=(bot_msg,),
                    reply_markup=markup,
                    **meta,
                )
            except Exception:
                # this is likely a photo
                logging.warning("Retry to send as photo")
                res_msg = client.send_photo(
                    chat_id,
                    vp_or_fid,
                    caption=cap,
                    progress=upload_hook,
                    progress_args=(bot_msg,),
                )

    unique = get_unique_clink(url, bot_msg.chat.id)
    obj = res_msg.document or res_msg.video or res_msg.audio or res_msg.animation or res_msg.photo
    redis.add_send_cache(unique, getattr(obj, "file_id", None))
    redis.update_metrics("video_success")
    if ARCHIVE_ID and isinstance(vp_or_fid, pathlib.Path):
        client.forward_messages(bot_msg.chat.id, ARCHIVE_ID, res_msg.id)
    return res_msg


def gen_cap(bm, url, video_path):
    """Build the caption and media metadata for one upload.

    :param bm: bot message; its ``chat`` supplies the user shown in the caption
    :param url: source url shown in the caption
    :param video_path: a ``pathlib.Path`` to a local file, or any other object
        (file id string, sent media object) whose attributes are read with
        fallbacks
    :return: ``(cap, meta)`` where ``meta`` has the keys ``width``, ``height``,
        ``duration`` and ``thumb``
    """
    payment = Payment()
    chat_id = bm.chat.id
    user = bm.chat
    try:
        # Parenthesised on purpose: without the parentheses ``+`` binds tighter
        # than ``or``, which dropped the last name and raised on a None last name.
        full_name = (user.first_name or "") + (user.last_name or "")
        user_info = "@{}({})-{}".format(user.username or "N/A", full_name, user.id)
    except Exception:
        # best effort: the caption is still useful without the user line
        user_info = ""

    if isinstance(video_path, pathlib.Path):
        meta = get_metadata(video_path)
        file_name = video_path.name
        file_size = sizeof_fmt(os.stat(video_path).st_size)
    else:
        file_name = getattr(video_path, "file_name", "")
        file_size = sizeof_fmt(getattr(video_path, "file_size", _UNKNOWN_FILE_SIZE))
        meta = dict(
            width=getattr(video_path, "width", 0),
            height=getattr(video_path, "height", 0),
            duration=getattr(video_path, "duration", 0),
            thumb=getattr(video_path, "thumb", None),
        )

    free = payment.get_free_token(chat_id)
    pay = payment.get_pay_token(chat_id)
    if ENABLE_VIP:
        remain = f"Download token count: free {free}, pay {pay}"
    else:
        remain = ""

    if worker_name := os.getenv("WORKER_NAME"):
        worker = f"Downloaded by  {worker_name}"
    else:
        worker = ""

    cap = (
        f"{user_info}\n{file_name}\n\n{url}\n\nInfo: {meta['width']}x{meta['height']} {file_size}\t"
        f"{meta['duration']}s\n{remain}\n{worker}\n{bot_text.custom_text}"
    )
    return cap, meta


def gen_video_markup():
    """Return the inline keyboard holding the single "convert to audio" button."""
    markup = types.InlineKeyboardMarkup(
        [
            [  # First row
                types.InlineKeyboardButton(  # Generates a callback query when pressed
                    "convert to audio", callback_data="convert"
                )
            ]
        ]
    )
    return markup


@Panel.register
def ping_revision(*args):
    """Celery remote-control command: report the running code revision."""
    return get_revision()


@Panel.register
def hot_patch(*args):
    """Celery remote-control command: pull the latest code, reinstall, and exit.

    Runs git and pip in the parent of the current working directory, then
    kills the current process.
    """
    app_path = pathlib.Path().cwd().parent
    logging.info("Hot patching on path %s...", app_path)

    # All commands are constant strings, so running them through the shell
    # does not expose any untrusted input.
    pip_install = "pip install -r requirements.txt"
    unset = "git config --unset http.https://github.com/.extraheader"
    pull_unshallow = "git pull origin --unshallow"
    pull = "git pull"

    subprocess.call(unset, shell=True, cwd=app_path)
    if subprocess.call(pull_unshallow, shell=True, cwd=app_path) != 0:
        logging.info("Already unshallow, pulling now...")
        subprocess.call(pull, shell=True, cwd=app_path)

    logging.info("Code is updated, applying hot patch now...")
    subprocess.call(pip_install, shell=True, cwd=app_path)
    psutil.Process().kill()


def purge_tasks():
    """Purge the celery queue and return a summary string."""
    count = app.control.purge()
    return f"purged {count} tasks."
def run_celery():
    """Start the celery worker in the calling thread.

    A fresh asyncio event loop is created and installed for this thread before
    the worker starts. The node name is read from ``WORKER_NAME`` (empty when
    unset) and the thread pool size from ``WORKERS``.
    """
    asyncio.set_event_loop(asyncio.new_event_loop())

    node_name = os.getenv("WORKER_NAME", "")
    worker_args = [
        "-A",
        "tasks",
        "worker",
        "--loglevel=info",
        "--pool=threads",
        f"--concurrency={WORKERS}",
        "-n",
        node_name,
    ]
    app.worker_main(worker_args)


if __name__ == "__main__":
    print("Bootstrapping Celery worker now.....")
    time.sleep(5)

    # The worker runs in a daemon thread so it does not keep the process alive
    # once the main thread leaves idle().
    celery_thread = threading.Thread(target=run_celery, daemon=True)
    celery_thread.start()

    # Call auto_restart every 900 seconds in the background.
    scheduler = BackgroundScheduler(timezone="Europe/London")
    scheduler.add_job(auto_restart, "interval", seconds=900)
    scheduler.start()

    idle()
    bot.stop()