dockerbot / ytdlbot /downloader.py
navpan2's picture
Upload 42 files
94ecfcc
#!/usr/local/bin/python3
# coding: utf-8
# ytdlbot - downloader.py
# 8/14/21 16:53
#
__author__ = "Benny <[email protected]>"
import logging
import os
import pathlib
import random
import re
import subprocess
import time
import traceback
from io import StringIO
from unittest.mock import MagicMock
import fakeredis
import ffmpeg
import ffpb
import filetype
import requests
import yt_dlp as ytdl
from pyrogram import types
from tqdm import tqdm
from config import AUDIO_FORMAT, ENABLE_ARIA2, ENABLE_FFMPEG, TG_MAX_SIZE, IPv6
from limit import Payment
from utils import adjust_formats, apply_log_formatter, current_time, sizeof_fmt
# Fake (non-networked) Redis client; the hooks below store short-lived
# marker keys in it (see the `ex=` expiries) to throttle message edits.
r = fakeredis.FakeStrictRedis()
# NOTE(review): presumably installs the project's logging format - the helper
# lives in utils and is not visible here.
apply_log_formatter()
def edit_text(bot_msg: types.Message, text: str):
    """Edit ``bot_msg`` to show ``text`` unless it was edited moments ago.

    A marker key made of the chat id and message id is stored in ``r`` with a
    3 second expiry; while that marker exists the edit request is dropped.
    """
    throttle_key = f"{bot_msg.chat.id}-{bot_msg.id}"
    if r.exists(throttle_key):
        # a previous edit is still within its 3 second window
        return

    # random sub-second pause before claiming the window and editing
    time.sleep(random.random())
    r.set(throttle_key, "ok", ex=3)
    bot_msg.edit_text(text)
def tqdm_progress(desc, total, finished, speed="", eta=""):
    """Build the multi-line progress text that is written into a bot message.

    A tqdm bar is rendered into an in-memory buffer, the rendered output is
    split on "|" and the pieces are laid out underneath ``desc``.

    :param desc: heading line, e.g. "Downloading..."
    :param total: passed to tqdm as ``total``
    :param finished: passed to tqdm as ``initial``
    :param speed: optional speed text; its line is empty when the value is falsy
    :param eta: optional ETA text; its line is empty when the value is falsy
    :return: the formatted text block
    """

    def more(title, initial):
        # "<title> <value>" when a value is present, otherwise an empty string
        if initial:
            return f"{title} {initial}"
        else:
            return ""

    f = StringIO()
    # NOTE(review): the tqdm object is never bound to a name, so everything it
    # writes into `f` happens as a side effect of this expression. Binding it
    # to a variable may change when tqdm finalizes and what ends up in `f` -
    # confirm before refactoring.
    tqdm(
        total=total,
        initial=finished,
        file=f,
        ascii=False,
        unit_scale=True,
        ncols=30,
        bar_format="{l_bar}{bar} |{n_fmt}/{total_fmt} ",
    )
    raw_output = f.getvalue()
    # Split on the "|" separators of the rendered bar: index 1 is used as the
    # bar itself and index 2 as the detail text.
    # NOTE(review): the exact content of each piece depends on how tqdm expands
    # {l_bar} - confirm against the tqdm documentation.
    tqdm_output = raw_output.split("|")
    progress = f"`[{tqdm_output[1]}]`"
    # drop "[A" fragments (presumably left over from a cursor-up escape sequence)
    detail = tqdm_output[2].replace("[A", "")
    # the string body is kept at column 0 so no indentation leaks into the message
    text = f"""
{desc}
{progress}
{detail}
{more("Speed:", speed)}
{more("ETA:", eta)}
"""
    f.close()
    return text
def remove_bash_color(text):
    """Strip terminal colour codes from a status string.

    Removes the ESC character and the specific colour sequences listed in the
    pattern below (with or without a preceding ESC).

    :param text: the string to clean. ``None`` yields an empty string and any
        other non-string value (for example a numeric ETA in seconds) is
        converted with ``str()`` first, instead of raising ``TypeError``.
    :return: the cleaned string
    """
    if text is None:
        return ""
    return re.sub(r"\u001b|\[0;94m|\u001b\[0m|\[0;32m|\[0m|\[0;33m", "", str(text))
def download_hook(d: dict, bot_msg):
    """Progress hook for yt-dlp: show download progress in ``bot_msg``.

    Only the "downloading" status is handled; other statuses are ignored.

    :param d: progress dictionary handed to the hook by yt-dlp
    :param bot_msg: the bot message that gets edited with the progress text
    :raises Exception: when the (estimated) total size exceeds ``TG_MAX_SIZE``
    """
    # since we're using celery, server location may be located in different region.
    # Therefore, we can't trigger the hook very often.
    # the key is user_id + download_link
    original_url = d["info_dict"]["original_url"]
    key = f"{bot_msg.chat.id}-{original_url}"

    if d["status"] != "downloading":
        return

    # Any of these values may be missing or None, so fall back to 0 rather
    # than comparing None with an int below.
    downloaded = d.get("downloaded_bytes") or 0
    total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0
    if total > TG_MAX_SIZE:
        raise Exception(f"Your download file size {sizeof_fmt(total)} is too large for Telegram.")

    # The raw "eta" fallback may be a number or None; normalise to text before
    # the colour codes are stripped.
    speed = remove_bash_color(str(d.get("_speed_str") or "N/A"))
    eta_raw = d.get("_eta_str", d.get("eta"))
    eta = "" if eta_raw is None else remove_bash_color(str(eta_raw))

    text = tqdm_progress("Downloading...", total, downloaded, speed, eta)
    edit_text(bot_msg, text)
    # NOTE(review): this marker is written with a 5 second expiry but is not
    # read anywhere in the visible part of the file.
    r.set(key, "ok", ex=5)
def upload_hook(current, total, bot_msg):
    """Upload progress callback: render ``current``/``total`` into ``bot_msg``."""
    edit_text(bot_msg, tqdm_progress("Uploading...", total, current))
def convert_to_mp4(video_paths: list, bot_msg):
    """Convert flv/webm entries of ``video_paths`` to mp4, in place.

    Each converted entry of the list is replaced by the path of the new
    ``.mp4`` file. Processing stops at the first file that needs conversion
    when ``can_convert_mp4`` refuses it.

    :param video_paths: list of ``pathlib.Path`` objects, modified in place
    :param bot_msg: bot message used for progress and status updates
    """
    needs_conversion = ["video/x-flv", "video/webm"]
    for position, source in enumerate(video_paths):
        # if we can't guess file type, we assume it's video/mp4
        guessed = filetype.guess(source)
        mime = getattr(guessed, "mime", "video/mp4")
        if mime not in needs_conversion:
            continue

        if not can_convert_mp4(source, bot_msg.chat.id):
            logging.warning("Conversion abort for %s", bot_msg.chat.id)
            bot_msg._client.send_message(bot_msg.chat.id, "Can't convert your video. ffmpeg has been disabled.")
            break

        edit_text(bot_msg, f"{current_time()}: Converting {source.name} to mp4. Please wait.")
        target = source.with_suffix(".mp4")
        logging.info("Detected %s, converting to mp4...", mime)
        run_ffmpeg_progressbar(["ffmpeg", "-y", "-i", source, target], bot_msg)
        video_paths[position] = target
class ProgressBar(tqdm):
    """tqdm subclass that mirrors every progress update into a bot message."""

    # Message to edit. It is a class attribute because the class itself (not
    # an instance) is handed to ffpb; run_ffmpeg_progressbar sets it first.
    b = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.bot_msg = self.b

    def update(self, n=1):
        """Advance the bar by ``n`` and push the new state to the message."""
        super().update(n)
        progress_text = tqdm_progress("Converting...", self.total, self.n)
        edit_text(self.bot_msg, progress_text)
def run_ffmpeg_progressbar(cmd_list: list, bm):
    """Run an ffmpeg command through ffpb, reporting progress to ``bm``.

    :param cmd_list: full command line; its first element (the program name)
        is dropped before the rest is handed to ``ffpb.main``
    :param bm: bot message that ``ProgressBar`` instances will edit
    """
    # slicing already produces a new list, the caller's list is left untouched
    ffmpeg_args = cmd_list[1:]
    ProgressBar.b = bm
    ffpb.main(ffmpeg_args, tqdm=ProgressBar)
def can_convert_mp4(video_path, uid):
    """Return True when ffmpeg conversion is enabled (``ENABLE_FFMPEG``).

    ``video_path`` and ``uid`` are accepted but not consulted.
    """
    return bool(ENABLE_FFMPEG)
def ytdl_download(url: str, tempdir: str, bm, **kwargs) -> list:
    """Download ``url`` into ``tempdir`` and return the resulting file paths.

    Instagram links are first offered to ``download_instagram``. Everything
    else goes through yt-dlp, trying each candidate format (and, when ``IPv6``
    is enabled, each source address) until one attempt succeeds. Depending on
    the user's settings the files are then converted to mp4 or to audio.

    :param url: the link to download
    :param tempdir: directory that receives the downloaded files
    :param bm: bot message used for progress updates
    :param kwargs: ``hijack`` - optional format override handed to
        ``adjust_formats``
    :return: list of ``pathlib.Path`` objects found in ``tempdir``
    :raises Exception: when every attempt failed or no file was produced; the
        message is the traceback text of the last failed attempt, if any
    """
    payment = Payment()
    chat_id = bm.chat.id
    hijack = kwargs.get("hijack")
    output = pathlib.Path(tempdir, "%(title).70s.%(ext)s").as_posix()
    ydl_opts = {
        "progress_hooks": [lambda d: download_hook(d, bm)],
        "outtmpl": output,
        "restrictfilenames": False,
        "quiet": True,
    }
    if ENABLE_ARIA2:
        ydl_opts["external_downloader"] = "aria2c"
        ydl_opts["external_downloader_args"] = [
            "--min-split-size=1M",
            "--max-connection-per-server=16",
            "--max-concurrent-downloads=16",
            "--split=16",
        ]

    if url.startswith("https://drive.google.com"):
        # Always use the `source` format for Google Drive URLs.
        formats = ["source"]
    else:
        # Use the default formats for other URLs.
        formats = [
            # webm , vp9 and av01 are not streamable on telegram, so we'll extract only mp4
            "bestvideo[ext=mp4][vcodec!*=av01][vcodec!*=vp09]+bestaudio[ext=m4a]/bestvideo+bestaudio",
            "bestvideo[vcodec^=avc]+bestaudio[acodec^=mp4a]/best[vcodec^=avc]/best",
            None,
        ]
    adjust_formats(chat_id, url, formats, hijack)

    if download_instagram(url, tempdir):
        return list(pathlib.Path(tempdir).glob("*"))

    address = ["::", "0.0.0.0"] if IPv6 else [None]
    error = None
    video_paths = None
    succeeded = False
    for format_ in formats:
        ydl_opts["format"] = format_
        for addr in address:
            # IPv6 goes first in each format
            ydl_opts["source_address"] = addr
            try:
                logging.info("Downloading for %s with format %s", url, format_)
                with ytdl.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([url])
                video_paths = list(pathlib.Path(tempdir).glob("*"))
            except Exception:
                error = traceback.format_exc()
                logging.error("Download failed for %s - %s, try another way", format_, url)
            else:
                succeeded = True
                break
        # Stop on the first successful attempt. Checking `error is None` here
        # would keep downloading with the remaining formats whenever an
        # earlier attempt had failed, because `error` is never reset.
        if succeeded:
            break

    if not video_paths:
        raise Exception(error or f"No files were downloaded for {url}")

    # convert format if necessary
    settings = payment.get_user_settings(chat_id)
    # NOTE(review): the MagicMock check presumably covers environments where
    # the settings backend is mocked - confirm.
    if settings[2] == "video" or isinstance(settings[2], MagicMock):
        # only convert if send type is video
        convert_to_mp4(video_paths, bm)
    if settings[2] == "audio" or hijack == "bestaudio[ext=m4a]":
        convert_audio_format(video_paths, bm)
    # split_large_video(video_paths)
    return video_paths
def _swap_converted(video_paths: list, index: int, new_path, ffmpeg_args: list, bm):
    """Convert ``video_paths[index]`` to ``new_path`` and swap it into the list.

    The source file is only deleted (and the list entry only replaced) when
    the target differs from the source and ffmpeg actually produced it.
    """
    source = video_paths[index]
    if new_path == source:
        # ffmpeg cannot write onto its own input; deleting the source
        # afterwards would leave nothing behind.
        logging.info("%s already has the target extension, skipping conversion", source)
        return

    run_ffmpeg_progressbar(["ffmpeg", "-y", "-i", source, *ffmpeg_args, new_path], bm)
    if not new_path.exists():
        logging.error("Conversion of %s to %s produced no file, keeping the original", source, new_path)
        return

    source.unlink()
    video_paths[index] = new_path


def convert_audio_format(video_paths: list, bm):
    """Turn every entry of ``video_paths`` into an audio file, in place.

    Three situations are distinguished per file:

    1. file is audio, default format (``AUDIO_FORMAT is None``): left as is
    2. file is video, default format: the audio stream is copied out into a
       file whose extension is the stream's codec name
    3. non default format: the file is converted to ``AUDIO_FORMAT``

    With the default format, a file that has a single non-audio stream (or no
    streams at all) is left untouched, since there is no audio to extract.

    :param video_paths: list of ``pathlib.Path`` objects, modified in place
    :param bm: bot message used for conversion progress
    """
    for index, path in enumerate(video_paths):
        streams = ffmpeg.probe(path)["streams"]

        if AUDIO_FORMAT is not None:
            logging.info("Not default format, converting %s to %s", path, AUDIO_FORMAT)
            _swap_converted(video_paths, index, path.with_suffix(f".{AUDIO_FORMAT}"), [], bm)
            continue

        audio_streams = [stream for stream in streams if stream.get("codec_type") == "audio"]
        if len(streams) == 1 and audio_streams:
            logging.info("%s is audio, default format, no need to convert", path)
        elif len(streams) >= 2:
            logging.info("%s is video, default format, need to extract audio", path)
            # fall back to "m4a" when no audio stream was reported
            ext = audio_streams[0]["codec_name"] if audio_streams else "m4a"
            _swap_converted(video_paths, index, path.with_suffix(f".{ext}"), ["-vn", "-acodec", "copy"], bm)
        else:
            logging.warning("%s has no audio stream to extract, leaving it untouched", path)
def split_large_video(video_paths: list):
    """Split every file larger than ``TG_MAX_SIZE`` with ``split-video.sh``.

    Oversized originals are removed after the script has run.

    :param video_paths: paths of the files to check
    :return: all files found in the directory of the last entry when at least
        one file was split, otherwise ``None``
    :raises subprocess.CalledProcessError: when the split script fails
    """
    original_video = None
    split = False
    for original_video in video_paths:
        size = os.stat(original_video).st_size
        if size > TG_MAX_SIZE:
            split = True
            logging.warning("file is too large %s, splitting...", size)
            # Pass the arguments as a list: building one string and calling
            # .split() on it breaks as soon as the path contains whitespace.
            subprocess.check_output(["sh", "split-video.sh", str(original_video), str(TG_MAX_SIZE * 0.95)])
            os.remove(original_video)

    if split and original_video:
        return list(pathlib.Path(original_video).parent.glob("*"))
    return None
def download_instagram(url: str, tempdir: str):
    """Download the media behind an Instagram link into ``tempdir``.

    The link is resolved through the helper service at 192.168.6.1:15000,
    which is expected to answer with JSON whose ``data`` entry lists direct
    media links.

    :param url: link requested by the user
    :param tempdir: directory that receives the downloaded files
    :return: ``True`` when media links were returned and saved; ``False`` when
        the link is not an Instagram link or the service returned no data
    """
    if not url.startswith("https://www.instagram.com"):
        return False

    # Without a timeout requests waits indefinitely; the limit applies per
    # connect/read, not to the transfer as a whole.
    request_timeout = 60
    # `params` percent-encodes the link, so "?" and "&" inside it cannot be
    # mistaken for parameters of the helper service itself.
    resp = requests.get("http://192.168.6.1:15000/", params={"url": url}, timeout=request_timeout).json()
    url_results = resp.get("data")
    if not url_results:
        return False

    for index, link in enumerate(url_results):
        content = requests.get(link, timeout=request_timeout).content
        # guess_extension returns None for unknown content; avoid a ".None" suffix
        ext = filetype.guess_extension(content) or "bin"
        save_path = pathlib.Path(tempdir, f"{index}.{ext}")
        save_path.write_bytes(content)
    return True