diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..81108b986738e8a60899953eb5696781f486d386 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +.venv +__pycache__ +TgBotToken.txt +TestTiktok +chromedriver-win64 +Users +tests +a +b +cookies.json \ No newline at end of file diff --git a/.gradio/certificate.pem b/.gradio/certificate.pem new file mode 100644 index 0000000000000000000000000000000000000000..30aa93639f903461a0f91745c832895eb0b370f8 --- /dev/null +++ b/.gradio/certificate.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw +TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh +cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4 +WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu +ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY +MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc +h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+ +0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U +A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW +T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH +B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC +B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv +KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn +OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn +jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw +qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI +rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq +hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL +ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ +3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK 
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5 +ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur +TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC +jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc +oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq +4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA +mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d +emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc= +-----END CERTIFICATE----- diff --git a/Data/JSON/TotalView.json b/Data/JSON/TotalView.json new file mode 100644 index 0000000000000000000000000000000000000000..e1f7e4e7aeec053a2d4aa671fc4bfcd15c039408 --- /dev/null +++ b/Data/JSON/TotalView.json @@ -0,0 +1 @@ +{"total_views": 36725712, "total_videos_with_tag": 545} \ No newline at end of file diff --git a/Data/JSON/blackList.json b/Data/JSON/blackList.json new file mode 100644 index 0000000000000000000000000000000000000000..5caa869c8f501be8eeb82b01ef400c72ea59640b --- /dev/null +++ b/Data/JSON/blackList.json @@ -0,0 +1,9 @@ +{ + "usernames": [ + "pane2kvod" + ], + "videos": + [ + 1111 + ] +} \ No newline at end of file diff --git a/Data/JSON/index.json b/Data/JSON/index.json new file mode 100644 index 0000000000000000000000000000000000000000..7976564762f8656677b70e11cdbe87eda753a14c --- /dev/null +++ b/Data/JSON/index.json @@ -0,0 +1 @@ +{"parts": 1, "selectedPart": 0} \ No newline at end of file diff --git a/Data/TXT/Cacto0o.txt b/Data/TXT/Cacto0o.txt new file mode 100644 index 0000000000000000000000000000000000000000..408b55a3872842e20aac6533a3d80d1bf6a6f7f6 --- /dev/null +++ b/Data/TXT/Cacto0o.txt @@ -0,0 +1,14 @@ +pane2kvod +nestereko +tiltocacto0o +nnestereko +kakto_pane2k +fiintex +nepibaro +kakusnarezki +kudravie +_dinsa_ +tartafogo +c_h_e_l_o_v_e_kk +melintaivottak +donlorrento diff --git a/Data/TXT/T2x2.txt b/Data/TXT/T2x2.txt new file mode 100644 index 
0000000000000000000000000000000000000000..642fff7c0fae36052d1691817ce0f70fc3eae47b --- /dev/null +++ b/Data/TXT/T2x2.txt @@ -0,0 +1,48 @@ +shorts.t2x2 +ptytok +sheh_blogerov +izvestniy.moments +futurenn +suharik_tani +t2x2rezki +makartalovvv +realt2x +t2x2clip +averagekittenfan +_ledifws +ssk1zyy +skrises +liv.tvv +ebat.ti.kto.man +moments_t2x2 +t2x2cuts +smachnui_prikol +kyvalda4ka +t2x2tosha0 +el.roflano +nevermor1ng +dosmis423789_t2x2stintik +tu4kai +w1zet09 +antifrizovyi_rubb +vovchikgolita +mesgoredit +lexxxaxill +bytilka12 +antosha_t2x +ostrovt2x2 +otvertky_eee +t2xstream +t2x2_clips_ +89_sqd_fun +xray89sqd +kafix728 +t2x2_momentus +hesus_twitchh +satoshitwitch +gven.h +nocaphistoryia +t2x2rezka89 +godofnarezok +qst1mb +_t2x2_live_ diff --git a/Html/index.html b/Html/index.html new file mode 100644 index 0000000000000000000000000000000000000000..089ecb747f003c7e143cb1b925cfc304ac8d357c --- /dev/null +++ b/Html/index.html @@ -0,0 +1,104 @@ + + + + + + Таймер + + + + + +
+

Осталось

+
+
NO
+
CON
+
WEB
+
SOKET
+
+ +
+
+ +
+ 1000 +
+ +
+ + + + + + \ No newline at end of file diff --git a/Html/main.js b/Html/main.js new file mode 100644 index 0000000000000000000000000000000000000000..cb241e76026100c960f86b781ea75f4358244129 --- /dev/null +++ b/Html/main.js @@ -0,0 +1,154 @@ +let days = document.getElementById("days"); +let hours = document.getElementById("hours"); +let minutes = document.getElementById("minutes"); +let seconds = document.getElementById("seconds"); + +let interval = document.getElementById("timeToRestart"); + +function timeNotTwoDigits(number) { + if (number < 10 && number >= 0) { + return "0" + number; + } + if (number < 0&& number > -10) { + return "-0" + Math.abs(number); + } + + return number; +} + +const HEARTBEAT_INTERVAL = 1000; // Интервал heartbeat в миллисекундах +const HEARTBEAT_VALUE = 1; // Значение heartbeat-сообщения +const RECONNECT_INTERVAL = 1000; // Интервал переподключения в миллисекундах +const WS_URL = "ws://localhost:8001"; + +class WebSocketClient { + constructor(url, reconnectInterval, heartbeatInterval) { + this.url = url; + this.reconnectInterval = reconnectInterval; + this.heartbeatInterval = heartbeatInterval; + this.websocket = null; + this.isConnecting = false; + this.heartbeatTimer = null; + this.connect(); + } + + connect() { + if (this.isConnecting) { + console.log("Подключение уже в процессе."); + return; + } + this.isConnecting = true; + console.log("Попытка подключения к:", this.url); + this.websocket = new WebSocket(this.url); + + this.websocket.onopen = () => { + console.log("Соединение WebSocket открыто"); + this.isConnecting = false; + this.startHeartbeat(); + this.onOpen(); + }; + + this.websocket.onmessage = (event) => { + this.onMessage(event); + }; + + this.websocket.onclose = (event) => { + console.log("Соединение WebSocket закрыто:", event.code, event.reason); + this.isConnecting = false; + this.stopHeartbeat(); + this.onClose(event); + if (event.code !== 1000) { + this.reconnect(); + } + }; + + this.websocket.onerror = (error) => { + 
console.error("Ошибка WebSocket:", error); + this.isConnecting = false; + this.stopHeartbeat(); + this.onError(error); + this.reconnect(); + }; + } + + startHeartbeat() { + if (this.heartbeatInterval <= 0) return + this.heartbeatTimer = setInterval(() => { + if (this.websocket && this.websocket.readyState === WebSocket.OPEN) { + this.websocket.send(HEARTBEAT_VALUE); + } else { + console.log("Не удалось отправить heartbeat, соединение не установлено"); + this.stopHeartbeat(); + } + }, this.heartbeatInterval); + } + + stopHeartbeat() { + clearInterval(this.heartbeatTimer); + } + + + reconnect() { + console.log(`Попытка переподключения через ${this.reconnectInterval / 1000} секунд.`); + setTimeout(() => { + this.connect(); + }, this.reconnectInterval); + } + + send(data) { + if (this.websocket && this.websocket.readyState === WebSocket.OPEN) { + this.websocket.send(data); + } else { + console.log("Соединение WebSocket не установлено, сообщение не отправлено:", data) + } + } + + onOpen() { + console.log("Соединение открыто!"); + } + + onMessage(event) { + try { + let data = JSON.parse(event.data); + this.setTimerFromWSData(data.data.time); + this.setReloadTimeFromWSData(data.data.timerToRestart); + this.setTextUpdating(data.data.isUpdating); + + } catch (e) { + console.log("Получено некорректное сообщение", event.data) + } + } + + onClose(event) { + console.log("Соединение закрыто.", event) + } + + onError(error) { + console.log("Произошла ошибка:", error) + } + + setTimerFromWSData(data) { + let time = data; + let days_ = Math.floor(time / (60 * 60 * 24)); + let hours_ = Math.floor(time / (60 * 60)) % 24; + let minutes_ = Math.floor(time / 60) % 60; + let seconds_ = time % 60; + days.innerHTML = timeNotTwoDigits(days_); + hours.innerHTML = timeNotTwoDigits(hours_); + minutes.innerHTML = timeNotTwoDigits(minutes_); + seconds.innerHTML = timeNotTwoDigits(seconds_); + } + setReloadTimeFromWSData(data) { + let time = data; + interval.innerHTML = `Обновление через - 
${time} секунд`; + } + setTextUpdating(data) { + if (data) { + + interval.innerHTML = "Обновление данных..."; + } + } +} + + +const client = new WebSocketClient(WS_URL, RECONNECT_INTERVAL, HEARTBEAT_INTERVAL); \ No newline at end of file diff --git a/Html/reconnecting-websoket.min.js b/Html/reconnecting-websoket.min.js new file mode 100644 index 0000000000000000000000000000000000000000..b264cbd2b3f4cfc195fe59d6c4222b38b23d4e1f --- /dev/null +++ b/Html/reconnecting-websoket.min.js @@ -0,0 +1,365 @@ +// MIT License: +// +// Copyright (c) 2010-2012, Joe Walnes +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/** + * This behaves like a WebSocket in every way, except if it fails to connect, + * or it gets disconnected, it will repeatedly poll until it successfully connects + * again. 
+ * + * It is API compatible, so when you have: + * ws = new WebSocket('ws://....'); + * you can replace with: + * ws = new ReconnectingWebSocket('ws://....'); + * + * The event stream will typically look like: + * onconnecting + * onopen + * onmessage + * onmessage + * onclose // lost connection + * onconnecting + * onopen // sometime later... + * onmessage + * onmessage + * etc... + * + * It is API compatible with the standard WebSocket API, apart from the following members: + * + * - `bufferedAmount` + * - `extensions` + * - `binaryType` + * + * Latest version: https://github.com/joewalnes/reconnecting-websocket/ + * - Joe Walnes + * + * Syntax + * ====== + * var socket = new ReconnectingWebSocket(url, protocols, options); + * + * Parameters + * ========== + * url - The url you are connecting to. + * protocols - Optional string or array of protocols. + * options - See below + * + * Options + * ======= + * Options can either be passed upon instantiation or set after instantiation: + * + * var socket = new ReconnectingWebSocket(url, null, { debug: true, reconnectInterval: 4000 }); + * + * or + * + * var socket = new ReconnectingWebSocket(url); + * socket.debug = true; + * socket.reconnectInterval = 4000; + * + * debug + * - Whether this instance should log debug messages. Accepts true or false. Default: false. + * + * automaticOpen + * - Whether or not the websocket should attempt to connect immediately upon instantiation. The socket can be manually opened or closed at any time using ws.open() and ws.close(). + * + * reconnectInterval + * - The number of milliseconds to delay before attempting to reconnect. Accepts integer. Default: 1000. + * + * maxReconnectInterval + * - The maximum number of milliseconds to delay a reconnection attempt. Accepts integer. Default: 30000. + * + * reconnectDecay + * - The rate of increase of the reconnect delay. Allows reconnect attempts to back off when problems persist. Accepts integer or float. Default: 1.5. 
+ * + * timeoutInterval + * - The maximum time in milliseconds to wait for a connection to succeed before closing and retrying. Accepts integer. Default: 2000. + * + */ +(function (global, factory) { + if (typeof define === 'function' && define.amd) { + define([], factory); + } else if (typeof module !== 'undefined' && module.exports){ + module.exports = factory(); + } else { + global.ReconnectingWebSocket = factory(); + } +})(this, function () { + + if (!('WebSocket' in window)) { + return; + } + + function ReconnectingWebSocket(url, protocols, options) { + + // Default settings + var settings = { + + /** Whether this instance should log debug messages. */ + debug: false, + + /** Whether or not the websocket should attempt to connect immediately upon instantiation. */ + automaticOpen: true, + + /** The number of milliseconds to delay before attempting to reconnect. */ + reconnectInterval: 1000, + /** The maximum number of milliseconds to delay a reconnection attempt. */ + maxReconnectInterval: 30000, + /** The rate of increase of the reconnect delay. Allows reconnect attempts to back off when problems persist. */ + reconnectDecay: 1.5, + + /** The maximum time in milliseconds to wait for a connection to succeed before closing and retrying. */ + timeoutInterval: 2000, + + /** The maximum number of reconnection attempts to make. Unlimited if null. */ + maxReconnectAttempts: null, + + /** The binary type, possible values 'blob' or 'arraybuffer', default 'blob'. */ + binaryType: 'blob' + } + if (!options) { options = {}; } + + // Overwrite and define settings with options if they exist. + for (var key in settings) { + if (typeof options[key] !== 'undefined') { + this[key] = options[key]; + } else { + this[key] = settings[key]; + } + } + + // These should be treated as read-only properties + + /** The URL as resolved by the constructor. This is always an absolute URL. Read only. 
*/ + this.url = url; + + /** The number of attempted reconnects since starting, or the last successful connection. Read only. */ + this.reconnectAttempts = 0; + + /** + * The current state of the connection. + * Can be one of: WebSocket.CONNECTING, WebSocket.OPEN, WebSocket.CLOSING, WebSocket.CLOSED + * Read only. + */ + this.readyState = WebSocket.CONNECTING; + + /** + * A string indicating the name of the sub-protocol the server selected; this will be one of + * the strings specified in the protocols parameter when creating the WebSocket object. + * Read only. + */ + this.protocol = null; + + // Private state variables + + var self = this; + var ws; + var forcedClose = false; + var timedOut = false; + var eventTarget = document.createElement('div'); + + // Wire up "on*" properties as event handlers + + eventTarget.addEventListener('open', function(event) { self.onopen(event); }); + eventTarget.addEventListener('close', function(event) { self.onclose(event); }); + eventTarget.addEventListener('connecting', function(event) { self.onconnecting(event); }); + eventTarget.addEventListener('message', function(event) { self.onmessage(event); }); + eventTarget.addEventListener('error', function(event) { self.onerror(event); }); + + // Expose the API required by EventTarget + + this.addEventListener = eventTarget.addEventListener.bind(eventTarget); + this.removeEventListener = eventTarget.removeEventListener.bind(eventTarget); + this.dispatchEvent = eventTarget.dispatchEvent.bind(eventTarget); + + /** + * This function generates an event that is compatible with standard + * compliant browsers and IE9 - IE11 + * + * This will prevent the error: + * Object doesn't support this action + * + * http://stackoverflow.com/questions/19345392/why-arent-my-parameters-getting-passed-through-to-a-dispatched-event/19345563#19345563 + * @param s String The name that the event should use + * @param args Object an optional object that the event will use + */ + function generateEvent(s, 
args) { + var evt = document.createEvent("CustomEvent"); + evt.initCustomEvent(s, false, false, args); + return evt; + }; + + this.open = function (reconnectAttempt) { + ws = new WebSocket(self.url, protocols || []); + ws.binaryType = this.binaryType; + + if (reconnectAttempt) { + if (this.maxReconnectAttempts && this.reconnectAttempts > this.maxReconnectAttempts) { + return; + } + } else { + eventTarget.dispatchEvent(generateEvent('connecting')); + this.reconnectAttempts = 0; + } + + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'attempt-connect', self.url); + } + + var localWs = ws; + var timeout = setTimeout(function() { + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'connection-timeout', self.url); + } + timedOut = true; + localWs.close(); + timedOut = false; + }, self.timeoutInterval); + + ws.onopen = function(event) { + clearTimeout(timeout); + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'onopen', self.url); + } + self.protocol = ws.protocol; + self.readyState = WebSocket.OPEN; + self.reconnectAttempts = 0; + var e = generateEvent('open'); + e.isReconnect = reconnectAttempt; + reconnectAttempt = false; + eventTarget.dispatchEvent(e); + }; + + ws.onclose = function(event) { + clearTimeout(timeout); + ws = null; + if (forcedClose) { + self.readyState = WebSocket.CLOSED; + eventTarget.dispatchEvent(generateEvent('close')); + } else { + self.readyState = WebSocket.CONNECTING; + var e = generateEvent('connecting'); + e.code = event.code; + e.reason = event.reason; + e.wasClean = event.wasClean; + eventTarget.dispatchEvent(e); + if (!reconnectAttempt && !timedOut) { + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'onclose', self.url); + } + eventTarget.dispatchEvent(generateEvent('close')); + } + + var timeout = self.reconnectInterval * Math.pow(self.reconnectDecay, 
self.reconnectAttempts); + setTimeout(function() { + self.reconnectAttempts++; + self.open(true); + }, timeout > self.maxReconnectInterval ? self.maxReconnectInterval : timeout); + } + }; + ws.onmessage = function(event) { + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'onmessage', self.url, event.data); + } + var e = generateEvent('message'); + e.data = event.data; + eventTarget.dispatchEvent(e); + }; + ws.onerror = function(event) { + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'onerror', self.url, event); + } + eventTarget.dispatchEvent(generateEvent('error')); + }; + } + + // Whether or not to create a websocket upon instantiation + if (this.automaticOpen == true) { + this.open(false); + } + + /** + * Transmits data to the server over the WebSocket connection. + * + * @param data a text string, ArrayBuffer or Blob to send to the server. + */ + this.send = function(data) { + if (ws) { + if (self.debug || ReconnectingWebSocket.debugAll) { + console.debug('ReconnectingWebSocket', 'send', self.url, data); + } + return ws.send(data); + } else { + throw 'INVALID_STATE_ERR : Pausing to reconnect websocket'; + } + }; + + /** + * Closes the WebSocket connection or connection attempt, if any. + * If the connection is already CLOSED, this method does nothing. + */ + this.close = function(code, reason) { + // Default CLOSE_NORMAL code + if (typeof code == 'undefined') { + code = 1000; + } + forcedClose = true; + if (ws) { + ws.close(code, reason); + } + }; + + /** + * Additional public API method to refresh the connection if still open (close, re-open). + * For example, if the app suspects bad data / missed heart beats, it can try to refresh. 
+ */ + this.refresh = function() { + if (ws) { + ws.close(); + } + }; + } + + /** + * An event listener to be called when the WebSocket connection's readyState changes to OPEN; + * this indicates that the connection is ready to send and receive data. + */ + ReconnectingWebSocket.prototype.onopen = function(event) {}; + /** An event listener to be called when the WebSocket connection's readyState changes to CLOSED. */ + ReconnectingWebSocket.prototype.onclose = function(event) {}; + /** An event listener to be called when a connection begins being attempted. */ + ReconnectingWebSocket.prototype.onconnecting = function(event) {}; + /** An event listener to be called when a message is received from the server. */ + ReconnectingWebSocket.prototype.onmessage = function(event) {}; + /** An event listener to be called when an error occurs. */ + ReconnectingWebSocket.prototype.onerror = function(event) {}; + + /** + * Whether all instances of ReconnectingWebSocket should log debug messages. + * Setting this to true is the equivalent of setting all instances of ReconnectingWebSocket.debug to true. 
+ */ + ReconnectingWebSocket.debugAll = false; + + ReconnectingWebSocket.CONNECTING = WebSocket.CONNECTING; + ReconnectingWebSocket.OPEN = WebSocket.OPEN; + ReconnectingWebSocket.CLOSING = WebSocket.CLOSING; + ReconnectingWebSocket.CLOSED = WebSocket.CLOSED; + + return ReconnectingWebSocket; +}); \ No newline at end of file diff --git a/Main.py b/Main.py new file mode 100644 index 0000000000000000000000000000000000000000..417f626880c0850e17b5880f158beacd0f494917 --- /dev/null +++ b/Main.py @@ -0,0 +1,222 @@ +import asyncio +import websockets +import time +import json +import threading +import requests +import datetime +import traceback +from playwright.async_api import async_playwright +from TikTok.Server.main import getInfo +from TikTok.Server.SaveTotalView import saveTotalViewAndVideos, getTotalDict +from TikTok.Cookies.cookie import get_tiktok_cookies_from_file +import os +import random +import math +# Replace with your actual function to get TikTok data + + +def get_tiktok_data(hashtag="костиккакто", userlistLink="Data/TXT/Cacto0o.txt") -> dict: + try: + return getInfo(hashtag, userlistLink) + except requests.exceptions.RequestException as e: + print(f"Error fetching TikTok data: {e}") + return None + except Exception as e: + print(f"An unexpected error occurred: {e}") + return None + + +# Global variables (better to use a class) +startTime = 1734648586 +donateAddTime = 35497352 +endTime = startTime + donateAddTime +data_dict = None +global lastReloadTime +global doUpdating +lastReloadTime = time.time() + + +def save_data(data): + if not os.path.exists("Data/JSON/"): + os.makedirs("Data/JSON/") + if type(data) == str: + json_acceptable_string = data.replace("'", "\"") + data = json.loads(json_acceptable_string) + + with open("Data/JSON/data.json", "r") as f: + data_dict = json.loads(f.read()) + for user_data in data["userStats"]: + if user_data == 0: + continue + for new_user_data in data_dict["userStats"]: + if new_user_data == 0: + continue + if 
(user_data["username"] == new_user_data["username"]): + user_data["total_views"] = new_user_data["total_views"] + user_data["total_videos_with_tag"] = new_user_data["total_videos_with_tag"] + print(f"Updated user: {user_data['username']}") + break + else: + + data_dict.get("userStats").append(user_data) + print(f"newUser {user_data['username']}") + + with open("Data/JSON/dataNew.json", "w") as f: + f.write(json.dumps(data_dict)) + + +def open_dataDict() -> dict: + with open("Data/JSON/TotalView.json", "r") as f: + data = f.read() + return json.loads(data) + + +async def send_data_to_websocket(websocket): + global data_dict + global lastReloadTime + while True: + data_dict = open_dataDict() + if data_dict is not None: + data_dict_a: dict = data_dict + tiktokTime = startTime + data_dict_a.get('total_total_views', 0) + time_left = int(tiktokTime - time.time()) + timeToRestart = (lastReloadTime + 300) - time.time() + transferData = json.dumps({"type": "transfer", "data": { + "time": time_left, "timerToRestart": timeToRestart}}) + + try: + await websocket.send(transferData) + except websockets.exceptions.ConnectionClosedError: + print("Websocket connection closed. 
Exiting send thread.") + break + await asyncio.sleep(1) + + +def fetch_tiktok_data_periodically_main(hashtag="костиккакто"): + asyncio.run(fetch_tiktok_data_periodically(hashtag)) + + +# 5 minutes +async def fetch_tiktok_data_periodically(hashtag="костиккакто", interval=300): + global data_dict + global lastReloadTime + global doUpdating + isFirst = True + while True: + # print("Starting fetch_tiktok_data_periodically") + # if isFirst: + + # isFirst = False + # data = getTotalDict() + # print(data) + # else: + + doUpdating = True + data: dict = await get_tiktok_data(hashtag, userlistLink="Data/TXT/Cacto0o.txt") + saveTotalViewAndVideos(hashtag) + data_dict = open_dataDict() + print(data_dict) + + # if data.get('total_total_views', 0) > 0: + # save_data(data) + doUpdating = False + lastReloadTime = time.time() + time.sleep(interval) + + +def update_data_periodically(): + global data_dict + print("Starting update_data_periodically") + hashtag = "костиккакто" + while True: + # + saveTotalViewAndVideos(hashtag) + data = open_dataDict() + if data.get('total_views', 0) > 0: + data_dict = open_dataDict() + time.sleep(1) + + +async def handler(websocket): + global data_dict + global doUpdating + while True: + try: + data_dict = open_dataDict() + # Slight delay to avoid immediate re-execution + if data_dict is not None: + tiktokTime = startTime + \ + math.floor(data_dict.get('total_views', 0) / 30000 * 3600) + time_left = int(tiktokTime - time.time()) + timeToRestart = int((lastReloadTime + 300) - time.time()) + transferData = json.dumps({"type": "transfer", "data": {"time": time_left, + "timerToRestart": timeToRestart, + "isUpdating": doUpdating + }}) + await websocket.send(transferData) + await asyncio.sleep(1) + except websockets.exceptions.ConnectionClosedError: + print("Websocket connection closed.") + break + except Exception as e: + print(f"Error in handler: {e}") + break + + +def msTokenFromTiktok(): + asyncio.run(msTokenFromTiktokMain()) + + +async def 
msTokenFromTiktokMain(): + playwright = await async_playwright().start() + browser = await playwright.chromium.launch( + headless=False, + executable_path="C:/Program Files/Google/Chrome/Application/chrome.exe" + ) + page = await browser.new_page() + await page.goto("https://www.tiktok.com/") + try: + await asyncio.sleep(2) + await page.goto("https://www.tiktok.com/") + while True: + await asyncio.sleep(random.uniform(0, 2)) + random_number = random.randint(1, 1000) + if random_number % 2 == 0: + await page.keyboard.press("L") + await page.keyboard.press("ArrowDown") + await asyncio.sleep(random.uniform(0, 2)) + cookies = await page.context.cookies() + # Save cookies to a file + with open("Data/JSON/cookies.json", "w") as f: + json.dump(cookies, f) + print(get_tiktok_cookies_from_file("Data/JSON/cookies.json")) + await asyncio.sleep(10) + except Exception as e: + print(f"An error occurred: {e}") + + await browser.close() + + +async def main(): + async with websockets.serve(handler, "localhost", 8001): + print("Server started on ws://localhost:8001") + + # Start separate thread for fetching data + threadTikTokInfo = threading.Thread( + target=fetch_tiktok_data_periodically_main) + threadTikTokInfo.daemon = True # Allow the main thread to exit + threadTikTokInfo.start() + + # threadGettingMsToken = threading.Thread(target=msTokenFromTiktok) + # threadGettingMsToken.daemon = True # Allow the main thread to exit + # threadGettingMsToken.start() + + threadUpdate = threading.Thread(target=update_data_periodically) + threadUpdate.daemon = True # Allow the main thread to exit + threadUpdate.start() + + await asyncio.Future() # Keep the event loop running + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/README.md b/README.md index 49904fc7ce0934ddaa22ad674a36c0607d6a53d2..300ca247b80632c7516f8e4914bfad1a0623d2e9 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,6 @@ --- title: TikTokOpen -emoji: ⚡ -colorFrom: green -colorTo: red +app_file: gradioa.py sdk: 
gradio sdk_version: 5.9.1 -app_file: app.py -pinned: false --- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/TikTok/Cookies/__init__.py b/TikTok/Cookies/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TikTok/Cookies/cookie.py b/TikTok/Cookies/cookie.py new file mode 100644 index 0000000000000000000000000000000000000000..5cdf82fa799c259ef520471c593316fbd361c1c5 --- /dev/null +++ b/TikTok/Cookies/cookie.py @@ -0,0 +1,94 @@ +import browser_cookie3 +import json + +def getMsToken(): + cookie_keys = ["msToken"] + json_cookies = get_tiktok_cookies(cookie_keys) + if json_cookies["found"]: + ms_token = json_cookies["cookies"]["msToken"] + #print(ms_token) + else: + raise Exception("Missing cookie msToken. Login to your tiktok account and retry") + saveMsToken(ms_token) + return ms_token + +def saveMsToken(ms_token): + with open("Data/TXT/Data/ms_token.txt", "w") as f: + f.write(ms_token) +def readOldMsToken(): + with open("Data/TXT/Data/ms_token.txt", "r") as f: + ms_token = f.read() + return ms_token + +def get_tiktok_cookies(cookie_keys): + # Try each browser in turn; the ref labels are index-aligned with the loader list below + ref = ["firefox", "chrome", "brave"] + index = 0 + json_cookie = {} + found = False + for cookie_fn in [ + + browser_cookie3.firefox, + browser_cookie3.chrome, + browser_cookie3.brave, + ]: + try: + for cookie in cookie_fn(domain_name="tiktok.com"): + + if ('tiktok.com' in cookie.domain): + + # print(f"COOKIE - {ref[index]}: {cookie}") + if (cookie.name in cookie_keys): + json_cookie['browser'] = ref[index] + json_cookie[cookie.name] = cookie.value + json_cookie[cookie.name + '_expires'] = cookie.expires + + # Check + found = True + for key in cookie_keys: + if (json_cookie.get(key, "") == ""): + found = False + break + + except Exception as e: + print(e) + + index += 1 + + if (found): + break + #print("found " + 
str(found)) + return {"found": found, "cookies": json_cookie} + + + +def get_tiktok_cookies_from_file(filepath: str): + msToken = "" + cookies = {} + with open(filepath, "r") as f: + cookies = f.read() + + cookies = json.loads(cookies) + + for cookie in cookies: + cookie: dict + if cookie.get("name", "") == "msToken" and cookie.get("domain", "") == ".tiktok.com": + msToken = cookie.get("value", "") + break + + + if not msToken: # msToken starts as "" and stays empty when no .tiktok.com msToken cookie is in the file + raise Exception("Missing cookie msToken. Login to your tiktok account and retry") + saveMsToken(msToken) + return msToken + +def getCookiesFromFile(filepath: str): + cookies = {} + with open(filepath, "r") as f: + cookies = f.read() + + cookies = json.loads(cookies) + return cookies +if __name__ == "__main__": + print(get_tiktok_cookies_from_file("Data/JSON/cookies.json")) # filepath is a required argument + \ No newline at end of file diff --git a/TikTok/Server/SaveTotalView.py b/TikTok/Server/SaveTotalView.py new file mode 100644 index 0000000000000000000000000000000000000000..c562ec642be520cbed30ba8d2ac9b7c62638e683 --- /dev/null +++ b/TikTok/Server/SaveTotalView.py @@ -0,0 +1,52 @@ +import json +import os + +def saveTotalViewAndVideos(hashtag: str): + allData = {} + hashtag = "костиккакто" + if not os.path.exists(f"Data/JSON/Users/{hashtag}"): + print('a') + os.makedirs(f"Data/JSON/Users/{hashtag}") + + for user in os.listdir(f"Data/JSON/Users/{hashtag}"): + if user == "TotalView.json": + continue + with open(f"Data/JSON/Users/{hashtag}/{user}", "r") as f: + allData[user] = json.loads(f.read()) + totalVideos = 0 + totalViews = 0 + for user in allData: + totalViews += allData[user]["total_views"] + totalVideos += allData[user]["total_videos_with_tag"] + dirname = "Data/JSON/TotalView.json" + if not os.path.exists(os.path.dirname(dirname)): + os.makedirs(os.path.dirname(dirname)) + + with open(f"Data/JSON/TotalView.json", "w") as f: + f.write(json.dumps({ + "total_views": totalViews, + "total_videos_with_tag": totalVideos + })) + +def getTotalDict() -> dict: + if 
def getNewMsToken():
    """Load the ``msToken`` from ``cookies.txt`` and return it.

    Failures are reported on stdout instead of being raised.

    Returns:
        The token string on success, ``None`` when loading failed.
    """
    try:
        ms_token = get_tiktok_cookies_from_file("cookies.txt")
    # The specific handlers come first; placed after `except Exception`
    # they could never run.
    except SameMsTokenException as e:
        print(getattr(e, "message", e))
    except ValueError as e:
        print(e)
        print("Please check your ms_token")
    except Exception as e:
        # Format the exception instead of concatenating it to a str, which
        # raised TypeError from inside the handler.
        print(f"Exception: {e}")
    else:
        print(f" ms_token:\t {ms_token} \n")
        return ms_token
    return None
def getBlackList(blackListFile: str) -> dict:
    """Load the blacklist JSON object from *blackListFile*.

    Best-effort loader: any problem with the file is printed and an empty
    dict is returned so callers can always use ``.get`` on the result.

    Args:
        blackListFile: Path to the blacklist JSON file.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing,
        unreadable, empty, not valid JSON, or not a JSON object.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(blackListFile, "r", encoding="utf-8") as f:
            raw = f.read()
        if not raw.strip():
            return {}
        parsed = json.loads(raw)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(e)
        return {}

    if not isinstance(parsed, dict):
        print(f"Blacklist file {blackListFile} does not contain a JSON object")
        return {}
    return parsed
def get_user_list(file_path):
    """Read usernames from a text file, one per line.

    Surrounding whitespace is stripped from every line. Lines that are empty
    after stripping are skipped so that no empty username is returned.

    Args:
        file_path: Path to the text file.

    Returns:
        List of non-empty usernames in file order.
    """
    with open(file_path, 'r') as file:
        stripped = (line.strip() for line in file)
        return [name for name in stripped if name]
def saveTxt(path, data):
    """Write *data* to *path*, one item per line, creating parent directories.

    Args:
        path: Destination file; may be a bare file name with no directory part.
        data: Iterable of strings, written joined by newlines with no
            trailing newline.
    """
    parent = os.path.dirname(path)
    # dirname() is "" for a bare file name and os.makedirs("") raises, so a
    # directory is only created when the path actually has one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w") as f:
        f.write("\n".join(data))
async def process_user(username, api, hashtag, blackList):
    """Count one user's views on videos tagged with *hashtag* and save the result.

    Blacklisted users are skipped. Otherwise the user's videos are iterated,
    blacklisted video ids are ignored, and the totals are written with
    ``saveUserInfoInJson``. Errors are printed, never raised, so one failing
    user does not abort the other tasks.

    Concurrency is limited by the module-level ``nowProcess`` counter: a task
    waits while ``nowProcess >= maxvalue`` and holds one slot while it
    iterates the user's videos.

    Args:
        username: TikTok username to process.
        api: Object providing ``user(username=...)``.
        hashtag: Hashtag name compared against each video's lower-cased
            hashtag names.
        blackList: Dict with optional ``"usernames"`` and ``"videos"`` lists.
    """
    # The counter is rebound below; without this declaration the name is
    # local and every read raises UnboundLocalError.
    global nowProcess

    blackList = blackList or {}
    if username in blackList.get("usernames", []):
        debugPrint(f"Skipping user {username} because it is in the blacklist")
        return
    debugPrint(f"Getting user {username}")
    debugPrint(f"username = {username}")

    try:
        user: User = api.user(username=username)
        user_data = await user.info()
    except Exception:
        print(f"Error getting user {username}")
        return

    while nowProcess >= maxvalue:
        debugPrint(f"Waiting for {username}")
        await asyncio.sleep(1)

    nowProcess += 1
    try:
        videosLen = user_data["userInfo"]["stats"]["videoCount"]
        debugPrint(f"videosLen = {videosLen}")

        total_views = 0
        total_videos_with_tag = 0
        blocked_videos = blackList.get("videos", [])
        async for video in user.videos(count=videosLen):
            if video.id in blocked_videos:
                continue
            play_count = int(video.stats.get("playCount", 0))
            if any(str(h.name).lower() == hashtag for h in video.hashtags):
                total_views += play_count
                total_videos_with_tag += 1

        debugPrint(f"save {username} {total_views}")
        saveUserInfoInJson(username=username,
                           data={
                               "username": username,
                               "total_views": total_views,
                               "total_videos_with_tag": total_videos_with_tag},
                           hashtag=hashtag)
    except Exception:
        print(f"Error getting user {username} !")
    finally:
        # Release the slot on success as well as on failure; otherwise the
        # counter only grows and later tasks wait forever.
        nowProcess -= 1
async def users_videos_with_hashtag(usernameList, hashtag, blackList: dict[list] = None, ms_token: str = None):
    '''
    Retrieve each user's videos, total the views of those tagged with
    `hashtag`, and save per-user totals to JSON.

    A user's totals are written with `saveUserInfoInJson` only when
    `compareUserDataViewsAndSaveWithMore` accepts them against the previously
    stored file; otherwise the user is reported as skipped. Session cookies
    are read from and written back to `Data/JSON/cookies.json`.

    Parameters:
    - `usernameList`: List of TikTok usernames to retrieve videos for.
    - `hashtag`: Hashtag compared against each video's lower-cased hashtag names.
    - `blackList`: (Optional) Dictionary containing lists of usernames and video IDs to skip.
    - `ms_token`: (Optional) TikTok ms_token passed to the session.

    Returns nothing; results are persisted as a side effect.
    '''
    # The default is None, which the membership tests below cannot use.
    blackList = blackList or {}

    async with TikTokApi() as api:
        debugPrint("Creating sessions")
        try:
            cookieFormLast: list = [openJson("Data/JSON/cookies.json")]
        except Exception:
            print("No cookies found, creating new sessions")
            cookieFormLast = None

        await api.create_sessions(ms_tokens=[ms_token],
                                  num_sessions=1,
                                  sleep_after=20,
                                  headless=False,
                                  executable_path="C:/Program Files/Google/Chrome/Application/chrome.exe",
                                  # browser="firefox",
                                  override_browser_args=[
                                      "--disable-blink-features=AutomationControlled"],
                                  cookies=cookieFormLast,
                                  starting_url="https://www.tiktok.com/@tiltocacto0o"
                                  )

        try:
            debugPrint("Sessions created")
            print(blackList.get("usernames", ""))
            for username in tqdm(usernameList):
                if username in blackList.get("usernames", ""):
                    debugPrint(
                        f"Skipping user {username} because it is in the blacklist")
                    continue
                debugPrint(f"Getting user {username}")
                debugPrint(f"username = {username}")

                try:
                    user: User = api.user(username=username)
                    user_data = await user.info()
                    # Looked up inside the try so an unexpected response
                    # skips this user instead of aborting the whole run.
                    videosLen = user_data["userInfo"]["stats"]["videoCount"]
                except Exception:
                    print(f"Error getting user {username}")
                    continue

                debugPrint(f"videosLen = {videosLen} ")
                total_views = 0
                total_videos_with_tag = 0
                try:
                    async for video in user.videos(count=videosLen):
                        if video.id in blackList.get("videos", []):
                            continue
                        play_count = int(video.stats.get("playCount", 0))
                        if any(str(h.name).lower() == hashtag for h in video.hashtags):
                            total_views += play_count
                            total_videos_with_tag += 1
                    debugPrint(f"save {username} {total_views}")

                    new_stats = {"username": username,
                                 "total_views": total_views,
                                 "total_videos_with_tag": total_videos_with_tag}
                    previous_stats = openUserInfoInJson(username=username, hashtag=hashtag)
                    if compareUserDataViewsAndSaveWithMore(previous_stats, new_stats):
                        saveUserInfoInJson(username=username, data=new_stats, hashtag=hashtag)
                    else:
                        print(f"skip {username} {total_views}")
                except Exception as e:
                    print(f"Error getting videos for user {username}")
                    print(e)
                    continue
                # Random pause between users.
                await asyncio.sleep(random.uniform(0.5, 1.5))

            debugPrint("Closing sessions")
            cookietosave = await api.get_session_cookies(api.sessions[0])
            saveJson("Data/JSON/cookies.json", cookietosave)
        finally:
            # Always release the sessions and Playwright, even if the loop
            # above raised.
            await api.close_sessions()
            await api.stop_playwright()
async def tiktokUserCountVideoViews(proxylist: list = None, ms_token: str = None, userlist: list = None, hashtag: str = None, cookies: list[dict] = None, blackList: dict[list] = None) -> dict:
    '''Retrieve hashtag view counts for a list of TikTok users.

    Users are processed one after another with `process_user`. The caller's
    `userlist` is not modified.

    Parameters:
    - `proxylist: list = None`: Accepted for compatibility; not used here.
    - `ms_token: str = None`: A required TikTok MS token.
    - `userlist: list = None`: A required list of TikTok usernames to process.
    - `hashtag: str = None`: A required hashtag to filter the videos by.
    - `cookies: list[dict] = None`: Cookie dictionaries passed to the TikTok API sessions.
    - `blackList: dict[list] = None`: Dictionary with optional `"usernames"` and `"videos"` lists to skip.

    Returns:
    - `dict`: `{"userStats": [...], "total_total_views": int}` on success.
      `0` is returned when an error occurred while talking to TikTok.

    Raises:
    - `ValueError`: if `ms_token`, `userlist` or `hashtag` is missing.'''
    if not ms_token:
        raise ValueError("A TikTok MS token is required.")

    if not userlist:
        raise ValueError("A list of users is required.")

    if not hashtag:
        raise ValueError("A hashtag is required.")

    # Normalise to a dict: `.get` is called on it below.
    blackList = blackList or {}
    blocked_users = blackList.get("usernames") or []
    blocked_videos = blackList.get("videos") or []
    print(f"username = {blackList}")
    hashtag = hashtagProcess(hashtag)
    print(hashtag)

    # Build a filtered copy instead of removing from the list being iterated.
    users = [userName for userName in userlist if userName not in blocked_users]

    try:
        async with TikTokApi() as api:
            startTime = time.time()
            await api.create_sessions(headless=False, ms_tokens=[ms_token], num_sessions=1, sleep_after=30, cookies=cookies)

            results = []
            try:
                for userName in users:
                    print(f"Processing user: {userName}")
                    # Await each result; un-awaited futures contributed
                    # nothing to the total.
                    results.append(await process_user(userName=userName,
                                                      api=api,
                                                      hashtag=hashtag,
                                                      videoBlacklist=blocked_videos,
                                                      userBlacklist=blocked_users))
            finally:
                await api.close_sessions()

            total_total_views = 0
            for entry in results:
                if isinstance(entry, dict):
                    total_total_views += entry['total_views']
                elif isinstance(entry, int):
                    total_total_views += entry

            results_as_dict = {"userStats": results, "total_total_views": total_total_views}
            endTime = time.time()

            print(f"Total views: \033[32m{total_total_views}\033[0m = process time: \033[31m{round(endTime - startTime, 4)}\033[0m")
            return results_as_dict

    except Exception as e:
        # Sessions are already closed by the inner `finally`; `api` is not
        # touched here because it may never have been bound.
        if "TimeoutError" in str(e):
            print(f"Error: {e}")
        else:
            print(f"An error occurred: {type(e).__name__}: {e}")
        return 0
+ ''' + print(userName) + #TODO: if user in blacklist then return 0 + #time.sleep(random.randint(1, 5)/10) + if userName in userBlacklist: + print(f"{userName} in blacklist") + return 0 + # await asyncio.sleep(random.randint(1, 5) / 10) + startTime = time.time() + try: + user = api.user(username=userName) + + user_data = await user.info() + + if "userInfo" not in user_data or "stats" not in user_data["userInfo"] or "videoCount" not in user_data["userInfo"]["stats"]: + print(f"Error: Invalid user data format for {userName}") + return 0 + + video_count = user_data["userInfo"]["stats"]["videoCount"] + if video_count == 0: + print(f"{userName} has no videos.") + return 0 + print(f"{userName} has {video_count} videos.") + + total_views = 0 + total_videos_with_tag = 0 + blackListI = 0 + async for video in user.videos(count=video_count): + if video.id in videoBlacklist: + blackListI += 1 + continue + try: + # TODO: check if video is in a black list + play_count = int(video.stats.get("playCount", 0)) # Handle potential missing data + if any(str(h.name).lower() == hashtag for h in video.hashtags): + + total_views += play_count + total_videos_with_tag += 1 + + except (KeyError, TypeError, ValueError) as e: + print(f"Error processing video for {userName}: {e}") + return 0 # Skip to the next video if there's an error + + endTime = time.time() + tabs = "" + for _ in range(int(24 - len(userName))): + tabs += " " + + print(f"\tTotal views for \033[33m{userName}\033[0m:{tabs} \033[32m{total_views}\033[0m \ttotal videos with tag: \033[35m{total_videos_with_tag}\033[0m \t total videos: \033[36m{video_count}\033[0m process time: \033[31m{round(endTime - startTime, 4)}\033[0m \tblacklisted: \033[31m{blackListI}\033[0m") + return {"username": userName, "total_views": total_views, "total_videos_with_tag": total_videos_with_tag} + except Exception as e: + print(f"An unexpected error occurred for {userName}: {e}") + return 0 # Skip to the next video if there's an error + + + + diff --git 
class Comment:
    """
    A TikTok Comment.

    Example Usage
        .. code-block:: python

            for comment in video.comments:
                print(comment.text)
                print(comment.as_dict)
    """

    parent: ClassVar[TikTokApi]

    id: str
    """The id of the comment"""
    author: ClassVar[User]
    """The author of the comment"""
    text: str
    """The contents of the comment"""
    likes_count: int
    """The amount of likes of the comment"""
    as_dict: dict
    """The raw data associated with this comment"""

    def __init__(self, data: Optional[dict] = None):
        """Create a comment, populating its fields when raw *data* is given."""
        if data is not None:
            self.as_dict = data
            self.__extract_from_data()

    def __extract_from_data(self):
        """Copy id, text, author and like count out of ``self.as_dict``."""
        self.id = self.as_dict["cid"]
        self.text = self.as_dict["text"]

        usr = self.as_dict["user"]
        self.author = self.parent.user(
            user_id=usr["uid"], username=usr["unique_id"], sec_uid=usr["sec_uid"]
        )
        self.likes_count = self.as_dict["digg_count"]

    async def replies(self, count=20, cursor=0, **kwargs) -> Iterator[Comment]:
        """
        Yields replies to this comment.

        Args:
            count (int): The maximum number of replies to yield.
            cursor (int): The offset of replies from 0 you want to get.

        Returns:
            async iterator/generator: Yields the objects built by
            ``parent.comment``.

        Raises:
            InvalidResponseException: If the request returns ``None``.
        """
        found = 0

        while found < count:
            params = {
                "count": 20,
                "cursor": cursor,
                # NOTE(review): item_id is filled with the author's user id;
                # confirm the endpoint does not expect the video id here.
                "item_id": self.author.user_id,
                "comment_id": self.id,
            }

            resp = await self.parent.make_request(
                url="https://www.tiktok.com/api/comment/list/reply/",
                params=params,
                headers=kwargs.get("headers"),
                session_index=kwargs.get("session_index"),
            )

            if resp is None:
                raise InvalidResponseException(
                    resp, "TikTok returned an invalid response."
                )

            for comment in resp.get("comments", []):
                yield self.parent.comment(data=comment)
                found += 1
                # Stop inside the page so no more than `count` are yielded.
                if found >= count:
                    return

            if not resp.get("has_more", False):
                return

            cursor = resp.get("cursor")

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        id = getattr(self, "id", None)
        text = getattr(self, "text", None)
        return f"TikTokApi.comment(comment_id='{id}', text='{text}')"
+ ) + + url_params = { + "challengeName": self.name, + "msToken": kwargs.get("ms_token"), + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/challenge/detail/", + params=url_params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException(resp, "TikTok returned an invalid response.") + + self.as_dict = resp + self.__extract_from_data() + return resp + + async def videos(self, count=30, cursor=0, **kwargs) -> Iterator[Video]: + """ + Returns TikTok videos that have this hashtag in the caption. + + Args: + count (int): The amount of videos you want returned. + cursor (int): The the offset of videos from 0 you want to get. + + Returns: + async iterator/generator: Yields TikTokApi.video objects. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, or one we don't understand. + + Example Usage: + .. code-block:: python + + async for video in api.hashtag(name='funny').videos(): + # do something + """ + + id = getattr(self, "id", None) + if id is None: + await self.info(**kwargs) + + found = 0 + while found < count: + params = { + "challengeID": self.id, + "count": 35, + "cursor": cursor, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/challenge/item_list/", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException( + resp, "TikTok returned an invalid response." 
class Search:
    """Contains static methods about searching TikTok for a phrase."""

    parent: TikTokApi

    @staticmethod
    async def users(search_term, count=10, cursor=0, **kwargs) -> Iterator[User]:
        """
        Searches for users.

        Note: Your ms_token needs to have done a search before for this to work.

        Args:
            search_term (str): The phrase you want to search for.
            count (int): The maximum number of users to yield.
            cursor (int): The offset of users from 0 you want to get.

        Returns:
            async iterator/generator: Yields TikTokApi.user objects.

        Raises:
            InvalidResponseException: If TikTok returns an invalid response, or one we don't understand.

        Example Usage:
            .. code-block:: python

                async for user in api.search.users('david teather'):
                    # do something
        """
        async for user in Search.search_type(
            search_term, "user", count=count, cursor=cursor, **kwargs
        ):
            yield user

    @staticmethod
    async def search_type(
        search_term, obj_type, count=10, cursor=0, **kwargs
    ) -> Iterator:
        """
        Searches for a specific type of object. But you shouldn't use this directly, use the other methods.

        Note: Your ms_token needs to have done a search before for this to work.
        Note: Currently only supports searching for users, other endpoints require auth.

        Args:
            search_term (str): The phrase you want to search for.
            obj_type (str): The type of object you want to search for (user)
            count (int): The maximum number of objects to yield.
            cursor (int): The offset of objects from 0 you want to get.

        Returns:
            async iterator/generator: Yields the objects built by
            ``parent.user`` when ``obj_type`` is ``"user"``.

        Raises:
            InvalidResponseException: If TikTok returns an invalid response, or one we don't understand.

        Example Usage:
            .. code-block:: python

                async for user in api.search.search_type('david teather', 'user'):
                    # do something
        """
        found = 0
        while found < count:
            params = {
                "keyword": search_term,
                "cursor": cursor,
                "from_page": "search",
                "web_search_code": """{"tiktok":{"client_params_x":{"search_engine":{"ies_mt_user_live_video_card_use_libra":1,"mt_search_general_user_live_card":1}},"search_server":{}}}""",
            }

            resp = await Search.parent.make_request(
                url=f"https://www.tiktok.com/api/search/{obj_type}/full/",
                params=params,
                headers=kwargs.get("headers"),
                session_index=kwargs.get("session_index"),
            )

            if resp is None:
                raise InvalidResponseException(
                    resp, "TikTok returned an invalid response."
                )

            if obj_type == "user":
                for user in resp.get("user_list", []):
                    info = user.get("user_info")
                    yield Search.parent.user(
                        sec_uid=info.get("sec_uid"),
                        user_id=info.get("user_id"),
                        username=info.get("unique_id"),
                    )
                    found += 1
                    # Stop inside the page so no more than `count` are yielded.
                    if found >= count:
                        return

            if not resp.get("has_more", False):
                return

            cursor = resp.get("cursor")
code-block:: python + + sound_info = await api.sound(id='7016547803243022337').info() + """ + + id = getattr(self, "id", None) + if not id: + raise TypeError( + "You must provide the id when creating this class to use this method." + ) + + url_params = { + "msToken": kwargs.get("ms_token"), + "musicId": id, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/music/detail/", + params=url_params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException(resp, "TikTok returned an invalid response.") + + self.as_dict = resp + self.__extract_from_data() + return resp + + async def videos(self, count=30, cursor=0, **kwargs) -> Iterator[Video]: + """ + Returns Video objects of videos created with this sound. + + Args: + count (int): The amount of videos you want returned. + cursor (int): The the offset of videos from 0 you want to get. + + Returns: + async iterator/generator: Yields TikTokApi.video objects. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, or one we don't understand. + + Example Usage: + .. code-block:: python + + async for video in api.sound(id='7016547803243022337').videos(): + # do something + """ + id = getattr(self, "id", None) + if id is None: + raise TypeError( + "You must provide the id when creating this class to use this method." + ) + + found = 0 + while found < count: + params = { + "musicID": id, + "count": 30, + "cursor": cursor, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/music/item_list/", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException( + resp, "TikTok returned an invalid response." 
+ ) + + for video in resp.get("itemList", []): + yield self.parent.video(data=video) + found += 1 + + if not resp.get("hasMore", False): + return + + cursor = resp.get("cursor") + + def __extract_from_data(self): + data = self.as_dict + keys = data.keys() + + if "musicInfo" in keys: + author = data.get("musicInfo").get("author") + if isinstance(author, dict): + self.author = self.parent.user(data=author) + elif isinstance(author, str): + self.author = self.parent.user(username=author) + + if data.get("musicInfo").get("music"): + self.title = data.get("musicInfo").get("music").get("title") + self.id = data.get("musicInfo").get("music").get("id") + self.original = data.get("musicInfo").get("music").get("original") + self.play_url = data.get("musicInfo").get("music").get("playUrl") + self.cover_large = data.get("musicInfo").get("music").get("coverLarge") + self.duration = data.get("musicInfo").get("music").get("duration") + + if "music" in keys: + self.title = data.get("music").get("title") + self.id = data.get("music").get("id") + self.original = data.get("music").get("original") + self.play_url = data.get("music").get("playUrl") + self.cover_large = data.get("music").get("coverLarge") + self.duration = data.get("music").get("duration") + + if "stats" in keys: + self.stats = data.get("stats") + + if getattr(self, "id", None) is None: + Sound.parent.logger.error(f"Failed to create Sound with data: {data}\n") + + def __repr__(self): + return self.__str__() + + def __str__(self): + return f"TikTokApi.sound(id='{getattr(self, 'id', None)}')" diff --git a/TikTok/TikTokApi/api/trending.py b/TikTok/TikTokApi/api/trending.py new file mode 100644 index 0000000000000000000000000000000000000000..a164432c27e04d7d6affdf1d88e66195ea9519ab --- /dev/null +++ b/TikTok/TikTokApi/api/trending.py @@ -0,0 +1,60 @@ +from __future__ import annotations +from ..exceptions import InvalidResponseException +from .video import Video + +from typing import TYPE_CHECKING, Iterator + +if 
class Trending:
    """Contains static methods related to trending objects on TikTok."""

    parent: TikTokApi

    @staticmethod
    async def videos(count=30, **kwargs) -> Iterator[Video]:
        """
        Returns Videos that are trending on TikTok.

        Results are yielded a whole page at a time, so slightly more than
        ``count`` videos can be produced.

        Args:
            count (int): The amount of videos you want returned.

        Returns:
            async iterator/generator: Yields TikTokApi.video objects.

        Raises:
            InvalidResponseException: If TikTok returns an invalid response, or one we don't understand.

        Example Usage:
            .. code-block:: python

                async for video in api.trending.videos():
                    # do something
        """
        found = 0
        while found < count:
            params = {
                "from_page": "fyp",
                "count": count,
            }

            resp = await Trending.parent.make_request(
                url="https://www.tiktok.com/api/recommend/item_list/",
                params=params,
                headers=kwargs.get("headers"),
                session_index=kwargs.get("session_index"),
            )

            if resp is None:
                raise InvalidResponseException(
                    resp, "TikTok returned an invalid response."
                )

            # `or []` also covers an explicit null itemList in the response.
            items = resp.get("itemList") or []
            for video in items:
                yield Trending.parent.video(data=video)
                found += 1

            # This endpoint has no cursor: an empty page means the identical
            # request would be repeated forever, so stop on it as well as on
            # hasMore being false.
            if not items or not resp.get("hasMore", False):
                return
code-block:: python + + user = api.user(username='therock') + """ + + parent: ClassVar[TikTokApi] + + user_id: str + """The ID of the user.""" + sec_uid: str + """The sec UID of the user.""" + username: str + """The username of the user.""" + as_dict: dict + """The raw data associated with this user.""" + + def __init__( + self, + username: Optional[str] = None, + user_id: Optional[str] = None, + sec_uid: Optional[str] = None, + data: Optional[dict] = None, + ): + """ + You must provide the username or (user_id and sec_uid) otherwise this + will not function correctly. + """ + self.__update_id_sec_uid_username(user_id, sec_uid, username) + if data is not None: + self.as_dict = data + self.__extract_from_data() + + async def info(self, **kwargs) -> dict: + """ + Returns a dictionary of information associated with this User. + + Returns: + dict: A dictionary of information associated with this User. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, or one we don't understand. + + Example Usage: + .. code-block:: python + + user_data = await api.user(username='therock').info() + """ + + username = getattr(self, "username", None) + if not username: + raise TypeError( + "You must provide the username when creating this class to use this method." + ) + + sec_uid = getattr(self, "sec_uid", None) + url_params = { + "secUid": sec_uid if sec_uid is not None else "", + "uniqueId": username, + "msToken": kwargs.get("ms_token"), + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/user/detail/", + params=url_params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException(resp, "TikTok returned an invalid response.") + + self.as_dict = resp + self.__extract_from_data() + return resp + + async def playlists(self, count=20, cursor=0, **kwargs) -> Iterator[dict]: + """ + Returns a dictionary of information associated with this User's playlist. 
+ + Returns: + dict: A dictionary of information associated with this User's playlist. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, or one we don't understand. + + Example Usage: + .. code-block:: python + + user_data = await api.user(username='therock').playlist() + """ + + sec_uid = getattr(self, "sec_uid", None) + if sec_uid is None or sec_uid == "": + await self.info(**kwargs) + found = 0 + + while found < count: + params = { + "secUid": sec_uid, + "count": 20, + "cursor": cursor, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/user/playlist", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException(resp, "TikTok returned an invalid response.") + + for playlist in resp.get("playList", []): + yield playlist + found += 1 + + if not resp.get("hasMore", False): + return + + cursor = resp.get("cursor") + + + async def videos(self, count=30, cursor=0, **kwargs) -> Iterator[Video]: + """ + Returns a user's videos. + + Args: + count (int): The amount of videos you want returned. + cursor (int): The the offset of videos from 0 you want to get. + + Returns: + async iterator/generator: Yields TikTokApi.video objects. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, or one we don't understand. + + Example Usage: + .. 
code-block:: python + + async for video in api.user(username="davidteathercodes").videos(): + # do something + """ + sec_uid = getattr(self, "sec_uid", None) + if sec_uid is None or sec_uid == "": + await self.info(**kwargs) + + found = 0 + while found < count: + params = { + "secUid": self.sec_uid, + "count": 35, + "cursor": cursor, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/post/item_list/", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException( + resp, "TikTok returned an invalid response." + ) + + for video in resp.get("itemList", []): + yield self.parent.video(data=video) + found += 1 + + if not resp.get("hasMore", False): + return + + cursor = resp.get("cursor") + + async def liked( + self, count: int = 30, cursor: int = 0, **kwargs + ) -> Iterator[Video]: + """ + Returns a user's liked posts if public. + + Args: + count (int): The amount of recent likes you want returned. + cursor (int): The the offset of likes from 0 you want to get. + + Returns: + async iterator/generator: Yields TikTokApi.video objects. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, the user's likes are private, or one we don't understand. + + Example Usage: + .. code-block:: python + + async for like in api.user(username="davidteathercodes").liked(): + # do something + """ + sec_uid = getattr(self, "sec_uid", None) + if sec_uid is None or sec_uid == "": + await self.info(**kwargs) + + found = 0 + while found < count: + params = { + "secUid": self.sec_uid, + "count": 35, + "cursor": cursor, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/favorite/item_list", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException( + resp, "TikTok returned an invalid response." 
+ ) + + for video in resp.get("itemList", []): + yield self.parent.video(data=video) + found += 1 + + if not resp.get("hasMore", False): + return + + cursor = resp.get("cursor") + + def __extract_from_data(self): + data = self.as_dict + keys = data.keys() + + if "userInfo" in keys: + self.__update_id_sec_uid_username( + data["userInfo"]["user"]["id"], + data["userInfo"]["user"]["secUid"], + data["userInfo"]["user"]["uniqueId"], + ) + else: + self.__update_id_sec_uid_username( + data["id"], + data["secUid"], + data["uniqueId"], + ) + + if None in (self.username, self.user_id, self.sec_uid): + User.parent.logger.error( + f"Failed to create User with data: {data}\nwhich has keys {data.keys()}" + ) + + def __update_id_sec_uid_username(self, id, sec_uid, username): + self.user_id = id + self.sec_uid = sec_uid + self.username = username + + def __repr__(self): + return self.__str__() + + def __str__(self): + username = getattr(self, "username", None) + user_id = getattr(self, "user_id", None) + sec_uid = getattr(self, "sec_uid", None) + return f"TikTokApi.user(username='{username}', user_id='{user_id}', sec_uid='{sec_uid}')" diff --git a/TikTok/TikTokApi/api/video.py b/TikTok/TikTokApi/api/video.py new file mode 100644 index 0000000000000000000000000000000000000000..992457d2afb9cc6750243bb01d70dac13a8eb8e6 --- /dev/null +++ b/TikTok/TikTokApi/api/video.py @@ -0,0 +1,332 @@ +from __future__ import annotations +from ..helpers import extract_video_id_from_url, requests_cookie_to_playwright_cookie +from typing import TYPE_CHECKING, ClassVar, Iterator, Optional +from datetime import datetime +import requests +from ..exceptions import InvalidResponseException +import json +import httpx +from typing import Union, AsyncIterator + +if TYPE_CHECKING: + from ..tiktok import TikTokApi + from .user import User + from .sound import Sound + from .hashtag import Hashtag + from .comment import Comment + + +class Video: + """ + A TikTok Video class + + Example Usage + ```py + video = 
api.video(id='7041997751718137094') + ``` + """ + + parent: ClassVar[TikTokApi] + + id: Optional[str] + """TikTok's ID of the Video""" + url: Optional[str] + """The URL of the Video""" + create_time: Optional[datetime] + """The creation time of the Video""" + stats: Optional[dict] + """TikTok's stats of the Video""" + author: Optional[User] + """The User who created the Video""" + sound: Optional[Sound] + """The Sound that is associated with the Video""" + hashtags: Optional[list[Hashtag]] + """A List of Hashtags on the Video""" + as_dict: dict + """The raw data associated with this Video.""" + + def __init__( + self, + id: Optional[str] = None, + url: Optional[str] = None, + data: Optional[dict] = None, + **kwargs, + ): + """ + You must provide the id or a valid url, else this will fail. + """ + self.id = id + self.url = url + if data is not None: + self.as_dict = data + self.__extract_from_data() + elif url is not None: + i, session = self.parent._get_session(**kwargs) + self.id = extract_video_id_from_url( + url, + headers=session.headers, + proxy=kwargs.get("proxy") + if kwargs.get("proxy") is not None + else session.proxy, + ) + + if getattr(self, "id", None) is None: + raise TypeError("You must provide id or url parameter.") + + async def info(self, **kwargs) -> dict: + """ + Returns a dictionary of all data associated with a TikTok Video. + + Note: This is slow since it requires an HTTP request, avoid using this if possible. + + Returns: + dict: A dictionary of all data associated with a TikTok Video. + + Raises: + InvalidResponseException: If TikTok returns an invalid response, or one we don't understand. + + Example Usage: + .. 
code-block:: python + + url = "https://www.tiktok.com/@davidteathercodes/video/7106686413101468970" + video_info = await api.video(url=url).info() + """ + i, session = self.parent._get_session(**kwargs) + proxy = ( + kwargs.get("proxy") if kwargs.get("proxy") is not None else session.proxy + ) + if self.url is None: + raise TypeError("To call video.info() you need to set the video's url.") + + r = requests.get(self.url, headers=session.headers, proxies=proxy) + if r.status_code != 200: + raise InvalidResponseException( + r.text, "TikTok returned an invalid response.", error_code=r.status_code + ) + + # Try SIGI_STATE first + # extract tag + # extract json in the middle + + start = r.text.find('", start) + + if end == -1: + raise InvalidResponseException( + r.text, "TikTok returned an invalid response.", error_code=r.status_code + ) + + data = json.loads(r.text[start:end]) + video_info = data["ItemModule"][self.id] + else: + # Try __UNIVERSAL_DATA_FOR_REHYDRATION__ next + + # extract tag + # extract json in the middle + + start = r.text.find('", start) + + if end == -1: + raise InvalidResponseException( + r.text, "TikTok returned an invalid response.", error_code=r.status_code + ) + + data = json.loads(r.text[start:end]) + default_scope = data.get("__DEFAULT_SCOPE__", {}) + video_detail = default_scope.get("webapp.video-detail", {}) + if video_detail.get("statusCode", 0) != 0: # assume 0 if not present + raise InvalidResponseException( + r.text, "TikTok returned an invalid response structure.", error_code=r.status_code + ) + video_info = video_detail.get("itemInfo", {}).get("itemStruct") + if video_info is None: + raise InvalidResponseException( + r.text, "TikTok returned an invalid response structure.", error_code=r.status_code + ) + + self.as_dict = video_info + self.__extract_from_data() + + cookies = [requests_cookie_to_playwright_cookie(c) for c in r.cookies] + + await self.parent.set_session_cookies( + session, + cookies + ) + return video_info + + async def 
bytes(self, stream: bool = False, **kwargs) -> Union[bytes, AsyncIterator[bytes]]: + """ + Returns the bytes of a TikTok Video. + + TODO: + Not implemented yet. + + Example Usage: + .. code-block:: python + + video_bytes = await api.video(id='7041997751718137094').bytes() + + # Saving The Video + with open('saved_video.mp4', 'wb') as output: + output.write(video_bytes) + + # Streaming (if stream=True) + async for chunk in api.video(id='7041997751718137094').bytes(stream=True): + # Process or upload chunk + """ + i, session = self.parent._get_session(**kwargs) + downloadAddr = self.as_dict["video"]["downloadAddr"] + + cookies = await self.parent.get_session_cookies(session) + + h = session.headers + h["range"] = 'bytes=0-' + h["accept-encoding"] = 'identity;q=1, *;q=0' + h["referer"] = 'https://www.tiktok.com/' + + if stream: + async def stream_bytes(): + async with httpx.AsyncClient() as client: + async with client.stream('GET', downloadAddr, headers=h, cookies=cookies) as response: + async for chunk in response.aiter_bytes(): + yield chunk + return stream_bytes() + else: + resp = requests.get(downloadAddr, headers=h, cookies=cookies) + return resp.content + + def __extract_from_data(self) -> None: + data = self.as_dict + self.id = data["id"] + + timestamp = data.get("createTime", None) + if timestamp is not None: + try: + timestamp = int(timestamp) + except ValueError: + pass + + self.create_time = datetime.fromtimestamp(timestamp) + self.stats = data.get('statsV2') or data.get('stats') + + author = data.get("author") + if isinstance(author, str): + self.author = self.parent.user(username=author) + else: + self.author = self.parent.user(data=author) + self.sound = self.parent.sound(data=data) + + self.hashtags = [ + self.parent.hashtag(data=hashtag) for hashtag in data.get("challenges", []) + ] + + if getattr(self, "id", None) is None: + Video.parent.logger.error( + f"Failed to create Video with data: {data}\nwhich has keys {data.keys()}" + ) + + async def 
comments(self, count=20, cursor=0, **kwargs) -> Iterator[Comment]: + """ + Returns the comments of a TikTok Video. + + Parameters: + count (int): The amount of comments you want returned. + cursor (int): The the offset of comments from 0 you want to get. + + Returns: + async iterator/generator: Yields TikTokApi.comment objects. + + Example Usage + .. code-block:: python + + async for comment in api.video(id='7041997751718137094').comments(): + # do something + ``` + """ + found = 0 + while found < count: + params = { + "aweme_id": self.id, + "count": 20, + "cursor": cursor, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/comment/list/", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException( + resp, "TikTok returned an invalid response." + ) + + for video in resp.get("comments", []): + yield self.parent.comment(data=video) + found += 1 + + if not resp.get("has_more", False): + return + + cursor = resp.get("cursor") + + async def related_videos( + self, count: int = 30, cursor: int = 0, **kwargs + ) -> Iterator[Video]: + """ + Returns related videos of a TikTok Video. + + Parameters: + count (int): The amount of comments you want returned. + cursor (int): The the offset of comments from 0 you want to get. + + Returns: + async iterator/generator: Yields TikTokApi.video objects. + + Example Usage + .. code-block:: python + + async for related_videos in api.video(id='7041997751718137094').related_videos(): + # do something + ``` + """ + found = 0 + while found < count: + params = { + "itemID": self.id, + "count": 16, + } + + resp = await self.parent.make_request( + url="https://www.tiktok.com/api/related/item_list/", + params=params, + headers=kwargs.get("headers"), + session_index=kwargs.get("session_index"), + ) + + if resp is None: + raise InvalidResponseException( + resp, "TikTok returned an invalid response." 
+ ) + + for video in resp.get("itemList", []): + yield self.parent.video(data=video) + found += 1 + + def __repr__(self): + return self.__str__() + + def __str__(self): + return f"TikTokApi.video(id='{getattr(self, 'id', None)}')" diff --git a/TikTok/TikTokApi/exceptions.py b/TikTok/TikTokApi/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..6ebe627cf8cb81892965ae4c9ae5a523863ecc4f --- /dev/null +++ b/TikTok/TikTokApi/exceptions.py @@ -0,0 +1,35 @@ +class TikTokException(Exception): + """Generic exception that all other TikTok errors are children of.""" + + def __init__(self, raw_response, message, error_code=None): + self.error_code = error_code + self.raw_response = raw_response + self.message = message + super().__init__(self.message) + + def __str__(self): + return f"{self.error_code} -> {self.message}" + + +class CaptchaException(TikTokException): + """TikTok is showing captcha""" + + +class NotFoundException(TikTokException): + """TikTok indicated that this object does not exist.""" + + +class EmptyResponseException(TikTokException): + """TikTok sent back an empty response.""" + + +class SoundRemovedException(TikTokException): + """This TikTok sound has no id from being removed by TikTok.""" + + +class InvalidJSONException(TikTokException): + """TikTok returned invalid JSON.""" + + +class InvalidResponseException(TikTokException): + """The response from TikTok was invalid.""" diff --git a/TikTok/TikTokApi/helpers.py b/TikTok/TikTokApi/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..cbca75c373d6d69524d0af8e58ba1e2b8395ecee --- /dev/null +++ b/TikTok/TikTokApi/helpers.py @@ -0,0 +1,36 @@ +from .exceptions import * + +import requests +import random + + +def extract_video_id_from_url(url, headers={}, proxy=None): + url = requests.head( + url=url, allow_redirects=True, headers=headers, proxies=proxy + ).url + if "@" in url and "/video/" in url: + return url.split("/video/")[1].split("?")[0] + else: + 
def random_choice(choices: list):
    """Pick one element of ``choices`` at random; ``None`` if it is ``None`` or empty."""
    has_options = choices is not None and len(choices) != 0
    if has_options:
        return random.choice(choices)
    return None
# JavaScript snippet (kept as a Python string) that mocks `window.chrome.csi`
# when the page does not already provide it.
# NOTE(review): the snippet references `log` and `utils`, which are not defined
# in this string - presumably they are injected by the stealth loader; confirm.
chrome_csi = """
if (!window.chrome) {
    // Use the exact property descriptor found in headful Chrome
    // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`
    Object.defineProperty(window, 'chrome', {
        writable: true,
        enumerable: true,
        configurable: false, // note!
        value: {} // We'll extend that later
    })
}

// Check if we're running headful and don't need to mock anything
// Check that the Navigation Timing API v1 is available, we need that
if (!('csi' in window.chrome) && window.performance && window.performance.timing) {
    // `performance` has no `csi_timing` property; alias `timing` instead so the
    // mocked csi() does not throw when it reads the navigation timestamps.
    const {timing: csi_timing} = window.performance

    log.info('loading chrome.csi.js')
    window.chrome.csi = function () {
        return {
            onloadT: csi_timing.domContentLoadedEventEnd,
            startE: csi_timing.navigationStart,
            pageT: Date.now() - csi_timing.navigationStart,
            tran: 15 // Transition type or something
        }
    }
    utils.patchToString(window.chrome.csi)
}
"""
enumerable: true, + configurable: false, // note! + value: {} // We'll extend that later + }) +} + +// That means we're running headful and don't need to mock anything +if ('loadTimes' in window.chrome) { + throw new Error('skipping chrome loadtimes update, running in headfull mode') +} + +// Check that the Navigation Timing API v1 + v2 is available, we need that +if ( + window.performance || + window.performance.timing || + window.PerformancePaintTiming +) { + + const {performance} = window + + // Some stuff is not available on about:blank as it requires a navigation to occur, + // let's harden the code to not fail then: + const ntEntryFallback = { + nextHopProtocol: 'h2', + type: 'other' + } + + // The API exposes some funky info regarding the connection + const protocolInfo = { + get connectionInfo() { + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ntEntry.nextHopProtocol + }, + get npnNegotiatedProtocol() { + // NPN is deprecated in favor of ALPN, but this implementation returns the + // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN. + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) + ? ntEntry.nextHopProtocol + : 'unknown' + }, + get navigationType() { + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ntEntry.type + }, + get wasAlternateProtocolAvailable() { + // The Alternate-Protocol header is deprecated in favor of Alt-Svc + // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this + // should always return false. + return false + }, + get wasFetchedViaSpdy() { + // SPDY is deprecated in favor of HTTP/2, but this implementation returns + // true for HTTP/2 or HTTP2+QUIC/39 as well. 
+ const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) + }, + get wasNpnNegotiated() { + // NPN is deprecated in favor of ALPN, but this implementation returns true + // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN. + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) + } + } + + const {timing} = window.performance + +// Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3 + function toFixed(num, fixed) { + var re = new RegExp('^-?\\\\d+(?:.\\\\d{0,' + (fixed || -1) + '})?') // backslashes are escaped for both the Python string and the JS string + return num.toString().match(re)[0] + } + + const timingInfo = { + get firstPaintAfterLoadTime() { + // This was never actually implemented and always returns 0. + return 0 + }, + get requestTime() { + return timing.navigationStart / 1000 + }, + get startLoadTime() { + return timing.navigationStart / 1000 + }, + get commitLoadTime() { + return timing.responseStart / 1000 + }, + get finishDocumentLoadTime() { + return timing.domContentLoadedEventEnd / 1000 + }, + get finishLoadTime() { + return timing.loadEventEnd / 1000 + }, + get firstPaintTime() { + const fpEntry = performance.getEntriesByType('paint')[0] || { + startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`) + } + return toFixed( + (fpEntry.startTime + performance.timeOrigin) / 1000, + 3 + ) + } + } + + window.chrome.loadTimes = function () { + return { + ...protocolInfo, + ...timingInfo + } + } + utils.patchToString(window.chrome.loadTimes) +} +""" diff --git a/TikTok/TikTokApi/stealth/js/chrome_runtime.py b/TikTok/TikTokApi/stealth/js/chrome_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..d971a90a73e04f36c52df5222729ef60f3cd5070 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/chrome_runtime.py @@ -0,0 +1,265 @@ +chrome_runtime = """ +const STATIC_DATA = {
+ "OnInstalledReason": { + "CHROME_UPDATE": "chrome_update", + "INSTALL": "install", + "SHARED_MODULE_UPDATE": "shared_module_update", + "UPDATE": "update" + }, + "OnRestartRequiredReason": { + "APP_UPDATE": "app_update", + "OS_UPDATE": "os_update", + "PERIODIC": "periodic" + }, + "PlatformArch": { + "ARM": "arm", + "ARM64": "arm64", + "MIPS": "mips", + "MIPS64": "mips64", + "X86_32": "x86-32", + "X86_64": "x86-64" + }, + "PlatformNaclArch": { + "ARM": "arm", + "MIPS": "mips", + "MIPS64": "mips64", + "X86_32": "x86-32", + "X86_64": "x86-64" + }, + "PlatformOs": { + "ANDROID": "android", + "CROS": "cros", + "LINUX": "linux", + "MAC": "mac", + "OPENBSD": "openbsd", + "WIN": "win" + }, + "RequestUpdateCheckStatus": { + "NO_UPDATE": "no_update", + "THROTTLED": "throttled", + "UPDATE_AVAILABLE": "update_available" + } +} + +if (!window.chrome) { + // Use the exact property descriptor found in headful Chrome + // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` + Object.defineProperty(window, 'chrome', { + writable: true, + enumerable: true, + configurable: false, // note! 
+ value: {} // We'll extend that later + }) +} + +// That means we're running headfull and don't need to mock anything +const existsAlready = 'runtime' in window.chrome +// `chrome.runtime` is only exposed on secure origins +const isNotSecure = !window.location.protocol.startsWith('https') +if (!(existsAlready || (isNotSecure && !opts.runOnInsecureOrigins))) { + window.chrome.runtime = { + // There's a bunch of static data in that property which doesn't seem to change, + // we should periodically check for updates: `JSON.stringify(window.chrome.runtime, null, 2)` + ...STATIC_DATA, + // `chrome.runtime.id` is extension related and returns undefined in Chrome + get id() { + return undefined + }, + // These two require more sophisticated mocks + connect: null, + sendMessage: null + } + + const makeCustomRuntimeErrors = (preamble, method, extensionId) => ({ + NoMatchingSignature: new TypeError( + preamble + `No matching signature.` + ), + MustSpecifyExtensionID: new TypeError( + preamble + + `${method} called from a webpage must specify an Extension ID (string) for its first argument.` + ), + InvalidExtensionID: new TypeError( + preamble + `Invalid extension id: '${extensionId}'` + ) + }) + + // Valid Extension IDs are 32 characters in length and use the letter `a` to `p`: + // https://source.chromium.org/chromium/chromium/src/+/main:components/crx_file/id_util.cc;drc=14a055ccb17e8c8d5d437fe080faba4c6f07beac;l=90 + const isValidExtensionID = str => + str.length === 32 && str.toLowerCase().match(/^[a-p]+$/) + + /** Mock `chrome.runtime.sendMessage` */ + const sendMessageHandler = { + apply: function (target, ctx, args) { + const [extensionId, options, responseCallback] = args || [] + + // Define custom errors + const errorPreamble = `Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback): ` + const Errors = makeCustomRuntimeErrors( + errorPreamble, + 
`chrome.runtime.sendMessage()`, + extensionId + ) + + // Check if the call signature looks ok + const noArguments = args.length === 0 + const tooManyArguments = args.length > 4 + const incorrectOptions = options && typeof options !== 'object' + const incorrectResponseCallback = + responseCallback && typeof responseCallback !== 'function' + if ( + noArguments || + tooManyArguments || + incorrectOptions || + incorrectResponseCallback + ) { + throw Errors.NoMatchingSignature + } + + // At least 2 arguments are required before we even validate the extension ID + if (args.length < 2) { + throw Errors.MustSpecifyExtensionID + } + + // Now let's make sure we got a string as extension ID + if (typeof extensionId !== 'string') { + throw Errors.NoMatchingSignature + } + + if (!isValidExtensionID(extensionId)) { + throw Errors.InvalidExtensionID + } + + return undefined // Normal behavior + } + } + utils.mockWithProxy( + window.chrome.runtime, + 'sendMessage', + function sendMessage() { + }, + sendMessageHandler + ) + + /** + * Mock `chrome.runtime.connect` + * + * @see https://developer.chrome.com/apps/runtime#method-connect + */ + const connectHandler = { + apply: function (target, ctx, args) { + const [extensionId, connectInfo] = args || [] + + // Define custom errors + const errorPreamble = `Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): ` + const Errors = makeCustomRuntimeErrors( + errorPreamble, + `chrome.runtime.connect()`, + extensionId + ) + + // Behavior differs a bit from sendMessage: + const noArguments = args.length === 0 + const emptyStringArgument = args.length === 1 && extensionId === '' + if (noArguments || emptyStringArgument) { + throw Errors.MustSpecifyExtensionID + } + + const tooManyArguments = args.length > 2 + const incorrectConnectInfoType = + connectInfo && typeof connectInfo !== 'object' + + if (tooManyArguments || incorrectConnectInfoType) { + throw Errors.NoMatchingSignature + } + + const 
extensionIdIsString = typeof extensionId === 'string' + if (extensionIdIsString && extensionId === '') { + throw Errors.MustSpecifyExtensionID + } + if (extensionIdIsString && !isValidExtensionID(extensionId)) { + throw Errors.InvalidExtensionID + } + + // There's another edge-case here: extensionId is optional so we might find a connectInfo object as first param, which we need to validate + const validateConnectInfo = ci => { + // More than a first param connectInfo as been provided + if (args.length > 1) { + throw Errors.NoMatchingSignature + } + // An empty connectInfo has been provided + if (Object.keys(ci).length === 0) { + throw Errors.MustSpecifyExtensionID + } + // Loop over all connectInfo props an check them + Object.entries(ci).forEach(([k, v]) => { + const isExpected = ['name', 'includeTlsChannelId'].includes(k) + if (!isExpected) { + throw new TypeError( + errorPreamble + `Unexpected property: '${k}'.` + ) + } + const MismatchError = (propName, expected, found) => + TypeError( + errorPreamble + + `Error at property '${propName}': Invalid type: expected ${expected}, found ${found}.` + ) + if (k === 'name' && typeof v !== 'string') { + throw MismatchError(k, 'string', typeof v) + } + if (k === 'includeTlsChannelId' && typeof v !== 'boolean') { + throw MismatchError(k, 'boolean', typeof v) + } + }) + } + if (typeof extensionId === 'object') { + validateConnectInfo(extensionId) + throw Errors.MustSpecifyExtensionID + } + + // Unfortunately even when the connect fails Chrome will return an object with methods we need to mock as well + return utils.patchToStringNested(makeConnectResponse()) + } + } + utils.mockWithProxy( + window.chrome.runtime, + 'connect', + function connect() { + }, + connectHandler + ) + + function makeConnectResponse() { + const onSomething = () => ({ + addListener: function addListener() { + }, + dispatch: function dispatch() { + }, + hasListener: function hasListener() { + }, + hasListeners: function hasListeners() { + return false + 
}, + removeListener: function removeListener() { + } + }) + + const response = { + name: '', + sender: undefined, + disconnect: function disconnect() { + }, + onDisconnect: onSomething(), + onMessage: onSomething(), + postMessage: function postMessage() { + if (!arguments.length) { + throw new TypeError(`Insufficient number of arguments.`) + } + throw new Error(`Attempting to use a disconnected port object`) + } + } + return response + } +} + +""" diff --git a/TikTok/TikTokApi/stealth/js/generate_magic_arrays.py b/TikTok/TikTokApi/stealth/js/generate_magic_arrays.py new file mode 100644 index 0000000000000000000000000000000000000000..1642ea98280e91ef80f3ead1abff9ad2f42ec8d4 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/generate_magic_arrays.py @@ -0,0 +1,144 @@ +generate_magic_arrays = """ +generateFunctionMocks = ( + proto, + itemMainProp, + dataArray +) => ({ + item: utils.createProxy(proto.item, { + apply(target, ctx, args) { + if (!args.length) { + throw new TypeError( + `Failed to execute 'item' on '${ + proto[Symbol.toStringTag] + }': 1 argument required, but only 0 present.` + ) + } + // Special behavior alert: + // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup + // - If anything else than an integer (including as string) is provided it will return the first entry + const isInteger = args[0] && Number.isInteger(Number(args[0])) // Cast potential string to number first, then check for integer + // Note: Vanilla never returns `undefined` + return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null + } + }), + /** Returns the MimeType object with the specified name. 
*/ + namedItem: utils.createProxy(proto.namedItem, { + apply(target, ctx, args) { + if (!args.length) { + throw new TypeError( + `Failed to execute 'namedItem' on '${ + proto[Symbol.toStringTag] + }': 1 argument required, but only 0 present.` + ) + } + return dataArray.find(mt => mt[itemMainProp] === args[0]) || null // Not `undefined`! + } + }), + /** Does nothing and shall return nothing */ + refresh: proto.refresh + ? utils.createProxy(proto.refresh, { + apply(target, ctx, args) { + return undefined + } + }) + : undefined +}) + +function generateMagicArray( + dataArray = [], + proto = MimeTypeArray.prototype, + itemProto = MimeType.prototype, + itemMainProp = 'type' +) { + // Quick helper to set props with the same descriptors vanilla is using + const defineProp = (obj, prop, value) => + Object.defineProperty(obj, prop, { + value, + writable: false, + enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)` + configurable: false + }) + + // Loop over our fake data and construct items + const makeItem = data => { + const item = {} + for (const prop of Object.keys(data)) { + if (prop.startsWith('__')) { + continue + } + defineProp(item, prop, data[prop]) + } + // navigator.plugins[i].length should always be 1 + if (itemProto === Plugin.prototype) { + defineProp(item, 'length', 1) + } + // We need to spoof a specific `MimeType` or `Plugin` object + return Object.create(itemProto, Object.getOwnPropertyDescriptors(item)) + } + + const magicArray = [] + + // Loop through our fake data and use that to create convincing entities + dataArray.forEach(data => { + magicArray.push(makeItem(data)) + }) + + // Add direct property access based on types (e.g. 
`obj['application/pdf']`) afterwards + magicArray.forEach(entry => { + defineProp(magicArray, entry[itemMainProp], entry) + }) + + // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)` + const magicArrayObj = Object.create(proto, { + ...Object.getOwnPropertyDescriptors(magicArray), + + // There's one ugly quirk we unfortunately need to take care of: + // The `MimeTypeArray` prototype has an enumerable `length` property, + // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`. + // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap. + length: { + value: magicArray.length, + writable: false, + enumerable: false, + configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length` + } + }) + + // Generate our functional function mocks :-) + const functionMocks = generateFunctionMocks( + proto, + itemMainProp, + magicArray + ) + + // Override custom object with proxy + return new Proxy(magicArrayObj, { + get(target, key = '') { + // Redirect function calls to our custom proxied versions mocking the vanilla behavior + if (key === 'item') { + return functionMocks.item + } + if (key === 'namedItem') { + return functionMocks.namedItem + } + if (proto === PluginArray.prototype && key === 'refresh') { + return functionMocks.refresh + } + // Everything else can pass through as normal + return utils.cache.Reflect.get(...arguments) + }, + ownKeys(target) { + // There are a couple of quirks where the original property demonstrates "magical" behavior that makes no sense + // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length` + // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly + // For that reason we just completely fake the available property names based on our data 
to match what regular Chrome is doing + // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing + const keys = [] + const typeProps = magicArray.map(mt => mt[itemMainProp]) + typeProps.forEach((_, i) => keys.push(`${i}`)) + typeProps.forEach(propName => keys.push(propName)) + return keys + } + }) +} +""" diff --git a/TikTok/TikTokApi/stealth/js/iframe_contentWindow.py b/TikTok/TikTokApi/stealth/js/iframe_contentWindow.py new file mode 100644 index 0000000000000000000000000000000000000000..0749092b01b2ebd6c37c19659ef36304d4de6ed7 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/iframe_contentWindow.py @@ -0,0 +1,99 @@ +iframe_contentWindow = """ +try { + // Adds a contentWindow proxy to the provided iframe element + const addContentWindowProxy = iframe => { + const contentWindowProxy = { + get(target, key) { + // Now to the interesting part: + // We actually make this thing behave like a regular iframe window, + // by intercepting calls to e.g. `.self` and redirect it to the correct thing. 
:) + // That makes it possible for these assertions to be correct: + // iframe.contentWindow.self === window.top // must be false + if (key === 'self') { + return this + } + // iframe.contentWindow.frameElement === iframe // must be true + if (key === 'frameElement') { + return iframe + } + return Reflect.get(target, key) + } + } + + if (!iframe.contentWindow) { + const proxy = new Proxy(window, contentWindowProxy) + Object.defineProperty(iframe, 'contentWindow', { + get() { + return proxy + }, + set(newValue) { + return newValue // contentWindow is immutable + }, + enumerable: true, + configurable: false + }) + } + } + + // Handles iframe element creation, augments `srcdoc` property so we can intercept further + const handleIframeCreation = (target, thisArg, args) => { + const iframe = target.apply(thisArg, args) + + // We need to keep the originals around + const _iframe = iframe + const _srcdoc = _iframe.srcdoc + + // Add hook for the srcdoc property + // We need to be very surgical here to not break other iframes by accident + Object.defineProperty(iframe, 'srcdoc', { + configurable: true, // Important, so we can reset this later + get: function () { + return _iframe.srcdoc + }, + set: function (newValue) { + addContentWindowProxy(this) + // Reset property, the hook is only needed once + Object.defineProperty(iframe, 'srcdoc', { + configurable: false, + writable: false, + value: _srcdoc + }) + _iframe.srcdoc = newValue + } + }) + return iframe + } + + // Adds a hook to intercept iframe creation events + const addIframeCreationSniffer = () => { + /* global document */ + const createElementHandler = { + // Make toString() native + get(target, key) { + return Reflect.get(target, key) + }, + apply: function (target, thisArg, args) { + const isIframe = + args && args.length && `${args[0]}`.toLowerCase() === 'iframe' + if (!isIframe) { + // Everything as usual + return target.apply(thisArg, args) + } else { + return handleIframeCreation(target, thisArg, args) + } + } 
+ } + // All this just due to iframes with srcdoc bug + utils.replaceWithProxy( + document, + 'createElement', + createElementHandler + ) + } + + // Let's go + addIframeCreationSniffer() +} catch (err) { + // console.warn(err) +} +""" diff --git a/TikTok/TikTokApi/stealth/js/media_codecs.py b/TikTok/TikTokApi/stealth/js/media_codecs.py new file mode 100644 index 0000000000000000000000000000000000000000..46f25e6a851732d68c7ef78e9e4d1d4af1ebc269 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/media_codecs.py @@ -0,0 +1,65 @@ +media_codecs = """ +/** + * Input might look funky, we need to normalize it so e.g. whitespace isn't an issue for our spoofing. + * + * @example + * video/webm; codecs="vp8, vorbis" + * video/mp4; codecs="avc1.42E01E" + * audio/x-m4a; + * audio/ogg; codecs="vorbis" + * @param {String} arg + */ +const parseInput = arg => { + const [mime, codecStr] = arg.trim().split(';') + let codecs = [] + if (codecStr && codecStr.includes('codecs="')) { + codecs = codecStr + .trim() + .replace(`codecs="`, '') + .replace(`"`, '') + .trim() + .split(',') + .filter(x => !!x) + .map(x => x.trim()) + } + return { + mime, + codecStr, + codecs + } +} + +const canPlayType = { + // Intercept certain requests + apply: function (target, ctx, args) { + if (!args || !args.length) { + return target.apply(ctx, args) + } + const {mime, codecs} = parseInput(args[0]) + // This specific mp4 codec is missing in Chromium + if (mime === 'video/mp4') { + if (codecs.includes('avc1.42E01E')) { + return 'probably' + } + } + // This mimetype is only supported if no codecs are specified + if (mime === 'audio/x-m4a' && !codecs.length) { + return 'maybe' + } + + // This mimetype is only supported if no codecs are specified + if (mime === 'audio/aac' && !codecs.length) { + return 'probably' + } + // Everything else as usual + return target.apply(ctx, args) + } +} + +/* global HTMLMediaElement */ +utils.replaceWithProxy( + HTMLMediaElement.prototype, + 'canPlayType', + canPlayType +) +""" 
diff --git a/TikTok/TikTokApi/stealth/js/navigator_hardwareConcurrency.py b/TikTok/TikTokApi/stealth/js/navigator_hardwareConcurrency.py new file mode 100644 index 0000000000000000000000000000000000000000..1aa2d9ddfe8db246dc2a4f2e7dd00f2d5982c1ba --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_hardwareConcurrency.py @@ -0,0 +1,10 @@ +navigator_hardwareConcurrency = """ +const patchNavigator = (name, value) => + utils.replaceProperty(Object.getPrototypeOf(navigator), name, { + get() { + return value + } + }) + +patchNavigator('hardwareConcurrency', opts.navigator_hardware_concurrency || 4); +""" diff --git a/TikTok/TikTokApi/stealth/js/navigator_languages.py b/TikTok/TikTokApi/stealth/js/navigator_languages.py new file mode 100644 index 0000000000000000000000000000000000000000..ee91a988a9e8d35c2c3cdbda23271b54efa713dc --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_languages.py @@ -0,0 +1,6 @@ +navigator_languages = """ +Object.defineProperty(Object.getPrototypeOf(navigator), 'languages', { + get: () => opts.languages || ['en-US', 'en'] +}) + +""" diff --git a/TikTok/TikTokApi/stealth/js/navigator_permissions.py b/TikTok/TikTokApi/stealth/js/navigator_permissions.py new file mode 100644 index 0000000000000000000000000000000000000000..71ae8663e003c56694abf0eb8499f570e2660803 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_permissions.py @@ -0,0 +1,22 @@ +navigator_permissions = """ +const handler = { + apply: function (target, ctx, args) { + const param = (args || [])[0] + + if (param && param.name && param.name === 'notifications') { + const result = {state: Notification.permission} + Object.setPrototypeOf(result, PermissionStatus.prototype) + return Promise.resolve(result) + } + + return utils.cache.Reflect.apply(...arguments) + } +} + +utils.replaceWithProxy( + window.navigator.permissions.__proto__, // eslint-disable-line no-proto + 'query', + handler +) + +""" diff --git a/TikTok/TikTokApi/stealth/js/navigator_platform.py 
b/TikTok/TikTokApi/stealth/js/navigator_platform.py new file mode 100644 index 0000000000000000000000000000000000000000..476d35a4afba723f997052a34ace85e64db66795 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_platform.py @@ -0,0 +1,7 @@ +navigator_platform = """ +if (opts.navigator_platform) { + Object.defineProperty(Object.getPrototypeOf(navigator), 'platform', { + get: () => opts.navigator_platform, + }) +} +""" diff --git a/TikTok/TikTokApi/stealth/js/navigator_plugins.py b/TikTok/TikTokApi/stealth/js/navigator_plugins.py new file mode 100644 index 0000000000000000000000000000000000000000..688a7e7aa0529eb17171277e56897fdb2849b4a4 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_plugins.py @@ -0,0 +1,94 @@ +navigator_plugins = """ +data = { + "mimeTypes": [ + { + "type": "application/pdf", + "suffixes": "pdf", + "description": "", + "__pluginName": "Chrome PDF Viewer" + }, + { + "type": "application/x-google-chrome-pdf", + "suffixes": "pdf", + "description": "Portable Document Format", + "__pluginName": "Chrome PDF Plugin" + }, + { + "type": "application/x-nacl", + "suffixes": "", + "description": "Native Client Executable", + "__pluginName": "Native Client" + }, + { + "type": "application/x-pnacl", + "suffixes": "", + "description": "Portable Native Client Executable", + "__pluginName": "Native Client" + } + ], + "plugins": [ + { + "name": "Chrome PDF Plugin", + "filename": "internal-pdf-viewer", + "description": "Portable Document Format", + "__mimeTypes": ["application/x-google-chrome-pdf"] + }, + { + "name": "Chrome PDF Viewer", + "filename": "mhjfbmdgcfjbbpaeojofohoefgiehjai", + "description": "", + "__mimeTypes": ["application/pdf"] + }, + { + "name": "Native Client", + "filename": "internal-nacl-plugin", + "description": "", + "__mimeTypes": ["application/x-nacl", "application/x-pnacl"] + } + ] +} + + +// That means we're running headful +const hasPlugins = 'plugins' in navigator && navigator.plugins.length +if (!(hasPlugins)) { + +
const mimeTypes = generateMagicArray( + data.mimeTypes, + MimeTypeArray.prototype, + MimeType.prototype, + 'type' + ) + const plugins = generateMagicArray( + data.plugins, + PluginArray.prototype, + Plugin.prototype, + 'name' + ) + + // Plugin and MimeType cross-reference each other, let's do that now + // Note: We're looping through `data.plugins` here, not the generated `plugins` + for (const pluginData of data.plugins) { + pluginData.__mimeTypes.forEach((type, index) => { + plugins[pluginData.name][index] = mimeTypes[type] + plugins[type] = mimeTypes[type] + Object.defineProperty(mimeTypes[type], 'enabledPlugin', { + value: JSON.parse(JSON.stringify(plugins[pluginData.name])), + writable: false, + enumerable: false, // Important: `JSON.stringify(navigator.plugins)` + configurable: false + }) + }) + } + + const patchNavigator = (name, value) => + utils.replaceProperty(Object.getPrototypeOf(navigator), name, { + get() { + return value + } + }) + + patchNavigator('mimeTypes', mimeTypes) + patchNavigator('plugins', plugins) +} +""" diff --git a/TikTok/TikTokApi/stealth/js/navigator_userAgent.py b/TikTok/TikTokApi/stealth/js/navigator_userAgent.py new file mode 100644 index 0000000000000000000000000000000000000000..a89d21962eb18572ea68a07af582a2f14b4430cf --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_userAgent.py @@ -0,0 +1,8 @@ +navigator_userAgent = """ +// replace Headless references in default useragent +const current_ua = navigator.userAgent +Object.defineProperty(Object.getPrototypeOf(navigator), 'userAgent', { + get: () => opts.navigator_user_agent || current_ua.replace('HeadlessChrome/', 'Chrome/') +}) + +""" diff --git a/TikTok/TikTokApi/stealth/js/navigator_vendor.py b/TikTok/TikTokApi/stealth/js/navigator_vendor.py new file mode 100644 index 0000000000000000000000000000000000000000..aba039f27865763a918babc4ec18470e4da83001 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/navigator_vendor.py @@ -0,0 +1,6 @@ +navigator_vendor = """ 
+Object.defineProperty(Object.getPrototypeOf(navigator), 'vendor', { + get: () => opts.navigator_vendor || 'Google Inc.', +}) + +""" diff --git a/TikTok/TikTokApi/stealth/js/utils.py b/TikTok/TikTokApi/stealth/js/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..88df9d98d89349f1a5f52d556244a249ec35029a --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/utils.py @@ -0,0 +1,458 @@ +utils = """ +/** + * A set of shared utility functions specifically for the purpose of modifying native browser APIs without leaving traces. + * + * Meant to be passed down in puppeteer and used in the context of the page (everything in here runs in NodeJS as well as a browser). + * + * Note: If for whatever reason you need to use this outside of `puppeteer-extra`: + * Just remove the `module.exports` statement at the very bottom, the rest can be copy pasted into any browser context. + * + * Alternatively take a look at the `extract-stealth-evasions` package to create a finished bundle which includes these utilities. + * + */ +const utils = {} + +/** + * Wraps a JS Proxy Handler and strips it's presence from error stacks, in case the traps throw. + * + * The presence of a JS Proxy can be revealed as it shows up in error stack traces. 
+ * + * @param {object} handler - The JS Proxy handler to wrap + */ +utils.stripProxyFromErrors = (handler = {}) => { + const newHandler = {} + // We wrap each trap in the handler in a try/catch and modify the error stack if they throw + const traps = Object.getOwnPropertyNames(handler) + traps.forEach(trap => { + newHandler[trap] = function() { + try { + // Forward the call to the defined proxy handler + return handler[trap].apply(this, arguments || []) + } catch (err) { + // Stack traces differ per browser, we only support chromium based ones currently + if (!err || !err.stack || !err.stack.includes(`at `)) { + throw err + } + + // When something throws within one of our traps the Proxy will show up in error stacks + // An earlier implementation of this code would simply strip lines with a blacklist, + // but it makes sense to be more surgical here and only remove lines related to our Proxy. + // We try to use a known "anchor" line for that and strip it with everything above it. + // If the anchor line cannot be found for some reason we fall back to our blacklist approach. + + const stripWithBlacklist = stack => { + const blacklist = [ + `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply + `at Object.${trap} `, // e.g. Object.get or Object.apply + `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-) + ] + return ( + err.stack + .split('\\n') + // Always remove the first (file) line in the stack (guaranteed to be our proxy) + .filter((line, index) => index !== 1) + // Check if the line starts with one of our blacklisted strings + .filter(line => !blacklist.some(bl => line.trim().startsWith(bl))) + .join('\\n') + ) + } + + const stripWithAnchor = stack => { + const stackArr = stack.split('\\n') + const anchor = `at Object.newHandler. 
[as ${trap}] ` // Known first Proxy line in chromium + const anchorIndex = stackArr.findIndex(line => + line.trim().startsWith(anchor) + ) + if (anchorIndex === -1) { + return false // 404, anchor not found + } + // Strip everything from the top until we reach the anchor line + // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`) + stackArr.splice(1, anchorIndex) + return stackArr.join('\\n') + } + + // Try using the anchor method, fallback to blacklist if necessary + err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack) + + throw err // Re-throw our now sanitized error + } + } + }) + return newHandler +} + +/** + * Strip error lines from stack traces until (and including) a known line the stack. + * + * @param {object} err - The error to sanitize + * @param {string} anchor - The string the anchor line starts with + */ +utils.stripErrorWithAnchor = (err, anchor) => { + const stackArr = err.stack.split('\\n') + const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor)) + if (anchorIndex === -1) { + return err // 404, anchor not found + } + // Strip everything from the top until we reach the anchor line (remove anchor line as well) + // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`) + stackArr.splice(1, anchorIndex) + err.stack = stackArr.join('\\n') + return err +} + +/** + * Replace the property of an object in a stealthy way. + * + * Note: You also want to work on the prototype of an object most often, + * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)).
+ * + * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty + * + * @example + * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: "alice" }) + * // or + * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] }) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The property name to replace + * @param {object} descriptorOverrides - e.g. { value: "alice" } + */ +utils.replaceProperty = (obj, propName, descriptorOverrides = {}) => { + return Object.defineProperty(obj, propName, { + // Copy over the existing descriptors (writable, enumerable, configurable, etc) + ...(Object.getOwnPropertyDescriptor(obj, propName) || {}), + // Add our overrides (e.g. value, get()) + ...descriptorOverrides + }) +} + +/** + * Preload a cache of function copies and data. + * + * For a determined enough observer it would be possible to overwrite and sniff usage of functions + * we use in our internal Proxies, to combat that we use a cached copy of those functions. + * + * This is evaluated once per execution context (e.g. window) + */ +utils.preloadCache = () => { + if (utils.cache) { + return + } + utils.cache = { + // Used in our proxies + Reflect: { + get: Reflect.get.bind(Reflect), + apply: Reflect.apply.bind(Reflect) + }, + // Used in `makeNativeString` + nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }` + } +} + +/** + * Utility function to generate a cross-browser `toString` result representing native code. + * + * There's small differences: Chromium uses a single line, whereas FF & Webkit uses multiline strings. + * To future-proof this we use an existing native toString result as the basis. 
+ * + * The only advantage we have over the other team is that our JS runs first, hence we cache the result + * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it. + * + * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before, + * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups). + * + * @example + * makeNativeString('foobar') // => `function foobar() { [native code] }` + * + * @param {string} [name] - Optional function name + */ +utils.makeNativeString = (name = '') => { + // Cache (per-window) the original native toString or use that if available + utils.preloadCache() + return utils.cache.nativeToStringStr.replace('toString', name || '') +} + +/** + * Helper function to modify the `toString()` result of the provided object. + * + * Note: Use `utils.redirectToString` instead when possible. + * + * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object. + * If no string is provided we will generate a `[native code]` thing based on the name of the property object. + * + * @example + * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }') + * + * @param {object} obj - The object for which to modify the `toString()` representation + * @param {string} str - Optional string used as a return value + */ +utils.patchToString = (obj, str = '') => { + utils.preloadCache() + + const toStringProxy = new Proxy(Function.prototype.toString, { + apply: function(target, ctx) { + // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + ""` + if (ctx === Function.prototype.toString) { + return utils.makeNativeString('toString') + } + // `toString` targeted at our proxied Object detected + if (ctx === obj) { + // We either return the optional string verbatim or derive the most desired result automatically + return str || utils.makeNativeString(obj.name) + } + // Check if the toString protype of the context is the same as the global prototype, + // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case + const hasSameProto = Object.getPrototypeOf( + Function.prototype.toString + ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins + if (!hasSameProto) { + // Pass the call on to the local Function.prototype.toString instead + return ctx.toString() + } + return target.call(ctx) + } + }) + utils.replaceProperty(Function.prototype, 'toString', { + value: toStringProxy + }) +} + +/** + * Make all nested functions of an object native. + * + * @param {object} obj + */ +utils.patchToStringNested = (obj = {}) => { + return utils.execRecursively(obj, ['function'], utils.patchToString) +} + +/** + * Redirect toString requests from one object to another. + * + * @param {object} proxyObj - The object that toString will be called on + * @param {object} originalObj - The object which toString result we wan to return + */ +utils.redirectToString = (proxyObj, originalObj) => { + utils.preloadCache() + + const toStringProxy = new Proxy(Function.prototype.toString, { + apply: function(target, ctx) { + // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""` + if (ctx === Function.prototype.toString) { + return utils.makeNativeString('toString') + } + + // `toString` targeted at our proxied Object detected + if (ctx === proxyObj) { + const fallback = () => + originalObj && originalObj.name + ? 
utils.makeNativeString(originalObj.name) + : utils.makeNativeString(proxyObj.name) + + // Return the toString representation of our original object if possible + return originalObj + '' || fallback() + } + + // Check if the toString protype of the context is the same as the global prototype, + // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case + const hasSameProto = Object.getPrototypeOf( + Function.prototype.toString + ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins + if (!hasSameProto) { + // Pass the call on to the local Function.prototype.toString instead + return ctx.toString() + } + + return target.call(ctx) + } + }) + utils.replaceProperty(Function.prototype, 'toString', { + value: toStringProxy + }) +} + +/** + * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps. + * + * Will stealthify these aspects (strip error stack traces, redirect toString, etc). + * Note: This is meant to modify native Browser APIs and works best with prototype objects. + * + * @example + * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The name of the property to replace + * @param {object} handler - The JS Proxy handler to use + */ +utils.replaceWithProxy = (obj, propName, handler) => { + utils.preloadCache() + const originalObj = obj[propName] + const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler)) + + utils.replaceProperty(obj, propName, { value: proxyObj }) + utils.redirectToString(proxyObj, originalObj) + + return true +} + +/** + * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps. + * + * Will stealthify these aspects (strip error stack traces, redirect toString, etc). 
+ * + * @example + * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The name of the property to replace or create + * @param {object} pseudoTarget - The JS Proxy target to use as a basis + * @param {object} handler - The JS Proxy handler to use + */ +utils.mockWithProxy = (obj, propName, pseudoTarget, handler) => { + utils.preloadCache() + const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler)) + + utils.replaceProperty(obj, propName, { value: proxyObj }) + utils.patchToString(proxyObj) + + return true +} + +/** + * All-in-one method to create a new JS Proxy with stealth tweaks. + * + * This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property. + * + * Will stealthify certain aspects of the Proxy (strip error stack traces, redirect toString, etc). + * + * @example + * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy + * + * @param {object} pseudoTarget - The JS Proxy target to use as a basis + * @param {object} handler - The JS Proxy handler to use + */ +utils.createProxy = (pseudoTarget, handler) => { + utils.preloadCache() + const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler)) + utils.patchToString(proxyObj) + + return proxyObj +} + +/** + * Helper function to split a full path to an Object into the first part and property. 
+ * + * @example + * splitObjPath(`HTMLMediaElement.prototype.canPlayType`) + * // => {objName: "HTMLMediaElement.prototype", propName: "canPlayType"} + * + * @param {string} objPath - The full path to an object as dot notation string + */ +utils.splitObjPath = objPath => ({ + // Remove last dot entry (property) ==> `HTMLMediaElement.prototype` + objName: objPath + .split('.') + .slice(0, -1) + .join('.'), + // Extract last dot entry ==> `canPlayType` + propName: objPath.split('.').slice(-1)[0] +}) + +/** + * Convenience method to replace a property with a JS Proxy using the provided objPath. + * + * Supports a full path (dot notation) to the object as string here, in case that makes it easier. + * + * @example + * replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler) + * + * @param {string} objPath - The full path to an object (dot notation string) to replace + * @param {object} handler - The JS Proxy handler to use + */ +utils.replaceObjPathWithProxy = (objPath, handler) => { + const { objName, propName } = utils.splitObjPath(objPath) + const obj = eval(objName) // eslint-disable-line no-eval + return utils.replaceWithProxy(obj, propName, handler) +} + +/** + * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types. + * + * @param {object} obj + * @param {array} typeFilter - e.g. `['function']` + * @param {Function} fn - e.g. `utils.patchToString` + */ +utils.execRecursively = (obj = {}, typeFilter = [], fn) => { + function recurse(obj) { + for (const key in obj) { + if (obj[key] === undefined) { + continue + } + if (obj[key] && typeof obj[key] === 'object') { + recurse(obj[key]) + } else { + if (obj[key] && typeFilter.includes(typeof obj[key])) { + fn.call(this, obj[key]) + } + } + } + } + recurse(obj) + return obj +} + +/** + * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one. 
+ * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter. + * + * Unfortunately the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process. + * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings. + * + * We use this to pass down our utility functions as well as any other functions (to be able to split up code better). + * + * @see utils.materializeFns + * + * @param {object} fnObj - An object containing functions as properties + */ +utils.stringifyFns = (fnObj = { hello: () => 'world' }) => { + // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine + // https://github.com/feross/fromentries + function fromEntries(iterable) { + return [...iterable].reduce((obj, [key, val]) => { + obj[key] = val + return obj + }, {}) + } + return (Object.fromEntries || fromEntries)( + Object.entries(fnObj) + .filter(([key, value]) => typeof value === 'function') + .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval + ) +} + +/** + * Utility function to reverse the process of `utils.stringifyFns`. + * Will materialize an object with stringified functions (supports classic and fat arrow functions). + * + * @param {object} fnStrObj - An object containing stringified functions as properties + */ +utils.materializeFns = (fnStrObj = { hello: "() => 'world'" }) => { + return Object.fromEntries( + Object.entries(fnStrObj).map(([key, value]) => { + if (value.startsWith('function')) { + // some trickery is needed to make oldschool functions work :-) + return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval + } else { + // arrow functions just work + return [key, eval(value)] // eslint-disable-line no-eval + } + }) + ) +} + +// -- +// Stuff starting below this line is NodeJS specific. 
+// -- +// module.exports = utils +""" diff --git a/TikTok/TikTokApi/stealth/js/webgl_vendor.py b/TikTok/TikTokApi/stealth/js/webgl_vendor.py new file mode 100644 index 0000000000000000000000000000000000000000..4003ad8929e539aea107747731b146b65fc2d735 --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/webgl_vendor.py @@ -0,0 +1,27 @@ +webgl_vendor = """ +console.log(opts) +const getParameterProxyHandler = { + apply: function (target, ctx, args) { + const param = (args || [])[0] + // UNMASKED_VENDOR_WEBGL + if (param === 37445) { + return opts.webgl_vendor || 'Intel Inc.' // default in headless: Google Inc. + } + // UNMASKED_RENDERER_WEBGL + if (param === 37446) { + return opts.webgl_renderer || 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader + } + return utils.cache.Reflect.apply(target, ctx, args) + } +} + +// There's more than one WebGL rendering context +// https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility +// To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter) +const addProxy = (obj, propName) => { + utils.replaceWithProxy(obj, propName, getParameterProxyHandler) +} +// For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing: +addProxy(WebGLRenderingContext.prototype, 'getParameter') +addProxy(WebGL2RenderingContext.prototype, 'getParameter') +""" diff --git a/TikTok/TikTokApi/stealth/js/window_outerdimensions.py b/TikTok/TikTokApi/stealth/js/window_outerdimensions.py new file mode 100644 index 0000000000000000000000000000000000000000..c157706595765fc2922496128d4841e9aef569fa --- /dev/null +++ b/TikTok/TikTokApi/stealth/js/window_outerdimensions.py @@ -0,0 +1,15 @@ +window_outerdimensions = """ +'use strict' + +try { + if (!!window.outerWidth && !!window.outerHeight) { + const windowFrame = 85 // probably OS and WM dependent + window.outerWidth = window.innerWidth + console.log(`current 
window outer height ${window.outerHeight}`) + window.outerHeight = window.innerHeight + windowFrame + console.log(`new window outer height ${window.outerHeight}`) + } +} catch (err) { +} + +""" diff --git a/TikTok/TikTokApi/stealth/stealth.py b/TikTok/TikTokApi/stealth/stealth.py new file mode 100644 index 0000000000000000000000000000000000000000..7b0986cb26d45d266cd071692cb9dd3b4e2b7e5c --- /dev/null +++ b/TikTok/TikTokApi/stealth/stealth.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +import json +from dataclasses import dataclass +from typing import Tuple, Optional, Dict + +from playwright.async_api import Page as AsyncPage + +from .js.chrome_app import chrome_app +from .js.chrome_csi import chrome_csi +from .js.chrome_hairline import chrome_hairline +from .js.chrome_load_times import chrome_load_times +from .js.chrome_runtime import chrome_runtime +from .js.generate_magic_arrays import generate_magic_arrays +from .js.iframe_contentWindow import iframe_contentWindow +from .js.media_codecs import media_codecs +from .js.navigator_hardwareConcurrency import navigator_hardwareConcurrency +from .js.navigator_languages import navigator_languages +from .js.navigator_permissions import navigator_permissions +from .js.navigator_platform import navigator_platform +from .js.navigator_plugins import navigator_plugins +from .js.navigator_userAgent import navigator_userAgent +from .js.navigator_vendor import navigator_vendor +from .js.webgl_vendor import webgl_vendor +from .js.window_outerdimensions import window_outerdimensions +from .js.utils import utils + +SCRIPTS: Dict[str, str] = { + "chrome_csi": chrome_csi, + "chrome_app": chrome_app, + "chrome_runtime": chrome_runtime, + "chrome_load_times": chrome_load_times, + "chrome_hairline": chrome_hairline, + "generate_magic_arrays": generate_magic_arrays, + "iframe_content_window": iframe_contentWindow, + "media_codecs": media_codecs, + "navigator_vendor": navigator_vendor, + "navigator_plugins": navigator_plugins, + 
"navigator_permissions": navigator_permissions, + "navigator_languages": navigator_languages, + "navigator_platform": navigator_platform, + "navigator_user_agent": navigator_userAgent, + "navigator_hardware_concurrency": navigator_hardwareConcurrency, + "outerdimensions": window_outerdimensions, + "utils": utils, + "webdriver": "delete Object.getPrototypeOf(navigator).webdriver", + "webgl_vendor": webgl_vendor, +} + + +@dataclass +class StealthConfig: + """ + Playwright stealth configuration that applies stealth strategies to playwright page objects. + The stealth strategies are contained in ./js package and are basic javascript scripts that are executed + on every page.goto() called. + Note: + All init scripts are combined by playwright into one script and then executed this means + the scripts should not have conflicting constants/variables etc. ! + This also means scripts can be extended by overriding enabled_scripts generator: + ``` + @property + def enabled_scripts(): + yield 'console.log("first script")' + yield from super().enabled_scripts() + yield 'console.log("last script")' + ``` + """ + + # load script options + webdriver: bool = True + webgl_vendor: bool = True + chrome_app: bool = True + chrome_csi: bool = True + chrome_load_times: bool = True + chrome_runtime: bool = True + iframe_content_window: bool = True + media_codecs: bool = True + navigator_hardware_concurrency: int = 4 + navigator_languages: bool = True + navigator_permissions: bool = True + navigator_platform: bool = True + navigator_plugins: bool = True + navigator_user_agent: bool = True + navigator_vendor: bool = True + outerdimensions: bool = True + hairline: bool = True + + # options + vendor: str = "Intel Inc." + renderer: str = "Intel Iris OpenGL Engine" + nav_vendor: str = "Google Inc." 
+ nav_user_agent: str = None + nav_platform: str = None + languages: Tuple[str] = ("en-US", "en") + runOnInsecureOrigins: Optional[bool] = None + + @property + def enabled_scripts(self): + opts = json.dumps( + { + "webgl_vendor": self.vendor, + "webgl_renderer": self.renderer, + "navigator_vendor": self.nav_vendor, + "navigator_platform": self.nav_platform, + "navigator_user_agent": self.nav_user_agent, + "languages": list(self.languages), + "runOnInsecureOrigins": self.runOnInsecureOrigins, + } + ) + # defined options constant + yield f"const opts = {opts}" + # init utils and generate_magic_arrays helper + yield SCRIPTS["utils"] + yield SCRIPTS["generate_magic_arrays"] + + if self.chrome_app: + yield SCRIPTS["chrome_app"] + if self.chrome_csi: + yield SCRIPTS["chrome_csi"] + if self.hairline: + yield SCRIPTS["chrome_hairline"] + if self.chrome_load_times: + yield SCRIPTS["chrome_load_times"] + if self.chrome_runtime: + yield SCRIPTS["chrome_runtime"] + if self.iframe_content_window: + yield SCRIPTS["iframe_content_window"] + if self.media_codecs: + yield SCRIPTS["media_codecs"] + if self.navigator_languages: + yield SCRIPTS["navigator_languages"] + if self.navigator_permissions: + yield SCRIPTS["navigator_permissions"] + if self.navigator_platform: + yield SCRIPTS["navigator_platform"] + if self.navigator_plugins: + yield SCRIPTS["navigator_plugins"] + if self.navigator_user_agent: + yield SCRIPTS["navigator_user_agent"] + if self.navigator_vendor: + yield SCRIPTS["navigator_vendor"] + if self.webdriver: + yield SCRIPTS["webdriver"] + if self.outerdimensions: + yield SCRIPTS["outerdimensions"] + if self.webgl_vendor: + yield SCRIPTS["webgl_vendor"] + + +async def stealth_async(page: AsyncPage, config: StealthConfig = None): + """stealth the page""" + for script in (config or StealthConfig()).enabled_scripts: + await page.add_init_script(script) diff --git a/TikTok/TikTokApi/tiktok.py b/TikTok/TikTokApi/tiktok.py new file mode 100644 index 
0000000000000000000000000000000000000000..a8be55a5d4bec6489672cd32f377dc8b7875569e --- /dev/null +++ b/TikTok/TikTokApi/tiktok.py @@ -0,0 +1,519 @@ +import asyncio +import logging +import dataclasses +from typing import Any +import random +import time +import json +import os + +from playwright.async_api import async_playwright +from urllib.parse import urlencode, quote, urlparse +from .stealth import stealth_async +from .helpers import random_choice + +from .api.user import User +from .api.video import Video +from .api.sound import Sound +from .api.hashtag import Hashtag +from .api.comment import Comment +from .api.trending import Trending +from .api.search import Search + +from pystealth import PyStealth + +from .exceptions import ( + InvalidJSONException, + EmptyResponseException, +) + + +@dataclasses.dataclass +class TikTokPlaywrightSession: + """A TikTok session using Playwright""" + + context: Any + page: Any + proxy: str = None + params: dict = None + headers: dict = None + ms_token: str = None + base_url: str = "https://www.tiktok.com" + + +class TikTokApi: + """The main TikTokApi class that contains all the endpoints. + + Import With: + .. code-block:: python + + from TikTokApi import TikTokApi + api = TikTokApi() + """ + + user = User + video = Video + sound = Sound + hashtag = Hashtag + comment = Comment + trending = Trending + search = Search + + def __init__(self, logging_level: int = logging.WARN, logger_name: str = None): + """ + Create a TikTokApi object. + + Args: + logging_level (int): The logging level you want to use. + logger_name (str): The name of the logger you want to use. 
+ """ + self.sessions = [] + + if logger_name is None: + logger_name = __name__ + self.__create_logger(logger_name, logging_level) + + User.parent = self + Video.parent = self + Sound.parent = self + Hashtag.parent = self + Comment.parent = self + Trending.parent = self + Search.parent = self + + def __create_logger(self, name: str, level: int = logging.DEBUG): + """Create a logger for the class.""" + self.logger: logging.Logger = logging.getLogger(name) + self.logger.setLevel(level) + handler = logging.StreamHandler() + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + handler.setFormatter(formatter) + self.logger.addHandler(handler) + + async def __set_session_params(self, session: TikTokPlaywrightSession): + """Set the session params for a TikTokPlaywrightSession""" + user_agent = await session.page.evaluate("() => navigator.userAgent") + language = await session.page.evaluate( + "() => navigator.language || navigator.userLanguage" + ) + platform = await session.page.evaluate("() => navigator.platform") + device_id = str(random.randint(10**18, 10**19 - 1)) # Random device id + history_len = str(random.randint(1, 10)) # Random history length + screen_height = str(random.randint(600, 1080)) # Random screen height + screen_width = str(random.randint(800, 1920)) # Random screen width + timezone = await session.page.evaluate( + "() => Intl.DateTimeFormat().resolvedOptions().timeZone" + ) + + session_params = { + "aid": "1988", + "app_language": language, + "app_name": "tiktok_web", + "browser_language": language, + "browser_name": "Mozilla", + "browser_online": "true", + "browser_platform": platform, + "browser_version": user_agent, + "channel": "tiktok_web", + "cookie_enabled": "true", + "device_id": device_id, + "device_platform": "web_pc", + "focus_state": "true", + "from_page": "user", + "history_len": history_len, + "is_fullscreen": "false", + "is_page_visible": "true", + "language": language, + "os": platform, + 
"priority_region": "", + "referer": "", + "region": "US", # TODO: TikTokAPI option + "screen_height": screen_height, + "screen_width": screen_width, + "tz_name": timezone, + "webcast_language": language, + } + session.params = session_params + + async def __create_session( + self, + url: str = "https://www.tiktok.com", + ms_token: str = None, + proxy: str = None, + context_options: dict = {}, + sleep_after: int = 1, + cookies: dict = None, + suppress_resource_load_types: list[str] = None, + ): + print("Creating session...") + """Create a TikTokPlaywrightSession""" + if ms_token is not None: + if cookies is None: + cookies = {} + cookies["msToken"] = ms_token + print("ms_token", ms_token) + + context = await self.browser.new_context(proxy=proxy, **context_options) + if cookies is not None: + formatted_cookies = [ + {"name": k, "value": v, "domain": urlparse(url).netloc, "path": "/"} + for k, v in cookies.items() + if v is not None + ] + await context.add_cookies(formatted_cookies) + page = await context.new_page() + await PyStealth.setup_playwright(page) + + # Get the request headers to the url + request_headers = None + + def handle_request(request): + nonlocal request_headers + request_headers = request.headers + + page.once("request", handle_request) + + if suppress_resource_load_types is not None: + await page.route( + "**/*", + lambda route, request: route.abort() + if request.resource_type in suppress_resource_load_types + else route.continue_(), + ) + + await page.goto(url) + await asyncio.sleep(10) + await page.mouse.move(0,0) + await page.mouse.move(0,100) + + session = TikTokPlaywrightSession( + context, + page, + ms_token=ms_token, + proxy=proxy, + headers=request_headers, + base_url=url, + ) + if ms_token is None: + time.sleep(sleep_after) # TODO: Find a better way to wait for msToken + + cookies = await self.get_session_cookies(session) + with open("cookies.json", "w") as f: + json.dump(cookies, f) + ms_token = cookies.get("msToken") + session.ms_token 
= ms_token + if ms_token is None: + self.logger.info( + f"Failed to get msToken on session index {len(self.sessions)}, you should consider specifying ms_tokens" + ) + self.sessions.append(session) + await self.__set_session_params(session) + + async def create_sessions( + self, + num_sessions=5, + headless=True, + ms_tokens: list[str] = None, + proxies: list = None, + sleep_after=1, + starting_url="https://www.tiktok.com", + context_options: dict = {}, + override_browser_args: list[dict] = None, + cookies: list[dict] = None, + suppress_resource_load_types: list[str] = None, + browser: str = "chromium", + executable_path: str = None + ): + """ + Create sessions for use within the TikTokApi class. + + These sessions are what will carry out requesting your data from TikTok. + + Args: + num_sessions (int): The amount of sessions you want to create. + headless (bool): Whether or not you want the browser to be headless. + ms_tokens (list[str]): A list of msTokens to use for the sessions, you can get these from your cookies after visiting TikTok. + If you don't provide any, the sessions will try to get them themselves, but this is not guaranteed to work. + proxies (list): A list of proxies to use for the sessions + sleep_after (int): The amount of time to sleep after creating a session, this is to allow the msToken to be generated. + starting_url (str): The url to start the sessions on, this is usually https://www.tiktok.com. + context_options (dict): Options to pass to the playwright context. + override_browser_args (list[dict]): A list of dictionaries containing arguments to pass to the browser. + cookies (list[dict]): A list of cookies to use for the sessions, you can get these from your cookies after visiting TikTok. + suppress_resource_load_types (list[str]): Types of resources to suppress playwright from loading, excluding more types will make playwright faster.. 
Types: document, stylesheet, image, media, font, script, textrack, xhr, fetch, eventsource, websocket, manifest, other. + browser (str): specify either firefox or chromium, default is chromium + executable_path (str): Path to the browser executable + + Example Usage: + .. code-block:: python + + from TikTokApi import TikTokApi + with TikTokApi() as api: + await api.create_sessions(num_sessions=5, ms_tokens=['msToken1', 'msToken2']) + """ + self.playwright = await async_playwright().start() + if browser == "chromium": + if headless and override_browser_args is None: + override_browser_args = ["--headless=new"] + headless = False # managed by the arg + + # self.browser = await self.playwright.chromium.launch_persistent_context( + # user_data_dir= + # headless=headless, args=override_browser_args, proxy=random_choice(proxies), executable_path=executable_path + # ) + self.browser = await self.playwright.chromium.launch( + + headless=headless, args=override_browser_args, proxy=random_choice(proxies), executable_path=executable_path + ) + elif browser == "firefox": + # self.browser = await self.playwright.firefox.launch_persistent_context( + # user_data_dir= f"C:\\Users\\{os.getlogin()}\\AppData\\Local\\Mozilla\\Firefox\\Profiles\\0haktc5t.default-release", + # headless=headless, args=override_browser_args, proxy=random_choice(proxies), executable_path=executable_path + # ) + self.browser = await self.playwright.firefox.launch( + headless=headless, args=override_browser_args, proxy=random_choice(proxies), executable_path=executable_path + ) + elif browser == "webkit": + self.browser = await self.playwright.webkit.launch( + headless=headless, args=override_browser_args, proxy=random_choice(proxies), executable_path=executable_path + ) + else: + raise ValueError("Invalid browser argument passed") + + await asyncio.gather( + *( + self.__create_session( + proxy=random_choice(proxies), + ms_token=random_choice(ms_tokens), + url=starting_url, + context_options=context_options, 
+ sleep_after=sleep_after, + cookies=random_choice(cookies), + suppress_resource_load_types=suppress_resource_load_types, + ) + for _ in range(num_sessions) + ) + ) + self.num_sessions = len(self.sessions) + + async def close_sessions(self): + """ + Close all the sessions. Should be called when you're done with the TikTokApi object + + This is called automatically when using the TikTokApi with "with" + """ + for session in self.sessions: + await session.page.close() + await session.context.close() + self.sessions.clear() + + await self.browser.close() + await self.playwright.stop() + + def generate_js_fetch(self, method: str, url: str, headers: dict) -> str: + """Generate a javascript fetch function for use in playwright""" + headers_js = json.dumps(headers) + return f""" + () => {{ + return new Promise((resolve, reject) => {{ + fetch('{url}', {{ method: '{method}', headers: {headers_js} }}) + .then(response => response.text()) + .then(data => resolve(data)) + .catch(error => reject(error.message)); + }}); + }} + """ + + def _get_session(self, **kwargs): + """Get a random session + + Args: + session_index (int): The index of the session you want to use, if not provided a random session will be used. + + Returns: + int: The index of the session. + TikTokPlaywrightSession: The session. + """ + if kwargs.get("session_index") is not None: + i = kwargs["session_index"] + else: + i = random.randint(0, self.num_sessions - 1) + return i, self.sessions[i] + + async def set_session_cookies(self, session, cookies): + """ + Set the cookies for a session + + Args: + session (TikTokPlaywrightSession): The session to set the cookies for. + cookies (dict): The cookies to set for the session. + """ + await session.context.add_cookies(cookies) + + async def get_session_cookies(self, session): + """ + Get the cookies for a session + + Args: + session (TikTokPlaywrightSession): The session to get the cookies for. + + Returns: + dict: The cookies for the session. 
+ """ + cookies = await session.context.cookies() + return {cookie["name"]: cookie["value"] for cookie in cookies} + + async def run_fetch_script(self, url: str, headers: dict, **kwargs): + """ + Execute a javascript fetch function in a session + + Args: + url (str): The url to fetch. + headers (dict): The headers to use for the fetch. + + Returns: + any: The result of the fetch. Seems to be a string or dict + """ + js_script = self.generate_js_fetch("GET", url, headers) + _, session = self._get_session(**kwargs) + result = await session.page.evaluate(js_script) + return result + + async def generate_x_bogus(self, url: str, **kwargs): + """Generate the X-Bogus header for a url""" + _, session = self._get_session(**kwargs) + await session.page.wait_for_function("window.byted_acrawler !== undefined") + result = await session.page.evaluate( + f'() => {{ return window.byted_acrawler.frontierSign("{url}") }}' + ) + return result + + async def sign_url(self, url: str, **kwargs): + """Sign a url""" + i, session = self._get_session(**kwargs) + + # TODO: Would be nice to generate msToken here + + # Add X-Bogus to url + x_bogus = (await self.generate_x_bogus(url, session_index=i)).get("X-Bogus") + if x_bogus is None: + raise Exception("Failed to generate X-Bogus") + + if "?" in url: + url += "&" + else: + url += "?" + url += f"X-Bogus={x_bogus}" + + return url + + async def make_request( + self, + url: str, + headers: dict = None, + params: dict = None, + retries: int = 3, + exponential_backoff: bool = True, + **kwargs, + ): + """ + Makes a request to TikTok through a session. + + Args: + url (str): The url to make the request to. + headers (dict): The headers to use for the request. + params (dict): The params to use for the request. + retries (int): The amount of times to retry the request if it fails. + exponential_backoff (bool): Whether or not to use exponential backoff when retrying the request. 
+ session_index (int): The index of the session you want to use, if not provided a random session will be used. + + Returns: + dict: The json response from TikTok. + + Raises: + Exception: If the request fails. + """ + i, session = self._get_session(**kwargs) + if session.params is not None: + params = {**session.params, **params} + + if headers is not None: + headers = {**session.headers, **headers} + else: + headers = session.headers + + # get msToken + if params.get("msToken") is None: + # try to get msToken from session + if session.ms_token is not None: + params["msToken"] = session.ms_token + else: + # we'll try to read it from cookies + cookies = await self.get_session_cookies(session) + ms_token = cookies.get("msToken") + if ms_token is None: + self.logger.warn( + "Failed to get msToken from cookies, trying to make the request anyway (probably will fail)" + ) + params["msToken"] = ms_token + + encoded_params = f"{url}?{urlencode(params, safe='=', quote_via=quote)}" + signed_url = await self.sign_url(encoded_params, session_index=i) + + retry_count = 0 + while retry_count < retries: + retry_count += 1 + result = await self.run_fetch_script( + signed_url, headers=headers, session_index=i + ) + + if result is None: + raise Exception("TikTokApi.run_fetch_script returned None") + + if result == "": + raise EmptyResponseException(result, "TikTok returned an empty response") + + try: + data = json.loads(result) + if data.get("status_code") != 0: + self.logger.error(f"Got an unexpected status code: {data}") + return data + except json.decoder.JSONDecodeError: + if retry_count == retries: + self.logger.error(f"Failed to decode json response: {result}") + raise InvalidJSONException() + + self.logger.info( + f"Failed a request, retrying ({retry_count}/{retries})" + ) + if exponential_backoff: + await asyncio.sleep(2**retry_count) + else: + await asyncio.sleep(1) + + async def close_sessions(self): + """Close all the sessions. 
Should be called when you're done with the TikTokApi object""" + for session in self.sessions: + await session.page.close() + await session.context.close() + self.sessions.clear() + + async def stop_playwright(self): + """Stop the playwright browser""" + await self.browser.close() + await self.playwright.stop() + + async def get_session_content(self, url: str, **kwargs): + """Get the content of a url""" + _, session = self._get_session(**kwargs) + + return await session.page.content() + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close_sessions() + await self.stop_playwright() diff --git a/gradioa.py b/gradioa.py new file mode 100644 index 0000000000000000000000000000000000000000..0f70ee4582aa66894a1143386a633710fff3e1ba --- /dev/null +++ b/gradioa.py @@ -0,0 +1,12 @@ + +import gradio as gr +def greet(name, intensity): + return "Hello, " + name + "!" * int(intensity) + +demo = gr.Interface( + fn=greet, + inputs=["text", "slider"], + outputs=["text"], +) + +demo.launch(share=True) \ No newline at end of file diff --git a/hello b/hello new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4295a05e29bd02379cd669c3330dc7ea6a1bd91 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +asyncio +requests>=2.31.0,<3.0 +browser_cookie3 +playwright==1.37.0 +pytelegrambotapi +websockets +httpx>=0.27.0,<1.0 +pystealth +tqdm \ No newline at end of file