Spaces:

zzmez
/

tools

Paused

App Files Files Community

tools / utils /gradio_utils.py

zzmez

fix: Replace 'Aol' with 'Yahoo' in compute_offer function

ea9f5e6 8 months ago

raw

history blame

13.8 kB

	import random
	import ipaddress
	import pandas as pd
	import re

	### SWAKS ###

	def get_subclasses(ip_classes: str):
	    """
	    Expand newline-separated IPv4 networks into their /24 subnets.

	    :param ip_classes: Networks in CIDR notation, one per line. Host bits are
	        tolerated because the network is parsed with ``strict=False``.
	        Surrounding whitespace is ignored and blank lines are skipped.
	    :return: List of /24 subnets as strings, in input order. A line that
	        cannot be parsed, or whose prefix is longer than /24, contributes an
	        ``"Invalid IP class: <line>"`` entry instead.
	    """
	    result = []
	    for raw_line in ip_classes.split('\n'):
	        ip_class = raw_line.strip()
	        # A blank line (e.g. from a trailing newline) is not a network; without
	        # this guard it would be reported as an invalid class and then be
	        # consumed by callers as if it were a subnet.
	        if not ip_class:
	            continue
	        try:
	            network = ipaddress.IPv4Network(ip_class, strict=False)
	            # subnets() raises ValueError when the prefix is already longer than /24.
	            subnets = [str(subnet) for subnet in network.subnets(new_prefix=24)]
	        except ValueError:
	            result.append(f"Invalid IP class: {ip_class}")
	        else:
	            result.extend(subnets)
	    return result


	def update_header(max_ips):
	    """
	    Build the header line that resets ``found`` and sets ``max_ips``.

	    :param max_ips: Value converted with ``int()`` before being embedded.
	    :return: The header text, terminated by a newline.
	    """
	    limit = int(max_ips)
	    return f"found=0; max_ips={limit};\n"


	def str_to_list(ips_string) -> list:
	    """
	    Split a string on newline characters.

	    :param ips_string: Text with one item per line.
	    :return: The pieces between newlines; empty pieces are kept.
	    """
	    separator = '\n'
	    pieces = ips_string.split(separator)
	    return pieces


	def generate_cmds_from_bulk(big_subclasses_raw: str,
	                            domain_raw: str,
	                            max_ips: int = 2,
	                            sample_size: int = 5):
	    """
	    Generate shell commands for random IPs taken from every /24 of the given classes.

	    For each entry returned by ``get_subclasses`` two groups are emitted, one
	    for the low address pool and one for the high address pool returned by
	    ``generate_ip_addresses``. Each group starts with a
	    ``found=0; max_ips=<n>;`` header followed by one command per sampled IP.

	    :param big_subclasses_raw: Newline-separated classes in CIDR notation.
	    :param domain_raw: Newline-separated domains; one is picked at random per command.
	    :param max_ips: Value written into every header (converted with ``int()``).
	    :param sample_size: Number of IPs sampled from each pool (converted with
	        ``int()``). Clamped to the pool size so an oversized or negative
	        request no longer raises ``ValueError`` from ``random.sample``.
	    :return: All headers and commands joined with newlines.
	    """
	    # Values may arrive as floats or strings, so normalise them first.
	    domains = domain_raw.split('\n')
	    sample_size = max(int(sample_size), 0)
	    max_ips = int(max_ips)

	    header = f"found=0; max_ips={max_ips};"
	    commands = []

	    for ip_subclass_24 in get_subclasses(big_subclasses_raw):
	        # generate_ip_addresses returns [low_pool, high_pool]; each pool gets its own header.
	        for pool in generate_ip_addresses(ip_subclass_24):
	            commands.append(header)
	            picked = random.sample(pool, min(sample_size, len(pool)))
	            commands.extend(
	                generate_randomized_output_max_ips(ip, domains) for ip in picked
	            )
	    return '\n'.join(commands)


	def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> list:
	    """
	    Generate IP addresses for every /24 contained in the given subclasses.

	    NOTE: this name is defined a second time further down in this file; the
	    later definition rebinds the name, so this version is not the one callers
	    get after the module has been fully loaded.

	    :param ip_subclasses: Newline-separated IP subclasses in CIDR notation.
	    :param num_of_ips: Number of IP addresses to generate per /24.
	    :return: List of generated IP addresses (a list, not a string).
	    """
	    ip_addresses = []
	    for ip_subclass_24 in get_subclasses(ip_subclasses):
	        ip_addresses.extend(generate_ips_per_slash24(ip_subclass_24, num_of_ips))
	    return ip_addresses


	def generate_ips_per_slash24(ip_class: str, num_ips: int) -> list:
	    """
	    Pick random host addresses inside one /24 network.

	    The first half of the result (rounded down) uses last octets 1-19, the
	    remainder uses last octets 201-253. Addresses may repeat because every
	    pick is independent.

	    :param ip_class: The network in CIDR notation, e.g. "192.168.1.1/24";
	        only the first three octets are used.
	    :param num_ips: How many addresses to produce (converted with ``int()``).
	    :return: A list of generated IP addresses.
	    """
	    total = int(num_ips)
	    low_hosts = range(1, 20)
	    high_hosts = range(201, 254)

	    # Keep everything up to the last dot, e.g. "192.168.1." for "192.168.1.1/24".
	    network_part = ip_class.partition('/')[0]
	    prefix = network_part.rsplit('.', 1)[0] + '.'

	    generated = []
	    for position in range(1, total + 1):
	        hosts = low_hosts if position <= total / 2 else high_hosts
	        generated.append(prefix + str(random.choice(hosts)))
	    return generated


	def generate_ip_addresses(slash_24: str) -> list:
	    """
	    List the low and high host addresses of a /24 network.

	    :param slash_24: The network in CIDR notation, e.g. "192.168.1.1/24";
	        only the first three octets are used.
	    :return: A two-element list: addresses with last octet 1-19, then
	        addresses with last octet 201-254.
	    """
	    # Keep everything up to the last dot, e.g. "192.168.1." for "192.168.1.1/24".
	    network_part = slash_24.partition('/')[0]
	    prefix = network_part.rsplit('.', 1)[0] + '.'

	    low_pool = [prefix + str(host) for host in range(1, 20)]
	    high_pool = [prefix + str(host) for host in range(201, 255)]
	    return [low_pool, high_pool]


	def generate_random_string(min_length=2, max_length=7):
	    """
	    Return a random string of lowercase ASCII letters.

	    :param min_length: Smallest possible length (inclusive).
	    :param max_length: Largest possible length (inclusive).
	    :return: The generated string.
	    """
	    alphabet = "abcdefghijklmnopqrstuvwxyz"
	    size = random.randint(min_length, max_length)
	    chars = []
	    for _ in range(size):
	        chars.append(random.choice(alphabet))
	    return ''.join(chars)


	def generate_randomized_output_max_ips(ip_address: str, domain_list: list, sleep_sec: int = 5) -> str:
	    """
	    Build a one-line shell command that probes ``ip_address`` with swaks.

	    The command only runs while ``$found`` is lower than ``$max_ips``. It
	    pipes the swaks output through ``tee -a swaks_full.log`` into
	    ``grep -q 'sender ok'``. On a match it increments ``found`` and appends
	    the IP to ``bune_<first three octets>.txt``; otherwise it appends the IP
	    to ``blocate_<first three octets>.txt``. It then sleeps ``sleep_sec``
	    seconds.

	    :param ip_address: Local interface address passed to swaks via ``--li``.
	    :param domain_list: Candidate sender domains; one is chosen at random.
	    :param sleep_sec: Seconds to sleep after the probe.
	    :return: The shell command as a single line.
	    """
	    part1 = generate_random_string()
	    part2 = generate_random_string()
	    part3 = generate_random_string()
	    part4 = generate_random_string()
	    domain = random.choice(domain_list)
	    ip_class = ip_address.rsplit('.', 1)[0]

	    # The pipes must be plain "|": the previous "\|" reached the shell as an
	    # escaped literal character, so swaks received "|", "tee", ... as
	    # arguments and nothing was ever piped into tee/grep.
	    # NOTE(review): the "-t" recipient looks like a redacted placeholder -
	    # confirm the intended address.
	    return (
	        "if [ $found -lt $max_ips ]; then "
	        f"swaks -t [email protected] -h {part1}-{part2}.{part3}-{part4}.com "
	        f"-f from@{domain} -q from --li {ip_address} | "
	        "tee -a swaks_full.log | "
	        "grep -q 'sender ok'; "
	        "if [ $? -eq 0 ]; "
	        "then "
	        "found=$((found+1)); "
	        f"echo {ip_address} >>bune_{ip_class}.txt; "
	        f"else echo {ip_address} >>blocate_{ip_class}.txt; "
	        "fi; "
	        f"sleep {sleep_sec}; "
	        "fi;"
	    )


	### MIX ###

	def mix(domains: str, ip_addresses: str, num_of_ips: int) -> str:
	    """
	    Pair each domain with the IP address on the same line number.

	    :param domains: Newline-separated domains.
	    :param ip_addresses: Newline-separated IP addresses.
	    :param num_of_ips: Group size; an empty line is emitted before every
	        group of this many pairs (including the first).
	    :return: ``"<domain>: <ip>"`` lines joined with newlines.
	    :raises ValueError: If the two inputs do not have the same number of lines.
	    """
	    domain_lines = domains.split('\n')
	    ip_lines = ip_addresses.split('\n')

	    # Pairing is positional, so both inputs must have the same length.
	    if len(ip_lines) != len(domain_lines):
	        raise ValueError('The number of IP addresses and domains must be the same.')

	    output = []
	    for position, (domain, ip) in enumerate(zip(domain_lines, ip_lines)):
	        if position % num_of_ips == 0:
	            output.append('')
	        output.append(domain + ': ' + ip)

	    return "\n".join(output)


	# def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> str:
	# """
	# Generates a list of IP addresses for a given list of IP subclasses and the number of IPs.

	# :param ip_subclasses: List of non /24 IP subclasses in CIDR notation.
	# :param num_of_ips: Number of IP addresses to generate per IP subclass.
	# :return: List of generated IP addresses.
	# """
	# ip_addresses = []

	# for ip_subclass_24 in get_subclasses(ip_subclasses):
	# ip_addresses.extend(generate_ips_per_slash24(ip_subclass_24, num_of_ips))
	# return ip_addresses

	def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> str:
	    """
	    Generate IP addresses for every /24 contained in the given subclasses.

	    :param ip_subclasses: Newline-separated IP subclasses in CIDR notation.
	        Surrounding whitespace is ignored and blank lines are skipped.
	    :param num_of_ips: Number of IP addresses to generate per /24.
	    :return: The generated IP addresses joined with newlines.
	    :raises ValueError: If a line is not a valid IPv4 network.
	    """
	    ip_addresses = []

	    for raw_line in ip_subclasses.split('\n'):
	        ip_subclass = raw_line.strip()
	        if not ip_subclass:
	            continue

	        # strict=False tolerates host bits, e.g. "10.0.5.0/22" -> 10.0.4.0/22.
	        network = ipaddress.IPv4Network(ip_subclass, strict=False)

	        if network.prefixlen >= 24:
	            # Already a single /24 or smaller: generate_ips_per_slash24 only
	            # looks at the first three octets, so pass the entry through.
	            slash_24_blocks = [ip_subclass]
	        else:
	            # A /mask network holds 2 ** (24 - mask) aligned /24 subnets. The
	            # earlier arithmetic used (24 - mask) ** 2 and added offsets to an
	            # unaligned third octet, which gave wrong counts (one block for a
	            # /23) and octets above 255.
	            slash_24_blocks = [str(subnet) for subnet in network.subnets(new_prefix=24)]

	        for ip_subclass_24 in slash_24_blocks:
	            ip_addresses.extend(generate_ips_per_slash24(ip_subclass_24, num_of_ips))
	    return "\n".join(ip_addresses)


	def _replace_numbers(input_string: str) -> str:
	# Find the numbers before and inside the parentheses
	match = re.search(r'(\d+)\s*$(\d+)$', input_string)
	if match:
	# Replace the first set of numbers with the second set
	replaced_string = input_string.replace(match.group(1), match.group(2), 1)
	# Remove the parentheses and any surrounding whitespace
	cleaned_string = re.sub(r'$\d+$', '', replaced_string).strip()
	return cleaned_string
	else:
	return input_string

	def _limit_chars(input_string: str, limit: int = 35) -> str:
	return input_string[:limit]

	### GENERATE TOP LISTS ###
	def compute_offer(csv_file, days_lookback, min_sent, domain, team, offer_type, x_list, ):
	    """
	    Build a ranking table of offers or newsletters from a campaign CSV export.

	    :param csv_file: Object whose ``name`` attribute is the path of the CSV file.
	    :param days_lookback: Rows whose ``Data`` is within this many days of now
	        (and whose ``Domeniu`` equals ``domain``) define what gets excluded.
	    :param min_sent: Only rows with ``Send`` strictly greater than this are kept.
	    :param domain: Value compared with the ``Domeniu`` column ("Aol" rows are
	        treated as "Yahoo").
	    :param team: "Team 1" or "Team 2" select a hard-coded member list; any
	        other value uses an empty list.
	    :param offer_type: "Offers", "Offers - IDs only" or "Newsletters". Any
	        other value yields an empty DataFrame.
	    :param x_list: Comma-separated patterns; rows whose ``Nume`` matches any of
	        them (as a regular expression) are dropped. Empty terms are ignored.
	    :return: A DataFrame grouped by (``Oferta``, ``Nume``) for "Offers" and
	        "Newsletters", or by ``offer_id`` for "Offers - IDs only".
	    """
	    pd.set_option('display.max_colwidth', 10)

	    df_all = pd.read_csv(csv_file.name, parse_dates=['Data'])
	    if team == "Team 1":
	        team_members = ['Ana Boros', 'Adrian Pop',
	                        'Liviu Avram', 'Alexandru Popescu', 'Vlad Draghici']
	    elif team == "Team 2":
	        team_members = ['Cristi Rusu', 'Robert Rachiteanu', 'Adrian Sabau', 'Gabriel Sabau']
	    else:
	        # NOTE(review): originally commented as "All", but with an empty list
	        # isin() matches no user, so no offer is excluded below - confirm intent.
	        team_members = []

	    cols = ['Campanie', 'Oferta', 'Nume', 'Server', 'User', 'offer_id',
	            'Lista Custom', 'Data', 'HClicks', 'Clicks', 'Unscribers', 'Openers',
	            'Click Open', 'Leads', 'CLike', 'Complains', 'Traps', 'Send', 'ECPM', 'Comision', 'Domeniu']

	    # The offer id is the 3-4 digit number at the very end of the name.
	    df_all['offer_id'] = df_all['Nume'].str.extract(r'(\d{3,4}$)', expand=False)

	    # Treat Aol as Yahoo. Assign the result back: an inplace replace on the
	    # column selection is a chained assignment and is not guaranteed to
	    # update the frame.
	    df_all['Domeniu'] = df_all['Domeniu'].replace('Aol', 'Yahoo')

	    # Drop everything that was already sent to this domain recently.
	    if offer_type == "Offers - IDs only" or offer_type == "Offers":
	        cutoff = pd.Timestamp('now') - pd.Timedelta(days=days_lookback)
	        exclude_list = df_all[(df_all['Data'] > cutoff)
	                              & (df_all['Domeniu'] == domain)
	                              & (df_all['User'].isin(team_members))]['offer_id'].unique()
	        df_all = df_all[~df_all['offer_id'].isin(exclude_list)]
	    elif offer_type == "Newsletters":
	        cutoff = pd.Timestamp('now') - pd.Timedelta(days=days_lookback)
	        exclude_list = df_all[(df_all['Data'] > cutoff)
	                              & (df_all['Domeniu'] == domain)]['Oferta'].unique()
	        df_all = df_all[~df_all['Oferta'].isin(exclude_list)]

	    df_all = df_all[df_all['Send'] > int(min_sent)]
	    df_all = df_all[cols]
	    # Skip the blank line in the csv; copy so the column assignments below
	    # operate on an independent frame.
	    df_all = df_all[df_all["Oferta"] != " "].copy()

	    df_all['Click Open'] = df_all['Click Open'].str.replace('%', '').astype(float)
	    df_all['ECPM'] = df_all['ECPM'].astype(float)
	    df_all['Comision'] = df_all['Comision'].astype(float)
	    df_all['Send'] = df_all['Send'].astype(int)

	    # Filter for newsletters or offers
	    if offer_type == "Newsletters":
	        # Keep "Aeon News ..." names that end in a space plus four digits, carry
	        # no parenthesised four-digit number and are not marked TRIMITE. The
	        # parentheses are escaped literals; the earlier r'$\d{4}$' pattern could
	        # never match, which made that condition a no-op.
	        df_all = df_all[
	            df_all['Nume'].str.startswith("Aeon News")
	            & (~df_all['Nume'].str.contains(r'\(\d{4}\)'))
	            & (df_all['Nume'].str.contains(r' \d{4}$'))
	            & (~df_all['Nume'].str.contains('TRIMITE'))
	        ]
	    elif offer_type == "Offers" or offer_type == "Offers - IDs only":
	        df_all = df_all[~df_all['Nume'].str.startswith("Aeon News")]
	        df_all = df_all[~df_all['Nume'].str.contains("NU SE TRIMITE")]
	        df_all = df_all[~df_all['Nume'].str.contains("de testat")]
	        df_all = df_all[~df_all['Nume'].str.contains("_TEST")]
	        df_all = df_all[~df_all['Nume'].str.contains("CPM")]
	        df_all = df_all[~df_all['Nume'].str.contains("RESTRICTED")]

	    # NOTE(review): applied for every offer_type here; the nesting of this
	    # block could not be recovered from the flattened source - confirm.
	    if x_list:
	        # Join with a plain "|" so the terms form a regex alternation; "\|"
	        # would only match a literal pipe character. Empty terms (e.g. from a
	        # trailing comma) are dropped because they would match every row.
	        excluded_terms = [term for term in x_list.split(',') if term]
	        if excluded_terms:
	            df_all = df_all[~df_all['Nume'].str.contains('|'.join(excluded_terms))]

	    df_all.reset_index(drop=True, inplace=True)

	    if offer_type == "Newsletters":
	        final_df = df_all.groupby(["Oferta", "Nume"])\
	            .agg(times_sent=('Oferta', 'count'), send_avg=('Send', 'mean'), CO=('Click Open', 'mean'))\
	            .sort_values(['CO', 'times_sent'], ascending=False)
	        final_df['send_avg'] = final_df['send_avg'].astype(int)
	        final_df['CO'] = final_df['CO'].round(2).astype(float)
	        final_df.reset_index(inplace=True)
	    elif offer_type == "Offers":
	        final_df = df_all.groupby(["Oferta", "Nume"])\
	            .agg(times_sent=('Oferta', 'count'), send_avg=('Send', 'mean'), ECPM=('ECPM', 'mean'))\
	            .sort_values(['ECPM', 'times_sent'], ascending=False)
	        final_df['send_avg'] = final_df['send_avg'].astype(int)
	        final_df['ECPM'] = final_df['ECPM'].round(2).astype(float)
	        final_df.reset_index(inplace=True)
	    elif offer_type == "Offers - IDs only":
	        final_df = df_all.groupby(["offer_id"])\
	            .agg(times_sent=('offer_id', 'count'), send_avg=('Send', 'mean'), total_sent=('Send', 'sum'),
	                 USD=('Comision', 'sum'))

	        final_df['USD'] = final_df['USD'].round(2).astype(float)
	        final_df['send_avg'] = final_df['send_avg'].astype(int)
	        # ECPM is recomputed from the summed commission and total sends.
	        # NOTE(review): the meaning of the 33.33 factor is not visible here - confirm.
	        final_df['ECPM'] = ((final_df['USD'] * 33.33) / final_df['total_sent']) * 1000
	        final_df['ECPM'] = final_df['ECPM'].round(2).astype(float)

	        final_df.sort_values(by='ECPM', ascending=False, inplace=True)

	        final_df.reset_index(inplace=True)
	    else:
	        final_df = pd.DataFrame()
	    return final_df