tools / utils /gradio_utils.py
zzmez's picture
feat(lists): enhanced filtering
144c5a1
raw
history blame
9.86 kB
import random
import ipaddress
import pandas as pd
### SWAKS ###
def get_subclasses(ip_classes: str) -> list:
    """
    Expands newline-separated IPv4 networks into their /24 subnets.

    Every non-blank line is parsed leniently (host bits may be set) and split
    into /24 networks. Surrounding whitespace is stripped and blank lines are
    skipped, so a trailing newline or Windows line endings in the input do not
    produce bogus entries.

    :param ip_classes: Newline-separated networks in CIDR notation, e.g. "10.0.0.0/22".
    :return: List of /24 networks in CIDR notation. A line that cannot be parsed
        or split contributes the string "Invalid IP class: <line>" instead.
    """
    result = []
    for raw_line in ip_classes.split('\n'):
        ip_class = raw_line.strip()
        if not ip_class:
            continue
        try:
            network = ipaddress.IPv4Network(ip_class, strict=False)
            # subnets() raises ValueError when the prefix is already longer
            # than /24, so such entries are reported as invalid as well.
            result.extend(str(subnet) for subnet in network.subnets(new_prefix=24))
        except ValueError:
            result.append(f"Invalid IP class: {ip_class}")
    return result
def update_header(max_ips):
    """
    Builds the header line that resets `found` to 0 and sets `max_ips`.

    :param max_ips: Value convertible with int(); it is truncated to an integer.
    :return: The header text, terminated by a newline.
    """
    limit = int(max_ips)
    return f"found=0; max_ips={limit};\n"
def str_to_list(ips_string) -> list:
    """
    Splits the given text on newline characters.

    :param ips_string: Text with one item per line.
    :return: The list of lines (an empty input yields a list with one empty string).
    """
    pieces = ips_string.split('\n')
    return pieces
def generate_cmds_from_bulk(big_subclasses_raw: str,
                            domain_raw: str,
                            max_ips: int = 2,
                            sample_size: int = 5):
    """
    Builds the shell script text that probes sampled IPs from every /24.

    For each /24 returned by get_subclasses, two groups of commands are
    emitted (one per host range returned by generate_ip_addresses). Each group
    starts with a header line that resets `found` and sets `max_ips`, followed
    by one command per randomly sampled IP.

    :param big_subclasses_raw: Newline-separated networks in CIDR notation.
    :param domain_raw: Newline-separated domains; one is picked at random per command.
    :param max_ips: Value written into the header as the `max_ips` shell variable.
    :param sample_size: Number of IPs to sample from each host range. Capped at
        the size of the range so an oversized value no longer raises ValueError.
    :return: All header and command lines joined with newlines.
    """
    # Gradio may deliver numbers as floats or strings, so normalise them here.
    domains = domain_raw.split('\n')
    sample_size = int(sample_size)
    max_ips = int(max_ips)

    header = f"found=0; max_ips={max_ips};"
    commands = []
    for ip_subclass_24 in get_subclasses(big_subclasses_raw):
        # get_subclasses reports unparsable lines as marker strings; keep them
        # visible as shell comments instead of turning them into bogus commands.
        if ip_subclass_24.startswith("Invalid IP class:"):
            commands.append(f"# {ip_subclass_24}")
            continue
        for pool in generate_ip_addresses(ip_subclass_24):
            commands.append(header)
            # random.sample raises ValueError when asked for more than len(pool).
            for ip in random.sample(pool, min(sample_size, len(pool))):
                commands.append(generate_randomized_output_max_ips(ip, domains))
    return '\n'.join(commands)
def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> list:
    """
    Generates IP addresses for every /24 contained in the given networks.

    NOTE(review): a later definition with the same name appears further down in
    this file and replaces this one when the module is imported; confirm which
    of the two is meant to be used.

    :param ip_subclasses: Newline-separated networks in CIDR notation.
    :param num_of_ips: Number of IP addresses to generate per /24.
    :return: Flat list of generated IP addresses (a list, not a joined string).
    """
    return [
        ip_address
        for ip_subclass_24 in get_subclasses(ip_subclasses)
        for ip_address in generate_ips_per_slash24(ip_subclass_24, num_of_ips)
    ]
def generate_ips_per_slash24(ip_class: str, num_ips: int) -> list:
    """
    Picks random host addresses inside one /24 network.

    The first half of the requested addresses (rounded down) gets a last octet
    drawn from 1-19, the remainder from 201-253. Draws are independent, so the
    same address may appear more than once.

    :param ip_class: The network in CIDR notation, e.g., "192.168.1.1/24".
    :param num_ips: The number of IP addresses to generate (converted with int()).
    :return: A list of generated IP addresses.
    """
    low_hosts = range(1, 20)
    # NOTE(review): generate_ip_addresses uses range(201, 255); confirm whether
    # leaving out .254 here is intentional.
    high_hosts = range(201, 254)
    total = int(num_ips)

    # Keep the first three octets, e.g. "192.168.1." from "192.168.1.1/24".
    address_part = ip_class.split('/')[0]
    prefix = address_part.rsplit('.', 1)[0] + '.'

    return [
        prefix + str(random.choice(low_hosts if position <= total / 2 else high_hosts))
        for position in range(1, total + 1)
    ]
def generate_ip_addresses(slash_24: str) -> list:
    """
    Lists the candidate host addresses of a /24 in two separate ranges.

    :param slash_24: The network in CIDR notation, e.g. "192.168.1.0/24".
    :return: A two-element list: the addresses ending in 1-19, then the
        addresses ending in 201-254.
    """
    # Keep the first three octets, e.g. "192.168.1." from "192.168.1.0/24".
    network_address = slash_24.split('/')[0]
    prefix = network_address.rsplit('.', 1)[0] + '.'

    low_block = [f"{prefix}{host}" for host in range(1, 20)]
    high_block = [f"{prefix}{host}" for host in range(201, 255)]
    return [low_block, high_block]
def generate_random_string(min_length=2, max_length=7):
    """
    Returns a random string of lowercase ASCII letters.

    :param min_length: Smallest possible length (inclusive).
    :param max_length: Largest possible length (inclusive).
    :return: The generated string.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    size = random.randint(min_length, max_length)
    picked = []
    for _ in range(size):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
def generate_randomized_output_max_ips(ip_address: str, domain_list: list, sleep_sec: int = 5) -> str:
    """
    Builds one guarded swaks shell command for a single IP address.

    The command only runs while the shell variable `found` is below `max_ips`.
    It pipes the swaks output through `tee -a swaks_full.log` and greps it for
    'sender ok'. On a match it increments `found` and appends the IP to
    bune_<first three octets>.txt; otherwise it appends the IP to
    blocate_<first three octets>.txt. It then sleeps for `sleep_sec` seconds.

    :param ip_address: The IP passed to swaks through --li.
    :param domain_list: Domains to choose the sender domain from at random.
    :param sleep_sec: Seconds to sleep after the attempt.
    :return: The shell command as a single line of text.
    """
    # Four random labels form the host name handed to swaks via -h.
    part1, part2, part3, part4 = [generate_random_string() for _ in range(4)]
    domain = random.choice(domain_list)
    # First three octets of the address, used to name the result files.
    ip_class = ip_address.rsplit('.', 1)[0]
    # NOTE(review): the -t recipient below looks like an e-mail address that was
    # replaced by a web-page obfuscation placeholder; confirm and restore the
    # real address.
    return f"""if [ $found -lt $max_ips ]; then swaks -t [email protected] -h {part1}-{part2}.{part3}-{part4}.com -f from@{domain} -q from --li {ip_address} |\
tee -a swaks_full.log | \
grep -q 'sender ok'; \
if [ $? -eq 0 ]; \
then \
found=$((found+1)); \
echo {ip_address} >>bune_{ip_class}.txt; \
else echo {ip_address} >>blocate_{ip_class}.txt; \
fi; \
sleep {sleep_sec}; \
fi;"""
### MIX ###
def mix(domains: str, ip_addresses: str, num_of_ips: int) -> str:
    """
    Pairs each domain with the IP address on the same line number.

    :param domains: Newline-separated domains.
    :param ip_addresses: Newline-separated IP addresses.
    :param num_of_ips: Group size; an empty line is inserted before every
        group of this many pairs, including the first one.
    :return: The "domain: ip" lines joined with newlines.
    :raises ValueError: If the two inputs do not have the same number of lines.
    """
    domain_rows = domains.split('\n')
    ip_rows = ip_addresses.split('\n')

    if len(ip_rows) != len(domain_rows):
        raise ValueError('The number of IP addresses and domains must be the same.')

    lines = []
    for index, (domain, ip) in enumerate(zip(domain_rows, ip_rows)):
        # Start of a new group: emit the separating empty line first.
        if index % num_of_ips == 0:
            lines.append('')
        lines.append(domain + ': ' + ip)
    return "\n".join(lines)
# def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> str:
# """
# Generates a list of IP addresses for a given list of IP subclasses and the number of IPs.
# :param ip_subclasses: List of non /24 IP subclasses in CIDR notation.
# :param num_of_ips: Number of IP addresses to generate per IP subclass.
# :return: List of generated IP addresses.
# """
# ip_addresses = []
# for ip_subclass_24 in get_subclasses(ip_subclasses):
# ip_addresses.extend(generate_ips_per_slash24(ip_subclass_24, num_of_ips))
# return ip_addresses
def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> str:
    """
    Generates IP addresses for every /24 contained in the given networks.

    Each non-blank line is parsed as an IPv4 network (host bits may be set, so
    an unaligned base is normalised to its network address) and expanded into
    all of its /24 subnets, i.e. 2 ** (24 - prefix) of them. A network narrower
    than /24 is mapped to the single /24 that contains it.

    :param ip_subclasses: Newline-separated networks in CIDR notation.
    :param num_of_ips: Number of IP addresses to generate per /24.
    :return: The generated IP addresses joined with newlines.
    :raises ValueError: If a non-blank line is not a valid IPv4 network.
    """
    slash24_prefix = 24
    ip_addresses = []
    for line in ip_subclasses.split('\n'):
        entry = line.strip()
        if not entry:
            # Tolerate blank lines, e.g. a trailing newline from a text box.
            continue
        network = ipaddress.IPv4Network(entry, strict=False)
        if network.prefixlen > slash24_prefix:
            # subnets() cannot widen a network, so use the enclosing /24.
            slash24_networks = [network.supernet(new_prefix=slash24_prefix)]
        else:
            slash24_networks = network.subnets(new_prefix=slash24_prefix)
        for slash24 in slash24_networks:
            ip_addresses.extend(generate_ips_per_slash24(str(slash24), num_of_ips))
    return "\n".join(ip_addresses)
### GENERATE TOP LISTS ###
def compute_offer(csv_file, days_lookback, min_sent):
    """
    Ranks (Oferta, Nume) pairs from a campaign CSV by mean 'Click Open'.

    Steps, in order:
      1. keep rows whose 'Send' is greater than `min_sent`;
      2. convert 'Click Open' from a percentage string to a float;
      3. drop every 'Oferta' that has at least one row dated within the last
         `days_lookback` days;
      4. keep rows whose 'Nume' starts with "Aeon News" and matches the name
         pattern below;
      5. group by ('Oferta', 'Nume') and aggregate.

    :param csv_file: Object whose `.name` attribute is the path of the CSV file.
    :param days_lookback: Number of days; converted with float().
    :param min_sent: Minimum 'Send' value (exclusive); converted with int().
    :return: DataFrame with columns Oferta, Nume, N (row count), send_avg
        (mean 'Send', rounded to 2 decimals) and CO (mean 'Click Open'),
        sorted by CO and then N, both descending.
    """
    cols = ['Campanie', 'Oferta', 'Nume', 'Server', 'User',
            'Lista Custom', 'Data', 'HClicks', 'Clicks', 'Unscribers', 'Openers',
            'Click Open', 'Leads', 'CLike', 'Complains', 'Traps', 'Send']
    # Same pattern as before, but with a non-capturing group: str.contains
    # warns when a pattern has capture groups, and the match result is identical.
    name_pattern = r'\b[A-Z]{3}\b.*\b\d{4}\*?\s*(?:\(\d{4}\))?\b'

    # Alternative inputs/filters kept for reference:
    # cmp_list = ['MSP', 'HOM', 'NTU', 'HCK', 'DDS', 'MNP', 'PSC', 'DTL', 'GVS', 'ANP', 'WDR', 'BSG'] #1
    # raw_df = pd.read_csv('tools/data/30.08.2023.gabriel.sabau.campanii.csv', parse_dates=['Data'])
    # comcast_df = raw_df[raw_df['Nume'].str.contains('|'.join(cmp_list))] #1
    # comcast_df = comcast_df[comcast_df['Domeniu'] == 'Comcast'] #2
    # comcast_df = comcast_df[comcast_df['Lista Custom'].str.contains('open')]
    comcast_df = pd.read_csv(csv_file.name, parse_dates=['Data'])

    # Work on an explicit copy so the column assignment below does not write
    # into a slice of the frame that was read (SettingWithCopyWarning).
    comcast_df = comcast_df.loc[comcast_df['Send'] > int(min_sent), cols].copy()
    comcast_df['Click Open'] = (
        comcast_df['Click Open'].str.replace('%', '', regex=False).astype(float)
    )

    # Offers with a row newer than the cutoff are excluded entirely.
    cutoff = pd.Timestamp('now') - pd.Timedelta(days=float(days_lookback))
    exclude_list = comcast_df.loc[comcast_df['Data'] > cutoff, 'Oferta'].unique()
    comcast_df = comcast_df[~comcast_df['Oferta'].isin(exclude_list)]

    names = comcast_df['Nume']
    name_matches = names.str.startswith("Aeon News") & names.str.contains(name_pattern)
    comcast_df = comcast_df[name_matches].reset_index(drop=True)

    final_df = (
        comcast_df.groupby(["Oferta", "Nume"])
        .agg(N=('Oferta', 'count'), send_avg=('Send', 'mean'), CO=('Click Open', 'mean'))
        .sort_values(['CO', 'N'], ascending=False)
    )
    final_df['send_avg'] = final_df['send_avg'].round(2).astype(float)
    return final_df.reset_index()