Spaces:
Running
Running
File size: 2,025 Bytes
41c5156 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
# Species labels that synthetic sightings are drawn from.
_SPECIES_OPTIONS = (
    "beluga",
    "blue_whale",
    "bottlenose_dolphin",
    "brydes_whale",
    "commersons_dolphin",
    "common_dolphin",
    "cuviers_beaked_whale",
    "dusky_dolphin",
    "false_killer_whale",
    "fin_whale",
    "frasiers_dolphin",
    "gray_whale",
    "humpback_whale",
    "killer_whale",
    "long_finned_pilot_whale",
    "melon_headed_whale",
    "minke_whale",
    "pantropic_spotted_dolphin",
    "pygmy_killer_whale",
    "rough_toothed_dolphin",
    "sei_whale",
    "short_finned_pilot_whale",
    "southern_right_whale",
    "spinner_dolphin",
    "spotted_dolphin",
    "white_sided_dolphin",
)

# NOTE(review): all four entries are the same placeholder literal, which looks
# like e-mail obfuscation residue -- confirm the intended author addresses.
_EMAIL_OPTIONS = (
    '[email protected]', '[email protected]',
    '[email protected]', '[email protected]',
)

# Column order of the generated rows.
_FAKE_COLUMNS = ['lat', 'lon', 'species', 'author_email', 'date']


def _random_coord(rng):
    """Return a uniformly drawn ``(lat, lon)`` pair.

    Latitude is limited to [-60, 60] to stay away from the poles; nothing
    here checks that the point actually lies in the sea.
    """
    lat = rng.uniform(-60, 60)
    lon = rng.uniform(-180, 180)
    return lat, lon


def _random_date(rng, start_year=2018, end_year=2025):
    """Return a random midnight ``datetime`` between Jan 1 of both years.

    Both end points are inclusive because ``randint`` includes its upper bound.
    """
    start = datetime(start_year, 1, 1)
    end = datetime(end_year, 1, 1)
    return start + timedelta(days=rng.randint(0, (end - start).days))


def generate_fake_data(df, num_fake, *, seed=None):
    """Return a copy of *df* with *num_fake* synthetic observations appended.

    Each synthetic row holds a random latitude/longitude, a species label, an
    author e-mail and a date, stored under the columns ``lat``, ``lon``,
    ``species``, ``author_email`` and ``date``. The input frame is not
    modified; the result always has a fresh 0..n-1 index.

    Args:
        df: Existing observations to extend.
        num_fake: Number of synthetic rows to add. Zero or a negative value
            adds nothing.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the module-level ``random`` generator is used, so callers
            that seed it globally keep getting the same rows as before.

    Returns:
        A new ``pandas.DataFrame`` with the rows of *df* followed by the
        generated rows.
    """
    rng = random if seed is None else random.Random(seed)

    rows = []
    for _ in range(num_fake):
        # Draw order (coords, species, e-mail, date) is kept stable so that a
        # given seed keeps producing the same rows.
        lat, lon = _random_coord(rng)
        species = rng.choice(_SPECIES_OPTIONS)
        email = rng.choice(_EMAIL_OPTIONS)
        rows.append([lat, lon, species, email, _random_date(rng)])

    if not rows:
        # Concatenating an empty, object-typed frame is deprecated in recent
        # pandas and can change the dtypes of df's columns; just hand back a
        # re-indexed copy, which is what ignore_index=True would produce.
        return df.reset_index(drop=True)

    new_df = pd.DataFrame(rows, columns=_FAKE_COLUMNS)
    return pd.concat([df, new_df], ignore_index=True)