|
import numpy as np |
|
import _pickle |
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
from utils.ndgan import DCGAN |
|
|
|
# Seed NumPy's global random generator with a fixed value.
np.random.seed(4269)
|
|
|
|
|
|
|
class dataset:
    """Build an augmented dataset from a CSV source file.

    The source table is min-max scaled, a ``DCGAN`` is trained on the scaled
    table, and the samples it produces are appended to the scaled rows.
    """

    def __init__(self, number_samples: int, name: str, source: str,
                 boundary_conditions: list = None):
        """Store the configuration; nothing is read or trained here.

        Args:
            number_samples (int): number of samples to be generated
            name (str): file name under ``./data/`` the result is pickled to
            source (str): CSV file name under ``./data/`` that is read
            boundary_conditions (list): y1,y2,x1,x2; when given and
                non-empty, generated samples outside the bounds are dropped
        """
        self.sample_size = number_samples
        self.name = name
        self.samples = []
        self.encoding_dim = 8  # not read anywhere inside this class
        self.latent_dim = 16  # handed to DCGAN as its first argument
        self.source = source
        self.boundary_conditions = boundary_conditions

    @staticmethod
    def _min_max_scale(frame: pd.DataFrame) -> pd.DataFrame:
        """Return ``frame`` with every column except ``Name`` min-max scaled."""
        features = frame.drop("Name", axis=1)
        features = (features - features.min()) / (features.max() - features.min())
        # ``axis`` must be passed by keyword: the positional form is deprecated
        # in pandas 1.x and rejected by pandas 2.x.
        return pd.concat([frame.Name, features], axis=1)

    def generate(self) -> pd.DataFrame:
        """Train the generator on the source data and return the combined table.

        Reads ``./data/<source>``, scales it, trains a ``DCGAN`` on the scaled
        table, requests ``sample_size`` samples, optionally filters them by
        ``boundary_conditions``, appends them to the scaled rows, pickles the
        result to ``./data/<name>`` and prints it.

        Returns:
            pandas.DataFrame: the scaled source rows followed by the kept
            generated samples. Generated rows carry no ``Name`` value because
            they are built from the feature columns only.
        """
        with open(f"./data/{self.source}", "rb") as input_file:
            local = pd.read_csv(input_file)
        dfs = self._min_max_scale(local)

        # The attribute is called ``vae`` but holds a DCGAN instance.
        self.vae = DCGAN(self.latent_dim, dfs)
        self.vae.start_training()
        self.samples = self.vae.predict(self.sample_size)

        if self.boundary_conditions:
            bounds = self.boundary_conditions
            first_column = self.samples[:, 0]
            # NOTE(review): both ranges are tested against column 0, exactly as
            # the code did before. Given the documented order y1,y2,x1,x2 the
            # y-range check was presumably meant for another column - confirm
            # the column layout of the samples before changing it.
            inside_x = (first_column > bounds[2]) & (first_column < bounds[-1])
            inside_y = (first_column > bounds[0]) & (first_column < bounds[1])
            self.samples = self.samples[inside_x & inside_y]

        print("Samples:", self.samples)

        # Generated samples are labelled with the feature columns (everything
        # after the leading "Name" column).
        generated = pd.DataFrame(self.samples, columns=dfs.columns[1:])
        dataframe = pd.concat([dfs, generated])
        dataframe.to_pickle(f'./data/{self.name}')
        print(dataframe)

        return dataframe
|
|
|
|