Oct-02-2021, 09:01 AM
I generate different data-sets, and I want to save them into excel file in the form of columns. But, excel file always returns the data-sets row-wise.
Here's the code;
Here's the code;
"""
Data Generator for Multiple Use;
"""
import numpy as np
import pandas as pd
import random
import xlsxwriter
import math
from numpy import linalg as LA
import matplotlib.pyplot as plt
import matplotlib.colors
from scipy.stats import poisson
from scipy.stats import bernoulli
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm import tqdm_notebook
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs
class DataGenerator:
"""
Pre-defined Features
"""
def __init__(self):
self.LengthSample = 100
self.mu = 0.1
self.sigma = 0.5
self.data_set_no = 3
self.PoissonPar = 3
self.BenoulliPar = 0.6
self.DataIdx = ['Gaussian', 'Bernoulli', 'Poisson']
self.PercNo_Layer = np.asarray([200, 1000, 300, 400, 200])
self.Active_Layer = np.asarray([10/100*self.PercNo_Layer[0], self.PercNo_Layer[1], 3/100*self.PercNo_Layer[2], 3/100*self.PercNo_Layer[3], 10/100*self.PercNo_Layer[4]])
# ==============================================================================================================#
"""
Data set;
Mixture of python blobs, Bernoulli, Gaussian, Poisson data distribution
"""
def GenerateData(self):
# Usual python blobs
data_blobs, _ = make_blobs(n_samples=self.LengthSample, centers=1, n_features=int(self.Active_Layer[0]),
cluster_std=5.0, random_state=1000)
data_blobs = data_blobs.transpose()
# Some Poisson distribution
data_Poisson = np.asarray(
[poisson.rvs(mu=self.PoissonPar, size=self.LengthSample) for _ in
range(int(self.Active_Layer[0]))]).reshape(
int(self.LengthSample), int(self.Active_Layer[0]))
data_Poisson = data_Poisson.transpose()
# Some Gaussian distribution
data_Gaussian = np.asarray([np.random.normal(self.mu, self.sigma, size=self.LengthSample) for _ in
range(int(self.Active_Layer[0]))]).reshape(int(self.LengthSample),
int(self.Active_Layer[0]))
data_Gaussian = data_Gaussian.transpose()
# Some Bernoulli
data_bern = np.asarray(
[bernoulli.rvs(size=self.LengthSample, p=self.BenoulliPar) for _ in
range(int(self.Active_Layer[0]))]).reshape(
int(self.LengthSample), int(self.Active_Layer[0]))
data_bern = data_bern.transpose()
return data_blobs, data_Gaussian, data_bern, data_Poisson
def Save2Xls(self):
# Call the data
data_blobs, data_Gaussian, data_bern, data_Poisson = self.GenerateData()
# Individual data frames per data-set
data_Poisson_df = pd.DataFrame({'Poisson': [data_Poisson]})
data_Gaussian_df = pd.DataFrame({'Gaussian': [data_Gaussian]})
data_blobs_df = pd.DataFrame({'Blobs': [data_blobs]})
data_Bernoulli_df = pd.DataFrame({'Bernoulli': [data_bern]})
data_sheets = {'Blobs':data_blobs_df, 'Poisson':data_Poisson_df, 'Gaussian':data_Gaussian_df, 'Bernoulli':data_Bernoulli_df}
data_sets_ind = pd.ExcelWriter('./DataSets.xlsx', engine='xlsxwriter')
for sheet_name in data_sheets.keys(): #
data_sheets[sheet_name].to_excel(data_sets_ind, sheet_name=sheet_name)
data_sets_ind.save()
# Dictionary of data-sets
data_dictionary = pd.DataFrame({'Blobs':[data_blobs], 'Poisson':[data_Poisson], 'Gaussian':[data_Gaussian], 'Bernoulli':[data_bern]})
data_dictionary.to_excel('./data_dictionary.xlsx', sheet_name='Data dictionary', index=False)
return data_dictionary, data_sets_ind
if __name__ == '__main__':
runner = DataGenerator()
runner.Save2Xls()
