forked from alexdrk14/SAMLP
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpipeline.py
More file actions
executable file
·84 lines (55 loc) · 3.1 KB
/
Copy pathpipeline.py
File metadata and controls
executable file
·84 lines (55 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
""""####################################################################################################################
Author: Alexander Shevtsov ICS-FORTH
E-mail: shevtsov@ics.forth.gr, shevtsov@csd.uoc.gr
-----------------------------------
Parameter fine-tuning and Feature selection for ML model.
####################################################################################################################"""
import pandas as pd
from datetime import datetime
from utilities.DataLoading import DataLoading
from utilities.feature_selector import FeatureSelector
from utilities.model_selector import ModelSelector
from utilities.plotting import plot_shap_figure, plot_confusion_figure
class Piepeline:
    """Run feature selection, model fine-tuning and hold-out evaluation.

    Creating an instance immediately executes the whole pipeline, because
    ``__init__`` calls ``main``. The class name keeps its original spelling
    so that existing callers continue to work.

    Parameters:
        stratified: forwarded to ``FeatureSelector`` and ``ModelSelector``.
        shuffle: forwarded to ``FeatureSelector`` and ``ModelSelector``.
        verbose: forwarded to ``DataLoading``, ``FeatureSelector`` and
            ``ModelSelector``.
    """

    def __init__(self, stratified=True,
                 shuffle=True, verbose=True):
        self.verbose = verbose
        self.stratified = stratified
        self.shuffle = shuffle
        # Construction runs the full pipeline as a side effect.
        self.main()

    @staticmethod
    def _drop_unselected(frame, selected_features):
        """Drop, in place, every column of ``frame`` not in ``selected_features``."""
        frame.drop([feature for feature in frame.columns if feature not in selected_features],
                   axis=1, inplace=True)

    def main(self):
        """Execute the pipeline steps in order.

        Steps: load the training portion, select features, fine-tune the
        models on the selected features, load the hold-out portion, measure
        hold-out performance, plot SHAP and confusion figures for the best
        model, and finally pass the merged train + hold-out data to
        ``ModelSelector.store_final_model``.
        """
        # Loader for the data split into train/test and hold-out portions.
        DL = DataLoading(verbose=self.verbose)

        # Load only the visible data portion (train/validation).
        print(f'{datetime.now()} Pipeline: Data Loading\n')
        X_train, Y_train = DL.load_dataset(train=True, test=False, splited=True)

        # Create the feature selector.
        print(f'{datetime.now()} Feature selection')
        FS = FeatureSelector(stratified=self.stratified, shuffle=self.shuffle, verbose=self.verbose)

        # Pick the best features using the visible data portion only
        # (per the original author's note, a Lasso-based selector).
        selected_features = FS.get_features(X_train, Y_train)

        # This message closes the feature-selection step; the original text
        # said "End of fine-tuning" here, before any fine-tuning had started.
        print(f'{datetime.now()} End of feature selection\n' +
              f'{datetime.now()} Start of model fine-tuning')

        # Drop noisy/un-selected features from the training data.
        self._drop_unselected(X_train, selected_features)

        MS = ModelSelector(Y_train, stratified=self.stratified, shuffle=self.shuffle, verbose=self.verbose)
        MS.fine_tune_models(X_train, Y_train)

        print(f'{datetime.now()} End of fine-tuning\n' +
              f'{datetime.now()} Start of testing')

        X_hold, Y_hold = DL.load_dataset(train=False, test=True, splited=True)

        # Drop noisy/un-selected features from the hold-out data as well, so
        # both frames carry the same columns.
        self._drop_unselected(X_hold, selected_features)

        MS.measure_hold_out(X_hold, Y_hold)

        # Look the best model up once (after the hold-out measurement, which
        # is when the original plotting calls evaluated it) and reuse it.
        best_model = MS.models[MS.best_model_index]

        print(f'{datetime.now()} Shap explain plotting')

        # Plot SHAP explainability and the confusion figure on the hold-out data.
        plot_shap_figure(best_model, X_hold, binary=MS.binary_class)
        plot_confusion_figure(best_model, X_hold, Y_hold)

        # Merge the train and hold-out data and hand them to the model
        # selector (per the original note: train the final model and store it).
        MS.store_final_model(pd.concat([X_train, X_hold]), pd.concat([Y_train, Y_hold]))
        print(f'{datetime.now()} End')
if __name__ == "__main__":
    # Script entry point: announce the run, then build the pipeline object.
    # Constructing Piepeline executes the whole pipeline from its __init__,
    # so the instance itself does not need to be kept.
    print('Starting of model creation')
    Piepeline()