-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathNormalizer.ts
More file actions
109 lines (99 loc) · 3.26 KB
/
Copy pathNormalizer.ts
File metadata and controls
109 lines (99 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/**
* @license
* Copyright 2021, JsData. All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ==========================================================================
*/
import { convertToNumericTensor2D } from '../utils'
import { Scikit2D, Tensor2D } from '../types'
import { isScikit2D, assert, isDataFrameInterface } from '../typesUtils'
import { TransformerMixin } from '../mixins'
import { getBackend } from '../tf-singleton'
/*
Next steps:
1. Pass the next five scikit-learn tests
*/
export interface NormalizerParams {
  /**
   * Which norm each sample is rescaled by: `'l1'`, `'l2'` or `'max'`.
   * Optional; the `Normalizer` constructor uses `'l2'` when it is omitted.
   */
  norm?: 'l1' | 'l2' | 'max'
}
/**
 * A Normalizer scales each *sample* (row) by the $l_1$, $l_2$ or $max$ norm of that sample.
 * If you imagine the input matrix as a 2D grid, this is a "horizontal" scaling (per-sample),
 * as opposed to a StandardScaler, which is a "vertical" scaling (per-feature).
 *
 * The only parameter is the kind of norm to scale by.
 *
 * @example
 * ```js
 * import { Normalizer } from 'scikitjs'
 *
 * const data = [
 *   [-1, 1],
 *   [-6, 6],
 *   [0, 10],
 *   [10, 20]
 * ]
 * const scaler = new Normalizer({ norm: 'l1' })
 * const result = scaler.fitTransform(data)
 * // result holds (approximately):
 * // [
 * //   [-0.5, 0.5],
 * //   [-0.5, 0.5],
 * //   [0, 1],
 * //   [0.333, 0.667]
 * // ]
 * ```
 */
export class Normalizer extends TransformerMixin {
  /** Which norm rows are divided by. Set from the constructor option; defaults to "l2". */
  norm: string
  /** The number of features seen during fit */
  nFeaturesIn: number
  /** Names of features seen during fit. Only stores feature names if input is a DataFrame */
  featureNamesIn: Array<string>
  /** Useful for pipelines and column transformers to have a default name for transforms */
  name = 'Normalizer'

  /**
   * @param params Optional settings; `params.norm` selects the norm and defaults to `'l2'`.
   */
  constructor({ norm = 'l2' }: NormalizerParams = {}) {
    super()
    this.tf = getBackend()
    this.norm = norm
    this.nFeaturesIn = 0
    this.featureNamesIn = []
  }

  /**
   * Fits a Normalizer to the data.
   *
   * No scaling parameters are learned here: `transform` only reads `this.norm`.
   * Fitting records the number of columns in `X` and, when `X` is a DataFrame,
   * a copy of its column names.
   *
   * @param X The 2D input data.
   * @returns This instance, so calls can be chained.
   * @throws If `X` fails the `isScikit2D` check.
   */
  public fit(X: Scikit2D): Normalizer {
    assert(isScikit2D(X), 'Data can not be converted to a 2D matrix.')
    const tensorArray = convertToNumericTensor2D(X)
    this.nFeaturesIn = tensorArray.shape[1]
    if (isDataFrameInterface(X)) {
      this.featureNamesIn = [...X.columns]
    }
    return this
  }

  /**
   * Transform the data using the Normalizer: every row of `X` is divided by
   * that row's norm.
   *
   * - `'l1'`: sum of absolute values of the row
   * - `'l2'`: square root of the sum of squares of the row
   * - any other value is handled as `'max'`: largest absolute value in the row
   *
   * The division uses `divNoNan`, so a row whose norm is 0 does not produce NaN.
   *
   * @param X The 2D input data.
   * @returns A new tensor with the same shape as `X`, holding the scaled rows.
   * @throws If `X` fails the `isScikit2D` check.
   */
  public transform(X: Scikit2D): Tensor2D {
    assert(isScikit2D(X), 'Data can not be converted to a 2D matrix.')
    // Converted outside of tidy on purpose: if X is already a tensor this may be
    // the caller's own object, so it is never disposed here.
    const tensorArray = convertToNumericTensor2D(X)

    // tidy releases the intermediate tensors (abs / square / sum / reshape ...)
    // created while computing the norms; only the returned result survives.
    return this.tf.tidy(() => {
      const rowNorms =
        this.norm === 'l1'
          ? this.tf.abs(tensorArray).sum(1)
          : this.norm === 'l2'
          ? tensorArray.square().sum(1).sqrt()
          : this.tf.abs(tensorArray).max(1) // max case

      // Reshape to a column vector so the division broadcasts across each row.
      return tensorArray.divNoNan(rowNorms.reshape([-1, 1]))
    })
  }
}