scikit.js/src/preprocessing/Normalizer.ts at update-docs · javascriptdata/scikit.js

109 lines (99 loc) · 3.26 KB
*  @license
* Copyright 2021, JsData. All rights reserved.
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ==========================================================================
import { convertToNumericTensor2D } from '../utils'
import { Scikit2D, Tensor2D } from '../types'
import { isScikit2D, assert, isDataFrameInterface } from '../typesUtils'
import { TransformerMixin } from '../mixins'
import { getBackend } from '../tf-singleton'
Next steps:
1. Pass the next five scikit-learn tests
export interface NormalizerParams {
  /** What kind of norm we wish to scale by. **default = "l2" ** */
  norm?: 'l2' | 'l1' | 'max'
 * A Normalizer scales each *sample* by the $l_1$, $l_2$ or $max$ value in that sample.
 * If you imagine the input matrix as a 2D grid, then this is effectively a "horizontal" scaling (per-sample scaling)
 * as opposed to a StandardScaler which is a "vertical" scaling (per-feature scaling).
 * The only input is what kind of norm you wish to scale by.
 * @example
 * import { Normalizer } from 'scikitjs'
 * const data = [
      [-1, 1],
      [-6, 6],
      [0, 10],
      [10, 20]
    const scaler = new Normalizer({ norm: 'l1' })
    const expected = scaler.fitTransform(scaler)
    const expectedValueAbove = [
      [-0.5, 0.5],
      [-0.5, 0.5],
      [0, 1],
      [0.33, 0.66]
export class Normalizer extends TransformerMixin {
  norm: string
  /** The number of features seen during fit */
  nFeaturesIn: number
  /** Names of features seen during fit. Only stores feature names if input is a DataFrame */
  featureNamesIn: Array<string>
  /** Useful for pipelines and column transformers to have a default name for transforms */
  name = 'Normalizer'
  constructor({ norm = 'l2' }: NormalizerParams = {}) {
    super()
    this.tf = getBackend()
    this.norm = norm
    this.nFeaturesIn = 0
    this.featureNamesIn = []
   * Fits a Normalizer to the data
  public fit(X: Scikit2D): Normalizer {
    assert(isScikit2D(X), 'Data can not be converted to a 2D matrix.')
    const tensorArray = convertToNumericTensor2D(X)
    this.nFeaturesIn = tensorArray.shape[1]
    if (isDataFrameInterface(X)) {
      this.featureNamesIn = [...X.columns]
    return this
   * Transform the data using the Normalizer
  public transform(X: Scikit2D): Tensor2D {
    assert(isScikit2D(X), 'Data can not be converted to a 2D matrix.')
    const tensorArray = convertToNumericTensor2D(X)
    if (this.norm === 'l1') {
      const means = this.tf.abs(tensorArray).sum(1).reshape([-1, 1])
      return tensorArray.divNoNan(means)
    if (this.norm === 'l2') {
      const means = tensorArray.square().sum(1).sqrt().reshape([-1, 1])
      return tensorArray.divNoNan(means)
    // max case
    const means = this.tf.abs(tensorArray).max(1).reshape([-1, 1])
    return tensorArray.divNoNan(means)
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

FilesExpand file tree

Normalizer.ts

Latest commit

History

Normalizer.ts

File metadata and controls