-
Notifications
You must be signed in to change notification settings - Fork 28
Expand file tree
/
Copy pathDataFrameInternal.class.st
More file actions
414 lines (303 loc) · 10 KB
/
Copy pathDataFrameInternal.class.st
File metadata and controls
414 lines (303 loc) · 10 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
"
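I am the internal representation of a DataFrame. I keep all cells in a single Array2D and address them by row index and column index, which lets me store the data compactly and reach any cell directly. On top of cell access I provide access to whole rows and columns, insertion and removal of rows and columns, and conversion to arrays of rows or arrays of columns.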
"
Class {
#name : #DataFrameInternal,
#superclass : #Collection,
#instVars : [
'contents'
],
#category : #'DataFrame-Core'
}
{ #category : #'instance creation' }
DataFrameInternal class >> fromArray2D: anArray2D [
	"Answer a new instance filled cell by cell from anArray2D. The new instance gets its own storage; the elements themselves are the same objects, not copies."

	| copy |
	"Array2D>>extent cannot be used here, because it answers col@row instead of row@col. The dimensions are read with numberOfRows / numberOfColumns, the same selectors the instance side sends to its Array2D."
	copy := self new: anArray2D numberOfRows @ anArray2D numberOfColumns.
	anArray2D withIndicesDo: [ :element :row :column |
		copy at: row at: column put: element ].
	^ copy
]
{ #category : #'instance creation' }
DataFrameInternal class >> new: aPoint [
	"Answer a new instance sized from aPoint, which is read as rows @ columns."

	| instance |
	instance := self new.
	^ instance initialize: aPoint
]
{ #category : #'instance creation' }
DataFrameInternal class >> withColumns: anArrayOfArrays [
	"Answer a new instance whose j-th column holds the elements of the j-th collection of anArrayOfArrays. The number of rows is the size of the longest collection; cells missing from shorter collections are nil."

	| numberOfRows numberOfColumns dfInternal |
	numberOfColumns := anArrayOfArrays size.
	numberOfRows := 0.
	1 to: numberOfColumns do: [ :index |
		numberOfRows := numberOfRows max: (anArrayOfArrays at: index) size ].
	dfInternal := self new: numberOfRows @ numberOfColumns.
	"Every column is empty, so there is no cell to fill."
	numberOfRows = 0 ifTrue: [ ^ dfInternal ].
	1 to: numberOfColumns do: [ :j |
		| column |
		"Convert each column once instead of once per cell; asArray may copy the whole collection."
		column := (anArrayOfArrays at: j) asArray.
		1 to: numberOfRows do: [ :i |
			dfInternal at: i at: j put: (column at: i ifAbsent: [ nil ]) ] ].
	^ dfInternal
]
{ #category : #'instance creation' }
DataFrameInternal class >> withRows: anArrayOfArrays [
	"Answer a new instance whose i-th row holds the elements of the i-th collection of anArrayOfArrays. The number of columns is the size of the longest collection; cells missing from shorter collections are nil."

	| numberOfRows numberOfColumns dfInternal |
	numberOfRows := anArrayOfArrays size.
	numberOfColumns := 0.
	1 to: numberOfRows do: [ :index |
		numberOfColumns := numberOfColumns max: (anArrayOfArrays at: index) size ].
	dfInternal := self new: numberOfRows @ numberOfColumns.
	"Every row is empty, so there is no cell to fill."
	numberOfColumns = 0 ifTrue: [ ^ dfInternal ].
	1 to: numberOfRows do: [ :i |
		| row |
		"Convert each row once instead of once per cell; asArray may copy the whole collection."
		row := (anArrayOfArrays at: i) asArray.
		1 to: numberOfColumns do: [ :j |
			dfInternal at: i at: j put: (row at: j ifAbsent: [ nil ]) ] ].
	^ dfInternal
]
{ #category : #comparing }
DataFrameInternal >> = other [
	"Answer true when other is of exactly my class, has my number of rows and columns, and each of its cells equals my cell at the same row and column."

	other class == self class ifFalse: [ ^ false ].
	other numberOfRows = self numberOfRows ifFalse: [ ^ false ].
	other numberOfColumns = self numberOfColumns ifFalse: [ ^ false ].
	other withIndicesDo: [ :each :i :j |
		(self at: i at: j) = each ifFalse: [ ^ false ] ].
	^ true
]

{ #category : #comparing }
DataFrameInternal >> hash [
	"Answer a hash derived from my cells, so that two instances that are = also answer the same hash. Without this override the inherited hash is used, which is not computed from my cells (presumably identity-based - TODO confirm in the target image)."

	^ self asArrayOfRows hash
]
{ #category : #adding }
DataFrameInternal >> addColumn: anArray atPosition: aNumber [
	"Insert the elements of anArray as a new column at index aNumber. Columns that were at aNumber or further right move one position to the right. Signals SizeMismatch unless anArray has exactly one element per row."

	| rowCount widenedCount grown |
	rowCount := self numberOfRows.
	widenedCount := self numberOfColumns + 1.
	anArray size = rowCount ifFalse: [ SizeMismatch signal ].
	grown := Array2D rows: rowCount columns: widenedCount.
	1 to: rowCount do: [ :row |
		"columns left of the insertion point keep their index"
		1 to: aNumber - 1 do: [ :column |
			grown at: row at: column put: (self at: row at: column) ].
		grown at: row at: aNumber put: (anArray at: row).
		"the remaining columns are shifted right by one"
		aNumber + 1 to: widenedCount do: [ :column |
			grown at: row at: column put: (self at: row at: column - 1) ] ].
	contents := grown
]
{ #category : #adding }
DataFrameInternal >> addRow: anArray atPosition: aNumber [
	"Insert the elements of anArray as a new row at index aNumber. Rows that were at aNumber or further down move one position down. Signals SizeMismatch unless anArray has exactly one element per column."

	| heightenedCount columnCount grown |
	heightenedCount := self numberOfRows + 1.
	columnCount := self numberOfColumns.
	anArray size = columnCount ifFalse: [ SizeMismatch signal ].
	grown := Array2D rows: heightenedCount columns: columnCount.
	1 to: columnCount do: [ :column |
		"rows above the insertion point keep their index"
		1 to: aNumber - 1 do: [ :row |
			grown at: row at: column put: (self at: row at: column) ].
		grown at: aNumber at: column put: (anArray at: column).
		"the remaining rows are shifted down by one"
		aNumber + 1 to: heightenedCount do: [ :row |
			grown at: row at: column put: (self at: row - 1 at: column) ] ].
	contents := grown
]
{ #category : #converting }
DataFrameInternal >> asArray2D [
	"Answer the Array2D that holds my cells. This is the very object stored in contents, not a copy."
^ contents
]
{ #category : #converting }
DataFrameInternal >> asArrayOfColumns [
	"Answer my cells as an array with one element per column, each element being the array of values of that column from the first row to the last."

	| columnIndices |
	columnIndices := 1 to: self numberOfColumns.
	^ columnIndices collect: [ :column | self columnAt: column ]
]
{ #category : #converting }
DataFrameInternal >> asArrayOfRows [
	"Answer my cells as an array with one element per row, each element being the array of values of that row from the first column to the last."

	| rowIndices |
	rowIndices := 1 to: self numberOfRows.
	^ rowIndices collect: [ :row | self rowAt: row ]
]
{ #category : #accessing }
DataFrameInternal >> at: rowIndex at: columnIndex [
	"Answer the value of the cell at row rowIndex and column columnIndex, as read from my Array2D."
^ contents at: rowIndex at: columnIndex
]
{ #category : #accessing }
DataFrameInternal >> at: rowIndex at: columnIndex put: value [
	"Store value in the cell at row rowIndex and column columnIndex of my Array2D. Answers whatever the Array2D answers to at:at:put:."
^ contents at: rowIndex at: columnIndex put: value
]
{ #category : #enumerating }
DataFrameInternal >> collect: aBlock [
	"Answer a new instance of my class built from the result of collecting aBlock over my Array2D."

	| mapped |
	mapped := contents collect: aBlock.
	^ self class fromArray2D: mapped
]
{ #category : #accessing }
DataFrameInternal >> columnAt: aNumber [
	"Answer an array with the values of column aNumber, from the first row to the last."

	| rowCount column |
	rowCount := self numberOfRows.
	column := Array new: rowCount.
	1 to: rowCount do: [ :row |
		column at: row put: (self at: row at: aNumber) ].
	^ column
]
{ #category : #accessing }
DataFrameInternal >> columnAt: aNumber put: anArray [
	"Overwrite column aNumber with the elements of anArray, row by row. anArray is assumed to have one element per row; that check is left to DataFrame."

	| rowCount |
	rowCount := self numberOfRows.
	1 to: rowCount do: [ :row |
		self at: row at: aNumber put: (anArray at: row) ]
]
{ #category : #accessing }
DataFrameInternal >> columnsAt: anArrayOfNumbers [
	"Answer a new instance of my class made of the columns whose indices are listed in anArrayOfNumbers, in the listed order. All of my rows are kept."

	| dfInternal numberOfRows |
	numberOfRows := self numberOfRows.
	"self class instead of a hard-coded DataFrameInternal, as collect: and deepCopy already do"
	dfInternal := self class new: numberOfRows @ anArrayOfNumbers size.
	1 to: numberOfRows do: [ :i |
		"j is the source column in me, k the target column in the result"
		anArrayOfNumbers withIndexDo: [ :j :k |
			dfInternal at: i at: k put: (self at: i at: j) ] ].
	^ dfInternal
]
{ #category : #copying }
DataFrameInternal >> deepCopy [
	"Answer a new instance of my class built by fromArray2D: from my contents. fromArray2D: fills a fresh instance cell by cell with the same element objects, so the copy has its own storage while the elements themselves are shared, not copied."
^ self class fromArray2D: contents
]
{ #category : #enumerating }
DataFrameInternal >> do: aBlock [
	"Enumerate my cells by forwarding do: with aBlock to my Array2D. Answers whatever the Array2D answers."
^ contents do: aBlock
]
{ #category : #initialization }
DataFrameInternal >> initialize [
	"Set up a freshly created instance with the Array2D answered by Array2D new. The class-side new: sends initialize: right afterwards, which replaces this Array2D with one of the requested size."
super initialize.
contents := Array2D new
]
{ #category : #initialization }
DataFrameInternal >> initialize: aPoint [
	"Replace my contents with a new Array2D whose number of rows is aPoint x and whose number of columns is aPoint y."

	| rowCount columnCount |
	rowCount := aPoint x.
	columnCount := aPoint y.
	contents := Array2D rows: rowCount columns: columnCount
]
{ #category : #accessing }
DataFrameInternal >> numberOfColumns [
	"Answer the number of columns of my Array2D, or 0 when the Array2D answers nil for it."

	^ contents numberOfColumns ifNil: [ 0 ]
]
{ #category : #accessing }
DataFrameInternal >> numberOfRows [
	"Answer the number of rows of my Array2D, or 0 when the Array2D answers nil for it."

	^ contents numberOfRows ifNil: [ 0 ]
]
{ #category : #printing }
DataFrameInternal >> printOn: aStream [
	"Print me on aStream by letting my Array2D print itself there."
contents printOn: aStream
]
{ #category : #removing }
DataFrameInternal >> removeColumnAt: columnNumber [
	"Drop the column at index columnNumber. Columns to its right move one position to the left. My contents are replaced by a new Array2D with one column less."

	| rowCount columnCount shrunk |
	rowCount := self numberOfRows.
	columnCount := self numberOfColumns.
	shrunk := Array2D rows: rowCount columns: columnCount - 1.
	1 to: rowCount do: [ :row |
		"columns left of the removed one keep their index"
		1 to: columnNumber - 1 do: [ :column |
			shrunk at: row at: column put: (contents at: row at: column) ].
		"columns right of the removed one are shifted left by one"
		columnNumber + 1 to: columnCount do: [ :column |
			shrunk at: row at: column - 1 put: (contents at: row at: column) ] ].
	contents := shrunk
]
{ #category : #removing }
DataFrameInternal >> removeColumnsOfRowElementsSatisfying: aBlock onRow: rowNumber [
	"Evaluate aBlock with every element of row rowNumber and remove each column for which it answers true. When no column is left, my contents become a 0 by 0 Array2D."

	| dropFlags keptColumns shrunk |
	"dropFlags holds true at j when the j-th column has to be dropped, false otherwise"
	dropFlags := (self rowAt: rowNumber) collect: aBlock.
	keptColumns := (1 to: self numberOfColumns) reject: [ :column | dropFlags at: column ].
	keptColumns isEmpty ifTrue: [
		contents := Array2D rows: 0 columns: 0.
		^ self ].
	shrunk := Array2D rows: self numberOfRows columns: keptColumns size.
	1 to: self numberOfRows do: [ :row |
		"source is the column index in the old contents, target the index in the new one"
		keptColumns withIndexDo: [ :source :target |
			shrunk at: row at: target put: (contents at: row at: source) ] ].
	contents := shrunk
]
{ #category : #removing }
DataFrameInternal >> removeRowAt: rowNumber [
	"Drop the row at index rowNumber. Rows below it move one position up. My contents are replaced by a new Array2D with one row less."

	| rowCount columnCount shrunk |
	rowCount := self numberOfRows.
	columnCount := self numberOfColumns.
	shrunk := Array2D rows: rowCount - 1 columns: columnCount.
	1 to: columnCount do: [ :column |
		"rows above the removed one keep their index"
		1 to: rowNumber - 1 do: [ :row |
			shrunk at: row at: column put: (contents at: row at: column) ].
		"rows below the removed one are shifted up by one"
		rowNumber + 1 to: rowCount do: [ :row |
			shrunk at: row - 1 at: column put: (contents at: row at: column) ] ].
	contents := shrunk
]
{ #category : #removing }
DataFrameInternal >> removeRowsOfColumnElementsSatisfying: aBlock onColumn: columnNumber [
	"Evaluate aBlock with every element of column columnNumber and remove each row for which it answers true. When no row is left, my contents become a 0 by 0 Array2D."

	| dropFlags keptRows shrunk |
	"dropFlags holds true at i when the i-th row has to be dropped, false otherwise"
	dropFlags := (self columnAt: columnNumber) collect: aBlock.
	keptRows := (1 to: self numberOfRows) reject: [ :row | dropFlags at: row ].
	keptRows isEmpty ifTrue: [
		contents := Array2D rows: 0 columns: 0.
		^ self ].
	shrunk := Array2D rows: keptRows size columns: self numberOfColumns.
	1 to: self numberOfColumns do: [ :column |
		"source is the row index in the old contents, target the index in the new one"
		keptRows withIndexDo: [ :source :target |
			shrunk at: target at: column put: (contents at: source at: column) ] ].
	contents := shrunk
]
{ #category : #filling }
DataFrameInternal >> replaceMissingValuesStrings: aSet [
	"Overwrite with nil every cell whose current value is included in aSet."

	| rowCount columnCount |
	rowCount := self numberOfRows.
	columnCount := self numberOfColumns.
	1 to: rowCount do: [ :row |
		1 to: columnCount do: [ :column |
			| cell |
			cell := contents at: row at: column.
			(aSet includes: cell) ifTrue: [
				contents at: row at: column put: nil ] ] ]
]
{ #category : #accessing }
DataFrameInternal >> rowAt: aNumber [
	"Answer an array with the values of row aNumber, from the first column to the last."

	| columnCount row |
	columnCount := self numberOfColumns.
	row := Array new: columnCount.
	1 to: columnCount do: [ :column |
		row at: column put: (self at: aNumber at: column) ].
	^ row
]
{ #category : #accessing }
DataFrameInternal >> rowAt: aNumber put: anArray [
	"Overwrite row aNumber with the elements of anArray, column by column. anArray is assumed to have one element per column; that check is left to DataFrame."

	| columnCount |
	columnCount := self numberOfColumns.
	1 to: columnCount do: [ :column |
		self at: aNumber at: column put: (anArray at: column) ]
]
{ #category : #accessing }
DataFrameInternal >> rowsAt: anArrayOfNumbers [
	"Answer a new instance of my class made of the rows whose indices are listed in anArrayOfNumbers, in the listed order. All of my columns are kept."

	| dfInternal numberOfColumns |
	numberOfColumns := self numberOfColumns.
	"self class instead of a hard-coded DataFrameInternal, as collect: and deepCopy already do"
	dfInternal := self class new: anArrayOfNumbers size @ numberOfColumns.
	"i is the source row in me, k the target row in the result"
	anArrayOfNumbers withIndexDo: [ :i :k |
		1 to: numberOfColumns do: [ :j |
			dfInternal at: k at: j put: (self at: i at: j) ] ].
	^ dfInternal
]
{ #category : #enumerating }
DataFrameInternal >> withIndicesCollect: aBlock [
	"Answer a new instance of my class built from the result of sending withIndicesCollect: with aBlock to my Array2D."

	| mapped |
	mapped := contents withIndicesCollect: aBlock.
	^ self class fromArray2D: mapped
]
{ #category : #enumerating }
DataFrameInternal >> withIndicesDo: aBlock [
	"Enumerate my cells together with their indices by forwarding withIndicesDo: to my Array2D. The senders in this class (= and fromArray2D:) pass a block taking the element, the row index and the column index, in that order. Answers whatever the Array2D answers."
^ contents withIndicesDo: aBlock
]