import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from tensorflow import keras
from tqdm import tqdm

# Only let TensorFlow report errors; lower-severity log records are dropped.
tf.get_logger().setLevel('ERROR')

# Load the CTR dataset. Every column from position 2 onward is used as a
# feature; the `click` column is the prediction target.
data = pd.read_csv('../data/data.csv')
data_X = data.iloc[:, 2:]
data_y = data.click.values

# Notebook cell expression: displays the raw feature frame.
data_X
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| | hour | C1 | banner_pos | site_id | site_domain | site_category | app_id | app_domain | app_category | device_id | ... | device_type | device_conn_type | C14 | C15 | C16 | C17 | C18 | C19 | C20 | C21 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 14102100 | 1005 | 0 | 1fbe01fe | f3845767 | 28905ebd | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 2 | 15705 | 320 | 50 | 1722 | 0 | 35 | -1 | 79 |
| 1 | 14102100 | 1005 | 0 | 4dd0a958 | 79cf0c8d | f028772b | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 20352 | 320 | 50 | 2333 | 0 | 39 | -1 | 157 |
| 2 | 14102100 | 1005 | 0 | 543a539e | c7ca3108 | 3e814130 | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 20352 | 320 | 50 | 2333 | 0 | 39 | -1 | 157 |
| 3 | 14102100 | 1005 | 0 | 8cbacf0b | a434fa42 | f028772b | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 19772 | 320 | 50 | 2227 | 0 | 687 | 100075 | 48 |
| 4 | 14102100 | 1005 | 0 | f282ab5a | 61eb5bc4 | f028772b | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 18993 | 320 | 50 | 2161 | 0 | 35 | -1 | 157 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | 14102101 | 1005 | 0 | 1fbe01fe | f3845767 | 28905ebd | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 15705 | 320 | 50 | 1722 | 0 | 35 | 100084 | 79 |
| 99996 | 14102101 | 1005 | 1 | d9750ee7 | 98572c79 | f028772b | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 17614 | 320 | 50 | 1993 | 2 | 1063 | -1 | 33 |
| 99997 | 14102101 | 1005 | 0 | 85f751fd | c4e18dd6 | 50e219e0 | febd1138 | 82e27996 | 0f2161f8 | a99f214a | ... | 1 | 0 | 21611 | 320 | 50 | 2480 | 3 | 297 | 100111 | 61 |
| 99998 | 14102101 | 1005 | 0 | 1fbe01fe | f3845767 | 28905ebd | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 15699 | 320 | 50 | 1722 | 0 | 35 | 100084 | 79 |
| 99999 | 14102101 | 1005 | 0 | 1fbe01fe | f3845767 | 28905ebd | ecad2386 | 7801e8d9 | 07d7df22 | a99f214a | ... | 1 | 0 | 15706 | 320 | 50 | 1722 | 0 | 35 | -1 | 79 |
100000 rows × 22 columns
可以看到测试的数据全都是类别特征, 其实实际的业务场景中几乎也都是类别型的特征
这里我们给特征进行Label Encode
# Label-encode every column independently: `apply` calls fit_transform once
# per column, so each column is mapped onto its own integer codes.
data_X = data_X.apply(LabelEncoder().fit_transform)

# Notebook cell expression: displays the encoded feature frame.
data_X
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| | hour | C1 | banner_pos | site_id | site_domain | site_category | app_id | app_domain | app_category | device_id | ... | device_type | device_conn_type | C14 | C15 | C16 | C17 | C18 | C19 | C20 | C21 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2 | 0 | 110 | 823 | 1 | 712 | 28 | 0 | 5703 | ... | 1 | 1 | 128 | 3 | 2 | 36 | 0 | 1 | 0 | 18 |
| 1 | 0 | 2 | 0 | 303 | 403 | 16 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 303 | 3 | 2 | 103 | 0 | 2 | 0 | 31 |
| 2 | 0 | 2 | 0 | 334 | 668 | 3 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 303 | 3 | 2 | 103 | 0 | 2 | 0 | 31 |
| 3 | 0 | 2 | 0 | 543 | 563 | 16 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 234 | 3 | 2 | 76 | 0 | 26 | 53 | 10 |
| 4 | 0 | 2 | 0 | 924 | 316 | 16 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 210 | 3 | 2 | 71 | 0 | 1 | 0 | 31 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | 1 | 2 | 0 | 110 | 823 | 1 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 128 | 3 | 2 | 36 | 0 | 1 | 59 | 18 |
| 99996 | 1 | 2 | 1 | 825 | 510 | 16 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 173 | 3 | 2 | 60 | 2 | 30 | 0 | 6 |
| 99997 | 1 | 2 | 0 | 519 | 658 | 5 | 767 | 31 | 2 | 5703 | ... | 1 | 0 | 407 | 3 | 2 | 130 | 3 | 13 | 77 | 13 |
| 99998 | 1 | 2 | 0 | 110 | 823 | 1 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 122 | 3 | 2 | 36 | 0 | 1 | 59 | 18 |
| 99999 | 1 | 2 | 0 | 110 | 823 | 1 | 712 | 28 | 0 | 5703 | ... | 1 | 0 | 129 | 3 | 2 | 36 | 0 | 1 | 0 | 18 |
100000 rows × 22 columns
每一个特征都独立进行了label 编码, 这种好处是可以直接进行embedding
当所有特征共享同一张embedding表(共享权值)的时候, 可以给每列特征的label加上它之前所有特征的类别数之和, 使所有特征的label在同一张look up table中互不重叠
这也是所有模型代码中 offset 的作用
e.g. field_dims = [2, 4, 2], offsets = [0, 2, 6]
所以,实际look up table中
0 - 1行 对应 特征 X0, 即 field_dims[0]
2 - 5行 对应 特征 X1, 即 field_dims[1]
6 - 7行 对应 特征 X2, 即 field_dims[2]
但实际特征取值 forward(self, x) 的 x大小 只在自身词表内取值
比如:X1取值1,对应Embedding内行数就是 offsets[1] + X1 = 2 + 1 = 3
# Per-field vocabulary sizes (largest encoded value of each column + 1).
# This array is passed to the models below as `feature_fields`.
fields = data_X.max().values + 1

# Notebook cell expression: displays the field sizes.
fields
# Output:
# array([    2,     6,     6,   987,   872,    18,   769,    62,    19,
#         8544, 47309,  2606,     4,     4,   448,     5,     6,   141,
#            4,    38,   144,    33], dtype=int64)
# Stratified 60/20/20 train/validation/test split: hold out 20% for test,
# then take 25% of the remaining 80% (20% overall) for validation.
tmp_X, test_X, tmp_y, test_y = train_test_split(
    data_X, data_y, test_size=0.2, random_state=42, stratify=data_y)
train_X, val_X, train_y, val_y = train_test_split(
    tmp_X, tmp_y, test_size=0.25, random_state=42, stratify=tmp_y)

# The data is a random sample of 100,000 rows from the Avazu dataset.
优化器统一Adam, lr = 0.001
epoch 为 1, batch_size = 32
主要的目的是跑通所有的模型
epoch多几次, 调调参数对稍微复杂的网络有好处
tips : 类别特征embedding等价于一层没有bias项的全连接,所以模型中几乎都用embedding来模拟LR线性过程
from model import LR

# Train the LR model for one epoch; AUC is tracked on the training data and
# on the validation split.
model = LR.LogisticRegression(feature_fields=fields)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 5s 86us/sample - loss: 0.4232 - auc: 0.7029 - val_loss: 0.4135 - val_auc: 0.7317
<tensorflow.python.keras.callbacks.History at 0x243c491bdd8>
from model import FM

# Train the FM model (embedding size 8) for one epoch with validation AUC.
model = FM.FactorizationMachine(feature_fields=fields, embed_dim=8)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 14s 229us/sample - loss: 0.4195 - auc_1: 0.7145 - val_loss: 0.4058 - val_auc_1: 0.7435
<tensorflow.python.keras.callbacks.History at 0x243cb3ecc18>
from model import FFM

# Train the FFM model (embedding size 8) for one epoch with validation AUC.
model = FFM.FieldFactorizationMachine(feature_fields=fields, embed_dim=8)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 216s 4ms/sample - loss: 0.4079 - auc_2: 0.7364 - val_loss: 0.4018 - val_auc_2: 0.7529
<tensorflow.python.keras.callbacks.History at 0x243d1f00908>
from model import AFM

# Train the AFM model for one epoch with validation AUC.
model = AFM.AttentionalFactorizationMachine(
    feature_fields=fields, embed_dim=8, attn_size=8, dropout=0.2)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 20s 325us/sample - loss: 0.4267 - auc_3: 0.6965 - val_loss: 0.4119 - val_auc_3: 0.7317
<tensorflow.python.keras.callbacks.History at 0x243ec9d36d8>
from model import DeepFM

# Train the DeepFM model for one epoch with validation AUC.
model = DeepFM.DeepFM(
    feature_fields=fields, embed_dim=8, mlp_dims=[32, 16], dropout=0.2)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 17s 288us/sample - loss: 0.4250 - auc_4: 0.7027 - val_loss: 0.4068 - val_auc_4: 0.7410
<tensorflow.python.keras.callbacks.History at 0x243cc90a438>
from model import xDeepFM

# Train the xDeepFM model for one epoch with validation AUC.
model = xDeepFM.xDeepFM(
    feature_fields=fields, embed_dim=8, mlp_dims=(32, 16),
    dropout=0.3, cross_layer_sizes=(16, 16))
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 20s 340us/sample - loss: 0.4291 - auc_5: 0.6972 - val_loss: 0.4099 - val_auc_5: 0.7386
<tensorflow.python.keras.callbacks.History at 0x243f5982128>
from model import PNN

# Train the PNN model (method='inner') for one epoch with validation AUC.
model = PNN.PNN(
    feature_fields=fields, embed_dim=8, mlp_dims=[32, 16],
    dropout=0.2, method='inner')
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 15s 246us/sample - loss: 0.4352 - auc_6: 0.6846 - val_loss: 0.4120 - val_auc_6: 0.7339
<tensorflow.python.keras.callbacks.History at 0x243fb894080>
from model import DCN

# Train the DCN model (3 layers via num_layers) for one epoch with
# validation AUC.
model = DCN.DeepCrossNet(
    feature_fields=fields, embed_dim=8, num_layers=3,
    mlp_dims=[32, 16], dropout=0.2)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 14s 241us/sample - loss: 0.4152 - auc_7: 0.7203 - val_loss: 0.4049 - val_auc_7: 0.7458
<tensorflow.python.keras.callbacks.History at 0x243fa8b5b00>
from model import AutoInt

# Train the AutoInt model for one epoch with validation AUC.
model = AutoInt.AutoInt(
    feature_fields=fields, embed_dim=16, head_num=4, attn_layers=3,
    mlp_dims=(32, 16), dropout=0.2)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 29s 477us/sample - loss: 0.4283 - auc_8: 0.7040 - val_loss: 0.4064 - val_auc_8: 0.7437
<tensorflow.python.keras.callbacks.History at 0x243858b2be0>
from model import FiBiNET

# Train the FiBiNET model (pooling='mean') for one epoch with validation AUC.
model = FiBiNET.FiBiNET(
    feature_fields=fields, embed_dim=8, reduction_ratio=2, pooling='mean')
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 75s 1ms/sample - loss: 0.4149 - auc_9: 0.7225 - val_loss: 0.4094 - val_auc_9: 0.7445
<tensorflow.python.keras.callbacks.History at 0x2439817def0>
from model import DCNv2

# Train DCNv2 with cross_method='Matrix' for one epoch with validation AUC.
model = DCNv2.DeepCrossNetv2(
    feature_fields=fields, embed_dim=16, layer_num=2,
    mlp_dims=(32, 16), dropout=0.1, cross_method='Matrix')
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 39s 645us/sample - loss: 0.4122 - auc_10: 0.7278 - val_loss: 0.4039 - val_auc_10: 0.7517
<tensorflow.python.keras.callbacks.History at 0x24421bdd198>
# Same DCNv2 configuration as the 'Matrix' run, but with cross_method='Mix'.
# Relies on the DCNv2 module imported for that earlier run.
model = DCNv2.DeepCrossNetv2(
    feature_fields=fields, embed_dim=16, layer_num=2,
    mlp_dims=(32, 16), dropout=0.1, cross_method='Mix')
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: Train on 60000 samples, validate on 20000 samples
60000/60000 [==============================] - 30s 507us/sample - loss: 0.4142 - auc_11: 0.7233 - val_loss: 0.4066 - val_auc_11: 0.7464
<tensorflow.python.keras.callbacks.History at 0x24424dac080>
from model import DIFM

# Train the DIFM model for one epoch with validation AUC.
model = DIFM.DIFM(feature_fields=fields, embed_dim=8, head_num=2, dropout=0.1)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: 1875/1875 [==============================] - 11s 6ms/step - loss: 0.4154 - auc: 0.7222 - val_loss: 0.4100 - val_auc: 0.7392
<tensorflow.python.keras.callbacks.History at 0x1d2a7f34e20>
from model import AFN

# Train the AFN model for one epoch with validation AUC.
# Note that this constructor takes `embed_size`, not `embed_dim`.
model = AFN.AFN(feature_fields=fields, embed_size=8, hidden_size=256, dropout=0.1)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: 1875/1875 [==============================] - 14s 8ms/step - loss: 0.4203 - auc: 0.7150 - val_loss: 0.4073 - val_auc: 0.7446
<tensorflow.python.keras.callbacks.History at 0x206acfd5940>
from model import ONN

# Train the ONN model for one epoch with validation AUC.
model = ONN.ONN(feature_fields=fields, embed_dim=8, mlp_dims=[64, 32], dropout=0.1)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=1)
# Output: 1875/1875 [==============================] - 137s 73ms/step - loss: 0.4377 - auc: 0.7004 - val_loss: 0.4053 - val_auc: 0.7478
<tensorflow.python.keras.callbacks.History at 0x24f6a8b6a90>
Deep Interest Net在预测的时候,对用户不同的行为的注意力是不一样的
在生成User embedding的时候,加入了Activation Unit Layer.这一层产生了每个用户行为的权重,并乘上相应的物品embedding,从而生成了user interest embedding的表示
实际例子: Amazon Book数据 100K
每条数据记录会有用户的行为数据
只保留了商品特征,以及历史上的商品hist的特征.
# Preprocessed data.
# The preprocessing function lives in AmazonDataPreprocress.py.
# The raw data is a .txt file.
data = pd.read_csv('../data/amazon-books-100k-preprocessed.csv', index_col=0)

# Notebook cell expression: displays the loaded frame.
data
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| | hist_cate_0 | hist_cate_1 | hist_cate_2 | hist_cate_3 | hist_cate_4 | hist_cate_5 | hist_cate_6 | hist_cate_7 | hist_cate_8 | hist_cate_9 | ... | hist_cate_32 | hist_cate_33 | hist_cate_34 | hist_cate_35 | hist_cate_36 | hist_cate_37 | hist_cate_38 | hist_cate_39 | cateID | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 142 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 751 | 0 |
| 1 | 142 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
| 2 | 142 | 142 | 142 | 142 | 97 | 142 | 142 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1094 | 0 |
| 3 | 142 | 142 | 142 | 142 | 97 | 142 | 142 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
| 4 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | 142 | 142 | 142 | 142 | 142 | 751 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
| 99996 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 |
| 99997 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 607 | 1 |
| 99998 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 |
| 99999 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
100000 rows × 42 columns
# Features are every column except the last one (`label`): the hist_cate_*
# history columns plus cateID. `label` is the target.
data_X = data.iloc[:, :-1]
data_y = data.label.values

# Largest category id across all feature columns; passed to DIN as
# `feature_dim`.
# NOTE(review): the CTR section sizes vocabularies as `max + 1`. Confirm that
# DIN allocates feature_dim + 1 embedding rows; otherwise id 1347 would be out
# of range.
fields = data_X.max().max()

# Notebook cell expression: displays the value.
fields
# Output: 1347
# Stratified 60/20/20 train/validation/test split (20% test, then 25% of the
# remainder for validation).
tmp_X, test_X, tmp_y, test_y = train_test_split(
    data_X, data_y, test_size=0.2, random_state=42, stratify=data_y)
train_X, val_X, train_y, val_y = train_test_split(
    tmp_X, tmp_y, test_size=0.25, random_state=42, stratify=tmp_y)

from model import DIN

# Train DIN for two epochs; AUC is tracked on training and validation data.
model = DIN.DeepInterestNet(
    feature_dim=fields, embed_dim=8, mlp_dims=[64, 32], dropout=0.2)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[keras.metrics.AUC()],
)
model.fit(train_X.values, train_y, batch_size=32,
          validation_data=(val_X.values, val_y), epochs=2)
# Output: Train on 60000 samples, validate on 20000 samples
Epoch 1/2
60000/60000 [==============================] - 14s 235us/sample - loss: 0.6788 - auc: 0.5817 - val_loss: 0.6751 - val_auc: 0.5981
Epoch 2/2
60000/60000 [==============================] - 11s 188us/sample - loss: 0.6687 - auc: 0.6080 - val_loss: 0.6744 - val_auc: 0.5921
<tensorflow.python.keras.callbacks.History at 0x1abfc62fb00>
相比于DIN, DIEN的改动:
1) 关注兴趣的演化过程,提出了兴趣进化网络,用序列模型做的, DIN中用户兴趣之间是相互独立的,但实际上的兴趣是不断进化的
2) 设计了一个兴趣抽取层,加入了一个二分类模型来辅助计算兴趣抽取的准确性
3) 用序列模型表达用户的兴趣动态变化性
实际的数据用例和DIN一样
# Reload the same preprocessed Amazon Books frame used in the DIN section.
data = pd.read_csv('../data/amazon-books-100k-preprocessed.csv', index_col=0)

# Notebook cell expression: displays the loaded frame.
data
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| | hist_cate_0 | hist_cate_1 | hist_cate_2 | hist_cate_3 | hist_cate_4 | hist_cate_5 | hist_cate_6 | hist_cate_7 | hist_cate_8 | hist_cate_9 | ... | hist_cate_32 | hist_cate_33 | hist_cate_34 | hist_cate_35 | hist_cate_36 | hist_cate_37 | hist_cate_38 | hist_cate_39 | cateID | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 142 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 751 | 0 |
| 1 | 142 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
| 2 | 142 | 142 | 142 | 142 | 97 | 142 | 142 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1094 | 0 |
| 3 | 142 | 142 | 142 | 142 | 97 | 142 | 142 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
| 4 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | 142 | 142 | 142 | 142 | 142 | 751 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
| 99996 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 |
| 99997 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | 142 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 607 | 1 |
| 99998 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 |
| 99999 | 142 | 142 | 142 | 142 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 1 |
100000 rows × 42 columns
from model.DIEN import DeepInterestEvolutionNet, auxiliary_sample

# Largest value in the whole frame, passed to the model as `feature_dim`.
# NOTE(review): this maximum also includes the `label` column, whereas the DIN
# section computes it over the feature columns only; confirm the model
# tolerates feature_dim == max id (no + 1).
fields = data.max().max()
data_X = data.iloc[:, :-1]
data_y = data.label.values

# Stratified 60/20/20 train/validation/test split.
tmp_X, test_X, tmp_y, test_y = train_test_split(
    data_X, data_y, test_size=0.2, random_state=42, stratify=data_y)
train_X, val_X, train_y, val_y = train_test_split(
    tmp_X, tmp_y, test_size=0.25, random_state=42, stratify=tmp_y)

# Samples for the auxiliary loss; built from the training DataFrame before it
# is converted to a NumPy array.
train_X_neg = auxiliary_sample(train_X)

train_X = train_X.values
val_X = val_X.values
test_X = test_X.values

# The training stream is reshuffled over the full training set; validation is
# batched in its original order.
train_loader = (
    tf.data.Dataset.from_tensor_slices((train_X, train_X_neg, train_y))
    .shuffle(len(train_X))
    .batch(128)
)
val_loader = tf.data.Dataset.from_tensor_slices((val_X, val_y)).batch(128)

model = DeepInterestEvolutionNet(
    feature_dim=fields, embed_dim=4, mlp_dims=[32, 32],
    dropout=0.2, gru_type='GRU')
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
epoches = 3

# Custom training loop: the model returns (prediction, auxiliary loss), and the
# auxiliary loss is added to the classification loss.
# NOTE(review): the model is called without an explicit `training` flag, so
# dropout may be inactive during training; confirm against the model's call().
for epoch in range(epoches):
    # ---- training pass ----
    epoch_train_loss = tf.keras.metrics.Mean()
    # The batch index was unused, so tqdm wraps the dataset directly.
    for x, neg_x, y in tqdm(train_loader):
        with tf.GradientTape() as tape:
            out, aux_loss = model(x, neg_x)
            # The functional binary_crossentropy does not align shapes: a
            # (B, 1) prediction against (B,) labels would broadcast to (B, B).
            # Reshaping to the label shape is a no-op when they already match.
            out = tf.reshape(out, tf.shape(y))
            loss = tf.keras.losses.binary_crossentropy(y, out)
            loss = tf.reduce_mean(loss) + tf.cast(aux_loss, tf.float32)
            loss = tf.reduce_mean(loss)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(
            grads_and_vars=zip(grads, model.trainable_variables))
        epoch_train_loss(loss)

    # ---- validation pass (classification loss only, no auxiliary term) ----
    epoch_val_loss = tf.keras.metrics.Mean()
    for x, y in tqdm(val_loader):
        out, _ = model(x)
        out = tf.reshape(out, tf.shape(y))
        loss = tf.keras.losses.binary_crossentropy(y, out)
        loss = tf.reduce_mean(loss)
        epoch_val_loss(loss)

    print('EPOCH : %s, train loss : %s, val loss: %s' % (epoch,
                                                         epoch_train_loss.result().numpy(),
                                                         epoch_val_loss.result().numpy()))
# Output (captured from the original run, continues below):
# 469it [01:42, 4.58it/s]
157it [00:11, 14.24it/s]
0it [00:00, ?it/s]
EPOCH : 0, train loss : 1.9061264, val loss: 0.69325197
469it [01:43, 4.55it/s]
157it [00:11, 14.19it/s]
0it [00:00, ?it/s]
EPOCH : 1, train loss : 0.80915856, val loss: 0.6931492
469it [01:42, 4.57it/s]
157it [00:11, 14.26it/s]
EPOCH : 2, train loss : 0.7702951, val loss: 0.693148