@@ -17,43 +17,52 @@ and run the following codes.
1717
1818``` python
1919import pandas as pd
20- from sklearn.preprocessing import LabelEncoder,MinMaxScaler
20+ from sklearn.preprocessing import LabelEncoder, MinMaxScaler
21+ from sklearn.model_selection import train_test_split
22+ from sklearn.metrics import log_loss, roc_auc_score
2123from deepctr.models import DeepFM
2224
23-
2425data = pd.read_csv(' ./criteo_sample.txt' )
2526
26- sparse_features = [' C' + str (i) for i in range (1 , 27 )]
27- dense_features = [' I' + str (i) for i in range (1 ,14 )]
27+ sparse_features = [' C' + str (i) for i in range (1 , 27 )]
28+ dense_features = [' I' + str (i) for i in range (1 , 14 )]
2829
2930data[sparse_features] = data[sparse_features].fillna(' -1' , )
3031data[dense_features] = data[dense_features].fillna(0 ,)
31-
3232target = [' label' ]
3333
3434# 1. Label encoding for sparse features, and a simple transformation for dense features
3535for feat in sparse_features:
3636 lbe = LabelEncoder()
3737 data[feat] = lbe.fit_transform(data[feat])
38- mms = MinMaxScaler(feature_range = (0 ,1 ))
38+ mms = MinMaxScaler(feature_range = (0 , 1 ))
3939data[dense_features] = mms.fit_transform(data[dense_features])
4040
4141# 2. Count the number of unique values for each sparse field, and record the dense feature field names
4242
43- sparse_feature_dict = {feat: data[feat].nunique() for feat in sparse_features}
43+ sparse_feature_dict = {feat: data[feat].nunique()
44+ for feat in sparse_features}
4445dense_feature_list = dense_features
4546
4647# 3. Generate input data for the model
4748
48- model_input = [data[feat].values for feat in sparse_feature_dict] + [data[feat].values for feat in dense_feature_list]
49-
50- # 4.Define Model,compile and
51-
52-
53- model = DeepFM({" sparse" : sparse_feature_dict, " dense" : dense_feature_list}, final_activation = ' sigmoid' )
54- model.compile(" adam" , " binary_crossentropy" , metrics = [' binary_crossentropy' ], )
55- history = model.fit(model_input, data[target].values,
56- batch_size = 256 , epochs = 1 , verbose = 2 , validation_split = 0.2 ,)
49+ train, test = train_test_split(data, test_size = 0.2 )
50+ train_model_input = [train[feat].values for feat in sparse_feature_dict] + \
51+ [train[feat].values for feat in dense_feature_list]
52+ test_model_input = [test[feat].values for feat in sparse_feature_dict] + \
53+ [test[feat].values for feat in dense_feature_list]
54+
55+ # 4. Define the model, then train, predict, and evaluate
56+ model = DeepFM({" sparse" : sparse_feature_dict,
57+ " dense" : dense_feature_list}, final_activation = ' sigmoid' )
58+ model.compile(" adam" , " binary_crossentropy" ,
59+ metrics = [' binary_crossentropy' ], )
60+
61+ history = model.fit(train_model_input, train[target].values,
62+ batch_size = 256 , epochs = 10 , verbose = 2 , validation_split = 0.2 , )
63+ pred_ans = model.predict(test_model_input, batch_size = 256 )
64+ print (" test LogLoss" , round (log_loss(test[target].values, pred_ans), 4 ))
65+ print (" test AUC" , round (roc_auc_score(test[target].values, pred_ans), 4 ))
5766```
5867
5968## Regression: Movielens
@@ -70,28 +79,37 @@ This example shows how to use ``DeepFM`` to solve a simple binary regression tas
7079
7180``` python
7281import pandas as pd
73- from sklearn.preprocessing import LabelEncoder,MinMaxScaler
82+ from sklearn.preprocessing import LabelEncoder
83+ from sklearn.model_selection import train_test_split
84+ from sklearn.metrics import mean_squared_error
7485from deepctr.models import DeepFM
7586
76-
7787data = pd.read_csv(" ./movielens_sample.txt" )
78- sparse_features = [ " movie_id" ," user_id" ," gender" ," age" ," occupation" ," zip" ]
88+ sparse_features = [" movie_id" , " user_id" ,
89+ " gender" , " age" , " occupation" , " zip" ]
7990target = [' rating' ]
8091
8193# 1. Label encoding for sparse features (this example has no dense features)
8293for feat in sparse_features:
8394 lbe = LabelEncoder()
8495 data[feat] = lbe.fit_transform(data[feat])
85- # 2.count #unique features for each sparse field
86- sparse_feature_dim = {feat:data[feat].nunique() for feat in sparse_features}
87- # 3.generate input data for model
88- model_input = [data[feat].values for feat in sparse_feature_dim]
89- # 4.Define Model,compile and train
90- model = DeepFM({" sparse" :sparse_feature_dim," dense" :[]},final_activation = ' linear' )
91-
92- model.compile(" adam" ," mse" ,metrics = [' mse' ],)
93- history = model.fit(model_input,data[target].values,
94- batch_size = 256 ,epochs = 10 ,verbose = 2 ,validation_split = 0.2 ,)
96+ # 2. Count the number of unique values for each sparse field
97+ sparse_feature_dim = {feat: data[feat].nunique()
98+ for feat in sparse_features}
99+ # 3. Generate input data for the model
100+ train, test = train_test_split(data, test_size = 0.2 )
101+ train_model_input = [train[feat].values for feat in sparse_feature_dim]
102+ test_model_input = [test[feat].values for feat in sparse_feature_dim]
103+ # 4. Define the model, then train, predict, and evaluate
104+ model = DeepFM({" sparse" : sparse_feature_dim, " dense" : []},
105+ final_activation = ' linear' )
106+ model.compile(" adam" , " mse" , metrics = [' mse' ],)
107+
108+ history = model.fit(train_model_input, train[target].values,
109+ batch_size = 256 , epochs = 1 , verbose = 2 , validation_split = 0.2 ,)
110+ pred_ans = model.predict(test_model_input, batch_size = 256 )
111+ print (" test MSE" , round (mean_squared_error(
112+ test[target].values, pred_ans), 4 ))
95113```
96114## Multi-value Input: Movielens
97115----------------------------------
0 commit comments