1+ from  collections  import  OrderedDict 
12from  itertools  import  chain 
23
3- from  tensorflow .python .keras  import  Input 
44from  tensorflow .python .keras .initializers  import  RandomNormal 
5- from  tensorflow .python .keras .layers  import  Embedding , Dense , Reshape , Concatenate 
5+ from  tensorflow .python .keras .layers  import  Embedding , Dense , Reshape , Concatenate ,  Input ,  add 
66from  tensorflow .python .keras .regularizers  import  l2 
77from  .sequence  import  SequencePoolingLayer 
8- from  .utils  import  get_linear_logit 
98
109
def create_input_dict(feature_dim_dict, prefix=''):
    """Build Keras ``Input`` placeholders for sparse and dense features.

    :param feature_dim_dict: dict with ``"sparse"`` and ``"dense"`` keys, each
        a list of feature objects exposing a ``.name`` attribute.
    :param prefix: optional string prepended to every layer name.
    :return: tuple ``(sparse_input, dense_input)`` of OrderedDicts mapping
        feature name -> Input tensor, preserving the given feature order.
    """
    sparse_input = OrderedDict()
    for i, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_input[feat.name] = Input(
            shape=(1,), name=prefix + 'sparse_' + str(i) + '-' + feat.name)

    dense_input = OrderedDict()
    for i, feat in enumerate(feature_dim_dict["dense"]):
        # Key by feat.name (not the feature object) so dense features are
        # addressed the same way as sparse ones above.
        dense_input[feat.name] = Input(
            shape=(1,), name=prefix + 'dense_' + str(i) + '-' + feat.name)

    return sparse_input, dense_input
1723
1824
def create_sequence_input_dict(feature_dim_dict, mask_zero=True):
    """Build ``Input`` placeholders and pooling metadata for sequence features.

    :param feature_dim_dict: dict whose optional ``'sequence'`` key lists
        feature objects with ``.name``, ``.maxlen`` and ``.combiner``.
    :param mask_zero: when True, rely on embedding masking and return ``None``
        for the two length dicts; when False, build explicit length inputs.
    :return: ``(sequence_input_dict, sequence_pooling_dict,
        sequence_len_dict, sequence_max_len_dict)``.
    """
    seq_feats = feature_dim_dict.get('sequence', [])

    sequence_input_dict = {}
    sequence_pooling_dict = {}
    for i, feat in enumerate(seq_feats):
        sequence_input_dict[feat.name] = Input(
            shape=(feat.maxlen,), name='seq_' + str(i) + '-' + feat.name)
        sequence_pooling_dict[feat.name] = feat.combiner

    if mask_zero:
        # Masking handles variable lengths; no explicit length tensors needed.
        sequence_len_dict = None
        sequence_max_len_dict = None
    else:
        sequence_len_dict = {}
        sequence_max_len_dict = {}
        for i, feat in enumerate(seq_feats):
            sequence_len_dict[feat.name] = Input(
                shape=(1,), name='seq_length' + str(i) + '-' + feat.name)
            sequence_max_len_dict[feat.name] = feat.maxlen

    return sequence_input_dict, sequence_pooling_dict, sequence_len_dict, sequence_max_len_dict
3140
3241
def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed, l2_reg, prefix='sparse', seq_mask_zero=True):
    """Create an ``Embedding`` layer per sparse and sequence feature.

    :param feature_dim_dict: dict with a ``"sparse"`` list (and optionally a
        ``'sequence'`` list) of feature objects exposing ``.name`` and
        ``.dimension``.
    :param embedding_size: embedding width, or the string ``'auto'`` to use
        the ``6 * dimension ** 0.25`` heuristic per feature.
    :param init_std: stddev of the RandomNormal initializer.
    :param seed: initializer seed.
    :param l2_reg: L2 regularization strength on the embedding weights.
    :param prefix: name prefix for every embedding layer.
    :param seq_mask_zero: ``mask_zero`` flag for sequence-feature embeddings
        only; plain sparse features are never masked.
    :return: dict mapping feature name -> Embedding layer.
    """
    def _embedding_dim(feat):
        # 'auto' heuristic: 6 * dimension ** 0.25, truncated to an int.
        if embedding_size == 'auto':
            return 6 * int(pow(feat.dimension, 0.25))
        return embedding_size

    def _make_embedding(feat, name, mask_zero=False):
        # Single construction point replacing four duplicated branches.
        return Embedding(feat.dimension, _embedding_dim(feat),
                         embeddings_initializer=RandomNormal(
                             mean=0.0, stddev=init_std, seed=seed),
                         embeddings_regularizer=l2(l2_reg),
                         name=name, mask_zero=mask_zero)

    sparse_embedding = {feat.name: _make_embedding(
        feat, prefix + '_emb_' + str(i) + '-' + feat.name)
        for i, feat in enumerate(feature_dim_dict["sparse"])}

    if 'sequence' in feature_dim_dict:
        # Sequence features continue the numbering after the sparse ones and
        # overwrite any same-named sparse entry, as before.
        count = len(sparse_embedding)
        for feat in feature_dim_dict['sequence']:
            sparse_embedding[feat.name] = _make_embedding(
                feat, prefix + '_emb_' + str(count) + '-' + feat.name,
                mask_zero=seq_mask_zero)
            count += 1

    return sparse_embedding
7585
@@ -109,7 +119,6 @@ def merge_sequence_input(embedding_dict, embed_list, sequence_input_dict, sequen
109119
110120
def get_embedding_vec_list(embedding_dict, input_dict):
    """Apply each feature's embedding layer to its input tensor.

    :param embedding_dict: mapping feature name -> callable embedding layer.
    :param input_dict: mapping feature name -> input tensor; its iteration
        order determines the order of the returned list.
    :return: list of embedded tensors, one per entry of ``input_dict``.
    """
    embed_list = []
    for name, tensor in input_dict.items():
        embed_list.append(embedding_dict[name](tensor))
    return embed_list
115124
@@ -121,12 +130,15 @@ def get_varlen_embedding_vec_dict(embedding_dict, input_dict):
121130
122131
def get_pooling_vec_list(sequence_embed_dict, sequence_len_dict, sequence_max_len_dict, sequence_pooling_dict):
    """Pool every embedded sequence into a fixed-length vector.

    When either length dict is ``None`` (mask-zero mode) the pooling layer is
    built with max length -1 and called on the embedding alone; otherwise it
    receives ``[embedding, length]`` with the feature's true max length.
    """
    masked = sequence_len_dict is None or sequence_max_len_dict is None
    pooled = []
    for name, embed in sequence_embed_dict.items():
        if masked:
            pooled.append(
                SequencePoolingLayer(-1, sequence_pooling_dict[name])(embed))
        else:
            pooled.append(
                SequencePoolingLayer(sequence_max_len_dict[name],
                                     sequence_pooling_dict[name])(
                    [embed, sequence_len_dict[name]]))
    return pooled
126138
127139
def get_inputs_list(inputs):
    """Flatten a sequence of dicts into one list of their values.

    ``None`` entries in *inputs* are skipped; ordering follows each dict's
    own iteration order.
    """
    return [value
            for mapping in inputs if mapping is not None
            for value in mapping.values()]
130142
131143
132144def  get_inputs_embedding (feature_dim_dict , embedding_size , l2_reg_embedding , l2_reg_linear , init_std , seed , include_linear = True ):
@@ -162,3 +174,25 @@ def get_inputs_embedding(feature_dim_dict, embedding_size, l2_reg_embedding, l2_
162174    inputs_list  =  get_inputs_list (
163175        [sparse_input_dict , dense_input_dict , sequence_input_dict , sequence_input_len_dict ])
164176    return  deep_emb_list , linear_logit , inputs_list 
177+ 
178+ 
def get_linear_logit(linear_term, dense_input_, l2_reg):
    """Combine sparse linear terms with a linear projection of dense inputs.

    :param linear_term: list of logit tensors from the sparse/linear side;
        may be empty.
    :param dense_input_: dict mapping feature name -> dense Input tensor;
        may be empty.
    :param l2_reg: L2 regularization strength for the dense projection.
    :return: a single combined logit tensor, or ``None`` when both the term
        list and the dense inputs are empty.
    """
    if not linear_term:
        combined = None
    elif len(linear_term) == 1:
        combined = linear_term[0]
    else:
        combined = add(linear_term)

    dense_tensors = list(dense_input_.values())
    if dense_tensors:
        if len(dense_tensors) == 1:
            dense_concat = dense_tensors[0]
        else:
            dense_concat = Concatenate()(dense_tensors)
        dense_logit = Dense(1, activation=None, use_bias=False,
                            kernel_regularizer=l2(l2_reg))(dense_concat)
        # Keep the original operand order: dense logit first.
        combined = dense_logit if combined is None else add(
            [dense_logit, combined])

    return combined