1+ """
2+ Deep Belief Network
3+ author: Ye Hu
4+ 2016/12/20
5+ """
import timeit
import numpy as np
import tensorflow as tf
import input_data
from logisticRegression import LogisticRegression
from mlp import HiddenLayer
from rbm import RBM
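# logisticRegression, mlp and rbm are local modules from this repository that provide
# LogisticRegression, HiddenLayer and RBM; input_data is the MNIST loader.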

class DBN(object):
    """
    An implementation of a deep belief network.
    The hidden layers are first pretrained as RBMs; the DBN is then treated as a normal
    MLP by adding an output layer.
    """
    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
        """
        :param n_in: int, the dimension of input
        :param n_out: int, the dimension of output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        """
        # Number of layers
        assert len(hidden_layers_sizes) > 0
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # normal sigmoid layers
        self.rbm_layers = []  # RBM layers
        self.params = []      # keep track of params for training

        # Define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])
        # Construct the layers of the DBN
        for i in range(self.n_layers):
            if i == 0:
                layer_input = self.x
                input_size = n_in
            else:
                layer_input = self.layers[i - 1].output
                input_size = hidden_layers_sizes[i - 1]
            # Sigmoid layer
            sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i],
                                        activation=tf.nn.sigmoid)
            self.layers.append(sigmoid_layer)
            # Add the parameters for finetuning
            self.params.extend(sigmoid_layer.params)
            # Create the RBM layer
            self.rbm_layers.append(RBM(inpt=layer_input, n_visiable=input_size, n_hidden=hidden_layers_sizes[i],
                                       W=sigmoid_layer.W, hbias=sigmoid_layer.b))
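            # Each RBM shares its weight matrix W and hidden bias with the corresponding
            # sigmoid layer, so pretraining the RBMs directly initializes the MLP weights.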
        # We use the LogisticRegression layer as the output layer
        self.output_layer = LogisticRegression(inpt=self.layers[-1].output, n_in=hidden_layers_sizes[-1],
                                               n_out=n_out)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        self.cost = self.output_layer.cost(self.y)
        # The accuracy
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self, sess, X_train, batch_size=50, pretraining_epochs=10, lr=0.1, k=1,
                 display_step=1):
        """
        Pretrain the layers (just train the RBM layers)
        :param sess: tf.Session
        :param X_train: the input of the train set (you might modify this function if you do not use the designed MNIST)
        :param batch_size: int
        :param lr: float
        :param k: int, use CD-k
        :param pretraining_epochs: int
        :param display_step: int
        """
        print('Starting pretraining...\n')
        start_time = timeit.default_timer()
        batch_num = int(X_train.train.num_examples / batch_size)
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.rbm_layers[i].get_reconstruction_cost()
            train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr, k=k, persistent=None)
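            # train_ops performs one CD-k update of this RBM's parameters; the layers are
            # trained greedily, one at a time, on the activations of the layer below.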
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for j in range(batch_num):
                    x_batch, _ = X_train.train.next_batch(batch_size)
                    # train the RBM on this mini-batch
                    sess.run(train_ops, feed_dict={self.x: x_batch})
                    # accumulate the reconstruction cost
                    avg_cost += sess.run(cost, feed_dict={self.x: x_batch}) / batch_num
                # report progress
                if epoch % display_step == 0:
                    print("\tPretraining layer {0} Epoch {1} cost: {2}".format(i, epoch, avg_cost))

        end_time = timeit.default_timer()
        print("\nThe pretraining process ran for {0} minutes".format((end_time - start_time) / 60))

    def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100, lr=0.1,
                   display_step=1):
        """
        Finetune the whole network in a supervised way
        :param sess: tf.Session
        :param trainSet: the dataset used for finetuning (see pretrain)
        :param training_epochs: int
        :param batch_size: int
        :param lr: float
        :param display_step: int
        """
        print("\nStart finetuning...\n")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(
            self.cost, var_list=self.params)
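        # Plain SGD on the supervised cost; var_list covers every hidden layer plus the
        # output layer, so the whole network is updated jointly during finetuning.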
        for epoch in range(training_epochs):
            avg_cost = 0.0
            batch_num = int(trainSet.train.num_examples / batch_size)
            for i in range(batch_num):
                x_batch, y_batch = trainSet.train.next_batch(batch_size)
                # train with one SGD step
                sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch})
                # accumulate the cost
                avg_cost += sess.run(self.cost,
                                     feed_dict={self.x: x_batch, self.y: y_batch}) / batch_num
            # report progress on the validation set
            if epoch % display_step == 0:
                val_acc = sess.run(self.accuracy, feed_dict={self.x: trainSet.validation.images,
                                                             self.y: trainSet.validation.labels})
                print("\tEpoch {0} cost: {1}, validation accuracy: {2}".format(epoch, avg_cost, val_acc))

        end_time = timeit.default_timer()
        print("\nThe finetuning process ran for {0} minutes".format((end_time - start_time) / 60))

if __name__ == "__main__":
    # mnist examples
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # Set the graph-level random seed before building the model so that the
    # random weight initialization is reproducible
    tf.set_random_seed(seed=1111)
    dbn = DBN(n_in=784, n_out=10, hidden_layers_sizes=[500, 500, 500])
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    dbn.pretrain(sess, X_train=mnist)
    dbn.finetuning(sess, trainSet=mnist)
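    # Optional check: evaluate the finetuned model on the MNIST test split,
    # reusing the accuracy tensor built in DBN.__init__.
    test_acc = sess.run(dbn.accuracy, feed_dict={dbn.x: mnist.test.images,
                                                 dbn.y: mnist.test.labels})
    print("Test accuracy: {0}".format(test_acc))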