From 3302bf4c92f6b0b5a5a2666886b246efd1113496 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 3 Aug 2018 22:05:05 +0300 Subject: [PATCH 001/180] Added docstring. --- pytorch2keras/layers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index fae2360..4005007 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -5,7 +5,12 @@ def random_string(length): - return ''.join(random.choice(string.ascii_letters) for m in range(length)) + """ + Generate a random string for the layer name. + :param length: a length of required random string + :return: generated random string + """ + return ''.join(random.choice(string.ascii_letters) for _ in range(length)) def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_names): From 79bd571589b1332650a025c06ae23bde91028227 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 3 Aug 2018 22:05:41 +0300 Subject: [PATCH 002/180] Fix PEP8 issues. 
--- pytorch2keras/layers.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 4005007..62e7e25 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -61,7 +61,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name layers[padding_name] = padding_layer(layers[input_name]) input_name = padding_name - weights = None if has_bias: weights = [W, biases] else: @@ -84,7 +83,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name name=tf_name ) layers[scope_name] = conv(layers[input_name]) - elif len(weights[weights_name].numpy().shape) == 4: # 2D conv + elif len(weights[weights_name].numpy().shape) == 4: # 2D conv W = weights[weights_name].numpy().transpose(2, 3, 1, 0) height, width, channels, n_filters = W.shape @@ -104,7 +103,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name layers[padding_name] = padding_layer(layers[input_name]) input_name = padding_name - weights = None if has_bias: weights = [W, biases] else: @@ -122,7 +120,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name name=tf_name ) layers[scope_name] = conv(layers[input_name]) - else: # 1D conv + else: # 1D conv W = weights[weights_name].numpy().transpose(2, 1, 0) width, channels, n_filters = W.shape @@ -141,7 +139,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name layers[padding_name] = padding_layer(layers[inputs[0]]) input_name = padding_name - weights = None if has_bias: weights = [W, biases] else: @@ -200,7 +197,6 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s input_name = inputs[0] - weights = None if has_bias: weights = [W, biases] else: @@ -835,7 +831,6 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, short_n short_names: use short names for keras layers """ print('Converting 
reshape ...') - if short_names: tf_name = 'RESH' + random_string(4) else: From 733674c121f212f61760c71ee82a424eabbc458a Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 3 Aug 2018 22:06:51 +0300 Subject: [PATCH 003/180] Fixed double-flatten issue. --- pytorch2keras/layers.py | 9 +++++++-- tests/view.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 62e7e25..f46e7dc 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -232,14 +232,19 @@ def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_n short_names: use short names for keras layers """ print('Conerting reshape ...') + if short_names: tf_name = 'R' + random_string(7) else: tf_name = w_name + str(random.random()) # TODO: check if the input is already flattened - reshape = keras.layers.Flatten(name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) + # Ad-hoc to avoid it: + if len(list(layers[inputs[0]].shape)) == 2: + layers[scope_name] = layers[inputs[0]] + else: + reshape = keras.layers.Flatten(name=tf_name) + layers[scope_name] = reshape(layers[inputs[0]]) def convert_gemm(params, w_name, scope_name, inputs, layers, weights, short_names): diff --git a/tests/view.py b/tests/view.py index 630bdaf..4a60a42 100644 --- a/tests/view.py +++ b/tests/view.py @@ -15,7 +15,7 @@ def __init__(self, inp=10, out=16, kernel_size=3, bias=True): def forward(self, x): x = self.conv2d(x) - x = x.view([x.size(0), -1, 2, 1, 1, 1, 1, 1]) + x = x.view([x.size(0), -1, 2, 1, 1, 1, 1, 1]).view(x.size(0), -1).view(x.size(0), -1) x = torch.nn.Tanh()(x) return x From 79e50f4db9914d74517766087fb2bc71089d3c87 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 5 Aug 2018 12:10:41 +0300 Subject: [PATCH 004/180] Update PyPi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b441ed6..af04917 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.3', + version='0.1.4', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 844ec080029108f5acee452dc9d924d87557eec4 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 5 Aug 2018 14:58:12 +0300 Subject: [PATCH 005/180] Added new features (DepthwiseConv2d), fixed some bugs. --- README.md | 9 ++- pytorch2keras/layers.py | 137 +++++++++++++++++++++++++------------ tests/depthwise_conv2d.py | 56 +++++++++++++++ tests/mobilinet.py | 140 ++++++++++++++++++++++++++++++++++++++ tests/shufflenet.py | 0 5 files changed, 297 insertions(+), 45 deletions(-) create mode 100644 tests/depthwise_conv2d.py create mode 100644 tests/mobilinet.py create mode 100644 tests/shufflenet.py diff --git a/README.md b/README.md index b184683..3869607 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ Layers: * Linear * Conv2d +* DepthwiseConv2d (with limited parameters) * Conv3d * ConvTranspose2d * MaxPool2d @@ -96,8 +97,8 @@ Layers: Reshape: * View -* Reshape (only with 0.4) -* Transpose (only with 0.4) +* Reshape +* Transpose Activations: @@ -106,6 +107,7 @@ Activations: * PReLU (only with 0.2) * SELU (only with 0.2) * Tanh +* HardTanh (clamp) * Softmax * Softplus (only with 0.2) * Softsign (only with 0.2) @@ -134,8 +136,9 @@ Misc: * SqueezeNet (with ceil_mode=False) * DenseNet * AlexNet -* Inception (v4 only) +* Inception * SeNet +* Mobilenet v2 ## Usage Look at the `tests` directory. 
diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index f46e7dc..0e70553 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -2,6 +2,7 @@ import numpy as np import random import string +import tensorflow as tf def random_string(length): @@ -87,39 +88,67 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name W = weights[weights_name].numpy().transpose(2, 3, 1, 0) height, width, channels, n_filters = W.shape - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False + if params['group'] == n_filters: + print('Perform depthwise convolution') - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(params['pads'][0], params['pads'][1]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name + if params['pads'][0] > 0 or params['pads'][1] > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(params['pads'][0], params['pads'][1]), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[input_name]) + input_name = padding_name + + def target_layer(x): + x = tf.transpose(x, [0, 2, 3, 1]) + + # tensorflow.python.framework.errors_impl.UnimplementedError: + # Depthwise convolution on CPU is only supported for NHWC format + layer = tf.nn.depthwise_conv2d(x, W.transpose(0, 1, 3, 2), + strides=(1, params['strides'][0], params['strides'][1], 1), + padding='VALID', rate=[1, 1]) + layer = tf.transpose(layer, [0, 3, 1, 2]) + return layer + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[input_name]) - if has_bias: - weights = [W, biases] else: - weights = [W] - conv = keras.layers.Conv2D( - filters=n_filters, - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - 
weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False + + if params['pads'][0] > 0 or params['pads'][1] > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(params['pads'][0], params['pads'][1]), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[input_name]) + input_name = padding_name + + if has_bias: + weights = [W, biases] + else: + weights = [W] + + conv = keras.layers.Conv2D( + filters=n_filters, + kernel_size=(height, width), + strides=(params['strides'][0], params['strides'][1]), + padding='valid', + weights=weights, + use_bias=has_bias, + activation=None, + dilation_rate=params['dilations'][0], + name=tf_name + ) + layers[scope_name] = conv(layers[input_name]) else: # 1D conv W = weights[weights_name].numpy().transpose(2, 1, 0) width, channels, n_filters = W.shape @@ -231,20 +260,15 @@ def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_n weights: pytorch state_dict short_names: use short names for keras layers """ - print('Conerting reshape ...') + print('Converting flatten ...') if short_names: tf_name = 'R' + random_string(7) else: tf_name = w_name + str(random.random()) - # TODO: check if the input is already flattened - # Ad-hoc to avoid it: - if len(list(layers[inputs[0]].shape)) == 2: - layers[scope_name] = layers[inputs[0]] - else: - reshape = keras.layers.Flatten(name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) + reshape = keras.layers.Reshape([-1], name=tf_name) + layers[scope_name] = reshape(layers[inputs[0]]) def convert_gemm(params, w_name, scope_name, inputs, layers, weights, short_names): @@ -282,7 +306,7 @@ def convert_gemm(params, w_name, scope_name, inputs, layers, weights, short_name dense = 
keras.layers.Dense( output_channels, - weights=keras_weights, use_bias=has_bias, name=tf_name + weights=keras_weights, use_bias=has_bias, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', ) layers[scope_name] = dense(layers[inputs[0]]) @@ -771,6 +795,28 @@ def convert_tanh(params, w_name, scope_name, inputs, layers, weights, short_name layers[scope_name] = tanh(layers[inputs[0]]) +def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, short_names): + """ + Convert hardtanh layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + short_names: use short names for keras layers + """ + print('Converting hardtanh (clip) ...') + + def target_layer(x, max_val=float(params['max_val']), min_val=float(params['min_val'])): + return tf.minimum(max_val, tf.maximum(min_val, x)) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + def convert_selu(params, w_name, scope_name, inputs, layers, weights, short_names): """ Convert selu layer. @@ -812,7 +858,10 @@ def convert_transpose(params, w_name, scope_name, inputs, layers, weights, short if params['perm'][0] != 0: # raise AssertionError('Cannot permute batch dimension') print('!!! Cannot permute batch dimension. Result may be wrong !!!') - layers[scope_name] = layers[inputs[0]] + try: + layers[scope_name] = layers[inputs[0]] + except: + pass else: if short_names: tf_name = 'PERM' + random_string(4) @@ -841,7 +890,11 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, short_n else: tf_name = w_name + str(random.random()) + print(layers[inputs[1]]) if len(inputs) > 1: + if layers[inputs[1]][0] == -1: + print('Cannot deduct batch size! 
It will be omitted, but result may be wrong.') + reshape = keras.layers.Reshape(layers[inputs[1]][1:], name=tf_name) layers[scope_name] = reshape(layers[inputs[0]]) else: @@ -879,7 +932,7 @@ def convert_matmul(params, w_name, scope_name, inputs, layers, weights, short_na dense = keras.layers.Dense( output_channels, - weights=keras_weights, use_bias=False, name=tf_name + weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', ) layers[scope_name] = dense(layers[inputs[0]]) elif len(inputs) == 2: @@ -892,7 +945,7 @@ def convert_matmul(params, w_name, scope_name, inputs, layers, weights, short_na dense = keras.layers.Dense( output_channels, - weights=keras_weights, use_bias=False, name=tf_name + weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', ) layers[scope_name] = dense(layers[inputs[0]]) else: @@ -1047,7 +1100,6 @@ def convert_padding(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = padding_layer(layers[inputs[0]]) - def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, short_names): """ Convert adaptive_avg_pool2d layer. 
@@ -1099,6 +1151,7 @@ def target_layer(x): 'onnx::Sigmoid': convert_sigmoid, 'onnx::Softmax': convert_softmax, 'onnx::Tanh': convert_tanh, + 'aten::hardtanh': convert_hardtanh, 'onnx::Selu': convert_selu, 'onnx::Transpose': convert_transpose, 'onnx::Reshape': convert_reshape, diff --git a/tests/depthwise_conv2d.py b/tests/depthwise_conv2d.py new file mode 100644 index 0000000..2d75566 --- /dev/null +++ b/tests/depthwise_conv2d.py @@ -0,0 +1,56 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +def depthwise_conv3x3(channels, + stride): + return nn.Conv2d( + in_channels=channels, + out_channels=channels, + kernel_size=3, + stride=stride, + padding=1, + groups=channels, + bias=False) + + +class TestConv2d(nn.Module): + """Module for Conv2d conversion testing + """ + + def __init__(self, inp=10, stride=1): + super(TestConv2d, self).__init__() + self.conv2d_dw = depthwise_conv3x3(inp, stride) + + def forward(self, x): + x = self.conv2d_dw(x) + return x + + +if __name__ == '__main__': + max_error = 0 + for i in range(100): + kernel_size = np.random.randint(1, 7) + inp = np.random.randint(kernel_size + 1, 100) + stride = np.random.randint(1, 3) + + model = TestConv2d(inp, stride) + + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/mobilinet.py b/tests/mobilinet.py new file mode 100644 index 0000000..18a6bb9 --- /dev/null +++ b/tests/mobilinet.py @@ -0,0 +1,140 @@ +import numpy as np +import torch +from torch.autograd import Variable +from 
pytorch2keras.converter import pytorch_to_keras +import torch.nn as nn +import math + + +def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + self.use_res_connect = self.stride == 1 and inp == oup + + self.conv = nn.Sequential( + # pw + nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp * expand_ratio), + nn.ReLU6(inplace=True), + # dw + nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, stride, 1, groups=inp * expand_ratio, bias=False), + nn.BatchNorm2d(inp * expand_ratio), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, n_class=1000, input_size=224, width_mult=1.): + super(MobileNetV2, self).__init__() + # setting of inverted residual blocks + self.interverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + assert input_size % 32 == 0 + input_channel = int(32 * width_mult) + self.last_channel = int(1280 * width_mult) if width_mult > 1.0 else 1280 + self.features = [conv_bn(3, input_channel, 2)] + # building inverted residual blocks + for t, c, n, s in self.interverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + if i == 0: + self.features.append(InvertedResidual(input_channel, 
output_channel, s, t)) + else: + self.features.append(InvertedResidual(input_channel, output_channel, 1, t)) + input_channel = output_channel + # building last several layers + self.features.append(conv_1x1_bn(input_channel, self.last_channel)) + self.features.append(nn.AvgPool2d(input_size//32)) + # make it nn.Sequential + self.features = nn.Sequential(*self.features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(self.last_channel, n_class), + ) + + # self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = x.view(-1, self.last_channel) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = MobileNetV2() + for m in model.modules(): + m.training = False + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/shufflenet.py b/tests/shufflenet.py new file mode 100644 index 0000000..e69de29 From 523d750af8c9e2ecf2f88639e4408c04a83dec5d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 5 Aug 2018 15:02:35 +0300 Subject: [PATCH 006/180] Hotfixed Flatten layer. 
--- pytorch2keras/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 0e70553..eb98e30 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -267,7 +267,7 @@ def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_n else: tf_name = w_name + str(random.random()) - reshape = keras.layers.Reshape([-1], name=tf_name) + reshape = keras.layers.Flatten(name=tf_name) layers[scope_name] = reshape(layers[inputs[0]]) From 6fdc8c9f7c673554f8a7ea685c461fd44199c520 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 6 Aug 2018 00:19:26 +0300 Subject: [PATCH 007/180] Added grouped convolution support. Added Channel Shuffle test. --- pytorch2keras/layers.py | 31 ++++++++++++++++++++ tests/channel_shuffle.py | 62 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 tests/channel_shuffle.py diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index eb98e30..5fe48bb 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -81,6 +81,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name use_bias=has_bias, activation=None, dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) layers[scope_name] = conv(layers[input_name]) @@ -113,6 +114,33 @@ def target_layer(x): lambda_layer = keras.layers.Lambda(target_layer) layers[scope_name] = lambda_layer(layers[input_name]) + elif params['group'] != 1: + # Example from https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html + # # Split input and weights and convolve them separately + # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) + # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) + # output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] + + # # Concat the convolved output 
together again + # conv = tf.concat(axis=3, values=output_groups) + def target_layer(x, groups=params['group'], stride_y=params['strides'][0], stride_x=params['strides'][1]): + x = tf.transpose(x, [0, 2, 3, 1]) + + convolve = lambda i, k: tf.nn.conv2d(i, k, + strides=[1, stride_y, stride_x, 1], + padding='VALID') + + input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) + weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W.transpose(0, 1, 2, 3)) + output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] + + layer = tf.concat(axis=3, values=output_groups) + + layer = tf.transpose(layer, [0, 3, 1, 2]) + return layer + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[input_name]) else: @@ -146,6 +174,7 @@ def target_layer(x): use_bias=has_bias, activation=None, dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) layers[scope_name] = conv(layers[input_name]) @@ -182,6 +211,7 @@ def target_layer(x): use_bias=has_bias, activation=None, dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) layers[scope_name] = conv(layers[input_name]) @@ -240,6 +270,7 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s use_bias=has_bias, activation=None, dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) layers[scope_name] = conv(layers[input_name]) diff --git a/tests/channel_shuffle.py b/tests/channel_shuffle.py new file mode 100644 index 0000000..52b2900 --- /dev/null +++ b/tests/channel_shuffle.py @@ -0,0 +1,62 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +def channel_shuffle(x, groups): + """Channel Shuffle operation from ShuffleNet [arxiv: 1707.01083] + Arguments: + x (Tensor): 
tensor to shuffle. + groups (int): groups to be split + """ + batch, channels, height, width = x.size() + #assert (channels % groups == 0) + channels_per_group = channels // groups + x = x.view(batch, groups, channels_per_group, height, width) + x = torch.transpose(x, 1, 2).contiguous() + x = x.view(batch, channels, height, width) + return x + + +class TestChannelShuffle2d(nn.Module): + """Module for Channel shuffle conversion testing + """ + + def __init__(self, inp=10, out=16, groups=32): + super(TestChannelShuffle2d, self).__init__() + self.groups = groups + self.conv2d = nn.Conv2d(inp, out, kernel_size=3, bias=False) + + def forward(self, x): + x = self.conv2d(x) + x = channel_shuffle(x, self.groups) + return x + + +if __name__ == '__main__': + max_error = 0 + for i in range(100): + groups = np.random.randint(1, 32) + inp = np.random.randint(3, 32) + out = np.random.randint(3, 32) * groups + + model = TestChannelShuffle2d(inp, out, groups) + + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) + From e2e4c0ae6d66fef148c1a80ee87677e55f679962 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 6 Aug 2018 00:20:53 +0300 Subject: [PATCH 008/180] Added group Conv2d test. 
--- tests/group_conv2d.py | 55 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/group_conv2d.py diff --git a/tests/group_conv2d.py b/tests/group_conv2d.py new file mode 100644 index 0000000..92a47ab --- /dev/null +++ b/tests/group_conv2d.py @@ -0,0 +1,55 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +def group_conv1x1(in_channels, + out_channels, + groups): + return nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + groups=groups, + bias=False) + + +class TestGroupConv2d(nn.Module): + """Module for Conv2d conversion testing + """ + + def __init__(self, inp=10, groups=1): + super(TestGroupConv2d, self).__init__() + self.conv2d_group = group_conv1x1(inp, inp, groups) + + def forward(self, x): + x = self.conv2d_group(x) + return x + + +if __name__ == '__main__': + max_error = 0 + for i in range(100): + kernel_size = np.random.randint(1, 7) + groups = np.random.randint(1, 10) + inp = np.random.randint(kernel_size + 1, 10) * groups + h, w = 32, 32 + model = TestGroupConv2d(inp, groups) + + input_np = np.random.uniform(0, 1, (1, inp, h, w)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp, h, w,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 7e68933da19456add534ad4775da1e1f19b06c8f Mon Sep 17 00:00:00 2001 From: pkdogcom Date: Mon, 6 Aug 2018 23:20:30 +0800 Subject: [PATCH 009/180] Fixed numpy array cannot be converted to keras symbolic tensor error; Fixed namespace error; Fixed numpy array cannot be serialized error --- pytorch2keras/layers.py | 21 +++++++++++++-------- 1 file 
changed, 13 insertions(+), 8 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index eb98e30..cf28492 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -643,7 +643,8 @@ def convert_sum( print('Converting Sum ...') def target_layer(x): - return keras.backend.sum(x) + import keras.backend as K + return K.sum(x) lambda_layer = keras.layers.Lambda(target_layer) layers[scope_name] = lambda_layer(layers[inputs[0]]) @@ -1002,10 +1003,11 @@ def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, shor print('Converting reduce_sum ...') keepdims = params['keepdims'] > 0 - axis = np.array(params['axes']) + axis = params['axes'] def target_layer(x, keepdims=keepdims, axis=axis): - return keras.backend.sum(x, keepdims=keepdims, axis=axis) + import keras.backend as K + return K.sum(x, keepdims=keepdims, axis=axis) lambda_layer = keras.layers.Lambda(target_layer) layers[scope_name] = lambda_layer(layers[inputs[0]]) @@ -1026,12 +1028,15 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, short_ """ print('Converting constant ...') - # def target_layer(x, params=params): - # return keras.backend.constant(np.float32(params['value'])) + params_list = params['value'].numpy().tolist() - # lambda_layer = keras.layers.Lambda(target_layer) - # layers[scope_name] = lambda_layer(layers[inputs[0]]) - layers[scope_name] = np.float32(params['value']) + def target_layer(x): + import keras.backend as K + return K.constant(params_list) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py + # layers[scope_name] = params['value'] def convert_upsample(params, w_name, scope_name, inputs, layers, weights, short_names): From fe892f10873db4450d6f2438ec742fba36413364 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 6 Aug 2018 21:34:25 +0300 Subject: [PATCH 010/180] Updated readme. 
Fixed flatten-related problem. --- README.md | 1 - pytorch2keras/layers.py | 2 +- tests/menet.py | 365 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 tests/menet.py diff --git a/README.md b/README.md index 3869607..3a7235d 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,6 @@ Misc: ## Unsupported parameters * Pooling: count_include_pad, dilation, ceil_mode -* Convolution: group ## Models converted with pytorch2keras diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index bfbdd83..044db78 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -298,7 +298,7 @@ def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_n else: tf_name = w_name + str(random.random()) - reshape = keras.layers.Flatten(name=tf_name) + reshape = keras.layers.Reshape([-1], name=tf_name) layers[scope_name] = reshape(layers[inputs[0]]) diff --git a/tests/menet.py b/tests/menet.py new file mode 100644 index 0000000..30ffb61 --- /dev/null +++ b/tests/menet.py @@ -0,0 +1,365 @@ +import numpy as np +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + +""" + MENet, implemented in PyTorch. + Original paper: 'Merging and Evolution: Improving Convolutional Neural Networks for Mobile Applications' +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init + +# 0.034489512 + + + +def depthwise_conv3x3(channels, + stride): + return nn.Conv2d( + in_channels=channels, + out_channels=channels, + kernel_size=3, + stride=stride, + padding=1, + groups=channels, + bias=False) + + +def group_conv1x1(in_channels, + out_channels, + groups): + return nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + groups=groups, + bias=False) + +def channel_shuffle(x, + groups): + """Channel Shuffle operation from ShuffleNet [arxiv: 1707.01083] + Arguments: + x (Tensor): tensor to shuffle. 
def channel_shuffle(x, groups):
    """
    Shuffle the channel dimension of an NCHW tensor across groups.

    Splits the channels into ``groups`` equal groups and interleaves them
    (ShuffleNet-style), so information can flow between the groups of a
    preceding grouped convolution.

    Args:
        x (Tensor): input of shape (batch, channels, height, width).
        groups (int): number of groups; must divide the channel count.

    Returns:
        Tensor: same shape as ``x`` with channels permuted.

    Raises:
        ValueError: if ``channels`` is not divisible by ``groups``.
    """
    batch, channels, height, width = x.size()
    if channels % groups != 0:
        raise ValueError('channels must be divisible by groups')
    channels_per_group = channels // groups
    # (B, C, H, W) -> (B, g, C/g, H, W) -> swap the two group axes -> flatten.
    x = x.view(batch, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()
    return x.view(batch, channels, height, width)


class ChannelShuffle(nn.Module):
    """Module wrapper around :func:`channel_shuffle` with a fixed group count."""

    def __init__(self,
                 channels,
                 groups):
        super(ChannelShuffle, self).__init__()
        if channels % groups != 0:
            raise ValueError('channels must be divisible by groups')
        self.groups = groups

    def forward(self, x):
        return channel_shuffle(x, self.groups)


class ShuffleInitBlock(nn.Module):
    """
    ShuffleNet-style stem: strided 3x3 conv + BN + ReLU + strided max-pool.

    Reduces the spatial resolution by a factor of 4 overall.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
    """

    def __init__(self,
                 in_channels,
                 out_channels):
        super(ShuffleInitBlock, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.activ = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(
            kernel_size=3,
            stride=2,
            padding=1)

    def forward(self, x):
        return self.pool(self.activ(self.bn(self.conv(x))))


def conv1x1(in_channels,
            out_channels):
    """Plain (ungrouped) 1x1 convolution without bias."""
    return nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        bias=False)


def conv3x3(in_channels,
            out_channels,
            stride):
    """Plain 3x3 convolution with padding 1 and no bias."""
    return nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False)


class MEModule(nn.Module):
    """
    Merging-and-Evolution (ME) module: a ShuffleNet-like residual unit with
    an extra narrow "fusion" branch that merges, evolves and sigmoid-gates
    the bottleneck features.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        side_channels (int): width of the fusion branch.
        groups (int): group count for the pointwise group convolutions.
        downsample (bool): halve the resolution and concat the identity.
        ignore_group (bool): use an ungrouped first pointwise convolution
            (needed for the very first unit of the network).
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 side_channels,
                 groups,
                 downsample,
                 ignore_group):
        super(MEModule, self).__init__()
        self.downsample = downsample
        mid_channels = out_channels // 4

        if downsample:
            # The identity is concatenated in this case, so the residual
            # branch only produces the remaining channels.
            out_channels -= in_channels

        # Residual branch.
        self.compress_conv1 = group_conv1x1(
            in_channels=in_channels,
            out_channels=mid_channels,
            groups=(1 if ignore_group else groups))
        self.compress_bn1 = nn.BatchNorm2d(num_features=mid_channels)
        self.c_shuffle = ChannelShuffle(
            channels=mid_channels,
            groups=(1 if ignore_group else groups))
        self.dw_conv2 = depthwise_conv3x3(
            channels=mid_channels,
            stride=(2 if self.downsample else 1))
        self.dw_bn2 = nn.BatchNorm2d(num_features=mid_channels)
        self.expand_conv3 = group_conv1x1(
            in_channels=mid_channels,
            out_channels=out_channels,
            groups=groups)
        self.expand_bn3 = nn.BatchNorm2d(num_features=out_channels)
        if downsample:
            self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        self.activ = nn.ReLU(inplace=True)

        # Fusion branch.
        self.s_merge_conv = conv1x1(
            in_channels=mid_channels,
            out_channels=side_channels)
        self.s_merge_bn = nn.BatchNorm2d(num_features=side_channels)
        self.s_conv = conv3x3(
            in_channels=side_channels,
            out_channels=side_channels,
            stride=(2 if self.downsample else 1))
        self.s_conv_bn = nn.BatchNorm2d(num_features=side_channels)
        self.s_evolve_conv = conv1x1(
            in_channels=side_channels,
            out_channels=mid_channels)
        self.s_evolve_bn = nn.BatchNorm2d(num_features=mid_channels)

    def forward(self, x):
        identity = x
        # Pointwise group convolution 1 + channel shuffle.
        x = self.activ(self.compress_bn1(self.compress_conv1(x)))
        x = self.c_shuffle(x)
        # Merge the compressed features into the narrow fusion branch.
        y = self.activ(self.s_merge_bn(self.s_merge_conv(x)))
        # Depthwise convolution (bottleneck).
        x = self.dw_bn2(self.dw_conv2(x))
        # Evolve the fusion branch and gate the bottleneck with it.
        # Fixed: F.sigmoid is deprecated -> torch.sigmoid.
        y = self.activ(self.s_conv_bn(self.s_conv(y)))
        y = torch.sigmoid(self.s_evolve_bn(self.s_evolve_conv(y)))
        x = x * y
        # Pointwise group convolution 2.
        x = self.expand_bn3(self.expand_conv3(x))
        # Identity branch: concat when downsampling, add otherwise.
        if self.downsample:
            identity = self.avgpool(identity)
            x = torch.cat((x, identity), dim=1)
        else:
            x = x + identity
        return self.activ(x)


class MENet(nn.Module):
    """
    MENet classification network built from :class:`MEModule` units.

    Args:
        block_channels (list of int): stem width followed by the width of
            each of the three stages (length 4).
        side_channels (int): fusion-branch width for every ME module.
        groups (int): group count for the pointwise group convolutions.
        num_classes (int): size of the classification output. Default 1000.
    """

    def __init__(self,
                 block_channels,
                 side_channels,
                 groups,
                 num_classes=1000):
        super(MENet, self).__init__()
        input_channels = 3
        block_layers = [4, 8, 4]  # number of ME units per stage

        self.features = nn.Sequential()
        self.features.add_module("init_block", ShuffleInitBlock(
            in_channels=input_channels,
            out_channels=block_channels[0]))

        for i in range(len(block_channels) - 1):
            stage = nn.Sequential()
            in_channels_i = block_channels[i]
            out_channels_i = block_channels[i + 1]
            for j in range(block_layers[i]):
                # The first unit of each stage downsamples; the very first
                # unit of the network also skips grouping on its first conv.
                stage.add_module("unit_{}".format(j + 1), MEModule(
                    in_channels=(in_channels_i if j == 0 else out_channels_i),
                    out_channels=out_channels_i,
                    side_channels=side_channels,
                    groups=groups,
                    downsample=(j == 0),
                    ignore_group=(i == 0 and j == 0)))
            self.features.add_module("stage_{}".format(i + 1), stage)

        self.features.add_module('final_pool', nn.AvgPool2d(kernel_size=7))

        self.output = nn.Linear(
            in_features=block_channels[-1],
            out_features=num_classes)

        self._init_params()

    def _init_params(self):
        # He-uniform init for every convolution; biases (if any) to zero.
        for name, module in self.named_modules():
            if isinstance(module, nn.Conv2d):
                init.kaiming_uniform_(module.weight)
                if module.bias is not None:
                    init.constant_(module.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.output(x)


def get_menet(first_block_channels,
              side_channels,
              groups,
              pretrained=False,
              **kwargs):
    """
    Construct a MENet with a named width configuration.

    Args:
        first_block_channels (int): width of the first ME stage; selects the
            full per-stage channel configuration. Supported values:
            108, 128, 160, 228, 256, 348, 352, 456.
        side_channels (int): fusion-branch width for every ME module.
        groups (int): group count for the pointwise group convolutions.
        pretrained (bool): loading pretrained weights is not supported yet.
        **kwargs: forwarded to :class:`MENet` (e.g. ``num_classes``).

    Returns:
        MENet: the constructed network.

    Raises:
        ValueError: for an unsupported ``first_block_channels`` value or
            when ``pretrained`` is requested.
    """
    channel_configs = {
        108: [12, 108, 216, 432],
        128: [12, 128, 256, 512],
        160: [16, 160, 320, 640],
        228: [24, 228, 456, 912],
        256: [24, 256, 512, 1024],
        348: [24, 348, 696, 1392],
        352: [24, 352, 704, 1408],
        456: [48, 456, 912, 1824],
    }
    if first_block_channels not in channel_configs:
        raise ValueError("`first_block_channels` value {} is not supported".format(
            first_block_channels))

    if pretrained:
        raise ValueError("Pretrained model is not supported")

    return MENet(
        block_channels=channel_configs[first_block_channels],
        side_channels=side_channels,
        groups=groups,
        **kwargs)


def menet108_8x1_g3(**kwargs):
    """MENet-108 (side=8, groups=3)."""
    return get_menet(108, 8, 3, **kwargs)


def menet128_8x1_g4(**kwargs):
    """MENet-128 (side=8, groups=4)."""
    return get_menet(128, 8, 4, **kwargs)


def menet160_8x1_g8(**kwargs):
    """MENet-160 (side=8, groups=8)."""
    return get_menet(160, 8, 8, **kwargs)


def menet228_12x1_g3(**kwargs):
    """MENet-228 (side=12, groups=3)."""
    return get_menet(228, 12, 3, **kwargs)


def menet256_12x1_g4(**kwargs):
    """MENet-256 (side=12, groups=4)."""
    return get_menet(256, 12, 4, **kwargs)


def menet348_12x1_g3(**kwargs):
    """MENet-348 (side=12, groups=3)."""
    return get_menet(348, 12, 3, **kwargs)


def menet352_12x1_g8(**kwargs):
    """MENet-352 (side=12, groups=8)."""
    return get_menet(352, 12, 8, **kwargs)


def menet456_24x1_g3(**kwargs):
    """MENet-456 (side=24, groups=3)."""
    return get_menet(456, 24, 3, **kwargs)


if __name__ == '__main__':
    max_error = 0
    for i in range(10):
        model = menet228_12x1_g3()
        # Fixed: model.eval() recursively disables training mode (the old
        # manual loop over modules set the flag non-idiomatically).
        model.eval()

        input_np = np.random.uniform(0, 1, (1, 3, 224, 224))
        input_var = Variable(torch.FloatTensor(input_np))
        output = model(input_var)

        k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True)

        pytorch_output = output.data.numpy()
        keras_output = k_model.predict(input_np)

        error = np.max(pytorch_output - keras_output)
        print(error)
        max_error = max(max_error, error)

    print('Max error: {0}'.format(max_error))
+# From bbdfdfeae6fde40623ac7672fa705f0b97251c21 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 7 Aug 2018 09:58:03 +0300 Subject: [PATCH 011/180] Added slicing support. --- pytorch2keras/layers.py | 62 +++++++++++++++++++++++++++++++++++++++++ tests/slice.py | 44 +++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 tests/slice.py diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 044db78..65e4691 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1166,6 +1166,66 @@ def target_layer(x): layers[scope_name] = lambda_layer(layers_global_pool) +def convert_slice(params, w_name, scope_name, inputs, layers, weights, short_names): + """ + Convert slice operation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + short_names: use short names for keras layers + """ + print('Converting slice ...') + + if len(params['axes']) > 1: + raise AssertionError('Cannot convert slice by multiple dimensions') + + if params['axes'][0] not in [0, 1, 2, 3]: + raise AssertionError('Slice by dimension more than 3 or less than 0 is not supported') + + def target_layer(x, axis=int(params['axes'][0]), start=int(params['starts'][0]), end=int(params['ends'][0])): + if axis == 0: + return x[start:end] + elif axis == 1: + return x[:, start:end] + elif axis == 2: + return x[:, :, start:end] + elif axis == 3: + return x[:, :, :, start:end] + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, short_names): + """ + Convert squeeze operation. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + short_names: use short names for keras layers + """ + print('Converting squeeze ...') + + if len(params['axes']) > 1: + raise AssertionError('Cannot convert squeeze by multiple dimensions') + + def target_layer(x, axis=int(params['axes'][0])): + return tf.squeeze(x, axis=axis) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + AVAILABLE_CONVERTERS = { 'onnx::Conv': convert_conv, 'onnx::ConvTranspose': convert_convtranspose, @@ -1198,4 +1258,6 @@ def target_layer(x): 'onnx::Upsample': convert_upsample, 'onnx::Pad': convert_padding, 'aten::adaptive_avg_pool2d': convert_adaptive_avg_pool2d, + 'onnx::Slice': convert_slice, + 'onnx::Squeeze': convert_squeeze, } diff --git a/tests/slice.py b/tests/slice.py new file mode 100644 index 0000000..1dbcfe8 --- /dev/null +++ b/tests/slice.py @@ -0,0 +1,44 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class TestSlice(nn.Module): + """Module for Slicings conversion testing + """ + + def __init__(self, inp=10, out=16, kernel_size=3, bias=True): + super(TestSlice, self).__init__() + self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) + + def forward(self, x): + x = self.conv2d(x) + return x[:, 0, :, :] + + +if __name__ == '__main__': + max_error = 0 + for i in range(100): + kernel_size = np.random.randint(1, 7) + inp = np.random.randint(kernel_size + 1, 100) + out = np.random.randint(1, 100) + + model = TestSlice(inp, out, kernel_size, inp % 2) + + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, 
input_var, (inp, inp, inp,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 17f98171b23def119e291e5cdcce3e6d3e09016f Mon Sep 17 00:00:00 2001 From: Sean Micklethwaite Date: Tue, 7 Aug 2018 20:28:54 +0100 Subject: [PATCH 012/180] fix: models reusing inputs from previous layers --- pytorch2keras/converter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 65cf333..c2ef653 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -131,6 +131,7 @@ def pytorch_to_keras( outputs = [] input_index = 0 + model_inputs = dict() for node in nodes: node_inputs = list(node.inputs()) node_input_names = [] @@ -139,8 +140,13 @@ def pytorch_to_keras( node_input_names.append(get_node_id(node_input.node())) if len(node_input_names) == 0: - node_input_names.append('input{0}'.format(input_index)) - input_index += 1 + if node_inputs[0] in model_inputs: + node_input_names.append(model_inputs[node_inputs[0]]) + else: + input_name = 'input{0}'.format(input_index) + node_input_names.append(input_name) + input_index += 1 + model_inputs[node_inputs[0]] = input_name node_type = node.kind() # print(dir(node)) From ddaf87bb6e5854f74e3b2a796bf1e8f350ceeb1f Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 9 Aug 2018 00:42:10 +0300 Subject: [PATCH 013/180] Updated Readme and Requirements. --- README.md | 6 +++--- requirements.txt | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3a7235d..489c7d7 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ pip install pytorch2keras ## Important notice -In that moment the only PyTorch 0.2 (deprecated) and PyTorch 0.4 (latest stable) are supported. 
+In that moment the only PyTorch 0.2 (deprecated) and PyTorch 0.4.0 (latest stable) are supported. To use the converter properly, please, make changes in your `~/.keras/keras.json`: @@ -24,7 +24,7 @@ To use the converter properly, please, make changes in your `~/.keras/keras.json ... ``` -From the latest releases, multiple inputs is also supported. +The latest version of PyTorch (0.4.1) isn't supported yet. ## Tensorflow.js @@ -83,7 +83,7 @@ That's all! If all is ok, the Keras model is stores into the `k_model` variable. Layers: * Linear -* Conv2d +* Conv2d (also with groups) * DepthwiseConv2d (with limited parameters) * Conv3d * ConvTranspose2d diff --git a/requirements.txt b/requirements.txt index 068d5df..c983c5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ keras -numpy \ No newline at end of file +tensorflow +numpy +torch==0.4 +torchvision \ No newline at end of file From 3742dda9593c42f75209c97a536b240860a43a2d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 17 Aug 2018 22:45:51 +0300 Subject: [PATCH 014/180] Added PreResNet model. Fixed MaxPool2d aten operator conversion. 
--- pytorch2keras/layers.py | 15 +- tests/preresnet18.py | 739 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 747 insertions(+), 7 deletions(-) create mode 100644 tests/preresnet18.py diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 65e4691..2547859 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1059,15 +1059,15 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, short_ """ print('Converting constant ...') - params_list = params['value'].numpy().tolist() + # params_list = params['value'].numpy().tolist() - def target_layer(x): - import keras.backend as K - return K.constant(params_list) + # def target_layer(x): + # import keras.backend as K + # return K.constant(params_list) - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py - # layers[scope_name] = params['value'] + # lambda_layer = keras.layers.Lambda(target_layer) + # layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py + layers[scope_name] = params['value'].tolist() def convert_upsample(params, w_name, scope_name, inputs, layers, weights, short_names): @@ -1234,6 +1234,7 @@ def target_layer(x, axis=int(params['axes'][0])): 'onnx::MaxPool': convert_maxpool, 'max_pool2d': convert_maxpool, 'aten::max_pool3d': convert_maxpool3, + 'aten::max_pool2d': convert_maxpool, 'onnx::AveragePool': convert_avgpool, 'onnx::Dropout': convert_dropout, 'onnx::BatchNormalization': convert_batchnorm, diff --git a/tests/preresnet18.py b/tests/preresnet18.py new file mode 100644 index 0000000..d9b0373 --- /dev/null +++ b/tests/preresnet18.py @@ -0,0 +1,739 @@ +""" +Model from https://github.com/osmr/imgclsmob/tree/master/pytorch/models +""" + +import numpy as np +import torch +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +import 
torchvision + +import os +import torch.nn as nn +import torch.nn.init as init + + +class PreResConv(nn.Module): + """ + PreResNet specific convolution block, with pre-activation. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + kernel_size : int or tuple/list of 2 int + Convolution window size. + stride : int or tuple/list of 2 int + Strides of the convolution. + padding : int or tuple/list of 2 int + Padding value for convolution layer. + """ + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding): + super(PreResConv, self).__init__() + self.bn = nn.BatchNorm2d(num_features=in_channels) + self.activ = nn.ReLU(inplace=True) + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=False) + + def forward(self, x): + x = self.bn(x) + x = self.activ(x) + x_pre_activ = x + x = self.conv(x) + return x, x_pre_activ + + +def conv1x1(in_channels, + out_channels, + stride): + """ + Convolution 1x1 layer. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. + """ + return nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=stride, + padding=0, + bias=False) + + +def preres_conv1x1(in_channels, + out_channels, + stride): + """ + 1x1 version of the PreResNet specific convolution block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. 
+ """ + return PreResConv( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=stride, + padding=0) + + +def preres_conv3x3(in_channels, + out_channels, + stride): + """ + 3x3 version of the PreResNet specific convolution block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. + bn_use_global_stats : bool + Whether global moving statistics is used instead of local batch-norm for BatchNorm layers. + """ + return PreResConv( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + padding=1) + + +class PreResBlock(nn.Module): + """ + Simple PreResNet block for residual path in ResNet unit. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. + """ + def __init__(self, + in_channels, + out_channels, + stride): + super(PreResBlock, self).__init__() + self.conv1 = preres_conv3x3( + in_channels=in_channels, + out_channels=out_channels, + stride=stride) + self.conv2 = preres_conv3x3( + in_channels=out_channels, + out_channels=out_channels, + stride=1) + + def forward(self, x): + x, x_pre_activ = self.conv1(x) + x, _ = self.conv2(x) + return x, x_pre_activ + + +class PreResBottleneck(nn.Module): + """ + PreResNet bottleneck block for residual path in PreResNet unit. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. + conv1_stride : bool + Whether to use stride in the first or the second convolution layer of the block. 
+ """ + def __init__(self, + in_channels, + out_channels, + stride, + conv1_stride): + super(PreResBottleneck, self).__init__() + mid_channels = out_channels // 4 + + self.conv1 = preres_conv1x1( + in_channels=in_channels, + out_channels=mid_channels, + stride=(stride if conv1_stride else 1)) + self.conv2 = preres_conv3x3( + in_channels=mid_channels, + out_channels=mid_channels, + stride=(1 if conv1_stride else stride)) + self.conv3 = preres_conv1x1( + in_channels=mid_channels, + out_channels=out_channels, + stride=1) + + def forward(self, x): + x, x_pre_activ = self.conv1(x) + x, _ = self.conv2(x) + x, _ = self.conv3(x) + return x, x_pre_activ + + +class PreResUnit(nn.Module): + """ + PreResNet unit with residual connection. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. + bottleneck : bool + Whether to use a bottleneck or simple block in units. + conv1_stride : bool + Whether to use stride in the first or the second convolution layer of the block. + """ + def __init__(self, + in_channels, + out_channels, + stride, + bottleneck, + conv1_stride): + super(PreResUnit, self).__init__() + self.resize_identity = (in_channels != out_channels) or (stride != 1) + + if bottleneck: + self.body = PreResBottleneck( + in_channels=in_channels, + out_channels=out_channels, + stride=stride, + conv1_stride=conv1_stride) + else: + self.body = PreResBlock( + in_channels=in_channels, + out_channels=out_channels, + stride=stride) + if self.resize_identity: + self.identity_conv = conv1x1( + in_channels=in_channels, + out_channels=out_channels, + stride=stride) + + def forward(self, x): + identity = x + x, x_pre_activ = self.body(x) + if self.resize_identity: + identity = self.identity_conv(x_pre_activ) + x = x + identity + return x + + +class PreResInitBlock(nn.Module): + """ + PreResNet specific initial block. 
+ + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + """ + def __init__(self, + in_channels, + out_channels): + super(PreResInitBlock, self).__init__() + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.bn = nn.BatchNorm2d(num_features=out_channels) + self.activ = nn.ReLU(inplace=True) + self.pool = nn.MaxPool2d( + kernel_size=3, + stride=2, + padding=1) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.activ(x) + x = self.pool(x) + return x + + +class PreResActivation(nn.Module): + """ + PreResNet pure pre-activation block without convolution layer. It's used by itself as the final block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + """ + def __init__(self, + in_channels): + super(PreResActivation, self).__init__() + self.bn = nn.BatchNorm2d(num_features=in_channels) + self.activ = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.bn(x) + x = self.activ(x) + return x + + +class PreResNet(nn.Module): + """ + PreResNet model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + channels : list of list of int + Number of output channels for each unit. + init_block_channels : int + Number of output channels for the initial unit. + bottleneck : bool + Whether to use a bottleneck or simple block in units. + conv1_stride : bool + Whether to use stride in the first or the second convolution layer in units. + in_channels : int, default 3 + Number of input channels. + num_classes : int, default 1000 + Number of classification classes. 
+ """ + def __init__(self, + channels, + init_block_channels, + bottleneck, + conv1_stride, + in_channels=3, + num_classes=1000): + super(PreResNet, self).__init__() + + self.features = nn.Sequential() + self.features.add_module("init_block", PreResInitBlock( + in_channels=in_channels, + out_channels=init_block_channels)) + in_channels = init_block_channels + for i, channels_per_stage in enumerate(channels): + stage = nn.Sequential() + for j, out_channels in enumerate(channels_per_stage): + stride = 1 if (i == 0) or (j != 0) else 2 + stage.add_module("unit{}".format(j + 1), PreResUnit( + in_channels=in_channels, + out_channels=out_channels, + stride=stride, + bottleneck=bottleneck, + conv1_stride=conv1_stride)) + in_channels = out_channels + self.features.add_module("stage{}".format(i + 1), stage) + self.features.add_module('post_activ', PreResActivation(in_channels=in_channels)) + self.features.add_module('final_pool', nn.AvgPool2d( + kernel_size=7, + stride=1)) + + self.output = nn.Linear( + in_features=in_channels, + out_features=num_classes) + + self._init_params() + + def _init_params(self): + for name, module in self.named_modules(): + if isinstance(module, nn.Conv2d): + init.kaiming_uniform_(module.weight) + if module.bias is not None: + init.constant_(module.bias, 0) + + def forward(self, x): + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.output(x) + return x + + +def get_preresnet(blocks, + conv1_stride=True, + width_scale=1.0, + model_name=None, + pretrained=False, + root=os.path.join('~', '.torch', 'models'), + **kwargs): + """ + Create PreResNet model with specific parameters. + + Parameters: + ---------- + blocks : int + Number of blocks. + conv1_stride : bool + Whether to use stride in the first or the second convolution layer in units. + width_scale : float + Scale factor for width of layers. + model_name : str or None, default None + Model name for loading pretrained model. 
+ pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + + if blocks == 10: + layers = [1, 1, 1, 1] + elif blocks == 12: + layers = [2, 1, 1, 1] + elif blocks == 14: + layers = [2, 2, 1, 1] + elif blocks == 16: + layers = [2, 2, 2, 1] + elif blocks == 18: + layers = [2, 2, 2, 2] + elif blocks == 34: + layers = [3, 4, 6, 3] + elif blocks == 50: + layers = [3, 4, 6, 3] + elif blocks == 101: + layers = [3, 4, 23, 3] + elif blocks == 152: + layers = [3, 8, 36, 3] + elif blocks == 200: + layers = [3, 24, 36, 3] + else: + raise ValueError("Unsupported ResNet with number of blocks: {}".format(blocks)) + + init_block_channels = 64 + + if blocks < 50: + channels_per_layers = [64, 128, 256, 512] + bottleneck = False + else: + channels_per_layers = [256, 512, 1024, 2048] + bottleneck = True + + channels = [[ci] * li for (ci, li) in zip(channels_per_layers, layers)] + + if width_scale != 1.0: + channels = [[int(cij * width_scale) for cij in ci] for ci in channels] + init_block_channels = int(init_block_channels * width_scale) + + net = PreResNet( + channels=channels, + init_block_channels=init_block_channels, + bottleneck=bottleneck, + conv1_stride=conv1_stride, + **kwargs) + + if pretrained: + if (model_name is None) or (not model_name): + raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.") + import torch + from .model_store import get_model_file + net.load_state_dict(torch.load(get_model_file( + model_name=model_name, + local_model_store_dir_path=root))) + + return net + + +def preresnet10(**kwargs): + """ + PreResNet-10 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. 
+ root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=10, model_name="preresnet10", **kwargs) + + +def preresnet12(**kwargs): + """ + PreResNet-12 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=12, model_name="preresnet12", **kwargs) + + +def preresnet14(**kwargs): + """ + PreResNet-14 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=14, model_name="preresnet14", **kwargs) + + +def preresnet16(**kwargs): + """ + PreResNet-16 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=16, model_name="preresnet16", **kwargs) + + +def preresnet18_wd4(**kwargs): + """ + PreResNet-18 model with 0.25 width scale from 'Identity Mappings in Deep Residual Networks,' + https://arxiv.org/abs/1603.05027. It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. 
+ """ + return get_preresnet(blocks=18, width_scale=0.25, model_name="preresnet18_wd4", **kwargs) + + +def preresnet18_wd2(**kwargs): + """ + PreResNet-18 model with 0.5 width scale from 'Identity Mappings in Deep Residual Networks,' + https://arxiv.org/abs/1603.05027. It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=18, width_scale=0.5, model_name="preresnet18_wd2", **kwargs) + + +def preresnet18_w3d4(**kwargs): + """ + PreResNet-18 model with 0.75 width scale from 'Identity Mappings in Deep Residual Networks,' + https://arxiv.org/abs/1603.05027. It's an experimental model. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=18, width_scale=0.75, model_name="preresnet18_w3d4", **kwargs) + + +def preresnet18(**kwargs): + """ + PreResNet-18 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=18, model_name="preresnet18", **kwargs) + + +def preresnet34(**kwargs): + """ + PreResNet-34 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. 
+ """ + return get_preresnet(blocks=34, model_name="preresnet34", **kwargs) + + +def preresnet50(**kwargs): + """ + PreResNet-50 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=50, model_name="preresnet50", **kwargs) + + +def preresnet50b(**kwargs): + """ + PreResNet-50 model with stride at the second convolution in bottleneck block from 'Identity Mappings in Deep + Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=50, conv1_stride=False, model_name="preresnet50b", **kwargs) + + +def preresnet101(**kwargs): + """ + PreResNet-101 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=101, model_name="preresnet101", **kwargs) + + +def preresnet101b(**kwargs): + """ + PreResNet-101 model with stride at the second convolution in bottleneck block from 'Identity Mappings in Deep + Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. 
+ """ + return get_preresnet(blocks=101, conv1_stride=False, model_name="preresnet101b", **kwargs) + + +def preresnet152(**kwargs): + """ + PreResNet-152 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=152, model_name="preresnet152", **kwargs) + + +def preresnet152b(**kwargs): + """ + PreResNet-152 model with stride at the second convolution in bottleneck block from 'Identity Mappings in Deep + Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=152, conv1_stride=False, model_name="preresnet152b", **kwargs) + + +def preresnet200(**kwargs): + """ + PreResNet-200 model from 'Identity Mappings in Deep Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_preresnet(blocks=200, model_name="preresnet200", **kwargs) + + +def preresnet200b(**kwargs): + """ + PreResNet-200 model with stride at the second convolution in bottleneck block from 'Identity Mappings in Deep + Residual Networks,' https://arxiv.org/abs/1603.05027. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. 
+ """ + return get_preresnet(blocks=200, conv1_stride=False, model_name="preresnet200b", **kwargs) + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = preresnet18() + for m in model.modules(): + m.training = False + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From f71e1d308d361afc351b3515b972e0b4ecc207f0 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 17 Aug 2018 22:46:08 +0300 Subject: [PATCH 015/180] Added SqueezeNext model test. --- tests/squeezenext.py | 407 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 tests/squeezenext.py diff --git a/tests/squeezenext.py b/tests/squeezenext.py new file mode 100644 index 0000000..4200e4b --- /dev/null +++ b/tests/squeezenext.py @@ -0,0 +1,407 @@ +""" +Model from https://github.com/osmr/imgclsmob/tree/master/pytorch/models +""" + + +import numpy as np +import torch +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +import torchvision + +import os +import torch.nn as nn +import torch.nn.init as init + + +class SqnxtConv(nn.Module): + """ + SqueezeNext specific convolution block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + kernel_size : int or tuple/list of 2 int + Convolution window size. + stride : int or tuple/list of 2 int + Strides of the convolution. + padding : int or tuple/list of 2 int, default (0, 0) + Padding value for convolution layer. 
+ """ + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding=(0, 0)): + super(SqnxtConv, self).__init__() + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + self.bn = nn.BatchNorm2d(num_features=out_channels) + self.activ = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.activ(x) + return x + + +class SqnxtUnit(nn.Module): + """ + SqueezeNext unit. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + stride : int or tuple/list of 2 int + Strides of the convolution. + """ + def __init__(self, + in_channels, + out_channels, + stride): + super(SqnxtUnit, self).__init__() + if stride == 2: + reduction_den = 1 + self.resize_identity = True + elif in_channels > out_channels: + reduction_den = 4 + self.resize_identity = True + else: + reduction_den = 2 + self.resize_identity = False + + self.conv1 = SqnxtConv( + in_channels=in_channels, + out_channels=(in_channels // reduction_den), + kernel_size=1, + stride=stride) + self.conv2 = SqnxtConv( + in_channels=(in_channels // reduction_den), + out_channels=(in_channels // (2 * reduction_den)), + kernel_size=1, + stride=1) + self.conv3 = SqnxtConv( + in_channels=(in_channels // (2 * reduction_den)), + out_channels=(in_channels // reduction_den), + kernel_size=(1, 3), + stride=1, + padding=(0, 1)) + self.conv4 = SqnxtConv( + in_channels=(in_channels // reduction_den), + out_channels=(in_channels // reduction_den), + kernel_size=(3, 1), + stride=1, + padding=(1, 0)) + self.conv5 = SqnxtConv( + in_channels=(in_channels // reduction_den), + out_channels=out_channels, + kernel_size=1, + stride=1) + + if self.resize_identity: + self.identity_conv = SqnxtConv( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=stride) + self.activ = nn.ReLU(inplace=True) + + 
def forward(self, x): + if self.resize_identity: + identity = self.identity_conv(x) + else: + identity = x + identity = self.activ(identity) + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + x = self.conv4(x) + x = self.conv5(x) + x = x + identity + x = self.activ(x) + return x + + +class SqnxtInitBlock(nn.Module): + """ + SqueezeNext specific initial block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + """ + def __init__(self, + in_channels, + out_channels): + super(SqnxtInitBlock, self).__init__() + self.conv = SqnxtConv( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=7, + stride=2, + padding=1) + self.pool = nn.MaxPool2d( + kernel_size=3, + stride=2, + ceil_mode=True) + + def forward(self, x): + x = self.conv(x) + x = self.pool(x) + return x + + +class SqueezeNext(nn.Module): + """ + SqueezeNext model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + channels : list of list of int + Number of output channels for each unit. + init_block_channels : int + Number of output channels for the initial unit. + final_block_channels : int + Number of output channels for the final block of the feature extractor. + in_channels : int, default 3 + Number of input channels. + num_classes : int, default 1000 + Number of classification classes. 
+ """ + def __init__(self, + channels, + init_block_channels, + final_block_channels, + in_channels=3, + num_classes=1000): + super(SqueezeNext, self).__init__() + + self.features = nn.Sequential() + self.features.add_module("init_block", SqnxtInitBlock( + in_channels=in_channels, + out_channels=init_block_channels)) + in_channels = init_block_channels + for i, channels_per_stage in enumerate(channels): + stage = nn.Sequential() + for j, out_channels in enumerate(channels_per_stage): + stride = 2 if (j == 0) and (i != 0) else 1 + stage.add_module("unit{}".format(j + 1), SqnxtUnit( + in_channels=in_channels, + out_channels=out_channels, + stride=stride)) + in_channels = out_channels + self.features.add_module("stage{}".format(i + 1), stage) + self.features.add_module('final_block', SqnxtConv( + in_channels=in_channels, + out_channels=final_block_channels, + kernel_size=1, + stride=1)) + in_channels = final_block_channels + self.features.add_module('final_pool', nn.AvgPool2d( + kernel_size=7, + stride=1)) + + self.output = nn.Linear( + in_features=in_channels, + out_features=num_classes) + + self._init_params() + + def _init_params(self): + for name, module in self.named_modules(): + if isinstance(module, nn.Conv2d): + init.kaiming_uniform_(module.weight) + if module.bias is not None: + init.constant_(module.bias, 0) + + def forward(self, x): + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.output(x) + return x + + +def get_squeezenext(version, + width_scale, + model_name=None, + pretrained=False, + root=os.path.join('~', '.torch', 'models'), + **kwargs): + """ + Create SqueezeNext model with specific parameters. + + Parameters: + ---------- + version : str + Version of SqueezeNet ('23' or '23v5'). + width_scale : float + Scale factor for width of layers. + model_name : str or None, default None + Model name for loading pretrained model. + pretrained : bool, default False + Whether to load the pretrained weights for model. 
+ ctx : Context, default CPU + The context in which to load the pretrained weights. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + + init_block_channels = 64 + final_block_channels = 128 + channels_per_layers = [32, 64, 128, 256] + + if version == '23': + layers = [6, 6, 8, 1] + elif version == '23v5': + layers = [2, 4, 14, 1] + else: + raise ValueError("Unsupported SqueezeNet version {}".format(version)) + + channels = [[ci] * li for (ci, li) in zip(channels_per_layers, layers)] + + if width_scale != 1: + channels = [[int(cij * width_scale) for cij in ci] for ci in channels] + init_block_channels = int(init_block_channels * width_scale) + final_block_channels = int(final_block_channels * width_scale) + + net = SqueezeNext( + channels=channels, + init_block_channels=init_block_channels, + final_block_channels=final_block_channels, + **kwargs) + + if pretrained: + if (model_name is None) or (not model_name): + raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.") + import torch + from .model_store import get_model_file + net.load_state_dict(torch.load(get_model_file( + model_name=model_name, + local_model_store_dir_path=root))) + + return net + + +def sqnxt23_w1(**kwargs): + """ + 1.0-SqNxt-23 model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_squeezenext(version="23", width_scale=1.0, model_name="sqnxt23_w1", **kwargs) + + +def sqnxt23_w3d2(**kwargs): + """ + 0.75-SqNxt-23 model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. 
+ root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_squeezenext(version="23", width_scale=1.5, model_name="sqnxt23_w3d2", **kwargs) + + +def sqnxt23_w2(**kwargs): + """ + 0.5-SqNxt-23 model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_squeezenext(version="23", width_scale=2.0, model_name="sqnxt23_w2", **kwargs) + + +def sqnxt23v5_w1(**kwargs): + """ + 1.0-SqNxt-23v5 model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_squeezenext(version="23v5", width_scale=1.0, model_name="sqnxt23v5_w1", **kwargs) + + +def sqnxt23v5_w3d2(**kwargs): + """ + 0.75-SqNxt-23v5 model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. + """ + return get_squeezenext(version="23v5", width_scale=1.5, model_name="sqnxt23v5_w3d2", **kwargs) + + +def sqnxt23v5_w2(**kwargs): + """ + 0.5-SqNxt-23v5 model from 'SqueezeNext: Hardware-Aware Neural Network Design,' https://arxiv.org/abs/1803.10615. + + Parameters: + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.torch/models' + Location for keeping the model parameters. 
+ """ + return get_squeezenext(version="23v5", width_scale=2.0, model_name="sqnxt23v5_w2", **kwargs) + + + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = sqnxt23_w1() + for m in model.modules(): + m.training = False + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 5d18e034d50feb53aad21fc6347f92a584ba5cff Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 17 Aug 2018 22:46:23 +0300 Subject: [PATCH 016/180] Updated Readme. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 489c7d7..ba52bec 100644 --- a/README.md +++ b/README.md @@ -129,11 +129,11 @@ Misc: ## Models converted with pytorch2keras -* ResNet18 -* ResNet34 -* ResNet50 +* ResNet* +* PreResNet* * SqueezeNet (with ceil_mode=False) -* DenseNet +* SqueezeNext +* DenseNet* * AlexNet * Inception * SeNet From b291de739dc02bae439c666b94feb7460e8b0e85 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 20 Aug 2018 16:05:42 +0300 Subject: [PATCH 017/180] Update requirements.txt. 
--- requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index c983c5c..694ca6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -keras -tensorflow -numpy +keras==2.2.2 +tensorflow==1.9.0 +numpy==1.14.1 torch==0.4 -torchvision \ No newline at end of file +torchvision==0.2.1 \ No newline at end of file From e95f8df9b62ebb9c5edb2baba892e3bb136e9a15 Mon Sep 17 00:00:00 2001 From: Sean Micklethwaite Date: Tue, 4 Sep 2018 10:55:05 +0100 Subject: [PATCH 018/180] fix: padding for transposed 2d convolution --- pytorch2keras/layers.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 2547859..210419c 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -251,9 +251,6 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s biases = None has_bias = False - assert(params['pads'][0] == 0) - assert(params['pads'][1] == 0) - input_name = inputs[0] if has_bias: @@ -266,6 +263,7 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s kernel_size=(height, width), strides=(params['strides'][0], params['strides'][1]), padding='valid', + output_padding=0, weights=weights, use_bias=has_bias, activation=None, @@ -273,7 +271,18 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) + layers[scope_name] = conv(layers[input_name]) + + pads = params['pads'] + if pads[0] > 0: + assert(len(pads) == 2 or (pads[2] == pads[0] and pads[3] == pads[1])) + + crop = keras.layers.Cropping2D( + pads[:2], + name=tf_name + '_crop' + ) + layers[scope_name] = crop(layers[scope_name]) else: raise AssertionError('Layer is not supported for now') @@ -696,6 +705,11 @@ def convert_concat(params, w_name, scope_name, inputs, layers, weights, short_na """ 
print('Converting concat ...') concat_nodes = [layers[i] for i in inputs] + + if len(concat_nodes) == 1: + # no-op + layers[scope_name] = concat_nodes[0] + return if short_names: tf_name = 'CAT' + random_string(5) From f6185aff8c0d8906db8a762e883558f15cec46ce Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 5 Sep 2018 01:52:24 +0300 Subject: [PATCH 019/180] Update readme. --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index ba52bec..8604f49 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,29 @@ To use the converter properly, please, make changes in your `~/.keras/keras.json The latest version of PyTorch (0.4.1) isn't supported yet. +## Python 3.7 + +There are some problem related to a new version: + +Q. PyTorch 0.4 hadn't released wheel package for Python 3.7 + +A. You can build it from source: + +``` +git clone https://github.com/pytorch/pytorch + +cd pytorch + +git checkout v0.4.0 + +NO_CUDA=1 python setup.py install +``` + +Q. Tensorflow isn't available for Python 3.7 + +A. Yes, we're waiting for it. + + ## Tensorflow.js For the proper convertion to the tensorflow.js format, please use a new flag `short_names=True`. @@ -36,6 +59,11 @@ For the proper convertion to the tensorflow.js format, please use a new flag `sh Please, follow [this guide](https://github.com/pytorch/pytorch#from-source) to compile the latest version. +Additional information for Arch Linux users: + +* the latest gcc8 is incompatible with actual nvcc version +* the legacy gcc54 can't compile C/C++ modules because of compiler flags + ## How to use It's a convertor of pytorch graph to a Keras (Tensorflow backend) graph. 
From 1b781f694884d17874706dd1a51a36e08240c8c1 Mon Sep 17 00:00:00 2001 From: Sean Micklethwaite Date: Wed, 5 Sep 2018 17:36:23 +0100 Subject: [PATCH 020/180] fix: false detection of depthwise conv, and avoid lambda layer --- pytorch2keras/layers.py | 53 ++++++++++++++++++++++++++------------- tests/depthwise_conv2d.py | 36 ++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 210419c..3a697ef 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -87,10 +87,13 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name layers[scope_name] = conv(layers[input_name]) elif len(weights[weights_name].numpy().shape) == 4: # 2D conv W = weights[weights_name].numpy().transpose(2, 3, 1, 0) - height, width, channels, n_filters = W.shape + height, width, channels_per_group, out_channels = W.shape + n_groups = params['group'] + in_channels = channels_per_group * n_groups - if params['group'] == n_filters: - print('Perform depthwise convolution') + if n_groups == in_channels: + print('Perform depthwise convolution: h={} w={} in={} out={}' + .format(height, width, in_channels, out_channels)) if params['pads'][0] > 0 or params['pads'][1] > 0: padding_name = tf_name + '_pad' @@ -100,21 +103,37 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name ) layers[padding_name] = padding_layer(layers[input_name]) input_name = padding_name + + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False - def target_layer(x): - x = tf.transpose(x, [0, 2, 3, 1]) - - # tensorflow.python.framework.errors_impl.UnimplementedError: - # Depthwise convolution on CPU is only supported for NHWC format - layer = tf.nn.depthwise_conv2d(x, W.transpose(0, 1, 3, 2), - strides=(1, params['strides'][0], params['strides'][1], 1), - padding='VALID', rate=[1, 1]) - layer = 
tf.transpose(layer, [0, 3, 1, 2]) - return layer + # We are just doing depthwise conv, so make the pointwise a no-op + pointwise_wt = np.expand_dims(np.expand_dims(np.identity(out_channels), 0), 0) + W = W.transpose(0, 1, 3, 2) + if has_bias: + weights = [W, pointwise_wt, biases] + else: + weights = [W, pointwise_wt] + + conv = keras.layers.SeparableConv2D( + filters=out_channels, + depth_multiplier=1, + kernel_size=(height, width), + strides=(params['strides'][0], params['strides'][1]), + padding='valid', + weights=weights, + use_bias=has_bias, + activation=None, + bias_initializer='zeros', kernel_initializer='zeros', + name=tf_name + ) + layers[scope_name] = conv(layers[input_name]) - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[input_name]) - elif params['group'] != 1: + elif n_groups != 1: # Example from https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html # # Split input and weights and convolve them separately # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) @@ -166,7 +185,7 @@ def target_layer(x, groups=params['group'], stride_y=params['strides'][0], strid weights = [W] conv = keras.layers.Conv2D( - filters=n_filters, + filters=out_channels, kernel_size=(height, width), strides=(params['strides'][0], params['strides'][1]), padding='valid', diff --git a/tests/depthwise_conv2d.py b/tests/depthwise_conv2d.py index 2d75566..9290117 100644 --- a/tests/depthwise_conv2d.py +++ b/tests/depthwise_conv2d.py @@ -29,6 +29,30 @@ def forward(self, x): x = self.conv2d_dw(x) return x +class TestConv2dNonDepthwise(nn.Module): + def __init__(self, inp=10, stride=1): + super(TestConv2dNonDepthwise, self).__init__() + self.conv2d_non_dw = nn.Conv2d( + in_channels = inp, + out_channels = 1, + kernel_size = 3, + padding = 1, + stride = stride, + bias = False + ) + + def forward(self, x): + return self.conv2d_non_dw(x) + +def check_error(output, k_model, input_np, epsilon=1E-5): + 
pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error if __name__ == '__main__': max_error = 0 @@ -45,12 +69,14 @@ def forward(self, x): k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error + # This isn't a depthwise convolution, so shouldn't be detected as such + non_dw_model = TestConv2dNonDepthwise(inp) + output = non_dw_model(input_var) + k_model = pytorch_to_keras(non_dw_model, input_var, (inp, inp, inp,), verbose=True) + check_error(output, k_model, input_np) + print('Max error: {0}'.format(max_error)) From 167c946dae7cc323ab10578ceaaea53283a5ae96 Mon Sep 17 00:00:00 2001 From: Sean Micklethwaite Date: Fri, 7 Sep 2018 16:33:41 +0100 Subject: [PATCH 021/180] test: ConvTranspose2D with padding --- tests/convtranspose2d.py | 71 +++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/tests/convtranspose2d.py b/tests/convtranspose2d.py index 151f048..3536c2b 100644 --- a/tests/convtranspose2d.py +++ b/tests/convtranspose2d.py @@ -1,3 +1,4 @@ +import unittest import numpy as np import torch import torch.nn as nn @@ -9,36 +10,66 @@ class TestConvTranspose2d(nn.Module): """Module for ConvTranspose2d conversion testing """ - def __init__(self, inp=10, out=16, kernel_size=3, padding=1, bias=True): + def __init__(self, inp=10, out=16, kernel_size=3, stride=1, bias=True, padding=0): super(TestConvTranspose2d, self).__init__() - self.conv2d = nn.ConvTranspose2d(inp, out, kernel_size=kernel_size, bias=bias, stride=padding) + self.conv2d = nn.ConvTranspose2d(inp, out, kernel_size=kernel_size, bias=bias, stride=stride, 
padding=padding) def forward(self, x): x = self.conv2d(x) return x +class ConvTranspose2dTest(unittest.TestCase): + N = 100 -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) + def test(self): + max_error = 0 + for i in range(self.N): + kernel_size = np.random.randint(1, 7) + inp = np.random.randint(kernel_size + 1, 100) + out = np.random.randint(1, 100) + + model = TestConvTranspose2d(inp, out, kernel_size, 2, inp % 3) + + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) - model = TestConvTranspose2d(inp, out, kernel_size, 2, inp % 3) + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) + print('Max error: {0}'.format(max_error)) + + def test_with_padding(self): + max_error = 0 + for i in range(self.N): + kernel_size = np.random.randint(1, 7) + inp = np.random.randint(kernel_size + 1, 100) + out = np.random.randint(1, 100) - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) + model = TestConvTranspose2d(inp, out, kernel_size, 2, inp % 3, padding=1) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - print('Max error: 
{0}'.format(max_error)) + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) + +if __name__ == '__main__': + unittest.main() From 62712af1636f620def1f0ac4f129535b9aceaacb Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 25 Sep 2018 10:00:18 +0300 Subject: [PATCH 022/180] Fixed error with padding for group conv2d. --- pytorch2keras/layers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 2547859..ff9837d 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -120,7 +120,15 @@ def target_layer(x): # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) # output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] - + if params['pads'][0] > 0 or params['pads'][1] > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(params['pads'][0], params['pads'][1]), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[input_name]) + input_name = padding_name + # # Concat the convolved output together again # conv = tf.concat(axis=3, values=output_groups) def target_layer(x, groups=params['group'], stride_y=params['strides'][0], stride_x=params['strides'][1]): From 9431c13a249573a48a9f01f9c1da1b7fec513d8e Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 2 Oct 2018 02:54:19 +0300 Subject: [PATCH 023/180] Remove padding layer duplicates. 
--- pytorch2keras/layers.py | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 3c5b314..d04df46 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -86,6 +86,15 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name ) layers[scope_name] = conv(layers[input_name]) elif len(weights[weights_name].numpy().shape) == 4: # 2D conv + if params['pads'][0] > 0 or params['pads'][1] > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(params['pads'][0], params['pads'][1]), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[input_name]) + input_name = padding_name + W = weights[weights_name].numpy().transpose(2, 3, 1, 0) height, width, channels_per_group, out_channels = W.shape n_groups = params['group'] @@ -94,15 +103,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name if n_groups == in_channels: print('Perform depthwise convolution: h={} w={} in={} out={}' .format(height, width, in_channels, out_channels)) - - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(params['pads'][0], params['pads'][1]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name if bias_name in weights: biases = weights[bias_name].numpy() @@ -139,14 +139,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) # output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - 
padding=(params['pads'][0], params['pads'][1]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name # # Concat the convolved output together again # conv = tf.concat(axis=3, values=output_groups) @@ -170,7 +162,6 @@ def target_layer(x, groups=params['group'], stride_y=params['strides'][0], strid layers[scope_name] = lambda_layer(layers[input_name]) else: - if bias_name in weights: biases = weights[bias_name].numpy() has_bias = True @@ -178,15 +169,6 @@ def target_layer(x, groups=params['group'], stride_y=params['strides'][0], strid biases = None has_bias = False - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(params['pads'][0], params['pads'][1]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name - if has_bias: weights = [W, biases] else: From 634d473a25f08ceff321d9330f44682c33aeaa79 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 2 Oct 2018 02:59:34 +0300 Subject: [PATCH 024/180] Style fixes, remove some prints. 
--- pytorch2keras/layers.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index d04df46..025054a 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -42,7 +42,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name if len(weights[weights_name].numpy().shape) == 5: # 3D conv W = weights[weights_name].numpy().transpose(2, 3, 4, 1, 0) height, width, channels, n_layers, n_filters = W.shape - print(W.shape) if bias_name in weights: biases = weights[bias_name].numpy() @@ -67,9 +66,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name else: weights = [W] - print(len(weights), len(weights[0]), len(weights[0][0]), - len(weights[0][0][0]), len(weights[0][0][0][0]), - len(weights[0][0][0][0][0])) conv = keras.layers.Conv3D( filters=n_filters, kernel_size=(channels, height, width), @@ -433,10 +429,12 @@ def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, short_n stride_height, stride_width = params['strides'] else: stride_height, stride_width = params['stride'] + if 'pads' in params: padding_h, padding_w, _, _ = params['pads'] else: padding_h, padding_w = params['padding'] + input_name = inputs[0] if padding_h > 0 and padding_w > 0: padding_name = tf_name + '_pad' @@ -488,10 +486,12 @@ def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, short_ stride_height, stride_width, stride_depth = params['strides'] else: stride_height, stride_width, stride_depth = params['stride'] + if 'pads' in params: padding_h, padding_w, padding_d, _, _ = params['pads'] else: padding_h, padding_w, padding_d = params['padding'] + input_name = inputs[0] if padding_h > 0 and padding_w > 0 and padding_d > 0: padding_name = tf_name + '_pad' @@ -945,7 +945,6 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, short_n else: tf_name = w_name + str(random.random()) - 
print(layers[inputs[1]]) if len(inputs) > 1: if layers[inputs[1]][0] == -1: print('Cannot deduct batch size! It will be omitted, but result may be wrong.') From c94a1828e6310b9d2284525ab8aa393cf0f662bd Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 2 Oct 2018 03:01:30 +0300 Subject: [PATCH 025/180] Remove Conv2d test from DepthwiseConv2d test file. --- tests/depthwise_conv2d.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/tests/depthwise_conv2d.py b/tests/depthwise_conv2d.py index 9290117..832ee23 100644 --- a/tests/depthwise_conv2d.py +++ b/tests/depthwise_conv2d.py @@ -29,22 +29,8 @@ def forward(self, x): x = self.conv2d_dw(x) return x -class TestConv2dNonDepthwise(nn.Module): - def __init__(self, inp=10, stride=1): - super(TestConv2dNonDepthwise, self).__init__() - self.conv2d_non_dw = nn.Conv2d( - in_channels = inp, - out_channels = 1, - kernel_size = 3, - padding = 1, - stride = stride, - bias = False - ) - - def forward(self, x): - return self.conv2d_non_dw(x) -def check_error(output, k_model, input_np, epsilon=1E-5): +def check_error(output, k_model, input_np, epsilon=1e-5): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -54,6 +40,7 @@ def check_error(output, k_model, input_np, epsilon=1E-5): assert error < epsilon return error + if __name__ == '__main__': max_error = 0 for i in range(100): @@ -73,10 +60,4 @@ def check_error(output, k_model, input_np, epsilon=1E-5): if max_error < error: max_error = error - # This isn't a depthwise convolution, so shouldn't be detected as such - non_dw_model = TestConv2dNonDepthwise(inp) - output = non_dw_model(input_var) - k_model = pytorch_to_keras(non_dw_model, input_var, (inp, inp, inp,), verbose=True) - check_error(output, k_model, input_np) - print('Max error: {0}'.format(max_error)) From 7a56b6b2e6979e347ab2de6f545c6e5daa732993 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 2 Oct 2018 03:19:56 +0300 Subject: 
[PATCH 026/180] Reorganize tests. --- tests/{shufflenet.py => layers/__init__.py} | 0 tests/{ => layers}/avg_pool.py | 0 tests/{ => layers}/bn.py | 0 tests/{ => layers}/channel_shuffle.py | 0 tests/{ => layers}/concat_many.py | 0 tests/{ => layers}/const.py | 0 tests/{ => layers}/conv2d.py | 0 tests/{ => layers}/conv2d_channels_last.py | 0 tests/{ => layers}/conv2d_dilation.py | 0 tests/{ => layers}/conv3d.py | 0 tests/{ => layers}/convtranspose2d.py | 0 tests/{ => layers}/dense.py | 0 tests/{ => layers}/densenet.py | 0 tests/{ => layers}/depthwise_conv2d.py | 0 tests/{ => layers}/droupout.py | 0 tests/{ => layers}/embedding.py | 0 tests/{ => layers}/group_conv2d.py | 3 ++- tests/{ => layers}/lrelu.py | 0 tests/{ => layers}/max_pool.py | 0 tests/{ => layers}/max_pool3d.py | 0 tests/{ => layers}/mul.py | 0 tests/{ => layers}/multiple_inputs.py | 0 tests/{ => layers}/relu.py | 0 tests/{ => layers}/sigmoid.py | 0 tests/{ => layers}/slice.py | 0 tests/{ => layers}/softmax.py | 0 tests/{ => layers}/sub.py | 0 tests/{ => layers}/sum.py | 0 tests/{ => layers}/tanh.py | 0 tests/{ => layers}/transpose.py | 0 tests/{ => layers}/upsample_nearest.py | 0 tests/{ => layers}/view.py | 0 tests/models/__init__.py | 0 tests/{ => models}/alexnet.py | 23 ++++++++++++------ tests/{ => models}/menet.py | 0 tests/{ => models}/mobilinet.py | 0 tests/{ => models}/preresnet18.py | 0 tests/{ => models}/resnet18.py | 25 +++++++++++++------- tests/{ => models}/resnet18_channels_last.py | 0 tests/{ => models}/resnet34.py | 0 tests/{ => models}/resnet50.py | 0 tests/{ => models}/senet.py | 0 tests/{ => models}/squeezenet.py | 0 tests/{ => models}/squeezenext.py | 0 tests/{ => models}/vgg11.py | 0 45 files changed, 34 insertions(+), 17 deletions(-) rename tests/{shufflenet.py => layers/__init__.py} (100%) rename tests/{ => layers}/avg_pool.py (100%) rename tests/{ => layers}/bn.py (100%) rename tests/{ => layers}/channel_shuffle.py (100%) rename tests/{ => layers}/concat_many.py (100%) rename 
tests/{ => layers}/const.py (100%) rename tests/{ => layers}/conv2d.py (100%) rename tests/{ => layers}/conv2d_channels_last.py (100%) rename tests/{ => layers}/conv2d_dilation.py (100%) rename tests/{ => layers}/conv3d.py (100%) rename tests/{ => layers}/convtranspose2d.py (100%) rename tests/{ => layers}/dense.py (100%) rename tests/{ => layers}/densenet.py (100%) rename tests/{ => layers}/depthwise_conv2d.py (100%) rename tests/{ => layers}/droupout.py (100%) rename tests/{ => layers}/embedding.py (100%) rename tests/{ => layers}/group_conv2d.py (97%) rename tests/{ => layers}/lrelu.py (100%) rename tests/{ => layers}/max_pool.py (100%) rename tests/{ => layers}/max_pool3d.py (100%) rename tests/{ => layers}/mul.py (100%) rename tests/{ => layers}/multiple_inputs.py (100%) rename tests/{ => layers}/relu.py (100%) rename tests/{ => layers}/sigmoid.py (100%) rename tests/{ => layers}/slice.py (100%) rename tests/{ => layers}/softmax.py (100%) rename tests/{ => layers}/sub.py (100%) rename tests/{ => layers}/sum.py (100%) rename tests/{ => layers}/tanh.py (100%) rename tests/{ => layers}/transpose.py (100%) rename tests/{ => layers}/upsample_nearest.py (100%) rename tests/{ => layers}/view.py (100%) create mode 100644 tests/models/__init__.py rename tests/{ => models}/alexnet.py (56%) rename tests/{ => models}/menet.py (100%) rename tests/{ => models}/mobilinet.py (100%) rename tests/{ => models}/preresnet18.py (100%) rename tests/{ => models}/resnet18.py (54%) rename tests/{ => models}/resnet18_channels_last.py (100%) rename tests/{ => models}/resnet34.py (100%) rename tests/{ => models}/resnet50.py (100%) rename tests/{ => models}/senet.py (100%) rename tests/{ => models}/squeezenet.py (100%) rename tests/{ => models}/squeezenext.py (100%) rename tests/{ => models}/vgg11.py (100%) diff --git a/tests/shufflenet.py b/tests/layers/__init__.py similarity index 100% rename from tests/shufflenet.py rename to tests/layers/__init__.py diff --git a/tests/avg_pool.py 
b/tests/layers/avg_pool.py similarity index 100% rename from tests/avg_pool.py rename to tests/layers/avg_pool.py diff --git a/tests/bn.py b/tests/layers/bn.py similarity index 100% rename from tests/bn.py rename to tests/layers/bn.py diff --git a/tests/channel_shuffle.py b/tests/layers/channel_shuffle.py similarity index 100% rename from tests/channel_shuffle.py rename to tests/layers/channel_shuffle.py diff --git a/tests/concat_many.py b/tests/layers/concat_many.py similarity index 100% rename from tests/concat_many.py rename to tests/layers/concat_many.py diff --git a/tests/const.py b/tests/layers/const.py similarity index 100% rename from tests/const.py rename to tests/layers/const.py diff --git a/tests/conv2d.py b/tests/layers/conv2d.py similarity index 100% rename from tests/conv2d.py rename to tests/layers/conv2d.py diff --git a/tests/conv2d_channels_last.py b/tests/layers/conv2d_channels_last.py similarity index 100% rename from tests/conv2d_channels_last.py rename to tests/layers/conv2d_channels_last.py diff --git a/tests/conv2d_dilation.py b/tests/layers/conv2d_dilation.py similarity index 100% rename from tests/conv2d_dilation.py rename to tests/layers/conv2d_dilation.py diff --git a/tests/conv3d.py b/tests/layers/conv3d.py similarity index 100% rename from tests/conv3d.py rename to tests/layers/conv3d.py diff --git a/tests/convtranspose2d.py b/tests/layers/convtranspose2d.py similarity index 100% rename from tests/convtranspose2d.py rename to tests/layers/convtranspose2d.py diff --git a/tests/dense.py b/tests/layers/dense.py similarity index 100% rename from tests/dense.py rename to tests/layers/dense.py diff --git a/tests/densenet.py b/tests/layers/densenet.py similarity index 100% rename from tests/densenet.py rename to tests/layers/densenet.py diff --git a/tests/depthwise_conv2d.py b/tests/layers/depthwise_conv2d.py similarity index 100% rename from tests/depthwise_conv2d.py rename to tests/layers/depthwise_conv2d.py diff --git a/tests/droupout.py 
b/tests/layers/droupout.py similarity index 100% rename from tests/droupout.py rename to tests/layers/droupout.py diff --git a/tests/embedding.py b/tests/layers/embedding.py similarity index 100% rename from tests/embedding.py rename to tests/layers/embedding.py diff --git a/tests/group_conv2d.py b/tests/layers/group_conv2d.py similarity index 97% rename from tests/group_conv2d.py rename to tests/layers/group_conv2d.py index 92a47ab..d53a603 100644 --- a/tests/group_conv2d.py +++ b/tests/layers/group_conv2d.py @@ -11,7 +11,8 @@ def group_conv1x1(in_channels, return nn.Conv2d( in_channels=in_channels, out_channels=out_channels, - kernel_size=1, + kernel_size=3, + padding=1, groups=groups, bias=False) diff --git a/tests/lrelu.py b/tests/layers/lrelu.py similarity index 100% rename from tests/lrelu.py rename to tests/layers/lrelu.py diff --git a/tests/max_pool.py b/tests/layers/max_pool.py similarity index 100% rename from tests/max_pool.py rename to tests/layers/max_pool.py diff --git a/tests/max_pool3d.py b/tests/layers/max_pool3d.py similarity index 100% rename from tests/max_pool3d.py rename to tests/layers/max_pool3d.py diff --git a/tests/mul.py b/tests/layers/mul.py similarity index 100% rename from tests/mul.py rename to tests/layers/mul.py diff --git a/tests/multiple_inputs.py b/tests/layers/multiple_inputs.py similarity index 100% rename from tests/multiple_inputs.py rename to tests/layers/multiple_inputs.py diff --git a/tests/relu.py b/tests/layers/relu.py similarity index 100% rename from tests/relu.py rename to tests/layers/relu.py diff --git a/tests/sigmoid.py b/tests/layers/sigmoid.py similarity index 100% rename from tests/sigmoid.py rename to tests/layers/sigmoid.py diff --git a/tests/slice.py b/tests/layers/slice.py similarity index 100% rename from tests/slice.py rename to tests/layers/slice.py diff --git a/tests/softmax.py b/tests/layers/softmax.py similarity index 100% rename from tests/softmax.py rename to tests/layers/softmax.py diff --git 
a/tests/sub.py b/tests/layers/sub.py similarity index 100% rename from tests/sub.py rename to tests/layers/sub.py diff --git a/tests/sum.py b/tests/layers/sum.py similarity index 100% rename from tests/sum.py rename to tests/layers/sum.py diff --git a/tests/tanh.py b/tests/layers/tanh.py similarity index 100% rename from tests/tanh.py rename to tests/layers/tanh.py diff --git a/tests/transpose.py b/tests/layers/transpose.py similarity index 100% rename from tests/transpose.py rename to tests/layers/transpose.py diff --git a/tests/upsample_nearest.py b/tests/layers/upsample_nearest.py similarity index 100% rename from tests/upsample_nearest.py rename to tests/layers/upsample_nearest.py diff --git a/tests/view.py b/tests/layers/view.py similarity index 100% rename from tests/view.py rename to tests/layers/view.py diff --git a/tests/models/__init__.py b/tests/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/alexnet.py b/tests/models/alexnet.py similarity index 56% rename from tests/alexnet.py rename to tests/models/alexnet.py index 4041d64..62d2a95 100644 --- a/tests/alexnet.py +++ b/tests/models/alexnet.py @@ -1,12 +1,25 @@ import numpy as np import torch +import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + if __name__ == '__main__': max_error = 0 - for i in range(10): + for i in range(100): model = torchvision.models.AlexNet() model.eval() @@ -16,12 +29,8 @@ k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) + error = 
check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) \ No newline at end of file diff --git a/tests/menet.py b/tests/models/menet.py similarity index 100% rename from tests/menet.py rename to tests/models/menet.py diff --git a/tests/mobilinet.py b/tests/models/mobilinet.py similarity index 100% rename from tests/mobilinet.py rename to tests/models/mobilinet.py diff --git a/tests/preresnet18.py b/tests/models/preresnet18.py similarity index 100% rename from tests/preresnet18.py rename to tests/models/preresnet18.py diff --git a/tests/resnet18.py b/tests/models/resnet18.py similarity index 54% rename from tests/resnet18.py rename to tests/models/resnet18.py index 7ebf78a..c49cdd3 100644 --- a/tests/resnet18.py +++ b/tests/models/resnet18.py @@ -1,16 +1,27 @@ import numpy as np import torch +import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + if __name__ == '__main__': max_error = 0 - for i in range(10): + for i in range(100): model = torchvision.models.resnet18() - for m in model.modules(): - m.training = False + model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) input_var = Variable(torch.FloatTensor(input_np)) @@ -18,12 +29,8 @@ k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: 
{0}'.format(max_error)) \ No newline at end of file diff --git a/tests/resnet18_channels_last.py b/tests/models/resnet18_channels_last.py similarity index 100% rename from tests/resnet18_channels_last.py rename to tests/models/resnet18_channels_last.py diff --git a/tests/resnet34.py b/tests/models/resnet34.py similarity index 100% rename from tests/resnet34.py rename to tests/models/resnet34.py diff --git a/tests/resnet50.py b/tests/models/resnet50.py similarity index 100% rename from tests/resnet50.py rename to tests/models/resnet50.py diff --git a/tests/senet.py b/tests/models/senet.py similarity index 100% rename from tests/senet.py rename to tests/models/senet.py diff --git a/tests/squeezenet.py b/tests/models/squeezenet.py similarity index 100% rename from tests/squeezenet.py rename to tests/models/squeezenet.py diff --git a/tests/squeezenext.py b/tests/models/squeezenext.py similarity index 100% rename from tests/squeezenext.py rename to tests/models/squeezenext.py diff --git a/tests/vgg11.py b/tests/models/vgg11.py similarity index 100% rename from tests/vgg11.py rename to tests/models/vgg11.py From acebe1c16dc3edc2b75ce3d3a0de0ce405ae28df Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 2 Oct 2018 03:20:53 +0300 Subject: [PATCH 027/180] Fixed changing ordering for flatten layer. 
--- pytorch2keras/converter.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index c2ef653..cf30444 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -188,6 +188,7 @@ def pytorch_to_keras( if change_ordering: import numpy as np conf = model.get_config() + for layer in conf['layers']: if layer['config'] and 'batch_input_shape' in layer['config']: layer['config']['batch_input_shape'] = \ @@ -199,13 +200,15 @@ def pytorch_to_keras( ]), -1 )) if layer['config'] and 'target_shape' in layer['config']: - layer['config']['target_shape'] = \ - tuple(np.reshape(np.array( - [ - list(layer['config']['target_shape'][1:][:]), - layer['config']['target_shape'][0] - ]), -1 - )) + if len(list(layer['config']['target_shape'][1:][:])) > 0: + layer['config']['target_shape'] = \ + tuple(np.reshape(np.array( + [ + list(layer['config']['target_shape'][1:][:]), + layer['config']['target_shape'][0] + ]), -1 + ),) + if layer['config'] and 'data_format' in layer['config']: layer['config']['data_format'] = 'channels_last' if layer['config'] and 'axis' in layer['config']: From 61619cf38303f97f29d43a35bbcbad73e9a42790 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 3 Oct 2018 14:03:08 +0300 Subject: [PATCH 028/180] Update some tests. 
--- tests/models/resnet18_channels_last.py | 27 +++++++++++++++----------- tests/models/resnet34.py | 25 +++++++++++++++--------- tests/models/resnet50.py | 25 +++++++++++++++--------- tests/models/vgg11.py | 25 +++++++++++++++--------- 4 files changed, 64 insertions(+), 38 deletions(-) diff --git a/tests/models/resnet18_channels_last.py b/tests/models/resnet18_channels_last.py index 03f18db..08bc62c 100644 --- a/tests/models/resnet18_channels_last.py +++ b/tests/models/resnet18_channels_last.py @@ -1,30 +1,35 @@ import numpy as np import torch +import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + if __name__ == '__main__': max_error = 0 - for i in range(10): + for i in range(100): model = torchvision.models.resnet18() - for m in model.modules(): - m.training = False + model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) input_var = Variable(torch.FloatTensor(input_np)) output = model(input_var) - k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True, change_ordering=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np.transpose(0, 2, 3, 1)) - - print(pytorch_output.shape, keras_output.shape) + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True, change_ordering=True) - error = np.max(pytorch_output - keras_output) - print(error) + error = check_error(output, k_model, input_np.transpose(0, 2, 3, 1)) if max_error < error: max_error = error diff --git a/tests/models/resnet34.py b/tests/models/resnet34.py index 6cc458d..d73b050 100644 --- a/tests/models/resnet34.py +++ b/tests/models/resnet34.py @@ -1,16 +1,27 @@ import numpy as np import torch 
+import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + if __name__ == '__main__': max_error = 0 - for i in range(10): + for i in range(100): model = torchvision.models.resnet34() - for m in model.modules(): - m.training = False + model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) input_var = Variable(torch.FloatTensor(input_np)) @@ -18,12 +29,8 @@ k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) \ No newline at end of file diff --git a/tests/models/resnet50.py b/tests/models/resnet50.py index 959f6bc..b31ffe8 100644 --- a/tests/models/resnet50.py +++ b/tests/models/resnet50.py @@ -1,16 +1,27 @@ import numpy as np import torch +import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + if __name__ == '__main__': max_error = 0 - for i in range(10): + for i in range(100): model = torchvision.models.resnet50() - for m in model.modules(): - m.training = False + model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) input_var = 
Variable(torch.FloatTensor(input_np)) @@ -18,12 +29,8 @@ k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) \ No newline at end of file diff --git a/tests/models/vgg11.py b/tests/models/vgg11.py index c7e39fc..86f148c 100644 --- a/tests/models/vgg11.py +++ b/tests/models/vgg11.py @@ -1,16 +1,27 @@ import numpy as np import torch +import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + if __name__ == '__main__': max_error = 0 - for i in range(10): + for i in range(100): model = torchvision.models.vgg11_bn() - for m in model.modules(): - m.training = False + model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) input_var = Variable(torch.FloatTensor(input_np)) @@ -18,12 +29,8 @@ k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) \ No newline at end of file From f79b158bc66bb0075215f3c1c3efedc723d820c4 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 7 Oct 2018 21:09:36 +0300 Subject: [PATCH 029/180] Minor workaround for constants. 
--- pytorch2keras/converter.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index cf30444..d20b417 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -140,13 +140,14 @@ def pytorch_to_keras( node_input_names.append(get_node_id(node_input.node())) if len(node_input_names) == 0: - if node_inputs[0] in model_inputs: - node_input_names.append(model_inputs[node_inputs[0]]) - else: - input_name = 'input{0}'.format(input_index) - node_input_names.append(input_name) - input_index += 1 - model_inputs[node_inputs[0]] = input_name + if len(node_inputs) > 0: + if node_inputs[0] in model_inputs: + node_input_names.append(model_inputs[node_inputs[0]]) + else: + input_name = 'input{0}'.format(input_index) + node_input_names.append(input_name) + input_index += 1 + model_inputs[node_inputs[0]] = input_name node_type = node.kind() # print(dir(node)) From 0d7c682212473184eca554a05a98d924d1417cc5 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 7 Oct 2018 21:09:56 +0300 Subject: [PATCH 030/180] Added Instance Normalization layer. --- pytorch2keras/layers.py | 34 ++++++++++++++++++++++++ tests/layers/instance_norm.py | 49 +++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 tests/layers/instance_norm.py diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 025054a..3c4d753 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -590,6 +590,39 @@ def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, short layers[scope_name] = bn(layers[inputs[0]]) +def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, short_names): + """ + Convert instance normalization layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + short_names: use short names for keras layers + """ + print('Converting instancenorm ...') + + if short_names: + tf_name = 'IN' + random_string(6) + else: + tf_name = w_name + str(random.random()) + + assert(len(inputs) == 3) + + gamma = layers[inputs[-1]] + beta = layers[inputs[-2]] + + def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): + layer = tf.contrib.layers.instance_norm(x, [gamma, beta], epsilon=epsilon, data_format='NCHW') + return layer + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + def convert_elementwise_add( params, w_name, scope_name, inputs, layers, weights, short_names ): @@ -1260,6 +1293,7 @@ def target_layer(x, axis=int(params['axes'][0])): 'onnx::AveragePool': convert_avgpool, 'onnx::Dropout': convert_dropout, 'onnx::BatchNormalization': convert_batchnorm, + 'onnx::InstanceNormalization': convert_instancenorm, 'onnx::Add': convert_elementwise_add, 'onnx::Mul': convert_elementwise_mul, 'onnx::Sub': convert_elementwise_sub, diff --git a/tests/layers/instance_norm.py b/tests/layers/instance_norm.py new file mode 100644 index 0000000..c6b2fb5 --- /dev/null +++ b/tests/layers/instance_norm.py @@ -0,0 +1,49 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class TestInstanceNorm2d(nn.Module): + """Module for InstanceNorm2d conversion testing + """ + + def __init__(self, inp=10, out=16, kernel_size=3, bias=True): + super(TestInstanceNorm2d, self).__init__() + self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) + self.bn = nn.InstanceNorm2d(out) + + def forward(self, x): + x = self.conv2d(x) + x = self.bn(x) + return x + + +if 
__name__ == '__main__': + max_error = 0 + for i in range(100): + kernel_size = np.random.randint(1, 7) + inp = np.random.randint(kernel_size + 1, 100) + out = np.random.randint(1, 100) + + model = TestInstanceNorm2d(inp, out, kernel_size, inp % 2) + model.eval() + for m in model.modules(): + m.training = False + + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) + + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print(error) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 468d0b3898fb8276b6a487e52cf6a551622e693f Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 7 Oct 2018 23:13:53 +0300 Subject: [PATCH 031/180] Hotfix InstanceNorm. Added reflection padding. --- pytorch2keras/layers.py | 50 ++++++++++++++++++++++------------- tests/layers/instance_norm.py | 4 ++- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 3c4d753..118b1ba 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -612,11 +612,13 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, sh assert(len(inputs) == 3) - gamma = layers[inputs[-1]] - beta = layers[inputs[-2]] + gamma = layers[inputs[-2]] + beta = layers[inputs[-1]] def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): - layer = tf.contrib.layers.instance_norm(x, [gamma, beta], epsilon=epsilon, data_format='NCHW') + layer = tf.contrib.layers.instance_norm(x, + param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, + epsilon=epsilon, data_format='NCHW') return layer lambda_layer = keras.layers.Lambda(target_layer) @@ -1170,25 +1172,37 @@ def convert_padding(params, 
w_name, scope_name, inputs, layers, weights, short_n """ print('Converting padding...') - if params['mode'] != 'constant': - raise AssertionError('Cannot convert non-constant padding') + if params['mode'] == 'constant': + # raise AssertionError('Cannot convert non-constant padding') - if params['value'] != 0.0: - raise AssertionError('Cannot convert non-zero padding') + if params['value'] != 0.0: + raise AssertionError('Cannot convert non-zero padding') - if short_names: - tf_name = 'PADD' + random_string(4) - else: - tf_name = w_name + str(random.random()) + if short_names: + tf_name = 'PADD' + random_string(4) + else: + tf_name = w_name + str(random.random()) - # Magic ordering - padding_name = tf_name - padding_layer = keras.layers.ZeroPadding2D( - padding=((params['pads'][2], params['pads'][6]), (params['pads'][3], params['pads'][7])), - name=padding_name - ) + # Magic ordering + padding_name = tf_name + padding_layer = keras.layers.ZeroPadding2D( + padding=((params['pads'][2], params['pads'][6]), (params['pads'][3], params['pads'][7])), + name=padding_name + ) + + layers[scope_name] = padding_layer(layers[inputs[0]]) + elif params['mode'] == 'reflect': + + def target_layer(x, pads=params['pads']): + print(x) + # x = tf.transpose(x, [0, 2, 3, 1]) + layer = tf.pad(x, [[0, 0], [0, 0], [pads[2], pads[6]], [pads[3], pads[7]]], 'REFLECT') + # layer = tf.transpose(layer, [0, 3, 1, 2]) + print(layer) + return layer - layers[scope_name] = padding_layer(layers[inputs[0]]) + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, short_names): diff --git a/tests/layers/instance_norm.py b/tests/layers/instance_norm.py index c6b2fb5..960d6ee 100644 --- a/tests/layers/instance_norm.py +++ b/tests/layers/instance_norm.py @@ -12,7 +12,9 @@ class TestInstanceNorm2d(nn.Module): def __init__(self, inp=10, out=16, kernel_size=3, bias=True): 
super(TestInstanceNorm2d, self).__init__() self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.bn = nn.InstanceNorm2d(out) + self.bn = nn.InstanceNorm2d(out, affine=True) + self.bn.weight = torch.nn.Parameter(torch.FloatTensor(self.bn.weight.size()).uniform_(0,1)) + self.bn.bias = torch.nn.Parameter(torch.FloatTensor(self.bn.bias.size()).uniform_(2,3)) def forward(self, x): x = self.conv2d(x) From 43a32261fff24fbe739d6e433d5140bc9b186937 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 7 Oct 2018 23:24:22 +0300 Subject: [PATCH 032/180] Minor fixes. Updated readme. --- README.md | 2 ++ pytorch2keras/layers.py | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8604f49..19286f5 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,8 @@ Layers: * Global average pooling (as special case of AdaptiveAvgPool2d) * Embedding * UpsamplingNearest2d +* BatchNorm2d +* InstanceNorm2d Reshape: diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 118b1ba..df3b6a8 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -618,7 +618,8 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, sh def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): layer = tf.contrib.layers.instance_norm(x, param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, - epsilon=epsilon, data_format='NCHW') + epsilon=epsilon, data_format='NCHW', + trainable=False) return layer lambda_layer = keras.layers.Lambda(target_layer) @@ -1194,11 +1195,9 @@ def convert_padding(params, w_name, scope_name, inputs, layers, weights, short_n elif params['mode'] == 'reflect': def target_layer(x, pads=params['pads']): - print(x) # x = tf.transpose(x, [0, 2, 3, 1]) layer = tf.pad(x, [[0, 0], [0, 0], [pads[2], pads[6]], [pads[3], pads[7]]], 'REFLECT') # layer = tf.transpose(layer, [0, 3, 1, 2]) - print(layer) return layer 
lambda_layer = keras.layers.Lambda(target_layer) From 5468e64b22036328da3db43d11333855ee7a543b Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 12 Oct 2018 23:53:40 +0300 Subject: [PATCH 033/180] Tested models with non-specified HW-shape. Updated readme. --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 19286f5..e86a940 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,14 @@ from converter import pytorch_to_keras k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) ``` +You can also set H and W dimensions to None to make your model shape-agnostic: + +``` +from converter import pytorch_to_keras +# we should specify shape of the input tensor +k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) +``` + That's all! If all is ok, the Keras model is stores into the `k_model` variable. ## Supported layers From f896b5646a0585770695c138e410cf45ee4342cd Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 16 Oct 2018 21:44:25 +0300 Subject: [PATCH 034/180] Updated readme. Updated pypi version. --- README.md | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e86a940..88d1ddd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # pytorch2keras [![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) +[![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) Pytorch to Keras model convertor. Still beta for now. 
diff --git a/setup.py b/setup.py index af04917..06e60dd 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.4', + version='0.1.5', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From db9c50ebea98dc59c85f9b1066fa06bcc78ff316 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 19 Oct 2018 23:39:28 +0300 Subject: [PATCH 035/180] Fixed ConvTranspose2d output shape problem. --- pytorch2keras/layers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index df3b6a8..b1dc35a 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -278,6 +278,10 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s ) layers[scope_name] = conv(layers[input_name]) + + # Magic ad-hoc. + # See the Keras issue: https://github.com/keras-team/keras/issues/6777 + layers[scope_name].set_shape(layers[scope_name]._keras_shape) pads = params['pads'] if pads[0] > 0: From 55d0c58d42261e1a05b113504653cd86a398e609 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 23 Oct 2018 00:43:13 +0300 Subject: [PATCH 036/180] Added the new option for layers naming. --- pytorch2keras/converter.py | 6 +- pytorch2keras/layers.py | 237 ++++++++++++++++++++++--------------- 2 files changed, 146 insertions(+), 97 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index d20b417..120b4d5 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -59,7 +59,7 @@ def get_node_id(node): def pytorch_to_keras( model, args, input_shapes, - change_ordering=False, training=False, verbose=False, short_names=False, + change_ordering=False, training=False, verbose=False, names=False, ): """ By given pytorch model convert layers with specified convertors. 
@@ -71,7 +71,7 @@ def pytorch_to_keras( change_ordering: change CHW to HWC training: switch model to training mode verbose: verbose output - short_names: use shorn names for keras layers + names: use short names, use random-suffix or keep original names for keras layers Returns: model: created keras model. @@ -179,7 +179,7 @@ def pytorch_to_keras( node_weights_name, node_id, node_input_names, layers, state_dict, - short_names + names ) if node_id in graph_outputs: outputs.append(layers[node_id]) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index b1dc35a..6a9d95e 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -14,7 +14,7 @@ def random_string(length): return ''.join(random.choice(string.ascii_letters) for _ in range(length)) -def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): """ Convert convolution layer. @@ -25,13 +25,14 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, short_name inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers - short_names: use short names + names: use short names for keras layers """ print('Converting convolution ...') - if short_names: + if names == 'short': tf_name = 'C' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -222,7 +223,7 @@ def target_layer(x, groups=params['group'], stride_y=params['strides'][0], strid layers[scope_name] = conv(layers[input_name]) -def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, names): """ Convert transposed convolution layer. 
@@ -233,12 +234,14 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting transposed convolution ...') - if short_names: + if names == 'short': tf_name = 'C' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -296,7 +299,7 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, s raise AssertionError('Layer is not supported for now') -def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_flatten(params, w_name, scope_name, inputs, layers, weights, names): """ Convert reshape(view). @@ -307,12 +310,14 @@ def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting flatten ...') - if short_names: + if names == 'short': tf_name = 'R' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -320,7 +325,7 @@ def convert_flatten(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = reshape(layers[inputs[0]]) -def convert_gemm(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_gemm(params, w_name, scope_name, inputs, layers, weights, names): """ Convert Linear. 
@@ -331,12 +336,14 @@ def convert_gemm(params, w_name, scope_name, inputs, layers, weights, short_name inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting Linear ...') - if short_names: + if names == 'short': tf_name = 'FC' + random_string(6) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -361,7 +368,7 @@ def convert_gemm(params, w_name, scope_name, inputs, layers, weights, short_name layers[scope_name] = dense(layers[inputs[0]]) -def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, names): """ Convert Average pooling. @@ -372,12 +379,14 @@ def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting pooling ...') - if short_names: + if names == 'short': tf_name = 'P' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -403,7 +412,7 @@ def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = pooling(layers[input_name]) -def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, names): """ Convert Max pooling. 
@@ -414,13 +423,15 @@ def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting pooling ...') - if short_names: + if names == 'short': tf_name = 'P' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -460,7 +471,7 @@ def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = pooling(layers[input_name]) -def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, names): """ Convert 3d Max pooling. @@ -471,13 +482,15 @@ def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, short_ inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting pooling ...') - if short_names: + if names == 'short': tf_name = 'P' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -517,7 +530,7 @@ def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, short_ layers[scope_name] = pooling(layers[input_name]) -def convert_dropout(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_dropout(params, w_name, scope_name, inputs, layers, weights, names): """ Convert dropout. 
@@ -528,12 +541,14 @@ def convert_dropout(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting dropout ...') - if short_names: + if names == 'short': tf_name = 'DO' + random_string(6) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -541,7 +556,7 @@ def convert_dropout(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = dropout(layers[inputs[0]]) -def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, names): """ Convert batch normalization layer. @@ -552,12 +567,14 @@ def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, short inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting batchnorm ...') - if short_names: + if names == 'short': tf_name = 'BN' + random_string(6) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -594,7 +611,7 @@ def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, short layers[scope_name] = bn(layers[inputs[0]]) -def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, names): """ Convert instance normalization layer. 
@@ -605,12 +622,14 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, sh inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting instancenorm ...') - if short_names: + if names == 'short': tf_name = 'IN' + random_string(6) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -631,7 +650,7 @@ def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): def convert_elementwise_add( - params, w_name, scope_name, inputs, layers, weights, short_names + params, w_name, scope_name, inputs, layers, weights, names ): """ Convert elementwise addition. @@ -643,14 +662,16 @@ def convert_elementwise_add( inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting elementwise_add ...') model0 = layers[inputs[0]] model1 = layers[inputs[1]] - if short_names: + if names == 'short': tf_name = 'A' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -659,7 +680,7 @@ def convert_elementwise_add( def convert_elementwise_mul( - params, w_name, scope_name, inputs, layers, weights, short_names + params, w_name, scope_name, inputs, layers, weights, names ): """ Convert elementwise multiplication. 
@@ -671,14 +692,16 @@ def convert_elementwise_mul( inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting elementwise_mul ...') model0 = layers[inputs[0]] model1 = layers[inputs[1]] - if short_names: + if names == 'short': tf_name = 'M' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -687,7 +710,7 @@ def convert_elementwise_mul( def convert_elementwise_sub( - params, w_name, scope_name, inputs, layers, weights, short_names + params, w_name, scope_name, inputs, layers, weights, names ): """ Convert elementwise subtraction. @@ -699,14 +722,16 @@ def convert_elementwise_sub( inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting elementwise_sub ...') model0 = layers[inputs[0]] model1 = layers[inputs[1]] - if short_names: + if names == 'short': tf_name = 'S' + random_string(7) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -715,7 +740,7 @@ def convert_elementwise_sub( def convert_sum( - params, w_name, scope_name, inputs, layers, weights, short_names + params, w_name, scope_name, inputs, layers, weights, names ): """ Convert sum. @@ -727,7 +752,7 @@ def convert_sum( inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting Sum ...') @@ -739,7 +764,7 @@ def target_layer(x): layers[scope_name] = lambda_layer(layers[inputs[0]]) -def convert_concat(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_concat(params, w_name, scope_name, inputs, layers, weights, names): """ Convert concatenation. 
@@ -750,7 +775,7 @@ def convert_concat(params, w_name, scope_name, inputs, layers, weights, short_na inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting concat ...') concat_nodes = [layers[i] for i in inputs] @@ -760,8 +785,10 @@ def convert_concat(params, w_name, scope_name, inputs, layers, weights, short_na layers[scope_name] = concat_nodes[0] return - if short_names: + if names == 'short': tf_name = 'CAT' + random_string(5) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -769,7 +796,7 @@ def convert_concat(params, w_name, scope_name, inputs, layers, weights, short_na layers[scope_name] = cat(concat_nodes) -def convert_relu(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_relu(params, w_name, scope_name, inputs, layers, weights, names): """ Convert relu layer. @@ -780,12 +807,14 @@ def convert_relu(params, w_name, scope_name, inputs, layers, weights, short_name inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting relu ...') - if short_names: + if names == 'short': tf_name = 'RELU' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -793,7 +822,7 @@ def convert_relu(params, w_name, scope_name, inputs, layers, weights, short_name layers[scope_name] = relu(layers[inputs[0]]) -def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, names): """ Convert leaky relu layer. 
@@ -804,12 +833,14 @@ def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, short_nam inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting lrelu ...') - if short_names: + if names == 'short': tf_name = 'lRELU' + random_string(3) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -818,7 +849,7 @@ def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, short_nam layers[scope_name] = leakyrelu(layers[inputs[0]]) -def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, names): """ Convert sigmoid layer. @@ -829,12 +860,14 @@ def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting sigmoid ...') - if short_names: + if names == 'short': tf_name = 'SIGM' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -842,7 +875,7 @@ def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = sigmoid(layers[inputs[0]]) -def convert_softmax(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): """ Convert softmax layer. 
@@ -853,12 +886,14 @@ def convert_softmax(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting softmax ...') - if short_names: + if names == 'short': tf_name = 'SMAX' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -866,7 +901,7 @@ def convert_softmax(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = softmax(layers[inputs[0]]) -def convert_tanh(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_tanh(params, w_name, scope_name, inputs, layers, weights, names): """ Convert tanh layer. @@ -877,12 +912,14 @@ def convert_tanh(params, w_name, scope_name, inputs, layers, weights, short_name inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting tanh ...') - if short_names: + if names == 'short': tf_name = 'TANH' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -890,7 +927,7 @@ def convert_tanh(params, w_name, scope_name, inputs, layers, weights, short_name layers[scope_name] = tanh(layers[inputs[0]]) -def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, names): """ Convert hardtanh layer. 
@@ -901,7 +938,7 @@ def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, short_ inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting hardtanh (clip) ...') @@ -912,7 +949,7 @@ def target_layer(x, max_val=float(params['max_val']), min_val=float(params['min_ layers[scope_name] = lambda_layer(layers[inputs[0]]) -def convert_selu(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_selu(params, w_name, scope_name, inputs, layers, weights, names): """ Convert selu layer. @@ -923,12 +960,14 @@ def convert_selu(params, w_name, scope_name, inputs, layers, weights, short_name inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting selu ...') - if short_names: + if names == 'short': tf_name = 'SELU' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -936,7 +975,7 @@ def convert_selu(params, w_name, scope_name, inputs, layers, weights, short_name layers[scope_name] = selu(layers[inputs[0]]) -def convert_transpose(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_transpose(params, w_name, scope_name, inputs, layers, weights, names): """ Convert transpose layer. 
@@ -947,7 +986,7 @@ def convert_transpose(params, w_name, scope_name, inputs, layers, weights, short inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting transpose ...') if params['perm'][0] != 0: @@ -958,7 +997,7 @@ def convert_transpose(params, w_name, scope_name, inputs, layers, weights, short except: pass else: - if short_names: + if names: tf_name = 'PERM' + random_string(4) else: tf_name = w_name + str(random.random()) @@ -966,7 +1005,7 @@ def convert_transpose(params, w_name, scope_name, inputs, layers, weights, short layers[scope_name] = permute(layers[inputs[0]]) -def convert_reshape(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): """ Convert reshape layer. @@ -977,11 +1016,13 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting reshape ...') - if short_names: + if names == 'short': tf_name = 'RESH' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -996,7 +1037,7 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, short_n layers[scope_name] = reshape(layers[inputs[0]]) -def convert_matmul(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_matmul(params, w_name, scope_name, inputs, layers, weights, names): """ Convert matmul layer. 
@@ -1007,12 +1048,14 @@ def convert_matmul(params, w_name, scope_name, inputs, layers, weights, short_na inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting matmul ...') - if short_names: + if names == 'short': tf_name = 'MMUL' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -1046,7 +1089,7 @@ def convert_matmul(params, w_name, scope_name, inputs, layers, weights, short_na raise AssertionError('Cannot convert matmul layer') -def convert_gather(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_gather(params, w_name, scope_name, inputs, layers, weights, names): """ Convert gather (embedding) layer. @@ -1057,12 +1100,14 @@ def convert_gather(params, w_name, scope_name, inputs, layers, weights, short_na inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting embedding ...') - if short_names: + if names == 'short': tf_name = 'EMBD' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -1080,7 +1125,7 @@ def convert_gather(params, w_name, scope_name, inputs, layers, weights, short_na layers[scope_name] = dense(layers[inputs[0]]) -def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, names): """ Convert reduce_sum layer. 
@@ -1091,7 +1136,7 @@ def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, shor inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting reduce_sum ...') @@ -1106,7 +1151,7 @@ def target_layer(x, keepdims=keepdims, axis=axis): layers[scope_name] = lambda_layer(layers[inputs[0]]) -def convert_constant(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_constant(params, w_name, scope_name, inputs, layers, weights, names): """ Convert constant layer. @@ -1117,7 +1162,7 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, short_ inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting constant ...') @@ -1132,7 +1177,7 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, short_ layers[scope_name] = params['value'].tolist() -def convert_upsample(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): """ Convert upsample_bilinear2d layer. 
@@ -1143,15 +1188,17 @@ def convert_upsample(params, w_name, scope_name, inputs, layers, weights, short_ inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting upsample...') if params['mode'] != 'nearest': raise AssertionError('Cannot convert non-nearest upsampling') - if short_names: + if names == 'short': tf_name = 'UPSL' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -1162,7 +1209,7 @@ def convert_upsample(params, w_name, scope_name, inputs, layers, weights, short_ layers[scope_name] = upsampling(layers[inputs[0]]) -def convert_padding(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_padding(params, w_name, scope_name, inputs, layers, weights, names): """ Convert padding layer. @@ -1173,7 +1220,7 @@ def convert_padding(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting padding...') @@ -1183,7 +1230,7 @@ def convert_padding(params, w_name, scope_name, inputs, layers, weights, short_n if params['value'] != 0.0: raise AssertionError('Cannot convert non-zero padding') - if short_names: + if names: tf_name = 'PADD' + random_string(4) else: tf_name = w_name + str(random.random()) @@ -1208,7 +1255,7 @@ def target_layer(x, pads=params['pads']): layers[scope_name] = lambda_layer(layers[inputs[0]]) -def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, names): """ Convert adaptive_avg_pool2d layer. 
@@ -1219,12 +1266,14 @@ def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weig inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting adaptive_avg_pool2d...') - if short_names: + if names == 'short': tf_name = 'APOL' + random_string(4) + elif names == 'keep': + tf_name = w_name else: tf_name = w_name + str(random.random()) @@ -1238,7 +1287,7 @@ def target_layer(x): layers[scope_name] = lambda_layer(layers_global_pool) -def convert_slice(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_slice(params, w_name, scope_name, inputs, layers, weights, names): """ Convert slice operation. @@ -1249,7 +1298,7 @@ def convert_slice(params, w_name, scope_name, inputs, layers, weights, short_nam inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting slice ...') @@ -1273,7 +1322,7 @@ def target_layer(x, axis=int(params['axes'][0]), start=int(params['starts'][0]), layers[scope_name] = lambda_layer(layers[inputs[0]]) -def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, short_names): +def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, names): """ Convert squeeze operation. @@ -1284,7 +1333,7 @@ def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, short_n inputs: pytorch node inputs layers: dictionary with keras tensors weights: pytorch state_dict - short_names: use short names for keras layers + names: use short names for keras layers """ print('Converting squeeze ...') From 9dc3eeba82cb6ca2c4301e1f0d6e3e30d9215971 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 23 Oct 2018 00:44:56 +0300 Subject: [PATCH 037/180] Update readme. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 88d1ddd..7d2781f 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ A. Yes, we're waiting for it. ## Tensorflow.js -For the proper convertion to the tensorflow.js format, please use a new flag `short_names=True`. +For the proper convertion to the tensorflow.js format, please use a new flag `names='short'`. ## How to build the latest PyTorch From 0bac2fa82ce17bd3117ecce5df99128b2d2aff37 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 30 Oct 2018 14:17:34 +0300 Subject: [PATCH 038/180] Update readme. --- README.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7d2781f..ee185b4 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ pip install pytorch2keras ## Important notice -In that moment the only PyTorch 0.2 (deprecated) and PyTorch 0.4.0 (latest stable) are supported. +At that moment the only PyTorch 0.4.0 is supported. To use the converter properly, please, make changes in your `~/.keras/keras.json`: @@ -26,9 +26,6 @@ To use the converter properly, please, make changes in your `~/.keras/keras.json ... ``` -The latest version of PyTorch (0.4.1) isn't supported yet. - - ## Python 3.7 There are some problem related to a new version: @@ -68,7 +65,7 @@ Additional information for Arch Linux users: ## How to use -It's a convertor of pytorch graph to a Keras (Tensorflow backend) graph. +It's the convertor of pytorch graph to a Keras (Tensorflow backend) graph. 
Firstly, we need to load (or create) pytorch model: @@ -144,13 +141,9 @@ Activations: * ReLU * LeakyReLU -* PReLU (only with 0.2) -* SELU (only with 0.2) * Tanh * HardTanh (clamp) * Softmax -* Softplus (only with 0.2) -* Softsign (only with 0.2) * Sigmoid Element-wise: From 7200590f1c936b05e629e8ecdff09d5f09a4f71d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 30 Oct 2018 14:23:43 +0300 Subject: [PATCH 039/180] Added test for multiple inputs. --- tests/layers/minputs.py | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/layers/minputs.py diff --git a/tests/layers/minputs.py b/tests/layers/minputs.py new file mode 100644 index 0000000..00c1728 --- /dev/null +++ b/tests/layers/minputs.py @@ -0,0 +1,58 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +from sys import exit + + +class TestMultipleInputs(nn.Module): + """Module for multiple inputs conversion testing + """ + + def __init__(self, inp=10, out=16, kernel_size=3, bias=True): + super(TestMultipleInputs, self).__init__() + self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) + self.deconv2d = nn.ConvTranspose2d(inp, out, kernel_size=kernel_size, bias=bias) + self.in2d = nn.InstanceNorm2d(out) + + def forward(self, x, y, z): + # return self.in2d(self.conv2d(x)) + self.conv2d(y) + self.conv2d(z) + return self.in2d(self.deconv2d(x)) + self.in2d(self.deconv2d(y)) + self.in2d(self.deconv2d(z)) + # return self.conv2d(x) + self.conv2d(y) + self.conv2d(z) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict([input_np, input_np, input_np]) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(100): + kernel_size = 
np.random.randint(1, 7) + inp = np.random.randint(kernel_size + 1, 100) + out = np.random.randint(1, 100) + + model = TestMultipleInputs(inp, out, kernel_size, inp % 2) + + input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) + input_var = Variable(torch.FloatTensor(input_np)) + input_var2 = Variable(torch.FloatTensor(input_np)) + input_var3 = Variable(torch.FloatTensor(input_np)) + + output = model(input_var, input_var2, input_var3) + + k_model = pytorch_to_keras(model, [input_var, input_var2, input_var3], [(inp, isnp, inp,), (inp, inp, inp,), (inp, inp, inp,)], verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) \ No newline at end of file From cf43ccd6a474333df2fb67e09a88e234b91ffae3 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 31 Oct 2018 11:27:27 +0300 Subject: [PATCH 040/180] Fix code formatting according to PEP8. --- setup.py | 2 +- tests/layers/avg_pool.py | 12 ++++++------ tests/layers/channel_shuffle.py | 2 -- tests/layers/convtranspose2d.py | 4 +++- tests/layers/depthwise_conv2d.py | 2 +- tests/layers/group_conv2d.py | 2 +- tests/layers/instance_norm.py | 4 ++-- tests/layers/minputs.py | 14 +++++++------ tests/layers/multiple_inputs.py | 7 ++++++- tests/models/alexnet.py | 5 ++--- tests/models/menet.py | 27 ++------------------------ tests/models/preresnet18.py | 1 - tests/models/resnet18.py | 9 ++++++--- tests/models/resnet18_channels_last.py | 3 +-- tests/models/resnet34.py | 5 ++--- tests/models/resnet50.py | 5 ++--- tests/models/squeezenext.py | 3 --- tests/models/vgg11.py | 5 ++--- 18 files changed, 45 insertions(+), 67 deletions(-) diff --git a/setup.py b/setup.py index 06e60dd..d4e1fc9 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ with open('README.md') as f: - long_description = f.read() + long_description = f.read() setup(name='pytorch2keras', diff --git a/tests/layers/avg_pool.py b/tests/layers/avg_pool.py index 
84cbac6..6d093f9 100644 --- a/tests/layers/avg_pool.py +++ b/tests/layers/avg_pool.py @@ -6,13 +6,13 @@ class AvgPool(nn.Module): - """Module for MaxPool conversion testing + """Module for AveragePool conversion testing """ def __init__(self, inp=10, out=16, kernel_size=3, bias=True): super(AvgPool, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, padding=3, bias=bias) - self.pool = nn.AvgPool2d(kernel_size=kernel_size, count_include_pad=True) + self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, padding=1, bias=bias) + self.pool = nn.AvgPool2d(kernel_size=3, padding=1, count_include_pad=False, stride=2) def forward(self, x): x = self.conv2d(x) @@ -23,7 +23,7 @@ def forward(self, x): if __name__ == '__main__': max_error = 0 for i in range(100): - kernel_size = np.random.randint(1, 7) + kernel_size = np.random.randint(4, 7) inp = np.random.randint(kernel_size + 1, 100) out = np.random.randint(1, 100) @@ -33,8 +33,8 @@ def forward(self, x): input_var = Variable(torch.FloatTensor(input_np)) output = model(input_var) - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True, names='keep') + print(k_model.summary()) pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) diff --git a/tests/layers/channel_shuffle.py b/tests/layers/channel_shuffle.py index 52b2900..05f9c16 100644 --- a/tests/layers/channel_shuffle.py +++ b/tests/layers/channel_shuffle.py @@ -12,7 +12,6 @@ def channel_shuffle(x, groups): groups (int): groups to be split """ batch, channels, height, width = x.size() - #assert (channels % groups == 0) channels_per_group = channels // groups x = x.view(batch, groups, channels_per_group, height, width) x = torch.transpose(x, 1, 2).contiguous() @@ -59,4 +58,3 @@ def forward(self, x): max_error = error print('Max error: {0}'.format(max_error)) - diff --git a/tests/layers/convtranspose2d.py 
b/tests/layers/convtranspose2d.py index 3536c2b..b9ddfa2 100644 --- a/tests/layers/convtranspose2d.py +++ b/tests/layers/convtranspose2d.py @@ -18,6 +18,7 @@ def forward(self, x): x = self.conv2d(x) return x + class ConvTranspose2dTest(unittest.TestCase): N = 100 @@ -45,7 +46,7 @@ def test(self): max_error = error print('Max error: {0}'.format(max_error)) - + def test_with_padding(self): max_error = 0 for i in range(self.N): @@ -71,5 +72,6 @@ def test_with_padding(self): print('Max error: {0}'.format(max_error)) + if __name__ == '__main__': unittest.main() diff --git a/tests/layers/depthwise_conv2d.py b/tests/layers/depthwise_conv2d.py index 832ee23..ed00cba 100644 --- a/tests/layers/depthwise_conv2d.py +++ b/tests/layers/depthwise_conv2d.py @@ -56,7 +56,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error diff --git a/tests/layers/group_conv2d.py b/tests/layers/group_conv2d.py index d53a603..108610c 100644 --- a/tests/layers/group_conv2d.py +++ b/tests/layers/group_conv2d.py @@ -35,7 +35,7 @@ def forward(self, x): for i in range(100): kernel_size = np.random.randint(1, 7) groups = np.random.randint(1, 10) - inp = np.random.randint(kernel_size + 1, 10) * groups + inp = np.random.randint(kernel_size + 1, 10) * groups h, w = 32, 32 model = TestGroupConv2d(inp, groups) diff --git a/tests/layers/instance_norm.py b/tests/layers/instance_norm.py index 960d6ee..1c1bd27 100644 --- a/tests/layers/instance_norm.py +++ b/tests/layers/instance_norm.py @@ -13,8 +13,8 @@ def __init__(self, inp=10, out=16, kernel_size=3, bias=True): super(TestInstanceNorm2d, self).__init__() self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) self.bn = nn.InstanceNorm2d(out, affine=True) - self.bn.weight = 
torch.nn.Parameter(torch.FloatTensor(self.bn.weight.size()).uniform_(0,1)) - self.bn.bias = torch.nn.Parameter(torch.FloatTensor(self.bn.bias.size()).uniform_(2,3)) + self.bn.weight = torch.nn.Parameter(torch.FloatTensor(self.bn.weight.size()).uniform_(0, 1)) + self.bn.bias = torch.nn.Parameter(torch.FloatTensor(self.bn.bias.size()).uniform_(2, 3)) def forward(self, x): x = self.conv2d(x) diff --git a/tests/layers/minputs.py b/tests/layers/minputs.py index 00c1728..8d88379 100644 --- a/tests/layers/minputs.py +++ b/tests/layers/minputs.py @@ -3,7 +3,6 @@ import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras -from sys import exit class TestMultipleInputs(nn.Module): @@ -17,9 +16,7 @@ def __init__(self, inp=10, out=16, kernel_size=3, bias=True): self.in2d = nn.InstanceNorm2d(out) def forward(self, x, y, z): - # return self.in2d(self.conv2d(x)) + self.conv2d(y) + self.conv2d(z) return self.in2d(self.deconv2d(x)) + self.in2d(self.deconv2d(y)) + self.in2d(self.deconv2d(z)) - # return self.conv2d(x) + self.conv2d(y) + self.conv2d(z) def check_error(output, k_model, input_np, epsilon=1e-5): @@ -49,10 +46,15 @@ def check_error(output, k_model, input_np, epsilon=1e-5): output = model(input_var, input_var2, input_var3) - k_model = pytorch_to_keras(model, [input_var, input_var2, input_var3], [(inp, isnp, inp,), (inp, inp, inp,), (inp, inp, inp,)], verbose=True) + k_model = pytorch_to_keras( + model, + [input_var, input_var2, input_var3], + [(inp, inp, inp,), (inp, inp, inp,), (inp, inp, inp,)], + verbose=True + ) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) \ No newline at end of file + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/multiple_inputs.py b/tests/layers/multiple_inputs.py index dfff736..196dc47 100644 --- a/tests/layers/multiple_inputs.py +++ 
b/tests/layers/multiple_inputs.py @@ -32,7 +32,12 @@ def forward(self, x, y, z): input_var3 = Variable(torch.FloatTensor(input_np)) output = model(input_var, input_var2, input_var3) - k_model = pytorch_to_keras(model, [input_var, input_var2, input_var3], [(inp, inp, inp,), (inp, inp, inp,), (inp, inp, inp,)], verbose=True) + k_model = pytorch_to_keras( + model, + [input_var, input_var2, input_var3], + [(inp, inp, inp,), (inp, inp, inp,), (inp, inp, inp,)], + verbose=True + ) k_model.summary() pytorch_output = output.data.numpy() keras_output = k_model.predict([input_np, input_np, input_np]) diff --git a/tests/models/alexnet.py b/tests/models/alexnet.py index 62d2a95..01e9419 100644 --- a/tests/models/alexnet.py +++ b/tests/models/alexnet.py @@ -1,6 +1,5 @@ import numpy as np import torch -import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision @@ -29,8 +28,8 @@ def check_error(output, k_model, input_np, epsilon=1e-5): k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) \ No newline at end of file + print('Max error: {0}'.format(max_error)) diff --git a/tests/models/menet.py b/tests/models/menet.py index 30ffb61..1381ba7 100644 --- a/tests/models/menet.py +++ b/tests/models/menet.py @@ -12,9 +12,6 @@ import torch.nn.functional as F import torch.nn.init as init -# 0.034489512 - - def depthwise_conv3x3(channels, stride): @@ -38,6 +35,7 @@ def group_conv1x1(in_channels, groups=groups, bias=False) + def channel_shuffle(x, groups): """Channel Shuffle operation from ShuffleNet [arxiv: 1707.01083] @@ -46,7 +44,6 @@ def channel_shuffle(x, groups (int): groups to be split """ batch, channels, height, width = x.size() - #assert (channels % groups == 0) channels_per_group = channels // groups x = 
x.view(batch, groups, channels_per_group, height, width) x = torch.transpose(x, 1, 2).contiguous() @@ -60,7 +57,6 @@ def __init__(self, channels, groups): super(ChannelShuffle, self).__init__() - #assert (channels % groups == 0) if channels % groups != 0: raise ValueError('channels must be divisible by groups') self.groups = groups @@ -68,6 +64,7 @@ def __init__(self, def forward(self, x): return channel_shuffle(x, self.groups) + class ShuffleInitBlock(nn.Module): def __init__(self, @@ -97,7 +94,6 @@ def forward(self, x): return x - def conv1x1(in_channels, out_channels): return nn.Conv2d( @@ -344,22 +340,3 @@ def menet456_24x1_g3(**kwargs): max_error = error print('Max error: {0}'.format(max_error)) -# -# -# if __name__ == "__main__": -# import numpy as np -# import torch -# from torch.autograd import Variable -# net = menet228_12x1_g3(num_classes=1000) -# input = Variable(torch.randn(1, 3, 224, 224)) -# output = net(input) -# #print(output.size()) -# #print("net={}".format(net)) -# -# net.train() -# net_params = filter(lambda p: p.requires_grad, net.parameters()) -# weight_count = 0 -# for param in net_params: -# weight_count += np.prod(param.size()) -# print("weight_count={}".format(weight_count)) -# diff --git a/tests/models/preresnet18.py b/tests/models/preresnet18.py index d9b0373..251573f 100644 --- a/tests/models/preresnet18.py +++ b/tests/models/preresnet18.py @@ -6,7 +6,6 @@ import torch from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras -import torchvision import os import torch.nn as nn diff --git a/tests/models/resnet18.py b/tests/models/resnet18.py index c49cdd3..09032ee 100644 --- a/tests/models/resnet18.py +++ b/tests/models/resnet18.py @@ -1,6 +1,5 @@ import numpy as np import torch -import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision @@ -27,10 +26,14 @@ def check_error(output, k_model, input_np, epsilon=1e-5): input_var = 
Variable(torch.FloatTensor(input_np)) output = model(input_var) + output.sum().backward() + + print(dir(output.grad_fn.next_functions[0][0])) + exit(0) k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) \ No newline at end of file + print('Max error: {0}'.format(max_error)) diff --git a/tests/models/resnet18_channels_last.py b/tests/models/resnet18_channels_last.py index 08bc62c..fce1916 100644 --- a/tests/models/resnet18_channels_last.py +++ b/tests/models/resnet18_channels_last.py @@ -1,6 +1,5 @@ import numpy as np import torch -import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision @@ -29,7 +28,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True, change_ordering=True) - error = check_error(output, k_model, input_np.transpose(0, 2, 3, 1)) + error = check_error(output, k_model, input_np.transpose(0, 2, 3, 1)) if max_error < error: max_error = error diff --git a/tests/models/resnet34.py b/tests/models/resnet34.py index d73b050..9909914 100644 --- a/tests/models/resnet34.py +++ b/tests/models/resnet34.py @@ -1,6 +1,5 @@ import numpy as np import torch -import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision @@ -29,8 +28,8 @@ def check_error(output, k_model, input_np, epsilon=1e-5): k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) \ No newline at end of file + print('Max error: {0}'.format(max_error)) diff --git 
a/tests/models/resnet50.py b/tests/models/resnet50.py index b31ffe8..0b1b173 100644 --- a/tests/models/resnet50.py +++ b/tests/models/resnet50.py @@ -1,6 +1,5 @@ import numpy as np import torch -import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision @@ -29,8 +28,8 @@ def check_error(output, k_model, input_np, epsilon=1e-5): k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) \ No newline at end of file + print('Max error: {0}'.format(max_error)) diff --git a/tests/models/squeezenext.py b/tests/models/squeezenext.py index 4200e4b..e78b9f4 100644 --- a/tests/models/squeezenext.py +++ b/tests/models/squeezenext.py @@ -7,7 +7,6 @@ import torch from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras -import torchvision import os import torch.nn as nn @@ -381,8 +380,6 @@ def sqnxt23v5_w2(**kwargs): return get_squeezenext(version="23v5", width_scale=2.0, model_name="sqnxt23v5_w2", **kwargs) - - if __name__ == '__main__': max_error = 0 for i in range(10): diff --git a/tests/models/vgg11.py b/tests/models/vgg11.py index 86f148c..77eb677 100644 --- a/tests/models/vgg11.py +++ b/tests/models/vgg11.py @@ -1,6 +1,5 @@ import numpy as np import torch -import torch.nn as nn from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras import torchvision @@ -29,8 +28,8 @@ def check_error(output, k_model, input_np, epsilon=1e-5): k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - error = check_error(output, k_model, input_np) + error = check_error(output, k_model, input_np) if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) \ No newline at end of file + print('Max error: 
{0}'.format(max_error)) From c97233fcb440280b8e92943768619db9b3dbb886 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 31 Oct 2018 11:48:46 +0300 Subject: [PATCH 041/180] Fix error in adaptive average pooling (global average pooling). Fix PEP8 issues. --- pytorch2keras/layers.py | 53 ++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 6a9d95e..8556688 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -40,7 +40,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): weights_name = '{0}.weight'.format(w_name) input_name = inputs[0] - if len(weights[weights_name].numpy().shape) == 5: # 3D conv + if len(weights[weights_name].numpy().shape) == 5: # 3D conv W = weights[weights_name].numpy().transpose(2, 3, 4, 1, 0) height, width, channels, n_layers, n_filters = W.shape @@ -98,9 +98,12 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): in_channels = channels_per_group * n_groups if n_groups == in_channels: - print('Perform depthwise convolution: h={} w={} in={} out={}' - .format(height, width, in_channels, out_channels)) - + print( + 'Perform depthwise convolution: h={} w={} in={} out={}'.format( + height, width, in_channels, out_channels + ) + ) + if bias_name in weights: biases = weights[bias_name].numpy() has_bias = True @@ -115,7 +118,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): weights = [W, pointwise_wt, biases] else: weights = [W, pointwise_wt] - + conv = keras.layers.SeparableConv2D( filters=out_channels, depth_multiplier=1, @@ -136,19 +139,18 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) # output_groups = [convolve(i, k) for i, k in zip(input_groups, 
weight_groups)] - + # # Concat the convolved output together again # conv = tf.concat(axis=3, values=output_groups) def target_layer(x, groups=params['group'], stride_y=params['strides'][0], stride_x=params['strides'][1]): x = tf.transpose(x, [0, 2, 3, 1]) - convolve = lambda i, k: tf.nn.conv2d(i, k, - strides=[1, stride_y, stride_x, 1], - padding='VALID') + def convolve_lambda(i, k): + return tf.nn.conv2d(i, k, strides=[1, stride_y, stride_x, 1], padding='VALID') input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W.transpose(0, 1, 2, 3)) - output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] + output_groups = [convolve_lambda(i, k) for i, k in zip(input_groups, weight_groups)] layer = tf.concat(axis=3, values=output_groups) @@ -279,9 +281,9 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, n bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) - + layers[scope_name] = conv(layers[input_name]) - + # Magic ad-hoc. 
# See the Keras issue: https://github.com/keras-team/keras/issues/6777 layers[scope_name].set_shape(layers[scope_name]._keras_shape) @@ -639,13 +641,15 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, na beta = layers[inputs[-1]] def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): - layer = tf.contrib.layers.instance_norm(x, + layer = tf.contrib.layers.instance_norm( + x, param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, epsilon=epsilon, data_format='NCHW', - trainable=False) + trainable=False + ) return layer - lambda_layer = keras.layers.Lambda(target_layer) + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) layers[scope_name] = lambda_layer(layers[inputs[0]]) @@ -784,7 +788,7 @@ def convert_concat(params, w_name, scope_name, inputs, layers, weights, names): # no-op layers[scope_name] = concat_nodes[0] return - + if names == 'short': tf_name = 'CAT' + random_string(5) elif names == 'keep': @@ -992,10 +996,10 @@ def convert_transpose(params, w_name, scope_name, inputs, layers, weights, names if params['perm'][0] != 0: # raise AssertionError('Cannot permute batch dimension') print('!!! Cannot permute batch dimension. 
Result may be wrong !!!') - try: - layers[scope_name] = layers[inputs[0]] - except: - pass + # try: + layers[scope_name] = layers[inputs[0]] + # except: + # pass else: if names: tf_name = 'PERM' + random_string(4) @@ -1277,14 +1281,15 @@ def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weig else: tf_name = w_name + str(random.random()) - global_pool = keras.layers.GlobalAveragePooling2D() - layers_global_pool = global_pool(layers[inputs[0]]) + global_pool = keras.layers.GlobalAveragePooling2D(data_format='channels_first', name=tf_name) + layers[scope_name] = global_pool(layers[inputs[0]]) def target_layer(x): return keras.backend.expand_dims(x) - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers_global_pool) + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') + layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims + layers[scope_name] = lambda_layer(layers[scope_name]) def convert_slice(params, w_name, scope_name, inputs, layers, weights, names): From a3353c4bde06a5d503064d6dabaea9c89d832a86 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 2 Nov 2018 00:38:03 +0300 Subject: [PATCH 042/180] Fix broken resnet18 test. 
--- tests/models/resnet18.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/models/resnet18.py b/tests/models/resnet18.py index 09032ee..0616e29 100644 --- a/tests/models/resnet18.py +++ b/tests/models/resnet18.py @@ -26,10 +26,6 @@ def check_error(output, k_model, input_np, epsilon=1e-5): input_var = Variable(torch.FloatTensor(input_np)) output = model(input_var) - output.sum().backward() - - print(dir(output.grad_fn.next_functions[0][0])) - exit(0) k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) error = check_error(output, k_model, input_np) From a39278e522a81764113a4155b331727164ee290c Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 13 Nov 2018 19:31:58 +0300 Subject: [PATCH 043/180] Added 2 more shields. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ee185b4..21d806c 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ [![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) [![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) +![PyPI - Downloads](https://img.shields.io/pypi/dd/pytorch2keras.svg) +![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) Pytorch to Keras model convertor. Still beta for now. From bbe55edb08a6a2373314091f59e757f842dea742 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 13 Nov 2018 19:34:24 +0300 Subject: [PATCH 044/180] Update pypi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d4e1fc9..dee81fe 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.5', + version='0.1.6', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From fdeb6cfd241ff43d317c34c0c13e07b11b71fdcf Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 14 Nov 2018 11:43:06 +0300 Subject: [PATCH 045/180] Update jit optimization functions. --- pytorch2keras/converter.py | 65 +++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 120b4d5..24405ec 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -1,5 +1,5 @@ """ -The Pytorch2Keras converter module over JIT-trace. +The PyTorch2Keras converter module over JIT-trace. """ import torch @@ -31,26 +31,68 @@ def set_training(model, mode): if old_mode != mode: model.train(old_mode) - -def _optimize_graph(graph, aten): - # run dce first to eliminate dead parts of the graph that might have been +from torch._C import ListType + +# ONNX can't handle constants that are lists of tensors, which can +# get generated in constant prop. 
So we split them back into prim::ListConstructs +def _split_tensor_list_constants(g, block): + for node in block.nodes(): + for subblock in node.blocks(): + _split_tensor_list_constants(g, subblock) + if node.kind() == "prim::Constant": + output_type = node.output().type() + if output_type.isSubtypeOf(ListType.ofTensors()): + inputs = [g.create("prim::Constant").t_('value', t) + .insertBefore(node).output() + for t in node['value']] + lc = (g.create("prim::ListConstruct", inputs) + .insertBefore(node) + .output() + .setType(ListType.ofTensors())) + node.output().replaceAllUsesWith(lc) + +from torch.onnx import ONNX_ARCHIVE_MODEL_PROTO_NAME, ExportTypes, OperatorExportTypes +def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): + torch._C._jit_pass_remove_inplace_ops(graph) + # we record now record some ops like ones/zeros + # into a trace where we previously recorded constants + # use constant prop to maintain our current level of onnx support + # without implementing symbolics for all of them + torch._C._jit_pass_constant_propagation(graph) + # _split_tensor_list_constants(graph, graph) + # run dce to eliminate dead parts of the graph that might have been # left behind by things like symbolic_override torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_peephole(graph) + torch._C._jit_pass_canonicalize_ops(graph) torch._C._jit_pass_lint(graph) - graph = torch._C._jit_pass_onnx(graph, aten) + + torch._C._jit_pass_peephole(graph, True) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) + + # onnx only supports tensors, but 1 / 2 = 0.5 and tensor(1) / tensor(2) = 0 + torch._C._jit_pass_prepare_division_for_onnx(graph) + # onnx only supports tensors, so we turn all out number types into tensors + torch._C._jit_pass_erase_number_types(graph) + # onnx does not support tuples, so try to remove them + torch._C._jit_pass_lower_all_tuples(graph) + # torch._C._jit_pass_peephole(graph, True) 
torch._C._jit_pass_lint(graph) + + if operator_export_type != OperatorExportTypes.RAW: + graph = torch._C._jit_pass_onnx(graph, operator_export_type) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_onnx_peephole(graph) + torch._C._jit_pass_lint(graph) torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - graph = torch._C._jit_pass_canonicalize(graph) - torch._C._jit_pass_lint(graph) + # torch._C._jit_pass_fixup_onnx_loops(graph) + # torch._C._jit_pass_lint(graph) + # graph = torch._C._jit_pass_canonicalize(graph) + # torch._C._jit_pass_lint(graph) return graph - def get_node_id(node): import re node_id = re.search(r"[\d]+", node.__str__()) @@ -95,8 +137,9 @@ def pytorch_to_keras( "something weird is happening in your model!") # _optimize_trace(trace, False) - trace.set_graph(_optimize_graph(trace.graph(), False)) + trace.set_graph(_optimize_graph(trace.graph(), OperatorExportTypes.ONNX)) + trace.graph().lint() if verbose: print(trace.graph()) From c4376d27981add8ae55752d4b3976e0d901f832a Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 14 Nov 2018 11:44:16 +0300 Subject: [PATCH 046/180] Add adaptive max pooling. --- pytorch2keras/layers.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 8556688..c81adcf 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1292,6 +1292,39 @@ def target_layer(x): layers[scope_name] = lambda_layer(layers[scope_name]) +def convert_adaptive_max_pool2d(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert convert_adaptive_max_pool2d layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting adaptive_avg_pool2d...') + + if names == 'short': + tf_name = 'APOL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + global_pool = keras.layers.GlobalMaxPooling2D(data_format='channels_first', name=tf_name) + layers[scope_name] = global_pool(layers[inputs[0]]) + + def target_layer(x): + return keras.backend.expand_dims(x) + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') + layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims + layers[scope_name] = lambda_layer(layers[scope_name]) + + def convert_slice(params, w_name, scope_name, inputs, layers, weights, names): """ Convert slice operation. From 132755d1bafed9eb48e43ab29de86031b0df3a1e Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 14 Nov 2018 11:45:06 +0300 Subject: [PATCH 047/180] Add unsqueeze and shape operations. --- pytorch2keras/layers.py | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index c81adcf..252da2d 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1385,6 +1385,58 @@ def target_layer(x, axis=int(params['axes'][0])): layers[scope_name] = lambda_layer(layers[inputs[0]]) +def convert_unsqueeze(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert unsqueeze operation. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting unsqueeze ...') + + if names == 'short': + tf_name = 'UNSQ' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + + def target_layer(x): + return keras.backend.expand_dims(x) + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_shape(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert shape operation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting shape ...') + + def target_layer(x): + return tf.shape(x) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + AVAILABLE_CONVERTERS = { 'onnx::Conv': convert_conv, 'onnx::ConvTranspose': convert_convtranspose, @@ -1419,6 +1471,9 @@ def target_layer(x, axis=int(params['axes'][0])): 'onnx::Upsample': convert_upsample, 'onnx::Pad': convert_padding, 'aten::adaptive_avg_pool2d': convert_adaptive_avg_pool2d, + 'aten::adaptive_max_pool2d': convert_adaptive_max_pool2d, 'onnx::Slice': convert_slice, 'onnx::Squeeze': convert_squeeze, + 'onnx::Unsqueeze': convert_unsqueeze, + 'onnx::Shape': convert_shape, } From 3ef620c6f3caca3af0c621caff4d601cbc9215b0 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:02:37 +0300 Subject: [PATCH 048/180] Update graph optimization function. 
--- pytorch2keras/converter.py | 132 +++++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 57 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 24405ec..47c84dc 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -2,12 +2,15 @@ The PyTorch2Keras converter module over JIT-trace. """ +import contextlib +from packaging import version + import torch import torch.jit import torch.autograd import torch.serialization -import contextlib from torch.jit import _unique_state_dict +from torch.onnx import ONNX_ARCHIVE_MODEL_PROTO_NAME, ExportTypes, OperatorExportTypes from .layers import AVAILABLE_CONVERTERS @@ -31,68 +34,83 @@ def set_training(model, mode): if old_mode != mode: model.train(old_mode) -from torch._C import ListType - -# ONNX can't handle constants that are lists of tensors, which can -# get generated in constant prop. So we split them back into prim::ListConstructs -def _split_tensor_list_constants(g, block): - for node in block.nodes(): - for subblock in node.blocks(): - _split_tensor_list_constants(g, subblock) - if node.kind() == "prim::Constant": - output_type = node.output().type() - if output_type.isSubtypeOf(ListType.ofTensors()): - inputs = [g.create("prim::Constant").t_('value', t) - .insertBefore(node).output() - for t in node['value']] - lc = (g.create("prim::ListConstruct", inputs) - .insertBefore(node) - .output() - .setType(ListType.ofTensors())) - node.output().replaceAllUsesWith(lc) +if torch.__version__ != '0.4.1': + from torch._C import ListType + + # ONNX can't handle constants that are lists of tensors, which can + # get generated in constant prop. 
So we split them back into prim::ListConstructs + def _split_tensor_list_constants(g, block): + for node in block.nodes(): + for subblock in node.blocks(): + _split_tensor_list_constants(g, subblock) + if node.kind() == "prim::Constant": + output_type = node.output().type() + if output_type.isSubtypeOf(ListType.ofTensors()): + inputs = [g.create("prim::Constant").t_('value', t) + .insertBefore(node).output() + for t in node['value']] + lc = (g.create("prim::ListConstruct", inputs) + .insertBefore(node) + .output() + .setType(ListType.ofTensors())) + node.output().replaceAllUsesWith(lc) + -from torch.onnx import ONNX_ARCHIVE_MODEL_PROTO_NAME, ExportTypes, OperatorExportTypes def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): - torch._C._jit_pass_remove_inplace_ops(graph) - # we record now record some ops like ones/zeros - # into a trace where we previously recorded constants - # use constant prop to maintain our current level of onnx support - # without implementing symbolics for all of them - torch._C._jit_pass_constant_propagation(graph) - # _split_tensor_list_constants(graph, graph) - # run dce to eliminate dead parts of the graph that might have been - # left behind by things like symbolic_override - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - - torch._C._jit_pass_canonicalize_ops(graph) - torch._C._jit_pass_lint(graph) - - torch._C._jit_pass_peephole(graph, True) - torch._C._jit_pass_lint(graph) - - # onnx only supports tensors, but 1 / 2 = 0.5 and tensor(1) / tensor(2) = 0 - torch._C._jit_pass_prepare_division_for_onnx(graph) - # onnx only supports tensors, so we turn all out number types into tensors - torch._C._jit_pass_erase_number_types(graph) - # onnx does not support tuples, so try to remove them - torch._C._jit_pass_lower_all_tuples(graph) - # torch._C._jit_pass_peephole(graph, True) - torch._C._jit_pass_lint(graph) - - if operator_export_type != OperatorExportTypes.RAW: - graph = 
torch._C._jit_pass_onnx(graph, operator_export_type) + if version.parse('0.4.1') < version.parse(torch.__version__): + torch._C._jit_pass_remove_inplace_ops(graph) + # we record now record some ops like ones/zeros + # into a trace where we previously recorded constants + # use constant prop to maintain our current level of onnx support + # without implementing symbolics for all of them + torch._C._jit_pass_constant_propagation(graph) + # _split_tensor_list_constants(graph, graph) + # run dce to eliminate dead parts of the graph that might have been + # left behind by things like symbolic_override + torch._C._jit_pass_dce(graph) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_canonicalize_ops(graph) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) + + torch._C._jit_pass_peephole(graph, True) + torch._C._jit_pass_lint(graph) + + # onnx only supports tensors, but 1 / 2 = 0.5 and tensor(1) / tensor(2) = 0 + torch._C._jit_pass_prepare_division_for_onnx(graph) + # onnx only supports tensors, so we turn all out number types into tensors + torch._C._jit_pass_erase_number_types(graph) + # onnx does not support tuples, so try to remove them + torch._C._jit_pass_lower_all_tuples(graph) + # torch._C._jit_pass_peephole(graph, True) + torch._C._jit_pass_lint(graph) + + if operator_export_type != OperatorExportTypes.RAW: + graph = torch._C._jit_pass_onnx(graph, operator_export_type) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_onnx_peephole(graph) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_dce(graph) + torch._C._jit_pass_lint(graph) + else: + torch._C._jit_pass_dce(graph) + torch._C._jit_pass_lint(graph) + + torch._C._jit_pass_peephole(graph) + torch._C._jit_pass_lint(graph) + + # torch._C._jit_pass_peephole(graph, True) + torch._C._jit_pass_lint(graph) + + if operator_export_type != OperatorExportTypes.RAW: + graph = torch._C._jit_pass_onnx(graph, operator_export_type) + torch._C._jit_pass_lint(graph) + 
torch._C._jit_pass_onnx_peephole(graph) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - # torch._C._jit_pass_fixup_onnx_loops(graph) - # torch._C._jit_pass_lint(graph) - # graph = torch._C._jit_pass_canonicalize(graph) - # torch._C._jit_pass_lint(graph) return graph + def get_node_id(node): import re node_id = re.search(r"[\d]+", node.__str__()) From 97230cfa56515f3e08f7dbf44c96b5f21de2cc17 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:03:08 +0300 Subject: [PATCH 049/180] Fixes in layers. --- pytorch2keras/layers.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 252da2d..73c37dd 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1034,8 +1034,15 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): if layers[inputs[1]][0] == -1: print('Cannot deduct batch size! 
It will be omitted, but result may be wrong.') - reshape = keras.layers.Reshape(layers[inputs[1]][1:], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) + print(layers[inputs[0]]) + + def target_layer(x, shape=layers[inputs[1]]): + return tf.reshape(x, shape) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + # layers[scope_name] = reshape(layers[inputs[0]]) else: reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) layers[scope_name] = reshape(layers[inputs[0]]) @@ -1170,15 +1177,15 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, names) """ print('Converting constant ...') - # params_list = params['value'].numpy().tolist() + params_list = params['value'].numpy().tolist() - # def target_layer(x): - # import keras.backend as K - # return K.constant(params_list) + def target_layer(x): + import keras.backend as K + return tf.constant(params_list) - # lambda_layer = keras.layers.Lambda(target_layer) - # layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py - layers[scope_name] = params['value'].tolist() + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py + # layers[scope_name] = params['value'].tolist() def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): @@ -1470,7 +1477,9 @@ def target_layer(x): 'onnx::Constant': convert_constant, 'onnx::Upsample': convert_upsample, 'onnx::Pad': convert_padding, + 'onnx::GlobalAveragePool': convert_adaptive_avg_pool2d, 'aten::adaptive_avg_pool2d': convert_adaptive_avg_pool2d, + 'onnx::GlobalMaxPool': convert_adaptive_max_pool2d, 'aten::adaptive_max_pool2d': convert_adaptive_max_pool2d, 'onnx::Slice': convert_slice, 'onnx::Squeeze': convert_squeeze, From 3a7ebd4fad62ee253ab4b215772c78a0950739f9 Mon Sep 17 
00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:03:20 +0300 Subject: [PATCH 050/180] Add packaging to requirements. --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 694ca6a..ed607a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ keras==2.2.2 tensorflow==1.9.0 numpy==1.14.1 torch==0.4 -torchvision==0.2.1 \ No newline at end of file +torchvision==0.2.1 +packaging \ No newline at end of file From dffe755c8e3b2637b745ca4c6d1e3257af055404 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:03:46 +0300 Subject: [PATCH 051/180] Add heuristic to convert flatten. --- pytorch2keras/converter.py | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 47c84dc..f55d71e 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -167,6 +167,52 @@ def pytorch_to_keras( # Get all graph nodes nodes = list(trace.graph().nodes()) + # Optimize Flatten: + # When we have something loke that: + # + # %523 : Long() = onnx::Constant[value={0}](), scope: ResNet + # %524 : Dynamic = onnx::Shape(%522), scope: ResNet + # %526 : Long() = onnx::Gather[axis=0](%524, %523), scope: ResNet + # %527 : Long() = onnx::Constant[value={-1}](), scope: ResNet + # %534 : Dynamic = onnx::Unsqueeze[axes=[0]](%526) + # %535 : Dynamic = onnx::Unsqueeze[axes=[0]](%527) + # %536 : Dynamic = onnx::Concat[axis=0](%534, %535) + # %529 : Float(1, 512) = onnx::Reshape(%522, %536), scope: ResNet + # + # It's better to replace it with onnx::Flatten + from types import SimpleNamespace + seq_to_find = \ + ['onnx::Constant', 'onnx::Shape', 'onnx::Gather', 'onnx::Constant', 'onnx::Unsqueeze', 'onnx::Unsqueeze', 'onnx::Concat', 'onnx::Reshape'] + seq_to_replace = \ + ['onnx::Flatten'] + k = 0 + s = 0 + for i, node in enumerate(nodes): + if node.kind() == seq_to_find[k]: + if 
k == 0: + s = i + k += 1 + if k == len(seq_to_find): + print('found seq', k, s) + reshape_op = nodes[s + k - 1] + flatten_op = { + 'kind': (lambda: 'onnx::Flatten'), + 'attributeNames': (lambda: {}), + 'outputs': (lambda: list(reshape_op.outputs())), + 'scopeName': (lambda: reshape_op.scopeName()), + 'inputs': (lambda: list(reshape_op.inputs())[:1]), + '__str__': (lambda: reshape_op.__str__()), + } + print(flatten_op) + nodes = nodes[:s] + [SimpleNamespace(**flatten_op)] + nodes[s+k:] + # print(nodes) + # exit(0) + break + else: + k = 0 + s = -1 + + print(nodes) # Collect graph outputs graph_outputs = [n.uniqueName() for n in trace.graph().outputs()] print('Graph outputs:', graph_outputs) From 6e133c37e11218968027194de1a957f7054cf29b Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:04:16 +0300 Subject: [PATCH 052/180] Add DRN test. --- tests/models/drn.py | 430 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 430 insertions(+) create mode 100644 tests/models/drn.py diff --git a/tests/models/drn.py b/tests/models/drn.py new file mode 100644 index 0000000..629c895 --- /dev/null +++ b/tests/models/drn.py @@ -0,0 +1,430 @@ +import numpy as np +import torch +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +import torchvision +import torch.nn as nn + + +# Code below copied from DRN repo +import math +BatchNorm = nn.BatchNorm2d + +def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=padding, bias=False, dilation=dilation) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, + dilation=(1, 1), residual=True): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, + padding=dilation[0], dilation=dilation[0]) + self.bn1 = BatchNorm(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, 
planes, + padding=dilation[1], dilation=dilation[1]) + self.bn2 = BatchNorm(planes) + self.downsample = downsample + self.stride = stride + self.residual = residual + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + if self.residual: + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, + dilation=(1, 1), residual=True): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = BatchNorm(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=dilation[1], bias=False, + dilation=dilation[1]) + self.bn2 = BatchNorm(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = BatchNorm(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class DRN(nn.Module): + + def __init__(self, block, layers, num_classes=1000, + channels=(16, 32, 64, 128, 256, 512, 512, 512), + out_map=False, out_middle=False, pool_size=28, arch='D'): + super(DRN, self).__init__() + self.inplanes = channels[0] + self.out_map = out_map + self.out_dim = channels[-1] + self.out_middle = out_middle + self.arch = arch + + if arch == 'C': + self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1, + padding=3, bias=False) + self.bn1 = BatchNorm(channels[0]) + self.relu = 
nn.ReLU(inplace=True) + + self.layer1 = self._make_layer( + BasicBlock, channels[0], layers[0], stride=1) + self.layer2 = self._make_layer( + BasicBlock, channels[1], layers[1], stride=2) + elif arch == 'D': + self.layer0 = nn.Sequential( + nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3, + bias=False), + BatchNorm(channels[0]), + nn.ReLU(inplace=True) + ) + + self.layer1 = self._make_conv_layers( + channels[0], layers[0], stride=1) + self.layer2 = self._make_conv_layers( + channels[1], layers[1], stride=2) + + self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2) + self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2) + self.layer5 = self._make_layer(block, channels[4], layers[4], + dilation=2, new_level=False) + self.layer6 = None if layers[5] == 0 else \ + self._make_layer(block, channels[5], layers[5], dilation=4, + new_level=False) + + if arch == 'C': + self.layer7 = None if layers[6] == 0 else \ + self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, + new_level=False, residual=False) + self.layer8 = None if layers[7] == 0 else \ + self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, + new_level=False, residual=False) + elif arch == 'D': + self.layer7 = None if layers[6] == 0 else \ + self._make_conv_layers(channels[6], layers[6], dilation=2) + self.layer8 = None if layers[7] == 0 else \ + self._make_conv_layers(channels[7], layers[7], dilation=1) + + if num_classes > 0: + self.avgpool = nn.AvgPool2d(pool_size) + self.fc = nn.Conv2d(self.out_dim, num_classes, kernel_size=1, + stride=1, padding=0, bias=True) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, BatchNorm): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1, + new_level=True, residual=True): + assert dilation == 1 or dilation % 2 == 0 + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + BatchNorm(planes * block.expansion), + ) + + layers = list() + layers.append(block( + self.inplanes, planes, stride, downsample, + dilation=(1, 1) if dilation == 1 else ( + dilation // 2 if new_level else dilation, dilation), + residual=residual)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, residual=residual, + dilation=(dilation, dilation))) + + return nn.Sequential(*layers) + + def _make_conv_layers(self, channels, convs, stride=1, dilation=1): + modules = [] + for i in range(convs): + modules.extend([ + nn.Conv2d(self.inplanes, channels, kernel_size=3, + stride=stride if i == 0 else 1, + padding=dilation, bias=False, dilation=dilation), + BatchNorm(channels), + nn.ReLU(inplace=True)]) + self.inplanes = channels + return nn.Sequential(*modules) + + def forward(self, x): + y = list() + + if self.arch == 'C': + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + elif self.arch == 'D': + x = self.layer0(x) + + x = self.layer1(x) + y.append(x) + x = self.layer2(x) + y.append(x) + + x = self.layer3(x) + y.append(x) + + x = self.layer4(x) + y.append(x) + + x = self.layer5(x) + y.append(x) + + if self.layer6 is not None: + x = self.layer6(x) + y.append(x) + + if self.layer7 is not None: + x = self.layer7(x) + y.append(x) + + if self.layer8 is not None: + x = self.layer8(x) + y.append(x) + + if self.out_map: + x = self.fc(x) + else: + x = self.avgpool(x) + x = self.fc(x) + x = x.view(x.size(0), -1) + + if self.out_middle: + return x, y + else: + return x 
+ + +class DRN_A(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(DRN_A, self).__init__() + self.out_dim = 512 * block.expansion + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=1, + dilation=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, + dilation=4) + self.avgpool = nn.AvgPool2d(28, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, BatchNorm): + m.weight.data.fill_(1) + m.bias.data.zero_() + + # for m in self.modules(): + # if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + # elif isinstance(m, nn.BatchNorm2d): + # nn.init.constant_(m.weight, 1) + # nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, + dilation=(dilation, dilation))) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x 
= self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def drn_a_50(pretrained=False, **kwargs): + model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['resnet50'])) + return model + + +def drn_c_26(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-c-26'])) + return model + + +def drn_c_42(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-c-42'])) + return model + + +def drn_c_58(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-c-58'])) + return model + + +def drn_d_22(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-22'])) + return model + + +def drn_d_24(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-24'])) + return model + + +def drn_d_38(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-38'])) + return model + + +def drn_d_40(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs) + if pretrained: + 
model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-40'])) + return model + + +def drn_d_54(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-54'])) + return model + + +def drn_d_56(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-56'])) + return model + + +def drn_d_105(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-105'])) + return model + + +def drn_d_107(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-107'])) + return model + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(100): + model = drn_c_26() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 45254186666adf3736a36deaa96b655753bcf0a0 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:04:53 +0300 Subject: [PATCH 053/180] Fix missing 
input conversion failure. --- pytorch2keras/converter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index f55d71e..5fde368 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -252,6 +252,8 @@ def pytorch_to_keras( node_input_names.append(model_inputs[node_inputs[0]]) else: input_name = 'input{0}'.format(input_index) + if input_name not in layers: + continue node_input_names.append(input_name) input_index += 1 model_inputs[node_inputs[0]] = input_name From 776fc8cd33584a67368cbf3169ccbf96415c0b33 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:11:15 +0300 Subject: [PATCH 054/180] Fix alexnet. --- tests/models/alexnet.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/models/alexnet.py b/tests/models/alexnet.py index 01e9419..ef551a0 100644 --- a/tests/models/alexnet.py +++ b/tests/models/alexnet.py @@ -5,6 +5,17 @@ import torchvision +class AlexNet(torchvision.models.AlexNet): + def __init__(self): + super(AlexNet, self).__init__() + + def forward(self, x): + x = self.features(x) + x = x.view([int(x.size(0)), 256 * 6 * 6]) # << important fix + x = self.classifier(x) + return x + + def check_error(output, k_model, input_np, epsilon=1e-5): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -19,7 +30,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = torchvision.models.AlexNet() + model = AlexNet() model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) From 898e37c0b62ef9458922514080c24696596ce66f Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:15:46 +0300 Subject: [PATCH 055/180] Reimplement clip layer. 
--- pytorch2keras/layers.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 73c37dd..5b30724 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1444,6 +1444,28 @@ def target_layer(x): layers[scope_name] = lambda_layer(layers[inputs[0]]) +def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert clip operation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting clip ...') + + def target_layer(x, vmin=params['min'], vmax=params['max']): + return tf.clip_by_value(x, vmin, vmax) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + AVAILABLE_CONVERTERS = { 'onnx::Conv': convert_conv, 'onnx::ConvTranspose': convert_convtranspose, @@ -1485,4 +1507,5 @@ def target_layer(x): 'onnx::Squeeze': convert_squeeze, 'onnx::Unsqueeze': convert_unsqueeze, 'onnx::Shape': convert_shape, + 'onnx::Clip': convert_clip, } From a7c748feafc99102bd3aa19822a821d12e8b5754 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:16:58 +0300 Subject: [PATCH 056/180] Update preresnet test. 
--- tests/models/preresnet18.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/preresnet18.py b/tests/models/preresnet18.py index 251573f..730bec0 100644 --- a/tests/models/preresnet18.py +++ b/tests/models/preresnet18.py @@ -377,7 +377,7 @@ def _init_params(self): def forward(self, x): x = self.features(x) - x = x.view(x.size(0), -1) + x = x.view(int(x.size(0)), -1) x = self.output(x) return x From d62d3a33c045389b66bbc5e1823cad300478e961 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:20:15 +0300 Subject: [PATCH 057/180] Fix resnet test. --- tests/models/resnet18.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/models/resnet18.py b/tests/models/resnet18.py index 0616e29..26b065f 100644 --- a/tests/models/resnet18.py +++ b/tests/models/resnet18.py @@ -5,6 +5,27 @@ import torchvision +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << This fix again + x = self.fc(x) + return x + + def check_error(output, k_model, input_np, epsilon=1e-5): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -19,7 +40,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = torchvision.models.resnet18() + model = ResNet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2]) model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) From e68b1c1ba053fb05f2ceaf35c46212cd6aeec236 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:23:06 +0300 Subject: [PATCH 058/180] Fix resnet34 and resnet50 tests. 
--- tests/models/resnet34.py | 25 +++++++++++++++++++++++-- tests/models/resnet50.py | 25 +++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tests/models/resnet34.py b/tests/models/resnet34.py index 9909914..79aeaf2 100644 --- a/tests/models/resnet34.py +++ b/tests/models/resnet34.py @@ -5,7 +5,28 @@ import torchvision -def check_error(output, k_model, input_np, epsilon=1e-5): +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << This fix again + x = self.fc(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-3): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -19,7 +40,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = torchvision.models.resnet34() + model = ResNet(torchvision.models.resnet.BasicBlock, [3, 4, 6, 3]) model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) diff --git a/tests/models/resnet50.py b/tests/models/resnet50.py index 0b1b173..3691926 100644 --- a/tests/models/resnet50.py +++ b/tests/models/resnet50.py @@ -5,7 +5,28 @@ import torchvision -def check_error(output, k_model, input_np, epsilon=1e-5): +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << This fix again + x = self.fc(x) + return x + + +def 
check_error(output, k_model, input_np, epsilon=1e-3): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -19,7 +40,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = torchvision.models.resnet50() + model = ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3]) model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) From c0b06f1c3eaaab3d98c14590ff0c8a96dba3ebb1 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:27:15 +0300 Subject: [PATCH 059/180] Fix SENet. --- tests/models/senet.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/models/senet.py b/tests/models/senet.py index 5a5382a..e261eb5 100644 --- a/tests/models/senet.py +++ b/tests/models/senet.py @@ -19,8 +19,10 @@ def __init__(self, channel, reduction=16): def forward(self, x): b, c, _, _ = x.size() - y = self.avg_pool(x).view(b, c) - y = self.fc(y).view(b, c, 1, 1) + y = self.avg_pool(x) + y = y.view([int(b), -1]) + y = self.fc(y) + y = y.view([int(b), int(c), 1, 1]) return x * y @@ -230,7 +232,7 @@ def forward(self, x): x = self.layer3(x) x = self.avgpool(x) - x = x.view(x.size(0), -1) + x = x.view([int(x.size(0)), -1]) x = self.fc(x) return x @@ -252,7 +254,7 @@ def forward(self, x): x = self.relu(x) x = self.avgpool(x) - x = x.view(x.size(0), -1) + x = x.view([int(x.size(0)), -1]) x = self.fc(x) From d634d6cc7160b1224372e4f5ada361be09184de3 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:28:46 +0300 Subject: [PATCH 060/180] Fix squeezenet. 
--- tests/models/squeezenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/squeezenet.py b/tests/models/squeezenet.py index 27f6073..0658566 100644 --- a/tests/models/squeezenet.py +++ b/tests/models/squeezenet.py @@ -103,7 +103,7 @@ def __init__(self, version=1.0, num_classes=1000): def forward(self, x): x = self.features(x) x = self.classifier(x) - return x.view(x.size(0), self.num_classes) + return x.view([int(x.size(0)), self.num_classes]) if __name__ == '__main__': From 81f1c18e74fedcae4f8ed0de89bfc04422c1f748 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:32:43 +0300 Subject: [PATCH 061/180] FIx VGG test. --- tests/models/vgg11.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/models/vgg11.py b/tests/models/vgg11.py index 77eb677..f9da7c7 100644 --- a/tests/models/vgg11.py +++ b/tests/models/vgg11.py @@ -5,6 +5,17 @@ import torchvision +class VGG(torchvision.models.vgg.VGG): + def __init__(self, *args, **kwargs): + super(VGG, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.features(x) + x = x.view([int(x.size(0)), -1]) + x = self.classifier(x) + return x + + def check_error(output, k_model, input_np, epsilon=1e-5): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -19,7 +30,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = torchvision.models.vgg11_bn() + model = VGG(torchvision.models.vgg.make_layers(torchvision.models.vgg.cfg['A'], batch_norm=True)) model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) From 3479642519c952a3f3251eda995d5fb51f2afb6f Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:33:53 +0300 Subject: [PATCH 062/180] Fix resnet18 changed channel ordering. 
--- tests/models/resnet18_channels_last.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/models/resnet18_channels_last.py b/tests/models/resnet18_channels_last.py index fce1916..3e9cc49 100644 --- a/tests/models/resnet18_channels_last.py +++ b/tests/models/resnet18_channels_last.py @@ -5,6 +5,27 @@ import torchvision +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << This fix again + x = self.fc(x) + return x + + def check_error(output, k_model, input_np, epsilon=1e-5): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -19,7 +40,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = torchvision.models.resnet18() + model = ResNet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2]) model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) From cd0d10ea7af4273be7f7d465f4eb276ce2696784 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:38:15 +0300 Subject: [PATCH 063/180] Update readme. --- README.md | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 21d806c..b5e96b5 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ![PyPI - Downloads](https://img.shields.io/pypi/dd/pytorch2keras.svg) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) -Pytorch to Keras model convertor. Still beta for now. +PyTorch to Keras model convertor. 
## Installation @@ -16,8 +16,6 @@ pip install pytorch2keras ## Important notice -At that moment the only PyTorch 0.4.0 is supported. - To use the converter properly, please, make changes in your `~/.keras/keras.json`: @@ -28,29 +26,34 @@ To use the converter properly, please, make changes in your `~/.keras/keras.json ... ``` -## Python 3.7 - -There are some problem related to a new version: +## PyTorch 0.4.1 and greater -Q. PyTorch 0.4 hadn't released wheel package for Python 3.7 +There are [some problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): -A. You can build it from source: +To make it work, please, cast all your `.view()` parameters to `int`. For example: ``` -git clone https://github.com/pytorch/pytorch - -cd pytorch +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) -git checkout v0.4.0 - -NO_CUDA=1 python setup.py install + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << Here + x = self.fc(x) + return x ``` -Q. Tensorflow isn't available for Python 3.7 - -A. Yes, we're waiting for it. - - ## Tensorflow.js For the proper convertion to the tensorflow.js format, please use a new flag `names='short'`. From f8f23dc49eee7b3e1717f3803db4d7d5e188205e Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 24 Nov 2018 19:40:47 +0300 Subject: [PATCH 064/180] Update pypi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index dee81fe..a3ac43e 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.6', + version='0.1.7', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From ad2100bf40e81c9964b9f4c8a3e5a9dbf5f4efd9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 25 Nov 2018 00:36:50 +0300 Subject: [PATCH 065/180] Fix PEP8 issues. --- pytorch2keras/converter.py | 10 +-- pytorch2keras/layers.py | 12 ++-- tests/layers/view.py | 26 +++---- tests/models/drn.py | 93 ++------------------------ tests/models/resnet18.py | 2 +- tests/models/resnet18_channels_last.py | 2 +- tests/models/resnet34.py | 2 +- tests/models/resnet50.py | 2 +- 8 files changed, 32 insertions(+), 117 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 5fde368..91819a0 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -10,7 +10,7 @@ import torch.autograd import torch.serialization from torch.jit import _unique_state_dict -from torch.onnx import ONNX_ARCHIVE_MODEL_PROTO_NAME, ExportTypes, OperatorExportTypes +from torch.onnx import OperatorExportTypes from .layers import AVAILABLE_CONVERTERS @@ -34,6 +34,7 @@ def set_training(model, mode): if old_mode != mode: model.train(old_mode) + if torch.__version__ != '0.4.1': from torch._C import ListType @@ -94,7 +95,7 @@ def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): else: torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - + torch._C._jit_pass_peephole(graph) torch._C._jit_pass_lint(graph) @@ -182,9 +183,8 @@ def pytorch_to_keras( # It's better to replace it with onnx::Flatten from types import SimpleNamespace seq_to_find = \ - ['onnx::Constant', 'onnx::Shape', 'onnx::Gather', 'onnx::Constant', 'onnx::Unsqueeze', 'onnx::Unsqueeze', 'onnx::Concat', 
'onnx::Reshape'] - seq_to_replace = \ - ['onnx::Flatten'] + ['onnx::Constant', 'onnx::Shape', 'onnx::Gather', + 'onnx::Constant', 'onnx::Unsqueeze', 'onnx::Unsqueeze', 'onnx::Concat', 'onnx::Reshape'] k = 0 s = 0 for i, node in enumerate(nodes): diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 5b30724..62b56da 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1034,8 +1034,6 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): if layers[inputs[1]][0] == -1: print('Cannot deduct batch size! It will be omitted, but result may be wrong.') - print(layers[inputs[0]]) - def target_layer(x, shape=layers[inputs[1]]): return tf.reshape(x, shape) @@ -1180,11 +1178,10 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, names) params_list = params['value'].numpy().tolist() def target_layer(x): - import keras.backend as K return tf.constant(params_list) lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py + layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py # layers[scope_name] = params['value'].tolist() @@ -1295,7 +1292,7 @@ def target_layer(x): return keras.backend.expand_dims(x) lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims + layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims layers[scope_name] = lambda_layer(layers[scope_name]) @@ -1328,7 +1325,7 @@ def target_layer(x): return keras.backend.expand_dims(x) lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims + layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims layers[scope_name] = lambda_layer(layers[scope_name]) 
@@ -1414,12 +1411,11 @@ def convert_unsqueeze(params, w_name, scope_name, inputs, layers, weights, names else: tf_name = w_name + str(random.random()) - def target_layer(x): return keras.backend.expand_dims(x) lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[inputs[0]]) + layers[scope_name] = lambda_layer(layers[inputs[0]]) def convert_shape(params, w_name, scope_name, inputs, layers, weights, names): diff --git a/tests/layers/view.py b/tests/layers/view.py index 4a60a42..6fe6c9d 100644 --- a/tests/layers/view.py +++ b/tests/layers/view.py @@ -1,4 +1,5 @@ import numpy as np + import torch import torch.nn as nn from torch.autograd import Variable @@ -6,35 +7,36 @@ class TestView(nn.Module): - """Module for View conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): + def __init__(self): super(TestView, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) + self.conv2d = nn.Conv2d(22, 32, kernel_size=1, bias=True) + self.fc = nn.Linear(15488, 3) def forward(self, x): x = self.conv2d(x) - x = x.view([x.size(0), -1, 2, 1, 1, 1, 1, 1]).view(x.size(0), -1).view(x.size(0), -1) - x = torch.nn.Tanh()(x) + + print(type(x.size()[0])) + + x = x.view([int(x.size(0)), -1]) + x = self.fc(x) return x if __name__ == '__main__': max_error = 0 for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = 2 * np.random.randint(kernel_size + 1, 10) - out = 2 * np.random.randint(1, 10) + kernel_size = 1 + inp = 22 + out = 32 - model = TestView(inp, out, kernel_size, inp % 2) + model = TestView() input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) + output = model(input_var) pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) diff --git a/tests/models/drn.py 
b/tests/models/drn.py index 629c895..668aad8 100644 --- a/tests/models/drn.py +++ b/tests/models/drn.py @@ -2,7 +2,6 @@ import torch from torch.autograd import Variable from pytorch2keras.converter import pytorch_to_keras -import torchvision import torch.nn as nn @@ -10,9 +9,11 @@ import math BatchNorm = nn.BatchNorm2d + def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1): - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=padding, bias=False, dilation=dilation) + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=padding, bias=False, dilation=dilation + ) class BasicBlock(nn.Module): @@ -316,90 +317,6 @@ def forward(self, x): return x -def drn_a_50(pretrained=False, **kwargs): - model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['resnet50'])) - return model - - -def drn_c_26(pretrained=False, **kwargs): - model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-c-26'])) - return model - - -def drn_c_42(pretrained=False, **kwargs): - model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-c-42'])) - return model - - -def drn_c_58(pretrained=False, **kwargs): - model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-c-58'])) - return model - - -def drn_d_22(pretrained=False, **kwargs): - model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-22'])) - return model - - -def drn_d_24(pretrained=False, **kwargs): - model = 
DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-24'])) - return model - - -def drn_d_38(pretrained=False, **kwargs): - model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-38'])) - return model - - -def drn_d_40(pretrained=False, **kwargs): - model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-40'])) - return model - - -def drn_d_54(pretrained=False, **kwargs): - model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-54'])) - return model - - -def drn_d_56(pretrained=False, **kwargs): - model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-56'])) - return model - - -def drn_d_105(pretrained=False, **kwargs): - model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-105'])) - return model - - -def drn_d_107(pretrained=False, **kwargs): - model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(https://github.com/model_urls['drn-d-107'])) - return model - - def check_error(output, k_model, input_np, epsilon=1e-5): pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) @@ -414,7 +331,7 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if __name__ == '__main__': max_error = 0 for i in range(100): - model = drn_c_26() + model = 
DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C') model.eval() input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) diff --git a/tests/models/resnet18.py b/tests/models/resnet18.py index 26b065f..522e2f4 100644 --- a/tests/models/resnet18.py +++ b/tests/models/resnet18.py @@ -21,7 +21,7 @@ def forward(self, x): x = self.layer4(x) x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << This fix again + x = x.view(int(x.size(0)), -1) # << This fix again x = self.fc(x) return x diff --git a/tests/models/resnet18_channels_last.py b/tests/models/resnet18_channels_last.py index 3e9cc49..216357e 100644 --- a/tests/models/resnet18_channels_last.py +++ b/tests/models/resnet18_channels_last.py @@ -21,7 +21,7 @@ def forward(self, x): x = self.layer4(x) x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << This fix again + x = x.view(int(x.size(0)), -1) # << This fix again x = self.fc(x) return x diff --git a/tests/models/resnet34.py b/tests/models/resnet34.py index 79aeaf2..9a93876 100644 --- a/tests/models/resnet34.py +++ b/tests/models/resnet34.py @@ -21,7 +21,7 @@ def forward(self, x): x = self.layer4(x) x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << This fix again + x = x.view(int(x.size(0)), -1) # << This fix again x = self.fc(x) return x diff --git a/tests/models/resnet50.py b/tests/models/resnet50.py index 3691926..21c0c16 100644 --- a/tests/models/resnet50.py +++ b/tests/models/resnet50.py @@ -21,7 +21,7 @@ def forward(self, x): x = self.layer4(x) x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << This fix again + x = x.view(int(x.size(0)), -1) # << This fix again x = self.fc(x) return x From a6b91b5e4409d816398d1978a77a823c5030a789 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 29 Nov 2018 00:02:08 +0300 Subject: [PATCH 066/180] Add Manifest.in. Fixed package. 
--- MANIFEST.in | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..0385206 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include LICENSE +include README.md +include requirements.txt \ No newline at end of file diff --git a/setup.py b/setup.py index a3ac43e..946b23b 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.7', + version='0.1.8', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 74d4c7497824947934d077a160396db6b75c2622 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 29 Nov 2018 00:17:36 +0300 Subject: [PATCH 067/180] Add backward compatibility. Unfreeze versions. --- pytorch2keras/converter.py | 109 ++++++++++++++++++++++--------------- requirements.txt | 10 ++-- 2 files changed, 71 insertions(+), 48 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 91819a0..2d82bd8 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -10,7 +10,9 @@ import torch.autograd import torch.serialization from torch.jit import _unique_state_dict -from torch.onnx import OperatorExportTypes + +if version.parse('0.4.1') <= version.parse(torch.__version__): + from torch.onnx import OperatorExportTypes from .layers import AVAILABLE_CONVERTERS @@ -35,7 +37,7 @@ def set_training(model, mode): model.train(old_mode) -if torch.__version__ != '0.4.1': +if version.parse('0.4.1') < version.parse(torch.__version__): from torch._C import ListType # ONNX can't handle constants that are lists of tensors, which can @@ -56,60 +58,78 @@ def _split_tensor_list_constants(g, block): .setType(ListType.ofTensors())) node.output().replaceAllUsesWith(lc) - -def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): - if version.parse('0.4.1') < 
version.parse(torch.__version__): - torch._C._jit_pass_remove_inplace_ops(graph) - # we record now record some ops like ones/zeros - # into a trace where we previously recorded constants - # use constant prop to maintain our current level of onnx support - # without implementing symbolics for all of them - torch._C._jit_pass_constant_propagation(graph) - # _split_tensor_list_constants(graph, graph) - # run dce to eliminate dead parts of the graph that might have been +if version.parse('0.4.0') >= version.parse(torch.__version__): + def _optimize_graph(graph, aten): + # run dce first to eliminate dead parts of the graph that might have been # left behind by things like symbolic_override torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_canonicalize_ops(graph) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_peephole(graph, True) + torch._C._jit_pass_peephole(graph) torch._C._jit_pass_lint(graph) - - # onnx only supports tensors, but 1 / 2 = 0.5 and tensor(1) / tensor(2) = 0 - torch._C._jit_pass_prepare_division_for_onnx(graph) - # onnx only supports tensors, so we turn all out number types into tensors - torch._C._jit_pass_erase_number_types(graph) - # onnx does not support tuples, so try to remove them - torch._C._jit_pass_lower_all_tuples(graph) - # torch._C._jit_pass_peephole(graph, True) + graph = torch._C._jit_pass_onnx(graph, aten) torch._C._jit_pass_lint(graph) - - if operator_export_type != OperatorExportTypes.RAW: - graph = torch._C._jit_pass_onnx(graph, operator_export_type) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_dce(graph) + torch._C._jit_pass_onnx_peephole(graph) torch._C._jit_pass_lint(graph) - else: torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - - torch._C._jit_pass_peephole(graph) + graph = torch._C._jit_pass_canonicalize(graph) torch._C._jit_pass_lint(graph) + return graph +else: + def _optimize_graph(graph, 
operator_export_type=OperatorExportTypes.RAW): + if version.parse('0.4.1') < version.parse(torch.__version__): + torch._C._jit_pass_remove_inplace_ops(graph) + # we record now record some ops like ones/zeros + # into a trace where we previously recorded constants + # use constant prop to maintain our current level of onnx support + # without implementing symbolics for all of them + torch._C._jit_pass_constant_propagation(graph) + # _split_tensor_list_constants(graph, graph) + # run dce to eliminate dead parts of the graph that might have been + # left behind by things like symbolic_override + torch._C._jit_pass_dce(graph) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_canonicalize_ops(graph) + torch._C._jit_pass_lint(graph) - # torch._C._jit_pass_peephole(graph, True) - torch._C._jit_pass_lint(graph) + torch._C._jit_pass_peephole(graph, True) + torch._C._jit_pass_lint(graph) - if operator_export_type != OperatorExportTypes.RAW: - graph = torch._C._jit_pass_onnx(graph, operator_export_type) + # onnx only supports tensors, but 1 / 2 = 0.5 and tensor(1) / tensor(2) = 0 + torch._C._jit_pass_prepare_division_for_onnx(graph) + # onnx only supports tensors, so we turn all out number types into tensors + torch._C._jit_pass_erase_number_types(graph) + # onnx does not support tuples, so try to remove them + torch._C._jit_pass_lower_all_tuples(graph) + # torch._C._jit_pass_peephole(graph, True) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) + + if operator_export_type != OperatorExportTypes.RAW: + graph = torch._C._jit_pass_onnx(graph, operator_export_type) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_onnx_peephole(graph) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_dce(graph) + torch._C._jit_pass_lint(graph) + else: + torch._C._jit_pass_dce(graph) torch._C._jit_pass_lint(graph) - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - return graph + + torch._C._jit_pass_peephole(graph) + 
torch._C._jit_pass_lint(graph) + + # torch._C._jit_pass_peephole(graph, True) + torch._C._jit_pass_lint(graph) + + if operator_export_type != OperatorExportTypes.RAW: + graph = torch._C._jit_pass_onnx(graph, operator_export_type) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_onnx_peephole(graph) + torch._C._jit_pass_lint(graph) + torch._C._jit_pass_dce(graph) + torch._C._jit_pass_lint(graph) + return graph def get_node_id(node): @@ -156,7 +176,10 @@ def pytorch_to_keras( "something weird is happening in your model!") # _optimize_trace(trace, False) - trace.set_graph(_optimize_graph(trace.graph(), OperatorExportTypes.ONNX)) + if version.parse('0.4.0') < version.parse(torch.__version__): + trace.set_graph(_optimize_graph(trace.graph(), OperatorExportTypes.ONNX)) + else: + trace.set_graph(_optimize_graph(trace.graph(), False)) trace.graph().lint() if verbose: diff --git a/requirements.txt b/requirements.txt index ed607a1..bc9860e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -keras==2.2.2 -tensorflow==1.9.0 -numpy==1.14.1 -torch==0.4 -torchvision==0.2.1 +keras +tensorflow +numpy +torch +torchvision packaging \ No newline at end of file From f3fcc9d1c562bfc76a44d8d74fc4a2653655f47d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 29 Nov 2018 00:18:55 +0300 Subject: [PATCH 068/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 946b23b..99edfce 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.8', + version='0.1.9', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From a3aba6ffcbf00d063197687702b8f61e9d8bde5f Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 29 Nov 2018 11:49:40 +0300 Subject: [PATCH 069/180] Add keywords and classifiers to pypi. 
--- pytorch2keras/converter.py | 2 ++ setup.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 2d82bd8..56ab78f 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -360,4 +360,6 @@ def pytorch_to_keras( model = model_tf_ordering + print('Your model was (probably) successfully converted! ' + 'Please, follow the repository https://github.com/nerox8664/pytorch2keras and give a star :)') return model diff --git a/setup.py b/setup.py index 99edfce..547b78c 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,16 @@ url='https://github.com/nerox8664/pytorch2keras', author='Grigory Malivenko', author_email='nerox8664@gmail.com', + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Scientific/Engineering :: Image Recognition', + ], + keywords='machine-learning deep-learning pytorch keras neuralnetwork vgg resnet ' + 'densenet drn dpn darknet squeezenet mobilenet', license='MIT', packages=find_packages(), install_requires=reqs, From 6721b190cc2fc6d472f3e601cb81ec69089125e3 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 29 Nov 2018 11:50:22 +0300 Subject: [PATCH 070/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 547b78c..c435385 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.9', + version='0.1.10', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 5645bfcb9514f3c30ce2ccbcab58e156c6b65291 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 30 Nov 2018 10:01:02 +0300 Subject: [PATCH 071/180] Update shield. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b5e96b5..b4f210e 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) [![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) -![PyPI - Downloads](https://img.shields.io/pypi/dd/pytorch2keras.svg) +[![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) PyTorch to Keras model convertor. From c07274aac94aa0876fa21087f725ca35fe72e14b Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 4 Dec 2018 21:45:36 +0300 Subject: [PATCH 072/180] Fix dilation in depthwise convolution. --- pytorch2keras/layers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 62b56da..875b94c 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -128,6 +128,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): weights=weights, use_bias=has_bias, activation=None, + dilation_rate=params['dilations'][0], bias_initializer='zeros', kernel_initializer='zeros', name=tf_name ) From 3d48a7f6e2f5d8871965b04bc27d4dd963194586 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 4 Dec 2018 21:58:53 +0300 Subject: [PATCH 073/180] Tested VGG, DiracNet and DARTS. 
--- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index b4f210e..a86e1b4 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,7 @@ Misc: ## Models converted with pytorch2keras * ResNet* +* VGG* * PreResNet* * SqueezeNet (with ceil_mode=False) * SqueezeNext @@ -176,6 +177,8 @@ Misc: * Inception * SeNet * Mobilenet v2 +* DiracNet +* DARTS ## Usage Look at the `tests` directory. From 308138fa6e3c537dac832bf3362a37583acb99d1 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 4 Dec 2018 22:00:50 +0300 Subject: [PATCH 074/180] Tested DRNC. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a86e1b4..50d8f69 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,7 @@ Misc: * Mobilenet v2 * DiracNet * DARTS +* DRNC ## Usage Look at the `tests` directory. From 790e201f8d0716d2b8d2b050fb61ce58fe5a33f2 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 4 Dec 2018 22:27:00 +0300 Subject: [PATCH 075/180] Update readme. 
--- README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/README.md b/README.md index 50d8f69..3bd93e5 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,63 @@ Misc: * DARTS * DRNC +| Model | Top1 | Top5 | Params | FLOPs | Source weights | Remarks | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| ResNet-10 | 37.09 | 15.55 | 5,418,792 | 892.62M | osmr's repo | Success | +| ResNet-12 | 35.86 | 14.46 | 5,492,776 | 1,124.23M | osmr's repo | Success | +| ResNet-14 | 32.85 | 12.41 | 5,788,200 | 1,355.64M | osmr's repo | Success | +| ResNet-16 | 30.68 | 11.10 | 6,968,872 | 1,586.95M | osmr's repo | Success | +| ResNet-18 x0.25 | 49.16 | 24.45 | 831,096 | 136.64M | osmr's repo | Success | +| ResNet-18 x0.5 | 36.54 | 14.96 | 3,055,880 | 485.22M | osmr's repo | Success | +| ResNet-18 x0.75 | 33.25 | 12.54 | 6,675,352 | 1,045.75M | osmr's repo | Success | +| ResNet-18 | 29.13 | 9.94 | 11,689,512 | 1,818.21M | osmr's repo | Success | +| ResNet-34 | 25.34 | 7.92 | 21,797,672 | 3,669.16M | osmr's repo | Success | +| ResNet-50 | 23.50 | 6.87 | 25,557,032 | 3,868.96M | osmr's repo | Success | +| ResNet-50b | 22.92 | 6.44 | 25,557,032 | 4,100.70M | osmr's repo | Success | +| ResNet-101 | 21.66 | 5.99 | 44,549,160 | 7,586.30M | osmr's repo | Success | +| ResNet-101b | 21.18 | 5.60 | 44,549,160 | 7,818.04M | osmr's repo | Success | +| ResNet-152 | 21.01 | 5.61 | 60,192,808 | 11,304.85M | osmr's repo | Success | +| ResNet-152b | 20.54 | 5.37 | 60,192,808 | 11,536.58M | osmr's repo | Success | +| PreResNet-18 | 28.72 | 9.88 | 11,687,848 | 1,818.41M | osmr's repo | Success | +| PreResNet-34 | 25.88 | 8.11 | 21,796,008 | 3,669.36M | osmr's repo | Success | +| PreResNet-50 | 23.39 | 6.68 | 25,549,480 | 3,869.16M | osmr's repo | Success | +| PreResNet-50b | 23.16 | 6.64 | 25,549,480 | 4,100.90M | osmr's repo | Success | +| PreResNet-101 | 21.45 | 5.75 | 44,541,608 | 7,586.50M | osmr's repo | Success | +| PreResNet-101b 
| 21.73 | 5.88 | 44,541,608 | 7,818.24M | osmr's repo | Success | +| PreResNet-152 | 20.70 | 5.32 | 60,185,256 | 11,305.05M | osmr's repo | Success | +| PreResNet-152b | 21.00 | 5.75 | 60,185,256 | 11,536.78M | Gluon Model Zoo| Success | +| PreResNet-200b | 21.10 | 5.64 | 64,666,280 | 15,040.27M | tornadomeet/ResNet | Success | +| DenseNet-121 | 25.11 | 7.80 | 7,978,856 | 2,852.39M | Gluon Model Zoo| Success | +| DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | +| DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | +| DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | + +| DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | +| DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | +| SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 828.30M | osmr's repo | Success | +| SqueezeNet v1.1 | 39.09 | 17.39 | 1,235,496 | 354.88M | osmr's repo | Success | +| MobileNet x0.25 | 45.78 | 22.18 | 470,072 | 42.30M | osmr's repo | Success | +| MobileNet x0.5 | 36.12 | 14.81 | 1,331,592 | 152.04M | osmr's repo | Success | +| MobileNet x0.75 | 32.71 | 12.28 | 2,585,560 | 329.22M | Gluon Model Zoo| Success | +| MobileNet x1.0 | 29.25 | 10.03 | 4,231,976 | 573.83M | Gluon Model Zoo| Success | +| FD-MobileNet x0.25 | 56.19 | 31.38 | 383,160 | 12.44M | osmr's repo | Success | +| FD-MobileNet x0.5 | 42.62 | 19.69 | 993,928 | 40.93M | osmr's repo | Success | +| FD-MobileNet x1.0 | 35.95 | 14.72 | 2,901,288 | 146.08M | clavichord93/FD-MobileNet | Success | +| MobileNetV2 x0.25 | 48.89 | 25.24 | 1,516,392 | 32.22M | Gluon Model Zoo| Success | +| MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | +| MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | +| MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | + +| InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | 
Gluon Model Zoo| Success | + + +| DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | +| DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | +| DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | + ## Usage Look at the `tests` directory. From 95f43bfa50995d3b8ab57befcfbb04a00549bc8d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 4 Dec 2018 22:28:19 +0300 Subject: [PATCH 076/180] Fix table in readme. --- README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/README.md b/README.md index 3bd93e5..ec6b7b3 100644 --- a/README.md +++ b/README.md @@ -211,9 +211,6 @@ Misc: | DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | | DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | | DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | - | DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | | DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | | SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 828.30M | osmr's repo | Success | @@ -229,11 +226,7 @@ Misc: | MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | | MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | | MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | - | InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | Gluon Model Zoo| Success | - - | DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | | DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | | DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | From 600cc937bd1231cca6afdf1f716c9d9731939980 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 5 Dec 2018 17:11:23 +0300 Subject: [PATCH 077/180] Fix 
instancenorm2d. --- pytorch2keras/layers.py | 48 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 875b94c..33708fe 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -638,8 +638,8 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, na assert(len(inputs) == 3) - gamma = layers[inputs[-2]] - beta = layers[inputs[-1]] + gamma = layers[inputs[-2] + '_np'] + beta = layers[inputs[-1] + '_np'] def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): layer = tf.contrib.layers.instance_norm( @@ -711,9 +711,45 @@ def convert_elementwise_mul( tf_name = w_name + str(random.random()) mul = keras.layers.Multiply(name=tf_name) + print(model0, model1) layers[scope_name] = mul([model0, model1]) +def convert_elementwise_div( + params, w_name, scope_name, inputs, layers, weights, names +): + """ + Convert elementwise multiplication. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting elementwise_div ...') + + if names == 'short': + tf_name = 'D' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + def target_layer(x): + layer = tf.div( + x[0], + x[1] + ) + return layer + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) + layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) + + def convert_elementwise_sub( params, w_name, scope_name, inputs, layers, weights, names ): @@ -1176,12 +1212,13 @@ def convert_constant(params, w_name, scope_name, inputs, layers, weights, names) """ print('Converting constant ...') - params_list = params['value'].numpy().tolist() + params_list = 
params['value'].numpy() - def target_layer(x): - return tf.constant(params_list) + def target_layer(x, value=params_list): + return tf.constant(value.tolist(), shape=value.shape) lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name + '_np'] = params_list # ad-hoc layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py # layers[scope_name] = params['value'].tolist() @@ -1478,6 +1515,7 @@ def target_layer(x, vmin=params['min'], vmax=params['max']): 'onnx::InstanceNormalization': convert_instancenorm, 'onnx::Add': convert_elementwise_add, 'onnx::Mul': convert_elementwise_mul, + 'onnx::Div': convert_elementwise_div, 'onnx::Sub': convert_elementwise_sub, 'onnx::Sum': convert_sum, 'onnx::Concat': convert_concat, From d0987c9a0b9f8faada494cf7ec8af7feef495785 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 5 Dec 2018 17:11:53 +0300 Subject: [PATCH 078/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c435385..ebb33db 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.10', + version='0.1.11', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 613e5e536c50ebd8456ce157a3f7a688cd27fc9b Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 6 Dec 2018 00:43:06 +0300 Subject: [PATCH 079/180] Fix some typos. --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ec6b7b3..5fa361d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) -PyTorch to Keras model convertor. +PyTorch to Keras model converter. 
## Installation @@ -28,7 +28,7 @@ To use the converter properly, please, make changes in your `~/.keras/keras.json ## PyTorch 0.4.1 and greater -There are [some problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): +There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): To make it work, please, cast all your `.view()` parameters to `int`. For example: @@ -56,7 +56,7 @@ class ResNet(torchvision.models.resnet.ResNet): ## Tensorflow.js -For the proper convertion to the tensorflow.js format, please use a new flag `names='short'`. +For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. ## How to build the latest PyTorch From 911f9c561618e0e36540031329d07bbbe604f223 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 6 Dec 2018 16:56:23 +0300 Subject: [PATCH 080/180] Add additional condition to use depthwise convolution. --- pytorch2keras/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 33708fe..cdad021 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -97,7 +97,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): n_groups = params['group'] in_channels = channels_per_group * n_groups - if n_groups == in_channels: + if n_groups == in_channels and n_groups != 1: print( 'Perform depthwise convolution: h={} w={} in={} out={}'.format( height, width, in_channels, out_channels From da19df8d9fa9922074937df8fa597398b34086c1 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sat, 8 Dec 2018 15:19:49 +0300 Subject: [PATCH 081/180] Tests clean-up. 
--- tests/layers/densenet.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 tests/layers/densenet.py diff --git a/tests/layers/densenet.py b/tests/layers/densenet.py deleted file mode 100644 index b1994b7..0000000 --- a/tests/layers/densenet.py +++ /dev/null @@ -1,29 +0,0 @@ -import numpy as np -import torch -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras -import torchvision - - -if __name__ == '__main__': - max_error = 0 - for i in range(10): - model = torchvision.models.DenseNet() - for m in model.modules(): - m.training = False - - input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) From 7a45f9178a140fcf83409480b1790f3f6356fdb9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 10 Dec 2018 16:08:11 +0300 Subject: [PATCH 082/180] Update pypi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ebb33db..d99969d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.11', + version='0.1.12', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 318f28e17a714de54b18858f27605ec306ccb3e9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 10 Dec 2018 16:17:45 +0300 Subject: [PATCH 083/180] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 28 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 14 ++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..f3d5fcf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a bug report +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +__Snippet of your code__ + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Logs** +If applicable, add error message to help explain your problem. + +**Environment (please complete the following information):** + - OS: [e.g. iOS] + - Python [e.g. Python 2, Python 3] + - Version [e.g. v0.1.11] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..24e8dd7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,14 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Feature request** +A clear and concise description of what the problem is. Ex. My layer hasn't supported yet [...] + +**Additional context** +Add any other context or screenshots about the feature request here. From 3f7ee3de2dad8c5b352ac0ba3127e1e10d498715 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 11 Dec 2018 16:11:06 +0300 Subject: [PATCH 084/180] Add tfjs section to readme. --- README.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/README.md b/README.md index 5fa361d..0c8319a 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,75 @@ class ResNet(torchvision.models.resnet.ResNet): For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. +Here is a short instruction how to get a tensorflow.js model: + +1. First of all, you have to convert your model to Keras with this converter: + +``` +k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, names='short') +``` + +2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. As alternative, you may get Tensorflow Graph and save it as a frozen model: + +``` +# Function below [copied from here](https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb): +def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): + """ + Freezes the state of a session into a pruned computation graph. + + Creates a new computation graph where variable nodes are replaced by + constants taking their current value in the session. 
The new graph will be + pruned so subgraphs that are not necessary to compute the requested + outputs are removed. + @param session The TensorFlow session to be frozen. + @param keep_var_names A list of variable names that should not be frozen, + or None to freeze all the variables in the graph. + @param output_names Names of the relevant graph outputs. + @param clear_devices Remove the device directives from the graph for better portability. + @return The frozen graph definition. + """ + from tensorflow.python.framework.graph_util import convert_variables_to_constants + graph = session.graph + with graph.as_default(): + freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) + output_names = output_names or [] + output_names += [v.op.name for v in tf.global_variables()] + input_graph_def = graph.as_graph_def() + if clear_devices: + for node in input_graph_def.node: + node.device = "" + frozen_graph = convert_variables_to_constants(session, input_graph_def, + output_names, freeze_var_names) + return frozen_graph + + +from keras import backend as K +import tensorflow as tf +frozen_graph = freeze_session(K.get_session(), + output_names=[out.op.name for out in k_model.outputs]) + +tf.train.write_graph(frozen_graph, ".", "my_model.pb", as_text=False) +print([i for i in k_model.outputs]) + +``` + +3. You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: + +``` +tensorflowjs_converter \ + --input_format=tf_frozen_model \ + --output_node_names='TANHTObs/Tanh' \ + my_model.pb \ + model_tfjs +``` + +4. Thats all! 
+ +``` +const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; +const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; +cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); +``` ## How to build the latest PyTorch From fcdf9bdae3422ab47ae9c89969c8e18671a85f74 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 11 Dec 2018 16:12:39 +0300 Subject: [PATCH 085/180] Fix formatting. --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0c8319a..23f9103 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,8 @@ k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, name 2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. As alternative, you may get Tensorflow Graph and save it as a frozen model: ``` -# Function below [copied from here](https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb): +# Function below copied from here: +# https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): """ Freezes the state of a session into a pruned computation graph. 
@@ -88,7 +89,8 @@ def freeze_session(session, keep_var_names=None, output_names=None, clear_device from tensorflow.python.framework.graph_util import convert_variables_to_constants graph = session.graph with graph.as_default(): - freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) + freeze_var_names = \ + list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) output_names = output_names or [] output_names += [v.op.name for v in tf.global_variables()] input_graph_def = graph.as_graph_def() From efa845c16fc385354c579cf0d231fa2d5d4e9e6a Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 13 Dec 2018 12:42:45 +0300 Subject: [PATCH 086/180] Update readme. --- README.md | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 23f9103..541f485 100644 --- a/README.md +++ b/README.md @@ -130,29 +130,21 @@ const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); ``` -## How to build the latest PyTorch - -Please, follow [this guide](https://github.com/pytorch/pytorch#from-source) to compile the latest version. - -Additional information for Arch Linux users: - -* the latest gcc8 is incompatible with actual nvcc version -* the legacy gcc54 can't compile C/C++ modules because of compiler flags - ## How to use -It's the convertor of pytorch graph to a Keras (Tensorflow backend) graph. +It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. 
-Firstly, we need to load (or create) pytorch model: +Firstly, we need to load (or create) a valid PyTorch model: ``` class TestConv2d(nn.Module): - """Module for Conv2d convertion testing + """ + Module for Conv2d testing """ def __init__(self, inp=10, out=16, kernel_size=3): super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, stride=(inp % 3 + 1), kernel_size=kernel_size, bias=True) + self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) def forward(self, x): x = self.conv2d(x) @@ -164,14 +156,14 @@ model = TestConv2d() # model.load_state_dict(torch.load(path_to_weights.pth)) ``` -The next step - create a dummy variable with correct shapes: +The next step - create a dummy variable with correct shape: ``` input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) input_var = Variable(torch.FloatTensor(input_np)) ``` -We're using dummy-variable in order to trace the model. +We use the dummy-variable to trace the model (with jit.trace): ``` from converter import pytorch_to_keras @@ -179,7 +171,7 @@ from converter import pytorch_to_keras k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) ``` -You can also set H and W dimensions to None to make your model shape-agnostic: +You can also set H and W dimensions to None to make your model shape-agnostic (e.g. fully convolutional netowrk): ``` from converter import pytorch_to_keras @@ -187,15 +179,16 @@ from converter import pytorch_to_keras k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) ``` -That's all! If all is ok, the Keras model is stores into the `k_model` variable. +That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. 
+ ## Supported layers Layers: -* Linear -* Conv2d (also with groups) -* DepthwiseConv2d (with limited parameters) +* Linear (Dense) +* Conv2d (groups and dilations are also supported) +* DepthwiseConv2d * Conv3d * ConvTranspose2d * MaxPool2d @@ -228,13 +221,6 @@ Element-wise: * Multiplication * Subtraction -Misc: - -* reduce sum ( .sum() method) - -## Unsupported parameters - -* Pooling: count_include_pad, dilation, ceil_mode ## Models converted with pytorch2keras From 1cdf933fc504dd41fb3e01910974a39efd4d8dc3 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 13 Dec 2018 14:04:13 +0300 Subject: [PATCH 087/180] Add keras import. --- pytorch2keras/layers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index cdad021..58738f9 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1,4 +1,5 @@ import keras.layers +import keras import numpy as np import random import string From 5caca6907ce3b69857a89109e5a7362da8e461d7 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 13 Dec 2018 20:53:43 +0300 Subject: [PATCH 088/180] Fix keras model loading. 
--- pytorch2keras/layers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 58738f9..4474778 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -1,5 +1,4 @@ import keras.layers -import keras import numpy as np import random import string @@ -1328,6 +1327,7 @@ def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weig layers[scope_name] = global_pool(layers[inputs[0]]) def target_layer(x): + import keras return keras.backend.expand_dims(x) lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') @@ -1361,6 +1361,7 @@ def convert_adaptive_max_pool2d(params, w_name, scope_name, inputs, layers, weig layers[scope_name] = global_pool(layers[inputs[0]]) def target_layer(x): + import keras return keras.backend.expand_dims(x) lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') @@ -1451,6 +1452,7 @@ def convert_unsqueeze(params, w_name, scope_name, inputs, layers, weights, names tf_name = w_name + str(random.random()) def target_layer(x): + import keras return keras.backend.expand_dims(x) lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') From 9bc569fd4322b1fb4eea1e3cfa817ee480676bfd Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 17 Dec 2018 16:46:31 +0300 Subject: [PATCH 089/180] Update Readme. --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 541f485..db1f6a5 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,26 @@ k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. +## API + +Here is the only method `pytorch_to_keras` from `pytorch2keras` module. 
+```
+def pytorch_to_keras(
+    model, args, input_shapes,
+    change_ordering=False, training=False, verbose=False, names=False,
+)
+```
+
+Options:
+
+* model -- a PyTorch module to convert;
+* args -- list of dummy variables with proper shapes;
+* input_shapes -- list with shape tuples;
+* change_ordering -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC`
+* training -- boolean, switch model to training mode (never use it)
+* verbose -- boolean, verbose output
+* names -- choice from [`keep`, `short`, `random`]. The selector sets the target layer naming policy.
+
 ## Supported layers
 
 Layers:

From 04cbad42d8f177c6cf541a4048f66b0d30cbd59f Mon Sep 17 00:00:00 2001
From: Grigory Malivenko
Date: Wed, 19 Dec 2018 16:52:24 +0300
Subject: [PATCH 090/180] Update average pooling test.

---
 tests/layers/avg_pool.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tests/layers/avg_pool.py b/tests/layers/avg_pool.py
index 6d093f9..d18a6b3 100644
--- a/tests/layers/avg_pool.py
+++ b/tests/layers/avg_pool.py
@@ -9,13 +9,11 @@
 class AvgPool(nn.Module):
     """Module for AveragePool conversion testing
     """
-    def __init__(self, inp=10, out=16, kernel_size=3, bias=True):
+    def __init__(self, stride=3, padding=0, kernel_size=3):
         super(AvgPool, self).__init__()
-        self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, padding=1, bias=bias)
-        self.pool = nn.AvgPool2d(kernel_size=3, padding=1, count_include_pad=False, stride=2)
+        self.pool = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride)
 
     def forward(self, x):
-        x = self.conv2d(x)
         x = self.pool(x)
         return x
 
@@ -23,17 +21,18 @@ def forward(self, x):
 if __name__ == '__main__':
     max_error = 0
     for i in range(100):
-        kernel_size = np.random.randint(4, 7)
+        kernel_size = np.random.randint(2, 7)
+        stride = np.random.randint(1, kernel_size)
+        padding = np.random.randint(1, kernel_size/2 + 1)
 
         inp = np.random.randint(kernel_size + 1, 100)
-        out = np.random.randint(1, 100)
-
model = AvgPool(inp, out, kernel_size, inp % 2) + model = AvgPool(kernel_size=kernel_size, padding=padding, stride=stride) input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) input_var = Variable(torch.FloatTensor(input_np)) output = model(input_var) - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True, names='keep') + k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=False, names='keep') print(k_model.summary()) pytorch_output = output.data.numpy() keras_output = k_model.predict(input_np) From 983c77bdffc174a4dce024838674ed334b15761b Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 19 Dec 2018 16:53:31 +0300 Subject: [PATCH 091/180] Update weights names regex. --- pytorch2keras/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 56ab78f..69105a6 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -287,7 +287,7 @@ def pytorch_to_keras( node_scope_name = node.scopeName() node_id = get_node_id(node) node_weights_name = '.'.join( - re.findall(r'\[([\w\d.]+)\]', node_scope_name) + re.findall(r'\[([\w\d.\-\[\]\s]+)\]', node_scope_name) ) node_attrs = {k: node[k] for k in node.attributeNames()} From 291b4f361e6379e2d616893ca6e1da2c565aa089 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 19 Dec 2018 17:17:05 +0300 Subject: [PATCH 092/180] Fix poolings. Add upsampling bilinear support. 
--- pytorch2keras/layers.py | 108 +++++++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 17 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 4474778..892f30b 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -393,23 +393,44 @@ def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, names): else: tf_name = w_name + str(random.random()) - height, width = params['kernel_shape'] - stride_height, stride_width = params['strides'] - padding_h, padding_w, _, _ = params['pads'] + if 'kernel_shape' in params: + height, width = params['kernel_shape'] + else: + height, width = params['kernel_size'] + + if 'strides' in params: + stride_height, stride_width = params['strides'] + else: + stride_height, stride_width = params['stride'] + + if 'pads' in params: + padding_h, padding_w, _, _ = params['pads'] + else: + padding_h, padding_w = params['padding'] input_name = inputs[0] - padding = 'valid' - if padding_h > 0 and padding_w > 0: - if padding_h == height // 2 and padding_w == width // 2: - padding = 'same' - else: - raise AssertionError('Custom padding isnt supported') + pad = 'valid' + + if height % 2 == 1 and width % 2 == 1 and \ + height // 2 == padding_h and width // 2 == padding_w and \ + stride_height == 1 and stride_width == 1: + pad = 'same' + else: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(padding_h, padding_w), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[inputs[0]]) + input_name = padding_name + # Pooling type AveragePooling2D pooling = keras.layers.AveragePooling2D( pool_size=(height, width), strides=(stride_height, stride_width), - padding=padding, - name=tf_name + padding=pad, + name=tf_name, + data_format='channels_first' ) layers[scope_name] = pooling(layers[input_name]) @@ -454,7 +475,13 @@ def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, names): padding_h, padding_w = 
params['padding'] input_name = inputs[0] - if padding_h > 0 and padding_w > 0: + pad = 'valid' + + if height % 2 == 1 and width % 2 == 1 and \ + height // 2 == padding_h and width // 2 == padding_w and \ + stride_height == 1 and stride_width == 1: + pad = 'same' + else: padding_name = tf_name + '_pad' padding_layer = keras.layers.ZeroPadding2D( padding=(padding_h, padding_w), @@ -463,12 +490,13 @@ def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, names): layers[padding_name] = padding_layer(layers[inputs[0]]) input_name = padding_name - # Pooling type + # Pooling type MaxPooling2D pooling = keras.layers.MaxPooling2D( pool_size=(height, width), strides=(stride_height, stride_width), - padding='valid', - name=tf_name + padding=pad, + name=tf_name, + data_format='channels_first' ) layers[scope_name] = pooling(layers[input_name]) @@ -938,8 +966,13 @@ def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): else: tf_name = w_name + str(random.random()) - softmax = keras.layers.Activation('softmax', name=tf_name) - layers[scope_name] = softmax(layers[inputs[0]]) + def target_layer(x, dim=params['dim']): + import keras + return keras.activations.softmax(x, axis=dim) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + def convert_tanh(params, w_name, scope_name, inputs, layers, weights, names): @@ -1223,6 +1256,43 @@ def target_layer(x, value=params_list): # layers[scope_name] = params['value'].tolist() + +def convert_upsample_bilinear(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert upsample_bilinear2d layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting upsample...') + + if names == 'short': + tf_name = 'UPSL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + output_size = params['output_size'] + align_corners = params['align_corners'] > 0 + + def target_layer(x, size=output_size, align_corners=align_corners): + import tensorflow as tf + x = tf.transpose(x, [0, 2, 3, 1]) + x = tf.image.resize_images(x, size, align_corners=align_corners) + x = tf.transpose(x, [0, 3, 1, 2]) + return x + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): """ Convert upsample_bilinear2d layer. 
@@ -1511,7 +1581,9 @@ def target_layer(x, vmin=params['min'], vmax=params['max']): 'onnx::MaxPool': convert_maxpool, 'max_pool2d': convert_maxpool, 'aten::max_pool3d': convert_maxpool3, + 'aten::max_pool2d_with_indices': convert_maxpool, 'aten::max_pool2d': convert_maxpool, + 'aten::avg_pool2d': convert_avgpool, 'onnx::AveragePool': convert_avgpool, 'onnx::Dropout': convert_dropout, 'onnx::BatchNormalization': convert_batchnorm, @@ -1526,6 +1598,7 @@ def target_layer(x, vmin=params['min'], vmax=params['max']): 'onnx::LeakyRelu': convert_lrelu, 'onnx::Sigmoid': convert_sigmoid, 'onnx::Softmax': convert_softmax, + 'aten::softmax': convert_softmax, 'onnx::Tanh': convert_tanh, 'aten::hardtanh': convert_hardtanh, 'onnx::Selu': convert_selu, @@ -1535,6 +1608,7 @@ def target_layer(x, vmin=params['min'], vmax=params['max']): 'onnx::Gather': convert_gather, 'onnx::ReduceSum': convert_reduce_sum, 'onnx::Constant': convert_constant, + 'aten::upsample_bilinear2d': convert_upsample_bilinear, 'onnx::Upsample': convert_upsample, 'onnx::Pad': convert_padding, 'onnx::GlobalAveragePool': convert_adaptive_avg_pool2d, From 64414173fb32ccb62eb911ec67b62243b25efc21 Mon Sep 17 00:00:00 2001 From: idearibosome Date: Thu, 20 Dec 2018 09:54:09 +0900 Subject: [PATCH 093/180] Use tf.multiply instead of keras.layers.Multiply --- pytorch2keras/layers.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 892f30b..2068384 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -738,9 +738,15 @@ def convert_elementwise_mul( else: tf_name = w_name + str(random.random()) - mul = keras.layers.Multiply(name=tf_name) - print(model0, model1) - layers[scope_name] = mul([model0, model1]) + def target_layer(x): + layer = tf.multiply( + x[0], + x[1] + ) + return layer + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) + layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) 
def convert_elementwise_div( From 365a75b85e5d14b13a15c12da6edf7e599f1d741 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 12:10:21 +0300 Subject: [PATCH 094/180] Minor code clean-up. --- pytorch2keras/layers.py | 51 ++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 2068384..572dbc8 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -98,12 +98,6 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): in_channels = channels_per_group * n_groups if n_groups == in_channels and n_groups != 1: - print( - 'Perform depthwise convolution: h={} w={} in={} out={}'.format( - height, width, in_channels, out_channels - ) - ) - if bias_name in weights: biases = weights[bias_name].numpy() has_bias = True @@ -666,6 +660,9 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, na assert(len(inputs) == 3) + # Use previously taken constants + assert(inputs[-2] + '_np' in layers) + assert(inputs[-1] + '_np' in layers) gamma = layers[inputs[-2] + '_np'] beta = layers[inputs[-1] + '_np'] @@ -698,18 +695,36 @@ def convert_elementwise_add( names: use short names for keras layers """ print('Converting elementwise_add ...') - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] + if 'broadcast' in params: + model0 = layers[inputs[0]] + model1 = layers[inputs[1]] + + if names == 'short': + tf_name = 'A' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) - if names == 'short': - tf_name = 'A' + random_string(7) - elif names == 'keep': - tf_name = w_name + def target_layer(x): + layer = tf.add(x[0], x[1]) + return layer + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) + layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) else: - tf_name = w_name + str(random.random()) + model0 = 
layers[inputs[0]] + model1 = layers[inputs[1]] - add = keras.layers.Add(name=tf_name) - layers[scope_name] = add([model0, model1]) + if names == 'short': + tf_name = 'A' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + add = keras.layers.Add(name=tf_name) + layers[scope_name] = add([model0, model1]) def convert_elementwise_mul( @@ -875,7 +890,7 @@ def convert_relu(params, w_name, scope_name, inputs, layers, weights, names): """ Convert relu layer. - Args: + Args: params: dictionary with layer parameters w_name: name prefix in state_dict scope_name: pytorch scope name @@ -928,7 +943,7 @@ def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, names): """ Convert sigmoid layer. - Args: + Args: params: dictionary with layer parameters w_name: name prefix in state_dict scope_name: pytorch scope name @@ -954,7 +969,7 @@ def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): """ Convert softmax layer. - Args: + Args: params: dictionary with layer parameters w_name: name prefix in state_dict scope_name: pytorch scope name From b574506bd1473fdf5d3a2c317a3d4d47970d5ba9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 12:54:48 +0300 Subject: [PATCH 095/180] Started layers refactoring. 
--- pytorch2keras/activation_layers.py | 211 ++++ pytorch2keras/common.py | 10 + pytorch2keras/constant_layers.py | 31 + pytorch2keras/convolution_layers.py | 290 +++++ pytorch2keras/elementwise_layers.py | 156 +++ pytorch2keras/embedding_layers.py | 42 + pytorch2keras/layers.py | 1604 +------------------------ pytorch2keras/linear_layers.py | 101 ++ pytorch2keras/normalization_layers.py | 130 ++ pytorch2keras/operation_layers.py | 124 ++ pytorch2keras/padding_layers.py | 52 + pytorch2keras/pooling_layers.py | 264 ++++ pytorch2keras/reshape_layers.py | 176 +++ pytorch2keras/upsampling_layers.py | 74 ++ 14 files changed, 1678 insertions(+), 1587 deletions(-) create mode 100644 pytorch2keras/activation_layers.py create mode 100644 pytorch2keras/common.py create mode 100644 pytorch2keras/constant_layers.py create mode 100644 pytorch2keras/convolution_layers.py create mode 100644 pytorch2keras/elementwise_layers.py create mode 100644 pytorch2keras/embedding_layers.py create mode 100644 pytorch2keras/linear_layers.py create mode 100644 pytorch2keras/normalization_layers.py create mode 100644 pytorch2keras/operation_layers.py create mode 100644 pytorch2keras/padding_layers.py create mode 100644 pytorch2keras/pooling_layers.py create mode 100644 pytorch2keras/reshape_layers.py create mode 100644 pytorch2keras/upsampling_layers.py diff --git a/pytorch2keras/activation_layers.py b/pytorch2keras/activation_layers.py new file mode 100644 index 0000000..b7cb6cd --- /dev/null +++ b/pytorch2keras/activation_layers.py @@ -0,0 +1,211 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_relu(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert relu layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting relu ...') + + if names == 'short': + tf_name = 'RELU' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + relu = keras.layers.Activation('relu', name=tf_name) + layers[scope_name] = relu(layers[inputs[0]]) + + +def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert leaky relu layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting lrelu ...') + + if names == 'short': + tf_name = 'lRELU' + random_string(3) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + leakyrelu = \ + keras.layers.LeakyReLU(alpha=params['alpha'], name=tf_name) + layers[scope_name] = leakyrelu(layers[inputs[0]]) + + +def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert sigmoid layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting sigmoid ...') + + if names == 'short': + tf_name = 'SIGM' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + sigmoid = keras.layers.Activation('sigmoid', name=tf_name) + layers[scope_name] = sigmoid(layers[inputs[0]]) + + +def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert softmax layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting softmax ...') + + if names == 'short': + tf_name = 'SMAX' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + def target_layer(x, dim=params['dim']): + import keras + return keras.activations.softmax(x, axis=dim) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_tanh(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert tanh layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting tanh ...') + + if names == 'short': + tf_name = 'TANH' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + tanh = keras.layers.Activation('tanh', name=tf_name) + layers[scope_name] = tanh(layers[inputs[0]]) + + +def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert hardtanh layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting hardtanh (clip) ...') + + def target_layer(x, max_val=float(params['max_val']), min_val=float(params['min_val'])): + return tf.minimum(max_val, tf.maximum(min_val, x)) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_selu(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert selu layer. 
+
+    Args:
+        params: dictionary with layer parameters
+        w_name: name prefix in state_dict
+        scope_name: pytorch scope name
+        inputs: pytorch node inputs
+        layers: dictionary with keras tensors
+        weights: pytorch state_dict
+        names: use short names for keras layers
+    """
+    print('Converting selu ...')
+
+    if names == 'short':
+        tf_name = 'SELU' + random_string(4)
+    elif names == 'keep':
+        tf_name = w_name
+    else:
+        tf_name = w_name + str(random.random())
+
+    selu = keras.layers.Activation('selu', name=tf_name)
+    layers[scope_name] = selu(layers[inputs[0]])
+
+
+def convert_clip(params, w_name, scope_name, inputs, layers, weights, names):
+    """
+    Convert clip operation.
+
+    Args:
+        params: dictionary with layer parameters
+        w_name: name prefix in state_dict
+        scope_name: pytorch scope name
+        inputs: pytorch node inputs
+        layers: dictionary with keras tensors
+        weights: pytorch state_dict
+        names: use short names for keras layers
+    """
+    print('Converting clip ...')
+
+    def target_layer(x, vmin=params['min'], vmax=params['max']):
+        return tf.clip_by_value(x, vmin, vmax)
+
+    lambda_layer = keras.layers.Lambda(target_layer)
+    layers[scope_name] = lambda_layer(layers[inputs[0]])
\ No newline at end of file
diff --git a/pytorch2keras/common.py b/pytorch2keras/common.py
new file mode 100644
index 0000000..fde4e99
--- /dev/null
+++ b/pytorch2keras/common.py
@@ -0,0 +1,11 @@
+import random
+import string
+
+
+def random_string(length):
+    """
+    Generate a random string for the layer name.
+ :param length: a length of required random string + :return: generated random string + """ + return ''.join(random.choice(string.ascii_letters) for _ in range(length)) \ No newline at end of file diff --git a/pytorch2keras/constant_layers.py b/pytorch2keras/constant_layers.py new file mode 100644 index 0000000..a356cdb --- /dev/null +++ b/pytorch2keras/constant_layers.py @@ -0,0 +1,31 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_constant(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert constant layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting constant ...') + + params_list = params['value'].numpy() + + def target_layer(x, value=params_list): + return tf.constant(value.tolist(), shape=value.shape) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name + '_np'] = params_list # ad-hoc + layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py new file mode 100644 index 0000000..08934c1 --- /dev/null +++ b/pytorch2keras/convolution_layers.py @@ -0,0 +1,290 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert convolution layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting convolution ...') + + if names == 'short': + tf_name = 'C' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + bias_name = '{0}.bias'.format(w_name) + weights_name = '{0}.weight'.format(w_name) + input_name = inputs[0] + + if len(weights[weights_name].numpy().shape) == 5: # 3D conv + W = weights[weights_name].numpy().transpose(2, 3, 4, 1, 0) + height, width, channels, n_layers, n_filters = W.shape + + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False + + if params['pads'][0] > 0 or params['pads'][1] > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding3D( + padding=(params['pads'][0], + params['pads'][1], + params['pads'][2]), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[input_name]) + input_name = padding_name + + if has_bias: + weights = [W, biases] + else: + weights = [W] + + conv = keras.layers.Conv3D( + filters=n_filters, + kernel_size=(channels, height, width), + strides=(params['strides'][0], + params['strides'][1], + params['strides'][2]), + padding='valid', + weights=weights, + use_bias=has_bias, + activation=None, + dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', + name=tf_name + ) + layers[scope_name] = conv(layers[input_name]) + + elif len(weights[weights_name].numpy().shape) == 4: # 2D conv + if params['pads'][0] > 0 or params['pads'][1] > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(params['pads'][0], params['pads'][1]), + name=padding_name + ) + layers[padding_name] = 
padding_layer(layers[input_name]) + input_name = padding_name + + W = weights[weights_name].numpy().transpose(2, 3, 1, 0) + height, width, channels_per_group, out_channels = W.shape + n_groups = params['group'] + in_channels = channels_per_group * n_groups + + if n_groups == in_channels and n_groups != 1: + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False + + # We are just doing depthwise conv, so make the pointwise a no-op + pointwise_wt = np.expand_dims(np.expand_dims(np.identity(out_channels), 0), 0) + W = W.transpose(0, 1, 3, 2) + if has_bias: + weights = [W, pointwise_wt, biases] + else: + weights = [W, pointwise_wt] + + conv = keras.layers.SeparableConv2D( + filters=out_channels, + depth_multiplier=1, + kernel_size=(height, width), + strides=(params['strides'][0], params['strides'][1]), + padding='valid', + weights=weights, + use_bias=has_bias, + activation=None, + dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', + name=tf_name + ) + layers[scope_name] = conv(layers[input_name]) + + elif n_groups != 1: + # Example from https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html + # # Split input and weights and convolve them separately + # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) + # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) + # output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] + + # # Concat the convolved output together again + # conv = tf.concat(axis=3, values=output_groups) + def target_layer(x, groups=params['group'], stride_y=params['strides'][0], stride_x=params['strides'][1]): + x = tf.transpose(x, [0, 2, 3, 1]) + + def convolve_lambda(i, k): + return tf.nn.conv2d(i, k, strides=[1, stride_y, stride_x, 1], padding='VALID') + + input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) + weight_groups = tf.split(axis=3, 
num_or_size_splits=groups, value=W.transpose(0, 1, 2, 3)) + output_groups = [convolve_lambda(i, k) for i, k in zip(input_groups, weight_groups)] + + layer = tf.concat(axis=3, values=output_groups) + + layer = tf.transpose(layer, [0, 3, 1, 2]) + return layer + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[input_name]) + + else: + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False + + if has_bias: + weights = [W, biases] + else: + weights = [W] + + conv = keras.layers.Conv2D( + filters=out_channels, + kernel_size=(height, width), + strides=(params['strides'][0], params['strides'][1]), + padding='valid', + weights=weights, + use_bias=has_bias, + activation=None, + dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', + name=tf_name + ) + layers[scope_name] = conv(layers[input_name]) + + else: # 1D conv + W = weights[weights_name].numpy().transpose(2, 1, 0) + width, channels, n_filters = W.shape + + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False + + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding1D( + padding=params['pads'][0], + name=padding_name + ) + layers[padding_name] = padding_layer(layers[inputs[0]]) + input_name = padding_name + + if has_bias: + weights = [W, biases] + else: + weights = [W] + + conv = keras.layers.Conv1D( + filters=n_filters, + kernel_size=width, + strides=params['strides'][0], + padding='valid', + weights=weights, + use_bias=has_bias, + activation=None, + dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', + name=tf_name + ) + layers[scope_name] = conv(layers[input_name]) + + +def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert transposed convolution layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting transposed convolution ...') + + if names == 'short': + tf_name = 'C' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + bias_name = '{0}.bias'.format(w_name) + weights_name = '{0}.weight'.format(w_name) + + if len(weights[weights_name].numpy().shape) == 4: + W = weights[weights_name].numpy().transpose(2, 3, 1, 0) + height, width, n_filters, channels = W.shape + + if bias_name in weights: + biases = weights[bias_name].numpy() + has_bias = True + else: + biases = None + has_bias = False + + input_name = inputs[0] + + if has_bias: + weights = [W, biases] + else: + weights = [W] + + conv = keras.layers.Conv2DTranspose( + filters=n_filters, + kernel_size=(height, width), + strides=(params['strides'][0], params['strides'][1]), + padding='valid', + output_padding=0, + weights=weights, + use_bias=has_bias, + activation=None, + dilation_rate=params['dilations'][0], + bias_initializer='zeros', kernel_initializer='zeros', + name=tf_name + ) + + layers[scope_name] = conv(layers[input_name]) + + # Magic ad-hoc. 
+ # See the Keras issue: https://github.com/keras-team/keras/issues/6777 + layers[scope_name].set_shape(layers[scope_name]._keras_shape) + + pads = params['pads'] + if pads[0] > 0: + assert(len(pads) == 2 or (pads[2] == pads[0] and pads[3] == pads[1])) + + crop = keras.layers.Cropping2D( + pads[:2], + name=tf_name + '_crop' + ) + layers[scope_name] = crop(layers[scope_name]) + else: + raise AssertionError('Layer is not supported for now') diff --git a/pytorch2keras/elementwise_layers.py b/pytorch2keras/elementwise_layers.py new file mode 100644 index 0000000..acbde84 --- /dev/null +++ b/pytorch2keras/elementwise_layers.py @@ -0,0 +1,156 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_elementwise_add( + params, w_name, scope_name, inputs, layers, weights, names +): + """ + Convert elementwise addition. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting elementwise_add ...') + if 'broadcast' in params: + model0 = layers[inputs[0]] + model1 = layers[inputs[1]] + + if names == 'short': + tf_name = 'A' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + def target_layer(x): + layer = tf.add(x[0], x[1]) + return layer + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) + layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) + else: + model0 = layers[inputs[0]] + model1 = layers[inputs[1]] + + if names == 'short': + tf_name = 'A' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + add = keras.layers.Add(name=tf_name) + layers[scope_name] = add([model0, model1]) + + +def 
convert_elementwise_mul(
+    params, w_name, scope_name, inputs, layers, weights, names
+):
+    """
+    Convert elementwise multiplication.
+
+    Args:
+        params: dictionary with layer parameters
+        w_name: name prefix in state_dict
+        scope_name: pytorch scope name
+        inputs: pytorch node inputs
+        layers: dictionary with keras tensors
+        weights: pytorch state_dict
+        names: use short names for keras layers
+    """
+    print('Converting elementwise_mul ...')
+    model0 = layers[inputs[0]]
+    model1 = layers[inputs[1]]
+
+    if names == 'short':
+        tf_name = 'M' + random_string(7)
+    elif names == 'keep':
+        tf_name = w_name
+    else:
+        tf_name = w_name + str(random.random())
+
+    def target_layer(x):
+        layer = tf.multiply(
+            x[0],
+            x[1]
+        )
+        return layer
+
+    lambda_layer = keras.layers.Lambda(target_layer, name=tf_name)
+    layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]])
+
+
+def convert_elementwise_div(
+    params, w_name, scope_name, inputs, layers, weights, names
+):
+    """
+    Convert elementwise division.
+
+    Args:
+        params: dictionary with layer parameters
+        w_name: name prefix in state_dict
+        scope_name: pytorch scope name
+        inputs: pytorch node inputs
+        layers: dictionary with keras tensors
+        weights: pytorch state_dict
+        names: use short names for keras layers
+    """
+    print('Converting elementwise_div ...')
+
+    if names == 'short':
+        tf_name = 'D' + random_string(7)
+    elif names == 'keep':
+        tf_name = w_name
+    else:
+        tf_name = w_name + str(random.random())
+
+    def target_layer(x):
+        layer = tf.div(
+            x[0],
+            x[1]
+        )
+        return layer
+
+    lambda_layer = keras.layers.Lambda(target_layer, name=tf_name)
+    layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]])
+
+
+def convert_elementwise_sub(
+    params, w_name, scope_name, inputs, layers, weights, names
+):
+    """
+    Convert elementwise subtraction.
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting elementwise_sub ...') + model0 = layers[inputs[0]] + model1 = layers[inputs[1]] + + if names == 'short': + tf_name = 'S' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + sub = keras.layers.Subtract(name=tf_name) + layers[scope_name] = sub([model0, model1]) diff --git a/pytorch2keras/embedding_layers.py b/pytorch2keras/embedding_layers.py new file mode 100644 index 0000000..0feb7e6 --- /dev/null +++ b/pytorch2keras/embedding_layers.py @@ -0,0 +1,42 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_gather(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert gather (embedding) layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting embedding ...') + + if names == 'short': + tf_name = 'EMBD' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + weights_name = '{0}.weight'.format(w_name) + + W = weights[weights_name].numpy() + input_channels, output_channels = W.shape + + keras_weights = [W] + + dense = keras.layers.Embedding( + input_channels, + weights=keras_weights, output_dim=output_channels, name=tf_name + ) + layers[scope_name] = dense(layers[inputs[0]]) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 572dbc8..28091a8 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -5,1593 +5,23 @@ import tensorflow as tf -def random_string(length): - """ - Generate a random string for the layer name. - :param length: a length of required random string - :return: generated random string - """ - return ''.join(random.choice(string.ascii_letters) for _ in range(length)) - - -def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert convolution layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting convolution ...') - - if names == 'short': - tf_name = 'C' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - input_name = inputs[0] - - if len(weights[weights_name].numpy().shape) == 5: # 3D conv - W = weights[weights_name].numpy().transpose(2, 3, 4, 1, 0) - height, width, channels, n_layers, n_filters = W.shape - - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding3D( - padding=(params['pads'][0], - params['pads'][1], - params['pads'][2]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv3D( - filters=n_filters, - kernel_size=(channels, height, width), - strides=(params['strides'][0], - params['strides'][1], - params['strides'][2]), - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - elif len(weights[weights_name].numpy().shape) == 4: # 2D conv - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(params['pads'][0], params['pads'][1]), - name=padding_name - ) - layers[padding_name] = 
padding_layer(layers[input_name]) - input_name = padding_name - - W = weights[weights_name].numpy().transpose(2, 3, 1, 0) - height, width, channels_per_group, out_channels = W.shape - n_groups = params['group'] - in_channels = channels_per_group * n_groups - - if n_groups == in_channels and n_groups != 1: - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - # We are just doing depthwise conv, so make the pointwise a no-op - pointwise_wt = np.expand_dims(np.expand_dims(np.identity(out_channels), 0), 0) - W = W.transpose(0, 1, 3, 2) - if has_bias: - weights = [W, pointwise_wt, biases] - else: - weights = [W, pointwise_wt] - - conv = keras.layers.SeparableConv2D( - filters=out_channels, - depth_multiplier=1, - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - - elif n_groups != 1: - # Example from https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html - # # Split input and weights and convolve them separately - # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) - # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) - # output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] - - # # Concat the convolved output together again - # conv = tf.concat(axis=3, values=output_groups) - def target_layer(x, groups=params['group'], stride_y=params['strides'][0], stride_x=params['strides'][1]): - x = tf.transpose(x, [0, 2, 3, 1]) - - def convolve_lambda(i, k): - return tf.nn.conv2d(i, k, strides=[1, stride_y, stride_x, 1], padding='VALID') - - input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) - weight_groups = tf.split(axis=3, 
num_or_size_splits=groups, value=W.transpose(0, 1, 2, 3)) - output_groups = [convolve_lambda(i, k) for i, k in zip(input_groups, weight_groups)] - - layer = tf.concat(axis=3, values=output_groups) - - layer = tf.transpose(layer, [0, 3, 1, 2]) - return layer - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[input_name]) - - else: - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv2D( - filters=out_channels, - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - else: # 1D conv - W = weights[weights_name].numpy().transpose(2, 1, 0) - width, channels, n_filters = W.shape - - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding1D( - padding=params['pads'][0], - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv1D( - filters=n_filters, - kernel_size=width, - strides=params['strides'][0], - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - - -def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert transposed convolution layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting transposed convolution ...') - - if names == 'short': - tf_name = 'C' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - - if len(weights[weights_name].numpy().shape) == 4: - W = weights[weights_name].numpy().transpose(2, 3, 1, 0) - height, width, n_filters, channels = W.shape - - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - input_name = inputs[0] - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv2DTranspose( - filters=n_filters, - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - output_padding=0, - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - - layers[scope_name] = conv(layers[input_name]) - - # Magic ad-hoc. - # See the Keras issue: https://github.com/keras-team/keras/issues/6777 - layers[scope_name].set_shape(layers[scope_name]._keras_shape) - - pads = params['pads'] - if pads[0] > 0: - assert(len(pads) == 2 or (pads[2] == pads[0] and pads[3] == pads[1])) - - crop = keras.layers.Cropping2D( - pads[:2], - name=tf_name + '_crop' - ) - layers[scope_name] = crop(layers[scope_name]) - else: - raise AssertionError('Layer is not supported for now') - - -def convert_flatten(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert reshape(view). 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting flatten ...') - - if names == 'short': - tf_name = 'R' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - reshape = keras.layers.Reshape([-1], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) - - -def convert_gemm(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert Linear. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting Linear ...') - - if names == 'short': - tf_name = 'FC' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy().transpose() - input_channels, output_channels = W.shape - - keras_weights = [W] - has_bias = False - if bias_name in weights: - bias = weights[bias_name].numpy() - keras_weights = [W, bias] - has_bias = True - - dense = keras.layers.Dense( - output_channels, - weights=keras_weights, use_bias=has_bias, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', - ) - - layers[scope_name] = dense(layers[inputs[0]]) - - -def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert Average pooling. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting pooling ...') - - if names == 'short': - tf_name = 'P' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'kernel_shape' in params: - height, width = params['kernel_shape'] - else: - height, width = params['kernel_size'] - - if 'strides' in params: - stride_height, stride_width = params['strides'] - else: - stride_height, stride_width = params['stride'] - - if 'pads' in params: - padding_h, padding_w, _, _ = params['pads'] - else: - padding_h, padding_w = params['padding'] - - input_name = inputs[0] - pad = 'valid' - - if height % 2 == 1 and width % 2 == 1 and \ - height // 2 == padding_h and width // 2 == padding_w and \ - stride_height == 1 and stride_width == 1: - pad = 'same' - else: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(padding_h, padding_w), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - # Pooling type AveragePooling2D - pooling = keras.layers.AveragePooling2D( - pool_size=(height, width), - strides=(stride_height, stride_width), - padding=pad, - name=tf_name, - data_format='channels_first' - ) - - layers[scope_name] = pooling(layers[input_name]) - - -def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert Max pooling. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - - print('Converting pooling ...') - - if names == 'short': - tf_name = 'P' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'kernel_shape' in params: - height, width = params['kernel_shape'] - else: - height, width = params['kernel_size'] - - if 'strides' in params: - stride_height, stride_width = params['strides'] - else: - stride_height, stride_width = params['stride'] - - if 'pads' in params: - padding_h, padding_w, _, _ = params['pads'] - else: - padding_h, padding_w = params['padding'] - - input_name = inputs[0] - pad = 'valid' - - if height % 2 == 1 and width % 2 == 1 and \ - height // 2 == padding_h and width // 2 == padding_w and \ - stride_height == 1 and stride_width == 1: - pad = 'same' - else: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(padding_h, padding_w), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - # Pooling type MaxPooling2D - pooling = keras.layers.MaxPooling2D( - pool_size=(height, width), - strides=(stride_height, stride_width), - padding=pad, - name=tf_name, - data_format='channels_first' - ) - - layers[scope_name] = pooling(layers[input_name]) - - -def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert 3d Max pooling. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - - print('Converting pooling ...') - - if names == 'short': - tf_name = 'P' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'kernel_shape' in params: - height, width, depth = params['kernel_shape'] - else: - height, width, depth = params['kernel_size'] - - if 'strides' in params: - stride_height, stride_width, stride_depth = params['strides'] - else: - stride_height, stride_width, stride_depth = params['stride'] - - if 'pads' in params: - padding_h, padding_w, padding_d, _, _ = params['pads'] - else: - padding_h, padding_w, padding_d = params['padding'] - - input_name = inputs[0] - if padding_h > 0 and padding_w > 0 and padding_d > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding3D( - padding=(padding_h, padding_w, padding_d), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - # Pooling type - pooling = keras.layers.MaxPooling3D( - pool_size=(height, width, depth), - strides=(stride_height, stride_width, stride_depth), - padding='valid', - name=tf_name - ) - - layers[scope_name] = pooling(layers[input_name]) - - -def convert_dropout(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert dropout. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting dropout ...') - - if names == 'short': - tf_name = 'DO' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - dropout = keras.layers.Dropout(rate=params['ratio'], name=tf_name) - layers[scope_name] = dropout(layers[inputs[0]]) - - -def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert batch normalization layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting batchnorm ...') - - if names == 'short': - tf_name = 'BN' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - mean_name = '{0}.running_mean'.format(w_name) - var_name = '{0}.running_var'.format(w_name) - - if bias_name in weights: - beta = weights[bias_name].numpy() - - if weights_name in weights: - gamma = weights[weights_name].numpy() - - mean = weights[mean_name].numpy() - variance = weights[var_name].numpy() - - eps = params['epsilon'] - momentum = params['momentum'] - - if weights_name not in weights: - bn = keras.layers.BatchNormalization( - axis=1, momentum=momentum, epsilon=eps, - center=False, scale=False, - weights=[mean, variance], - name=tf_name - ) - else: - bn = keras.layers.BatchNormalization( - axis=1, momentum=momentum, epsilon=eps, - weights=[gamma, beta, mean, variance], - name=tf_name - ) - layers[scope_name] = 
bn(layers[inputs[0]]) - - -def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert instance normalization layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting instancenorm ...') - - if names == 'short': - tf_name = 'IN' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - assert(len(inputs) == 3) - - # Use previously taken constants - assert(inputs[-2] + '_np' in layers) - assert(inputs[-1] + '_np' in layers) - gamma = layers[inputs[-2] + '_np'] - beta = layers[inputs[-1] + '_np'] - - def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): - layer = tf.contrib.layers.instance_norm( - x, - param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, - epsilon=epsilon, data_format='NCHW', - trainable=False - ) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_elementwise_add( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise addition. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_add ...') - if 'broadcast' in params: - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'A' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - layer = tf.add(x[0], x[1]) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) - else: - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'A' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - add = keras.layers.Add(name=tf_name) - layers[scope_name] = add([model0, model1]) - - -def convert_elementwise_mul( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise multiplication. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_mul ...') - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'M' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - layer = tf.multiply( - x[0], - x[1] - ) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) - - -def convert_elementwise_div( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise multiplication. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_div ...') - - if names == 'short': - tf_name = 'D' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - layer = tf.div( - x[0], - x[1] - ) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) - - -def convert_elementwise_sub( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise subtraction. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_sub ...') - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'S' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - sub = keras.layers.Subtract(name=tf_name) - layers[scope_name] = sub([model0, model1]) - - -def convert_sum( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert sum. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting Sum ...') - - def target_layer(x): - import keras.backend as K - return K.sum(x) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_concat(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert concatenation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting concat ...') - concat_nodes = [layers[i] for i in inputs] - - if len(concat_nodes) == 1: - # no-op - layers[scope_name] = concat_nodes[0] - return - - if names == 'short': - tf_name = 'CAT' + random_string(5) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - cat = keras.layers.Concatenate(name=tf_name, axis=params['axis']) - layers[scope_name] = cat(concat_nodes) - - -def convert_relu(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert relu layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting relu ...') - - if names == 'short': - tf_name = 'RELU' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - relu = keras.layers.Activation('relu', name=tf_name) - layers[scope_name] = relu(layers[inputs[0]]) - - -def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert leaky relu layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting lrelu ...') - - if names == 'short': - tf_name = 'lRELU' + random_string(3) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - leakyrelu = \ - keras.layers.LeakyReLU(alpha=params['alpha'], name=tf_name) - layers[scope_name] = leakyrelu(layers[inputs[0]]) - - -def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert sigmoid layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting sigmoid ...') - - if names == 'short': - tf_name = 'SIGM' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - sigmoid = keras.layers.Activation('sigmoid', name=tf_name) - layers[scope_name] = sigmoid(layers[inputs[0]]) - - -def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert softmax layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting softmax ...') - - if names == 'short': - tf_name = 'SMAX' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x, dim=params['dim']): - import keras - return keras.activations.softmax(x, axis=dim) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - - -def convert_tanh(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert tanh layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting tanh ...') - - if names == 'short': - tf_name = 'TANH' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - tanh = keras.layers.Activation('tanh', name=tf_name) - layers[scope_name] = tanh(layers[inputs[0]]) - - -def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert hardtanh layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting hardtanh (clip) ...') - - def target_layer(x, max_val=float(params['max_val']), min_val=float(params['min_val'])): - return tf.minimum(max_val, tf.maximum(min_val, x)) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_selu(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert selu layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting selu ...') - - if names == 'short': - tf_name = 'SELU' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - selu = keras.layers.Activation('selu', name=tf_name) - layers[scope_name] = selu(layers[inputs[0]]) - - -def convert_transpose(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert transpose layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting transpose ...') - if params['perm'][0] != 0: - # raise AssertionError('Cannot permute batch dimension') - print('!!! Cannot permute batch dimension. 
Result may be wrong !!!') - # try: - layers[scope_name] = layers[inputs[0]] - # except: - # pass - else: - if names: - tf_name = 'PERM' + random_string(4) - else: - tf_name = w_name + str(random.random()) - permute = keras.layers.Permute(params['perm'][1:], name=tf_name) - layers[scope_name] = permute(layers[inputs[0]]) - - -def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert reshape layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting reshape ...') - if names == 'short': - tf_name = 'RESH' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if len(inputs) > 1: - if layers[inputs[1]][0] == -1: - print('Cannot deduct batch size! It will be omitted, but result may be wrong.') - - def target_layer(x, shape=layers[inputs[1]]): - return tf.reshape(x, shape) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - # layers[scope_name] = reshape(layers[inputs[0]]) - else: - reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) - - -def convert_matmul(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert matmul layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting matmul ...') - - if names == 'short': - tf_name = 'MMUL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if len(inputs) == 1: - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy().transpose() - input_channels, output_channels = W.shape - - keras_weights = [W] - - dense = keras.layers.Dense( - output_channels, - weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', - ) - layers[scope_name] = dense(layers[inputs[0]]) - elif len(inputs) == 2: - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy().transpose() - input_channels, output_channels = W.shape - - keras_weights = [W] - - dense = keras.layers.Dense( - output_channels, - weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', - ) - layers[scope_name] = dense(layers[inputs[0]]) - else: - raise AssertionError('Cannot convert matmul layer') - - -def convert_gather(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert gather (embedding) layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting embedding ...') - - if names == 'short': - tf_name = 'EMBD' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy() - input_channels, output_channels = W.shape - - keras_weights = [W] - - dense = keras.layers.Embedding( - input_channels, - weights=keras_weights, output_dim=output_channels, name=tf_name - ) - layers[scope_name] = dense(layers[inputs[0]]) - - -def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert reduce_sum layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting reduce_sum ...') - - keepdims = params['keepdims'] > 0 - axis = params['axes'] - - def target_layer(x, keepdims=keepdims, axis=axis): - import keras.backend as K - return K.sum(x, keepdims=keepdims, axis=axis) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_constant(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert constant layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting constant ...') - - params_list = params['value'].numpy() - - def target_layer(x, value=params_list): - return tf.constant(value.tolist(), shape=value.shape) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name + '_np'] = params_list # ad-hoc - layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py - # layers[scope_name] = params['value'].tolist() - - - -def convert_upsample_bilinear(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert upsample_bilinear2d layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting upsample...') - - if names == 'short': - tf_name = 'UPSL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - output_size = params['output_size'] - align_corners = params['align_corners'] > 0 - - def target_layer(x, size=output_size, align_corners=align_corners): - import tensorflow as tf - x = tf.transpose(x, [0, 2, 3, 1]) - x = tf.image.resize_images(x, size, align_corners=align_corners) - x = tf.transpose(x, [0, 3, 1, 2]) - return x - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert upsample_bilinear2d layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting upsample...') - - if params['mode'] != 'nearest': - raise AssertionError('Cannot convert non-nearest upsampling') - - if names == 'short': - tf_name = 'UPSL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - scale = (params['height_scale'], params['width_scale']) - upsampling = keras.layers.UpSampling2D( - size=scale, name=tf_name - ) - layers[scope_name] = upsampling(layers[inputs[0]]) - - -def convert_padding(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert padding layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting padding...') - - if params['mode'] == 'constant': - # raise AssertionError('Cannot convert non-constant padding') - - if params['value'] != 0.0: - raise AssertionError('Cannot convert non-zero padding') - - if names: - tf_name = 'PADD' + random_string(4) - else: - tf_name = w_name + str(random.random()) - - # Magic ordering - padding_name = tf_name - padding_layer = keras.layers.ZeroPadding2D( - padding=((params['pads'][2], params['pads'][6]), (params['pads'][3], params['pads'][7])), - name=padding_name - ) - - layers[scope_name] = padding_layer(layers[inputs[0]]) - elif params['mode'] == 'reflect': - - def target_layer(x, pads=params['pads']): - # x = tf.transpose(x, [0, 2, 3, 1]) - layer = tf.pad(x, [[0, 0], [0, 0], [pads[2], pads[6]], [pads[3], pads[7]]], 'REFLECT') - # layer = tf.transpose(layer, [0, 3, 1, 2]) - return layer - - 
lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert adaptive_avg_pool2d layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting adaptive_avg_pool2d...') - - if names == 'short': - tf_name = 'APOL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - global_pool = keras.layers.GlobalAveragePooling2D(data_format='channels_first', name=tf_name) - layers[scope_name] = global_pool(layers[inputs[0]]) - - def target_layer(x): - import keras - return keras.backend.expand_dims(x) - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims - layers[scope_name] = lambda_layer(layers[scope_name]) - - -def convert_adaptive_max_pool2d(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert convert_adaptive_max_pool2d layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting adaptive_avg_pool2d...') - - if names == 'short': - tf_name = 'APOL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - global_pool = keras.layers.GlobalMaxPooling2D(data_format='channels_first', name=tf_name) - layers[scope_name] = global_pool(layers[inputs[0]]) - - def target_layer(x): - import keras - return keras.backend.expand_dims(x) - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims - layers[scope_name] = lambda_layer(layers[scope_name]) - - -def convert_slice(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert slice operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting slice ...') - - if len(params['axes']) > 1: - raise AssertionError('Cannot convert slice by multiple dimensions') - - if params['axes'][0] not in [0, 1, 2, 3]: - raise AssertionError('Slice by dimension more than 3 or less than 0 is not supported') - - def target_layer(x, axis=int(params['axes'][0]), start=int(params['starts'][0]), end=int(params['ends'][0])): - if axis == 0: - return x[start:end] - elif axis == 1: - return x[:, start:end] - elif axis == 2: - return x[:, :, start:end] - elif axis == 3: - return x[:, :, :, start:end] - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert squeeze operation. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting squeeze ...') - - if len(params['axes']) > 1: - raise AssertionError('Cannot convert squeeze by multiple dimensions') - - def target_layer(x, axis=int(params['axes'][0])): - return tf.squeeze(x, axis=axis) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_unsqueeze(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert unsqueeze operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting unsqueeze ...') - - if names == 'short': - tf_name = 'UNSQ' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - import keras - return keras.backend.expand_dims(x) - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_shape(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert shape operation. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting shape ...') - - def target_layer(x): - return tf.shape(x) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert clip operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting clip ...') - - def target_layer(x, vmin=params['min'], vmax=params['max']): - return tf.clip_by_value(x, vmin, vmax) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) +from .convolution_layers import convert_conv, convert_convtranspose +from .reshape_layers import convert_flatten, convert_transpose, convert_reshape, \ + convert_squeeze, convert_unsqueeze, convert_shape +from .elementwise_layers import convert_elementwise_add, convert_elementwise_mul, \ + convert_elementwise_div, convert_elementwise_sub +from .activation_layers import convert_relu, convert_lrelu, convert_selu, \ + convert_softmax, convert_sigmoid, convert_tanh, convert_hardtanh, convert_clip +from .pooling_layers import convert_avgpool, convert_maxpool, convert_maxpool3, \ + convert_adaptive_avg_pool2d, convert_adaptive_max_pool2d +from .normalization_layers import convert_batchnorm, convert_instancenorm, convert_dropout +from .linear_layers import convert_gemm, convert_matmul +from .embedding_layers import convert_gather +from .upsampling_layers import convert_upsample_bilinear, convert_upsample +from .padding_layers import convert_padding +from .operation_layers import convert_concat, convert_slice, convert_sum, \ + convert_reduce_sum, convert_slice +from .constant_layers import convert_constant AVAILABLE_CONVERTERS = { diff --git a/pytorch2keras/linear_layers.py b/pytorch2keras/linear_layers.py new file mode 100644 index 0000000..147eeef --- /dev/null +++ b/pytorch2keras/linear_layers.py @@ -0,0 +1,101 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def 
convert_gemm(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert Linear. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting Linear ...') + + if names == 'short': + tf_name = 'FC' + random_string(6) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + bias_name = '{0}.bias'.format(w_name) + weights_name = '{0}.weight'.format(w_name) + + W = weights[weights_name].numpy().transpose() + input_channels, output_channels = W.shape + + keras_weights = [W] + has_bias = False + if bias_name in weights: + bias = weights[bias_name].numpy() + keras_weights = [W, bias] + has_bias = True + + dense = keras.layers.Dense( + output_channels, + weights=keras_weights, use_bias=has_bias, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', + ) + + layers[scope_name] = dense(layers[inputs[0]]) + + +def convert_matmul(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert matmul layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting matmul ...') + + if names == 'short': + tf_name = 'MMUL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + if len(inputs) == 1: + weights_name = '{0}.weight'.format(w_name) + + W = weights[weights_name].numpy().transpose() + input_channels, output_channels = W.shape + + keras_weights = [W] + + dense = keras.layers.Dense( + output_channels, + weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', + ) + layers[scope_name] = dense(layers[inputs[0]]) + elif len(inputs) == 2: + weights_name = '{0}.weight'.format(w_name) + + W = weights[weights_name].numpy().transpose() + input_channels, output_channels = W.shape + + keras_weights = [W] + + dense = keras.layers.Dense( + output_channels, + weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', + ) + layers[scope_name] = dense(layers[inputs[0]]) + else: + raise AssertionError('Cannot convert matmul layer') diff --git a/pytorch2keras/normalization_layers.py b/pytorch2keras/normalization_layers.py new file mode 100644 index 0000000..a4d8977 --- /dev/null +++ b/pytorch2keras/normalization_layers.py @@ -0,0 +1,130 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert batch normalization layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting batchnorm ...') + + if names == 'short': + tf_name = 'BN' + random_string(6) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + bias_name = '{0}.bias'.format(w_name) + weights_name = '{0}.weight'.format(w_name) + mean_name = '{0}.running_mean'.format(w_name) + var_name = '{0}.running_var'.format(w_name) + + if bias_name in weights: + beta = weights[bias_name].numpy() + + if weights_name in weights: + gamma = weights[weights_name].numpy() + + mean = weights[mean_name].numpy() + variance = weights[var_name].numpy() + + eps = params['epsilon'] + momentum = params['momentum'] + + if weights_name not in weights: + bn = keras.layers.BatchNormalization( + axis=1, momentum=momentum, epsilon=eps, + center=False, scale=False, + weights=[mean, variance], + name=tf_name + ) + else: + bn = keras.layers.BatchNormalization( + axis=1, momentum=momentum, epsilon=eps, + weights=[gamma, beta, mean, variance], + name=tf_name + ) + layers[scope_name] = bn(layers[inputs[0]]) + + +def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert instance normalization layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting instancenorm ...') + + if names == 'short': + tf_name = 'IN' + random_string(6) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + assert(len(inputs) == 3) + + # Use previously taken constants + assert(inputs[-2] + '_np' in layers) + assert(inputs[-1] + '_np' in layers) + gamma = layers[inputs[-2] + '_np'] + beta = layers[inputs[-1] + '_np'] + + def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): + layer = tf.contrib.layers.instance_norm( + x, + param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, + epsilon=epsilon, data_format='NCHW', + trainable=False + ) + return layer + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_dropout(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert dropout. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting dropout ...') + + if names == 'short': + tf_name = 'DO' + random_string(6) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + dropout = keras.layers.Dropout(rate=params['ratio'], name=tf_name) + layers[scope_name] = dropout(layers[inputs[0]]) \ No newline at end of file diff --git a/pytorch2keras/operation_layers.py b/pytorch2keras/operation_layers.py new file mode 100644 index 0000000..f537dec --- /dev/null +++ b/pytorch2keras/operation_layers.py @@ -0,0 +1,124 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + + +def convert_sum( + params, w_name, scope_name, inputs, layers, weights, names +): + """ + Convert sum. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting Sum ...') + + def target_layer(x): + import keras.backend as K + return K.sum(x) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert reduce_sum layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting reduce_sum ...') + + keepdims = params['keepdims'] > 0 + axis = params['axes'] + + def target_layer(x, keepdims=keepdims, axis=axis): + import keras.backend as K + return K.sum(x, keepdims=keepdims, axis=axis) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + +def convert_concat(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert concatenation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting concat ...') + concat_nodes = [layers[i] for i in inputs] + + if len(concat_nodes) == 1: + # no-op + layers[scope_name] = concat_nodes[0] + return + + if names == 'short': + tf_name = 'CAT' + random_string(5) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + cat = keras.layers.Concatenate(name=tf_name, axis=params['axis']) + layers[scope_name] = cat(concat_nodes) + + +def convert_slice(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert slice operation. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting slice ...') + + if len(params['axes']) > 1: + raise AssertionError('Cannot convert slice by multiple dimensions') + + if params['axes'][0] not in [0, 1, 2, 3]: + raise AssertionError('Slice by dimension more than 3 or less than 0 is not supported') + + def target_layer(x, axis=int(params['axes'][0]), start=int(params['starts'][0]), end=int(params['ends'][0])): + if axis == 0: + return x[start:end] + elif axis == 1: + return x[:, start:end] + elif axis == 2: + return x[:, :, start:end] + elif axis == 3: + return x[:, :, :, start:end] + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) diff --git a/pytorch2keras/padding_layers.py b/pytorch2keras/padding_layers.py new file mode 100644 index 0000000..f136dbb --- /dev/null +++ b/pytorch2keras/padding_layers.py @@ -0,0 +1,52 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_padding(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert padding layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting padding...') + + if params['mode'] == 'constant': + # raise AssertionError('Cannot convert non-constant padding') + + if params['value'] != 0.0: + raise AssertionError('Cannot convert non-zero padding') + + if names: + tf_name = 'PADD' + random_string(4) + else: + tf_name = w_name + str(random.random()) + + # Magic ordering + padding_name = tf_name + padding_layer = keras.layers.ZeroPadding2D( + padding=((params['pads'][2], params['pads'][6]), (params['pads'][3], params['pads'][7])), + name=padding_name + ) + + layers[scope_name] = padding_layer(layers[inputs[0]]) + elif params['mode'] == 'reflect': + + def target_layer(x, pads=params['pads']): + # x = tf.transpose(x, [0, 2, 3, 1]) + layer = tf.pad(x, [[0, 0], [0, 0], [pads[2], pads[6]], [pads[3], pads[7]]], 'REFLECT') + # layer = tf.transpose(layer, [0, 3, 1, 2]) + return layer + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) diff --git a/pytorch2keras/pooling_layers.py b/pytorch2keras/pooling_layers.py new file mode 100644 index 0000000..6784869 --- /dev/null +++ b/pytorch2keras/pooling_layers.py @@ -0,0 +1,264 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert Average pooling. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting pooling ...') + + if names == 'short': + tf_name = 'P' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + if 'kernel_shape' in params: + height, width = params['kernel_shape'] + else: + height, width = params['kernel_size'] + + if 'strides' in params: + stride_height, stride_width = params['strides'] + else: + stride_height, stride_width = params['stride'] + + if 'pads' in params: + padding_h, padding_w, _, _ = params['pads'] + else: + padding_h, padding_w = params['padding'] + + input_name = inputs[0] + pad = 'valid' + + if height % 2 == 1 and width % 2 == 1 and \ + height // 2 == padding_h and width // 2 == padding_w and \ + stride_height == 1 and stride_width == 1: + pad = 'same' + else: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(padding_h, padding_w), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[inputs[0]]) + input_name = padding_name + + # Pooling type AveragePooling2D + pooling = keras.layers.AveragePooling2D( + pool_size=(height, width), + strides=(stride_height, stride_width), + padding=pad, + name=tf_name, + data_format='channels_first' + ) + + layers[scope_name] = pooling(layers[input_name]) + + +def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert Max pooling. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + + print('Converting pooling ...') + + if names == 'short': + tf_name = 'P' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + if 'kernel_shape' in params: + height, width = params['kernel_shape'] + else: + height, width = params['kernel_size'] + + if 'strides' in params: + stride_height, stride_width = params['strides'] + else: + stride_height, stride_width = params['stride'] + + if 'pads' in params: + padding_h, padding_w, _, _ = params['pads'] + else: + padding_h, padding_w = params['padding'] + + input_name = inputs[0] + pad = 'valid' + + if height % 2 == 1 and width % 2 == 1 and \ + height // 2 == padding_h and width // 2 == padding_w and \ + stride_height == 1 and stride_width == 1: + pad = 'same' + else: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding2D( + padding=(padding_h, padding_w), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[inputs[0]]) + input_name = padding_name + + # Pooling type MaxPooling2D + pooling = keras.layers.MaxPooling2D( + pool_size=(height, width), + strides=(stride_height, stride_width), + padding=pad, + name=tf_name, + data_format='channels_first' + ) + + layers[scope_name] = pooling(layers[input_name]) + + +def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert 3d Max pooling. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + + print('Converting pooling ...') + + if names == 'short': + tf_name = 'P' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + if 'kernel_shape' in params: + height, width, depth = params['kernel_shape'] + else: + height, width, depth = params['kernel_size'] + + if 'strides' in params: + stride_height, stride_width, stride_depth = params['strides'] + else: + stride_height, stride_width, stride_depth = params['stride'] + + if 'pads' in params: + padding_h, padding_w, padding_d, _, _ = params['pads'] + else: + padding_h, padding_w, padding_d = params['padding'] + + input_name = inputs[0] + if padding_h > 0 and padding_w > 0 and padding_d > 0: + padding_name = tf_name + '_pad' + padding_layer = keras.layers.ZeroPadding3D( + padding=(padding_h, padding_w, padding_d), + name=padding_name + ) + layers[padding_name] = padding_layer(layers[inputs[0]]) + input_name = padding_name + + # Pooling type + pooling = keras.layers.MaxPooling3D( + pool_size=(height, width, depth), + strides=(stride_height, stride_width, stride_depth), + padding='valid', + name=tf_name + ) + + layers[scope_name] = pooling(layers[input_name]) + + +def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert adaptive_avg_pool2d layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting adaptive_avg_pool2d...') + + if names == 'short': + tf_name = 'APOL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + global_pool = keras.layers.GlobalAveragePooling2D(data_format='channels_first', name=tf_name) + layers[scope_name] = global_pool(layers[inputs[0]]) + + def target_layer(x): + import keras + return keras.backend.expand_dims(x) + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') + layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims + layers[scope_name] = lambda_layer(layers[scope_name]) + + +def convert_adaptive_max_pool2d(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert convert_adaptive_max_pool2d layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting adaptive_avg_pool2d...') + + if names == 'short': + tf_name = 'APOL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + global_pool = keras.layers.GlobalMaxPooling2D(data_format='channels_first', name=tf_name) + layers[scope_name] = global_pool(layers[inputs[0]]) + + def target_layer(x): + import keras + return keras.backend.expand_dims(x) + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') + layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims + layers[scope_name] = lambda_layer(layers[scope_name]) diff --git a/pytorch2keras/reshape_layers.py b/pytorch2keras/reshape_layers.py new file mode 100644 index 0000000..558a29f --- /dev/null +++ b/pytorch2keras/reshape_layers.py @@ -0,0 +1,176 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_flatten(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert reshape(view). 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting flatten ...') + + if names == 'short': + tf_name = 'R' + random_string(7) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + reshape = keras.layers.Reshape([-1], name=tf_name) + layers[scope_name] = reshape(layers[inputs[0]]) + + +def convert_transpose(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert transpose layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting transpose ...') + if params['perm'][0] != 0: + # raise AssertionError('Cannot permute batch dimension') + print('!!! Cannot permute batch dimension. Result may be wrong !!!') + # try: + layers[scope_name] = layers[inputs[0]] + # except: + # pass + else: + if names: + tf_name = 'PERM' + random_string(4) + else: + tf_name = w_name + str(random.random()) + permute = keras.layers.Permute(params['perm'][1:], name=tf_name) + layers[scope_name] = permute(layers[inputs[0]]) + + +def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert reshape layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting reshape ...') + if names == 'short': + tf_name = 'RESH' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + if len(inputs) > 1: + if layers[inputs[1]][0] == -1: + print('Cannot deduct batch size! It will be omitted, but result may be wrong.') + + def target_layer(x, shape=layers[inputs[1]]): + return tf.reshape(x, shape) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + # layers[scope_name] = reshape(layers[inputs[0]]) + else: + reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) + layers[scope_name] = reshape(layers[inputs[0]]) + + +def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert squeeze operation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting squeeze ...') + + if len(params['axes']) > 1: + raise AssertionError('Cannot convert squeeze by multiple dimensions') + + def target_layer(x, axis=int(params['axes'][0])): + return tf.squeeze(x, axis=axis) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_unsqueeze(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert unsqueeze operation. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting unsqueeze ...') + + if names == 'short': + tf_name = 'UNSQ' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + def target_layer(x): + import keras + return keras.backend.expand_dims(x) + + lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_shape(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert shape operation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting shape ...') + + def target_layer(x): + return tf.shape(x) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) \ No newline at end of file diff --git a/pytorch2keras/upsampling_layers.py b/pytorch2keras/upsampling_layers.py new file mode 100644 index 0000000..7a9721f --- /dev/null +++ b/pytorch2keras/upsampling_layers.py @@ -0,0 +1,74 @@ +import keras.layers +import numpy as np +import random +import string +import tensorflow as tf +from .common import random_string + + +def convert_upsample_bilinear(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert upsample_bilinear2d layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting upsample...') + + if names == 'short': + tf_name = 'UPSL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + output_size = params['output_size'] + align_corners = params['align_corners'] > 0 + + def target_layer(x, size=output_size, align_corners=align_corners): + import tensorflow as tf + x = tf.transpose(x, [0, 2, 3, 1]) + x = tf.image.resize_images(x, size, align_corners=align_corners) + x = tf.transpose(x, [0, 3, 1, 2]) + return x + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert upsample_bilinear2d layer. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting upsample...') + + if params['mode'] != 'nearest': + raise AssertionError('Cannot convert non-nearest upsampling') + + if names == 'short': + tf_name = 'UPSL' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + scale = (params['height_scale'], params['width_scale']) + upsampling = keras.layers.UpSampling2D( + size=scale, name=tf_name + ) + layers[scope_name] = upsampling(layers[inputs[0]]) From 7ca682450b587817471b8344e1500c5222e1ed03 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 12:55:15 +0300 Subject: [PATCH 096/180] Started tests refactoring. 
--- tests/layers/activations/lrelu.py | 71 +++++++++++++++++++++++++++++++ tests/layers/activations/relu.py | 69 ++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 tests/layers/activations/lrelu.py create mode 100644 tests/layers/activations/relu.py diff --git a/tests/layers/activations/lrelu.py b/tests/layers/activations/lrelu.py new file mode 100644 index 0000000..2dc4789 --- /dev/null +++ b/tests/layers/activations/lrelu.py @@ -0,0 +1,71 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, negative_slope): + super(LayerTest, self).__init__() + self.relu = nn.LeakyReLU(negative_slope=negative_slope) + + def forward(self, x): + x = self.relu(x) + return x + + +class FTest(nn.Module): + def __init__(self, negative_slope): + super(FTest, self).__init__() + self.relu = nn.ReLU() + self.negative_slope = negative_slope + + def forward(self, x): + from torch.nn import functional as F + return F.leaky_relu(x, self.negative_slope) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + import random + model = LayerTest(negative_slope=random.random() - 0.5) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest(negative_slope=random.random() - 0.5) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 
224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/activations/relu.py b/tests/layers/activations/relu.py new file mode 100644 index 0000000..aee323b --- /dev/null +++ b/tests/layers/activations/relu.py @@ -0,0 +1,69 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self): + super(LayerTest, self).__init__() + self.relu = nn.ReLU() + + def forward(self, x): + x = self.relu(x) + return x + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + self.relu = nn.ReLU() + + def forward(self, x): + from torch.nn import functional as F + return F.relu(x) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = LayerTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, 
k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 87ab0480aab8fdb96f95a33caecf9080d26f1541 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:00:08 +0300 Subject: [PATCH 097/180] Fix some excess code in tests. --- tests/layers/activations/lrelu.py | 1 - tests/layers/activations/relu.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/layers/activations/lrelu.py b/tests/layers/activations/lrelu.py index 2dc4789..5be0a2b 100644 --- a/tests/layers/activations/lrelu.py +++ b/tests/layers/activations/lrelu.py @@ -18,7 +18,6 @@ def forward(self, x): class FTest(nn.Module): def __init__(self, negative_slope): super(FTest, self).__init__() - self.relu = nn.ReLU() self.negative_slope = negative_slope def forward(self, x): diff --git a/tests/layers/activations/relu.py b/tests/layers/activations/relu.py index aee323b..985c03d 100644 --- a/tests/layers/activations/relu.py +++ b/tests/layers/activations/relu.py @@ -18,7 +18,6 @@ def forward(self, x): class FTest(nn.Module): def __init__(self): super(FTest, self).__init__() - self.relu = nn.ReLU() def forward(self, x): from torch.nn import functional as F From 2b416a5f8b4a3d3dd504f1ac443b27e8a1116988 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:00:17 +0300 Subject: [PATCH 098/180] Add sigmoid test. 
--- tests/layers/activations/sigmoid.py | 68 +++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/layers/activations/sigmoid.py diff --git a/tests/layers/activations/sigmoid.py b/tests/layers/activations/sigmoid.py new file mode 100644 index 0000000..f5b71aa --- /dev/null +++ b/tests/layers/activations/sigmoid.py @@ -0,0 +1,68 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self): + super(LayerTest, self).__init__() + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = self.sigmoid(x) + return x + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x): + from torch.nn import functional as F + return F.sigmoid(x) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = LayerTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 
aefa843008655b3c5312e5bc6104e929357878d7 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:00:24 +0300 Subject: [PATCH 099/180] Add softmax test. --- tests/layers/activations/softmax.py | 70 +++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/layers/activations/softmax.py diff --git a/tests/layers/activations/softmax.py b/tests/layers/activations/softmax.py new file mode 100644 index 0000000..bb91abe --- /dev/null +++ b/tests/layers/activations/softmax.py @@ -0,0 +1,70 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, dim): + super(LayerTest, self).__init__() + self.softmax = nn.Softmax(dim=dim) + + def forward(self, x): + x = self.softmax(x) + return x + + +class FTest(nn.Module): + def __init__(self, dim): + super(FTest, self).__init__() + self.dim = dim + + def forward(self, x): + from torch.nn import functional as F + return F.softmax(x, dim=self.dim) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(50): + import random + model = LayerTest(dim=np.random.randint(0, 3)) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(50): + model = FTest(dim=np.random.randint(0, 3)) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) 
+ output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 76f65ef73db8597d16277bfd8dcc5d273d27e633 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:00:33 +0300 Subject: [PATCH 100/180] Add tanh test. --- tests/layers/activations/tanh.py | 68 ++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/layers/activations/tanh.py diff --git a/tests/layers/activations/tanh.py b/tests/layers/activations/tanh.py new file mode 100644 index 0000000..231ce6c --- /dev/null +++ b/tests/layers/activations/tanh.py @@ -0,0 +1,68 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self): + super(LayerTest, self).__init__() + self.tanh = nn.Tanh() + + def forward(self, x): + x = self.tanh(x) + return x + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x): + from torch.nn import functional as F + return F.tanh(x) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = LayerTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest() + model.eval() 
+ + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 1d23690581b5c462f79e19cd6c10fcfab12dd662 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:01:48 +0300 Subject: [PATCH 101/180] Move clip to operations module. --- pytorch2keras/activation_layers.py | 22 ---------------------- pytorch2keras/layers.py | 4 ++-- pytorch2keras/operation_layers.py | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pytorch2keras/activation_layers.py b/pytorch2keras/activation_layers.py index b7cb6cd..cdf8604 100644 --- a/pytorch2keras/activation_layers.py +++ b/pytorch2keras/activation_layers.py @@ -187,25 +187,3 @@ def convert_selu(params, w_name, scope_name, inputs, layers, weights, names): selu = keras.layers.Activation('selu', name=tf_name) layers[scope_name] = selu(layers[inputs[0]]) - - -def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert clip operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting clip ...') - - def target_layer(x, vmin=params['min'], vmax=params['max']): - return tf.clip_by_value(x, vmin, vmax) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) \ No newline at end of file diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 28091a8..9dea97f 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -11,7 +11,7 @@ from .elementwise_layers import convert_elementwise_add, convert_elementwise_mul, \ convert_elementwise_div, convert_elementwise_sub from .activation_layers import convert_relu, convert_lrelu, convert_selu, \ - convert_softmax, convert_sigmoid, convert_tanh, convert_hardtanh, convert_clip + convert_softmax, convert_sigmoid, convert_tanh, convert_hardtanh from .pooling_layers import convert_avgpool, convert_maxpool, convert_maxpool3, \ convert_adaptive_avg_pool2d, convert_adaptive_max_pool2d from .normalization_layers import convert_batchnorm, convert_instancenorm, convert_dropout @@ -20,7 +20,7 @@ from .upsampling_layers import convert_upsample_bilinear, convert_upsample from .padding_layers import convert_padding from .operation_layers import convert_concat, convert_slice, convert_sum, \ - convert_reduce_sum, convert_slice + convert_reduce_sum, convert_slice, convert_clip from .constant_layers import convert_constant diff --git a/pytorch2keras/operation_layers.py b/pytorch2keras/operation_layers.py index f537dec..3bbab5d 100644 --- a/pytorch2keras/operation_layers.py +++ b/pytorch2keras/operation_layers.py @@ -122,3 +122,25 @@ def target_layer(x, axis=int(params['axes'][0]), start=int(params['starts'][0]), lambda_layer = keras.layers.Lambda(target_layer) 
layers[scope_name] = lambda_layer(layers[inputs[0]]) + + +def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert clip operation. + + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting clip ...') + + def target_layer(x, vmin=params['min'], vmax=params['max']): + return tf.clip_by_value(x, vmin, vmax) + + lambda_layer = keras.layers.Lambda(target_layer) + layers[scope_name] = lambda_layer(layers[inputs[0]]) From 18b21022f4cca2c66bf918105b7a3b87d298d51a Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:05:33 +0300 Subject: [PATCH 102/180] Add hard_tanh test. --- tests/layers/activations/hard_tanh.py | 75 +++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tests/layers/activations/hard_tanh.py diff --git a/tests/layers/activations/hard_tanh.py b/tests/layers/activations/hard_tanh.py new file mode 100644 index 0000000..f092949 --- /dev/null +++ b/tests/layers/activations/hard_tanh.py @@ -0,0 +1,75 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, min_val, max_val): + super(LayerTest, self).__init__() + self.htanh = nn.Hardtanh(min_val=min_val, max_val=max_val) + + def forward(self, x): + x = self.htanh(x) + return x + + +class FTest(nn.Module): + def __init__(self, min_val, max_val): + super(FTest, self).__init__() + self.min_val = min_val + self.max_val = max_val + + def forward(self, x): + from torch.nn import functional as F + return F.hardtanh(x, min_val=self.min_val, max_val=self.max_val) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + 
keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + import random + vmin = random.random() - 1.0 + vmax = vmin + 2.0 * random.random() + model = LayerTest(vmin, vmax) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + vmin = random.random() - 1.0 + vmax = vmin + 2.0 * random.random() + model = FTest(vmin, vmax) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 37cfaf178770b05bbf4d5261b588683bc319df53 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:06:27 +0300 Subject: [PATCH 103/180] Add SELU test. 
--- tests/layers/activations/selu.py | 68 ++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/layers/activations/selu.py diff --git a/tests/layers/activations/selu.py b/tests/layers/activations/selu.py new file mode 100644 index 0000000..92e249e --- /dev/null +++ b/tests/layers/activations/selu.py @@ -0,0 +1,68 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self): + super(LayerTest, self).__init__() + self.selu = nn.SELU() + + def forward(self, x): + x = self.selu(x) + return x + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x): + from torch.nn import functional as F + return F.selu(x) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + model = LayerTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 9ec2dab828bb6e4e9f8a84773f8e7203fc0664ac Mon Sep 17 
00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:23:24 +0300 Subject: [PATCH 104/180] Fix import error. --- pytorch2keras/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch2keras/common.py b/pytorch2keras/common.py index fde4e99..178ab8d 100644 --- a/pytorch2keras/common.py +++ b/pytorch2keras/common.py @@ -1,4 +1,5 @@ import random +import string def random_string(length): From cebd4d5230779cca65671ef7518cbd1dbe96d2b6 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:53:15 +0300 Subject: [PATCH 105/180] Add constant test. --- tests/layers/constant/constant.py | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/layers/constant/constant.py diff --git a/tests/layers/constant/constant.py b/tests/layers/constant/constant.py new file mode 100644 index 0000000..a8d16fd --- /dev/null +++ b/tests/layers/constant/constant.py @@ -0,0 +1,44 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x): + return x + 1 + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + + for i in range(10): + model = FTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 1cf4c73059ba4435f37a93a316edbd426263cbf0 Mon Sep 17 
00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 13:53:39 +0300 Subject: [PATCH 106/180] Change inputs computation. --- pytorch2keras/converter.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 69105a6..554d9ca 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -240,6 +240,10 @@ def pytorch_to_keras( graph_outputs = [n.uniqueName() for n in trace.graph().outputs()] print('Graph outputs:', graph_outputs) + + graph_inputs = [n.uniqueName() for n in trace.graph().inputs()] + print('Graph inputs:', graph_inputs) + # Collect model state dict state_dict = _unique_state_dict(model) if verbose: @@ -261,25 +265,32 @@ def pytorch_to_keras( outputs = [] input_index = 0 - model_inputs = dict() + model_inputs = ['input' + i for i in graph_inputs] + for node in nodes: node_inputs = list(node.inputs()) + # print(node_inputs, model_inputs) node_input_names = [] + for node_input in node_inputs: + print(get_node_id(node_input.node())) if node_input.node().scopeName(): node_input_names.append(get_node_id(node_input.node())) - - if len(node_input_names) == 0: - if len(node_inputs) > 0: - if node_inputs[0] in model_inputs: - node_input_names.append(model_inputs[node_inputs[0]]) - else: - input_name = 'input{0}'.format(input_index) - if input_name not in layers: - continue - node_input_names.append(input_name) - input_index += 1 - model_inputs[node_inputs[0]] = input_name + if 'input{0}'.format(get_node_id(node_input.node())) in model_inputs: + node_input_names.append('input{0}'.format(get_node_id(node_input.node()))) + # print(node_input_names) + + # if len(node_input_names) == 0: + # if len(node_inputs) > 0: + # if node_inputs[0] in model_inputs: + # node_input_names.append(model_inputs[node_inputs[0]]) + # else: + # input_name = 'input{0}'.format(input_index) + # if input_name not in layers: + # continue + # 
node_input_names.append(input_name) + # input_index += 1 + # model_inputs[node_inputs[0]] = input_name node_type = node.kind() # print(dir(node)) @@ -300,6 +311,7 @@ def pytorch_to_keras( if verbose: print(' ____ ') print('graph node:', node_scope_name) + print('node id:', node_id) print('type:', node_type) print('inputs:', node_input_names) print('outputs:', node_outputs_names) From ac2876ec4c971a95bf25fe318576bc644266dd37 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 14:02:57 +0300 Subject: [PATCH 107/180] Add conv2d full test. --- tests/layers/convolutions/conv2d.py | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/layers/convolutions/conv2d.py diff --git a/tests/layers/convolutions/conv2d.py b/tests/layers/convolutions/conv2d.py new file mode 100644 index 0000000..f1cb067 --- /dev/null +++ b/tests/layers/convolutions/conv2d.py @@ -0,0 +1,56 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, inp, out, kernel_size=3, padding=1, stride=1, bias=False, dilation=1, groups=1): + super(LayerTest, self).__init__() + self.conv = nn.Conv2d(inp, out, kernel_size=kernel_size, padding=padding, \ + stride=stride, bias=bias, dilation=dilation, groups=groups) + + def forward(self, x): + x = self.conv(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for kernel_size in [1, 3, 5]: + for padding in [0, 1, 3]: + for stride in [1, 2]: + for bias in [True, False]: + for dilation in [1, 2, 3]: + for groups in [1, 3]: + # ValueError: strides > 1 not supported in conjunction with dilation_rate > 1 
+ if stride > 1 and dilation > 1: + continue + + model = LayerTest(3, groups, \ + kernel_size=kernel_size, padding=padding, stride=stride, bias=bias, dilation=dilation, groups=groups) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From f09b488e0539796b55d7aa68f51473654295e1b1 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 14:20:29 +0300 Subject: [PATCH 108/180] Add Conv1d test. --- tests/layers/convolutions/conv1d.py | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/layers/convolutions/conv1d.py diff --git a/tests/layers/convolutions/conv1d.py b/tests/layers/convolutions/conv1d.py new file mode 100644 index 0000000..c700dad --- /dev/null +++ b/tests/layers/convolutions/conv1d.py @@ -0,0 +1,56 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, inp, out, kernel_size=3, padding=1, stride=1, bias=False, dilation=1): + super(LayerTest, self).__init__() + self.conv = nn.Conv1d(inp, out, kernel_size=kernel_size, padding=padding, \ + stride=stride, bias=bias, dilation=dilation) + + def forward(self, x): + x = self.conv(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for kernel_size in [1, 3, 5]: + for padding in [0, 1, 3]: + for stride in [1, 2]: + for bias in 
[True, False]: + for dilation in [1, 2, 3]: + # ValueError: strides > 1 not supported in conjunction with dilation_rate > 1 + if stride > 1 and dilation > 1: + continue + + ins = np.random.choice([1, 3, 7]) + model = LayerTest(ins, np.random.choice([1, 3, 7]), \ + kernel_size=kernel_size, padding=padding, stride=stride, bias=bias, dilation=dilation) + model.eval() + + input_np = np.random.uniform(0, 1, (1, ins, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + print(output.size()) + k_model = pytorch_to_keras(model, input_var, (ins, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From a1f7adba3db4d5fe6330c456f2b099a893fd3a72 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 14:20:45 +0300 Subject: [PATCH 109/180] Fix Conv1d layer. --- pytorch2keras/convolution_layers.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py index 08934c1..48f400e 100644 --- a/pytorch2keras/convolution_layers.py +++ b/pytorch2keras/convolution_layers.py @@ -178,6 +178,9 @@ def convolve_lambda(i, k): else: # 1D conv W = weights[weights_name].numpy().transpose(2, 1, 0) width, channels, n_filters = W.shape + n_groups = params['group'] + if n_groups > 1: + raise AssertionError('Cannot convert conv1d with groups != 1') if bias_name in weights: biases = weights[bias_name].numpy() @@ -200,14 +203,15 @@ def convolve_lambda(i, k): weights = [W] conv = keras.layers.Conv1D( - filters=n_filters, + filters=channels, kernel_size=width, - strides=params['strides'][0], + strides=params['strides'], padding='valid', weights=weights, use_bias=has_bias, activation=None, - dilation_rate=params['dilations'][0], + data_format='channels_first', + dilation_rate=params['dilations'], bias_initializer='zeros', kernel_initializer='zeros', 
name=tf_name ) From 30ad24dc9428677da910d7f4d5a6dec4fb1c39a2 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 20 Dec 2018 14:27:48 +0300 Subject: [PATCH 110/180] Add full convtranspose2d test. --- tests/layers/convolutions/convtranspose2d.py | 52 ++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 tests/layers/convolutions/convtranspose2d.py diff --git a/tests/layers/convolutions/convtranspose2d.py b/tests/layers/convolutions/convtranspose2d.py new file mode 100644 index 0000000..d6ac389 --- /dev/null +++ b/tests/layers/convolutions/convtranspose2d.py @@ -0,0 +1,52 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, inp, out, kernel_size=3, padding=1, stride=1, bias=False): + super(LayerTest, self).__init__() + self.conv = nn.ConvTranspose2d(inp, out, kernel_size=kernel_size, padding=padding, \ + stride=stride, bias=bias) + + def forward(self, x): + x = self.conv(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for kernel_size in [1, 3, 5]: + for padding in [0, 1, 3]: + for stride in [1, 2]: + for bias in [True, False]: + outs = np.random.choice([1, 3, 7]) + + model = LayerTest(3, outs, \ + kernel_size=kernel_size, padding=padding, stride=stride, bias=bias) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: 
{0}'.format(max_error)) From d63c1c2693e5a3b25e35766c38ca714749f05c7c Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 12:55:28 +0300 Subject: [PATCH 111/180] Minor changes in converter. --- pytorch2keras/converter.py | 3 +-- pytorch2keras/convolution_layers.py | 7 +++++++ pytorch2keras/embedding_layers.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 554d9ca..ac18086 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -273,11 +273,10 @@ def pytorch_to_keras( node_input_names = [] for node_input in node_inputs: - print(get_node_id(node_input.node())) if node_input.node().scopeName(): node_input_names.append(get_node_id(node_input.node())) if 'input{0}'.format(get_node_id(node_input.node())) in model_inputs: - node_input_names.append('input{0}'.format(get_node_id(node_input.node()))) + node_input_names.append('input{0}'.format(node_input.uniqueName())) # print(node_input_names) # if len(node_input_names) == 0: diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py index 48f400e..3fb04a8 100644 --- a/pytorch2keras/convolution_layers.py +++ b/pytorch2keras/convolution_layers.py @@ -247,6 +247,13 @@ def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, n W = weights[weights_name].numpy().transpose(2, 3, 1, 0) height, width, n_filters, channels = W.shape + n_groups = params['group'] + if n_groups > 1: + raise AssertionError('Cannot convert conv1d with groups != 1') + + if params['dilations'][0] > 1: + raise AssertionError('Cannot convert conv1d with dilation_rate != 1') + if bias_name in weights: biases = weights[bias_name].numpy() has_bias = True diff --git a/pytorch2keras/embedding_layers.py b/pytorch2keras/embedding_layers.py index 0feb7e6..3f0960a 100644 --- a/pytorch2keras/embedding_layers.py +++ b/pytorch2keras/embedding_layers.py @@ -39,4 +39,4 @@ def convert_gather(params, 
w_name, scope_name, inputs, layers, weights, names): input_channels, weights=keras_weights, output_dim=output_channels, name=tf_name ) - layers[scope_name] = dense(layers[inputs[0]]) + layers[scope_name] = dense(layers[inputs[1]]) From afaeb44ec63ccb004edd3d66c9579d17177dd820 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 12:55:52 +0300 Subject: [PATCH 112/180] Minor fixes in tests. --- tests/layers/{constant => constants}/constant.py | 0 tests/layers/convolutions/conv1d.py | 2 +- tests/layers/convolutions/conv2d.py | 2 +- tests/layers/convolutions/convtranspose2d.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename tests/layers/{constant => constants}/constant.py (100%) diff --git a/tests/layers/constant/constant.py b/tests/layers/constants/constant.py similarity index 100% rename from tests/layers/constant/constant.py rename to tests/layers/constants/constant.py diff --git a/tests/layers/convolutions/conv1d.py b/tests/layers/convolutions/conv1d.py index c700dad..a265761 100644 --- a/tests/layers/convolutions/conv1d.py +++ b/tests/layers/convolutions/conv1d.py @@ -53,4 +53,4 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/convolutions/conv2d.py b/tests/layers/convolutions/conv2d.py index f1cb067..8261b82 100644 --- a/tests/layers/convolutions/conv2d.py +++ b/tests/layers/convolutions/conv2d.py @@ -53,4 +53,4 @@ def check_error(output, k_model, input_np, epsilon=1e-5): if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/convolutions/convtranspose2d.py b/tests/layers/convolutions/convtranspose2d.py index d6ac389..44d52f2 100644 --- a/tests/layers/convolutions/convtranspose2d.py +++ b/tests/layers/convolutions/convtranspose2d.py @@ -49,4 +49,4 @@ def 
check_error(output, k_model, input_np, epsilon=1e-5): if max_error < error: max_error = error - print('Max error: {0}'.format(max_error)) + print('Max error: {0}'.format(max_error)) From d9c0636d2eb1be2ce069b1a9c1471b1d4d70bb15 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 12:56:04 +0300 Subject: [PATCH 113/180] Add element-wise tests. --- tests/layers/elementwise/add.py | 46 +++++++++++++++++++++++++++++++++ tests/layers/elementwise/div.py | 46 +++++++++++++++++++++++++++++++++ tests/layers/elementwise/mul.py | 46 +++++++++++++++++++++++++++++++++ tests/layers/elementwise/sub.py | 46 +++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+) create mode 100644 tests/layers/elementwise/add.py create mode 100644 tests/layers/elementwise/div.py create mode 100644 tests/layers/elementwise/mul.py create mode 100644 tests/layers/elementwise/sub.py diff --git a/tests/layers/elementwise/add.py b/tests/layers/elementwise/add.py new file mode 100644 index 0000000..be943fa --- /dev/null +++ b/tests/layers/elementwise/add.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + def forward(self, x, y): + x = x + y + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + + for i in range(10): + model = FTest() + model.eval() + + input_np1 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_np2 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var1 = Variable(torch.FloatTensor(input_np1)) + input_var2 = Variable(torch.FloatTensor(input_np2)) + output = model(input_var1, input_var2) + + 
k_model = pytorch_to_keras(model, [input_var1, input_var2], [(3, 224, 224,), (3, 224, 224,)], verbose=True) + + error = check_error(output, k_model, [input_np1, input_np2]) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/elementwise/div.py b/tests/layers/elementwise/div.py new file mode 100644 index 0000000..55c5f36 --- /dev/null +++ b/tests/layers/elementwise/div.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + def forward(self, x, y): + x = x / y + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + + for i in range(10): + model = FTest() + model.eval() + + input_np1 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_np2 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var1 = Variable(torch.FloatTensor(input_np1)) + input_var2 = Variable(torch.FloatTensor(input_np2)) + output = model(input_var1, input_var2) + + k_model = pytorch_to_keras(model, [input_var1, input_var2], [(3, 224, 224,), (3, 224, 224,)], verbose=True) + + error = check_error(output, k_model, [input_np1, input_np2]) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/elementwise/mul.py b/tests/layers/elementwise/mul.py new file mode 100644 index 0000000..4194ef4 --- /dev/null +++ b/tests/layers/elementwise/mul.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class 
FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + def forward(self, x, y): + x = x * y + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + + for i in range(10): + model = FTest() + model.eval() + + input_np1 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_np2 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var1 = Variable(torch.FloatTensor(input_np1)) + input_var2 = Variable(torch.FloatTensor(input_np2)) + output = model(input_var1, input_var2) + + k_model = pytorch_to_keras(model, [input_var1, input_var2], [(3, 224, 224,), (3, 224, 224,)], verbose=True) + + error = check_error(output, k_model, [input_np1, input_np2]) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/elementwise/sub.py b/tests/layers/elementwise/sub.py new file mode 100644 index 0000000..b046961 --- /dev/null +++ b/tests/layers/elementwise/sub.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + def forward(self, x, y): + x = x - y + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + + for i in range(10): + model = FTest() + model.eval() + + input_np1 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_np2 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var1 = 
Variable(torch.FloatTensor(input_np1)) + input_var2 = Variable(torch.FloatTensor(input_np2)) + output = model(input_var1, input_var2) + + k_model = pytorch_to_keras(model, [input_var1, input_var2], [(3, 224, 224,), (3, 224, 224,)], verbose=True) + + error = check_error(output, k_model, [input_np1, input_np2]) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From a1b07b7b62d2237a236842a5fc56efd9479df929 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 12:56:21 +0300 Subject: [PATCH 114/180] Add pooling tests. --- tests/layers/poolings/avgpool2d.py | 51 +++++++++++++++++++++++ tests/layers/poolings/global_avgpool2d.py | 46 ++++++++++++++++++++ tests/layers/poolings/global_maxpool2d.py | 46 ++++++++++++++++++++ tests/layers/poolings/maxpool2d.py | 51 +++++++++++++++++++++++ 4 files changed, 194 insertions(+) create mode 100644 tests/layers/poolings/avgpool2d.py create mode 100644 tests/layers/poolings/global_avgpool2d.py create mode 100644 tests/layers/poolings/global_maxpool2d.py create mode 100644 tests/layers/poolings/maxpool2d.py diff --git a/tests/layers/poolings/avgpool2d.py b/tests/layers/poolings/avgpool2d.py new file mode 100644 index 0000000..a750096 --- /dev/null +++ b/tests/layers/poolings/avgpool2d.py @@ -0,0 +1,51 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, kernel_size=3, padding=1, stride=1): + super(LayerTest, self).__init__() + self.pool = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + + def forward(self, x): + x = self.pool(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if 
__name__ == '__main__': + max_error = 0 + for kernel_size in [1, 3, 5, 7]: + for padding in [0, 1, 3]: + for stride in [1, 2, 3, 4]: + # RuntimeError: invalid argument 2: pad should be smaller than half of kernel size, but got padW = 1, padH = 1, kW = 1, + if padding > kernel_size / 2: + continue + + model = LayerTest(kernel_size=kernel_size, padding=padding, stride=stride) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/poolings/global_avgpool2d.py b/tests/layers/poolings/global_avgpool2d.py new file mode 100644 index 0000000..954c6c2 --- /dev/null +++ b/tests/layers/poolings/global_avgpool2d.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self): + super(LayerTest, self).__init__() + self.pool = nn.AdaptiveAvgPool2d((1,1)) + + def forward(self, x): + x = self.pool(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + + model = LayerTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + 
print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/poolings/global_maxpool2d.py b/tests/layers/poolings/global_maxpool2d.py new file mode 100644 index 0000000..9a5bf9f --- /dev/null +++ b/tests/layers/poolings/global_maxpool2d.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self): + super(LayerTest, self).__init__() + self.pool = nn.AdaptiveMaxPool2d((1,1)) + + def forward(self, x): + x = self.pool(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + + model = LayerTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/poolings/maxpool2d.py b/tests/layers/poolings/maxpool2d.py new file mode 100644 index 0000000..9bb24ca --- /dev/null +++ b/tests/layers/poolings/maxpool2d.py @@ -0,0 +1,51 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, kernel_size=3, padding=1, stride=1): + super(LayerTest, self).__init__() + self.pool = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + + def forward(self, x): + x = self.pool(x) + return x + + +def check_error(output, k_model, input_np, 
epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for kernel_size in [1, 3, 5, 7]: + for padding in [0, 1, 3]: + for stride in [1, 2, 3, 4]: + # RuntimeError: invalid argument 2: pad should be smaller than half of kernel size, but got padW = 1, padH = 1, kW = 1, + if padding > kernel_size / 2: + continue + + model = LayerTest(kernel_size=kernel_size, padding=padding, stride=stride) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 610efc1b1217434258c19b906e33d9edb4ac9366 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 12:56:33 +0300 Subject: [PATCH 115/180] Add embedding test. 
--- tests/layers/embeddings/embedding.py | 49 ++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/layers/embeddings/embedding.py diff --git a/tests/layers/embeddings/embedding.py b/tests/layers/embeddings/embedding.py new file mode 100644 index 0000000..5201e87 --- /dev/null +++ b/tests/layers/embeddings/embedding.py @@ -0,0 +1,49 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, input_size, embedd_size): + super(LayerTest, self).__init__() + self.embedd = nn.Embedding(input_size, embedd_size) + + def forward(self, x): + x = self.embedd(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + + for i in range(10): + emb_size = np.random.randint(10, 1000) + inp_size = np.random.randint(10, 1000) + + model = LayerTest(inp_size, emb_size) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 1, inp_size)) + input_var = Variable(torch.LongTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, [(1, inp_size)], verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 0a4b2127effd9edade5f8ccdb7af8423b69d0a81 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 13:08:33 +0300 Subject: [PATCH 116/180] Add test for linear. 
--- tests/layers/linears/linear.py | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/layers/linears/linear.py diff --git a/tests/layers/linears/linear.py b/tests/layers/linears/linear.py new file mode 100644 index 0000000..71e3c8f --- /dev/null +++ b/tests/layers/linears/linear.py @@ -0,0 +1,46 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, inp, out, bias=False): + super(LayerTest, self).__init__() + self.fc = nn.Linear(inp, out, bias=bias) + + def forward(self, x): + x = self.fc(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for bias in [True, False]: + ins = np.random.choice([1, 3, 7]) + model = LayerTest(ins, np.random.choice([1, 3, 7]), bias=bias) + model.eval() + + input_np = np.random.uniform(0, 1, (1, ins)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + print(output.size()) + k_model = pytorch_to_keras(model, input_var, (ins,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From d405abca5c6459948010f5834a50fa85835d2549 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 13:09:00 +0300 Subject: [PATCH 117/180] Fix transpose for linear layer. 
--- pytorch2keras/reshape_layers.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pytorch2keras/reshape_layers.py b/pytorch2keras/reshape_layers.py index 558a29f..c5caf48 100644 --- a/pytorch2keras/reshape_layers.py +++ b/pytorch2keras/reshape_layers.py @@ -47,12 +47,11 @@ def convert_transpose(params, w_name, scope_name, inputs, layers, weights, names """ print('Converting transpose ...') if params['perm'][0] != 0: - # raise AssertionError('Cannot permute batch dimension') - print('!!! Cannot permute batch dimension. Result may be wrong !!!') - # try: - layers[scope_name] = layers[inputs[0]] - # except: - # pass + if inputs[0] in layers: + print('!!! Cannot permute batch dimension. Result may be wrong !!!') + layers[scope_name] = layers[inputs[0]] + else: + print('Skip weight matrix transpose, result may be wrong.') else: if names: tf_name = 'PERM' + random_string(4) @@ -95,9 +94,11 @@ def target_layer(x, shape=layers[inputs[1]]): # layers[scope_name] = reshape(layers[inputs[0]]) else: - reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) - + if inputs[0] in layers: + reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) + layers[scope_name] = reshape(layers[inputs[0]]) + else: + print('Skip weight matrix transpose, but result may be wrong.') def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, names): """ From 78f101ae62143dd4c4b2f9ec0579bc81559a2b15 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 13:18:09 +0300 Subject: [PATCH 118/180] Add normalization tests. 
--- tests/layers/normalizations/bn2d.py | 48 ++++++++++++++++++++++++++++ tests/layers/normalizations/do.py | 49 +++++++++++++++++++++++++++++ tests/layers/normalizations/in2d.py | 48 ++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 tests/layers/normalizations/bn2d.py create mode 100644 tests/layers/normalizations/do.py create mode 100644 tests/layers/normalizations/in2d.py diff --git a/tests/layers/normalizations/bn2d.py b/tests/layers/normalizations/bn2d.py new file mode 100644 index 0000000..7653f00 --- /dev/null +++ b/tests/layers/normalizations/bn2d.py @@ -0,0 +1,48 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +import random + + +class LayerTest(nn.Module): + def __init__(self, out, eps, momentum): + super(LayerTest, self).__init__() + self.bn = nn.BatchNorm2d(out, eps=eps, momentum=momentum) + + def forward(self, x): + x = self.bn(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + inp_size = np.random.randint(10, 100) + + model = LayerTest(inp_size, random.random(), random.random()) + model.eval() + + input_np = np.random.uniform(0, 1, (1, inp_size, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp_size, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/normalizations/do.py b/tests/layers/normalizations/do.py new file mode 100644 index 0000000..81a1bfb --- /dev/null +++ 
b/tests/layers/normalizations/do.py @@ -0,0 +1,49 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +import random + + +class LayerTest(nn.Module): + def __init__(self, p): + super(LayerTest, self).__init__() + self.do = nn.Dropout2d(p=p) + + def forward(self, x): + x = x + 0 # To keep the graph + x = self.do(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + inp_size = np.random.randint(10, 100) + + model = LayerTest(random.random()) + model.eval() + + input_np = np.random.uniform(0, 1, (1, inp_size, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp_size, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/normalizations/in2d.py b/tests/layers/normalizations/in2d.py new file mode 100644 index 0000000..3919626 --- /dev/null +++ b/tests/layers/normalizations/in2d.py @@ -0,0 +1,48 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras +import random + + +class LayerTest(nn.Module): + def __init__(self, out, eps, momentum): + super(LayerTest, self).__init__() + self.in2d = nn.InstanceNorm2d(out, eps=eps, momentum=momentum) + + def forward(self, x): + x = self.in2d(x) + return x + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = 
np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for i in range(10): + inp_size = np.random.randint(10, 100) + + model = LayerTest(inp_size, random.random(), random.random()) + model.eval() + + input_np = np.random.uniform(0, 1, (1, inp_size, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (inp_size, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 27e845ffe7bc23bbdeaa6a5c2b708ddf51254135 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 13:18:20 +0300 Subject: [PATCH 119/180] Update readme. --- README.md | 64 +++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index db1f6a5..f8152dc 100644 --- a/README.md +++ b/README.md @@ -204,42 +204,42 @@ Options: ## Supported layers -Layers: - -* Linear (Dense) -* Conv2d (groups and dilations are also supported) -* DepthwiseConv2d -* Conv3d -* ConvTranspose2d -* MaxPool2d -* MaxPool3d -* AvgPool2d -* Global average pooling (as special case of AdaptiveAvgPool2d) -* Embedding -* UpsamplingNearest2d -* BatchNorm2d -* InstanceNorm2d - -Reshape: +* Activations: + + ReLU + + LeakyReLU + + SELU + + Sigmoid + + Softmax + + Tanh + + HardTanh + +* Constants + +* Convolutions: + + Conv1d + + Conv2d + + ConvTrsnpose2d + +* Element-wise: + + Add + + Mul + + Sub + + Div -* View -* Reshape -* Transpose - -Activations: +* Embedding -* ReLU -* LeakyReLU -* Tanh -* HardTanh (clamp) -* Softmax -* Sigmoid +* Linear -Element-wise: +* Normalizations: + + BatchNorm2d + + InstanceNorm2d + + Dropout -* Addition -* Multiplication -* Subtraction +* Poolings: + + MaxPool2d + + AvgPool2d + + Global MaxPool2d (adaptive pooling to 
shape [1, 1]) + + Global AvgPool2d (adaptive pooling to shape [1, 1]) ## Models converted with pytorch2keras From ec022b4fe4c04952a6850b43f5e0c2dffc308cde Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 13:21:20 +0300 Subject: [PATCH 120/180] Clean tests directory. --- README.md | 5 ++ tests/layers/avg_pool.py | 45 ---------------- tests/layers/bn.py | 48 ----------------- tests/layers/channel_shuffle.py | 60 ---------------------- tests/layers/concat_many.py | 50 ------------------ tests/layers/const.py | 43 ---------------- tests/layers/conv2d.py | 44 ---------------- tests/layers/conv2d_channels_last.py | 44 ---------------- tests/layers/conv2d_dilation.py | 45 ---------------- tests/layers/conv3d.py | 46 ----------------- tests/layers/convtranspose2d.py | 77 ---------------------------- tests/layers/dense.py | 43 ---------------- tests/layers/depthwise_conv2d.py | 63 ----------------------- tests/layers/droupout.py | 47 ----------------- tests/layers/embedding.py | 36 ------------- tests/layers/group_conv2d.py | 56 -------------------- tests/layers/instance_norm.py | 51 ------------------ tests/layers/lrelu.py | 44 ---------------- tests/layers/max_pool.py | 46 ----------------- tests/layers/max_pool3d.py | 46 ----------------- tests/layers/minputs.py | 60 ---------------------- tests/layers/mul.py | 46 ----------------- tests/layers/multiple_inputs.py | 50 ------------------ tests/layers/relu.py | 44 ---------------- tests/layers/sigmoid.py | 44 ---------------- tests/layers/slice.py | 44 ---------------- tests/layers/softmax.py | 44 ---------------- tests/layers/sub.py | 46 ----------------- tests/layers/sum.py | 36 ------------- tests/layers/tanh.py | 44 ---------------- tests/layers/transpose.py | 46 ----------------- tests/layers/view.py | 48 ----------------- 32 files changed, 5 insertions(+), 1486 deletions(-) delete mode 100644 tests/layers/avg_pool.py delete mode 100644 tests/layers/bn.py delete mode 100644 
tests/layers/channel_shuffle.py delete mode 100644 tests/layers/concat_many.py delete mode 100644 tests/layers/const.py delete mode 100644 tests/layers/conv2d.py delete mode 100644 tests/layers/conv2d_channels_last.py delete mode 100644 tests/layers/conv2d_dilation.py delete mode 100644 tests/layers/conv3d.py delete mode 100644 tests/layers/convtranspose2d.py delete mode 100644 tests/layers/dense.py delete mode 100644 tests/layers/depthwise_conv2d.py delete mode 100644 tests/layers/droupout.py delete mode 100644 tests/layers/embedding.py delete mode 100644 tests/layers/group_conv2d.py delete mode 100644 tests/layers/instance_norm.py delete mode 100644 tests/layers/lrelu.py delete mode 100644 tests/layers/max_pool.py delete mode 100644 tests/layers/max_pool3d.py delete mode 100644 tests/layers/minputs.py delete mode 100644 tests/layers/mul.py delete mode 100644 tests/layers/multiple_inputs.py delete mode 100644 tests/layers/relu.py delete mode 100644 tests/layers/sigmoid.py delete mode 100644 tests/layers/slice.py delete mode 100644 tests/layers/softmax.py delete mode 100644 tests/layers/sub.py delete mode 100644 tests/layers/sum.py delete mode 100644 tests/layers/tanh.py delete mode 100644 tests/layers/transpose.py delete mode 100644 tests/layers/view.py diff --git a/README.md b/README.md index f8152dc..ec7db1e 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,11 @@ Options: + Global MaxPool2d (adaptive pooling to shape [1, 1]) + Global AvgPool2d (adaptive pooling to shape [1, 1]) +* Not tested yet: + + Upsampling + + Padding + + Reshape + ## Models converted with pytorch2keras diff --git a/tests/layers/avg_pool.py b/tests/layers/avg_pool.py deleted file mode 100644 index d18a6b3..0000000 --- a/tests/layers/avg_pool.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class AvgPool(nn.Module): - """Module for AveragePool 
conversion testing - """ - - def __init__(self, stride=3, padding=0, kernel_size=3): - super(AvgPool, self).__init__() - self.pool = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) - - def forward(self, x): - x = self.pool(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(2, 7) - stride = np.random.randint(1, kernel_size) - padding = np.random.randint(1, kernel_size/2 + 1) - inp = np.random.randint(kernel_size + 1, 100) - - model = AvgPool(kernel_size=kernel_size, padding=padding, stride=stride) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=False, names='keep') - print(k_model.summary()) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/bn.py b/tests/layers/bn.py deleted file mode 100644 index 02d9274..0000000 --- a/tests/layers/bn.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConv2d(nn.Module): - """Module for BatchNorm2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.bn = nn.BatchNorm2d(out) - - def forward(self, x): - x = self.conv2d(x) - x = self.bn(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestConv2d(inp, out, 
kernel_size, inp % 2) - for m in model.modules(): - m.training = False - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/channel_shuffle.py b/tests/layers/channel_shuffle.py deleted file mode 100644 index 05f9c16..0000000 --- a/tests/layers/channel_shuffle.py +++ /dev/null @@ -1,60 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -def channel_shuffle(x, groups): - """Channel Shuffle operation from ShuffleNet [arxiv: 1707.01083] - Arguments: - x (Tensor): tensor to shuffle. 
- groups (int): groups to be split - """ - batch, channels, height, width = x.size() - channels_per_group = channels // groups - x = x.view(batch, groups, channels_per_group, height, width) - x = torch.transpose(x, 1, 2).contiguous() - x = x.view(batch, channels, height, width) - return x - - -class TestChannelShuffle2d(nn.Module): - """Module for Channel shuffle conversion testing - """ - - def __init__(self, inp=10, out=16, groups=32): - super(TestChannelShuffle2d, self).__init__() - self.groups = groups - self.conv2d = nn.Conv2d(inp, out, kernel_size=3, bias=False) - - def forward(self, x): - x = self.conv2d(x) - x = channel_shuffle(x, self.groups) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - groups = np.random.randint(1, 32) - inp = np.random.randint(3, 32) - out = np.random.randint(3, 32) * groups - - model = TestChannelShuffle2d(inp, out, groups) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/concat_many.py b/tests/layers/concat_many.py deleted file mode 100644 index 64f7990..0000000 --- a/tests/layers/concat_many.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConcatMany(nn.Module): - """Module for Concatenation (2 or many layers) testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestConcatMany, self).__init__() - self.conv2_1 = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.conv2_2 = 
nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.conv2_3 = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x = torch.cat([ - self.conv2_1(x), - self.conv2_2(x), - self.conv2_3(x) - ], dim=1) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestConcatMany(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/const.py b/tests/layers/const.py deleted file mode 100644 index 0b16a8a..0000000 --- a/tests/layers/const.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConst(nn.Module): - """Module for Const conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestConst, self).__init__() - self.linear = nn.Linear(inp, out, bias=False) - - def forward(self, x): - x = self.linear(x) * 2.0 - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - model = TestConst(inp, out, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = 
k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/conv2d.py b/tests/layers/conv2d.py deleted file mode 100644 index 4aa3d59..0000000 --- a/tests/layers/conv2d.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConv2d(nn.Module): - """Module for Conv2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x = self.conv2d(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestConv2d(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/conv2d_channels_last.py b/tests/layers/conv2d_channels_last.py deleted file mode 100644 index a3d59ea..0000000 --- a/tests/layers/conv2d_channels_last.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConv2d(nn.Module): - """Module for Conv2d conversion testing - """ - - def __init__(self, inp=10, out=16, 
kernel_size=3, bias=True): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x = self.conv2d(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 10) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 2) - - model = TestConv2d(inp + 2, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp + 2, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp + 2, inp, inp,), change_ordering=True, verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np.transpose(0, 2, 3, 1)) - - error = np.max(pytorch_output - keras_output.transpose(0, 3, 1, 2)) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/conv2d_dilation.py b/tests/layers/conv2d_dilation.py deleted file mode 100644 index 10d2f6c..0000000 --- a/tests/layers/conv2d_dilation.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConv2d(nn.Module): - """Module for Conv2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, dilation=1, bias=True): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias, dilation=dilation) - - def forward(self, x): - x = self.conv2d(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - dilation = np.random.randint(1, kernel_size + 1) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestConv2d(inp, out, kernel_size, dilation, inp % 2) - - input_np 
= np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/conv3d.py b/tests/layers/conv3d.py deleted file mode 100644 index 027b18c..0000000 --- a/tests/layers/conv3d.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConv3d(nn.Module): - """Module for Conv2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestConv3d, self).__init__() - self.conv3d = nn.Conv3d(inp, out, kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x = self.conv3d(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 30) - out = np.random.randint(1, 30) - - model = TestConv3d(inp, out, kernel_size, inp % 2) - - input_var = Variable(torch.randn(1, inp, inp, inp, inp)) - - output = model(input_var) - - k_model = pytorch_to_keras(model, - input_var, - (inp, inp, inp, inp,), - verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_var.numpy()) - error = np.max(pytorch_output - keras_output) - print("iteration: {}, error: {}".format(i, error)) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/convtranspose2d.py b/tests/layers/convtranspose2d.py deleted file mode 100644 index b9ddfa2..0000000 --- a/tests/layers/convtranspose2d.py +++ /dev/null @@ -1,77 +0,0 @@ -import 
unittest -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestConvTranspose2d(nn.Module): - """Module for ConvTranspose2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, stride=1, bias=True, padding=0): - super(TestConvTranspose2d, self).__init__() - self.conv2d = nn.ConvTranspose2d(inp, out, kernel_size=kernel_size, bias=bias, stride=stride, padding=padding) - - def forward(self, x): - x = self.conv2d(x) - return x - - -class ConvTranspose2dTest(unittest.TestCase): - N = 100 - - def test(self): - max_error = 0 - for i in range(self.N): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestConvTranspose2d(inp, out, kernel_size, 2, inp % 3) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) - - def test_with_padding(self): - max_error = 0 - for i in range(self.N): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestConvTranspose2d(inp, out, kernel_size, 2, inp % 3, padding=1) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: 
- max_error = error - - print('Max error: {0}'.format(max_error)) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/layers/dense.py b/tests/layers/dense.py deleted file mode 100644 index cd2dcbf..0000000 --- a/tests/layers/dense.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestDense(nn.Module): - """Module for Dense conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestDense, self).__init__() - self.linear = nn.Linear(inp, out, bias=False) - - def forward(self, x): - x = self.linear(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - model = TestDense(inp, out, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/depthwise_conv2d.py b/tests/layers/depthwise_conv2d.py deleted file mode 100644 index ed00cba..0000000 --- a/tests/layers/depthwise_conv2d.py +++ /dev/null @@ -1,63 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -def depthwise_conv3x3(channels, - stride): - return nn.Conv2d( - in_channels=channels, - out_channels=channels, - kernel_size=3, - stride=stride, - padding=1, - groups=channels, - bias=False) - - -class TestConv2d(nn.Module): - """Module for Conv2d conversion testing - """ - - def __init__(self, inp=10, stride=1): - 
super(TestConv2d, self).__init__() - self.conv2d_dw = depthwise_conv3x3(inp, stride) - - def forward(self, x): - x = self.conv2d_dw(x) - return x - - -def check_error(output, k_model, input_np, epsilon=1e-5): - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print('Error:', error) - - assert error < epsilon - return error - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - stride = np.random.randint(1, 3) - - model = TestConv2d(inp, stride) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - error = check_error(output, k_model, input_np) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/droupout.py b/tests/layers/droupout.py deleted file mode 100644 index d722fa1..0000000 --- a/tests/layers/droupout.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestDropout(nn.Module): - """Module for Dropout conversion testing - """ - - def __init__(self, inp=10, out=16, p=0.5, bias=True): - super(TestDropout, self).__init__() - self.linear = nn.Linear(inp, out, bias=bias) - self.dropout = nn.Dropout(p=p) - - def forward(self, x): - x = self.linear(x) - x = self.dropout(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - p = np.random.uniform(0, 1) - model = TestDropout(inp, out, inp % 2, p) - model.eval() - - input_np = np.random.uniform(-1.0, 1.0, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - 
output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - keras_output = k_model.predict(input_np) - - pytorch_output = output.data.numpy() - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - # not implemented yet diff --git a/tests/layers/embedding.py b/tests/layers/embedding.py deleted file mode 100644 index bafbb53..0000000 --- a/tests/layers/embedding.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestEmbedding(nn.Module): - def __init__(self, input_size): - super(TestEmbedding, self).__init__() - self.embedd = nn.Embedding(input_size, 100) - - def forward(self, input): - return self.embedd(input) - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - input_np = np.random.randint(0, 10, (1, 1, 4)) - input = Variable(torch.LongTensor(input_np)) - - simple_net = TestEmbedding(1000) - output = simple_net(input) - - k_model = pytorch_to_keras(simple_net, input, (1, 4), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output[0]) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/group_conv2d.py b/tests/layers/group_conv2d.py deleted file mode 100644 index 108610c..0000000 --- a/tests/layers/group_conv2d.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -def group_conv1x1(in_channels, - out_channels, - groups): - return nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - padding=1, - groups=groups, - bias=False) - - -class TestGroupConv2d(nn.Module): - """Module for 
Conv2d conversion testing - """ - - def __init__(self, inp=10, groups=1): - super(TestGroupConv2d, self).__init__() - self.conv2d_group = group_conv1x1(inp, inp, groups) - - def forward(self, x): - x = self.conv2d_group(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - groups = np.random.randint(1, 10) - inp = np.random.randint(kernel_size + 1, 10) * groups - h, w = 32, 32 - model = TestGroupConv2d(inp, groups) - - input_np = np.random.uniform(0, 1, (1, inp, h, w)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, h, w,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/instance_norm.py b/tests/layers/instance_norm.py deleted file mode 100644 index 1c1bd27..0000000 --- a/tests/layers/instance_norm.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestInstanceNorm2d(nn.Module): - """Module for InstanceNorm2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestInstanceNorm2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.bn = nn.InstanceNorm2d(out, affine=True) - self.bn.weight = torch.nn.Parameter(torch.FloatTensor(self.bn.weight.size()).uniform_(0, 1)) - self.bn.bias = torch.nn.Parameter(torch.FloatTensor(self.bn.bias.size()).uniform_(2, 3)) - - def forward(self, x): - x = self.conv2d(x) - x = self.bn(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = 
np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestInstanceNorm2d(inp, out, kernel_size, inp % 2) - model.eval() - for m in model.modules(): - m.training = False - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/lrelu.py b/tests/layers/lrelu.py deleted file mode 100644 index c0aa4f4..0000000 --- a/tests/layers/lrelu.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestLeakyRelu(nn.Module): - """Module for LeakyReLu conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestLeakyRelu, self).__init__() - self.linear = nn.Linear(inp, out, bias=True) - self.relu = nn.LeakyReLU(inplace=True) - - def forward(self, x): - x = self.linear(x) - x = self.relu(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - model = TestLeakyRelu(inp, out, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/max_pool.py 
b/tests/layers/max_pool.py deleted file mode 100644 index 288e8ab..0000000 --- a/tests/layers/max_pool.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class MaxPool(nn.Module): - """Module for MaxPool conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(MaxPool, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.pool = nn.MaxPool2d(kernel_size=3, padding=1) - - def forward(self, x): - x = self.conv2d(x) - x = self.pool(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = MaxPool(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/max_pool3d.py b/tests/layers/max_pool3d.py deleted file mode 100644 index 53d4cf1..0000000 --- a/tests/layers/max_pool3d.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class MaxPool(nn.Module): - """Module for MaxPool conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(MaxPool, self).__init__() - self.conv3d = nn.Conv3d(inp, out, kernel_size=kernel_size, bias=bias) - self.pool3d = nn.MaxPool3d(kernel_size=3, 
padding=1) - - def forward(self, x): - x = self.conv3d(x) - x = self.pool3d(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 30) - out = np.random.randint(1, 30) - - model = MaxPool(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/minputs.py b/tests/layers/minputs.py deleted file mode 100644 index 8d88379..0000000 --- a/tests/layers/minputs.py +++ /dev/null @@ -1,60 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestMultipleInputs(nn.Module): - """Module for multiple inputs conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestMultipleInputs, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - self.deconv2d = nn.ConvTranspose2d(inp, out, kernel_size=kernel_size, bias=bias) - self.in2d = nn.InstanceNorm2d(out) - - def forward(self, x, y, z): - return self.in2d(self.deconv2d(x)) + self.in2d(self.deconv2d(y)) + self.in2d(self.deconv2d(z)) - - -def check_error(output, k_model, input_np, epsilon=1e-5): - pytorch_output = output.data.numpy() - keras_output = k_model.predict([input_np, input_np, input_np]) - - error = np.max(pytorch_output - keras_output) - print('Error:', error) - - assert error < epsilon - return error - - -if __name__ == '__main__': - max_error = 0 - for i in 
range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestMultipleInputs(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - input_var2 = Variable(torch.FloatTensor(input_np)) - input_var3 = Variable(torch.FloatTensor(input_np)) - - output = model(input_var, input_var2, input_var3) - - k_model = pytorch_to_keras( - model, - [input_var, input_var2, input_var3], - [(inp, inp, inp,), (inp, inp, inp,), (inp, inp, inp,)], - verbose=True - ) - - error = check_error(output, k_model, input_np) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/mul.py b/tests/layers/mul.py deleted file mode 100644 index ca2b0f2..0000000 --- a/tests/layers/mul.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestMul(nn.Module): - """Module for Element-wise multiplication conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestMul, self).__init__() - self.conv2d_1 = nn.Conv2d(inp, out, stride=(inp % 3 + 1), kernel_size=kernel_size, bias=bias) - self.conv2d_2 = nn.Conv2d(inp, out, stride=(inp % 3 + 1), kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x1 = self.conv2d_1(x) - x2 = self.conv2d_2(x) - return (x1 * x2).sum() - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestMul(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, 
(inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/multiple_inputs.py b/tests/layers/multiple_inputs.py deleted file mode 100644 index 196dc47..0000000 --- a/tests/layers/multiple_inputs.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestMultipleInputs(nn.Module): - """Module for Conv2d conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestMultipleInputs, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - - def forward(self, x, y, z): - return self.conv2d(x) + self.conv2d(y) + self.conv2d(z) - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestMultipleInputs(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - input_var2 = Variable(torch.FloatTensor(input_np)) - input_var3 = Variable(torch.FloatTensor(input_np)) - output = model(input_var, input_var2, input_var3) - - k_model = pytorch_to_keras( - model, - [input_var, input_var2, input_var3], - [(inp, inp, inp,), (inp, inp, inp,), (inp, inp, inp,)], - verbose=True - ) - k_model.summary() - pytorch_output = output.data.numpy() - keras_output = k_model.predict([input_np, input_np, input_np]) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/relu.py b/tests/layers/relu.py 
deleted file mode 100644 index 69ad413..0000000 --- a/tests/layers/relu.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestRelu(nn.Module): - """Module for ReLu conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestRelu, self).__init__() - self.linear = nn.Linear(inp, out, bias=True) - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - x = self.linear(x) - x = self.relu(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - model = TestRelu(inp, out, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/sigmoid.py b/tests/layers/sigmoid.py deleted file mode 100644 index 4ed247c..0000000 --- a/tests/layers/sigmoid.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestSigmoid(nn.Module): - """Module for Sigmoid activation conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestSigmoid, self).__init__() - self.linear = nn.Linear(inp, out, bias=True) - self.sigmoid = nn.Sigmoid() - - def forward(self, x): - x = self.linear(x) - x = self.sigmoid(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - 
model = TestSigmoid(inp, out, inp % 2) - - input_np = np.random.uniform(-1.0, 1.0, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/slice.py b/tests/layers/slice.py deleted file mode 100644 index 1dbcfe8..0000000 --- a/tests/layers/slice.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestSlice(nn.Module): - """Module for Slicings conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestSlice, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x = self.conv2d(x) - return x[:, 0, :, :] - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestSlice(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/softmax.py b/tests/layers/softmax.py deleted file mode 100644 index 934828a..0000000 --- a/tests/layers/softmax.py +++ /dev/null @@ -1,44 +0,0 
@@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestSoftmax(nn.Module): - """Module for Softmax activation conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestSoftmax, self).__init__() - self.linear = nn.Linear(inp, out, bias=True) - self.softmax = nn.Softmax() - - def forward(self, x): - x = self.linear(x) - x = self.softmax(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - model = TestSoftmax(inp, out, inp % 2) - - input_np = np.random.uniform(-1.0, 1.0, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True, change_ordering=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/sub.py b/tests/layers/sub.py deleted file mode 100644 index 5baf91a..0000000 --- a/tests/layers/sub.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestSub(nn.Module): - """Module for Element-wise subtaction conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestSub, self).__init__() - self.conv2d_1 = nn.Conv2d(inp, out, stride=(inp % 3 + 1), kernel_size=kernel_size, bias=bias) - self.conv2d_2 = nn.Conv2d(inp, out, stride=(inp % 3 + 1), kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x1 = self.conv2d_1(x) - x2 = self.conv2d_2(x) - return x1 - x2 - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - 
kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestSub(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/sum.py b/tests/layers/sum.py deleted file mode 100644 index 0b5208c..0000000 --- a/tests/layers/sum.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestSum(nn.Module): - def __init__(self, input_size): - super(TestSum, self).__init__() - self.embedd = nn.Embedding(input_size, 100) - - def forward(self, input): - return self.embedd(input).sum(dim=0) - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - input_np = np.random.randint(0, 10, (1, 1, 4)) - input = Variable(torch.LongTensor(input_np)) - - simple_net = TestSum(1000) - output = simple_net(input) - - k_model = pytorch_to_keras(simple_net, input, (1, 4), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output[0]) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/tanh.py b/tests/layers/tanh.py deleted file mode 100644 index a746d29..0000000 --- a/tests/layers/tanh.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import 
pytorch_to_keras - - -class TestTanh(nn.Module): - """Module for Tanh activation conversion testing - """ - - def __init__(self, inp=10, out=16, bias=True): - super(TestTanh, self).__init__() - self.linear = nn.Linear(inp, out, bias=True) - self.tanh = nn.Tanh() - - def forward(self, x): - x = self.linear(x) - x = self.tanh(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - inp = np.random.randint(1, 100) - out = np.random.randint(1, 100) - model = TestTanh(inp, out, inp % 2) - - input_np = np.random.uniform(-1.0, 1.0, (1, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/transpose.py b/tests/layers/transpose.py deleted file mode 100644 index b02e32a..0000000 --- a/tests/layers/transpose.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestTranspose(nn.Module): - """Module for Transpose conversion testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3, bias=True): - super(TestTranspose, self).__init__() - self.conv2d = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias) - - def forward(self, x): - x = self.conv2d(x) - x = torch.transpose(x, 2, 3) - x = torch.nn.Tanh()(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = np.random.randint(1, 7) - inp = np.random.randint(kernel_size + 1, 100) - out = np.random.randint(1, 100) - - model = TestTranspose(inp, out, kernel_size, inp % 2) - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = 
Variable(torch.FloatTensor(input_np)) - output = model(input_var) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) diff --git a/tests/layers/view.py b/tests/layers/view.py deleted file mode 100644 index 6fe6c9d..0000000 --- a/tests/layers/view.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as np - -import torch -import torch.nn as nn -from torch.autograd import Variable -from pytorch2keras.converter import pytorch_to_keras - - -class TestView(nn.Module): - def __init__(self): - super(TestView, self).__init__() - self.conv2d = nn.Conv2d(22, 32, kernel_size=1, bias=True) - self.fc = nn.Linear(15488, 3) - - def forward(self, x): - x = self.conv2d(x) - - print(type(x.size()[0])) - - x = x.view([int(x.size(0)), -1]) - x = self.fc(x) - return x - - -if __name__ == '__main__': - max_error = 0 - for i in range(100): - kernel_size = 1 - inp = 22 - out = 32 - - model = TestView() - - input_np = np.random.uniform(0, 1, (1, inp, inp, inp)) - input_var = Variable(torch.FloatTensor(input_np)) - - k_model = pytorch_to_keras(model, input_var, (inp, inp, inp,), verbose=True) - - output = model(input_var) - pytorch_output = output.data.numpy() - keras_output = k_model.predict(input_np) - - error = np.max(pytorch_output - keras_output) - print(error) - if max_error < error: - max_error = error - - print('Max error: {0}'.format(max_error)) From e4d9c72cc6d2dec3c9a0880ac2b9c55239db37ac Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 21 Dec 2018 13:25:03 +0300 Subject: [PATCH 121/180] Update PyPi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d99969d..6a46818 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.12', + version='0.1.13', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From ede034ca7ea78472d65dfec62ce01f49b86d5bd9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 23 Dec 2018 21:35:03 +0300 Subject: [PATCH 122/180] Added tests for upsampling. --- pytorch2keras/upsampling_layers.py | 2 +- .../layers/upsamplings/upsampling_bilinear.py | 68 +++++++++++++++++++ .../layers/upsamplings/upsampling_nearest.py | 68 +++++++++++++++++++ 3 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 tests/layers/upsamplings/upsampling_bilinear.py create mode 100644 tests/layers/upsamplings/upsampling_nearest.py diff --git a/pytorch2keras/upsampling_layers.py b/pytorch2keras/upsampling_layers.py index 7a9721f..42fa002 100644 --- a/pytorch2keras/upsampling_layers.py +++ b/pytorch2keras/upsampling_layers.py @@ -44,7 +44,7 @@ def target_layer(x, size=output_size, align_corners=align_corners): def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): """ - Convert upsample_bilinear2d layer. + Convert nearest upsampling layer. 
Args: params: dictionary with layer parameters diff --git a/tests/layers/upsamplings/upsampling_bilinear.py b/tests/layers/upsamplings/upsampling_bilinear.py new file mode 100644 index 0000000..b96069b --- /dev/null +++ b/tests/layers/upsamplings/upsampling_bilinear.py @@ -0,0 +1,68 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, scale_factor=2): + super(LayerTest, self).__init__() + self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor) + + def forward(self, x): + x = self.up(x) + return x + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x): + from torch.nn import functional as F + return F.upsample_bilinear(x, scale_factor=2) + + +def check_error(output, k_model, input_np, epsilon=1e-4): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for scale_factor in [1, 2, 3, 4]: + model = LayerTest(scale_factor) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) diff --git 
a/tests/layers/upsamplings/upsampling_nearest.py b/tests/layers/upsamplings/upsampling_nearest.py new file mode 100644 index 0000000..59f780e --- /dev/null +++ b/tests/layers/upsamplings/upsampling_nearest.py @@ -0,0 +1,68 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + + +class LayerTest(nn.Module): + def __init__(self, scale_factor=2): + super(LayerTest, self).__init__() + self.up = nn.UpsamplingNearest2d(scale_factor=scale_factor) + + def forward(self, x): + x = self.up(x) + return x + + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x): + from torch.nn import functional as F + return F.upsample_nearest(x, scale_factor=2) + + +def check_error(output, k_model, input_np, epsilon=1e-4): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if __name__ == '__main__': + max_error = 0 + for scale_factor in [1, 2, 3, 4]: + model = LayerTest(scale_factor) + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + for i in range(10): + model = FTest() + model.eval() + + input_np = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var = Variable(torch.FloatTensor(input_np)) + output = model(input_var) + + k_model = pytorch_to_keras(model, input_var, (3, 224, 224,), verbose=True) + + error = check_error(output, k_model, input_np) + if max_error < error: + max_error = error + + print('Max error: {0}'.format(max_error)) From 40cccf1c395090bad1afd43fa7334390d235f46d Mon Sep 17 00:00:00 2001 From: Grigory 
Malivenko Date: Mon, 31 Dec 2018 12:50:21 +0300 Subject: [PATCH 123/180] Add more imports for lambda layers. --- pytorch2keras/reshape_layers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch2keras/reshape_layers.py b/pytorch2keras/reshape_layers.py index c5caf48..eb8605b 100644 --- a/pytorch2keras/reshape_layers.py +++ b/pytorch2keras/reshape_layers.py @@ -87,6 +87,7 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): print('Cannot deduct batch size! It will be omitted, but result may be wrong.') def target_layer(x, shape=layers[inputs[1]]): + import tensorflow as tf return tf.reshape(x, shape) lambda_layer = keras.layers.Lambda(target_layer) @@ -119,6 +120,7 @@ def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, names): raise AssertionError('Cannot convert squeeze by multiple dimensions') def target_layer(x, axis=int(params['axes'][0])): + import tensorflow as tf return tf.squeeze(x, axis=axis) lambda_layer = keras.layers.Lambda(target_layer) @@ -171,6 +173,7 @@ def convert_shape(params, w_name, scope_name, inputs, layers, weights, names): print('Converting shape ...') def target_layer(x): + import tensorflow as tf return tf.shape(x) lambda_layer = keras.layers.Lambda(target_layer) From 7e393ee9a334ff30b264ac2c822d3cda45ac9da8 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 2 Jan 2019 02:25:11 +0300 Subject: [PATCH 124/180] Update constant test. 
--- tests/layers/constants/constant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/layers/constants/constant.py b/tests/layers/constants/constant.py index a8d16fd..4df515a 100644 --- a/tests/layers/constants/constant.py +++ b/tests/layers/constants/constant.py @@ -10,7 +10,7 @@ def __init__(self): super(FTest, self).__init__() def forward(self, x): - return x + 1 + return x + torch.FloatTensor([1.0]) def check_error(output, k_model, input_np, epsilon=1e-5): From 5101a0fe2a89683ea3df8cb896610aaa3da04fb1 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 2 Jan 2019 02:27:05 +0300 Subject: [PATCH 125/180] Update id parser in the converter. --- pytorch2keras/converter.py | 77 +++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index ac18086..22b0b4a 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -4,6 +4,7 @@ import contextlib from packaging import version +from collections import defaultdict import torch import torch.jit @@ -134,8 +135,14 @@ def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): def get_node_id(node): import re - node_id = re.search(r"[\d]+", node.__str__()) - return node_id.group(0) + try: + node_id = re.search(r"\%[\d\w]+", node.__str__()) + int(node_id.group(0)[1:]) + return node_id.group(0)[1:] + except AttributeError: + return '0' + except ValueError: + return '0' def pytorch_to_keras( @@ -182,11 +189,9 @@ def pytorch_to_keras( trace.set_graph(_optimize_graph(trace.graph(), False)) trace.graph().lint() - if verbose: - print(trace.graph()) if verbose: - print(list(trace.graph().outputs())) + print(trace.graph()) # Get all graph nodes nodes = list(trace.graph().nodes()) @@ -216,7 +221,6 @@ def pytorch_to_keras( s = i k += 1 if k == len(seq_to_find): - print('found seq', k, s) reshape_op = nodes[s + k - 1] flatten_op = { 'kind': (lambda: 'onnx::Flatten'), @@ 
-226,27 +230,21 @@ def pytorch_to_keras( 'inputs': (lambda: list(reshape_op.inputs())[:1]), '__str__': (lambda: reshape_op.__str__()), } - print(flatten_op) nodes = nodes[:s] + [SimpleNamespace(**flatten_op)] + nodes[s+k:] - # print(nodes) - # exit(0) break else: k = 0 s = -1 - print(nodes) - # Collect graph outputs - graph_outputs = [n.uniqueName() for n in trace.graph().outputs()] - print('Graph outputs:', graph_outputs) - - - graph_inputs = [n.uniqueName() for n in trace.graph().inputs()] - print('Graph inputs:', graph_inputs) - + # Collect graph inputs and outputs + graph_outputs = [get_node_id(n) for n in trace.graph().outputs()] + graph_inputs = [get_node_id(n) for n in trace.graph().inputs()] + # Collect model state dict state_dict = _unique_state_dict(model) if verbose: + print('Graph inputs:', graph_inputs) + print('Graph outputs:', graph_outputs) print('State dict:', list(state_dict)) import re @@ -267,38 +265,39 @@ def pytorch_to_keras( input_index = 0 model_inputs = ['input' + i for i in graph_inputs] + group_indices = defaultdict(lambda: 0, {}) + for node in nodes: node_inputs = list(node.inputs()) - # print(node_inputs, model_inputs) node_input_names = [] for node_input in node_inputs: - if node_input.node().scopeName(): - node_input_names.append(get_node_id(node_input.node())) if 'input{0}'.format(get_node_id(node_input.node())) in model_inputs: - node_input_names.append('input{0}'.format(node_input.uniqueName())) - # print(node_input_names) - - # if len(node_input_names) == 0: - # if len(node_inputs) > 0: - # if node_inputs[0] in model_inputs: - # node_input_names.append(model_inputs[node_inputs[0]]) - # else: - # input_name = 'input{0}'.format(input_index) - # if input_name not in layers: - # continue - # node_input_names.append(input_name) - # input_index += 1 - # model_inputs[node_inputs[0]] = input_name + node_input_names.append('input{0}'.format(get_node_id(node_input.node()))) + else: + node_input_names.append(get_node_id(node_input.node())) 
node_type = node.kind() - # print(dir(node)) node_scope_name = node.scopeName() node_id = get_node_id(node) - node_weights_name = '.'.join( - re.findall(r'\[([\w\d.\-\[\]\s]+)\]', node_scope_name) - ) + node_name_regex = re.findall(r'\[([\w\d.\-\[\]\s]+)\]', node_scope_name) + + try: + int(node_name_regex[-1]) + node_weigth_group_name = '.'.join( + node_name_regex[:-1] + ) + node_weights_name = node_weigth_group_name + '.' + str(group_indices[node_weigth_group_name]) + group_indices[node_weigth_group_name] += 1 + + except ValueError: + node_weights_name = '.'.join( + node_name_regex + ) + except IndexError: + node_weights_name = '.'.join(node_input_names) + node_attrs = {k: node[k] for k in node.attributeNames()} node_outputs = list(node.outputs()) From e022dfe40426741b8808e92fb7749d410641afd2 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Wed, 2 Jan 2019 02:54:19 +0300 Subject: [PATCH 126/180] Add test for multiple inputs. Minor bug fix in the converter. --- pytorch2keras/converter.py | 36 +++++++++++++++---------- tests/layers/multiple_inputs.py | 47 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 14 deletions(-) create mode 100644 tests/layers/multiple_inputs.py diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 22b0b4a..380394c 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -135,15 +135,22 @@ def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): def get_node_id(node): import re - try: - node_id = re.search(r"\%[\d\w]+", node.__str__()) - int(node_id.group(0)[1:]) - return node_id.group(0)[1:] - except AttributeError: - return '0' - except ValueError: - return '0' + node_id = re.search(r"[\d]+", node.__str__()) + return node_id.group(0) + +def get_leaf_id(node, state={}): + import re + try: + node_id = re.search(r"[\d\w]+ defined in", node.__str__()) + int(node_id.group(0)[:-11]) + return node_id.group(0)[:-11] + except: + if node_id.group(0)[:-11] in state: + 
return state[node_id.group(0)[:-11]] + else: + state[node_id.group(0)[:-11]] = str(len(state.keys())) + return str(state[node_id.group(0)[:-11]]) def pytorch_to_keras( model, args, input_shapes, @@ -237,9 +244,10 @@ def pytorch_to_keras( s = -1 # Collect graph inputs and outputs - graph_outputs = [get_node_id(n) for n in trace.graph().outputs()] - graph_inputs = [get_node_id(n) for n in trace.graph().inputs()] - + graph_outputs = [get_leaf_id(n) for n in trace.graph().outputs()] + graph_inputs = [get_leaf_id(n) for n in trace.graph().inputs()] + for i in trace.graph().inputs(): + print(i) # Collect model state dict state_dict = _unique_state_dict(model) if verbose: @@ -272,10 +280,10 @@ def pytorch_to_keras( node_input_names = [] for node_input in node_inputs: - if 'input{0}'.format(get_node_id(node_input.node())) in model_inputs: - node_input_names.append('input{0}'.format(get_node_id(node_input.node()))) + if 'input{0}'.format(get_leaf_id(node_input)) in model_inputs: + node_input_names.append('input{0}'.format(get_leaf_id(node_input))) else: - node_input_names.append(get_node_id(node_input.node())) + node_input_names.append(get_leaf_id(node_input)) node_type = node.kind() diff --git a/tests/layers/multiple_inputs.py b/tests/layers/multiple_inputs.py new file mode 100644 index 0000000..a9805be --- /dev/null +++ b/tests/layers/multiple_inputs.py @@ -0,0 +1,47 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.autograd import Variable +from pytorch2keras.converter import pytorch_to_keras + +class FTest(nn.Module): + def __init__(self): + super(FTest, self).__init__() + + def forward(self, x, y, z): + from torch.nn import functional as F + return F.relu(x) + F.relu(y) + F.relu(z) + + +def check_error(output, k_model, input_np, epsilon=1e-5): + pytorch_output = output.data.numpy() + keras_output = k_model.predict(input_np) + + error = np.max(pytorch_output - keras_output) + print('Error:', error) + + assert error < epsilon + return error + + +if 
__name__ == '__main__': + max_error = 0 + for i in range(10): + model = FTest() + model.eval() + + input_np1 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var1 = Variable(torch.FloatTensor(input_np1)) + + input_np2 = np.random.uniform(0, 1, (1, 3, 224, 224)) + input_var2 = Variable(torch.FloatTensor(input_np2)) + output = model(input_var1, input_var2, input_var2) + + k_model = pytorch_to_keras(model, [input_var1, input_var2, input_var2], [(3, 224, 224,), (3, 224, 224,), (3, 224, 224,)], verbose=True) + + error = check_error(output, k_model, [input_np1, input_np2, input_np2]) + if max_error < error: + max_error = error + + + print('Max error: {0}'.format(max_error)) From 01b1805d59d2f358b3d955a4a2d0f6e4da3b3885 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 02:01:05 +0300 Subject: [PATCH 127/180] Add documentation template. --- .readthedocs.yml | 11 ++ docs/index.md | 325 ++++++++++++++++++++++++++++++++++++++++++ docs/requirements.txt | 1 + mkdocs.yml | 1 + 4 files changed, 338 insertions(+) create mode 100644 .readthedocs.yml create mode 100644 docs/index.md create mode 100644 docs/requirements.txt create mode 100644 mkdocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..9a3a7a6 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,11 @@ +# .readthedocs.yml + +build: + image: latest + +python: + version: 3.6 + setup_py_install: true + +# Don't build any extra formats +formats: [] \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..776dbcd --- /dev/null +++ b/docs/index.md @@ -0,0 +1,325 @@ +# pytorch2keras + +[![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) +[![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Python 
Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) +[![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) +![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) + +PyTorch to Keras model converter. This project is created to make a model conversation easier, so, the converter API is developed with maximal simplicity. + + +## Installation + + +To install ready-to-use package, you may use PIP: + +``` +pip install pytorch2keras +``` + +It's possible to install package with PIP right from git: + +``` +pip install -U git+https://github.com/nerox8664/pytorch2keras +``` + +Or to clone it manually and then install it with PIP: + +``` +git clone https://github.com/nerox8664/pytorch2keras +cd pytorch2keras +pip install -e . +``` + + +## PyTorch 0.4.1 and greater + +There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): + +To make it work, please, cast all your `.view()` parameters to `int`. For example: + +``` +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << Here + x = self.fc(x) + return x +``` + +## Tensorflow.js + +For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. + +Here is a short instruction how to get a tensorflow.js model: + +1. First of all, you have to convert your model to Keras with this converter: + +``` +k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, names='short') +``` + +2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. 
As alternative, you may get Tensorflow Graph and save it as a frozen model: + +``` +# Function below copied from here: +# https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb +def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): + """ + Freezes the state of a session into a pruned computation graph. + + Creates a new computation graph where variable nodes are replaced by + constants taking their current value in the session. The new graph will be + pruned so subgraphs that are not necessary to compute the requested + outputs are removed. + @param session The TensorFlow session to be frozen. + @param keep_var_names A list of variable names that should not be frozen, + or None to freeze all the variables in the graph. + @param output_names Names of the relevant graph outputs. + @param clear_devices Remove the device directives from the graph for better portability. + @return The frozen graph definition. + """ + from tensorflow.python.framework.graph_util import convert_variables_to_constants + graph = session.graph + with graph.as_default(): + freeze_var_names = \ + list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) + output_names = output_names or [] + output_names += [v.op.name for v in tf.global_variables()] + input_graph_def = graph.as_graph_def() + if clear_devices: + for node in input_graph_def.node: + node.device = "" + frozen_graph = convert_variables_to_constants(session, input_graph_def, + output_names, freeze_var_names) + return frozen_graph + + +from keras import backend as K +import tensorflow as tf +frozen_graph = freeze_session(K.get_session(), + output_names=[out.op.name for out in k_model.outputs]) + +tf.train.write_graph(frozen_graph, ".", "my_model.pb", as_text=False) +print([i for i in k_model.outputs]) + +``` + +3. 
You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: + +``` +tensorflowjs_converter \ + --input_format=tf_frozen_model \ + --output_node_names='TANHTObs/Tanh' \ + my_model.pb \ + model_tfjs +``` + +4. Thats all! + +``` +const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; +const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; +cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); +``` + +## How to use + +It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. + +Firstly, we need to load (or create) a valid PyTorch model: + +``` +class TestConv2d(nn.Module): + """ + Module for Conv2d testing + """ + + def __init__(self, inp=10, out=16, kernel_size=3): + super(TestConv2d, self).__init__() + self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) + + def forward(self, x): + x = self.conv2d(x) + return x + +model = TestConv2d() + +# load weights here +# model.load_state_dict(torch.load(path_to_weights.pth)) +``` + +The next step - create a dummy variable with correct shape: + +``` +input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) +input_var = Variable(torch.FloatTensor(input_np)) +``` + +We use the dummy-variable to trace the model (with jit.trace): + +``` +from converter import pytorch_to_keras +# we should specify shape of the input tensor +k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) +``` + +You can also set H and W dimensions to None to make your model shape-agnostic (e.g. fully convolutional netowrk): + +``` +from converter import pytorch_to_keras +# we should specify shape of the input tensor +k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) +``` + +That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. + + +## API + +Here is the only method `pytorch_to_keras` from `pytorch2keras` module. 
+``` +def pytorch_to_keras( + model, args, input_shapes, + change_ordering=False, training=False, verbose=False, names=False, +) +``` + +Options: + +* model -- a PyTorch module to convert; +* args -- list of dummy variables with proper shapes; +* input_shapes -- list with shape tuples; +* change_ordering -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` +* training -- boolean, switch model to training mode (never use it) +* verbose -- boolean, verbose output +* names -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. + +## Supported layers + +* Activations: + + ReLU + + LeakyReLU + + SELU + + Sigmoid + + Softmax + + Tanh + + HardTanh + +* Constants + +* Convolutions: + + Conv1d + + Conv2d + + ConvTrsnpose2d + +* Element-wise: + + Add + + Mul + + Sub + + Div + +* Embedding + +* Linear + +* Normalizations: + + BatchNorm2d + + InstanceNorm2d + + Dropout + +* Poolings: + + MaxPool2d + + AvgPool2d + + Global MaxPool2d (adaptive pooling to shape [1, 1]) + + Global AvgPool2d (adaptive pooling to shape [1, 1]) + +* Not tested yet: + + Upsampling + + Padding + + Reshape + + +## Models converted with pytorch2keras + +* ResNet* +* VGG* +* PreResNet* +* SqueezeNet (with ceil_mode=False) +* SqueezeNext +* DenseNet* +* AlexNet +* Inception +* SeNet +* Mobilenet v2 +* DiracNet +* DARTS +* DRNC + +| Model | Top1 | Top5 | Params | FLOPs | Source weights | Remarks | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| ResNet-10 | 37.09 | 15.55 | 5,418,792 | 892.62M | osmr's repo | Success | +| ResNet-12 | 35.86 | 14.46 | 5,492,776 | 1,124.23M | osmr's repo | Success | +| ResNet-14 | 32.85 | 12.41 | 5,788,200 | 1,355.64M | osmr's repo | Success | +| ResNet-16 | 30.68 | 11.10 | 6,968,872 | 1,586.95M | osmr's repo | Success | +| ResNet-18 x0.25 | 49.16 | 24.45 | 831,096 | 136.64M | osmr's repo | Success | +| ResNet-18 x0.5 | 36.54 | 14.96 | 3,055,880 | 485.22M | osmr's repo | Success | +| ResNet-18 x0.75 | 33.25 | 12.54 
| 6,675,352 | 1,045.75M | osmr's repo | Success | +| ResNet-18 | 29.13 | 9.94 | 11,689,512 | 1,818.21M | osmr's repo | Success | +| ResNet-34 | 25.34 | 7.92 | 21,797,672 | 3,669.16M | osmr's repo | Success | +| ResNet-50 | 23.50 | 6.87 | 25,557,032 | 3,868.96M | osmr's repo | Success | +| ResNet-50b | 22.92 | 6.44 | 25,557,032 | 4,100.70M | osmr's repo | Success | +| ResNet-101 | 21.66 | 5.99 | 44,549,160 | 7,586.30M | osmr's repo | Success | +| ResNet-101b | 21.18 | 5.60 | 44,549,160 | 7,818.04M | osmr's repo | Success | +| ResNet-152 | 21.01 | 5.61 | 60,192,808 | 11,304.85M | osmr's repo | Success | +| ResNet-152b | 20.54 | 5.37 | 60,192,808 | 11,536.58M | osmr's repo | Success | +| PreResNet-18 | 28.72 | 9.88 | 11,687,848 | 1,818.41M | osmr's repo | Success | +| PreResNet-34 | 25.88 | 8.11 | 21,796,008 | 3,669.36M | osmr's repo | Success | +| PreResNet-50 | 23.39 | 6.68 | 25,549,480 | 3,869.16M | osmr's repo | Success | +| PreResNet-50b | 23.16 | 6.64 | 25,549,480 | 4,100.90M | osmr's repo | Success | +| PreResNet-101 | 21.45 | 5.75 | 44,541,608 | 7,586.50M | osmr's repo | Success | +| PreResNet-101b | 21.73 | 5.88 | 44,541,608 | 7,818.24M | osmr's repo | Success | +| PreResNet-152 | 20.70 | 5.32 | 60,185,256 | 11,305.05M | osmr's repo | Success | +| PreResNet-152b | 21.00 | 5.75 | 60,185,256 | 11,536.78M | Gluon Model Zoo| Success | +| PreResNet-200b | 21.10 | 5.64 | 64,666,280 | 15,040.27M | tornadomeet/ResNet | Success | +| DenseNet-121 | 25.11 | 7.80 | 7,978,856 | 2,852.39M | Gluon Model Zoo| Success | +| DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | +| DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | +| DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | +| DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | +| DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | +| SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 
828.30M | osmr's repo | Success | +| SqueezeNet v1.1 | 39.09 | 17.39 | 1,235,496 | 354.88M | osmr's repo | Success | +| MobileNet x0.25 | 45.78 | 22.18 | 470,072 | 42.30M | osmr's repo | Success | +| MobileNet x0.5 | 36.12 | 14.81 | 1,331,592 | 152.04M | osmr's repo | Success | +| MobileNet x0.75 | 32.71 | 12.28 | 2,585,560 | 329.22M | Gluon Model Zoo| Success | +| MobileNet x1.0 | 29.25 | 10.03 | 4,231,976 | 573.83M | Gluon Model Zoo| Success | +| FD-MobileNet x0.25 | 56.19 | 31.38 | 383,160 | 12.44M | osmr's repo | Success | +| FD-MobileNet x0.5 | 42.62 | 19.69 | 993,928 | 40.93M | osmr's repo | Success | +| FD-MobileNet x1.0 | 35.95 | 14.72 | 2,901,288 | 146.08M | clavichord93/FD-MobileNet | Success | +| MobileNetV2 x0.25 | 48.89 | 25.24 | 1,516,392 | 32.22M | Gluon Model Zoo| Success | +| MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | +| MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | +| MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | +| InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | Gluon Model Zoo| Success | +| DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | +| DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | +| DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | + + +## License +This software is covered by MIT License. 
\ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..b854bca --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +mkdocs \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..843db6a --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1 @@ +site_name: pytorch2keras documentation From 16d2a1f27e898abab50d3ac6edac77fa325e8135 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 11:35:58 +0300 Subject: [PATCH 128/180] Update documentation. --- LICENSE | 2 +- docs/api.md | 17 ++ docs/getting_started.md | 124 ++++++++++++ docs/index.md | 287 ---------------------------- docs/installation.md | 22 +++ docs/license.md | 21 ++ docs/supported_layers_and_models.md | 110 +++++++++++ mkdocs.yml | 9 + 8 files changed, 304 insertions(+), 288 deletions(-) create mode 100644 docs/api.md create mode 100644 docs/getting_started.md create mode 100644 docs/installation.md create mode 100644 docs/license.md create mode 100644 docs/supported_layers_and_models.md diff --git a/LICENSE b/LICENSE index af461f4..4d7d753 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 Grigory Malivenko +Copyright (c) 2019 Grigory Malivenko Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..a9ea4d4 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,17 @@ +Here is the only method `pytorch_to_keras` from `pytorch2keras` module. 
+``` +def pytorch_to_keras( + model, args, input_shapes, + change_ordering=False, training=False, verbose=False, names=False, +) +``` + +Options: + +* model -- a PyTorch module to convert; +* args -- list of dummy variables with proper shapes; +* input_shapes -- list with shape tuples; +* change_ordering -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` +* training -- boolean, switch model to training mode (never use it) +* verbose -- boolean, verbose output +* names -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md new file mode 100644 index 0000000..eac719c --- /dev/null +++ b/docs/getting_started.md @@ -0,0 +1,124 @@ +# Basic usage + +It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. + +Firstly, we need to load (or create) a valid PyTorch model: + + ``` + class TestConv2d(nn.Module): + """ + Module for Conv2d testing + """ + + def __init__(self, inp=10, out=16, kernel_size=3): + super(TestConv2d, self).__init__() + self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) + + def forward(self, x): + x = self.conv2d(x) + return x + + model = TestConv2d() + + # load weights here + # model.load_state_dict(torch.load(path_to_weights.pth)) + ``` + +The next step - create a dummy variable with correct shape: + + ``` + input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) + input_var = Variable(torch.FloatTensor(input_np)) + ``` + +We use the dummy-variable to trace the model (with jit.trace): + + ``` + from converter import pytorch_to_keras + # we should specify shape of the input tensor + k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) + ``` + +You can also set H and W dimensions to None to make your model shape-agnostic (e.g. 
fully convolutional network): + + ``` + from converter import pytorch_to_keras + # we should specify shape of the input tensor + k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) + ``` + +That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. + +# Tensorflow.js + +For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. + +Here is a short instruction how to get a tensorflow.js model: + +1. First of all, you have to convert your model to Keras with this converter: + + ``` + k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, names='short') + ``` + +2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. As alternative, you may get Tensorflow Graph and save it as a frozen model: + + ``` + # Function below copied from here: + # https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb + def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): + """ + Freezes the state of a session into a pruned computation graph. + + Creates a new computation graph where variable nodes are replaced by + constants taking their current value in the session. The new graph will be + pruned so subgraphs that are not necessary to compute the requested + outputs are removed. + @param session The TensorFlow session to be frozen. + @param keep_var_names A list of variable names that should not be frozen, + or None to freeze all the variables in the graph. + @param output_names Names of the relevant graph outputs. + @param clear_devices Remove the device directives from the graph for better portability. + @return The frozen graph definition. 
 + """ + from tensorflow.python.framework.graph_util import convert_variables_to_constants + graph = session.graph + with graph.as_default(): + freeze_var_names = \ + list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) + output_names = output_names or [] + output_names += [v.op.name for v in tf.global_variables()] + input_graph_def = graph.as_graph_def() + if clear_devices: + for node in input_graph_def.node: + node.device = "" + frozen_graph = convert_variables_to_constants(session, input_graph_def, + output_names, freeze_var_names) + return frozen_graph + + from keras import backend as K + import tensorflow as tf + frozen_graph = freeze_session(K.get_session(), + output_names=[out.op.name for out in k_model.outputs]) + + tf.train.write_graph(frozen_graph, ".", "my_model.pb", as_text=False) + print([i for i in k_model.outputs]) + ``` + +3. You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: + + ``` + tensorflowjs_converter \ + --input_format=tf_frozen_model \ + --output_node_names='TANHTObs/Tanh' \ + my_model.pb \ + model_tfjs + ``` + +4. That's all! + + ``` + const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; + const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; + const model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); + ``` diff --git a/docs/index.md b/docs/index.md index 776dbcd..69edf02 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,30 +9,6 @@ PyTorch to Keras model converter. This project is created to make a model conversation easier, so, the converter API is developed with maximal simplicity. 
-## Installation - - -To install ready-to-use package, you may use PIP: - -``` -pip install pytorch2keras -``` - -It's possible to install package with PIP right from git: - -``` -pip install -U git+https://github.com/nerox8664/pytorch2keras -``` - -Or to clone it manually and then install it with PIP: - -``` -git clone https://github.com/nerox8664/pytorch2keras -cd pytorch2keras -pip install -e . -``` - - ## PyTorch 0.4.1 and greater There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): @@ -60,266 +36,3 @@ class ResNet(torchvision.models.resnet.ResNet): x = self.fc(x) return x ``` - -## Tensorflow.js - -For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. - -Here is a short instruction how to get a tensorflow.js model: - -1. First of all, you have to convert your model to Keras with this converter: - -``` -k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, names='short') -``` - -2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. As alternative, you may get Tensorflow Graph and save it as a frozen model: - -``` -# Function below copied from here: -# https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb -def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): - """ - Freezes the state of a session into a pruned computation graph. - - Creates a new computation graph where variable nodes are replaced by - constants taking their current value in the session. The new graph will be - pruned so subgraphs that are not necessary to compute the requested - outputs are removed. - @param session The TensorFlow session to be frozen. - @param keep_var_names A list of variable names that should not be frozen, - or None to freeze all the variables in the graph. - @param output_names Names of the relevant graph outputs. 
- @param clear_devices Remove the device directives from the graph for better portability. - @return The frozen graph definition. - """ - from tensorflow.python.framework.graph_util import convert_variables_to_constants - graph = session.graph - with graph.as_default(): - freeze_var_names = \ - list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) - output_names = output_names or [] - output_names += [v.op.name for v in tf.global_variables()] - input_graph_def = graph.as_graph_def() - if clear_devices: - for node in input_graph_def.node: - node.device = "" - frozen_graph = convert_variables_to_constants(session, input_graph_def, - output_names, freeze_var_names) - return frozen_graph - - -from keras import backend as K -import tensorflow as tf -frozen_graph = freeze_session(K.get_session(), - output_names=[out.op.name for out in k_model.outputs]) - -tf.train.write_graph(frozen_graph, ".", "my_model.pb", as_text=False) -print([i for i in k_model.outputs]) - -``` - -3. You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: - -``` -tensorflowjs_converter \ - --input_format=tf_frozen_model \ - --output_node_names='TANHTObs/Tanh' \ - my_model.pb \ - model_tfjs -``` - -4. Thats all! - -``` -const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; -const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; -cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); -``` - -## How to use - -It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. 
- -Firstly, we need to load (or create) a valid PyTorch model: - -``` -class TestConv2d(nn.Module): - """ - Module for Conv2d testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) - - def forward(self, x): - x = self.conv2d(x) - return x - -model = TestConv2d() - -# load weights here -# model.load_state_dict(torch.load(path_to_weights.pth)) -``` - -The next step - create a dummy variable with correct shape: - -``` -input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) -input_var = Variable(torch.FloatTensor(input_np)) -``` - -We use the dummy-variable to trace the model (with jit.trace): - -``` -from converter import pytorch_to_keras -# we should specify shape of the input tensor -k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) -``` - -You can also set H and W dimensions to None to make your model shape-agnostic (e.g. fully convolutional netowrk): - -``` -from converter import pytorch_to_keras -# we should specify shape of the input tensor -k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) -``` - -That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. - - -## API - -Here is the only method `pytorch_to_keras` from `pytorch2keras` module. -``` -def pytorch_to_keras( - model, args, input_shapes, - change_ordering=False, training=False, verbose=False, names=False, -) -``` - -Options: - -* model -- a PyTorch module to convert; -* args -- list of dummy variables with proper shapes; -* input_shapes -- list with shape tuples; -* change_ordering -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` -* training -- boolean, switch model to training mode (never use it) -* verbose -- boolean, verbose output -* names -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. 
- -## Supported layers - -* Activations: - + ReLU - + LeakyReLU - + SELU - + Sigmoid - + Softmax - + Tanh - + HardTanh - -* Constants - -* Convolutions: - + Conv1d - + Conv2d - + ConvTrsnpose2d - -* Element-wise: - + Add - + Mul - + Sub - + Div - -* Embedding - -* Linear - -* Normalizations: - + BatchNorm2d - + InstanceNorm2d - + Dropout - -* Poolings: - + MaxPool2d - + AvgPool2d - + Global MaxPool2d (adaptive pooling to shape [1, 1]) - + Global AvgPool2d (adaptive pooling to shape [1, 1]) - -* Not tested yet: - + Upsampling - + Padding - + Reshape - - -## Models converted with pytorch2keras - -* ResNet* -* VGG* -* PreResNet* -* SqueezeNet (with ceil_mode=False) -* SqueezeNext -* DenseNet* -* AlexNet -* Inception -* SeNet -* Mobilenet v2 -* DiracNet -* DARTS -* DRNC - -| Model | Top1 | Top5 | Params | FLOPs | Source weights | Remarks | -| --- | ---: | ---: | ---: | ---: | ---: | ---: | -| ResNet-10 | 37.09 | 15.55 | 5,418,792 | 892.62M | osmr's repo | Success | -| ResNet-12 | 35.86 | 14.46 | 5,492,776 | 1,124.23M | osmr's repo | Success | -| ResNet-14 | 32.85 | 12.41 | 5,788,200 | 1,355.64M | osmr's repo | Success | -| ResNet-16 | 30.68 | 11.10 | 6,968,872 | 1,586.95M | osmr's repo | Success | -| ResNet-18 x0.25 | 49.16 | 24.45 | 831,096 | 136.64M | osmr's repo | Success | -| ResNet-18 x0.5 | 36.54 | 14.96 | 3,055,880 | 485.22M | osmr's repo | Success | -| ResNet-18 x0.75 | 33.25 | 12.54 | 6,675,352 | 1,045.75M | osmr's repo | Success | -| ResNet-18 | 29.13 | 9.94 | 11,689,512 | 1,818.21M | osmr's repo | Success | -| ResNet-34 | 25.34 | 7.92 | 21,797,672 | 3,669.16M | osmr's repo | Success | -| ResNet-50 | 23.50 | 6.87 | 25,557,032 | 3,868.96M | osmr's repo | Success | -| ResNet-50b | 22.92 | 6.44 | 25,557,032 | 4,100.70M | osmr's repo | Success | -| ResNet-101 | 21.66 | 5.99 | 44,549,160 | 7,586.30M | osmr's repo | Success | -| ResNet-101b | 21.18 | 5.60 | 44,549,160 | 7,818.04M | osmr's repo | Success | -| ResNet-152 | 21.01 | 5.61 | 60,192,808 | 11,304.85M | 
osmr's repo | Success | -| ResNet-152b | 20.54 | 5.37 | 60,192,808 | 11,536.58M | osmr's repo | Success | -| PreResNet-18 | 28.72 | 9.88 | 11,687,848 | 1,818.41M | osmr's repo | Success | -| PreResNet-34 | 25.88 | 8.11 | 21,796,008 | 3,669.36M | osmr's repo | Success | -| PreResNet-50 | 23.39 | 6.68 | 25,549,480 | 3,869.16M | osmr's repo | Success | -| PreResNet-50b | 23.16 | 6.64 | 25,549,480 | 4,100.90M | osmr's repo | Success | -| PreResNet-101 | 21.45 | 5.75 | 44,541,608 | 7,586.50M | osmr's repo | Success | -| PreResNet-101b | 21.73 | 5.88 | 44,541,608 | 7,818.24M | osmr's repo | Success | -| PreResNet-152 | 20.70 | 5.32 | 60,185,256 | 11,305.05M | osmr's repo | Success | -| PreResNet-152b | 21.00 | 5.75 | 60,185,256 | 11,536.78M | Gluon Model Zoo| Success | -| PreResNet-200b | 21.10 | 5.64 | 64,666,280 | 15,040.27M | tornadomeet/ResNet | Success | -| DenseNet-121 | 25.11 | 7.80 | 7,978,856 | 2,852.39M | Gluon Model Zoo| Success | -| DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | -| DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | -| DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | -| DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | -| DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | -| SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 828.30M | osmr's repo | Success | -| SqueezeNet v1.1 | 39.09 | 17.39 | 1,235,496 | 354.88M | osmr's repo | Success | -| MobileNet x0.25 | 45.78 | 22.18 | 470,072 | 42.30M | osmr's repo | Success | -| MobileNet x0.5 | 36.12 | 14.81 | 1,331,592 | 152.04M | osmr's repo | Success | -| MobileNet x0.75 | 32.71 | 12.28 | 2,585,560 | 329.22M | Gluon Model Zoo| Success | -| MobileNet x1.0 | 29.25 | 10.03 | 4,231,976 | 573.83M | Gluon Model Zoo| Success | -| FD-MobileNet x0.25 | 56.19 | 31.38 | 383,160 | 12.44M | osmr's repo | Success | -| FD-MobileNet x0.5 | 42.62 | 19.69 | 993,928 | 
40.93M | osmr's repo | Success | -| FD-MobileNet x1.0 | 35.95 | 14.72 | 2,901,288 | 146.08M | clavichord93/FD-MobileNet | Success | -| MobileNetV2 x0.25 | 48.89 | 25.24 | 1,516,392 | 32.22M | Gluon Model Zoo| Success | -| MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | -| MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | -| MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | -| InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | Gluon Model Zoo| Success | -| DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | -| DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | -| DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | - - -## License -This software is covered by MIT License. \ No newline at end of file diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..e6c13a3 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,22 @@ +# Installation + + +To install ready-to-use package, you may use PIP: + +``` +pip install pytorch2keras +``` + +It's possible to install package with PIP right from git: + +``` +pip install -U git+https://github.com/nerox8664/pytorch2keras +``` + +Or to clone it manually and then install it with PIP: + +``` +git clone https://github.com/nerox8664/pytorch2keras +cd pytorch2keras +pip install -e . 
+``` diff --git a/docs/license.md b/docs/license.md new file mode 100644 index 0000000..4d7d753 --- /dev/null +++ b/docs/license.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Grigory Malivenko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/docs/supported_layers_and_models.md b/docs/supported_layers_and_models.md new file mode 100644 index 0000000..2c2565a --- /dev/null +++ b/docs/supported_layers_and_models.md @@ -0,0 +1,110 @@ +# Supported layers + +* Activations: + + ReLU + + LeakyReLU + + SELU + + Sigmoid + + Softmax + + Tanh + + HardTanh + +* Constants + +* Convolutions: + + Conv1d + + Conv2d + + ConvTrsnpose2d + +* Element-wise: + + Add + + Mul + + Sub + + Div + +* Embedding + +* Linear + +* Normalizations: + + BatchNorm2d + + InstanceNorm2d + + Dropout + +* Poolings: + + MaxPool2d + + AvgPool2d + + Global MaxPool2d (adaptive pooling to shape [1, 1]) + + Global AvgPool2d (adaptive pooling to shape [1, 1]) + +* Not tested yet: + + Upsampling + + Padding + + Reshape + + +# Models converted with pytorch2keras + +* ResNet* +* VGG* +* PreResNet* +* SqueezeNet (with ceil_mode=False) +* SqueezeNext +* DenseNet* +* AlexNet +* Inception +* SeNet +* Mobilenet v2 +* DiracNet +* DARTS +* DRNC + +| Model | Top1 | Top5 | Params | FLOPs | Source weights | Remarks | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| ResNet-10 | 37.09 | 15.55 | 5,418,792 | 892.62M | osmr's repo | Success | +| ResNet-12 | 35.86 | 14.46 | 5,492,776 | 1,124.23M | osmr's repo | Success | +| ResNet-14 | 32.85 | 12.41 | 5,788,200 | 1,355.64M | osmr's repo | Success | +| ResNet-16 | 30.68 | 11.10 | 6,968,872 | 1,586.95M | osmr's repo | Success | +| ResNet-18 x0.25 | 49.16 | 24.45 | 831,096 | 136.64M | osmr's repo | Success | +| ResNet-18 x0.5 | 36.54 | 14.96 | 3,055,880 | 485.22M | osmr's repo | Success | +| ResNet-18 x0.75 | 33.25 | 12.54 | 6,675,352 | 1,045.75M | osmr's repo | Success | +| ResNet-18 | 29.13 | 9.94 | 11,689,512 | 1,818.21M | osmr's repo | Success | +| ResNet-34 | 25.34 | 7.92 | 21,797,672 | 3,669.16M | osmr's repo | Success | +| ResNet-50 | 23.50 | 6.87 | 25,557,032 | 3,868.96M | osmr's repo | Success | +| ResNet-50b | 22.92 | 6.44 | 25,557,032 | 4,100.70M | osmr's repo | Success | +| ResNet-101 | 
21.66 | 5.99 | 44,549,160 | 7,586.30M | osmr's repo | Success | +| ResNet-101b | 21.18 | 5.60 | 44,549,160 | 7,818.04M | osmr's repo | Success | +| ResNet-152 | 21.01 | 5.61 | 60,192,808 | 11,304.85M | osmr's repo | Success | +| ResNet-152b | 20.54 | 5.37 | 60,192,808 | 11,536.58M | osmr's repo | Success | +| PreResNet-18 | 28.72 | 9.88 | 11,687,848 | 1,818.41M | osmr's repo | Success | +| PreResNet-34 | 25.88 | 8.11 | 21,796,008 | 3,669.36M | osmr's repo | Success | +| PreResNet-50 | 23.39 | 6.68 | 25,549,480 | 3,869.16M | osmr's repo | Success | +| PreResNet-50b | 23.16 | 6.64 | 25,549,480 | 4,100.90M | osmr's repo | Success | +| PreResNet-101 | 21.45 | 5.75 | 44,541,608 | 7,586.50M | osmr's repo | Success | +| PreResNet-101b | 21.73 | 5.88 | 44,541,608 | 7,818.24M | osmr's repo | Success | +| PreResNet-152 | 20.70 | 5.32 | 60,185,256 | 11,305.05M | osmr's repo | Success | +| PreResNet-152b | 21.00 | 5.75 | 60,185,256 | 11,536.78M | Gluon Model Zoo| Success | +| PreResNet-200b | 21.10 | 5.64 | 64,666,280 | 15,040.27M | tornadomeet/ResNet | Success | +| DenseNet-121 | 25.11 | 7.80 | 7,978,856 | 2,852.39M | Gluon Model Zoo| Success | +| DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | +| DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | +| DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | +| DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | +| DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | +| SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 828.30M | osmr's repo | Success | +| SqueezeNet v1.1 | 39.09 | 17.39 | 1,235,496 | 354.88M | osmr's repo | Success | +| MobileNet x0.25 | 45.78 | 22.18 | 470,072 | 42.30M | osmr's repo | Success | +| MobileNet x0.5 | 36.12 | 14.81 | 1,331,592 | 152.04M | osmr's repo | Success | +| MobileNet x0.75 | 32.71 | 12.28 | 2,585,560 | 329.22M | Gluon Model Zoo| Success | +| MobileNet x1.0 | 
29.25 | 10.03 | 4,231,976 | 573.83M | Gluon Model Zoo| Success | +| FD-MobileNet x0.25 | 56.19 | 31.38 | 383,160 | 12.44M | osmr's repo | Success | +| FD-MobileNet x0.5 | 42.62 | 19.69 | 993,928 | 40.93M | osmr's repo | Success | +| FD-MobileNet x1.0 | 35.95 | 14.72 | 2,901,288 | 146.08M | clavichord93/FD-MobileNet | Success | +| MobileNetV2 x0.25 | 48.89 | 25.24 | 1,516,392 | 32.22M | Gluon Model Zoo| Success | +| MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | +| MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | +| MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | +| InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | Gluon Model Zoo| Success | +| DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | +| DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | +| DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | diff --git a/mkdocs.yml b/mkdocs.yml index 843db6a..7f74a3b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1 +1,10 @@ site_name: pytorch2keras documentation +nav: + - Home: index.md + - Installation: installation.md + - Getting started: getting_started.md + - Supported layers and models: supported_layers_and_models.md + - API: api.md + - License: license.md + +theme: readthedocs \ No newline at end of file From c530f28b6ec33d5a86b7dd83cd4f4915ea0bc448 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 11:41:48 +0300 Subject: [PATCH 129/180] Update docs. 
--- docs/getting_started.md | 4 ++-- docs/supported_layers_and_models.md | 4 ++-- mkdocs.yml | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index eac719c..342f3c2 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,4 +1,4 @@ -# Basic usage +## Basic usage It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. @@ -49,7 +49,7 @@ You can also set H and W dimensions to None to make your model shape-agnostic (e That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. -# Tensorflow.js +## Tensorflow.js For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. diff --git a/docs/supported_layers_and_models.md b/docs/supported_layers_and_models.md index 2c2565a..138d7ad 100644 --- a/docs/supported_layers_and_models.md +++ b/docs/supported_layers_and_models.md @@ -1,4 +1,4 @@ -# Supported layers +## Supported layers * Activations: + ReLU @@ -43,7 +43,7 @@ + Reshape -# Models converted with pytorch2keras +## Models converted with pytorch2keras * ResNet* * VGG* diff --git a/mkdocs.yml b/mkdocs.yml index 7f74a3b..e051ace 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,10 +1,10 @@ site_name: pytorch2keras documentation nav: - - Home: index.md - - Installation: installation.md - - Getting started: getting_started.md - - Supported layers and models: supported_layers_and_models.md - - API: api.md - - License: license.md + - 'Home': index.md + - 'Installation': installation.md + - 'Getting started': getting_started.md + - 'Supported layers and models': supported_layers_and_models.md + - 'API': api.md + - 'License': license.md theme: readthedocs \ No newline at end of file From cf97c7aa156b82ea5ad452e2da1d352ad346a3a1 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 11:55:36 +0300 Subject: [PATCH 130/180] Update docs. 
--- .readthedocs.yml | 11 ----------- docs/installation.md | 3 --- mkdocs.yml | 16 ++++++++-------- readthedocs.yml | 9 +++++++++ 4 files changed, 17 insertions(+), 22 deletions(-) delete mode 100644 .readthedocs.yml create mode 100644 readthedocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index 9a3a7a6..0000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,11 +0,0 @@ -# .readthedocs.yml - -build: - image: latest - -python: - version: 3.6 - setup_py_install: true - -# Don't build any extra formats -formats: [] \ No newline at end of file diff --git a/docs/installation.md b/docs/installation.md index e6c13a3..e92a6be 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,6 +1,3 @@ -# Installation - - To install ready-to-use package, you may use PIP: ``` diff --git a/mkdocs.yml b/mkdocs.yml index e051ace..823e329 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,10 +1,10 @@ site_name: pytorch2keras documentation -nav: - - 'Home': index.md - - 'Installation': installation.md - - 'Getting started': getting_started.md - - 'Supported layers and models': supported_layers_and_models.md - - 'API': api.md - - 'License': license.md +theme: readthedocs -theme: readthedocs \ No newline at end of file +nav: + - Home: index.md + - Installation: installation.md + - Getting started: getting_started.md + - Supported layers and models: supported_layers_and_models.md + - API: api.md + - License: license.md \ No newline at end of file diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 0000000..eba6050 --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,9 @@ +# .readthedocs.yml + +build: + image: latest + +python: + setup_py_install: true + +requirements_file: docs/requirements.txt \ No newline at end of file From 3bc1f1ba9afe4531f5279cf3eac4cf05c31ec3ea Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 11:58:10 +0300 Subject: [PATCH 131/180] Add readthedocs badge. 
--- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ec7db1e..7c37dfc 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ [![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) [![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) +![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg) + PyTorch to Keras model converter. From 98584bb09faf49df157476f240851e820873a9aa Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 11:59:18 +0300 Subject: [PATCH 132/180] Another try to fix menu ordering. --- mkdocs.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 823e329..0765465 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,10 +1,10 @@ site_name: pytorch2keras documentation theme: readthedocs -nav: - - Home: index.md - - Installation: installation.md - - Getting started: getting_started.md - - Supported layers and models: supported_layers_and_models.md - - API: api.md - - License: license.md \ No newline at end of file +pages: +- Home: index.md +- Installation: installation.md +- Getting started: getting_started.md +- Supported layers and models: supported_layers_and_models.md +- API: api.md +- License: license.md \ No newline at end of file From 2736df1527427629ecd81c5c98b708005b34fc3f Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 12:01:55 +0300 Subject: [PATCH 133/180] Fix shield link. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c37dfc..c91a687 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) [![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) -![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg) +[![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg)](https://pytorch2keras.readthedocs.io/en/latest/api/) PyTorch to Keras model converter. From 0e74eb5b396ce3c0702ff60a8233f7b70ed89b9c Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 12:02:40 +0300 Subject: [PATCH 134/180] Set doc shield link to doc index. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c91a687..163c641 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) [![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) -[![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg)](https://pytorch2keras.readthedocs.io/en/latest/api/) +[![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg)](https://pytorch2keras.readthedocs.io/en/latest/) PyTorch to Keras model converter. From 874e2178afbdea32fba2babac914b57d20082483 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 17:32:35 +0300 Subject: [PATCH 135/180] Update documentation. 
--- docs/api.md | 15 ++++---- docs/getting_started.md | 84 ++++++++++++++++++++--------------------- docs/index.md | 29 -------------- docs/known_problems.md | 31 +++++++++++++++ mkdocs.yml | 1 + 5 files changed, 81 insertions(+), 79 deletions(-) create mode 100644 docs/known_problems.md diff --git a/docs/api.md b/docs/api.md index a9ea4d4..3ede65f 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,4 +1,5 @@ Here is the only method `pytorch_to_keras` from `pytorch2keras` module. + ``` def pytorch_to_keras( model, args, input_shapes, @@ -8,10 +9,10 @@ def pytorch_to_keras( Options: -* model -- a PyTorch module to convert; -* args -- list of dummy variables with proper shapes; -* input_shapes -- list with shape tuples; -* change_ordering -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` -* training -- boolean, switch model to training mode (never use it) -* verbose -- boolean, verbose output -* names -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file + * `model` -- a PyTorch module to convert; + * `args` -- list of dummy variables with proper shapes; + * `input_shapes` -- list with shape tuples; + * `change_ordering` -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` + * `training` -- boolean, switch model to training mode (never use it) + * `verbose` -- boolean, verbose output + * `names` -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md index 342f3c2..0692dbd 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -4,48 +4,48 @@ It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. 
Firstly, we need to load (or create) a valid PyTorch model: - ``` - class TestConv2d(nn.Module): - """ - Module for Conv2d testing - """ +``` +class TestConv2d(nn.Module): + """ + Module for Conv2d testing + """ - def __init__(self, inp=10, out=16, kernel_size=3): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) + def __init__(self, inp=10, out=16, kernel_size=3): + super(TestConv2d, self).__init__() + self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) - def forward(self, x): - x = self.conv2d(x) - return x + def forward(self, x): + x = self.conv2d(x) + return x - model = TestConv2d() +model = TestConv2d() - # load weights here - # model.load_state_dict(torch.load(path_to_weights.pth)) - ``` +# load weights here +# model.load_state_dict(torch.load(path_to_weights.pth)) +``` The next step - create a dummy variable with correct shape: - ``` - input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) - input_var = Variable(torch.FloatTensor(input_np)) - ``` +``` +input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) +input_var = Variable(torch.FloatTensor(input_np)) +``` We use the dummy-variable to trace the model (with jit.trace): - ``` - from converter import pytorch_to_keras - # we should specify shape of the input tensor - k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) - ``` +``` +from converter import pytorch_to_keras +# we should specify shape of the input tensor +k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) +``` You can also set H and W dimensions to None to make your model shape-agnostic (e.g. 
fully convolutional netowrk): - ``` - from converter import pytorch_to_keras - # we should specify shape of the input tensor - k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) - ``` +``` +from converter import pytorch_to_keras +# we should specify shape of the input tensor +k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) +``` That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. @@ -63,7 +63,6 @@ Here is a short instruction how to get a tensorflow.js model: 2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. As alternative, you may get Tensorflow Graph and save it as a frozen model: - ``` # Function below copied from here: # https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): @@ -103,22 +102,21 @@ Here is a short instruction how to get a tensorflow.js model: tf.train.write_graph(frozen_graph, ".", "my_model.pb", as_text=False) print([i for i in k_model.outputs]) - ``` 3. You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: - ``` - tensorflowjs_converter \ - --input_format=tf_frozen_model \ - --output_node_names='TANHTObs/Tanh' \ - my_model.pb \ - model_tfjs - ``` + ``` + tensorflowjs_converter \ + --input_format=tf_frozen_model \ + --output_node_names='TANHTObs/Tanh' \ + my_model.pb \ + model_tfjs + ``` 4. Thats all! 
- ``` - const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; - const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; - cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); - ``` + ``` + const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; + const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; + cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); + ``` diff --git a/docs/index.md b/docs/index.md index 69edf02..5902baf 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,32 +7,3 @@ ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) PyTorch to Keras model converter. This project is created to make a model conversation easier, so, the converter API is developed with maximal simplicity. - - -## PyTorch 0.4.1 and greater - -There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): - -To make it work, please, cast all your `.view()` parameters to `int`. For example: - -``` -class ResNet(torchvision.models.resnet.ResNet): - def __init__(self, *args, **kwargs): - super(ResNet, self).__init__(*args, **kwargs) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << Here - x = self.fc(x) - return x -``` diff --git a/docs/known_problems.md b/docs/known_problems.md new file mode 100644 index 0000000..769fa84 --- /dev/null +++ b/docs/known_problems.md @@ -0,0 +1,31 @@ +## Linear layer problem with PyTorch 0.4.1 and greater + +There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): + +To make it work, please, cast all your `.view()` parameters to `int`. 
For example: + +``` +class ResNet(torchvision.models.resnet.ResNet): + def __init__(self, *args, **kwargs): + super(ResNet, self).__init__(*args, **kwargs) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(int(x.size(0)), -1) # << Here + x = self.fc(x) + return x +``` + +## Recurrent layers + +The recurrent layers are not supported due to complicated onnx-translation. The support is planned, but haven't implemented yet. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 0765465..6be292a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,4 +7,5 @@ pages: - Getting started: getting_started.md - Supported layers and models: supported_layers_and_models.md - API: api.md +- Known problems: known_problems.md - License: license.md \ No newline at end of file From ec4ec673b0397db87679d3e856981c9ab9875451 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 17:34:05 +0300 Subject: [PATCH 136/180] Remove excess header. --- docs/index.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 5902baf..746b334 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,3 @@ -# pytorch2keras - [![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) [![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) From 1da49d5dc61c133ec892db5efa1448c3448a8583 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 21 Jan 2019 18:24:21 +0300 Subject: [PATCH 137/180] Fix API page. 
--- docs/api.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/api.md b/docs/api.md index 3ede65f..1f062a9 100644 --- a/docs/api.md +++ b/docs/api.md @@ -9,10 +9,10 @@ def pytorch_to_keras( Options: - * `model` -- a PyTorch module to convert; - * `args` -- list of dummy variables with proper shapes; - * `input_shapes` -- list with shape tuples; - * `change_ordering` -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` - * `training` -- boolean, switch model to training mode (never use it) - * `verbose` -- boolean, verbose output - * `names` -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file +* `model` - a PyTorch module to convert; +* `args` - list of dummy variables with proper shapes; +* `input_shapes` - list with shape tuples; +* `change_ordering` - boolean, if enabled, the converter will try to change `BCHW` to `BHWC` +* `training` - boolean, switch model to training mode (never use it) +* `verbose` - boolean, verbose output +* `names` - choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file From 4b8aa7e9b69c4faa340f836665773d740f7019fc Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 28 Jan 2019 20:53:54 +0300 Subject: [PATCH 138/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6a46818..759051e 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.13', + version='0.1.14', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 10770a1902d92abfea4ebb9b94f07ed3fa207a54 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 28 Jan 2019 20:59:38 +0300 Subject: [PATCH 139/180] Update docs. 
--- README.md | 2 +- docs/getting_started.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 163c641..311a8ee 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) You can also set H and W dimensions to None to make your model shape-agnostic (e.g. fully convolutional netowrk): ``` -from converter import pytorch_to_keras +from pytorch2keras.converter import pytorch_to_keras # we should specify shape of the input tensor k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) ``` diff --git a/docs/getting_started.md b/docs/getting_started.md index 0692dbd..0818250 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -42,7 +42,7 @@ k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) You can also set H and W dimensions to None to make your model shape-agnostic (e.g. fully convolutional netowrk): ``` -from converter import pytorch_to_keras +from pytorch2keras.converter import pytorch_to_keras # we should specify shape of the input tensor k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) ``` From 198d7eee040d0eb3c5183cdf50a6350db53a5623 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 29 Jan 2019 14:08:11 +0300 Subject: [PATCH 140/180] Fix problem related to leaf indices. 
--- pytorch2keras/converter.py | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 380394c..4217f4a 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -140,17 +140,8 @@ def get_node_id(node): def get_leaf_id(node, state={}): - import re - try: - node_id = re.search(r"[\d\w]+ defined in", node.__str__()) - int(node_id.group(0)[:-11]) - return node_id.group(0)[:-11] - except: - if node_id.group(0)[:-11] in state: - return state[node_id.group(0)[:-11]] - else: - state[node_id.group(0)[:-11]] = str(len(state.keys())) - return str(state[node_id.group(0)[:-11]]) + return str(node.uniqueName()) + def pytorch_to_keras( model, args, input_shapes, @@ -246,8 +237,7 @@ def pytorch_to_keras( # Collect graph inputs and outputs graph_outputs = [get_leaf_id(n) for n in trace.graph().outputs()] graph_inputs = [get_leaf_id(n) for n in trace.graph().inputs()] - for i in trace.graph().inputs(): - print(i) + # Collect model state dict state_dict = _unique_state_dict(model) if verbose: @@ -263,27 +253,20 @@ def pytorch_to_keras( layers = dict() keras_inputs = [] for i in range(len(args)): - layers['input{0}'.format(i)] = keras.layers.InputLayer( + layers[graph_inputs[i]] = keras.layers.InputLayer( input_shape=input_shapes[i], name='input{0}'.format(i) ).output - keras_inputs.append(layers['input{0}'.format(i)]) + keras_inputs.append(layers[graph_inputs[i]]) outputs = [] - - input_index = 0 - model_inputs = ['input' + i for i in graph_inputs] - group_indices = defaultdict(lambda: 0, {}) for node in nodes: node_inputs = list(node.inputs()) node_input_names = [] - + for node_input in node_inputs: - if 'input{0}'.format(get_leaf_id(node_input)) in model_inputs: - node_input_names.append('input{0}'.format(get_leaf_id(node_input))) - else: - node_input_names.append(get_leaf_id(node_input)) + node_input_names.append(get_leaf_id(node_input)) node_type = 
node.kind() From c08ed38748c2c68e0bfad611c3d1c9421d3a9ade Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 29 Jan 2019 14:09:04 +0300 Subject: [PATCH 141/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 759051e..23928af 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.14', + version='0.1.15', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From dd961b0658b7887484e6166c63035a82aad88b2c Mon Sep 17 00:00:00 2001 From: EloiZalczer Date: Tue, 29 Jan 2019 14:23:53 +0100 Subject: [PATCH 142/180] Update conditions on pytorch versions Update conditions to support pytorch sub-versions between 0.4.1 and 1.0.0 --- pytorch2keras/converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 4217f4a..9228fa9 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -38,7 +38,7 @@ def set_training(model, mode): model.train(old_mode) -if version.parse('0.4.1') < version.parse(torch.__version__): +if version.parse('1.0.0') <= version.parse(torch.__version__): from torch._C import ListType # ONNX can't handle constants that are lists of tensors, which can @@ -59,7 +59,7 @@ def _split_tensor_list_constants(g, block): .setType(ListType.ofTensors())) node.output().replaceAllUsesWith(lc) -if version.parse('0.4.0') >= version.parse(torch.__version__): +if version.parse('1.0.0') > version.parse(torch.__version__): def _optimize_graph(graph, aten): # run dce first to eliminate dead parts of the graph that might have been # left behind by things like symbolic_override @@ -79,7 +79,7 @@ def _optimize_graph(graph, aten): return graph else: def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): - if version.parse('0.4.1') < 
version.parse(torch.__version__): + if version.parse('1.0.0') <= version.parse(torch.__version__): torch._C._jit_pass_remove_inplace_ops(graph) # we record now record some ops like ones/zeros # into a trace where we previously recorded constants From 73b523289e83cb84126cfb41a2c8b6632b5a20b6 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 12 Feb 2019 20:34:37 +0300 Subject: [PATCH 143/180] Fix constant layer. Fixed python2 support. --- pytorch2keras/constant_layers.py | 2 +- pytorch2keras/converter.py | 54 +++++++++++++++++--------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/pytorch2keras/constant_layers.py b/pytorch2keras/constant_layers.py index a356cdb..cc08e62 100644 --- a/pytorch2keras/constant_layers.py +++ b/pytorch2keras/constant_layers.py @@ -28,4 +28,4 @@ def target_layer(x, value=params_list): lambda_layer = keras.layers.Lambda(target_layer) layers[scope_name + '_np'] = params_list # ad-hoc - layers[scope_name] = lambda_layer(layers['input0']) # Temporary fix for nonexistent input name created by converter.py + layers[scope_name] = lambda_layer(layers[list(layers.keys())[0]]) # Temporary fix for nonexistent input name created by converter.py diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 4217f4a..5e63bb4 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -5,6 +5,7 @@ import contextlib from packaging import version from collections import defaultdict +import six import torch import torch.jit @@ -207,32 +208,33 @@ def pytorch_to_keras( # %529 : Float(1, 512) = onnx::Reshape(%522, %536), scope: ResNet # # It's better to replace it with onnx::Flatten - from types import SimpleNamespace - seq_to_find = \ - ['onnx::Constant', 'onnx::Shape', 'onnx::Gather', - 'onnx::Constant', 'onnx::Unsqueeze', 'onnx::Unsqueeze', 'onnx::Concat', 'onnx::Reshape'] - k = 0 - s = 0 - for i, node in enumerate(nodes): - if node.kind() == seq_to_find[k]: - if k == 0: - s = i - k += 1 - if k == 
len(seq_to_find): - reshape_op = nodes[s + k - 1] - flatten_op = { - 'kind': (lambda: 'onnx::Flatten'), - 'attributeNames': (lambda: {}), - 'outputs': (lambda: list(reshape_op.outputs())), - 'scopeName': (lambda: reshape_op.scopeName()), - 'inputs': (lambda: list(reshape_op.inputs())[:1]), - '__str__': (lambda: reshape_op.__str__()), - } - nodes = nodes[:s] + [SimpleNamespace(**flatten_op)] + nodes[s+k:] - break - else: - k = 0 - s = -1 + if six.PY3: + from types import SimpleNamespace + seq_to_find = \ + ['onnx::Constant', 'onnx::Shape', 'onnx::Gather', + 'onnx::Constant', 'onnx::Unsqueeze', 'onnx::Unsqueeze', 'onnx::Concat', 'onnx::Reshape'] + k = 0 + s = 0 + for i, node in enumerate(nodes): + if node.kind() == seq_to_find[k]: + if k == 0: + s = i + k += 1 + if k == len(seq_to_find): + reshape_op = nodes[s + k - 1] + flatten_op = { + 'kind': (lambda: 'onnx::Flatten'), + 'attributeNames': (lambda: {}), + 'outputs': (lambda: list(reshape_op.outputs())), + 'scopeName': (lambda: reshape_op.scopeName()), + 'inputs': (lambda: list(reshape_op.inputs())[:1]), + '__str__': (lambda: reshape_op.__str__()), + } + nodes = nodes[:s] + [SimpleNamespace(**flatten_op)] + nodes[s+k:] + break + else: + k = 0 + s = -1 # Collect graph inputs and outputs graph_outputs = [get_leaf_id(n) for n in trace.graph().outputs()] From d9d88cc169920b8ee73de734b10ece4ca4045241 Mon Sep 17 00:00:00 2001 From: Elte Hupkes Date: Fri, 8 Mar 2019 14:05:44 +0100 Subject: [PATCH 144/180] Explicit Tensorflow import in lambda layer This prevents a crash when `change_ordering = True`. 
--- pytorch2keras/operation_layers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch2keras/operation_layers.py b/pytorch2keras/operation_layers.py index 3bbab5d..87516a3 100644 --- a/pytorch2keras/operation_layers.py +++ b/pytorch2keras/operation_layers.py @@ -140,6 +140,7 @@ def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): print('Converting clip ...') def target_layer(x, vmin=params['min'], vmax=params['max']): + import tensorflow as tf return tf.clip_by_value(x, vmin, vmax) lambda_layer = keras.layers.Lambda(target_layer) From c23aaeec82f5d1d5f5876d15f7adfc18ba399da5 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 11 Mar 2019 18:06:05 +0300 Subject: [PATCH 145/180] Fixed multiple random errors with BN, upsample and reshape. --- pytorch2keras/normalization_layers.py | 16 ++++++++++++---- pytorch2keras/reshape_layers.py | 10 ++-------- pytorch2keras/upsampling_layers.py | 6 +++++- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pytorch2keras/normalization_layers.py b/pytorch2keras/normalization_layers.py index a4d8977..55d8552 100644 --- a/pytorch2keras/normalization_layers.py +++ b/pytorch2keras/normalization_layers.py @@ -85,11 +85,19 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, na assert(len(inputs) == 3) + bias_name = '{0}.bias'.format(w_name) + weights_name = '{0}.weight'.format(w_name) + # Use previously taken constants - assert(inputs[-2] + '_np' in layers) - assert(inputs[-1] + '_np' in layers) - gamma = layers[inputs[-2] + '_np'] - beta = layers[inputs[-1] + '_np'] + if inputs[-2] + '_np' in layers: + gamma = layers[inputs[-2] + '_np'] + else: + gamma = weights[weights_name].numpy() + + if inputs[-1] + '_np' in layers: + beta = layers[inputs[-1] + '_np'] + else: + beta = weights[bias_name].numpy() def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): layer = tf.contrib.layers.instance_norm( diff --git a/pytorch2keras/reshape_layers.py 
b/pytorch2keras/reshape_layers.py index eb8605b..83a8efb 100644 --- a/pytorch2keras/reshape_layers.py +++ b/pytorch2keras/reshape_layers.py @@ -86,14 +86,8 @@ def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): if layers[inputs[1]][0] == -1: print('Cannot deduct batch size! It will be omitted, but result may be wrong.') - def target_layer(x, shape=layers[inputs[1]]): - import tensorflow as tf - return tf.reshape(x, shape) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - # layers[scope_name] = reshape(layers[inputs[0]]) + reshape = keras.layers.Reshape(layers[inputs[1] + '_np'], name=tf_name) + layers[scope_name] = reshape(layers[inputs[0]]) else: if inputs[0] in layers: reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) diff --git a/pytorch2keras/upsampling_layers.py b/pytorch2keras/upsampling_layers.py index 42fa002..1375299 100644 --- a/pytorch2keras/upsampling_layers.py +++ b/pytorch2keras/upsampling_layers.py @@ -67,7 +67,11 @@ def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names) else: tf_name = w_name + str(random.random()) - scale = (params['height_scale'], params['width_scale']) + if 'height_scale' in params: + scale = (params['height_scale'], params['width_scale']) + elif len(inputs) == 2: + scale = layers[inputs[-1] + '_np'][-2:] + upsampling = keras.layers.UpSampling2D( size=scale, name=tf_name ) From 1e635e879475a5b97c1b8babce98f381687665c9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 11 Mar 2019 18:07:38 +0300 Subject: [PATCH 146/180] Update PyPi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 23928af..74dcf29 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.15', + version='0.1.16', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 62c58e0a7d8dc021802730d6c6d8241f2e54c3c3 Mon Sep 17 00:00:00 2001 From: Elte Hupkes Date: Wed, 13 Mar 2019 15:15:32 +0100 Subject: [PATCH 147/180] Using DepthwiseConv2D rather than separable conv with noop pointwise conv --- pytorch2keras/convolution_layers.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py index 3fb04a8..8c614df 100644 --- a/pytorch2keras/convolution_layers.py +++ b/pytorch2keras/convolution_layers.py @@ -98,26 +98,21 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): biases = None has_bias = False - # We are just doing depthwise conv, so make the pointwise a no-op - pointwise_wt = np.expand_dims(np.expand_dims(np.identity(out_channels), 0), 0) W = W.transpose(0, 1, 3, 2) if has_bias: - weights = [W, pointwise_wt, biases] + weights = [W, biases] else: - weights = [W, pointwise_wt] + weights = [W] - conv = keras.layers.SeparableConv2D( - filters=out_channels, - depth_multiplier=1, + conv = keras.layers.DepthwiseConv2D( kernel_size=(height, width), strides=(params['strides'][0], params['strides'][1]), padding='valid', - weights=weights, use_bias=has_bias, activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name + depth_multiplier=1, + weights = weights, + bias_initializer='zeros', kernel_initializer='zeros' ) layers[scope_name] = conv(layers[input_name]) From acbda031d6f002cec727b10aeab27bfc7a5fd383 Mon Sep 17 00:00:00 2001 From: Elte Hupkes Date: Wed, 13 Mar 2019 
15:34:10 +0100 Subject: [PATCH 148/180] Adding dilation rate parameter --- pytorch2keras/convolution_layers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py index 8c614df..1d92785 100644 --- a/pytorch2keras/convolution_layers.py +++ b/pytorch2keras/convolution_layers.py @@ -112,6 +112,7 @@ def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): activation=None, depth_multiplier=1, weights = weights, + dilation_rate=params['dilations'][0], bias_initializer='zeros', kernel_initializer='zeros' ) layers[scope_name] = conv(layers[input_name]) From 13f241421009f93b91126c6cc09852193dd1f321 Mon Sep 17 00:00:00 2001 From: Elte Hupkes Date: Thu, 14 Mar 2019 11:41:18 +0100 Subject: [PATCH 149/180] Converting clip with a min value of 0 as a ReLU --- pytorch2keras/operation_layers.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pytorch2keras/operation_layers.py b/pytorch2keras/operation_layers.py index 87516a3..c29fdbc 100644 --- a/pytorch2keras/operation_layers.py +++ b/pytorch2keras/operation_layers.py @@ -139,9 +139,13 @@ def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): """ print('Converting clip ...') - def target_layer(x, vmin=params['min'], vmax=params['max']): - import tensorflow as tf - return tf.clip_by_value(x, vmin, vmax) + if params['min'] == 0: + print("using ReLU({0})".format(params['max'])) + layer = keras.layers.ReLU(max_value=params['max']) + else: + def target_layer(x, vmin=params['min'], vmax=params['max']): + import tensorflow as tf + return tf.clip_by_value(x, vmin, vmax) + layer = keras.layers.Lambda(target_layer) - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) + layers[scope_name] = layer(layers[inputs[0]]) From 8290e65668a627f85a7b95941f787ffbc9f7e689 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 9 May 2019 12:10:20 +0300 
Subject: [PATCH 150/180] Update docs. --- docs/getting_started.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/getting_started.md b/docs/getting_started.md index 0818250..28bcd61 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -5,6 +5,9 @@ It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. Firstly, we need to load (or create) a valid PyTorch model: ``` +import torch +import torch.nn as nn + class TestConv2d(nn.Module): """ Module for Conv2d testing @@ -27,6 +30,9 @@ model = TestConv2d() The next step - create a dummy variable with correct shape: ``` +from torch.autograd import Variable +import numpy as np + input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) input_var = Variable(torch.FloatTensor(input_np)) ``` From ac4455772c29902ccd37e014b5d46ca65b51872d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 9 May 2019 12:14:13 +0300 Subject: [PATCH 151/180] Update readthedocs.yml. --- readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index eba6050..8032bcb 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -4,6 +4,6 @@ build: image: latest python: - setup_py_install: true + setup_py_install: true requirements_file: docs/requirements.txt \ No newline at end of file From 776ac6b5c7fa8536bfada3d1becab2e622b979d8 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 9 May 2019 12:17:36 +0300 Subject: [PATCH 152/180] Try not no install all packages. 
--- readthedocs.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index 8032bcb..f52cac7 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -3,7 +3,4 @@ build: image: latest -python: - setup_py_install: true - requirements_file: docs/requirements.txt \ No newline at end of file From ef8ecc0a7f13536af3c51aea72434e7f8b700b8d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 13 May 2019 12:30:54 +0300 Subject: [PATCH 153/180] Update softmax converter. --- pytorch2keras/activation_layers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pytorch2keras/activation_layers.py b/pytorch2keras/activation_layers.py index cdf8604..ebf8b8d 100644 --- a/pytorch2keras/activation_layers.py +++ b/pytorch2keras/activation_layers.py @@ -107,7 +107,15 @@ def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): else: tf_name = w_name + str(random.random()) - def target_layer(x, dim=params['dim']): + if 'axis' in params: + axis = params['axis'] + if 'value' in params: + axis = params['value'].item() + else: + if len(inputs) > 1: + axis = layers[inputs[1] + '_np'] + + def target_layer(x, dim=axis): import keras return keras.activations.softmax(x, axis=dim) From 3fcf8709e40fa9c67663a5ca584511c945f91be0 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Mon, 13 May 2019 12:32:38 +0300 Subject: [PATCH 154/180] Update PyPi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 74dcf29..f54ce23 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.16', + version='0.1.17', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 75e5d119e37c9758867c3fc2be3218c2a1e0c185 Mon Sep 17 00:00:00 2001 From: Vent1narc Date: Mon, 10 Jun 2019 14:03:54 +0300 Subject: [PATCH 155/180] onnx:ELU support --- .DS_Store | Bin 0 -> 6148 bytes pytorch2keras/activation_layers.py | 26 ++++++++++++++++++++++++++ pytorch2keras/layers.py | 3 ++- 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..1908cae69118dd32034af763652ceab892450ad5 GIT binary patch literal 6148 zcmeHKL2lDP6#dht#7R^&3#4}G4HAo}N+@bU2q{eyq)31eu3!PEwQH-fn%E(B8cGPt zhC2W^;3%AcgKz+Ne>|yTCq=9fg89<;k7wQ+&wumn@eq+{4BZ-0nTQM&_PHW1ON{%u zEZB-_VY1*c+THNP>4pyOxlNl^Kr8TfD5 z{Uw7gUZQ=B9#BXF^3oi!nc_CGtQ@%*{e+?n?t2E8r-#(0 z18UJ@vZ$3t6G!`TF4_`3qJ1FS^Z*Qt4lx^_f((nJo@W&?-?Q0#u9!bZJVSTDS?I^oEQ?RXe9 z?7<7Zdt%2ON7WC*uzOjzooBu`Ix1g32!q)618*S7zUu+<^0^Z6SV@O` z&{277Jl@&8TQO^Qs*{R2-rK8I%-cJ=lS#qYyivQ~KJHz-9lx8r|Ag>h0-K4n4drL< zGb+Cj>)aXy-7q)>ibp8WQ!Ly*HL;K_EJcsp^x_Flka;;bI7N|sE*7JMno6tX%g6~M zjJbpfk%#_SVX<4Uk(+X$>{%ZyJ`(UE5uPZ-Iy!>?FB98#@IW$W{UHWSA^t$ zTju@0b@ll_P11K-0j*jtM+;2<2uK<9MJw<}75E7YoD^ID literal 0 HcmV?d00001 diff --git a/pytorch2keras/activation_layers.py b/pytorch2keras/activation_layers.py index ebf8b8d..251dc7a 100644 --- a/pytorch2keras/activation_layers.py +++ b/pytorch2keras/activation_layers.py @@ -6,6 +6,32 @@ from .common import random_string +def convert_elu(params, w_name, scope_name, inputs, layers, weights, names): + """ + Convert elu layer. 
+ + Args: + params: dictionary with layer parameters + w_name: name prefix in state_dict + scope_name: pytorch scope name + inputs: pytorch node inputs + layers: dictionary with keras tensors + weights: pytorch state_dict + names: use short names for keras layers + """ + print('Converting elu ...') + + if names == 'short': + tf_name = 'ELU' + random_string(4) + elif names == 'keep': + tf_name = w_name + else: + tf_name = w_name + str(random.random()) + + elu = keras.layers.Activation('elu', name=tf_name) + layers[scope_name] = elu(layers[inputs[0]]) + + def convert_relu(params, w_name, scope_name, inputs, layers, weights, names): """ Convert relu layer. diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py index 9dea97f..cb437b1 100644 --- a/pytorch2keras/layers.py +++ b/pytorch2keras/layers.py @@ -11,7 +11,7 @@ from .elementwise_layers import convert_elementwise_add, convert_elementwise_mul, \ convert_elementwise_div, convert_elementwise_sub from .activation_layers import convert_relu, convert_lrelu, convert_selu, \ - convert_softmax, convert_sigmoid, convert_tanh, convert_hardtanh + convert_softmax, convert_sigmoid, convert_tanh, convert_hardtanh, convert_elu from .pooling_layers import convert_avgpool, convert_maxpool, convert_maxpool3, \ convert_adaptive_avg_pool2d, convert_adaptive_max_pool2d from .normalization_layers import convert_batchnorm, convert_instancenorm, convert_dropout @@ -45,6 +45,7 @@ 'onnx::Sub': convert_elementwise_sub, 'onnx::Sum': convert_sum, 'onnx::Concat': convert_concat, + 'onnx::Elu': convert_elu, 'onnx::Relu': convert_relu, 'onnx::LeakyRelu': convert_lrelu, 'onnx::Sigmoid': convert_sigmoid, From afde59a5c98ceeac301dd188c3055274e46be9bf Mon Sep 17 00:00:00 2001 From: Alexander Vorobyev <33327811+Vent1narc@users.noreply.github.com> Date: Mon, 10 Jun 2019 14:05:28 +0300 Subject: [PATCH 156/180] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
311a8ee..711abcb 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg)](https://pytorch2keras.readthedocs.io/en/latest/) -PyTorch to Keras model converter. +PyTorch to Keras model converter. With support for ELU Activation Layer. ## Installation @@ -207,6 +207,7 @@ Options: ## Supported layers * Activations: + + ELU + ReLU + LeakyReLU + SELU @@ -319,4 +320,4 @@ Options: Look at the `tests` directory. ## License -This software is covered by MIT License. \ No newline at end of file +This software is covered by MIT License. From 4399f7f109cebcb82d4097c14f3f92733b27dd78 Mon Sep 17 00:00:00 2001 From: Vent1narc Date: Thu, 13 Jun 2019 12:57:27 +0300 Subject: [PATCH 157/180] Converted list to NumPy array --- .DS_Store | Bin 6148 -> 6148 bytes pytorch2keras/convolution_layers.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 1908cae69118dd32034af763652ceab892450ad5..a23027d6301f4d83dac6920ae50e734b87e3697b 100644 GIT binary patch delta 21 ccmZoMXffDO%*J72Y^bAPWNfy%nyph107O#-=Kufz delta 21 ccmZoMXffDO%*J7CW~rlKXkoCqnyph107SV3?*IS* diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py index 1d92785..3ccc138 100644 --- a/pytorch2keras/convolution_layers.py +++ b/pytorch2keras/convolution_layers.py @@ -133,7 +133,7 @@ def convolve_lambda(i, k): return tf.nn.conv2d(i, k, strides=[1, stride_y, stride_x, 1], padding='VALID') input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) - weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W.transpose(0, 1, 2, 3)) + weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=np.array(W, dtype=np.float32).transpose(0, 1, 2, 3)) output_groups = [convolve_lambda(i, k) for i, k in zip(input_groups, weight_groups)] layer = tf.concat(axis=3, values=output_groups) From 83a8c58b2f9f0c705a8f4659a8152da8b6032085 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: 
Thu, 13 Jun 2019 15:40:21 +0300 Subject: [PATCH 158/180] Fix problem with normalization layer lambda. --- pytorch2keras/normalization_layers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch2keras/normalization_layers.py b/pytorch2keras/normalization_layers.py index 55d8552..315fe3a 100644 --- a/pytorch2keras/normalization_layers.py +++ b/pytorch2keras/normalization_layers.py @@ -100,6 +100,7 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, na beta = weights[bias_name].numpy() def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): + import tensorflow as tf layer = tf.contrib.layers.instance_norm( x, param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, From a833a11ef74eb607456c6ca298d56c41ea07d126 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 13 Jun 2019 15:41:53 +0300 Subject: [PATCH 159/180] Update PyPi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f54ce23..1e049d7 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.17', + version='0.1.18', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 89100423e9e5e236404ef059468e565258c8142b Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 13 Jun 2019 17:10:15 +0300 Subject: [PATCH 160/180] Fix channel ordering in runtime. 
--- pytorch2keras/normalization_layers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch2keras/normalization_layers.py b/pytorch2keras/normalization_layers.py index 315fe3a..cac48aa 100644 --- a/pytorch2keras/normalization_layers.py +++ b/pytorch2keras/normalization_layers.py @@ -101,10 +101,13 @@ def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, na def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): import tensorflow as tf + from keras import backend as K + data_format = 'NCHW' if K.image_data_format() == 'channels_first' else 'NHWC' + layer = tf.contrib.layers.instance_norm( x, param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, - epsilon=epsilon, data_format='NCHW', + epsilon=epsilon, data_format=data_format, trainable=False ) return layer From c76ba52317c63dfa7e136c1300f384d86a63fa18 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 13 Jun 2019 17:10:47 +0300 Subject: [PATCH 161/180] Update PyPi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1e049d7..1023143 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.18', + version='0.1.19', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 57dfde2fa6585f2283dc42248eedd9ef7f986d94 Mon Sep 17 00:00:00 2001 From: Alexander Vorobyev <33327811+Vent1narc@users.noreply.github.com> Date: Thu, 13 Jun 2019 17:32:33 +0300 Subject: [PATCH 162/180] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 711abcb..6b85aef 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg)](https://pytorch2keras.readthedocs.io/en/latest/) -PyTorch to Keras model converter. 
With support for ELU Activation Layer. +PyTorch to Keras model converter. ## Installation From 324b5290006da65dd6c4f811318835497d204f0d Mon Sep 17 00:00:00 2001 From: Alexander Vorobyev <33327811+Vent1narc@users.noreply.github.com> Date: Thu, 13 Jun 2019 17:32:54 +0300 Subject: [PATCH 163/180] Delete .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index a23027d6301f4d83dac6920ae50e734b87e3697b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKL2lDP6#dht#7R^&3#4}G4HAo}O4^Dp5K@{ZNRa>`T)_fRYsXe&k=P-28cGPt zhC2W^;3%AcgKz+Ne>|yTCq=9fg89<;k7wQ+&wumn@eq+{jNBSgnTQM&_PHW1D~$WO zEZLfBVY1*c+P$#j^g;*s+@?({pcVML6_77J?b12<6ldPwyO{dzAJSwH`8kfdG$JR1 z{*pl#uh2e54=AJ|d1;Q=&TvZL#K75ff%!zZyQDt#GOQ07R*qbZ-k~Ui`<}t&=>fIr zm|FCRENZ3E#L<3Sh_*xz=?KU+-3P;>6U@e^Aj9IQ7g#J5kE09)TJ-IFO z{@=R({GTT2JFS3L;Ga@J Date: Thu, 27 Jun 2019 11:32:24 +0300 Subject: [PATCH 164/180] Remove converter functions. Make a reference to onnx2keras. 
--- pytorch2keras/activation_layers.py | 197 -------------- pytorch2keras/common.py | 11 - pytorch2keras/constant_layers.py | 31 --- pytorch2keras/converter.py | 364 +++----------------------- pytorch2keras/convolution_layers.py | 297 --------------------- pytorch2keras/elementwise_layers.py | 156 ----------- pytorch2keras/embedding_layers.py | 42 --- pytorch2keras/layers.py | 74 ------ pytorch2keras/linear_layers.py | 101 ------- pytorch2keras/normalization_layers.py | 142 ---------- pytorch2keras/operation_layers.py | 151 ----------- pytorch2keras/padding_layers.py | 52 ---- pytorch2keras/pooling_layers.py | 264 ------------------- pytorch2keras/reshape_layers.py | 174 ------------ pytorch2keras/upsampling_layers.py | 78 ------ requirements.txt | 3 +- setup.py | 2 +- 17 files changed, 36 insertions(+), 2103 deletions(-) delete mode 100644 pytorch2keras/activation_layers.py delete mode 100644 pytorch2keras/common.py delete mode 100644 pytorch2keras/constant_layers.py delete mode 100644 pytorch2keras/convolution_layers.py delete mode 100644 pytorch2keras/elementwise_layers.py delete mode 100644 pytorch2keras/embedding_layers.py delete mode 100644 pytorch2keras/layers.py delete mode 100644 pytorch2keras/linear_layers.py delete mode 100644 pytorch2keras/normalization_layers.py delete mode 100644 pytorch2keras/operation_layers.py delete mode 100644 pytorch2keras/padding_layers.py delete mode 100644 pytorch2keras/pooling_layers.py delete mode 100644 pytorch2keras/reshape_layers.py delete mode 100644 pytorch2keras/upsampling_layers.py diff --git a/pytorch2keras/activation_layers.py b/pytorch2keras/activation_layers.py deleted file mode 100644 index ebf8b8d..0000000 --- a/pytorch2keras/activation_layers.py +++ /dev/null @@ -1,197 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_relu(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert relu 
layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting relu ...') - - if names == 'short': - tf_name = 'RELU' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - relu = keras.layers.Activation('relu', name=tf_name) - layers[scope_name] = relu(layers[inputs[0]]) - - -def convert_lrelu(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert leaky relu layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting lrelu ...') - - if names == 'short': - tf_name = 'lRELU' + random_string(3) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - leakyrelu = \ - keras.layers.LeakyReLU(alpha=params['alpha'], name=tf_name) - layers[scope_name] = leakyrelu(layers[inputs[0]]) - - -def convert_sigmoid(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert sigmoid layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting sigmoid ...') - - if names == 'short': - tf_name = 'SIGM' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - sigmoid = keras.layers.Activation('sigmoid', name=tf_name) - layers[scope_name] = sigmoid(layers[inputs[0]]) - - -def convert_softmax(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert softmax layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting softmax ...') - - if names == 'short': - tf_name = 'SMAX' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'axis' in params: - axis = params['axis'] - if 'value' in params: - axis = params['value'].item() - else: - if len(inputs) > 1: - axis = layers[inputs[1] + '_np'] - - def target_layer(x, dim=axis): - import keras - return keras.activations.softmax(x, axis=dim) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_tanh(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert tanh layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting tanh ...') - - if names == 'short': - tf_name = 'TANH' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - tanh = keras.layers.Activation('tanh', name=tf_name) - layers[scope_name] = tanh(layers[inputs[0]]) - - -def convert_hardtanh(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert hardtanh layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting hardtanh (clip) ...') - - def target_layer(x, max_val=float(params['max_val']), min_val=float(params['min_val'])): - return tf.minimum(max_val, tf.maximum(min_val, x)) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_selu(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert selu layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting selu ...') - - if names == 'short': - tf_name = 'SELU' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - selu = keras.layers.Activation('selu', name=tf_name) - layers[scope_name] = selu(layers[inputs[0]]) diff --git a/pytorch2keras/common.py b/pytorch2keras/common.py deleted file mode 100644 index 178ab8d..0000000 --- a/pytorch2keras/common.py +++ /dev/null @@ -1,11 +0,0 @@ -import random -import string - - -def random_string(length): - """ - Generate a random string for the layer name. - :param length: a length of required random string - :return: generated random string - """ - return ''.join(random.choice(string.ascii_letters) for _ in range(length)) \ No newline at end of file diff --git a/pytorch2keras/constant_layers.py b/pytorch2keras/constant_layers.py deleted file mode 100644 index cc08e62..0000000 --- a/pytorch2keras/constant_layers.py +++ /dev/null @@ -1,31 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_constant(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert constant layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting constant ...') - - params_list = params['value'].numpy() - - def target_layer(x, value=params_list): - return tf.constant(value.tolist(), shape=value.shape) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name + '_np'] = params_list # ad-hoc - layers[scope_name] = lambda_layer(layers[list(layers.keys())[0]]) # Temporary fix for nonexistent input name created by converter.py diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index e8132c1..f02d8d3 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -1,162 +1,26 @@ """ -The PyTorch2Keras converter module over JIT-trace. +The PyTorch2Keras converter interface """ -import contextlib -from packaging import version -from collections import defaultdict -import six - +from onnx2keras import onnx_to_keras import torch -import torch.jit -import torch.autograd -import torch.serialization -from torch.jit import _unique_state_dict - -if version.parse('0.4.1') <= version.parse(torch.__version__): - from torch.onnx import OperatorExportTypes - -from .layers import AVAILABLE_CONVERTERS - - -@contextlib.contextmanager -def set_training(model, mode): - """ - A context manager to temporarily set the training mode of 'model' - to 'mode', resetting it when we exit the with-block. A no-op if - mode is None. 
- """ - if mode is None: - yield - return - old_mode = model.training - if old_mode != mode: - model.train(mode) - try: - yield - finally: - if old_mode != mode: - model.train(old_mode) - - -if version.parse('1.0.0') <= version.parse(torch.__version__): - from torch._C import ListType - - # ONNX can't handle constants that are lists of tensors, which can - # get generated in constant prop. So we split them back into prim::ListConstructs - def _split_tensor_list_constants(g, block): - for node in block.nodes(): - for subblock in node.blocks(): - _split_tensor_list_constants(g, subblock) - if node.kind() == "prim::Constant": - output_type = node.output().type() - if output_type.isSubtypeOf(ListType.ofTensors()): - inputs = [g.create("prim::Constant").t_('value', t) - .insertBefore(node).output() - for t in node['value']] - lc = (g.create("prim::ListConstruct", inputs) - .insertBefore(node) - .output() - .setType(ListType.ofTensors())) - node.output().replaceAllUsesWith(lc) - -if version.parse('1.0.0') > version.parse(torch.__version__): - def _optimize_graph(graph, aten): - # run dce first to eliminate dead parts of the graph that might have been - # left behind by things like symbolic_override - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - - torch._C._jit_pass_peephole(graph) - torch._C._jit_pass_lint(graph) - graph = torch._C._jit_pass_onnx(graph, aten) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - graph = torch._C._jit_pass_canonicalize(graph) - torch._C._jit_pass_lint(graph) - return graph -else: - def _optimize_graph(graph, operator_export_type=OperatorExportTypes.RAW): - if version.parse('1.0.0') <= version.parse(torch.__version__): - torch._C._jit_pass_remove_inplace_ops(graph) - # we record now record some ops like ones/zeros - # into a trace where we previously recorded constants - # use constant prop to 
maintain our current level of onnx support - # without implementing symbolics for all of them - torch._C._jit_pass_constant_propagation(graph) - # _split_tensor_list_constants(graph, graph) - # run dce to eliminate dead parts of the graph that might have been - # left behind by things like symbolic_override - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_canonicalize_ops(graph) - torch._C._jit_pass_lint(graph) - - torch._C._jit_pass_peephole(graph, True) - torch._C._jit_pass_lint(graph) - - # onnx only supports tensors, but 1 / 2 = 0.5 and tensor(1) / tensor(2) = 0 - torch._C._jit_pass_prepare_division_for_onnx(graph) - # onnx only supports tensors, so we turn all out number types into tensors - torch._C._jit_pass_erase_number_types(graph) - # onnx does not support tuples, so try to remove them - torch._C._jit_pass_lower_all_tuples(graph) - # torch._C._jit_pass_peephole(graph, True) - torch._C._jit_pass_lint(graph) - - if operator_export_type != OperatorExportTypes.RAW: - graph = torch._C._jit_pass_onnx(graph, operator_export_type) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - else: - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - - torch._C._jit_pass_peephole(graph) - torch._C._jit_pass_lint(graph) - - # torch._C._jit_pass_peephole(graph, True) - torch._C._jit_pass_lint(graph) - - if operator_export_type != OperatorExportTypes.RAW: - graph = torch._C._jit_pass_onnx(graph, operator_export_type) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_onnx_peephole(graph) - torch._C._jit_pass_lint(graph) - torch._C._jit_pass_dce(graph) - torch._C._jit_pass_lint(graph) - return graph - - -def get_node_id(node): - import re - node_id = re.search(r"[\d]+", node.__str__()) - return node_id.group(0) - - -def get_leaf_id(node, state={}): - return str(node.uniqueName()) +import onnx +import io 
+import logging def pytorch_to_keras( - model, args, input_shapes, - change_ordering=False, training=False, verbose=False, names=False, + model, args, input_shapes=None, + change_ordering=False, verbose=False, names=None, ): """ - By given pytorch model convert layers with specified convertors. + By given PyTorch model convert layers with ONNX. Args: model: pytorch model args: pytorch model arguments input_shapes: keras input shapes (using for each InputLayer) change_ordering: change CHW to HWC - training: switch model to training mode verbose: verbose output names: use short names, use random-suffix or keep original names for keras layers @@ -164,205 +28,43 @@ def pytorch_to_keras( model: created keras model. """ - # PyTorch JIT tracing - if isinstance(args, torch.autograd.Variable): - args = (args, ) - - # Workaround for previous versions - if isinstance(input_shapes, tuple): - input_shapes = [input_shapes] - - orig_state_dict_keys = _unique_state_dict(model).keys() - - with set_training(model, training): - trace, torch_out = torch.jit.get_trace_graph(model, tuple(args)) - - if orig_state_dict_keys != _unique_state_dict(model).keys(): - raise RuntimeError("state_dict changed after running the tracer; " - "something weird is happening in your model!") - - # _optimize_trace(trace, False) - if version.parse('0.4.0') < version.parse(torch.__version__): - trace.set_graph(_optimize_graph(trace.graph(), OperatorExportTypes.ONNX)) - else: - trace.set_graph(_optimize_graph(trace.graph(), False)) - - trace.graph().lint() - - if verbose: - print(trace.graph()) - - # Get all graph nodes - nodes = list(trace.graph().nodes()) + logger = logging.getLogger('pytorch2keras') - # Optimize Flatten: - # When we have something loke that: - # - # %523 : Long() = onnx::Constant[value={0}](), scope: ResNet - # %524 : Dynamic = onnx::Shape(%522), scope: ResNet - # %526 : Long() = onnx::Gather[axis=0](%524, %523), scope: ResNet - # %527 : Long() = onnx::Constant[value={-1}](), scope: 
ResNet - # %534 : Dynamic = onnx::Unsqueeze[axes=[0]](%526) - # %535 : Dynamic = onnx::Unsqueeze[axes=[0]](%527) - # %536 : Dynamic = onnx::Concat[axis=0](%534, %535) - # %529 : Float(1, 512) = onnx::Reshape(%522, %536), scope: ResNet - # - # It's better to replace it with onnx::Flatten - if six.PY3: - from types import SimpleNamespace - seq_to_find = \ - ['onnx::Constant', 'onnx::Shape', 'onnx::Gather', - 'onnx::Constant', 'onnx::Unsqueeze', 'onnx::Unsqueeze', 'onnx::Concat', 'onnx::Reshape'] - k = 0 - s = 0 - for i, node in enumerate(nodes): - if node.kind() == seq_to_find[k]: - if k == 0: - s = i - k += 1 - if k == len(seq_to_find): - reshape_op = nodes[s + k - 1] - flatten_op = { - 'kind': (lambda: 'onnx::Flatten'), - 'attributeNames': (lambda: {}), - 'outputs': (lambda: list(reshape_op.outputs())), - 'scopeName': (lambda: reshape_op.scopeName()), - 'inputs': (lambda: list(reshape_op.inputs())[:1]), - '__str__': (lambda: reshape_op.__str__()), - } - nodes = nodes[:s] + [SimpleNamespace(**flatten_op)] + nodes[s+k:] - break - else: - k = 0 - s = -1 - - # Collect graph inputs and outputs - graph_outputs = [get_leaf_id(n) for n in trace.graph().outputs()] - graph_inputs = [get_leaf_id(n) for n in trace.graph().inputs()] - - # Collect model state dict - state_dict = _unique_state_dict(model) if verbose: - print('Graph inputs:', graph_inputs) - print('Graph outputs:', graph_outputs) - print('State dict:', list(state_dict)) - - import re - import keras - from keras import backend as K - K.set_image_data_format('channels_first') - - layers = dict() - keras_inputs = [] - for i in range(len(args)): - layers[graph_inputs[i]] = keras.layers.InputLayer( - input_shape=input_shapes[i], name='input{0}'.format(i) - ).output - keras_inputs.append(layers[graph_inputs[i]]) - - outputs = [] - group_indices = defaultdict(lambda: 0, {}) - - for node in nodes: - node_inputs = list(node.inputs()) - node_input_names = [] - - for node_input in node_inputs: - 
node_input_names.append(get_leaf_id(node_input)) - - node_type = node.kind() - - node_scope_name = node.scopeName() - node_id = get_node_id(node) - node_name_regex = re.findall(r'\[([\w\d.\-\[\]\s]+)\]', node_scope_name) - - try: - int(node_name_regex[-1]) - node_weigth_group_name = '.'.join( - node_name_regex[:-1] - ) - node_weights_name = node_weigth_group_name + '.' + str(group_indices[node_weigth_group_name]) - group_indices[node_weigth_group_name] += 1 + logging.basicConfig(level=logging.DEBUG) - except ValueError: - node_weights_name = '.'.join( - node_name_regex - ) - except IndexError: - node_weights_name = '.'.join(node_input_names) + logger.info('Converter is called.') - node_attrs = {k: node[k] for k in node.attributeNames()} + if names: + logger.warning('Name policy isn\'t supported now.') - node_outputs = list(node.outputs()) - node_outputs_names = [] - for node_output in node_outputs: - if node_output.node().scopeName(): - node_outputs_names.append(node_output.node().scopeName()) + if input_shapes: + logger.warning('Custom shapes isn\'t supported now.') - if verbose: - print(' ____ ') - print('graph node:', node_scope_name) - print('node id:', node_id) - print('type:', node_type) - print('inputs:', node_input_names) - print('outputs:', node_outputs_names) - print('name in state_dict:', node_weights_name) - print('attrs:', node_attrs) - print('is_terminal:', node_id in graph_outputs) - AVAILABLE_CONVERTERS[node_type]( - node_attrs, - node_weights_name, node_id, - node_input_names, - layers, state_dict, - names - ) - if node_id in graph_outputs: - outputs.append(layers[node_id]) + if not isinstance(args, list): + args = [args] - model = keras.models.Model(inputs=keras_inputs, outputs=outputs) + args = tuple(args) - if change_ordering: - import numpy as np - conf = model.get_config() + dummy_output = model(*args) - for layer in conf['layers']: - if layer['config'] and 'batch_input_shape' in layer['config']: - layer['config']['batch_input_shape'] = \ - 
tuple(np.reshape(np.array( - [ - [None] + - list(layer['config']['batch_input_shape'][2:][:]) + - [layer['config']['batch_input_shape'][1]] - ]), -1 - )) - if layer['config'] and 'target_shape' in layer['config']: - if len(list(layer['config']['target_shape'][1:][:])) > 0: - layer['config']['target_shape'] = \ - tuple(np.reshape(np.array( - [ - list(layer['config']['target_shape'][1:][:]), - layer['config']['target_shape'][0] - ]), -1 - ),) + if isinstance(dummy_output, torch.autograd.Variable): + dummy_output = [dummy_output] - if layer['config'] and 'data_format' in layer['config']: - layer['config']['data_format'] = 'channels_last' - if layer['config'] and 'axis' in layer['config']: - layer['config']['axis'] = 3 + input_names = ['input_{0}'.format(i) for i in range(len(args))] + output_names = ['output_{0}'.format(i) for i in range(len(dummy_output))] - K.set_image_data_format('channels_last') - model_tf_ordering = keras.models.Model.from_config(conf) + logger.debug('Input_names:') + logger.debug(input_names) - # from keras.utils.layer_utils import convert_all_kernels_in_model - # convert_all_kernels_in_model(model) + logger.debug('Output_names:') + logger.debug(output_names) - for dst_layer, src_layer in zip( - model_tf_ordering.layers, model.layers - ): - dst_layer.set_weights(src_layer.get_weights()) + stream = io.BytesIO() + torch.onnx.export(model, args, stream, verbose=verbose, input_names=input_names, output_names=output_names) - model = model_tf_ordering + stream.seek(0) + onnx_model = onnx.load(stream) + k_model = onnx_to_keras(onnx_model, input_names, verbose=verbose, change_ordering=change_ordering) - print('Your model was (probably) successfully converted! 
' - 'Please, follow the repository https://github.com/nerox8664/pytorch2keras and give a star :)') - return model + return k_model diff --git a/pytorch2keras/convolution_layers.py b/pytorch2keras/convolution_layers.py deleted file mode 100644 index 1d92785..0000000 --- a/pytorch2keras/convolution_layers.py +++ /dev/null @@ -1,297 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_conv(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert convolution layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting convolution ...') - - if names == 'short': - tf_name = 'C' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - input_name = inputs[0] - - if len(weights[weights_name].numpy().shape) == 5: # 3D conv - W = weights[weights_name].numpy().transpose(2, 3, 4, 1, 0) - height, width, channels, n_layers, n_filters = W.shape - - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding3D( - padding=(params['pads'][0], - params['pads'][1], - params['pads'][2]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv3D( - filters=n_filters, - kernel_size=(channels, height, width), - strides=(params['strides'][0], - 
params['strides'][1], - params['strides'][2]), - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - - elif len(weights[weights_name].numpy().shape) == 4: # 2D conv - if params['pads'][0] > 0 or params['pads'][1] > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(params['pads'][0], params['pads'][1]), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[input_name]) - input_name = padding_name - - W = weights[weights_name].numpy().transpose(2, 3, 1, 0) - height, width, channels_per_group, out_channels = W.shape - n_groups = params['group'] - in_channels = channels_per_group * n_groups - - if n_groups == in_channels and n_groups != 1: - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - W = W.transpose(0, 1, 3, 2) - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.DepthwiseConv2D( - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - use_bias=has_bias, - activation=None, - depth_multiplier=1, - weights = weights, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros' - ) - layers[scope_name] = conv(layers[input_name]) - - elif n_groups != 1: - # Example from https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html - # # Split input and weights and convolve them separately - # input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) - # weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights) - # output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)] - - # # Concat the convolved output together again - # conv = tf.concat(axis=3, 
values=output_groups) - def target_layer(x, groups=params['group'], stride_y=params['strides'][0], stride_x=params['strides'][1]): - x = tf.transpose(x, [0, 2, 3, 1]) - - def convolve_lambda(i, k): - return tf.nn.conv2d(i, k, strides=[1, stride_y, stride_x, 1], padding='VALID') - - input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x) - weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W.transpose(0, 1, 2, 3)) - output_groups = [convolve_lambda(i, k) for i, k in zip(input_groups, weight_groups)] - - layer = tf.concat(axis=3, values=output_groups) - - layer = tf.transpose(layer, [0, 3, 1, 2]) - return layer - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[input_name]) - - else: - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv2D( - filters=out_channels, - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - - else: # 1D conv - W = weights[weights_name].numpy().transpose(2, 1, 0) - width, channels, n_filters = W.shape - n_groups = params['group'] - if n_groups > 1: - raise AssertionError('Cannot convert conv1d with groups != 1') - - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding1D( - padding=params['pads'][0], - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = 
keras.layers.Conv1D( - filters=channels, - kernel_size=width, - strides=params['strides'], - padding='valid', - weights=weights, - use_bias=has_bias, - activation=None, - data_format='channels_first', - dilation_rate=params['dilations'], - bias_initializer='zeros', kernel_initializer='zeros', - name=tf_name - ) - layers[scope_name] = conv(layers[input_name]) - - -def convert_convtranspose(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert transposed convolution layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting transposed convolution ...') - - if names == 'short': - tf_name = 'C' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - - if len(weights[weights_name].numpy().shape) == 4: - W = weights[weights_name].numpy().transpose(2, 3, 1, 0) - height, width, n_filters, channels = W.shape - - n_groups = params['group'] - if n_groups > 1: - raise AssertionError('Cannot convert conv1d with groups != 1') - - if params['dilations'][0] > 1: - raise AssertionError('Cannot convert conv1d with dilation_rate != 1') - - if bias_name in weights: - biases = weights[bias_name].numpy() - has_bias = True - else: - biases = None - has_bias = False - - input_name = inputs[0] - - if has_bias: - weights = [W, biases] - else: - weights = [W] - - conv = keras.layers.Conv2DTranspose( - filters=n_filters, - kernel_size=(height, width), - strides=(params['strides'][0], params['strides'][1]), - padding='valid', - output_padding=0, - weights=weights, - use_bias=has_bias, - activation=None, - dilation_rate=params['dilations'][0], - bias_initializer='zeros', 
kernel_initializer='zeros', - name=tf_name - ) - - layers[scope_name] = conv(layers[input_name]) - - # Magic ad-hoc. - # See the Keras issue: https://github.com/keras-team/keras/issues/6777 - layers[scope_name].set_shape(layers[scope_name]._keras_shape) - - pads = params['pads'] - if pads[0] > 0: - assert(len(pads) == 2 or (pads[2] == pads[0] and pads[3] == pads[1])) - - crop = keras.layers.Cropping2D( - pads[:2], - name=tf_name + '_crop' - ) - layers[scope_name] = crop(layers[scope_name]) - else: - raise AssertionError('Layer is not supported for now') diff --git a/pytorch2keras/elementwise_layers.py b/pytorch2keras/elementwise_layers.py deleted file mode 100644 index acbde84..0000000 --- a/pytorch2keras/elementwise_layers.py +++ /dev/null @@ -1,156 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_elementwise_add( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise addition. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_add ...') - if 'broadcast' in params: - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'A' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - layer = tf.add(x[0], x[1]) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) - else: - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'A' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - add = keras.layers.Add(name=tf_name) - layers[scope_name] = add([model0, model1]) - - -def convert_elementwise_mul( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise multiplication. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_mul ...') - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'M' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - layer = tf.multiply( - x[0], - x[1] - ) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) - - -def convert_elementwise_div( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise multiplication. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_div ...') - - if names == 'short': - tf_name = 'D' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - layer = tf.div( - x[0], - x[1] - ) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer([layers[inputs[0]], layers[inputs[1]]]) - - -def convert_elementwise_sub( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert elementwise subtraction. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting elementwise_sub ...') - model0 = layers[inputs[0]] - model1 = layers[inputs[1]] - - if names == 'short': - tf_name = 'S' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - sub = keras.layers.Subtract(name=tf_name) - layers[scope_name] = sub([model0, model1]) diff --git a/pytorch2keras/embedding_layers.py b/pytorch2keras/embedding_layers.py deleted file mode 100644 index 3f0960a..0000000 --- a/pytorch2keras/embedding_layers.py +++ /dev/null @@ -1,42 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_gather(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert gather (embedding) layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting embedding ...') - - if names == 'short': - tf_name = 'EMBD' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy() - input_channels, output_channels = W.shape - - keras_weights = [W] - - dense = keras.layers.Embedding( - input_channels, - weights=keras_weights, output_dim=output_channels, name=tf_name - ) - layers[scope_name] = dense(layers[inputs[1]]) diff --git a/pytorch2keras/layers.py b/pytorch2keras/layers.py deleted file mode 100644 index 9dea97f..0000000 --- a/pytorch2keras/layers.py +++ /dev/null @@ -1,74 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf - - -from .convolution_layers import convert_conv, convert_convtranspose -from .reshape_layers import convert_flatten, convert_transpose, convert_reshape, \ - convert_squeeze, convert_unsqueeze, convert_shape -from .elementwise_layers import convert_elementwise_add, convert_elementwise_mul, \ - convert_elementwise_div, convert_elementwise_sub -from .activation_layers import convert_relu, convert_lrelu, convert_selu, \ - convert_softmax, convert_sigmoid, convert_tanh, convert_hardtanh -from .pooling_layers import convert_avgpool, convert_maxpool, convert_maxpool3, \ - convert_adaptive_avg_pool2d, convert_adaptive_max_pool2d -from .normalization_layers import convert_batchnorm, convert_instancenorm, convert_dropout -from .linear_layers import convert_gemm, convert_matmul -from .embedding_layers import convert_gather -from .upsampling_layers import convert_upsample_bilinear, convert_upsample -from .padding_layers import 
convert_padding -from .operation_layers import convert_concat, convert_slice, convert_sum, \ - convert_reduce_sum, convert_slice, convert_clip -from .constant_layers import convert_constant - - -AVAILABLE_CONVERTERS = { - 'onnx::Conv': convert_conv, - 'onnx::ConvTranspose': convert_convtranspose, - 'onnx::Flatten': convert_flatten, - 'onnx::Gemm': convert_gemm, - 'onnx::MaxPool': convert_maxpool, - 'max_pool2d': convert_maxpool, - 'aten::max_pool3d': convert_maxpool3, - 'aten::max_pool2d_with_indices': convert_maxpool, - 'aten::max_pool2d': convert_maxpool, - 'aten::avg_pool2d': convert_avgpool, - 'onnx::AveragePool': convert_avgpool, - 'onnx::Dropout': convert_dropout, - 'onnx::BatchNormalization': convert_batchnorm, - 'onnx::InstanceNormalization': convert_instancenorm, - 'onnx::Add': convert_elementwise_add, - 'onnx::Mul': convert_elementwise_mul, - 'onnx::Div': convert_elementwise_div, - 'onnx::Sub': convert_elementwise_sub, - 'onnx::Sum': convert_sum, - 'onnx::Concat': convert_concat, - 'onnx::Relu': convert_relu, - 'onnx::LeakyRelu': convert_lrelu, - 'onnx::Sigmoid': convert_sigmoid, - 'onnx::Softmax': convert_softmax, - 'aten::softmax': convert_softmax, - 'onnx::Tanh': convert_tanh, - 'aten::hardtanh': convert_hardtanh, - 'onnx::Selu': convert_selu, - 'onnx::Transpose': convert_transpose, - 'onnx::Reshape': convert_reshape, - 'onnx::MatMul': convert_matmul, - 'onnx::Gather': convert_gather, - 'onnx::ReduceSum': convert_reduce_sum, - 'onnx::Constant': convert_constant, - 'aten::upsample_bilinear2d': convert_upsample_bilinear, - 'onnx::Upsample': convert_upsample, - 'onnx::Pad': convert_padding, - 'onnx::GlobalAveragePool': convert_adaptive_avg_pool2d, - 'aten::adaptive_avg_pool2d': convert_adaptive_avg_pool2d, - 'onnx::GlobalMaxPool': convert_adaptive_max_pool2d, - 'aten::adaptive_max_pool2d': convert_adaptive_max_pool2d, - 'onnx::Slice': convert_slice, - 'onnx::Squeeze': convert_squeeze, - 'onnx::Unsqueeze': convert_unsqueeze, - 'onnx::Shape': convert_shape, 
- 'onnx::Clip': convert_clip, -} diff --git a/pytorch2keras/linear_layers.py b/pytorch2keras/linear_layers.py deleted file mode 100644 index 147eeef..0000000 --- a/pytorch2keras/linear_layers.py +++ /dev/null @@ -1,101 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_gemm(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert Linear. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting Linear ...') - - if names == 'short': - tf_name = 'FC' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy().transpose() - input_channels, output_channels = W.shape - - keras_weights = [W] - has_bias = False - if bias_name in weights: - bias = weights[bias_name].numpy() - keras_weights = [W, bias] - has_bias = True - - dense = keras.layers.Dense( - output_channels, - weights=keras_weights, use_bias=has_bias, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', - ) - - layers[scope_name] = dense(layers[inputs[0]]) - - -def convert_matmul(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert matmul layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting matmul ...') - - if names == 'short': - tf_name = 'MMUL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if len(inputs) == 1: - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy().transpose() - input_channels, output_channels = W.shape - - keras_weights = [W] - - dense = keras.layers.Dense( - output_channels, - weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', - ) - layers[scope_name] = dense(layers[inputs[0]]) - elif len(inputs) == 2: - weights_name = '{0}.weight'.format(w_name) - - W = weights[weights_name].numpy().transpose() - input_channels, output_channels = W.shape - - keras_weights = [W] - - dense = keras.layers.Dense( - output_channels, - weights=keras_weights, use_bias=False, name=tf_name, bias_initializer='zeros', kernel_initializer='zeros', - ) - layers[scope_name] = dense(layers[inputs[0]]) - else: - raise AssertionError('Cannot convert matmul layer') diff --git a/pytorch2keras/normalization_layers.py b/pytorch2keras/normalization_layers.py deleted file mode 100644 index cac48aa..0000000 --- a/pytorch2keras/normalization_layers.py +++ /dev/null @@ -1,142 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_batchnorm(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert batch normalization layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting batchnorm ...') - - if names == 'short': - tf_name = 'BN' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - mean_name = '{0}.running_mean'.format(w_name) - var_name = '{0}.running_var'.format(w_name) - - if bias_name in weights: - beta = weights[bias_name].numpy() - - if weights_name in weights: - gamma = weights[weights_name].numpy() - - mean = weights[mean_name].numpy() - variance = weights[var_name].numpy() - - eps = params['epsilon'] - momentum = params['momentum'] - - if weights_name not in weights: - bn = keras.layers.BatchNormalization( - axis=1, momentum=momentum, epsilon=eps, - center=False, scale=False, - weights=[mean, variance], - name=tf_name - ) - else: - bn = keras.layers.BatchNormalization( - axis=1, momentum=momentum, epsilon=eps, - weights=[gamma, beta, mean, variance], - name=tf_name - ) - layers[scope_name] = bn(layers[inputs[0]]) - - -def convert_instancenorm(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert instance normalization layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting instancenorm ...') - - if names == 'short': - tf_name = 'IN' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - assert(len(inputs) == 3) - - bias_name = '{0}.bias'.format(w_name) - weights_name = '{0}.weight'.format(w_name) - - # Use previously taken constants - if inputs[-2] + '_np' in layers: - gamma = layers[inputs[-2] + '_np'] - else: - gamma = weights[weights_name].numpy() - - if inputs[-1] + '_np' in layers: - beta = layers[inputs[-1] + '_np'] - else: - beta = weights[bias_name].numpy() - - def target_layer(x, epsilon=params['epsilon'], gamma=gamma, beta=beta): - import tensorflow as tf - from keras import backend as K - data_format = 'NCHW' if K.image_data_format() == 'channels_first' else 'NHWC' - - layer = tf.contrib.layers.instance_norm( - x, - param_initializers={'beta': tf.constant_initializer(beta), 'gamma': tf.constant_initializer(gamma)}, - epsilon=epsilon, data_format=data_format, - trainable=False - ) - return layer - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_dropout(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert dropout. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting dropout ...') - - if names == 'short': - tf_name = 'DO' + random_string(6) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - dropout = keras.layers.Dropout(rate=params['ratio'], name=tf_name) - layers[scope_name] = dropout(layers[inputs[0]]) \ No newline at end of file diff --git a/pytorch2keras/operation_layers.py b/pytorch2keras/operation_layers.py deleted file mode 100644 index c29fdbc..0000000 --- a/pytorch2keras/operation_layers.py +++ /dev/null @@ -1,151 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - - -def convert_sum( - params, w_name, scope_name, inputs, layers, weights, names -): - """ - Convert sum. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting Sum ...') - - def target_layer(x): - import keras.backend as K - return K.sum(x) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_reduce_sum(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert reduce_sum layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting reduce_sum ...') - - keepdims = params['keepdims'] > 0 - axis = params['axes'] - - def target_layer(x, keepdims=keepdims, axis=axis): - import keras.backend as K - return K.sum(x, keepdims=keepdims, axis=axis) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - -def convert_concat(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert concatenation. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting concat ...') - concat_nodes = [layers[i] for i in inputs] - - if len(concat_nodes) == 1: - # no-op - layers[scope_name] = concat_nodes[0] - return - - if names == 'short': - tf_name = 'CAT' + random_string(5) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - cat = keras.layers.Concatenate(name=tf_name, axis=params['axis']) - layers[scope_name] = cat(concat_nodes) - - -def convert_slice(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert slice operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting slice ...') - - if len(params['axes']) > 1: - raise AssertionError('Cannot convert slice by multiple dimensions') - - if params['axes'][0] not in [0, 1, 2, 3]: - raise AssertionError('Slice by dimension more than 3 or less than 0 is not supported') - - def target_layer(x, axis=int(params['axes'][0]), start=int(params['starts'][0]), end=int(params['ends'][0])): - if axis == 0: - return x[start:end] - elif axis == 1: - return x[:, start:end] - elif axis == 2: - return x[:, :, start:end] - elif axis == 3: - return x[:, :, :, start:end] - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_clip(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert clip operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting clip ...') - - if params['min'] == 0: - print("using ReLU({0})".format(params['max'])) - layer = keras.layers.ReLU(max_value=params['max']) - else: - def target_layer(x, vmin=params['min'], vmax=params['max']): - import tensorflow as tf - return tf.clip_by_value(x, vmin, vmax) - layer = keras.layers.Lambda(target_layer) - - layers[scope_name] = layer(layers[inputs[0]]) diff --git a/pytorch2keras/padding_layers.py b/pytorch2keras/padding_layers.py deleted file mode 100644 index f136dbb..0000000 --- a/pytorch2keras/padding_layers.py +++ /dev/null @@ -1,52 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_padding(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert padding layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting padding...') - - if params['mode'] == 'constant': - # raise AssertionError('Cannot convert non-constant padding') - - if params['value'] != 0.0: - raise AssertionError('Cannot convert non-zero padding') - - if names: - tf_name = 'PADD' + random_string(4) - else: - tf_name = w_name + str(random.random()) - - # Magic ordering - padding_name = tf_name - padding_layer = keras.layers.ZeroPadding2D( - padding=((params['pads'][2], params['pads'][6]), (params['pads'][3], params['pads'][7])), - name=padding_name - ) - - layers[scope_name] = padding_layer(layers[inputs[0]]) - elif params['mode'] == 'reflect': - - def target_layer(x, pads=params['pads']): - # x = tf.transpose(x, [0, 2, 3, 1]) - layer = tf.pad(x, [[0, 0], [0, 0], [pads[2], pads[6]], [pads[3], pads[7]]], 'REFLECT') - # layer = tf.transpose(layer, [0, 3, 1, 2]) - return layer - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) diff --git a/pytorch2keras/pooling_layers.py b/pytorch2keras/pooling_layers.py deleted file mode 100644 index 6784869..0000000 --- a/pytorch2keras/pooling_layers.py +++ /dev/null @@ -1,264 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_avgpool(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert Average pooling. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting pooling ...') - - if names == 'short': - tf_name = 'P' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'kernel_shape' in params: - height, width = params['kernel_shape'] - else: - height, width = params['kernel_size'] - - if 'strides' in params: - stride_height, stride_width = params['strides'] - else: - stride_height, stride_width = params['stride'] - - if 'pads' in params: - padding_h, padding_w, _, _ = params['pads'] - else: - padding_h, padding_w = params['padding'] - - input_name = inputs[0] - pad = 'valid' - - if height % 2 == 1 and width % 2 == 1 and \ - height // 2 == padding_h and width // 2 == padding_w and \ - stride_height == 1 and stride_width == 1: - pad = 'same' - else: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(padding_h, padding_w), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - # Pooling type AveragePooling2D - pooling = keras.layers.AveragePooling2D( - pool_size=(height, width), - strides=(stride_height, stride_width), - padding=pad, - name=tf_name, - data_format='channels_first' - ) - - layers[scope_name] = pooling(layers[input_name]) - - -def convert_maxpool(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert Max pooling. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - - print('Converting pooling ...') - - if names == 'short': - tf_name = 'P' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'kernel_shape' in params: - height, width = params['kernel_shape'] - else: - height, width = params['kernel_size'] - - if 'strides' in params: - stride_height, stride_width = params['strides'] - else: - stride_height, stride_width = params['stride'] - - if 'pads' in params: - padding_h, padding_w, _, _ = params['pads'] - else: - padding_h, padding_w = params['padding'] - - input_name = inputs[0] - pad = 'valid' - - if height % 2 == 1 and width % 2 == 1 and \ - height // 2 == padding_h and width // 2 == padding_w and \ - stride_height == 1 and stride_width == 1: - pad = 'same' - else: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding2D( - padding=(padding_h, padding_w), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - # Pooling type MaxPooling2D - pooling = keras.layers.MaxPooling2D( - pool_size=(height, width), - strides=(stride_height, stride_width), - padding=pad, - name=tf_name, - data_format='channels_first' - ) - - layers[scope_name] = pooling(layers[input_name]) - - -def convert_maxpool3(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert 3d Max pooling. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - - print('Converting pooling ...') - - if names == 'short': - tf_name = 'P' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'kernel_shape' in params: - height, width, depth = params['kernel_shape'] - else: - height, width, depth = params['kernel_size'] - - if 'strides' in params: - stride_height, stride_width, stride_depth = params['strides'] - else: - stride_height, stride_width, stride_depth = params['stride'] - - if 'pads' in params: - padding_h, padding_w, padding_d, _, _ = params['pads'] - else: - padding_h, padding_w, padding_d = params['padding'] - - input_name = inputs[0] - if padding_h > 0 and padding_w > 0 and padding_d > 0: - padding_name = tf_name + '_pad' - padding_layer = keras.layers.ZeroPadding3D( - padding=(padding_h, padding_w, padding_d), - name=padding_name - ) - layers[padding_name] = padding_layer(layers[inputs[0]]) - input_name = padding_name - - # Pooling type - pooling = keras.layers.MaxPooling3D( - pool_size=(height, width, depth), - strides=(stride_height, stride_width, stride_depth), - padding='valid', - name=tf_name - ) - - layers[scope_name] = pooling(layers[input_name]) - - -def convert_adaptive_avg_pool2d(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert adaptive_avg_pool2d layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting adaptive_avg_pool2d...') - - if names == 'short': - tf_name = 'APOL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - global_pool = keras.layers.GlobalAveragePooling2D(data_format='channels_first', name=tf_name) - layers[scope_name] = global_pool(layers[inputs[0]]) - - def target_layer(x): - import keras - return keras.backend.expand_dims(x) - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims - layers[scope_name] = lambda_layer(layers[scope_name]) - - -def convert_adaptive_max_pool2d(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert convert_adaptive_max_pool2d layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting adaptive_avg_pool2d...') - - if names == 'short': - tf_name = 'APOL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - global_pool = keras.layers.GlobalMaxPooling2D(data_format='channels_first', name=tf_name) - layers[scope_name] = global_pool(layers[inputs[0]]) - - def target_layer(x): - import keras - return keras.backend.expand_dims(x) - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[scope_name]) # double expand dims - layers[scope_name] = lambda_layer(layers[scope_name]) diff --git a/pytorch2keras/reshape_layers.py b/pytorch2keras/reshape_layers.py deleted file mode 100644 index 83a8efb..0000000 --- a/pytorch2keras/reshape_layers.py +++ /dev/null @@ -1,174 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_flatten(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert reshape(view). 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting flatten ...') - - if names == 'short': - tf_name = 'R' + random_string(7) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - reshape = keras.layers.Reshape([-1], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) - - -def convert_transpose(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert transpose layer. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting transpose ...') - if params['perm'][0] != 0: - if inputs[0] in layers: - print('!!! Cannot permute batch dimension. Result may be wrong !!!') - layers[scope_name] = layers[inputs[0]] - else: - print('Skip weight matrix transpose, result may be wrong.') - else: - if names: - tf_name = 'PERM' + random_string(4) - else: - tf_name = w_name + str(random.random()) - permute = keras.layers.Permute(params['perm'][1:], name=tf_name) - layers[scope_name] = permute(layers[inputs[0]]) - - -def convert_reshape(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert reshape layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting reshape ...') - if names == 'short': - tf_name = 'RESH' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if len(inputs) > 1: - if layers[inputs[1]][0] == -1: - print('Cannot deduct batch size! It will be omitted, but result may be wrong.') - - reshape = keras.layers.Reshape(layers[inputs[1] + '_np'], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) - else: - if inputs[0] in layers: - reshape = keras.layers.Reshape(params['shape'][1:], name=tf_name) - layers[scope_name] = reshape(layers[inputs[0]]) - else: - print('Skip weight matrix transpose, but result may be wrong.') - -def convert_squeeze(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert squeeze operation. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting squeeze ...') - - if len(params['axes']) > 1: - raise AssertionError('Cannot convert squeeze by multiple dimensions') - - def target_layer(x, axis=int(params['axes'][0])): - import tensorflow as tf - return tf.squeeze(x, axis=axis) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_unsqueeze(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert unsqueeze operation. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting unsqueeze ...') - - if names == 'short': - tf_name = 'UNSQ' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - def target_layer(x): - import keras - return keras.backend.expand_dims(x) - - lambda_layer = keras.layers.Lambda(target_layer, name=tf_name + 'E') - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_shape(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert shape operation. - - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting shape ...') - - def target_layer(x): - import tensorflow as tf - return tf.shape(x) - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) \ No newline at end of file diff --git a/pytorch2keras/upsampling_layers.py b/pytorch2keras/upsampling_layers.py deleted file mode 100644 index 1375299..0000000 --- a/pytorch2keras/upsampling_layers.py +++ /dev/null @@ -1,78 +0,0 @@ -import keras.layers -import numpy as np -import random -import string -import tensorflow as tf -from .common import random_string - - -def convert_upsample_bilinear(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert upsample_bilinear2d layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting upsample...') - - if names == 'short': - tf_name = 'UPSL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - output_size = params['output_size'] - align_corners = params['align_corners'] > 0 - - def target_layer(x, size=output_size, align_corners=align_corners): - import tensorflow as tf - x = tf.transpose(x, [0, 2, 3, 1]) - x = tf.image.resize_images(x, size, align_corners=align_corners) - x = tf.transpose(x, [0, 3, 1, 2]) - return x - - lambda_layer = keras.layers.Lambda(target_layer) - layers[scope_name] = lambda_layer(layers[inputs[0]]) - - -def convert_upsample(params, w_name, scope_name, inputs, layers, weights, names): - """ - Convert nearest upsampling layer. 
- - Args: - params: dictionary with layer parameters - w_name: name prefix in state_dict - scope_name: pytorch scope name - inputs: pytorch node inputs - layers: dictionary with keras tensors - weights: pytorch state_dict - names: use short names for keras layers - """ - print('Converting upsample...') - - if params['mode'] != 'nearest': - raise AssertionError('Cannot convert non-nearest upsampling') - - if names == 'short': - tf_name = 'UPSL' + random_string(4) - elif names == 'keep': - tf_name = w_name - else: - tf_name = w_name + str(random.random()) - - if 'height_scale' in params: - scale = (params['height_scale'], params['width_scale']) - elif len(inputs) == 2: - scale = layers[inputs[-1] + '_np'][-2:] - - upsampling = keras.layers.UpSampling2D( - size=scale, name=tf_name - ) - layers[scope_name] = upsampling(layers[inputs[0]]) diff --git a/requirements.txt b/requirements.txt index bc9860e..97ec6ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ tensorflow numpy torch torchvision -packaging \ No newline at end of file +onnx +git+git://github.com/nerox8664/onnx2keras@master#egg=onnx2keras \ No newline at end of file diff --git a/setup.py b/setup.py index 1023143..7a80a2c 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.1.19', + version='0.2.0', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 9d5caec219cb8276c703bf7a9128b64c486e357a Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 11:39:50 +0300 Subject: [PATCH 165/180] Returned limited support of fully-conv networks --- pytorch2keras/converter.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index f02d8d3..e8c5d07 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -11,7 +11,7 @@ def pytorch_to_keras( model, args, 
input_shapes=None, - change_ordering=False, verbose=False, names=None, + change_ordering=False, verbose=False, name_policy=None, ): """ By given PyTorch model convert layers with ONNX. @@ -22,7 +22,7 @@ def pytorch_to_keras( input_shapes: keras input shapes (using for each InputLayer) change_ordering: change CHW to HWC verbose: verbose output - names: use short names, use random-suffix or keep original names for keras layers + name_policy: use short names, use random-suffix or keep original names for keras layers Returns: model: created keras model. @@ -35,12 +35,15 @@ def pytorch_to_keras( logger.info('Converter is called.') - if names: + if name_policy: logger.warning('Name policy isn\'t supported now.') if input_shapes: logger.warning('Custom shapes isn\'t supported now.') + if not isinstance(input_shapes, list): + input_shapes = [input_shapes] + if not isinstance(args, list): args = [args] @@ -65,6 +68,8 @@ def pytorch_to_keras( stream.seek(0) onnx_model = onnx.load(stream) - k_model = onnx_to_keras(onnx_model, input_names, verbose=verbose, change_ordering=change_ordering) + k_model = onnx_to_keras(onnx_model=onnx_model, input_names=input_names, + input_shapes=input_shapes, name_policy=name_policy, + verbose=verbose, change_ordering=change_ordering) return k_model From d16a3f9f509c3e21f6cfe58599713a248bac6451 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 12:00:00 +0300 Subject: [PATCH 166/180] Add onnx2keras package to requirements. 
--- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 97ec6ad..917f0b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ numpy torch torchvision onnx -git+git://github.com/nerox8664/onnx2keras@master#egg=onnx2keras \ No newline at end of file +onnx2keras \ No newline at end of file From 52c4160066a018bd9d9499cc4bc2220bf3e042c8 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 12:01:13 +0300 Subject: [PATCH 167/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7a80a2c..73b022c 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.2.0', + version='0.2.1', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From c27f1a1ddf5b77a575face22e8c48ab6b1fbf53e Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 14:56:30 +0300 Subject: [PATCH 168/180] Don't wrap None into List. --- pytorch2keras/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index e8c5d07..0ac198c 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -41,7 +41,7 @@ def pytorch_to_keras( if input_shapes: logger.warning('Custom shapes isn\'t supported now.') - if not isinstance(input_shapes, list): + if input_shapes and not isinstance(input_shapes, list): input_shapes = [input_shapes] if not isinstance(args, list): From 52d5fdc270099ff0fa4f856e413005b9dc789f29 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 15:01:22 +0300 Subject: [PATCH 169/180] Update pypi version. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 73b022c..4f2f7d6 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.2.1', + version='0.2.2', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From da059f38a750fb802d90b95dc72b0bcfb18507df Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 22:36:44 +0300 Subject: [PATCH 170/180] Update docs and readme. --- README.md | 119 +++------------------------- docs/api.md | 21 +++-- docs/getting_started.md | 7 ++ docs/index.md | 2 +- docs/installation.md | 30 +++---- docs/known_problems.md | 27 +------ docs/supported_layers_and_models.md | 68 ---------------- 7 files changed, 47 insertions(+), 227 deletions(-) diff --git a/README.md b/README.md index 6b85aef..2bd043e 100644 --- a/README.md +++ b/README.md @@ -28,34 +28,6 @@ To use the converter properly, please, make changes in your `~/.keras/keras.json ... ``` -## PyTorch 0.4.1 and greater - -There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): - -To make it work, please, cast all your `.view()` parameters to `int`. For example: - -``` -class ResNet(torchvision.models.resnet.ResNet): - def __init__(self, *args, **kwargs): - super(ResNet, self).__init__(*args, **kwargs) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << Here - x = self.fc(x) - return x -``` - ## Tensorflow.js For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. 
@@ -168,7 +140,7 @@ input_var = Variable(torch.FloatTensor(input_np)) We use the dummy-variable to trace the model (with jit.trace): ``` -from converter import pytorch_to_keras +from pytorch2keras import pytorch_to_keras # we should specify shape of the input tensor k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) ``` @@ -187,39 +159,36 @@ That's all! If all the modules have converted properly, the Keras model will be ## API Here is the only method `pytorch_to_keras` from `pytorch2keras` module. + ``` def pytorch_to_keras( - model, args, input_shapes, - change_ordering=False, training=False, verbose=False, names=False, -) + model, args, input_shapes=None, + change_ordering=False, verbose=False, name_policy=None, +): ``` Options: -* model -- a PyTorch module to convert; -* args -- list of dummy variables with proper shapes; -* input_shapes -- list with shape tuples; -* change_ordering -- boolean, if enabled, the converter will try to change `BCHW` to `BHWC` -* training -- boolean, switch model to training mode (never use it) -* verbose -- boolean, verbose output -* names -- choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. +* `model` - a PyTorch model (nn.Module) to convert; +* `args` - a list of dummy variables with proper shapes; +* `input_shapes` - (experimental) list with overrided shapes for inputs; +* `change_ordering` - (experimental) boolean, if enabled, the converter will try to change `BCHW` to `BHWC` +* `verbose` - boolean, detailed log of conversion +* `name_policy` - (experimental) choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. 
## Supported layers * Activations: - + ELU + ReLU + LeakyReLU + SELU + Sigmoid + Softmax + Tanh - + HardTanh * Constants * Convolutions: - + Conv1d + Conv2d + ConvTrsnpose2d @@ -229,25 +198,16 @@ Options: + Sub + Div -* Embedding - * Linear * Normalizations: + BatchNorm2d + InstanceNorm2d - + Dropout * Poolings: + MaxPool2d + AvgPool2d + Global MaxPool2d (adaptive pooling to shape [1, 1]) - + Global AvgPool2d (adaptive pooling to shape [1, 1]) - -* Not tested yet: - + Upsampling - + Padding - + Reshape ## Models converted with pytorch2keras @@ -255,66 +215,9 @@ Options: * ResNet* * VGG* * PreResNet* -* SqueezeNet (with ceil_mode=False) -* SqueezeNext * DenseNet* * AlexNet -* Inception -* SeNet * Mobilenet v2 -* DiracNet -* DARTS -* DRNC - -| Model | Top1 | Top5 | Params | FLOPs | Source weights | Remarks | -| --- | ---: | ---: | ---: | ---: | ---: | ---: | -| ResNet-10 | 37.09 | 15.55 | 5,418,792 | 892.62M | osmr's repo | Success | -| ResNet-12 | 35.86 | 14.46 | 5,492,776 | 1,124.23M | osmr's repo | Success | -| ResNet-14 | 32.85 | 12.41 | 5,788,200 | 1,355.64M | osmr's repo | Success | -| ResNet-16 | 30.68 | 11.10 | 6,968,872 | 1,586.95M | osmr's repo | Success | -| ResNet-18 x0.25 | 49.16 | 24.45 | 831,096 | 136.64M | osmr's repo | Success | -| ResNet-18 x0.5 | 36.54 | 14.96 | 3,055,880 | 485.22M | osmr's repo | Success | -| ResNet-18 x0.75 | 33.25 | 12.54 | 6,675,352 | 1,045.75M | osmr's repo | Success | -| ResNet-18 | 29.13 | 9.94 | 11,689,512 | 1,818.21M | osmr's repo | Success | -| ResNet-34 | 25.34 | 7.92 | 21,797,672 | 3,669.16M | osmr's repo | Success | -| ResNet-50 | 23.50 | 6.87 | 25,557,032 | 3,868.96M | osmr's repo | Success | -| ResNet-50b | 22.92 | 6.44 | 25,557,032 | 4,100.70M | osmr's repo | Success | -| ResNet-101 | 21.66 | 5.99 | 44,549,160 | 7,586.30M | osmr's repo | Success | -| ResNet-101b | 21.18 | 5.60 | 44,549,160 | 7,818.04M | osmr's repo | Success | -| ResNet-152 | 21.01 | 5.61 | 60,192,808 | 11,304.85M | osmr's repo | Success | -| 
ResNet-152b | 20.54 | 5.37 | 60,192,808 | 11,536.58M | osmr's repo | Success | -| PreResNet-18 | 28.72 | 9.88 | 11,687,848 | 1,818.41M | osmr's repo | Success | -| PreResNet-34 | 25.88 | 8.11 | 21,796,008 | 3,669.36M | osmr's repo | Success | -| PreResNet-50 | 23.39 | 6.68 | 25,549,480 | 3,869.16M | osmr's repo | Success | -| PreResNet-50b | 23.16 | 6.64 | 25,549,480 | 4,100.90M | osmr's repo | Success | -| PreResNet-101 | 21.45 | 5.75 | 44,541,608 | 7,586.50M | osmr's repo | Success | -| PreResNet-101b | 21.73 | 5.88 | 44,541,608 | 7,818.24M | osmr's repo | Success | -| PreResNet-152 | 20.70 | 5.32 | 60,185,256 | 11,305.05M | osmr's repo | Success | -| PreResNet-152b | 21.00 | 5.75 | 60,185,256 | 11,536.78M | Gluon Model Zoo| Success | -| PreResNet-200b | 21.10 | 5.64 | 64,666,280 | 15,040.27M | tornadomeet/ResNet | Success | -| DenseNet-121 | 25.11 | 7.80 | 7,978,856 | 2,852.39M | Gluon Model Zoo| Success | -| DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | -| DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | -| DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | -| DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | -| DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | -| SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 828.30M | osmr's repo | Success | -| SqueezeNet v1.1 | 39.09 | 17.39 | 1,235,496 | 354.88M | osmr's repo | Success | -| MobileNet x0.25 | 45.78 | 22.18 | 470,072 | 42.30M | osmr's repo | Success | -| MobileNet x0.5 | 36.12 | 14.81 | 1,331,592 | 152.04M | osmr's repo | Success | -| MobileNet x0.75 | 32.71 | 12.28 | 2,585,560 | 329.22M | Gluon Model Zoo| Success | -| MobileNet x1.0 | 29.25 | 10.03 | 4,231,976 | 573.83M | Gluon Model Zoo| Success | -| FD-MobileNet x0.25 | 56.19 | 31.38 | 383,160 | 12.44M | osmr's repo | Success | -| FD-MobileNet x0.5 | 42.62 | 19.69 | 993,928 | 40.93M | osmr's repo | Success 
| -| FD-MobileNet x1.0 | 35.95 | 14.72 | 2,901,288 | 146.08M | clavichord93/FD-MobileNet | Success | -| MobileNetV2 x0.25 | 48.89 | 25.24 | 1,516,392 | 32.22M | Gluon Model Zoo| Success | -| MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | -| MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | -| MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | -| InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | Gluon Model Zoo| Success | -| DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | -| DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | -| DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | ## Usage Look at the `tests` directory. diff --git a/docs/api.md b/docs/api.md index 1f062a9..a67194e 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,18 +1,17 @@ -Here is the only method `pytorch_to_keras` from `pytorch2keras` module. +Since version `0.2.1` the converter has the following API: ``` def pytorch_to_keras( - model, args, input_shapes, - change_ordering=False, training=False, verbose=False, names=False, -) + model, args, input_shapes=None, + change_ordering=False, verbose=False, name_policy=None, +): ``` Options: -* `model` - a PyTorch module to convert; -* `args` - list of dummy variables with proper shapes; -* `input_shapes` - list with shape tuples; -* `change_ordering` - boolean, if enabled, the converter will try to change `BCHW` to `BHWC` -* `training` - boolean, switch model to training mode (never use it) -* `verbose` - boolean, verbose output -* `names` - choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. 
\ No newline at end of file +* `model` - a PyTorch model (nn.Module) to convert; +* `args` - a list of dummy variables with proper shapes; +* `input_shapes` - (experimental) list with overrided shapes for inputs; +* `change_ordering` - (experimental) boolean, if enabled, the converter will try to change `BCHW` to `BHWC` +* `verbose` - boolean, detailed log of conversion +* `name_policy` - (experimental) choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md index 28bcd61..c35d7df 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,5 +1,12 @@ ## Basic usage +To use the converter, import `pytorch_to_keras` function: + +``` +from pytorch2keras import pytorch_to_keras +``` + + It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. Firstly, we need to load (or create) a valid PyTorch model: diff --git a/docs/index.md b/docs/index.md index 746b334..6e09bbf 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,4 +4,4 @@ [![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) -PyTorch to Keras model converter. This project is created to make a model conversation easier, so, the converter API is developed with maximal simplicity. +PyTorch to Keras model converter. This project is created to make a model conversation as easier as possible. diff --git a/docs/installation.md b/docs/installation.md index e92a6be..ef53db9 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,19 +1,21 @@ -To install ready-to-use package, you may use PIP: +To install the package, the following ways may be used: -``` -pip install pytorch2keras -``` +1. 
pypi (latest actual version): -It's possible to install package with PIP right from git: + ``` + pip install pytorch2keras + ``` -``` -pip install -U git+https://github.com/nerox8664/pytorch2keras -``` +2. It's possible to install package with PIP right from git: -Or to clone it manually and then install it with PIP: + ``` + pip install -U git+https://github.com/nerox8664/pytorch2keras + ``` -``` -git clone https://github.com/nerox8664/pytorch2keras -cd pytorch2keras -pip install -e . -``` + Or to clone it manually and then install it with PIP: + + ``` + git clone https://github.com/nerox8664/pytorch2keras + cd pytorch2keras + pip install -e . + ``` diff --git a/docs/known_problems.md b/docs/known_problems.md index 769fa84..611d3da 100644 --- a/docs/known_problems.md +++ b/docs/known_problems.md @@ -1,30 +1,7 @@ -## Linear layer problem with PyTorch 0.4.1 and greater +## Change ordering -There is [the problem related to a new version](https://github.com/pytorch/pytorch/issues/13963): +Change ordering option works for very limited cases. -To make it work, please, cast all your `.view()` parameters to `int`. 
For example: - -``` -class ResNet(torchvision.models.resnet.ResNet): - def __init__(self, *args, **kwargs): - super(ResNet, self).__init__(*args, **kwargs) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(int(x.size(0)), -1) # << Here - x = self.fc(x) - return x -``` ## Recurrent layers diff --git a/docs/supported_layers_and_models.md b/docs/supported_layers_and_models.md index 138d7ad..1548556 100644 --- a/docs/supported_layers_and_models.md +++ b/docs/supported_layers_and_models.md @@ -7,12 +7,10 @@ + Sigmoid + Softmax + Tanh - + HardTanh * Constants * Convolutions: - + Conv1d + Conv2d + ConvTrsnpose2d @@ -22,25 +20,16 @@ + Sub + Div -* Embedding - * Linear * Normalizations: + BatchNorm2d + InstanceNorm2d - + Dropout * Poolings: + MaxPool2d + AvgPool2d + Global MaxPool2d (adaptive pooling to shape [1, 1]) - + Global AvgPool2d (adaptive pooling to shape [1, 1]) - -* Not tested yet: - + Upsampling - + Padding - + Reshape ## Models converted with pytorch2keras @@ -48,63 +37,6 @@ * ResNet* * VGG* * PreResNet* -* SqueezeNet (with ceil_mode=False) -* SqueezeNext * DenseNet* * AlexNet -* Inception -* SeNet * Mobilenet v2 -* DiracNet -* DARTS -* DRNC - -| Model | Top1 | Top5 | Params | FLOPs | Source weights | Remarks | -| --- | ---: | ---: | ---: | ---: | ---: | ---: | -| ResNet-10 | 37.09 | 15.55 | 5,418,792 | 892.62M | osmr's repo | Success | -| ResNet-12 | 35.86 | 14.46 | 5,492,776 | 1,124.23M | osmr's repo | Success | -| ResNet-14 | 32.85 | 12.41 | 5,788,200 | 1,355.64M | osmr's repo | Success | -| ResNet-16 | 30.68 | 11.10 | 6,968,872 | 1,586.95M | osmr's repo | Success | -| ResNet-18 x0.25 | 49.16 | 24.45 | 831,096 | 136.64M | osmr's repo | Success | -| ResNet-18 x0.5 | 36.54 | 14.96 | 3,055,880 | 485.22M | osmr's repo | Success | -| ResNet-18 x0.75 | 33.25 | 12.54 | 6,675,352 | 1,045.75M 
| osmr's repo | Success | -| ResNet-18 | 29.13 | 9.94 | 11,689,512 | 1,818.21M | osmr's repo | Success | -| ResNet-34 | 25.34 | 7.92 | 21,797,672 | 3,669.16M | osmr's repo | Success | -| ResNet-50 | 23.50 | 6.87 | 25,557,032 | 3,868.96M | osmr's repo | Success | -| ResNet-50b | 22.92 | 6.44 | 25,557,032 | 4,100.70M | osmr's repo | Success | -| ResNet-101 | 21.66 | 5.99 | 44,549,160 | 7,586.30M | osmr's repo | Success | -| ResNet-101b | 21.18 | 5.60 | 44,549,160 | 7,818.04M | osmr's repo | Success | -| ResNet-152 | 21.01 | 5.61 | 60,192,808 | 11,304.85M | osmr's repo | Success | -| ResNet-152b | 20.54 | 5.37 | 60,192,808 | 11,536.58M | osmr's repo | Success | -| PreResNet-18 | 28.72 | 9.88 | 11,687,848 | 1,818.41M | osmr's repo | Success | -| PreResNet-34 | 25.88 | 8.11 | 21,796,008 | 3,669.36M | osmr's repo | Success | -| PreResNet-50 | 23.39 | 6.68 | 25,549,480 | 3,869.16M | osmr's repo | Success | -| PreResNet-50b | 23.16 | 6.64 | 25,549,480 | 4,100.90M | osmr's repo | Success | -| PreResNet-101 | 21.45 | 5.75 | 44,541,608 | 7,586.50M | osmr's repo | Success | -| PreResNet-101b | 21.73 | 5.88 | 44,541,608 | 7,818.24M | osmr's repo | Success | -| PreResNet-152 | 20.70 | 5.32 | 60,185,256 | 11,305.05M | osmr's repo | Success | -| PreResNet-152b | 21.00 | 5.75 | 60,185,256 | 11,536.78M | Gluon Model Zoo| Success | -| PreResNet-200b | 21.10 | 5.64 | 64,666,280 | 15,040.27M | tornadomeet/ResNet | Success | -| DenseNet-121 | 25.11 | 7.80 | 7,978,856 | 2,852.39M | Gluon Model Zoo| Success | -| DenseNet-161 | 22.40 | 6.18 | 28,681,000 | 7,761.25M | Gluon Model Zoo| Success | -| DenseNet-169 | 23.89 | 6.89 | 14,149,480 | 3,381.48M | Gluon Model Zoo| Success | -| DenseNet-201 | 22.71 | 6.36 | 20,013,928 | 4,318.75M | Gluon Model Zoo| Success | -| DarkNet Tiny | 40.31 | 17.46 | 1,042,104 | 496.34M | osmr's repo | Success | -| DarkNet Ref | 38.00 | 16.68 | 7,319,416 | 365.55M | osmr's repo | Success | -| SqueezeNet v1.0 | 40.97 | 18.96 | 1,248,424 | 828.30M | osmr's repo | 
Success | -| SqueezeNet v1.1 | 39.09 | 17.39 | 1,235,496 | 354.88M | osmr's repo | Success | -| MobileNet x0.25 | 45.78 | 22.18 | 470,072 | 42.30M | osmr's repo | Success | -| MobileNet x0.5 | 36.12 | 14.81 | 1,331,592 | 152.04M | osmr's repo | Success | -| MobileNet x0.75 | 32.71 | 12.28 | 2,585,560 | 329.22M | Gluon Model Zoo| Success | -| MobileNet x1.0 | 29.25 | 10.03 | 4,231,976 | 573.83M | Gluon Model Zoo| Success | -| FD-MobileNet x0.25 | 56.19 | 31.38 | 383,160 | 12.44M | osmr's repo | Success | -| FD-MobileNet x0.5 | 42.62 | 19.69 | 993,928 | 40.93M | osmr's repo | Success | -| FD-MobileNet x1.0 | 35.95 | 14.72 | 2,901,288 | 146.08M | clavichord93/FD-MobileNet | Success | -| MobileNetV2 x0.25 | 48.89 | 25.24 | 1,516,392 | 32.22M | Gluon Model Zoo| Success | -| MobileNetV2 x0.5 | 35.51 | 14.64 | 1,964,736 | 95.62M | Gluon Model Zoo| Success | -| MobileNetV2 x0.75 | 30.82 | 11.26 | 2,627,592 | 191.61M | Gluon Model Zoo| Success | -| MobileNetV2 x1.0 | 28.51 | 9.90 | 3,504,960 | 320.19M | Gluon Model Zoo| Success | -| InceptionV3 | 21.22 | 5.59 | 23,834,568 | 5,746.72M | Gluon Model Zoo| Success | -| DiracNetV2-18 | 31.47 | 11.70 | 11,511,784 | 1,798.43M | szagoruyko/diracnets | Success | -| DiracNetV2-34 | 28.75 | 9.93 | 21,616,232 | 3,649.37M | szagoruyko/diracnets | Success | -| DARTS | 26.70 | 8.74 | 4,718,752 | 537.64M | szagoruyko/diracnets | Success | From 1c592d1d199fbdc3599e46594c7aee8420277c7d Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 22:36:52 +0300 Subject: [PATCH 171/180] Add init.py. 
--- pytorch2keras/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch2keras/__init__.py b/pytorch2keras/__init__.py index e69de29..326dfc6 100644 --- a/pytorch2keras/__init__.py +++ b/pytorch2keras/__init__.py @@ -0,0 +1,3 @@ +from .converter import pytorch_to_keras + +__all__ = ['pytorch_to_keras'] \ No newline at end of file From 154022f9038510e87ce5457b489f65fe23cc419a Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 27 Jun 2019 22:37:09 +0300 Subject: [PATCH 172/180] Update pypi version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4f2f7d6..e21b5c0 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup(name='pytorch2keras', - version='0.2.2', + version='0.2.3', description='The deep learning models convertor', long_description=long_description, long_description_content_type='text/markdown', From 5586b42cac1f905232d6ad4aa1d1683bb19f36bf Mon Sep 17 00:00:00 2001 From: Some Guy Date: Sun, 7 Jul 2019 04:18:55 +0200 Subject: [PATCH 173/180] Exposed onnx optimizer --- pytorch2keras/converter.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 0ac198c..3767099 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -5,6 +5,7 @@ from onnx2keras import onnx_to_keras import torch import onnx +from onnx import optimizer import io import logging @@ -12,6 +13,7 @@ def pytorch_to_keras( model, args, input_shapes=None, change_ordering=False, verbose=False, name_policy=None, + use_optimizer=False, do_constant_folding=False ): """ By given PyTorch model convert layers with ONNX. @@ -27,7 +29,6 @@ def pytorch_to_keras( Returns: model: created keras model. 
""" - logger = logging.getLogger('pytorch2keras') if verbose: @@ -64,10 +65,19 @@ def pytorch_to_keras( logger.debug(output_names) stream = io.BytesIO() - torch.onnx.export(model, args, stream, verbose=verbose, input_names=input_names, output_names=output_names) + torch.onnx.export(model, args, stream, do_constant_folding=do_constant_folding, verbose=verbose, input_names=input_names, output_names=output_names) stream.seek(0) onnx_model = onnx.load(stream) + if use_optimizer: + if use_optimizer is True: + optimizer2run = optimizer.get_available_passes() + else: + use_optimizer = set(use_optimizer) + optimizer2run = [x for x in optimizer.get_available_passes() if x in use_optimizer] + logger.info("Running optimizer:\n%s", "\n".join(optimizer2run)) + onnx_model = optimizer.optimize(onnx_model, optimizer2run) + k_model = onnx_to_keras(onnx_model=onnx_model, input_names=input_names, input_shapes=input_shapes, name_policy=name_policy, verbose=verbose, change_ordering=change_ordering) From 8bf504866d023ef3f38b3b4b8f6b4045b53619fb Mon Sep 17 00:00:00 2001 From: Kittinan <144775+kittinan@users.noreply.github.com> Date: Thu, 18 Jul 2019 09:03:47 +0700 Subject: [PATCH 174/180] Update README.md to highlight code blocks and fix missing typo --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2bd043e..10400b8 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ pip install pytorch2keras To use the converter properly, please, make changes in your `~/.keras/keras.json`: -``` +```json ... "backend": "tensorflow", "image_data_format": "channels_first", @@ -36,13 +36,13 @@ Here is a short instruction how to get a tensorflow.js model: 1. First of all, you have to convert your model to Keras with this converter: -``` +```python k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, names='short') ``` 2. Now you have Keras model. 
You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. As alternative, you may get Tensorflow Graph and save it as a frozen model: -``` +```python # Function below copied from here: # https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): @@ -88,7 +88,7 @@ print([i for i in k_model.outputs]) 3. You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: -``` +```bash tensorflowjs_converter \ --input_format=tf_frozen_model \ --output_node_names='TANHTObs/Tanh' \ @@ -98,10 +98,10 @@ tensorflowjs_converter \ 4. Thats all! -``` +```js const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; -cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); +const model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); ``` ## How to use @@ -110,7 +110,7 @@ It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. Firstly, we need to load (or create) a valid PyTorch model: -``` +```python class TestConv2d(nn.Module): """ Module for Conv2d testing @@ -132,14 +132,14 @@ model = TestConv2d() The next step - create a dummy variable with correct shape: -``` +```python input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) input_var = Variable(torch.FloatTensor(input_np)) ``` We use the dummy-variable to trace the model (with jit.trace): -``` +```python from pytorch2keras import pytorch_to_keras # we should specify shape of the input tensor k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) @@ -147,7 +147,7 @@ k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) You can also set H and W dimensions to None to make your model shape-agnostic (e.g. 
fully convolutional netowrk): -``` +```python from pytorch2keras.converter import pytorch_to_keras # we should specify shape of the input tensor k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) @@ -160,7 +160,7 @@ That's all! If all the modules have converted properly, the Keras model will be Here is the only method `pytorch_to_keras` from `pytorch2keras` module. -``` +```python def pytorch_to_keras( model, args, input_shapes=None, change_ordering=False, verbose=False, name_policy=None, From c021bccef20ac747060218ace7eab575b7f3f6be Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Fri, 8 Nov 2019 13:51:30 +0200 Subject: [PATCH 175/180] Test funding. --- .github/FUNDING.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..9c90d70 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,12 @@ +# These are supported funding model platforms + +github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +custom: ['https://money.yandex.ru/to/410017251832699'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] From 9ba448bebdb912c31d1d5b53e10291d65d9e7df9 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Tue, 14 Jan 2020 22:58:00 +0200 Subject: [PATCH 176/180] Update FUNDING.yml. 
--- .github/FUNDING.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 9c90d70..5d7fb87 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -3,10 +3,10 @@ github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username -ko_fi: # Replace with a single Ko-fi username +ko_fi: nerox8664 # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username -custom: ['https://money.yandex.ru/to/410017251832699'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] From 59bdb46ca55e951f527c9b729ee8602dcfdb28b6 Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Thu, 14 May 2020 13:03:04 +0300 Subject: [PATCH 177/180] Update setup.py script for new pip. 
--- setup.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/setup.py b/setup.py index e21b5c0..d7903d2 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,13 @@ from setuptools import setup, find_packages -try: # for pip >= 10 - from pip._internal.req import parse_requirements -except ImportError: # for pip <= 9.0.3 - from pip.req import parse_requirements +def parse_requirements(filename): + """ load requirements from a pip requirements file """ + lineiter = (line.strip() for line in open(filename)) + return [line for line in lineiter if line and not line.startswith("#")] -# parse_requirements() returns generator of pip.req.InstallRequirement objects -install_reqs = parse_requirements('requirements.txt', session='null') - - -# reqs is a list of requirement -# e.g. ['django==1.5.1', 'mezzanine==1.4.6'] -reqs = [str(ir.req) for ir in install_reqs] +reqs = parse_requirements('requirements.txt') with open('README.md') as f: @@ -21,8 +15,8 @@ setup(name='pytorch2keras', - version='0.2.3', - description='The deep learning models convertor', + version='0.2.4', + description='The deep learning models converter', long_description=long_description, long_description_content_type='text/markdown', url='https://github.com/nerox8664/pytorch2keras', From ebd86f89f5721974eb8152e46f27c5bb752887ce Mon Sep 17 00:00:00 2001 From: Grigory Malivenko Date: Sun, 9 May 2021 11:06:07 +0300 Subject: [PATCH 178/180] Remove deprecated docs. 
--- .github/FUNDING.yml | 12 --- README.md | 4 +- docs/api.md | 17 ---- docs/getting_started.md | 135 ---------------------------- docs/index.md | 7 -- docs/installation.md | 21 ----- docs/known_problems.md | 8 -- docs/license.md | 21 ----- docs/requirements.txt | 1 - docs/supported_layers_and_models.md | 42 --------- setup.py | 4 +- 11 files changed, 4 insertions(+), 268 deletions(-) delete mode 100644 .github/FUNDING.yml delete mode 100644 docs/api.md delete mode 100644 docs/getting_started.md delete mode 100644 docs/index.md delete mode 100644 docs/installation.md delete mode 100644 docs/known_problems.md delete mode 100644 docs/license.md delete mode 100644 docs/requirements.txt delete mode 100644 docs/supported_layers_and_models.md diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index 5d7fb87..0000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1,12 +0,0 @@ -# These are supported funding model platforms - -github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] -patreon: # Replace with a single Patreon username -open_collective: # Replace with a single Open Collective username -ko_fi: nerox8664 # Replace with a single Ko-fi username -tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel -community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry -liberapay: # Replace with a single Liberapay username -issuehunt: # Replace with a single IssueHunt username -otechie: # Replace with a single Otechie username -custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/README.md b/README.md index 10400b8..400cc45 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # pytorch2keras -[![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) +[![Build 
Status](https://travis-ci.com/gmalivenko/pytorch2keras.svg?branch=master)](https://travis-ci.com/gmalivenko/pytorch2keras) [![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) +[![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/gmalivenko/pytorch2keras) [![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) ![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) [![Readthedocs](https://img.shields.io/readthedocs/pytorch2keras.svg)](https://pytorch2keras.readthedocs.io/en/latest/) diff --git a/docs/api.md b/docs/api.md deleted file mode 100644 index a67194e..0000000 --- a/docs/api.md +++ /dev/null @@ -1,17 +0,0 @@ -Since version `0.2.1` the converter has the following API: - -``` -def pytorch_to_keras( - model, args, input_shapes=None, - change_ordering=False, verbose=False, name_policy=None, -): -``` - -Options: - -* `model` - a PyTorch model (nn.Module) to convert; -* `args` - a list of dummy variables with proper shapes; -* `input_shapes` - (experimental) list with overrided shapes for inputs; -* `change_ordering` - (experimental) boolean, if enabled, the converter will try to change `BCHW` to `BHWC` -* `verbose` - boolean, detailed log of conversion -* `name_policy` - (experimental) choice from [`keep`, `short`, `random`]. The selector set the target layer naming policy. \ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md deleted file mode 100644 index c35d7df..0000000 --- a/docs/getting_started.md +++ /dev/null @@ -1,135 +0,0 @@ -## Basic usage - -To use the converter, import `pytorch_to_keras` function: - -``` -from pytorch2keras import pytorch_to_keras -``` - - -It's the converter of PyTorch graph to a Keras (Tensorflow backend) model. 
- -Firstly, we need to load (or create) a valid PyTorch model: - -``` -import torch -import torch.nn as nn - -class TestConv2d(nn.Module): - """ - Module for Conv2d testing - """ - - def __init__(self, inp=10, out=16, kernel_size=3): - super(TestConv2d, self).__init__() - self.conv2d = nn.Conv2d(inp, out, stride=1, kernel_size=kernel_size, bias=True) - - def forward(self, x): - x = self.conv2d(x) - return x - -model = TestConv2d() - -# load weights here -# model.load_state_dict(torch.load(path_to_weights.pth)) -``` - -The next step - create a dummy variable with correct shape: - -``` -from torch.autograd import Variable -import numpy as np - -input_np = np.random.uniform(0, 1, (1, 10, 32, 32)) -input_var = Variable(torch.FloatTensor(input_np)) -``` - -We use the dummy-variable to trace the model (with jit.trace): - -``` -from converter import pytorch_to_keras -# we should specify shape of the input tensor -k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True) -``` - -You can also set H and W dimensions to None to make your model shape-agnostic (e.g. fully convolutional netowrk): - -``` -from pytorch2keras.converter import pytorch_to_keras -# we should specify shape of the input tensor -k_model = pytorch_to_keras(model, input_var, [(10, None, None,)], verbose=True) -``` - -That's all! If all the modules have converted properly, the Keras model will be stored in the `k_model` variable. - -## Tensorflow.js - -For the proper conversion to a tensorflow.js format, please use the new flag `names='short'`. - -Here is a short instruction how to get a tensorflow.js model: - -1. First of all, you have to convert your model to Keras with this converter: - - ``` - k_model = pytorch_to_keras(model, input_var, [(10, 32, 32,)], verbose=True, names='short') - ``` - -2. Now you have Keras model. You can save it as h5 file and then convert it with `tensorflowjs_converter` but it doesn't work sometimes. 
As alternative, you may get Tensorflow Graph and save it as a frozen model: - - # Function below copied from here: - # https://stackoverflow.com/questions/45466020/how-to-export-keras-h5-to-tensorflow-pb - def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True): - """ - Freezes the state of a session into a pruned computation graph. - - Creates a new computation graph where variable nodes are replaced by - constants taking their current value in the session. The new graph will be - pruned so subgraphs that are not necessary to compute the requested - outputs are removed. - @param session The TensorFlow session to be frozen. - @param keep_var_names A list of variable names that should not be frozen, - or None to freeze all the variables in the graph. - @param output_names Names of the relevant graph outputs. - @param clear_devices Remove the device directives from the graph for better portability. - @return The frozen graph definition. - """ - from tensorflow.python.framework.graph_util import convert_variables_to_constants - graph = session.graph - with graph.as_default(): - freeze_var_names = \ - list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or [])) - output_names = output_names or [] - output_names += [v.op.name for v in tf.global_variables()] - input_graph_def = graph.as_graph_def() - if clear_devices: - for node in input_graph_def.node: - node.device = "" - frozen_graph = convert_variables_to_constants(session, input_graph_def, - output_names, freeze_var_names) - return frozen_graph - - from keras import backend as K - import tensorflow as tf - frozen_graph = freeze_session(K.get_session(), - output_names=[out.op.name for out in k_model.outputs]) - - tf.train.write_graph(frozen_graph, ".", "my_model.pb", as_text=False) - print([i for i in k_model.outputs]) - -3. 
You will see the output layer name, so, now it's time to convert `my_model.pb` to tfjs model: - - ``` - tensorflowjs_converter \ - --input_format=tf_frozen_model \ - --output_node_names='TANHTObs/Tanh' \ - my_model.pb \ - model_tfjs - ``` - -4. Thats all! - - ``` - const MODEL_URL = `model_tfjs/tensorflowjs_model.pb`; - const WEIGHTS_URL = `model_tfjs/weights_manifest.json`; - cont model = await tf.loadFrozenModel(MODEL_URL, WEIGHTS_URL); - ``` diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 6e09bbf..0000000 --- a/docs/index.md +++ /dev/null @@ -1,7 +0,0 @@ -[![Build Status](https://travis-ci.com/nerox8664/pytorch2keras.svg?branch=master)](https://travis-ci.com/nerox8664/pytorch2keras) -[![GitHub License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![Python Version](https://img.shields.io/badge/python-2.7%2C3.6-lightgrey.svg)](https://github.com/nerox8664/pytorch2keras) -[![Downloads](https://pepy.tech/badge/pytorch2keras)](https://pepy.tech/project/pytorch2keras) -![PyPI](https://img.shields.io/pypi/v/pytorch2keras.svg) - -PyTorch to Keras model converter. This project is created to make a model conversation as easier as possible. diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index ef53db9..0000000 --- a/docs/installation.md +++ /dev/null @@ -1,21 +0,0 @@ -To install the package, the following ways may be used: - -1. pypi (latest actual version): - - ``` - pip install pytorch2keras - ``` - -2. It's possible to install package with PIP right from git: - - ``` - pip install -U git+https://github.com/nerox8664/pytorch2keras - ``` - - Or to clone it manually and then install it with PIP: - - ``` - git clone https://github.com/nerox8664/pytorch2keras - cd pytorch2keras - pip install -e . 
- ``` diff --git a/docs/known_problems.md b/docs/known_problems.md deleted file mode 100644 index 611d3da..0000000 --- a/docs/known_problems.md +++ /dev/null @@ -1,8 +0,0 @@ -## Change ordering - -Change ordering option works for very limited cases. - - -## Recurrent layers - -The recurrent layers are not supported due to complicated onnx-translation. The support is planned, but haven't implemented yet. \ No newline at end of file diff --git a/docs/license.md b/docs/license.md deleted file mode 100644 index 4d7d753..0000000 --- a/docs/license.md +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2019 Grigory Malivenko - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index b854bca..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -mkdocs \ No newline at end of file diff --git a/docs/supported_layers_and_models.md b/docs/supported_layers_and_models.md deleted file mode 100644 index 1548556..0000000 --- a/docs/supported_layers_and_models.md +++ /dev/null @@ -1,42 +0,0 @@ -## Supported layers - -* Activations: - + ReLU - + LeakyReLU - + SELU - + Sigmoid - + Softmax - + Tanh - -* Constants - -* Convolutions: - + Conv2d - + ConvTrsnpose2d - -* Element-wise: - + Add - + Mul - + Sub - + Div - -* Linear - -* Normalizations: - + BatchNorm2d - + InstanceNorm2d - -* Poolings: - + MaxPool2d - + AvgPool2d - + Global MaxPool2d (adaptive pooling to shape [1, 1]) - - -## Models converted with pytorch2keras - -* ResNet* -* VGG* -* PreResNet* -* DenseNet* -* AlexNet -* Mobilenet v2 diff --git a/setup.py b/setup.py index d7903d2..a6767b2 100644 --- a/setup.py +++ b/setup.py @@ -19,9 +19,9 @@ def parse_requirements(filename): description='The deep learning models converter', long_description=long_description, long_description_content_type='text/markdown', - url='https://github.com/nerox8664/pytorch2keras', + url='https://github.com/gmalivenko/pytorch2keras', author='Grigory Malivenko', - author_email='nerox8664@gmail.com', + author_email='', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Science/Research', From 2f855e07d66d13587a3322c9f0814e8b217e290a Mon Sep 17 00:00:00 2001 From: Marijn van Vliet Date: Thu, 5 Aug 2021 16:18:33 +0300 Subject: [PATCH 179/180] Remove optimizer as it is no longer supported --- pytorch2keras/converter.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 3767099..6f27b63 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -5,7 +5,6 @@ from onnx2keras import 
onnx_to_keras import torch import onnx -from onnx import optimizer import io import logging @@ -13,7 +12,7 @@ def pytorch_to_keras( model, args, input_shapes=None, change_ordering=False, verbose=False, name_policy=None, - use_optimizer=False, do_constant_folding=False + do_constant_folding=False ): """ By given PyTorch model convert layers with ONNX. @@ -69,17 +68,5 @@ def pytorch_to_keras( stream.seek(0) onnx_model = onnx.load(stream) - if use_optimizer: - if use_optimizer is True: - optimizer2run = optimizer.get_available_passes() - else: - use_optimizer = set(use_optimizer) - optimizer2run = [x for x in optimizer.get_available_passes() if x in use_optimizer] - logger.info("Running optimizer:\n%s", "\n".join(optimizer2run)) - onnx_model = optimizer.optimize(onnx_model, optimizer2run) - - k_model = onnx_to_keras(onnx_model=onnx_model, input_names=input_names, - input_shapes=input_shapes, name_policy=name_policy, - verbose=verbose, change_ordering=change_ordering) return k_model From d493cd37031da197f882b3e8993762cadad13dc7 Mon Sep 17 00:00:00 2001 From: Marijn van Vliet Date: Fri, 6 Aug 2021 11:15:29 +0300 Subject: [PATCH 180/180] Put back line that was removed by accident --- pytorch2keras/converter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch2keras/converter.py b/pytorch2keras/converter.py index 6f27b63..931c71b 100644 --- a/pytorch2keras/converter.py +++ b/pytorch2keras/converter.py @@ -69,4 +69,7 @@ def pytorch_to_keras( stream.seek(0) onnx_model = onnx.load(stream) + k_model = onnx_to_keras(onnx_model=onnx_model, input_names=input_names, + input_shapes=input_shapes, name_policy=name_policy, + verbose=verbose, change_ordering=change_ordering) return k_model