Transform Network 的网络结构如下:
- import tensorflow as tf
def conv2d(x, input_filters, output_filters, kernel, strides, padding='SAME'):
    """Strided 2-D convolution followed by batch normalization.

    Args:
        x: input tensor; assumed NHWC layout — TODO confirm with callers.
        input_filters: number of channels in `x`.
        output_filters: number of output channels.
        kernel: square kernel size.
        strides: spatial stride (same for height and width).
        padding: conv2d padding mode, 'SAME' by default.

    Returns:
        The batch-normalized convolution output.
    """
    # Fix: the original used typographic quotes (padding=’SAME’), which is a
    # Python syntax error; replaced with plain ASCII quotes.
    with tf.variable_scope('conv'):
        shape = [kernel, kernel, input_filters, output_filters]
        weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
        convolved = tf.nn.conv2d(x, weight, strides=[1, strides, strides, 1],
                                 padding=padding, name='conv')
        return batch_norm(convolved, output_filters)
def conv2d_transpose(x, input_filters, output_filters, kernel, strides, padding='SAME'):
    """Strided 2-D transposed convolution (upsampling) followed by batch norm.

    Args:
        x: input tensor; assumed NHWC layout — TODO confirm with callers.
        input_filters: number of channels in `x`.
        output_filters: number of output channels.
        kernel: square kernel size.
        strides: spatial upsampling factor.
        padding: padding mode, 'SAME' by default.

    Returns:
        The batch-normalized transposed-convolution output, with spatial
        dimensions `strides` times those of `x`.
    """
    # Fixes vs. original: the identifier tf.truncated_normal was split across
    # two lines (syntax error), the default used typographic quotes ’SAME’
    # (syntax error), and tf.pack was renamed tf.stack (removed in TF 1.0).
    with tf.variable_scope('conv_transpose'):
        # Note the filter layout for transposed conv:
        # [height, width, output_channels, input_channels].
        shape = [kernel, kernel, output_filters, input_filters]
        weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
        batch_size = tf.shape(x)[0]
        height = tf.shape(x)[1] * strides
        width = tf.shape(x)[2] * strides
        output_shape = tf.stack([batch_size, height, width, output_filters])
        convolved = tf.nn.conv2d_transpose(x, weight, output_shape,
                                           strides=[1, strides, strides, 1],
                                           padding=padding, name='conv_transpose')
        return batch_norm(convolved, output_filters)
def batch_norm(x, size):
    """Batch-normalize `x` over the batch and spatial axes.

    Uses per-batch statistics (no moving averages) with a learnable
    offset (`beta`) and scale, both of shape [size] (the channel count).
    """
    eps = 1e-3
    mean, variance = tf.nn.moments(x, [0, 1, 2], keep_dims=True)
    offset = tf.Variable(tf.zeros([size]), name='beta')
    gain = tf.Variable(tf.ones([size]), name='scale')
    return tf.nn.batch_normalization(x, mean, variance, offset, gain, eps,
                                     name='batch')
def residual(x, filters, kernel, strides, padding='SAME'):
    """Two-convolution residual block: x + conv(relu(conv(x))).

    Args:
        x: input tensor whose channel count must equal `filters` so the
           skip connection can be added element-wise.
        filters: channel count of both convolutions.
        kernel: square kernel size.
        strides: stride for both convolutions (1 keeps spatial size, which
           the skip addition requires for SAME padding).
        padding: padding mode, 'SAME' by default.

    Returns:
        The residual sum x + conv2.
    """
    # Fix: the original default used typographic quotes (padding=’SAME’),
    # a Python syntax error. Also renamed the local that shadowed this
    # function's own name.
    with tf.variable_scope('residual'):
        conv1 = conv2d(x, filters, filters, kernel, strides, padding=padding)
        conv2 = conv2d(tf.nn.relu(conv1), filters, filters, kernel, strides,
                       padding=padding)
        return x + conv2
def net(image):
    """Image-transform network: 3 conv layers, 5 residual blocks, 3 deconvs.

    Args:
        image: input image tensor, assumed NHWC with 3 channels — TODO confirm.

    Returns:
        The generated image, tanh output rescaled by 127.5.
    """
    with tf.variable_scope('conv1'):
        out = tf.nn.relu(conv2d(image, 3, 32, 9, 1))
    with tf.variable_scope('conv2'):
        out = tf.nn.relu(conv2d(out, 32, 64, 3, 2))
    with tf.variable_scope('conv3'):
        out = tf.nn.relu(conv2d(out, 64, 128, 3, 2))
    # Five identical residual blocks; scope names match the original
    # ('res1' .. 'res5') so checkpoints stay compatible.
    for i in range(1, 6):
        with tf.variable_scope('res%d' % i):
            out = residual(out, 128, 3, 1)
    with tf.variable_scope('deconv1'):
        out = tf.nn.relu(conv2d_transpose(out, 128, 64, 3, 2))
    with tf.variable_scope('deconv2'):
        out = tf.nn.relu(conv2d_transpose(out, 64, 32, 3, 2))
    with tf.variable_scope('deconv3'):
        out = tf.nn.tanh(conv2d_transpose(out, 32, 3, 9, 1))
    # tanh is in [-1, 1]; scale toward pixel range.
    return out * 127.5
使用 deep residual network 来训练 COCO 数据集,能够在保证性能的前提下训练更深的模型。而 Loss Network 是由 pretrained 的 VGG 网络来计算的,其网络结构如下:
- import tensorflow as tf
- import numpy as np
- import scipy.io
- from scipy import misc
def net(data_path, input_image):
    """Build the VGG-19 feature extractor from pretrained matconvnet weights.

    Args:
        data_path: path to the matconvnet .mat weight file.
        input_image: tensor fed through the network.

    Returns:
        (activations, mean_pixel): a dict mapping each layer name to its
        output tensor, and the per-channel mean pixel from the weight file.
    """
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    data = scipy.io.loadmat(data_path)
    mean_pixel = np.mean(data['normalization'][0][0][0], axis=(0, 1))
    weights = data['layers'][0]

    activations = {}
    current = input_image
    for i, name in enumerate(layer_names):
        if name.startswith('conv'):
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet stores kernels as [width, height, in, out];
            # tensorflow expects [height, width, in, out].
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            current = _conv_layer(current, kernels, bias.reshape(-1), name=name)
        elif name.startswith('relu'):
            current = tf.nn.relu(current, name=name)
        elif name.startswith('pool'):
            current = _pool_layer(current, name=name)
        activations[name] = current

    assert len(activations) == len(layer_names)
    return activations, mean_pixel
def _conv_layer(input, weights, bias, name=None):
    """Stride-1, SAME-padded convolution with fixed (non-trainable) weights plus bias."""
    # The pretrained kernels are frozen as a constant — the loss network
    # is never trained.
    kernel = tf.constant(weights)
    conv = tf.nn.conv2d(input, kernel, strides=(1, 1, 1, 1), padding='SAME',
                        name=name)
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input, name=None):
    """2x2 max pooling with stride 2 and SAME padding (halves spatial dims)."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME',
                          name=name)
def preprocess(image, mean_pixel):
    """Center `image` by subtracting the dataset mean pixel (broadcasts)."""
    centered = image - mean_pixel
    return centered
def unprocess(image, mean_pixel):
    """Undo `preprocess` by adding the dataset mean pixel back (broadcasts)."""
    restored = image + mean_pixel
    return restored