- g = tf.Graph()
- with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
- image = tf.placeholder('float', shape=shape)
- net, mean_pixel = vgg.net(network, image)
- content_pre = np.array([vgg.preprocess(content, mean_pixel)])
- content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
- feed_dict={image: content_pre})
这里会调用imagenet-vgg-verydeep-19.mat这个model,在这个基础上通过vgg里面的net构建前文当中提到的abcde那五个卷积层conv1_1, conv2_1, conv3_1, conv4_1, conv5_1,net每个不同的key表示对应的层。然后把预处理后的content_pre输入网络,在CONTENT_LAYER这一层取出输出,就得到了content的feature。
- for i in range(len(styles)):
- g = tf.Graph()
- with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
- image = tf.placeholder('float', shape=style_shapes[i])
- net, _ = vgg.net(network, image)
- style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
- for layer in STYLE_LAYERS:
- features = net[layer].eval(feed_dict={image: style_pre})
- features = np.reshape(features, (-1, features.shape[3]))
- gram = np.dot(features.T, features) / features.size
- style_features[i][layer] = gram
这里和content的feature的计算基本一样,只不过style保存的是每一层feature的Gram矩阵;另外由于计算loss的方法不同(style loss为total loss,包括每一层输出的loss),因此 `CONTENT_LAYER = 'relu4_2'`,而 `STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')`。
然后就是最小化loss的过程:
- with tf.Graph().as_default():
- if initial is None:
- noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
- initial = tf.random_normal(shape) * 0.256
- else:
- initial = np.array([vgg.preprocess(initial, mean_pixel)])
- initial = initial.astype('float32')
- image = tf.Variable(initial)
- net, _ = vgg.net(network, image)
- # content loss
- content_loss = content_weight * (2 * tf.nn.l2_loss(
- net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
- content_features[CONTENT_LAYER].size)
- # style loss
- style_loss = 0
- for i in range(len(styles)):
- style_losses = []
- for style_layer in STYLE_LAYERS:
- layer = net[style_layer]
- _, height, width, number = map(lambda i: i.value, layer.get_shape())
- size = height * width * number
- feats = tf.reshape(layer, (-1, number))
- gram = tf.matmul(tf.transpose(feats), feats) / size
- style_gram = style_features[i][style_layer]
- style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
- style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
- # total variation denoising
- tv_y_size = _tensor_size(image[:,1:,:,:])
- tv_x_size = _tensor_size(image[:,:,1:,:])
- tv_loss = tv_weight * 2 * (
- (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
- tv_y_size) +
- (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
- tv_x_size))
- # overall loss
- loss = content_loss + style_loss + tv_loss
- # optimizer setup
- train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
和上文中提到的公式一一对应,除了多了一个total variation denoising。定义好total loss后,调用AdamOptimizer来进行迭代计算,最小化loss。注意这里的代码还是按像素点计算,并未向量化,所以看起来会有点头疼。后面如果更加熟悉tensorflow,我再来这儿试着改改,看看能不能把这里计算的部分做得稍微高效点。
想要详细了解这部分代码的童靴,可以把这个项目clone下来,仔细研究研究,就当做学习tensorflow。