这里因为我最近工作变动的问题,没有了机器来跑一遍,也无法画出基本的网络结构图,之后我会补上。这里需要指出的是,中间的cccp1和cccp2(cascaded cross channel parametric pooling,级联跨通道参数池化)层等价于kernel大小为1×1的卷积层。Caffe中NIN的实现如下:
# Network in Network (NIN) ImageNet model — legacy Caffe prototxt syntax
# (old-style `layers` blocks with enum layer types, blobs_lr/weight_decay pairs).
# The cccp* layers are 1x1 convolutions: cascaded cross-channel parametric pooling.
# Structure: 4 conv "mlpconv" stages, each conv followed by two 1x1 cccp convs,
# ending in global average pooling (pool4) instead of fully-connected layers.
name: "nin_imagenet"
# ---- Data layers: LMDB input, 224x224 crops, mean subtraction ----
layers {
  top: "data"
  top: "label"
  name: "data"
  type: DATA
  data_param {
    source: "/home/linmin/IMAGENET-LMDB/imagenet-train-lmdb"
    backend: LMDB
    batch_size: 64
  }
  transform_param {
    crop_size: 224
    mirror: true
    mean_file: "/home/linmin/IMAGENET-LMDB/imagenet-train-mean"
  }
  include: { phase: TRAIN }
}
layers {
  top: "data"
  top: "label"
  name: "data"
  type: DATA
  data_param {
    source: "/home/linmin/IMAGENET-LMDB/imagenet-val-lmdb"
    backend: LMDB
    batch_size: 89
  }
  transform_param {
    crop_size: 224
    mirror: false
    # NOTE(review): validation reuses the TRAIN mean file — presumably intentional; verify.
    mean_file: "/home/linmin/IMAGENET-LMDB/imagenet-train-mean"
  }
  include: { phase: TEST }
}
# ---- Stage 1: 11x11/4 conv (96) + two 1x1 cccp convs + 3x3/2 max pool ----
layers {
  bottom: "data"
  top: "conv1"
  name: "conv1"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "conv1"
  top: "conv1"
  name: "relu0"
  type: RELU
}
# cccp1/cccp2: 1x1 convs — per-pixel cross-channel mixing (the "MLP" of mlpconv).
layers {
  bottom: "conv1"
  top: "cccp1"
  name: "cccp1"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp1"
  top: "cccp1"
  name: "relu1"
  type: RELU
}
layers {
  bottom: "cccp1"
  top: "cccp2"
  name: "cccp2"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp2"
  top: "cccp2"
  name: "relu2"
  type: RELU
}
layers {
  bottom: "cccp2"
  top: "pool0"
  name: "pool0"
  type: POOLING
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# ---- Stage 2: 5x5 conv (256, pad 2) + two 1x1 cccp convs + 3x3/2 max pool ----
layers {
  bottom: "pool0"
  top: "conv2"
  name: "conv2"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "conv2"
  top: "conv2"
  name: "relu3"
  type: RELU
}
layers {
  bottom: "conv2"
  top: "cccp3"
  name: "cccp3"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp3"
  top: "cccp3"
  name: "relu5"
  type: RELU
}
layers {
  bottom: "cccp3"
  top: "cccp4"
  name: "cccp4"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp4"
  top: "cccp4"
  name: "relu6"
  type: RELU
}
layers {
  bottom: "cccp4"
  top: "pool2"
  name: "pool2"
  type: POOLING
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# ---- Stage 3: 3x3 conv (384, pad 1) + two 1x1 cccp convs + 3x3/2 max pool ----
layers {
  bottom: "pool2"
  top: "conv3"
  name: "conv3"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "conv3"
  top: "conv3"
  name: "relu7"
  type: RELU
}
layers {
  bottom: "conv3"
  top: "cccp5"
  name: "cccp5"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp5"
  top: "cccp5"
  name: "relu8"
  type: RELU
}
layers {
  bottom: "cccp5"
  top: "cccp6"
  name: "cccp6"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp6"
  top: "cccp6"
  name: "relu9"
  type: RELU
}
layers {
  bottom: "cccp6"
  top: "pool3"
  name: "pool3"
  type: POOLING
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# ---- Stage 4: dropout + 3x3 conv (1024) + cccp7 (1024) + cccp8 (1000 = #classes) ----
layers {
  bottom: "pool3"
  top: "pool3"
  name: "drop"
  type: DROPOUT
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  bottom: "pool3"
  top: "conv4"
  name: "conv4-1024"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 1024
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "conv4"
  top: "conv4"
  name: "relu10"
  type: RELU
}
layers {
  bottom: "conv4"
  top: "cccp7"
  name: "cccp7-1024"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 1024
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp7"
  top: "cccp7"
  name: "relu11"
  type: RELU
}
# cccp8 outputs 1000 channels — one per ImageNet class; global average pooling
# below reduces each channel's feature map to a single class score.
layers {
  bottom: "cccp7"
  top: "cccp8"
  name: "cccp8-1024"
  type: CONVOLUTION
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 1000
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      mean: 0
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  bottom: "cccp8"
  top: "cccp8"
  name: "relu12"
  type: RELU
}
# Global average pooling (6x6 feature map -> 1x1) replaces fully-connected layers.
layers {
  bottom: "cccp8"
  top: "pool4"
  name: "pool4"
  type: POOLING
  pooling_param {
    pool: AVE
    kernel_size: 6
    stride: 1
  }
}
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "pool4"
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}
layers {
  bottom: "pool4"
  bottom: "label"
  name: "loss"
  type: SOFTMAX_LOSS
  include: { phase: TRAIN }
}
NIN的提出其实也可以认为是加深了网络的深度:通过加深网络深度(增强单个NIN模块的特征表示能力),并将原先的全连接层替换为全局平均池化(global average pooling)层,大大减少了原先需要的filter数量,从而减少了model的参数量。paper中的实验证明,在达到与AlexNet相同性能的情况下,最终model大小仅为29M。
理解NIN之后,再来看GoogLeNet就不会有不明所以的感觉。