# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v3 classification network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception_utils

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def inception_v3_base(inputs,
                      final_endpoint='Mixed_7c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
  """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function, although they build the same
  network.

  Here is a mapping from the old names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or the computation cost of the model.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a dictionary of activations for external use, for example
      summaries or losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or if depth_multiplier <= 0.
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)
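  # For example, with depth_multiplier=0.5 and min_depth=16: depth(32)
  # evaluates to max(16, 16) = 16, while depth(192) evaluates to
  # max(96, 16) = 96, so narrow layers are clamped at min_depth and wide
  # layers shrink proportionally.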
  with tf.variable_scope(scope, 'InceptionV3', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='VALID'):
      # 299 x 299 x 3
      end_point = 'Conv2d_1a_3x3'
      net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 149 x 149 x 32
      end_point = 'Conv2d_2a_3x3'
      net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 32
      end_point = 'Conv2d_2b_3x3'
      net = slim.conv2d(net, depth(64), [3, 3], padding='SAME',
                        scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 64
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 64
      end_point = 'Conv2d_3b_1x1'
      net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 80.
      end_point = 'Conv2d_4a_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 71 x 71 x 192.
      end_point = 'MaxPool_5a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 35 x 35 x 192.

    # Inception blocks
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # mixed: 35 x 35 x 256.
      end_point = 'Mixed_5b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_1: 35 x 35 x 288.
      end_point = 'Mixed_5c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          # Note: the irregular scope names 'Conv2d_0b_1x1' and 'Conv_1_0c_5x5'
          # break the naming pattern of the other branches but are kept as-is,
          # since renaming them would break loading of existing pretrained
          # checkpoints.
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv_1_0c_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_2: 35 x 35 x 288.
      end_point = 'Mixed_5d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_3: 17 x 17 x 768.
      end_point = 'Mixed_6a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_4: 17 x 17 x 768.
      end_point = 'Mixed_6b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_5: 17 x 17 x 768.
      end_point = 'Mixed_6c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_6: 17 x 17 x 768.
      end_point = 'Mixed_6d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_7: 17 x 17 x 768.
      end_point = 'Mixed_6e'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_8: 8 x 8 x 1280.
      end_point = 'Mixed_7a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_9: 8 x 8 x 2048.
      end_point = 'Mixed_7b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          # Note: the duplicated 'Conv2d_0b' suffix below is in the original
          # code; renaming it would break compatibility with existing
          # pretrained checkpoints.
          branch_1 = tf.concat(axis=3, values=[
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')])
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat(axis=3, values=[
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat(axis=3, values=[
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')])
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat(axis=3, values=[
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
  raise ValueError('Unknown final endpoint %s' % final_endpoint)
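

# Example use of inception_v3_base (a minimal sketch; assumes a batch of
# 299 x 299 RGB images):
#
#   images = tf.placeholder(tf.float32, [None, 299, 299, 3])
#   net, end_points = inception_v3_base(images, final_endpoint='Mixed_6e')
#   # net is the 17 x 17 x 768 Mixed_6e activation; end_points maps each
#   # endpoint name constructed so far to its tensor, e.g.
#   # end_points['Conv2d_1a_3x3'].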


def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 create_aux_logits=True,
                 scope='InceptionV3',
                 global_pool=False):
  """Inception model from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  With the default arguments this method constructs the exact model defined in
  the paper. However, one can experiment with variations of the inception_v3
  network by changing the arguments dropout_keep_prob, min_depth and
  depth_multiplier.

  The default image size used to train this network is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether the model is being trained.
    dropout_keep_prob: the fraction of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or the computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To
      be able to reuse them, 'scope' must be given.
    create_aux_logits: Whether to create the auxiliary logits.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the
      corresponding activation.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV3', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v3_base(
          inputs, scope=scope, min_depth=min_depth,
          depth_multiplier=depth_multiplier)

      # Auxiliary head logits, attached to Mixed_6e. This extra classifier
      # acts as a regularizer during training.
      if create_aux_logits and num_classes:
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
          aux_logits = end_points['Mixed_6e']
          with tf.variable_scope('AuxLogits'):
            aux_logits = slim.avg_pool2d(
                aux_logits, [5, 5], stride=3, padding='VALID',
                scope='AvgPool_1a_5x5')
            aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1],
                                     scope='Conv2d_1b_1x1')

            # Shape of feature map before the final layer.
            kernel_size = _reduced_kernel_size_for_small_input(
                aux_logits, [5, 5])
            aux_logits = slim.conv2d(
                aux_logits, depth(768), kernel_size,
                weights_initializer=trunc_normal(0.01),
                padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size))
            aux_logits = slim.conv2d(
                aux_logits, num_classes, [1, 1], activation_fn=None,
                normalizer_fn=None, weights_initializer=trunc_normal(0.001),
                scope='Conv2d_2b_1x1')
            if spatial_squeeze:
              aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
            end_points['AuxLogits'] = aux_logits

      # Final pooling and prediction
      with tf.variable_scope('Logits'):
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='GlobalPool')
          end_points['global_pool'] = net
        else:
          # Pooling with a fixed kernel size.
          kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
          net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                scope='AvgPool_1a_{}x{}'.format(*kernel_size))
          end_points['AvgPool_1a'] = net
        if not num_classes:
          return net, end_points
        # 1 x 1 x 2048
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        end_points['PreLogits'] = net
        # 2048
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        # 1000
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v3.default_image_size = 299
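# Output shapes with the defaults above (batch size B, num_classes C):
#   end_points['PreLogits'] is [B, 1, 1, 2048] for 299 x 299 inputs;
#   logits is [B, C] when spatial_squeeze=True, else [B, 1, 1, C].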


def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Define kernel size which is automatically reduced for small input.

  If the shape of the input images is unknown at graph construction time this
  function assumes that the input images are large enough.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    a list of length 2 with the kernel size to use, reduced if the input's
    spatial dimensions are smaller than the desired kernel size.

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape is
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return tf.stack([tf.minimum(shape[1], kernel_size[0]),
                       tf.minimum(shape[2], kernel_size[1])])
  """
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out
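

# For example, a 299 x 299 input yields an 8 x 8 Mixed_7c feature map, so
# _reduced_kernel_size_for_small_input(net, [8, 8]) returns [8, 8]; a
# 224 x 224 input yields a 5 x 5 map, and the kernel is reduced to [5, 5].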


inception_v3_arg_scope = inception_utils.inception_arg_scope
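

# Example use of inception_v3 (a minimal sketch; num_classes=1001 assumes the
# ImageNet-plus-background-class convention used by the TF-Slim checkpoints):
#
#   images = tf.placeholder(tf.float32, [None, 299, 299, 3])
#   with slim.arg_scope(inception_v3_arg_scope()):
#     logits, end_points = inception_v3(images, num_classes=1001,
#                                       is_training=False)
#   probabilities = end_points['Predictions']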