
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DCGAN generator and discriminator from https://arxiv.org/abs/1511.06434."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from math import log

from six.moves import xrange

import tensorflow as tf

slim = tf.contrib.slim


def _validate_image_inputs(inputs):
  inputs.get_shape().assert_has_rank(4)
  inputs.get_shape()[1:3].assert_is_fully_defined()
  if inputs.get_shape()[1] != inputs.get_shape()[2]:
    raise ValueError('Input tensor does not have equal width and height: ',
                     inputs.get_shape()[1:3])
  width = inputs.get_shape().as_list()[1]
  if log(width, 2) != int(log(width, 2)):
    raise ValueError('Input tensor `width` is not a power of 2: ', width)


# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
# setups need the gradient of gradient FusedBatchNormGrad.
def discriminator(inputs,
                  depth=64,
                  is_training=True,
                  reuse=None,
                  scope='Discriminator',
                  fused_batch_norm=False):
  """Discriminator network for DCGAN.

  Construct discriminator network from inputs to the final endpoint.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels]. Must be
      floating point.
    depth: Number of channels in first convolution layer.
    is_training: Whether the network is for training or not.
    reuse: Whether or not the network variables should be reused. `scope`
      must be given to be reused.
    scope: Optional variable_scope.
    fused_batch_norm: If `True`, use a faster, fused implementation of
      batch norm.

  Returns:
    logits: The pre-softmax activations, a tensor of size [batch_size, 1].
    end_points: a dictionary from components of the network to their
      activation.

  Raises:
    ValueError: If the input image shape is not 4-dimensional, if the spatial
      dimensions aren't defined at graph construction time, if the spatial
      dimensions aren't square, or if the spatial dimensions aren't a power of
      two.
  """
  normalizer_fn = slim.batch_norm
  normalizer_fn_args = {
      'is_training': is_training,
      'zero_debias_moving_mean': True,
      'fused': fused_batch_norm,
  }

  _validate_image_inputs(inputs)
  inp_shape = inputs.get_shape().as_list()[1]

  end_points = {}
  with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
    with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
      with slim.arg_scope([slim.conv2d],
                          stride=2,
                          kernel_size=4,
                          activation_fn=tf.nn.leaky_relu):
        net = inputs
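        # Each stride-2 convolution halves the spatial dimensions and doubles
        # the channel depth; batch norm is skipped on the first layer, as the
        # DCGAN architecture guidelines suggest.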
        for i in xrange(int(log(inp_shape, 2))):
          scope = 'conv%i' % (i + 1)
          current_depth = depth * 2**i
          normalizer_fn_ = None if i == 0 else normalizer_fn
          net = slim.conv2d(
              net, current_depth, normalizer_fn=normalizer_fn_, scope=scope)
          end_points[scope] = net

        logits = slim.conv2d(net, 1, kernel_size=1, stride=1, padding='VALID',
                             normalizer_fn=None, activation_fn=None)
        logits = tf.reshape(logits, [-1, 1])
        end_points['logits'] = logits

  return logits, end_points
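

# A minimal usage sketch, assuming TF 1.x graph mode (this helper and its
# shapes are illustrative, not part of the original module): builds the
# discriminator graph on a batch of 32x32 RGB images, yielding
# [batch_size, 1] logits and per-layer activations in `end_points`.
def _example_discriminator_usage():
  """Hypothetical helper: builds the discriminator on placeholder images."""
  images = tf.placeholder(tf.float32, [None, 32, 32, 3])
  logits, end_points = discriminator(images, depth=64, is_training=True)
  return logits, end_points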


# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
# setups need the gradient of gradient FusedBatchNormGrad.
def generator(inputs,
              depth=64,
              final_size=32,
              num_outputs=3,
              is_training=True,
              reuse=None,
              scope='Generator',
              fused_batch_norm=False):
  99. """Generator network for DCGAN.
  100. Construct generator network from inputs to the final endpoint.
  101. Args:
  102. inputs: A tensor with any size N. [batch_size, N]
  103. depth: Number of channels in last deconvolution layer.
  104. final_size: The shape of the final output.
  105. num_outputs: Number of output features. For images, this is the number of
  106. channels.
  107. is_training: whether is training or not.
  108. reuse: Whether or not the network has its variables should be reused. scope
  109. must be given to be reused.
  110. scope: Optional variable_scope.
  111. fused_batch_norm: If `True`, use a faster, fused implementation of
  112. batch norm.
  113. Returns:
  114. logits: the pre-softmax activations, a tensor of size
  115. [batch_size, 32, 32, channels]
  116. end_points: a dictionary from components of the network to their activation.
  117. Raises:
  118. ValueError: If `inputs` is not 2-dimensional.
  119. ValueError: If `final_size` isn't a power of 2 or is less than 8.
  120. """
  normalizer_fn = slim.batch_norm
  normalizer_fn_args = {
      'is_training': is_training,
      'zero_debias_moving_mean': True,
      'fused': fused_batch_norm,
  }

  inputs.get_shape().assert_has_rank(2)
  if log(final_size, 2) != int(log(final_size, 2)):
    raise ValueError('`final_size` (%i) must be a power of 2.' % final_size)
  if final_size < 8:
    raise ValueError('`final_size` (%i) must be at least 8.' % final_size)

  end_points = {}
  num_layers = int(log(final_size, 2)) - 1
  with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
    with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
      with slim.arg_scope([slim.conv2d_transpose],
                          normalizer_fn=normalizer_fn,
                          stride=2,
                          kernel_size=4):
        net = tf.expand_dims(tf.expand_dims(inputs, 1), 1)

        # First upscaling is different because it takes the input vector.
        current_depth = depth * 2 ** (num_layers - 1)
        scope = 'deconv1'
        net = slim.conv2d_transpose(
            net, current_depth, stride=1, padding='VALID', scope=scope)
        end_points[scope] = net

        for i in xrange(2, num_layers):
          scope = 'deconv%i' % (i)
          current_depth = depth * 2 ** (num_layers - i)
          net = slim.conv2d_transpose(net, current_depth, scope=scope)
          end_points[scope] = net

        # Last layer has different normalizer and activation.
        scope = 'deconv%i' % (num_layers)
        net = slim.conv2d_transpose(
            net, depth, normalizer_fn=None, activation_fn=None, scope=scope)
        end_points[scope] = net

        # Convert to proper channels.
        scope = 'logits'
        logits = slim.conv2d(
            net,
            num_outputs,
            normalizer_fn=None,
            activation_fn=None,
            kernel_size=1,
            stride=1,
            padding='VALID',
            scope=scope)
        end_points[scope] = logits

        logits.get_shape().assert_has_rank(4)
        logits.get_shape().assert_is_compatible_with(
            [None, final_size, final_size, num_outputs])

  return logits, end_points
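

# A minimal usage sketch, assuming TF 1.x graph mode (this helper and its
# shapes are illustrative): builds a 32x32 RGB generator from 64-dimensional
# noise vectors. With final_size=32 the network stacks four deconv layers
# (4x4 -> 8x8 -> 16x16 -> 32x32) before the 1x1 output convolution.
def _example_generator_usage():
  """Hypothetical helper: builds the generator on random noise."""
  noise = tf.random_normal([16, 64])  # [batch_size, noise_dim]
  images, end_points = generator(
      noise, depth=64, final_size=32, num_outputs=3)
  return images, end_points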