
Commit 6419751

siddharth-agrawal authored and dustinvtran committed

Add regularization losses in implicit_klqp, map, wake_sleep (#823)

* Add regularization losses in implicit_klqp, map, wake_sleep
* Add test for MAP regularization
1 parent 38381ac · commit 6419751

4 files changed: +48, −4 lines


edward/inferences/implicit_klqp.py

Lines changed: 10 additions & 1 deletion

@@ -48,6 +48,9 @@ class ImplicitKLqp(GANInference):
   If `scale` has more than one item, then in order to scale
   its corresponding output, `discriminator` must output a
   dictionary of same size and keys as `scale`.
+
+  The objective function also adds to itself a summation over all
+  tensors in the `REGULARIZATION_LOSSES` collection.
   """
   def __init__(self, latent_vars, data=None, discriminator=None,
                global_vars=None):
@@ -203,8 +206,14 @@ def build_loss_and_gradients(self, var_list):
                      for key in six.iterkeys(self.scale)]
     scaled_ratio = tf.reduce_sum(scaled_ratio)

+    reg_terms_d = tf.losses.get_regularization_losses(scope="Disc")
+    reg_terms_all = tf.losses.get_regularization_losses()
+    reg_terms = [r for r in reg_terms_all if r not in reg_terms_d]
+
     # Form variational objective.
-    loss = -(pbeta_log_prob - qbeta_log_prob + scaled_ratio)
+    loss = -(pbeta_log_prob - qbeta_log_prob + scaled_ratio -
+             tf.reduce_sum(reg_terms))
+    loss_d = loss_d + tf.reduce_sum(reg_terms_d)

     var_list_d = tf.get_collection(
         tf.GraphKeys.TRAINABLE_VARIABLES, scope="Disc")
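The key detail in this hunk is the scope split: penalties registered by discriminator variables, which are built under the "Disc" variable scope (the same scope used to collect `var_list_d`), are charged to the discriminator loss `loss_d`, while every remaining entry in the collection enters the variational objective. A minimal sketch of that filtering under TF 1.x, with hypothetical variable names:

import tensorflow as tf

l2 = tf.contrib.layers.l2_regularizer(scale=0.1)

with tf.variable_scope("Disc"):
  # Registered under "Disc/..."; matched by scope="Disc" below.
  w_d = tf.get_variable("w_d", shape=[2], regularizer=l2)

# Registered outside "Disc"; belongs to the variational objective.
w_q = tf.get_variable("w_q", shape=[2], regularizer=l2)

reg_terms_d = tf.losses.get_regularization_losses(scope="Disc")
reg_terms_all = tf.losses.get_regularization_losses()
reg_terms = [r for r in reg_terms_all if r not in reg_terms_d]

print([r.name for r in reg_terms_d])  # only the Disc/w_d penalty
print([r.name for r in reg_terms])    # only the w_q penalty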

edward/inferences/map.py

Lines changed: 5 additions & 1 deletion

@@ -71,6 +71,9 @@ class MAP(VariationalInference):
   unconstrained; see, e.g., `qsigma` above. This is different than
   performing MAP on the unconstrained space: in general, the MAP of
   the transform is not the transform of the MAP.
+
+  The objective function also adds to itself a summation over all
+  tensors in the `REGULARIZATION_LOSSES` collection.
   """
   def __init__(self, latent_vars=None, data=None):
     """Create an inference algorithm.
@@ -142,7 +145,8 @@ def build_loss_and_gradients(self, var_list):
       p_log_prob += tf.reduce_sum(
           self.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

-    loss = -p_log_prob
+    reg_penalty = tf.reduce_sum(tf.losses.get_regularization_losses())
+    loss = -p_log_prob + reg_penalty

     grads = tf.gradients(loss, var_list)
     grads_and_vars = list(zip(grads, var_list))
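With this change, MAP minimizes the negative log joint plus whatever has accumulated in `REGULARIZATION_LOSSES`. A tensor lands in that collection whenever a `regularizer` is passed to `tf.get_variable` (or a `kernel_regularizer` to a layer). A minimal sketch of the mechanism, with a hypothetical variable:

import tensorflow as tf

# l2_regularizer contributes scale * tf.nn.l2_loss(w), i.e.
# scale * sum(w**2) / 2, to the REGULARIZATION_LOSSES collection.
regularizer = tf.contrib.layers.l2_regularizer(scale=0.5)
w = tf.get_variable("w_example", shape=[], regularizer=regularizer)

reg_penalty = tf.reduce_sum(tf.losses.get_regularization_losses())

with tf.Session() as sess:
  sess.run(w.assign(2.0))
  print(sess.run(reg_penalty))  # 0.5 * (2.0 ** 2 / 2) = 1.0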

edward/inferences/wake_sleep.py

Lines changed: 8 additions & 2 deletions

@@ -51,6 +51,9 @@ class WakeSleep(VariationalInference):

   where $z^{(s)} \sim q(z; \lambda)$ and $\\beta^{(s)}
   \sim q(\\beta)$.
+
+  The objective function also adds to itself a summation over all
+  tensors in the `REGULARIZATION_LOSSES` collection.
   """
   def __init__(self, *args, **kwargs):
     super(WakeSleep, self).__init__(*args, **kwargs)
@@ -129,15 +132,18 @@ def build_loss_and_gradients(self, var_list):

     p_log_prob = tf.reduce_mean(p_log_prob)
     q_log_prob = tf.reduce_mean(q_log_prob)
+    reg_penalty = tf.reduce_sum(tf.losses.get_regularization_losses())

     if self.logging:
       tf.summary.scalar("loss/p_log_prob", p_log_prob,
                         collections=[self._summary_key])
       tf.summary.scalar("loss/q_log_prob", q_log_prob,
                         collections=[self._summary_key])
+      tf.summary.scalar("loss/reg_penalty", reg_penalty,
+                        collections=[self._summary_key])

-    loss_p = -p_log_prob
-    loss_q = -q_log_prob
+    loss_p = -p_log_prob + reg_penalty
+    loss_q = -q_log_prob + reg_penalty

     q_rvs = list(six.itervalues(self.latent_vars))
     q_vars = [v for v in var_list
tests/inferences/test_map.py

Lines changed: 25 additions & 0 deletions

@@ -26,6 +26,31 @@ def test_normalnormal_run(self):

       self.assertAllClose(qmu.mean().eval(), 0)

+  def test_normalnormal_regularization(self):
+    with self.test_session() as sess:
+      x_data = np.array([5.0] * 50, dtype=np.float32)
+
+      mu = Normal(loc=0.0, scale=1.0)
+      x = Normal(loc=mu, scale=1.0, sample_shape=50)
+
+      qmu = PointMass(params=tf.Variable(1.0))
+
+      inference = ed.MAP({mu: qmu}, data={x: x_data})
+      inference.run(n_iter=1000)
+      mu_val = qmu.mean().eval()
+
+      # regularized solution
+      regularizer = tf.contrib.layers.l2_regularizer(scale=1.0)
+      mu_reg = tf.get_variable("mu_reg", shape=[],
+                               regularizer=regularizer)
+      x_reg = Normal(loc=mu_reg, scale=1.0, sample_shape=50)
+
+      inference_reg = ed.MAP(None, data={x_reg: x_data})
+      inference_reg.run(n_iter=1000)
+
+      mu_reg_val = mu_reg.eval()
+      self.assertAllClose(mu_val, mu_reg_val)
+
 if __name__ == '__main__':
   ed.set_seed(42)
   tf.test.main()
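The test leans on a standard equivalence: MAP with a $\mathcal{N}(0, 1)$ prior on $\mu$ and maximum likelihood with an L2 penalty at `scale=1.0` optimize the same objective. The first run maximizes $\log p(x \mid \mu) + \log \mathcal{N}(\mu; 0, 1) = \log p(x \mid \mu) - \mu^2/2 + \text{const}$, while the second minimizes $-\log p(x \mid \mu) + \mu^2/2$, since `tf.contrib.layers.l2_regularizer(scale=1.0)` contributes $1.0 \cdot \sum_i w_i^2 / 2$ via `tf.nn.l2_loss`. Up to an additive constant the objectives coincide, so both runs should converge to the conjugate posterior mode $\hat{\mu} = n\bar{x}/(n + 1) = 250/51 \approx 4.90$.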
