@@ -243,14 +243,20 @@ class DFPClassicalTrustRegionHessian(optx.AbstractQuasiNewton):
 
 
 atol = rtol = 1e-8
-minimisers = (
+_general_minimisers = (
     optx.NelderMead(rtol, atol),
     optx.BFGS(rtol, atol, use_inverse=False),
     optx.BFGS(rtol, atol, use_inverse=True),
     BFGSDampedNewton(rtol, atol),
     BFGSIndirectDampedNewton(rtol, atol),
     # Tighter tolerance needed to have BFGSDogleg pass the JVP test.
     BFGSDogleg(1e-10, 1e-10),
+    optx.OptaxMinimiser(optax.adam(learning_rate=3e-3), rtol=rtol, atol=atol),
+    # optax.lbfgs includes their linesearch by default
+    optx.OptaxMinimiser(optax.lbfgs(), rtol=rtol, atol=atol),
+)
+
+_minim_only = (
     BFGSClassicalTrustRegionHessian(rtol, atol),
     BFGSLinearTrustRegionHessian(rtol, atol),
     BFGSLinearTrustRegion(rtol, atol),
@@ -264,18 +270,32 @@ class DFPClassicalTrustRegionHessian(optx.AbstractQuasiNewton):
     optx.GradientDescent(1.5e-2, rtol, atol),
     # Tighter tolerance needed to have NonlinearCG pass the JVP test.
     optx.NonlinearCG(1e-10, 1e-10),
-    optx.OptaxMinimiser(optax.adam(learning_rate=3e-3), rtol=rtol, atol=atol),
+    # explicitly including a linesearch
+    optx.OptaxMinimiser(
+        optax.chain(
+            optax.sgd(learning_rate=1.0),
+            optax.scale_by_zoom_linesearch(15, curv_rtol=jnp.inf),
+        ),
+        rtol=rtol,
+        atol=atol,
+    ),
+    optx.OptaxMinimiser(
+        optax.chain(
+            optax.sgd(learning_rate=1.0),
+            optax.scale_by_backtracking_linesearch(15),
+        ),
+        rtol=rtol,
+        atol=atol,
+    ),
 )
 
+minimisers = _general_minimisers + _minim_only
+
 # the minimisers can handle least squares problems, but the least squares
 # solvers cannot handle general minimisation problems.
-least_squares_optimisers = _lsqr_only + minimisers
-# Remove ones that work, but are just pretty bad!
-least_squares_optimisers = [
-    x
-    for x in least_squares_optimisers
-    if not isinstance(x, (optx.GradientDescent, optx.NonlinearCG))
-]
+# without the ones that work, but are just pretty bad!
+least_squares_optimisers = _lsqr_only + _general_minimisers
 
 
 #
 # MINIMISATION PROBLEMS
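
For reference, the OptaxMinimiser-with-linesearch solvers added above are constructed and run like any other Optimistix minimiser. A minimal sketch of how one of them would be exercised (not part of the commit; the quadratic loss, start point, and tolerances are made up for illustration, and it assumes an optax version that ships scale_by_zoom_linesearch):

import jax.numpy as jnp
import optax
import optimistix as optx

def loss(y, args):
    # optx.minimise expects fn(y, args) -> scalar.
    return jnp.sum((y - 1.0) ** 2)

# SGD with unit learning rate; the zoom linesearch chooses the actual step size.
optim = optax.chain(
    optax.sgd(learning_rate=1.0),
    optax.scale_by_zoom_linesearch(max_linesearch_steps=15),
)
solver = optx.OptaxMinimiser(optim, rtol=1e-8, atol=1e-8)
sol = optx.minimise(loss, solver, jnp.zeros(2))
# sol.value should be close to [1., 1.]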