Merge pull request #435 from dreycenfoiles/Ask-Tell

ChrisRackauckas · web-flow · commit 9182073b9d5c · 2023-09-21T20:19:58.000-04:00
Parallel Surrogate Models Through Ask-tell Interface
diff --git a/docs/pages.jl b/docs/pages.jl
@@ -16,6 +16,7 @@ pages = ["index.md"
              "Gradient Enhanced Kriging" => "gek.md",
              "GEKPLS" => "gekpls.md",
              "MOE" => "moe.md",
+             "Parallel Optimization" => "parallel.md"
          ]
          "User guide" => [
              "Samples" => "samples.md",
diff --git a/docs/src/parallel.md b/docs/src/parallel.md
@@ -0,0 +1,56 @@
+# Parallel Optimization
+
+There are some situations where it can be beneficial to run multiple optimizations in parallel. For example, if your objective function is very expensive to evaluate, you may want to run multiple evaluations in parallel. 
+
+## Ask-Tell Interface
+
+To enable parallel optimization, we make use of an Ask-Tell interface. The user will construct the initial surrogate model the same way as for non-parallel surrogate models, but instead of using `surrogate_optimize`, the user will use `potential_optimal_points`. This will return the coordinates of points that the optimizer has determined are most useful to evaluate next. How the user evaluates these points is up to them. The Ask-Tell interface requires more manual control than `surrogate_optimize`, but it allows for more flexibility. After the point has been evaluated, the user will *tell* the surrogate model the new points with the `add_point!` function.
+
+## Virtual Points
+
+To ensure that points of interest returned by `potential_optimal_points` are sufficiently far from each other, the function makes use of *virtual points*. They are used as follows:
+1. `potential_optimal_points` is told to return `n` points.
+2. The point with the highest merit function value is selected.
+3. This point is now treated as a virtual point and is assigned a temporary value that changes the landscape of the merit function. How the the temporary value is chosen depends on the strategy used. (see below)
+4. The point with the new highest merit is selected.
+5. The process is repeated until `n` points have been selected.
+
+The following strategies are available for virtual point selection for all optimization algorithms:
+
+- "Minimum Constant Liar (CLmin)":
+  - The virtual point is assigned using the lowest known value of the merit function across all evaluated points.
+- "Mean Constant Liar (CLmean)":
+  - The virtual point is assigned using the mean of the merit function across all evaluated points.
+- "Maximum Constant Liar (CLmax)":
+  - The virtual point is assigned using the great known value of the merit function across all evaluated points.
+
+For Kriging surrogates, specifically, the above and follow strategies are available:  
+
+- "Kriging Believer (KB)":
+  - The virtual point is assigned using the mean of the Kriging surrogate at the virtual point.
+- "Kriging Believer Upper Bound (KBUB)":
+  - The virtual point is assigned using 3$\sigma$ above the mean of the Kriging surrogate at the virtual point.
+- "Kriging Believer Lower Bound (KBLB)":
+  - The virtual point is assigned using 3$\sigma$ below the mean of the Kriging surrogate at the virtual point.
+
+
+In general, CLmin and KBLB tend to favor exploitation while CLmax and KBUB tend to favor exploration. CLmean and KB tend to be a compromise between the two.
+
+## Examples
+
+```@example
+using Surrogates
+
+lb = 0.0
+ub = 10.0
+f = x -> log(x) * exp(x)
+x = sample(5, lb, ub, SobolSample())
+y = f.(x)
+
+my_k = Kriging(x, y, lb, ub)
+
+for _ in 1:10
+    new_x, eis = potential_optimal_points(EI(), lb, ub, my_k, SobolSample(), 3, CLmean!)
+    add_point!(my_k, new_x, f.(new_x))
+end
+```
diff --git a/src/Optimization.jl b/src/Optimization.jl
@@ -3,6 +3,14 @@ using Distributions
 using Zygote
 
 abstract type SurrogateOptimizationAlgorithm end
+abstract type ParallelStrategy end
+
+struct KrigingBeliever <: ParallelStrategy end
+struct KrigingBelieverUpperBound <: ParallelStrategy end
+struct KrigingBelieverLowerBound <: ParallelStrategy end
+struct MinimumConstantLiar <: ParallelStrategy end
+struct MaximumConstantLiar <: ParallelStrategy end
+struct MeanConstantLiar <: ParallelStrategy end
 
 #single objective optimization
 struct SRBF <: SurrogateOptimizationAlgorithm end
@@ -375,6 +383,214 @@ function surrogate_optimize(obj::Function, ::SRBF, lb::Number, ub::Number,
     end
 end
 
+# Ask SRBF ND
+function potential_optimal_points(::SRBF, strategy, lb, ub, surr::AbstractSurrogate, sample_type::SamplingAlgorithm, n_parallel;
+    num_new_samples = 500)
+
+    scale = 0.2
+    w_range = [0.3, 0.5, 0.7, 0.95]
+    w_cycle = Iterators.cycle(w_range)
+
+    w, state = iterate(w_cycle)
+
+    #Vector containing size in each direction
+    box_size = lb - ub
+    dtol = 1e-3 * norm(ub - lb)
+    d = length(surr.x)
+    incumbent_x = surr.x[argmin(surr.y)]
+
+    new_lb = incumbent_x .- 3 * scale * norm(incumbent_x .- lb)
+    new_ub = incumbent_x .+ 3 * scale * norm(incumbent_x .- ub)
+
+    @inbounds for i in 1:length(new_lb)
+        if new_lb[i] < lb[i]
+            new_lb = collect(new_lb)
+            new_lb[i] = lb[i]
+        end
+        if new_ub[i] > ub[i]
+            new_ub = collect(new_ub)
+            new_ub[i] = ub[i]
+        end
+    end
+
+    new_sample = sample(num_new_samples, new_lb, new_ub, sample_type)
+    s = zeros(eltype(surr.x[1]), num_new_samples)
+    for j in 1:num_new_samples
+        s[j] = surr(new_sample[j])
+    end
+    s_max = maximum(s)
+    s_min = minimum(s)
+
+    d_min = norm(box_size .+ 1)
+    d_max = 0.0
+    for r in 1:length(surr.x)
+        for c in 1:num_new_samples
+            distance_rc = norm(surr.x[r] .- new_sample[c])
+            if distance_rc > d_max
+                d_max = distance_rc
+            end
+            if distance_rc < d_min
+                d_min = distance_rc
+            end
+        end
+    end
+
+    tmp_surr = deepcopy(surr)
+
+    
+    new_addition = 0
+    diff_x = zeros(eltype(surr.x[1]), d)
+
+    evaluation_of_merit_function = zeros(float(eltype(surr.x[1])), num_new_samples)
+    proposed_points_x = Vector{typeof(surr.x[1])}(undef, n_parallel)
+    merit_of_proposed_points = zeros(Float64, n_parallel)
+
+    while new_addition < n_parallel
+        #find minimum
+
+        @inbounds for r in eachindex(evaluation_of_merit_function)
+            evaluation_of_merit_function[r] = merit_function(new_sample[r], w, tmp_surr,
+                s_max, s_min, d_max, d_min,
+                box_size)
+        end
+
+        min_index = argmin(evaluation_of_merit_function)
+        new_min_x = new_sample[min_index]
+        min_x_merit = evaluation_of_merit_function[min_index]
+
+        for l in 1:d
+            diff_x[l] = norm(surr.x[l] .- new_min_x)
+        end
+        bit_x = diff_x .> dtol
+        #new_min_x has to have some distance from krig.x
+        if false in bit_x
+            #The new_point is not actually that new, discard it!
+
+            deleteat!(evaluation_of_merit_function, min_index)
+            deleteat!(new_sample, min_index)
+
+            if length(new_sample) == 0
+                println("Out of sampling points")
+                index = argmin(surr.y)
+                return (surr.x[index], surr.y[index])
+            end
+        else
+            new_addition += 1
+            proposed_points_x[new_addition] = new_min_x
+            merit_of_proposed_points[new_addition] = min_x_merit
+
+            # Update temporary surrogate using provided strategy
+            calculate_liars(strategy, tmp_surr, surr, new_min_x)
+        end
+
+        #4) Update w
+        w, state = iterate(w_cycle, state)
+    end
+
+    return (proposed_points_x, merit_of_proposed_points)
+end
+
+# Ask SRBF 1D
+function potential_optimal_points(::SRBF, strategy, lb::Number, ub::Number, surr::AbstractSurrogate,
+    sample_type::SamplingAlgorithm, n_parallel;
+    num_new_samples = 500)
+    scale = 0.2
+    success = 0
+    w_range = [0.3, 0.5, 0.7, 0.95]
+    w_cycle = Iterators.cycle(w_range)
+
+    w, state = iterate(w_cycle)
+
+    box_size = lb - ub
+    success = 0
+    failures = 0
+    dtol = 1e-3 * norm(ub - lb)
+    num_of_iterations = 0
+
+    #1) Sample near incumbent (the 2 fraction is arbitrary here)
+    incumbent_x = surr.x[argmin(surr.y)]
+
+    new_lb = incumbent_x - scale * norm(incumbent_x - lb)
+    new_ub = incumbent_x + scale * norm(incumbent_x - ub)
+    if new_lb < lb
+        new_lb = lb
+    end
+    if new_ub > ub
+        new_ub = ub
+    end
+
+    new_sample = sample(num_new_samples, new_lb, new_ub, sample_type)
+
+    #2) Create  merit function
+    s = zeros(eltype(surr.x[1]), num_new_samples)
+    for j in 1:num_new_samples
+        s[j] = surr(new_sample[j])
+    end
+    s_max = maximum(s)
+    s_min = minimum(s)
+
+    d_min = box_size + 1
+    d_max = 0.0
+    for r in 1:length(surr.x)
+        for c in 1:num_new_samples
+            distance_rc = norm(surr.x[r] - new_sample[c])
+            if distance_rc > d_max
+                d_max = distance_rc
+            end
+            if distance_rc < d_min
+                d_min = distance_rc
+            end
+        end
+    end
+
+    new_addition = 0
+    proposed_points_x = zeros(eltype(new_sample[1]), n_parallel)
+    merit_of_proposed_points = zeros(eltype(new_sample[1]), n_parallel)
+
+    # Temporary surrogate for virtual points
+    tmp_surr = deepcopy(surr)
+
+    # Loop until we have n_parallel new points
+    while new_addition < n_parallel
+
+        #3) Evaluate merit function at the sampled points in parallel 
+        evaluation_of_merit_function = merit_function.(new_sample, w, tmp_surr, s_max,
+            s_min, d_max, d_min, box_size)
+
+        #find minimum
+        min_index = argmin(evaluation_of_merit_function)
+        new_min_x = new_sample[min_index]
+        min_x_merit = evaluation_of_merit_function[min_index]
+
+        diff_x = abs.(tmp_surr.x .- new_min_x)
+        bit_x = diff_x .> dtol
+        #new_min_x has to have some distance from krig.x
+        if false in bit_x
+            #The new_point is not actually that new, discard it!
+            deleteat!(evaluation_of_merit_function, min_index)
+            deleteat!(new_sample, min_index)
+            if length(new_sample) == 0
+                println("Out of sampling points")
+                return (proposed_points_x[1:new_addition],
+                    merit_of_proposed_points[1:new_addition])
+            end
+        else
+            new_addition += 1
+            proposed_points_x[new_addition] = new_min_x
+            merit_of_proposed_points[new_addition] = min_x_merit
+
+            # Update temporary surrogate using provided strategy
+            calculate_liars(strategy, tmp_surr, surr, new_min_x)
+        end
+
+        #4) Update w
+        w, state = iterate(w_cycle, state)
+    end
+
+    return (proposed_points_x, merit_of_proposed_points)
+end
+
+
 """
 This is an implementation of Lower Confidence Bound (LCB),
 a popular acquisition function in Bayesian optimization.
@@ -569,6 +785,73 @@ function surrogate_optimize(obj::Function, ::EI, lb::Number, ub::Number, krig,
     println("Completed maximum number of iterations")
 end
 
+# Ask EI 1D & ND
+function potential_optimal_points(::EI, strategy, lb, ub, krig, sample_type::SamplingAlgorithm, n_parallel::Number;
+             num_new_samples = 100)
+
+    lb = krig.lb
+    ub = krig.ub
+
+    dtol = 1e-3 * norm(ub - lb)
+    eps = 0.01
+
+    tmp_krig = deepcopy(krig) # Temporary copy of the kriging model to store virtual points
+
+    new_x_max = Vector{typeof(tmp_krig.x[1])}(undef, n_parallel)             # New x point
+    new_EI_max = zeros(eltype(tmp_krig.x[1]), n_parallel)                    # EI at new x point
+
+    for i in 1:n_parallel
+        # Sample lots of points from the design space -- we will evaluate the EI function at these points
+        new_sample = sample(num_new_samples, lb, ub, sample_type)
+
+        # Find the best point so far
+        f_min = minimum(tmp_krig.y)
+
+        # Allocate some arrays
+        evaluations = zeros(eltype(tmp_krig.x[1]), num_new_samples)  # Holds EI function evaluations
+        point_found = false                                     # Whether we have found a new point to test
+        while point_found == false
+            # For each point in the sample set, evaluate the Expected Improvement function
+            for j in eachindex(new_sample)
+                std = std_error_at_point(tmp_krig, new_sample[j])
+                u = tmp_krig(new_sample[j])
+                if abs(std) > 1e-6
+                    z = (f_min - u - eps) / std
+                else
+                    z = 0
+                end
+                # Evaluate EI at point new_sample[j]
+                evaluations[j] = (f_min - u - eps) * cdf(Normal(), z) +
+                                 std * pdf(Normal(), z)
+            end
+            # find the sample which maximizes the EI function
+            index_max = argmax(evaluations)
+            x_new = new_sample[index_max]   # x point which maximized EI
+            y_new = maximum(evaluations)    # EI at the new point
+            diff_x = [norm(prev_point .- x_new) for prev_point in tmp_krig.x]
+            bit_x = [diff_x_point .> dtol for diff_x_point in diff_x]
+            #new_min_x has to have some distance from tmp_krig.x
+            if false in bit_x
+                #The new_point is not actually that new, discard it!
+                deleteat!(evaluations, index_max)
+                deleteat!(new_sample, index_max)
+                if length(new_sample) == 0
+                    println("Out of sampling points")
+                    index = argmin(tmp_krig.y)
+                    return (tmp_krig.x[index], tmp_krig.y[index])
+                end
+            else
+                point_found = true
+                new_x_max[i] = x_new
+                new_EI_max[i] = y_new
+                calculate_liars(strategy, tmp_krig, krig, x_new)
+            end
+        end
+    end
+
+    return (new_x_max, new_EI_max)
+end
+
 """
 This is an implementation of Expected Improvement (EI),
 arguably the most popular acquisition function in Bayesian optimization.
diff --git a/src/Surrogates.jl b/src/Surrogates.jl
@@ -18,6 +18,7 @@ include("VariableFidelity.jl")
 include("Earth.jl")
 include("GEK.jl")
 include("GEKPLS.jl")
+include("VirtualStrategy.jl")
 
 current_surrogates = ["Kriging", "LinearSurrogate", "LobachevskySurrogate",
     "NeuralSurrogate",
@@ -88,6 +89,11 @@ export LobachevskyStructure, NeuralStructure, RandomForestStructure,
 export WendlandStructure
 export AbstractSurrogate, SamplingAlgorithm
 export Kriging, RadialBasis, add_point!, current_estimate, std_error_at_point
+# Parallelization Strategies
+export potential_optimal_points
+export MinimumConstantLiar, MaximumConstantLiar, MeanConstantLiar, KrigingBeliever,
+       KrigingBelieverUpperBound, KrigingBelieverLowerBound
+
 # radial basis functions
 export linearRadial, cubicRadial, multiquadricRadial, thinplateRadial
 
diff --git a/src/VirtualStrategy.jl b/src/VirtualStrategy.jl
diff --git a/test/parallel.jl b/test/parallel.jl

Original file line number	Diff line number	Diff line change
`@@ -16,6 +16,7 @@ pages = ["index.md"`
`16`	`16`	`"Gradient Enhanced Kriging" => "gek.md",`
`17`	`17`	`"GEKPLS" => "gekpls.md",`
`18`	`18`	`"MOE" => "moe.md",`
	`19`	`+ "Parallel Optimization" => "parallel.md"`
`19`	`20`	`]`
`20`	`21`	`"User guide" => [`
`21`	`22`	`"Samples" => "samples.md",`