collect approach: materialize ProductIterator generators with `collect` and index them directly

sshin23 · sshin23 · commit e2a45ec85c6a · 2024-04-04T12:57:34.000-05:00
diff --git a/ext/ExaModelsCUDA.jl b/ext/ExaModelsCUDA.jl
@@ -4,9 +4,6 @@ import ExaModels: ExaModels, NLPModels
 import CUDA: CUDA, CUDABackend, CuArray
 
 ExaModels.ExaCore(backend::CUDABackend) = ExaModels.ExaCore(Float64, backend)
-ExaModels.convert_array(v::Base.Iterators.ProductIterator, backend::CUDABackend) =
-    Base.product((ExaModels.convert_array(i, backend) for i in v.iterators)...)
-ExaModels.convert_array(v::UnitRange, backend::CUDABackend) = v
 ExaModels.convert_array(v, backend::CUDABackend) = CuArray(v)
 
 end
diff --git a/ext/ExaModelsKernelAbstractions.jl b/ext/ExaModelsKernelAbstractions.jl
@@ -137,7 +137,7 @@ function ExaModels.ExaModel(
 end
 
 function _conaug_structure!(backend, cons, sparsity)
-    kers(backend)(sparsity, cons.f, cons.itr, cons.oa; ndrange = size(cons.itr))
+    kers(backend)(sparsity, cons.f, cons.itr, cons.oa; ndrange = length(cons.itr))
     _conaug_structure!(backend, cons.inner, sparsity)
     synchronize(backend)
 end
@@ -214,7 +214,7 @@ function ExaModels.obj(
     end
 end
 function _obj(backend, objbuffer, obj, x)
-    kerf(backend)(objbuffer, obj.f, obj.itr, x; ndrange = size(obj.itr))
+    kerf(backend)(objbuffer, obj.f, obj.itr, x; ndrange = length(obj.itr))
     _obj(backend, objbuffer, obj.inner, x)
     synchronize(backend)
 end
@@ -240,7 +240,7 @@ function ExaModels.cons_nln!(
     end
 end
 function _cons_nln!(backend, y, con::ExaModels.Constraint, x)
-    kerf(backend)(y, con.f, con.itr, x; ndrange = size(con.itr))
+    kerf(backend)(y, con.f, con.itr, x; ndrange = length(con.itr))
     _cons_nln!(backend, y, con.inner, x)
     synchronize(backend)
 end
@@ -252,7 +252,7 @@ end
 
 
 function _conaugs!(backend, y, con::ExaModels.ConstraintAug, x)
-    kerf2(backend)(y, con.f, con.itr, x, con.oa; ndrange = size(con.itr))
+    kerf2(backend)(y, con.f, con.itr, x, con.oa; ndrange = length(con.itr))
     _conaugs!(backend, y, con.inner, x)
     synchronize(backend)
 end
@@ -477,8 +477,9 @@ function ExaModels.sgradient!(
     f,
     x,
     adj,
-    ) where {B<:KernelAbstractions.Backend}
-    return kerg(backend)(y, f.f, f.itr, x, adj; ndrange = size(f.itr))
+) where {B<:KernelAbstractions.Backend}
+
+    return kerg(backend)(y, f.f, f.itr, x, adj; ndrange = length(f.itr))
 end
 
 function ExaModels.sjacobian!(
@@ -489,7 +490,7 @@ function ExaModels.sjacobian!(
     x,
     adj,
 ) where {B<:KernelAbstractions.Backend}
-    return kerj(backend)(y1, y2, f.f, f.itr, x, adj; ndrange = size(f.itr))
+    return kerj(backend)(y1, y2, f.f, f.itr, x, adj; ndrange = length(f.itr))
 end
 
 function ExaModels.shessian!(
@@ -501,7 +502,7 @@ function ExaModels.shessian!(
     adj,
     adj2,
 ) where {B<:KernelAbstractions.Backend}
-    return kerh(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = size(f.itr))
+    return kerh(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = length(f.itr))
 end
 
 function ExaModels.shessian!(
@@ -513,13 +514,13 @@ function ExaModels.shessian!(
     adj::V,
     adj2,
 ) where {B<:KernelAbstractions.Backend,V<:AbstractVector}
-    return kerh2(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = size(f.itr))
+    return kerh2(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = length(f.itr))
 end
 
 @kernel function kerh(y1, y2, @Const(f), @Const(itr), @Const(x), @Const(adj1), @Const(adj2))
     I = @index(Global)
     @inbounds ExaModels.hrpass0(
-        f.f(ExaModels.idx(itr, I), ExaModels.SecondAdjointNodeSource(x)),
+        f.f(itr[I], ExaModels.SecondAdjointNodeSource(x)),
         f.comp2,
         y1,
         y2,
@@ -541,7 +542,7 @@ end
 )
     I = @index(Global)
     @inbounds ExaModels.hrpass0(
-        f.f(ExaModels.idx(itr, I), ExaModels.SecondAdjointNodeSource(x)),
+        f.f(itr[I], ExaModels.SecondAdjointNodeSource(x)),
         f.comp2,
         y1,
         y2,
@@ -555,7 +556,7 @@ end
 @kernel function kerj(y1, y2, @Const(f), @Const(itr), @Const(x), @Const(adj))
     I = @index(Global)
     @inbounds ExaModels.jrpass(
-        f.f(ExaModels.idx(itr, I), ExaModels.AdjointNodeSource(x)),
+        f.f(itr[I], ExaModels.AdjointNodeSource(x)),
         f.comp1,
         ExaModels.offset0(f, itr, I),
         y1,
@@ -569,7 +570,7 @@ end
 @kernel function kerg(y, @Const(f), @Const(itr), @Const(x), @Const(adj))
     I = @index(Global)
     @inbounds ExaModels.grpass(
-        f.f(ExaModels.idx(itr, I), ExaModels.AdjointNodeSource(x)),
+        f.f(itr[I], ExaModels.AdjointNodeSource(x)),
         f.comp1,
         y,
         ExaModels.offset1(f, I),
@@ -580,11 +581,11 @@ end
 
 @kernel function kerf(y, @Const(f), @Const(itr), @Const(x))
     I = @index(Global)
-    @inbounds y[ExaModels.offset0(f, itr, I)] = f.f(ExaModels.idx(itr, I), x)
+    @inbounds y[ExaModels.offset0(f, itr, I)] = f.f(itr[I], x)
 end
 @kernel function kerf2(y, @Const(f), @Const(itr), @Const(x), @Const(oa))
     I = @index(Global)
-    @inbounds y[oa+I] = f.f(ExaModels.idx(itr, I), x)
+    @inbounds y[oa+I] = f.f(itr[I], x)
 end
 
 
diff --git a/src/gradient.jl b/src/gradient.jl
@@ -37,8 +37,8 @@ Performs dense gradient evalution
 - `adj`: initial adjoint
 """
 function gradient!(y, f, x, adj)
-    for (k,p) in enumerate(f.itr)
-        @inbounds gradient!(y, f.f.f, x, p, adj)
+    @simd for k in eachindex(f.itr)
+        @inbounds gradient!(y, f.f.f, x, f.itr[k], adj)
     end
     return y
 end
@@ -112,7 +112,7 @@ Performs sparse gradient evalution
 - `adj`: initial adjoint
 """
 function sgradient!(y, f, x, adj)
-     for (k,p) in enumerate(f.itr)
+    @simd for k in eachindex(f.itr)
         @inbounds sgradient!(y, f.f.f, f.itr[k], x, f.itr.comp1, offset1(f, k), adj)
     end
     return y
diff --git a/src/hessian.jl b/src/hessian.jl
@@ -619,12 +619,12 @@ Performs sparse jacobian evalution
 - `adj2`: initial second adjoint
 """
 function shessian!(y1, y2, f, x, adj1, adj2)
-    for (k,p) in enumerate(f.itr)
+    @simd for k in eachindex(f.itr)
         @inbounds shessian!(
             y1,
             y2,
             f.f.f,
-            p,
+            f.itr[k],
             x,
             f.f.comp2,
             offset2(f, k),
@@ -634,12 +634,12 @@ function shessian!(y1, y2, f, x, adj1, adj2)
     end
 end
 function shessian!(y1, y2, f, x, adj1s::V, adj2) where {V<:AbstractVector}
-    for (k,p) in enumerate(f.itr)
+    @simd for k in eachindex(f.itr)
         @inbounds shessian!(
             y1,
             y2,
             f.f.f,
-            p,
+            f.itr[k],
             x,
             f.f.comp2,
             offset2(f, k),
diff --git a/src/jacobian.jl b/src/jacobian.jl
@@ -101,12 +101,12 @@ Performs sparse jacobian evalution
 - `adj`: initial adjoint
 """
 function sjacobian!(y1, y2, f, x, adj)
-    for (i,p) in enumerate(f.itr)
+    @simd for i in eachindex(f.itr)
         @inbounds sjacobian!(
             y1,
             y2,
             f.f.f,
-            p,
+            f.itr[i],
             x,
             f.f.comp1,
             offset0(f, i),
diff --git a/src/nlp.jl b/src/nlp.jl
@@ -370,6 +370,7 @@ Objective
 ```
 """
 function objective(c::C, gen) where {C<:ExaCore}
+    gen = _adapt_gen(gen)
     f = SIMDFunction(gen, c.nobj, c.nnzg, c.nnzh)
     pars = gen.iter
 
@@ -429,8 +430,9 @@ function constraint(
     start = zero(T),
     lcon = zero(T),
     ucon = zero(T),
-) where {T,C<:ExaCore{T}}
-
+    ) where {T,C<:ExaCore{T}}
+    
+    gen = _adapt_gen(gen)
     f = SIMDFunction(gen, c.ncon, c.nnzj, c.nnzh)
     pars = gen.iter
 
@@ -490,14 +492,16 @@ function _constraint(c, f, pars, start, lcon, ucon)
 end
 
 function constraint!(c::C, c1, gen::Base.Generator) where {C<:ExaCore}
-    f = SIMDFunction(gen, offset0(c1, 0), c.nnzj, c.nnzh; tsize = Base.size(c1.itr))
+    
+    gen = _adapt_gen(gen)
+    f = SIMDFunction(gen, offset0(c1, 0), c.nnzj, c.nnzh)
     pars = gen.iter
 
     _constraint!(c, f, pars)
 end
 
 function constraint!(c::C, c1, expr, pars) where {C<:ExaCore}
-    f = _simdfunction(expr, offset0(c1, 0), c.nnzj, c.nnzh; tsize = Base.size(c1.itr))
+    f = _simdfunction(expr, offset0(c1, 0), c.nnzj, c.nnzh)
 
     _constraint!(c, f, pars)
 end
@@ -510,6 +514,7 @@ function _constraint!(c, f, pars)
     c.nconaug += nitr
     c.nnzj += nitr * f.o1step
     c.nnzh += nitr * f.o2step
+
     c.con = ConstraintAug(c.con, f, convert_array(pars, c.backend), oa)
 end
 
@@ -558,8 +563,8 @@ end
 
 function _cons_nln!(cons, x, g)
     _cons_nln!(cons.inner, x, g)
-    for (i,p) in enumerate(cons.itr)
-        g[offset0(cons, i)] += cons.f.f(p, x)
+    @simd for i in eachindex(cons.itr)
+        g[offset0(cons, i)] += cons.f.f(cons.itr[i], x)
     end
 end
 _cons_nln!(cons::ConstraintNull, x, g) = nothing
@@ -672,6 +677,18 @@ function _con_hprod!(cons, x, y, v, Hv, obj_weight)
     shessian!((Hv, v), nothing, cons, x, y, zero(eltype(Hv)))
 end
 
+@inbounds @inline offset0(a, i) = offset0(a.f, i)
+@inbounds @inline offset1(a, i) = offset1(a.f, i)
+@inbounds @inline offset2(a, i) = offset2(a.f, i)
+@inbounds @inline offset0(f, itr, i) = offset0(f, i)
+@inbounds @inline offset0(f::F, i) where {F<:SIMDFunction} = f.o0 + i
+@inbounds @inline offset1(f::F, i) where {F<:SIMDFunction} = f.o1 + f.o1step * (i - 1)
+@inbounds @inline offset2(f::F, i) where {F<:SIMDFunction} = f.o2 + f.o2step * (i - 1)
+@inbounds @inline offset0(a::C, i) where {C<:ConstraintAug} = offset0(a.f, a.itr, i)
+@inbounds @inline offset0(f::F, itr, i) where {P<:Pair,F<:SIMDFunction{P}} =
+    f.o0 + f.f.first(itr[i], nothing)
+@inbounds @inline offset0(f::F, itr, i) where {T<:Tuple,P<:Pair{T},F<:SIMDFunction{P}} = f.o0 + idxx(coord(itr, i, f.f.first), Base.size(itr))
+
 idx(itr, I) = @inbounds itr[I]
 idx(itr::Base.Iterators.ProductIterator{V}, I) where V =  _idx(I-1, itr.iterators, Base.size(itr))
 function _idx(n, (vec1, vec...), (si1, si...))
@@ -687,18 +704,6 @@ _idxx(::Tuple{}, ::Tuple{}, a) = 0
 coord(itr, i, (f,fs...)) = (f(idx(itr,i), nothing), coord(itr, i, fs)...)
 coord(itr, i, ::Tuple{}) = ()
 
-@inbounds @inline offset0(a, i) = offset0(a.f, i)
-@inbounds @inline offset1(a, i) = offset1(a.f, i)
-@inbounds @inline offset2(a, i) = offset2(a.f, i)
-@inbounds @inline offset0(f, itr, i) = offset0(f, i)
-@inbounds @inline offset0(f::F, i) where {F<:SIMDFunction} = f.o0 + i
-@inbounds @inline offset1(f::F, i) where {F<:SIMDFunction} = f.o1 + f.o1step * (i - 1)
-@inbounds @inline offset2(f::F, i) where {F<:SIMDFunction} = f.o2 + f.o2step * (i - 1)
-@inbounds @inline offset0(a::C, i) where {C<:ConstraintAug} = offset0(a.f, a.itr, i)
-@inbounds @inline offset0(f::F, itr, i) where {P<:Pair,F<:SIMDFunction{P}} =
-    f.o0 + f.f.first(itr[i], nothing)
-@inbounds @inline offset0(f::F, itr, i) where {T<:Tuple,P<:Pair{T},F<:SIMDFunction{P}} = f.o0 + idxx(coord(itr, i, f.f.first), f.tsize)
-
 for (thing, val) in [(:solution, 1), (:multipliers_L, 0), (:multipliers_U, 2)]
     @eval begin
         """
@@ -772,3 +777,7 @@ function multipliers(result::SolverCore.AbstractExecutionStats, y::Constraint)
     len = length(y.itr)
     return view(result.multipliers, o+1:o+len)
 end
+
+
+_adapt_gen(gen) = gen
+_adapt_gen(gen::Base.Generator{P}) where P <: Base.Iterators.ProductIterator = Base.Generator(gen.f, collect(gen.iter))
diff --git a/src/simdfunction.jl b/src/simdfunction.jl
@@ -13,7 +13,7 @@ struct Compressor{I}
 end
 @inline (i::Compressor{I})(n) where {I} = @inbounds i.inner[n]
 
-struct SIMDFunction{F,C1,C2,T}
+struct SIMDFunction{F,C1,C2}
     f::F
     comp1::C1
     comp2::C2
@@ -22,7 +22,6 @@ struct SIMDFunction{F,C1,C2,T}
     o2::Int
     o1step::Int
     o2step::Int
-    tsize::T
 end
 
 """
@@ -36,14 +35,14 @@ Returns a `SIMDFunction` using the `gen`.
 - `o1`: offset for the derivative evalution
 - `o2`: offset for the second-order derivative evalution
 """
-function SIMDFunction(gen::Base.Generator, o0 = 0, o1 = 0, o2 = 0; tsize = ())
+function SIMDFunction(gen::Base.Generator, o0 = 0, o1 = 0, o2 = 0)
 
     f = gen.f(Par(eltype(gen.iter)))
 
-    _simdfunction(f, o0, o1, o2; tsize = tsize)
+    _simdfunction(f, o0, o1, o2)
 end
 
-function _simdfunction(f, o0, o1, o2; tsize = ())
+function _simdfunction(f, o0, o1, o2)
     d = f(Identity(), AdjointNodeSource(nothing))
     y1 = []
     ExaModels.grpass(d, nothing, y1, nothing, 0, NaN)
@@ -60,5 +59,5 @@ function _simdfunction(f, o0, o1, o2; tsize = ())
     o2step = length(a2)
     c2 = Compressor(Tuple(findfirst(isequal(i), a2) for i in y2))
 
-    SIMDFunction(f, c1, c2, o0, o1, o2, o1step, o2step, tsize)
+    SIMDFunction(f, c1, c2, o0, o1, o2, o1step, o2step)
 end