Skip to content

Commit e2a45ec

Browse files
committed
collect approach
1 parent c60e9d4 commit e2a45ec

File tree

7 files changed

+57
-51
lines changed

7 files changed

+57
-51
lines changed

ext/ExaModelsCUDA.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@ import ExaModels: ExaModels, NLPModels
44
import CUDA: CUDA, CUDABackend, CuArray
55

66
ExaModels.ExaCore(backend::CUDABackend) = ExaModels.ExaCore(Float64, backend)
7-
ExaModels.convert_array(v::Base.Iterators.ProductIterator, backend::CUDABackend) =
8-
Base.product((ExaModels.convert_array(i, backend) for i in v.iterators)...)
9-
ExaModels.convert_array(v::UnitRange, backend::CUDABackend) = v
107
ExaModels.convert_array(v, backend::CUDABackend) = CuArray(v)
118

129
end

ext/ExaModelsKernelAbstractions.jl

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ function ExaModels.ExaModel(
137137
end
138138

139139
function _conaug_structure!(backend, cons, sparsity)
140-
kers(backend)(sparsity, cons.f, cons.itr, cons.oa; ndrange = size(cons.itr))
140+
kers(backend)(sparsity, cons.f, cons.itr, cons.oa; ndrange = length(cons.itr))
141141
_conaug_structure!(backend, cons.inner, sparsity)
142142
synchronize(backend)
143143
end
@@ -214,7 +214,7 @@ function ExaModels.obj(
214214
end
215215
end
216216
function _obj(backend, objbuffer, obj, x)
217-
kerf(backend)(objbuffer, obj.f, obj.itr, x; ndrange = size(obj.itr))
217+
kerf(backend)(objbuffer, obj.f, obj.itr, x; ndrange = length(obj.itr))
218218
_obj(backend, objbuffer, obj.inner, x)
219219
synchronize(backend)
220220
end
@@ -240,7 +240,7 @@ function ExaModels.cons_nln!(
240240
end
241241
end
242242
function _cons_nln!(backend, y, con::ExaModels.Constraint, x)
243-
kerf(backend)(y, con.f, con.itr, x; ndrange = size(con.itr))
243+
kerf(backend)(y, con.f, con.itr, x; ndrange = length(con.itr))
244244
_cons_nln!(backend, y, con.inner, x)
245245
synchronize(backend)
246246
end
@@ -252,7 +252,7 @@ end
252252

253253

254254
function _conaugs!(backend, y, con::ExaModels.ConstraintAug, x)
255-
kerf2(backend)(y, con.f, con.itr, x, con.oa; ndrange = size(con.itr))
255+
kerf2(backend)(y, con.f, con.itr, x, con.oa; ndrange = length(con.itr))
256256
_conaugs!(backend, y, con.inner, x)
257257
synchronize(backend)
258258
end
@@ -477,8 +477,9 @@ function ExaModels.sgradient!(
477477
f,
478478
x,
479479
adj,
480-
) where {B<:KernelAbstractions.Backend}
481-
return kerg(backend)(y, f.f, f.itr, x, adj; ndrange = size(f.itr))
480+
) where {B<:KernelAbstractions.Backend}
481+
482+
return kerg(backend)(y, f.f, f.itr, x, adj; ndrange = length(f.itr))
482483
end
483484

484485
function ExaModels.sjacobian!(
@@ -489,7 +490,7 @@ function ExaModels.sjacobian!(
489490
x,
490491
adj,
491492
) where {B<:KernelAbstractions.Backend}
492-
return kerj(backend)(y1, y2, f.f, f.itr, x, adj; ndrange = size(f.itr))
493+
return kerj(backend)(y1, y2, f.f, f.itr, x, adj; ndrange = length(f.itr))
493494
end
494495

495496
function ExaModels.shessian!(
@@ -501,7 +502,7 @@ function ExaModels.shessian!(
501502
adj,
502503
adj2,
503504
) where {B<:KernelAbstractions.Backend}
504-
return kerh(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = size(f.itr))
505+
return kerh(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = length(f.itr))
505506
end
506507

507508
function ExaModels.shessian!(
@@ -513,13 +514,13 @@ function ExaModels.shessian!(
513514
adj::V,
514515
adj2,
515516
) where {B<:KernelAbstractions.Backend,V<:AbstractVector}
516-
return kerh2(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = size(f.itr))
517+
return kerh2(backend)(y1, y2, f.f, f.itr, x, adj, adj2; ndrange = length(f.itr))
517518
end
518519

519520
@kernel function kerh(y1, y2, @Const(f), @Const(itr), @Const(x), @Const(adj1), @Const(adj2))
520521
I = @index(Global)
521522
@inbounds ExaModels.hrpass0(
522-
f.f(ExaModels.idx(itr, I), ExaModels.SecondAdjointNodeSource(x)),
523+
f.f(itr[I], ExaModels.SecondAdjointNodeSource(x)),
523524
f.comp2,
524525
y1,
525526
y2,
@@ -541,7 +542,7 @@ end
541542
)
542543
I = @index(Global)
543544
@inbounds ExaModels.hrpass0(
544-
f.f(ExaModels.idx(itr, I), ExaModels.SecondAdjointNodeSource(x)),
545+
f.f(itr[I], ExaModels.SecondAdjointNodeSource(x)),
545546
f.comp2,
546547
y1,
547548
y2,
@@ -555,7 +556,7 @@ end
555556
@kernel function kerj(y1, y2, @Const(f), @Const(itr), @Const(x), @Const(adj))
556557
I = @index(Global)
557558
@inbounds ExaModels.jrpass(
558-
f.f(ExaModels.idx(itr, I), ExaModels.AdjointNodeSource(x)),
559+
f.f(itr[I], ExaModels.AdjointNodeSource(x)),
559560
f.comp1,
560561
ExaModels.offset0(f, itr, I),
561562
y1,
@@ -569,7 +570,7 @@ end
569570
@kernel function kerg(y, @Const(f), @Const(itr), @Const(x), @Const(adj))
570571
I = @index(Global)
571572
@inbounds ExaModels.grpass(
572-
f.f(ExaModels.idx(itr, I), ExaModels.AdjointNodeSource(x)),
573+
f.f(itr[I], ExaModels.AdjointNodeSource(x)),
573574
f.comp1,
574575
y,
575576
ExaModels.offset1(f, I),
@@ -580,11 +581,11 @@ end
580581

581582
@kernel function kerf(y, @Const(f), @Const(itr), @Const(x))
582583
I = @index(Global)
583-
@inbounds y[ExaModels.offset0(f, itr, I)] = f.f(ExaModels.idx(itr, I), x)
584+
@inbounds y[ExaModels.offset0(f, itr, I)] = f.f(itr[I], x)
584585
end
585586
@kernel function kerf2(y, @Const(f), @Const(itr), @Const(x), @Const(oa))
586587
I = @index(Global)
587-
@inbounds y[oa+I] = f.f(ExaModels.idx(itr, I), x)
588+
@inbounds y[oa+I] = f.f(itr[I], x)
588589
end
589590

590591

src/gradient.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ Performs dense gradient evaluation
3737
- `adj`: initial adjoint
3838
"""
3939
function gradient!(y, f, x, adj)
40-
for (k,p) in enumerate(f.itr)
41-
@inbounds gradient!(y, f.f.f, x, p, adj)
40+
@simd for k in eachindex(f.itr)
41+
@inbounds gradient!(y, f.f.f, x, f.itr[k], adj)
4242
end
4343
return y
4444
end
@@ -112,7 +112,7 @@ Performs sparse gradient evaluation
112112
- `adj`: initial adjoint
113113
"""
114114
function sgradient!(y, f, x, adj)
115-
for (k,p) in enumerate(f.itr)
115+
@simd for k in eachindex(f.itr)
116116
@inbounds sgradient!(y, f.f.f, f.itr[k], x, f.itr.comp1, offset1(f, k), adj)
117117
end
118118
return y

src/hessian.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -619,12 +619,12 @@ Performs sparse hessian evaluation
619619
- `adj2`: initial second adjoint
620620
"""
621621
function shessian!(y1, y2, f, x, adj1, adj2)
622-
for (k,p) in enumerate(f.itr)
622+
@simd for k in eachindex(f.itr)
623623
@inbounds shessian!(
624624
y1,
625625
y2,
626626
f.f.f,
627-
p,
627+
f.itr[k],
628628
x,
629629
f.f.comp2,
630630
offset2(f, k),
@@ -634,12 +634,12 @@ function shessian!(y1, y2, f, x, adj1, adj2)
634634
end
635635
end
636636
function shessian!(y1, y2, f, x, adj1s::V, adj2) where {V<:AbstractVector}
637-
for (k,p) in enumerate(f.itr)
637+
@simd for k in eachindex(f.itr)
638638
@inbounds shessian!(
639639
y1,
640640
y2,
641641
f.f.f,
642-
p,
642+
f.itr[k],
643643
x,
644644
f.f.comp2,
645645
offset2(f, k),

src/jacobian.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,12 @@ Performs sparse jacobian evaluation
101101
- `adj`: initial adjoint
102102
"""
103103
function sjacobian!(y1, y2, f, x, adj)
104-
for (i,p) in enumerate(f.itr)
104+
@simd for i in eachindex(f.itr)
105105
@inbounds sjacobian!(
106106
y1,
107107
y2,
108108
f.f.f,
109-
p,
109+
f.itr[i],
110110
x,
111111
f.f.comp1,
112112
offset0(f, i),

src/nlp.jl

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ Objective
370370
```
371371
"""
372372
function objective(c::C, gen) where {C<:ExaCore}
373+
gen = _adapt_gen(gen)
373374
f = SIMDFunction(gen, c.nobj, c.nnzg, c.nnzh)
374375
pars = gen.iter
375376

@@ -429,8 +430,9 @@ function constraint(
429430
start = zero(T),
430431
lcon = zero(T),
431432
ucon = zero(T),
432-
) where {T,C<:ExaCore{T}}
433-
433+
) where {T,C<:ExaCore{T}}
434+
435+
gen = _adapt_gen(gen)
434436
f = SIMDFunction(gen, c.ncon, c.nnzj, c.nnzh)
435437
pars = gen.iter
436438

@@ -490,14 +492,16 @@ function _constraint(c, f, pars, start, lcon, ucon)
490492
end
491493

492494
function constraint!(c::C, c1, gen::Base.Generator) where {C<:ExaCore}
493-
f = SIMDFunction(gen, offset0(c1, 0), c.nnzj, c.nnzh; tsize = Base.size(c1.itr))
495+
496+
gen = _adapt_gen(gen)
497+
f = SIMDFunction(gen, offset0(c1, 0), c.nnzj, c.nnzh)
494498
pars = gen.iter
495499

496500
_constraint!(c, f, pars)
497501
end
498502

499503
function constraint!(c::C, c1, expr, pars) where {C<:ExaCore}
500-
f = _simdfunction(expr, offset0(c1, 0), c.nnzj, c.nnzh; tsize = Base.size(c1.itr))
504+
f = _simdfunction(expr, offset0(c1, 0), c.nnzj, c.nnzh)
501505

502506
_constraint!(c, f, pars)
503507
end
@@ -510,6 +514,7 @@ function _constraint!(c, f, pars)
510514
c.nconaug += nitr
511515
c.nnzj += nitr * f.o1step
512516
c.nnzh += nitr * f.o2step
517+
513518
c.con = ConstraintAug(c.con, f, convert_array(pars, c.backend), oa)
514519
end
515520

@@ -558,8 +563,8 @@ end
558563

559564
function _cons_nln!(cons, x, g)
560565
_cons_nln!(cons.inner, x, g)
561-
for (i,p) in enumerate(cons.itr)
562-
g[offset0(cons, i)] += cons.f.f(p, x)
566+
@simd for i in eachindex(cons.itr)
567+
g[offset0(cons, i)] += cons.f.f(cons.itr[i], x)
563568
end
564569
end
565570
_cons_nln!(cons::ConstraintNull, x, g) = nothing
@@ -672,6 +677,18 @@ function _con_hprod!(cons, x, y, v, Hv, obj_weight)
672677
shessian!((Hv, v), nothing, cons, x, y, zero(eltype(Hv)))
673678
end
674679

680+
@inbounds @inline offset0(a, i) = offset0(a.f, i)
681+
@inbounds @inline offset1(a, i) = offset1(a.f, i)
682+
@inbounds @inline offset2(a, i) = offset2(a.f, i)
683+
@inbounds @inline offset0(f, itr, i) = offset0(f, i)
684+
@inbounds @inline offset0(f::F, i) where {F<:SIMDFunction} = f.o0 + i
685+
@inbounds @inline offset1(f::F, i) where {F<:SIMDFunction} = f.o1 + f.o1step * (i - 1)
686+
@inbounds @inline offset2(f::F, i) where {F<:SIMDFunction} = f.o2 + f.o2step * (i - 1)
687+
@inbounds @inline offset0(a::C, i) where {C<:ConstraintAug} = offset0(a.f, a.itr, i)
688+
@inbounds @inline offset0(f::F, itr, i) where {P<:Pair,F<:SIMDFunction{P}} =
689+
f.o0 + f.f.first(itr[i], nothing)
690+
@inbounds @inline offset0(f::F, itr, i) where {T<:Tuple,P<:Pair{T},F<:SIMDFunction{P}} = f.o0 + idxx(coord(itr, i, f.f.first), Base.size(itr))
691+
675692
idx(itr, I) = @inbounds itr[I]
676693
idx(itr::Base.Iterators.ProductIterator{V}, I) where V = _idx(I-1, itr.iterators, Base.size(itr))
677694
function _idx(n, (vec1, vec...), (si1, si...))
@@ -687,18 +704,6 @@ _idxx(::Tuple{}, ::Tuple{}, a) = 0
687704
coord(itr, i, (f,fs...)) = (f(idx(itr,i), nothing), coord(itr, i, fs)...)
688705
coord(itr, i, ::Tuple{}) = ()
689706

690-
@inbounds @inline offset0(a, i) = offset0(a.f, i)
691-
@inbounds @inline offset1(a, i) = offset1(a.f, i)
692-
@inbounds @inline offset2(a, i) = offset2(a.f, i)
693-
@inbounds @inline offset0(f, itr, i) = offset0(f, i)
694-
@inbounds @inline offset0(f::F, i) where {F<:SIMDFunction} = f.o0 + i
695-
@inbounds @inline offset1(f::F, i) where {F<:SIMDFunction} = f.o1 + f.o1step * (i - 1)
696-
@inbounds @inline offset2(f::F, i) where {F<:SIMDFunction} = f.o2 + f.o2step * (i - 1)
697-
@inbounds @inline offset0(a::C, i) where {C<:ConstraintAug} = offset0(a.f, a.itr, i)
698-
@inbounds @inline offset0(f::F, itr, i) where {P<:Pair,F<:SIMDFunction{P}} =
699-
f.o0 + f.f.first(itr[i], nothing)
700-
@inbounds @inline offset0(f::F, itr, i) where {T<:Tuple,P<:Pair{T},F<:SIMDFunction{P}} = f.o0 + idxx(coord(itr, i, f.f.first), f.tsize)
701-
702707
for (thing, val) in [(:solution, 1), (:multipliers_L, 0), (:multipliers_U, 2)]
703708
@eval begin
704709
"""
@@ -772,3 +777,7 @@ function multipliers(result::SolverCore.AbstractExecutionStats, y::Constraint)
772777
len = length(y.itr)
773778
return view(result.multipliers, o+1:o+len)
774779
end
780+
781+
782+
_adapt_gen(gen) = gen
783+
_adapt_gen(gen::Base.Generator{P}) where P <: Base.Iterators.ProductIterator = Base.Generator(gen.f, collect(gen.iter))

src/simdfunction.jl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct Compressor{I}
1313
end
1414
@inline (i::Compressor{I})(n) where {I} = @inbounds i.inner[n]
1515

16-
struct SIMDFunction{F,C1,C2,T}
16+
struct SIMDFunction{F,C1,C2}
1717
f::F
1818
comp1::C1
1919
comp2::C2
@@ -22,7 +22,6 @@ struct SIMDFunction{F,C1,C2,T}
2222
o2::Int
2323
o1step::Int
2424
o2step::Int
25-
tsize::T
2625
end
2726

2827
"""
@@ -36,14 +35,14 @@ Returns a `SIMDFunction` using the `gen`.
3635
- `o1`: offset for the derivative evaluation
3736
- `o2`: offset for the second-order derivative evaluation
3837
"""
39-
function SIMDFunction(gen::Base.Generator, o0 = 0, o1 = 0, o2 = 0; tsize = ())
38+
function SIMDFunction(gen::Base.Generator, o0 = 0, o1 = 0, o2 = 0)
4039

4140
f = gen.f(Par(eltype(gen.iter)))
4241

43-
_simdfunction(f, o0, o1, o2; tsize = tsize)
42+
_simdfunction(f, o0, o1, o2)
4443
end
4544

46-
function _simdfunction(f, o0, o1, o2; tsize = ())
45+
function _simdfunction(f, o0, o1, o2)
4746
d = f(Identity(), AdjointNodeSource(nothing))
4847
y1 = []
4948
ExaModels.grpass(d, nothing, y1, nothing, 0, NaN)
@@ -60,5 +59,5 @@ function _simdfunction(f, o0, o1, o2; tsize = ())
6059
o2step = length(a2)
6160
c2 = Compressor(Tuple(findfirst(isequal(i), a2) for i in y2))
6261

63-
SIMDFunction(f, c1, c2, o0, o1, o2, o1step, o2step, tsize)
62+
SIMDFunction(f, c1, c2, o0, o1, o2, o1step, o2step)
6463
end

0 commit comments

Comments
 (0)