@@ -121,34 +121,56 @@ struct LocalRunner
121121 inbuffers_pure
122122 outbuffers
123123 progress
124- end
125- function LocalRunner (op,exec_plan,outars= create_outars (op,exec_plan);threaded= true ,showprogress= true )
124+ restarter
125+ end
126+ function LocalRunner (op,exec_plan,
127+ outars= create_outars (op,exec_plan);
128+ threaded= true ,
129+ showprogress= true ,
130+ restartfile= nothing ,
131+ restartmode= :continue ,
132+ )
126133 loopranges = plan_to_loopranges (exec_plan)
127134 inbuffers_pure = generate_inbuffers (op. inars, loopranges)
128135 outbuffers = generate_outbuffers (op. outspecs,op. f, loopranges)
129136 pm = showprogress ? Progress (length (loopranges)) : nothing
130- LocalRunner (op,plan_to_loopranges (loopranges),outars, threaded, inbuffers_pure,outbuffers,pm)
137+ loopranges = plan_to_loopranges (exec_plan)
138+ restarter = create_restarter (restartfile,loopranges,restartmode)
139+ LocalRunner (op,loopranges,outars, threaded, inbuffers_pure,outbuffers,pm,restarter)
131140end
132141
133142update_progress! (:: Nothing ) = nothing
134143update_progress! (pm) = next! (pm)
144+ need_run (inow,restarter:: Restarter ) = need_run (inow,restarter. remaining_loopranges)
145+ need_run (inow,:: Nothing ) = true
146+ need_run (inow,remaining_loopranges) = inow in remaining_loopranges
147+
135148
136149function run_loop (runner:: LocalRunner ,loopranges = runner. loopranges;groupspecs= nothing )
137150 run_loop (
138- runner,runner. op, runner. inbuffers_pure,runner. outbuffers,runner. threaded,runner. outars,runner. progress,loopranges;groupspecs
151+ runner,runner. op, runner. inbuffers_pure,runner. outbuffers,runner. threaded,runner. outars,runner. progress,loopranges, runner . restarter ;groupspecs
139152 )
140153end
141154
142- @noinline function run_loop ( :: LocalRunner , op,inbuffers_pure,outbuffers,threaded,outars,progress,loopranges;groupspecs = nothing )
143- for inow in loopranges
144- @debug " inow = " , inow
155+ function default_loopbody (inow, re, op, inbuffers_pure, outbuffers, threaded,outars, progress)
156+ @debug " inow = " , inow
157+ if need_run ( inow,re)
145158 inbuffers_wrapped = read_range .((inow,),op. inars,inbuffers_pure);
146159 outbuffers_now = extract_outbuffer .((inow,),op. outspecs,op. f. init,op. f. buftype,outbuffers)
147160 run_block (op,inow,inbuffers_wrapped,outbuffers_now,threaded)
148161 put_buffer .((inow,),outbuffers_now,outars,nothing )
149162 clean_aggregator .(outbuffers)
150163 update_progress! (progress)
164+ update_restarter (re, inow)
165+ end
166+ end
167+
168+ @noinline function run_loop (:: LocalRunner ,op,inbuffers_pure,outbuffers,threaded,outars,progress,loopranges,re;groupspecs= nothing )
169+ for inow in loopranges
170+ default_loopbody (inow, re, op, inbuffers_pure, outbuffers, threaded,outars, progress)
151171 end
172+ finish_progress (progress)
173+ finish_restarter (re)
152174end
153175
154176using Distributed
@@ -161,8 +183,9 @@ struct PMapRunner
161183 inbuffers_pure
162184 outbuffers
163185 progress_channel
186+ restarter
164187end
165- function PMapRunner (op,exec_plan,outars= create_outars (op,exec_plan);threaded= true ,showprogress= true )
188+ function PMapRunner (op,exec_plan,outars= create_outars (op,exec_plan);threaded= true ,showprogress= true ,restartfile = nothing ,restartmode = :continue )
166189 all (isnothing,op. f. red) || error (" PMapRunner can not be used for reductions. Use DaggerRunner instead" )
167190 loopranges = plan_to_loopranges (exec_plan)
168191 inbuffers_pure = generate_inbuffers (op. inars, loopranges)
@@ -174,30 +197,42 @@ function PMapRunner(op,exec_plan,outars=create_outars(op,exec_plan);threaded=tru
174197 next! (progress)
175198 end
176199 channel
200+ else
201+ nothing
177202 end
178- PMapRunner (op,plan_to_loopranges (loopranges),outars, threaded, inbuffers_pure,outbuffers,progress_channel)
203+ restarter = create_restarter (restartfile,loopranges,restartmode)
204+ restart_channel = if isnothing (restarter)
205+ nothing
206+ else
207+ nd = ndims (restarter)
208+ channel = Distributed. RemoteChannel (()-> Channel {Union{Nothing,NTuple{nd,Int}}} (), 1 )
209+ @async while true
210+ update = take! (channel)
211+ isnothing (update) && break
212+ add_entry (restarter,update)
213+ end
214+ end
215+ PMapRunner (op,loopranges, outars, threaded, inbuffers_pure,outbuffers,progress_channel,restart_channel)
179216end
180217
181218update_progress! (pm:: RemoteChannel ) = put! (pm, true )
182-
219+ update_restarter (re:: RemoteChannel ,i) = put! (re, i)
220+ update_restarter (:: Nothing ,i) = nothing
221+ update_restarter (re:: Restarter ,i) = add_entry (re,i)
183222finish_progress (:: Any ) = nothing
184223finish_progress (pm:: RemoteChannel ) = put! (pm,false )
224+ finish_restarter (:: Any ) = nothing
225+ finish_restarter (re:: RemoteChannel ) = put! (re,nothing )
185226
186227function run_loop (runner:: PMapRunner ,loopranges = runner. loopranges;groupspecs= nothing )
187228 run_loop (
188- runner,runner. op, runner. inbuffers_pure,runner. outbuffers,runner. threaded,runner. outars,runner. progress_channel,loopranges;groupspecs
229+ runner,runner. op, runner. inbuffers_pure,runner. outbuffers,runner. threaded,runner. outars,runner. progress_channel,runner . restarter, loopranges;groupspecs
189230 )
190231end
191232
192- @noinline function run_loop (:: PMapRunner ,op,inbuffers_pure,outbuffers,threaded,outars,progress,loopranges;groupspecs= nothing )
233+ @noinline function run_loop (:: PMapRunner ,op,inbuffers_pure,outbuffers,threaded,outars,progress,restarter, loopranges;groupspecs= nothing )
193234 pmap (CachingPool (workers ()),loopranges) do inow
194- @debug " inow = " , inow
195- inbuffers_wrapped = read_range .((inow,),op. inars,inbuffers_pure);
196- outbuffers_now = extract_outbuffer .((inow,),op. outspecs,op. f. init,op. f. buftype,outbuffers)
197- run_block (op,inow,inbuffers_wrapped,outbuffers_now,threaded)
198- put_buffer .((inow,),outbuffers_now,outars,nothing )
199- clean_aggregator .(outbuffers)
200- update_progress! (progress)
235+ default_loopbody (inow, restarter, op, inbuffers_pure, outbuffers, threaded,outars, progress)
201236 end
202237 finish_progress (progress)
203238end
0 commit comments