@@ -150,12 +150,20 @@ size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask,
 
     const bool use_i32 = (cumsum_typeid == int32_typeid);
 
+    std::vector<sycl::event> host_task_events;
+
     if (mask.is_c_contiguous()) {
         auto fn = (use_i32)
                       ? mask_positions_contig_i32_dispatch_vector[mask_typeid]
                       : mask_positions_contig_i64_dispatch_vector[mask_typeid];
 
-        return fn(exec_q, mask_size, mask_data, cumsum_data, depends);
+        size_t total_set = fn(exec_q, mask_size, mask_data, cumsum_data,
+                              host_task_events, depends);
+        {
+            py::gil_scoped_release release;
+            sycl::event::wait(host_task_events);
+        }
+        return total_set;
     }
 
     const py::ssize_t *shape = mask.get_shape_raw();
@@ -175,7 +183,6 @@ size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask,
     auto strided_fn =
         (use_i32) ? mask_positions_strided_i32_dispatch_vector[mask_typeid]
                   : mask_positions_strided_i64_dispatch_vector[mask_typeid];
-    std::vector<sycl::event> host_task_events;
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
     const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
@@ -189,7 +196,10 @@ size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask,
 
     if (2 * static_cast<size_t>(nd) != std::get<1>(ptr_size_event_tuple)) {
         copy_shape_ev.wait();
-        sycl::event::wait(host_task_events);
+        {
+            py::gil_scoped_release release;
+            sycl::event::wait(host_task_events);
+        }
         sycl::free(shape_strides, exec_q);
         throw std::runtime_error("Unexpected error");
     }
@@ -200,10 +210,14 @@ size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask,
     dependent_events.insert(dependent_events.end(), depends.begin(),
                             depends.end());
 
-    size_t total_set = strided_fn(exec_q, mask_size, mask_data, nd,
-                                  shape_strides, cumsum_data, dependent_events);
+    size_t total_set =
+        strided_fn(exec_q, mask_size, mask_data, nd, shape_strides, cumsum_data,
+                   host_task_events, dependent_events);
 
-    sycl::event::wait(host_task_events);
+    {
+        py::gil_scoped_release release;
+        sycl::event::wait(host_task_events);
+    }
     sycl::free(shape_strides, exec_q);
 
     return total_set;
@@ -283,14 +297,22 @@ size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src,
             "Cumulative sum array must have int64 data-type.");
     }
 
+    std::vector<sycl::event> host_task_events;
+
     if (src.is_c_contiguous()) {
         auto fn = cumsum_1d_contig_dispatch_vector[src_typeid];
         if (fn == nullptr) {
             throw std::runtime_error(
                 "this cumsum requires integer type, got src_typeid=" +
                 std::to_string(src_typeid));
         }
-        return fn(exec_q, src_size, src_data, cumsum_data, depends);
+        size_t total = fn(exec_q, src_size, src_data, cumsum_data,
+                          host_task_events, depends);
+        {
+            py::gil_scoped_release release;
+            sycl::event::wait(host_task_events);
+        }
+        return total;
     }
 
     const py::ssize_t *shape = src.get_shape_raw();
@@ -313,7 +335,6 @@ size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src,
             "this cumsum requires integer type, got src_typeid=" +
             std::to_string(src_typeid));
     }
-    std::vector<sycl::event> host_task_events;
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
     const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
@@ -339,9 +360,12 @@ size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src,
                             depends.end());
 
     size_t total = strided_fn(exec_q, src_size, src_data, nd, shape_strides,
-                              cumsum_data, dependent_events);
+                              cumsum_data, host_task_events, dependent_events);
 
-    sycl::event::wait(host_task_events);
+    {
+        py::gil_scoped_release release;
+        sycl::event::wait(host_task_events);
+    }
     sycl::free(shape_strides, exec_q);
 
     return total;
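
Every hunk above applies the same pattern: the dispatch helpers now receive a host_task_events vector (filled by the helper), and the caller blocks on those events with the Python GIL released so that other Python threads, and any host tasks that may need the interpreter, are not stalled during the wait. A minimal standalone sketch of that pattern, assuming pybind11 and a SYCL 2020 implementation; the helper name wait_host_tasks_without_gil is hypothetical and not part of dpctl:

#include <vector>

#include <sycl/sycl.hpp>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Block the calling thread until all host-task events complete, without
// holding the Python GIL for the duration of the wait.
// (Hypothetical helper; the commit above inlines this in a braced scope.)
void wait_host_tasks_without_gil(const std::vector<sycl::event> &host_task_events)
{
    py::gil_scoped_release release; // GIL dropped while `release` is alive
    sycl::event::wait(host_task_events);
    // GIL is re-acquired automatically when `release` goes out of scope.
}

In py_mask_positions and py_cumsum_1d the wait is done inline inside a braced scope, so the GIL is only released for the duration of the wait and is held again before the functions return or throw.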