|
32 | 32 | from modin.logging.metrics import emit_metric
|
33 | 33 |
|
34 | 34 |
|
35 |
| -def all_switchable_backends(): |
36 |
| - yield from filter( |
37 |
| - # Disable automatically switching to these engines for now, because |
38 |
| - # 1) _get_prepared_factory_for_backend() currently calls |
39 |
| - # _initialize_engine(), which starts up the ray/dask/unidist |
40 |
| - # processes |
41 |
| - # 2) we can't decide to switch to unidist in the middle of execution. |
42 |
| - lambda backend: backend not in ("Ray", "Unidist", "Dask"), |
43 |
| - Backend.get_active_backends(), |
| 35 | +def all_switchable_backends() -> list[str]: |
| 36 | + return list( |
| 37 | + filter( |
| 38 | + # Disable automatically switching to these engines for now, because |
| 39 | + # 1) _get_prepared_factory_for_backend() currently calls |
| 40 | + # _initialize_engine(), which starts up the ray/dask/unidist |
| 41 | + # processes |
| 42 | + # 2) we can't decide to switch to unidist in the middle of execution. |
| 43 | + lambda backend: backend not in ("Ray", "Unidist", "Dask"), |
| 44 | + Backend.get_active_backends(), |
| 45 | + ) |
44 | 46 | )
|
45 | 47 |
|
46 | 48 |
|
@@ -154,7 +156,7 @@ def calculate(self) -> str:
|
154 | 156 | unrelated backends would outweigh any possible gains in computation speed. However, certain
|
155 | 157 | pathological cases that significantly changed the size of input or output data (e.g. cross join)
|
156 | 158 | would create situations where transferring data after the computation became prohibitively
|
157 |
| - expensive, so we chose to instead. -------------------- |
| 159 | + expensive, so we chose to allow switching to unrelated backends. |
158 | 160 | Additionally, the original implementation had a bug where stay_cost was only computed for the
|
159 | 161 | _first_ query compiler of each backend, thus under-reporting the cost of computation for any
|
160 | 162 | backend with multiple QCs present. In practice this very rarely affected the chosen result.
|
@@ -226,7 +228,7 @@ def calculate(self) -> str:
|
226 | 228 |
|
227 | 229 | if len(self._backend_data) > 1:
|
228 | 230 | get_logger().info(
|
229 |
| - f"BackendCostCalculator results for {'pd' if self._api_cls_name else self._api_cls_name}.{self._op}: {self._calc_result_log(self._result_backend)}" |
| 231 | + f"BackendCostCalculator results for {'pd' if self._api_cls_name is None else self._api_cls_name}.{self._op}: {self._calc_result_log(self._result_backend)}" |
230 | 232 | )
|
231 | 233 | # Does not need to be secure, should not use system entropy
|
232 | 234 | metrics_group = "%04x" % random.randrange(16**4)
|
|
0 commit comments