Apple MPS -> CPU NMS fallback strategy (#9600)

glenn-jocher · web-flow · commit c4c0ee8fc359 · 2022-09-26T14:13:03.000+02:00
Until more ops are fully supported on MPS, this update moves predictions to the CPU before NMS so that MPS inference runs seamlessly, at the cost of an MPS-to-CPU transfer and therefore slower NMS times. Partially resolves #9596. Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
diff --git a/utils/general.py b/utils/general.py
@@ -843,6 +843,8 @@ def non_max_suppression(
     if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation model, output = (inference_out, loss_out)
         prediction = prediction[0]  # select only inference output
 
+    if 'mps' in prediction.device.type:  # MPS not fully supported yet, convert tensors to CPU before NMS
+        prediction = prediction.cpu()
     bs = prediction.shape[0]  # batch size
     nc = prediction.shape[2] - nm - 5  # number of classes
     xc = prediction[..., 4] > conf_thres  # candidates