[XPU] add multinomial op (PaddlePaddle#65413)

houj04 · co63oc · commit a51ad9d774e5 · 2024-06-26T07:42:59.000+08:00
diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
@@ -30,7 +30,7 @@ if(NOT DEFINED XPU_XRE_BASE_VERSION)
   set(XPU_XRE_BASE_VERSION "4.32.0.1")
 endif()
 if(NOT DEFINED XPU_XHPC_BASE_DATE)
-  set(XPU_XHPC_BASE_DATE "20240601")
+  set(XPU_XHPC_BASE_DATE "20240621")
 endif()
 set(XPU_XCCL_BASE_VERSION "1.2.1.2")
 if(NOT DEFINED XPU_XFT_BASE_VERSION)
diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -684,6 +684,7 @@ XPUOpMap& get_kl3_ops() {
       {"multi_encoder_xpu",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"multiclass_nms3", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"multinomial", XPUKernelSet({phi::DataType::FLOAT32})},
       {"nearest_interp_v2",
        XPUKernelSet({phi::DataType::FLOAT32,
                      phi::DataType::FLOAT16,
diff --git a/paddle/phi/kernels/xpu/multinomial_kernel.cc b/paddle/phi/kernels/xpu/multinomial_kernel.cc
@@ -0,0 +1,56 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/multinomial_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void MultinomialKernel(const Context& dev_ctx,
+                       const DenseTensor& x,
+                       const Scalar& num_samples,
+                       bool replacement,
+                       DenseTensor* out) {
+  // Last axis of x: categories; the axis before it (if any): distributions.
+  const int64_t sample_count = num_samples.to<int64_t>();
+  const auto& x_dims = x.dims();
+  const int64_t rank = x_dims.size();
+  const int64_t category_count = x_dims[rank - 1];
+  const int64_t distribution_count = rank > 1 ? x_dims[rank - 2] : 1;
+  const T* probs = x.data<T>();
+  int64_t* samples = dev_ctx.template Alloc<int64_t>(out);
+  const int64_t seed = dev_ctx.GetGenerator()->Random64();
+  // xdnn API: int multinomial(Context* ctx, const T* x, TID* y,
+  // int64_t num_samples, int64_t num_categories, int64_t num_distributions,
+  // bool replacement, int64_t seed);
+  const int r = xpu::multinomial<T, int64_t>(dev_ctx.x_context(),
+                                             probs,
+                                             samples,
+                                             sample_count,
+                                             category_count,
+                                             distribution_count,
+                                             replacement,
+                                             seed);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "multinomial");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    multinomial, XPU, ALL_LAYOUT, phi::MultinomialKernel, float) {
+  kernel->OutputAt(0).SetDataType(phi::DataType::INT64);  // Out: always int64
+}
diff --git a/test/xpu/test_multinomial_op_xpu.py b/test/xpu/test_multinomial_op_xpu.py
@@ -0,0 +1,127 @@
+#   Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import unittest
+
+import numpy as np
+from get_test_cover_info import (
+    XPUOpTestWrapper,
+    create_test_class,
+    get_xpu_op_support_types,
+)
+from op_test_xpu import XPUOpTest
+
+import paddle
+
+paddle.enable_static()
+
+
+def sample_output_one_dimension(out, dim):
+    """Return the empirical frequency of each of `dim` categories in `out`."""
+    categories, counts = np.unique(out, return_counts=True)
+    freq = np.zeros(dim).astype("float32")
+    freq[categories] = counts
+    freq /= freq.sum()
+    return freq
+
+
+def sample_output_two_dimension(out, shape):
+    """Return per-row category frequencies of `out` as a `shape` array."""
+    rows = np.split(out, shape[0], axis=0)
+    freq = np.zeros(shape).astype("float32")
+    for row_idx, row in enumerate(rows):
+        categories, counts = np.unique(row, return_counts=True)
+        freq[row_idx][categories] = counts
+    freq /= freq.sum(axis=-1, keepdims=True)
+    return freq
+
+
+class XPUTestMultinomialOp(XPUOpTestWrapper):
+    # Wrapper collecting the multinomial test cases for the XPU backend.
+    def __init__(self):
+        self.use_dynamic_create_class = False
+        self.op_name = 'multinomial'
+
+    class TestMultinomialOp(XPUOpTest):
+        # Base case: 1-D probability input, sampling with replacement.
+        def setUp(self):
+            paddle.enable_static()
+            self.op_type = "multinomial"
+            self.python_api = paddle.multinomial
+            self.place = paddle.XPUPlace(0)
+            self.dtype = self.in_type
+            self.init_data()
+            self.inputs = {"X": self.input_np}
+
+        def init_data(self):
+            # Unnormalized weights over 4 categories; draw 100000 samples.
+            self.attrs = {"num_samples": 100000, "replacement": True}
+            self.input_np = np.random.rand(4).astype(self.dtype)
+            self.outputs = {"Out": np.zeros(100000).astype("int64")}
+
+        def sample_output(self, out):
+            # Empirical frequency of every category among the drawn samples.
+            return sample_output_one_dimension(out, 4)
+
+        def test_check_output(self):
+            self.check_output_with_place_customized(
+                self.verify_output, self.place
+            )
+
+        def verify_output(self, outs):
+            # Sampled frequencies must match the normalized input weights.
+            target = self.input_np / self.input_np.sum(axis=-1, keepdims=True)
+            observed = self.sample_output(np.array(outs[0]))
+            np.testing.assert_allclose(
+                observed,
+                target,
+                rtol=0,
+                atol=0.01,
+                err_msg=f'sample_prob: {observed!s}\nprob: {target!s}',
+            )
+
+    class TestMultinomialOp2(TestMultinomialOp):
+        def init_data(self):
+            # 2-D input: 3 rows of weights over 4 categories each.
+            self.attrs = {"num_samples": 100000, "replacement": True}
+            self.input_np = np.random.rand(3, 4).astype(self.dtype)
+            self.outputs = {"Out": np.zeros((3, 100000)).astype("int64")}
+
+        def sample_output(self, out):
+            return sample_output_two_dimension(out, [3, 4])
+
+    class TestMultinomialOp3(TestMultinomialOp):
+        def init_data(self):
+            # replacement is False, so num_samples (100) stays below the 1000 categories.
+            self.attrs = {"num_samples": 100, "replacement": False}
+            self.input_np = np.random.rand(1000).astype(self.dtype)
+            self.outputs = {"Out": np.zeros(100).astype("int64")}
+
+        def verify_output(self, outs):
+            # Every one of the 100 drawn category ids must be distinct.
+            distinct = np.unique(np.array(outs[0]))
+            self.assertEqual(
+                len(distinct),
+                100,
+                "replacement is False. categories can't be sampled repeatedly",
+            )
+
+
+support_types = get_xpu_op_support_types('multinomial')
+for stype in support_types:  # presumably one test-class set per listed type
+    create_test_class(globals(), XPUTestMultinomialOp, stype)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tools/xpu/pack_paddle_depence.sh b/tools/xpu/pack_paddle_depence.sh
@@ -72,6 +72,11 @@ function xhpc_prepare() {
   cp -r ${XHPC_DIR_NAME}/xdnn/so/libxpuapi.so xpu/lib
 
   check_files ${XHPC_DIR_NAME}/xfa/include/flash_api.h ${XHPC_DIR_NAME}/xfa/so/libxpu_flash_attention.so
+
+  # drop '#include "xpu/refactor/core/quant.h"' from flash_api.h by content, not line number
+  # TODO(houj04): remove this hack when compile issue is resolved in XHPC
+  sed -i '\|#include[[:space:]]*[<"]xpu/refactor/core/quant\.h[>"]|d' "${XHPC_DIR_NAME}/xfa/include/flash_api.h"
+
   cp -r ${XHPC_DIR_NAME}/xfa/include/* xpu/include/xhpc/xfa
   cp -r ${XHPC_DIR_NAME}/xfa/so/libxpu_flash_attention.so xpu/lib/
 }