
Commit efd4815

[XPU] fix bugs of depthwise conv test and change default quant type (#70859)
* [XPU] fix bugs of depthwise conv test and change default quant type
* fix typo
* change default quant to float for fp32
* fp32 use tf32
* fix some ci bugs
* fix more ci bugs
1 parent 4c87902 commit efd4815

18 files changed (+303, -246 lines)

paddle/phi/kernels/xpu/xpu_api_wrapper.h

Lines changed: 54 additions & 22 deletions
@@ -16,6 +16,7 @@
 
 #ifdef PADDLE_WITH_XPU
 
+#include <unordered_map>
 #include <vector>
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_header.h"
@@ -41,29 +42,60 @@ enum XPUFCCalcType {
   FC_FLOAT16,
 };
 
-template <typename T>
-XPUFCCalcType FCCalcType() {
-  const char* xpu_paddle_fc_float16 = std::getenv("XPU_PADDLE_FC_FLOAT16");
-  if (xpu_paddle_fc_float16 != nullptr &&
-      (std::is_same<phi::dtype::float16, T>::value ||
-       std::is_same<XPUTypeFP16, T>::value || std::is_same<float, T>::value)) {
-    return XPUFCCalcType::FC_FLOAT16;
-  } else if (std::is_same<phi::dtype::float16, T>::value ||
-             std::is_same<XPUTypeFP16, T>::value) {
-    return XPUFCCalcType::FC_INT16;
-  } else if (std::getenv("XPU_PADDLE_FC_INT32") != nullptr) {
-    return XPUFCCalcType::FC_INT32;
-  } else if (std::getenv("XPU_PADDLE_FC_LOCAL_INT16") != nullptr) {
-    return XPUFCCalcType::FC_FLOAT;
-  } else if (std::getenv("XPU_PADDLE_FC_INT32_WITH_LL") != nullptr) {
-    return XPUFCCalcType::FC_INT32_WITH_LL;
-  } else if ((std::is_same<phi::dtype::bfloat16, T>::value ||
-              std::is_same<XPUTypeBF16, T>::value) ||
-             (std::is_same<float, T>::value &&
-              std::getenv("XPU_PADDLE_FC_TF32") != nullptr)) {
-    return XPUFCCalcType::FC_TF32;
+using XPUFCCalcTypeMap = std::vector<std::pair<const char*, XPUFCCalcType>>;
+
+inline XPUFCCalcType GetFCCalcTypeFromEnv(const XPUFCCalcTypeMap& env_map,
+                                          XPUFCCalcType default_calc_type) {
+  for (auto [env_name, calc_type] : env_map) {
+    if (std::getenv(env_name) != nullptr) {
+      return calc_type;
+    }
   }
-  return XPUFCCalcType::FC_INT16;
+  return default_calc_type;
+}
+
+template <typename T>
+inline XPUFCCalcType FCCalcType() {
+  // FLOAT32
+  XPUFCCalcTypeMap calc_type_map = {
+      {"XPU_PADDLE_FC_FLOAT", XPUFCCalcType::FC_FLOAT},
+      {"XPU_PADDLE_FC_LOCAL_INT16", XPUFCCalcType::FC_FLOAT},
+      {"XPU_PADDLE_FC_TF32", XPUFCCalcType::FC_TF32},
+      {"XPU_PADDLE_FC_INT16", XPUFCCalcType::FC_INT16},
+      {"XPU_PADDLE_FC_INT32", XPUFCCalcType::FC_INT32},
+      {"XPU_PADDLE_FC_INT32_WITH_LL", XPUFCCalcType::FC_INT32_WITH_LL},
+  };
+#ifdef PADDLE_WITH_XPU_XRE5
+  auto default_calc_type = XPUFCCalcType::FC_TF32;
+#else
+  auto default_calc_type = XPUFCCalcType::FC_INT16;
+#endif
+  return GetFCCalcTypeFromEnv(calc_type_map, default_calc_type);
+}
+
+template <>
+inline XPUFCCalcType FCCalcType<XPUTypeFP16>() {
+  XPUFCCalcTypeMap calc_type_map = {
+      {"XPU_PADDLE_FC_FLOAT16", XPUFCCalcType::FC_FLOAT16},
+      {"XPU_PADDLE_FC_INT16", XPUFCCalcType::FC_INT16},
+      {"XPU_PADDLE_FC_FLOAT", XPUFCCalcType::FC_FLOAT},
+      {"XPU_PADDLE_FC_LOCAL_INT16", XPUFCCalcType::FC_FLOAT}};
+#ifdef PADDLE_WITH_XPU_XRE5
+  auto default_calc_type = XPUFCCalcType::FC_FLOAT16;
+#else
+  auto default_calc_type = XPUFCCalcType::FC_INT16;
+#endif
+  return GetFCCalcTypeFromEnv(calc_type_map, default_calc_type);
+}
+
+template <>
+inline XPUFCCalcType FCCalcType<XPUTypeBF16>() {
+  XPUFCCalcTypeMap calc_type_map = {
+      // TF32 is the default, do not need to be listed here.
+      {"XPU_PADDLE_FC_FLOAT", XPUFCCalcType::FC_FLOAT},
+      {"XPU_PADDLE_FC_LOCAL_INT16", XPUFCCalcType::FC_FLOAT}};
+  auto default_calc_type = XPUFCCalcType::FC_TF32;
+  return GetFCCalcTypeFromEnv(calc_type_map, default_calc_type);
 }
 
 struct XpuFcInfo {
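
With the new lookup, fp32 FC/matmul ops default to FC_TF32 on XRE5 builds (FC_INT16 otherwise), but every environment variable in the table still takes precedence and is checked in the listed order each time FCCalcType() is called. Below is a minimal sketch of overriding the default from a Python script, assuming an XPU build of Paddle; the device setup, shapes, and the choice of XPU_PADDLE_FC_INT16 are illustrative, not part of this commit:

import os

import paddle

# FCCalcType<float>() reads these variables via std::getenv at dispatch time,
# so they must be set before the op runs; assigning through os.environ also
# updates the C-level environment of the current process.
os.environ["XPU_PADDLE_FC_INT16"] = "1"  # pin fp32 FC back to int16 quantization

paddle.set_device("xpu")
x = paddle.randn([64, 128], dtype="float32")
y = paddle.randn([128, 32], dtype="float32")
out = paddle.matmul(x, y)  # resolved to FC_INT16 instead of the FC_TF32 default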

test/dygraph_to_static/test_save_inference_model.py

Lines changed: 8 additions & 1 deletion
@@ -84,6 +84,11 @@ def forward(self, x):
 class TestDyToStaticSaveInferenceModel(Dy2StTestBase):
     def setUp(self):
         self.temp_dir = tempfile.TemporaryDirectory()
+        self.atol = 0
+        self.rtol = 1e-5
+        if paddle.is_compiled_with_xpu():
+            self.atol = 1e-4
+            self.rtol = 1e-4
 
     def tearDown(self):
         self.temp_dir.cleanup()
@@ -205,7 +210,9 @@ def check_save_inference_model(
             infer_model_dir, model_filename, params_filename, inputs
         )
 
-        np.testing.assert_allclose(gt_out, infer_out, rtol=1e-05)
+        np.testing.assert_allclose(
+            gt_out, infer_out, atol=self.atol, rtol=self.rtol
+        )
 
     def load_and_run_inference(
         self, model_path, model_filename, params_filename, inputs

test/ir/inference/test_xpu_matmul_weight_trans_pass.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def sample_predictor_configs(self, program_config):
         config = self.create_inference_config(use_xpu=True)
         yield config, [
             "matmul_v2",
-        ], (1e-3, 1e-3)
+        ], (5e-3, 5e-3)
 
     def sample_program_config(self, draw):
         # 1. Generate shape and attr of matmul

test/ir/pir/fused_pass/xpu/test_conv2d_add_fuse_xpu_pass.py

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ def sample_program(self):
         yield pir_program, False
 
     def test_check_output(self):
-        self.check_pass_correct(atol=1e-3, rtol=1e-3)
+        self.check_pass_correct(atol=2e-3, rtol=2e-3)
 
     def setUp(self):
         if core.is_compiled_with_xpu():

test/legacy_test/test_executor_and_mul.py

Lines changed: 7 additions & 4 deletions
@@ -45,10 +45,13 @@ def test_mul(self):
         )
 
         self.assertEqual((100, 100), res.shape)
-        np.testing.assert_allclose(res, np.dot(a_np, b_np), rtol=1e-05)
-        np.testing.assert_allclose(res_array[0], a_np, rtol=1e-05)
-        np.testing.assert_allclose(res_array[1], b_np, rtol=1e-05)
-        np.testing.assert_allclose(res_array[2], res, rtol=1e-05)
+        rtol = 1e-5
+        if paddle.is_compiled_with_xpu():
+            rtol = 1e-4
+        np.testing.assert_allclose(res, np.dot(a_np, b_np), rtol=rtol)
+        np.testing.assert_allclose(res_array[0], a_np, rtol=rtol)
+        np.testing.assert_allclose(res_array[1], b_np, rtol=rtol)
+        np.testing.assert_allclose(res_array[2], res, rtol=rtol)
 
 
 if __name__ == '__main__':

test/prim/process/test_prim_amp.py

Lines changed: 9 additions & 4 deletions
@@ -47,6 +47,11 @@ def setUp(self):
         paddle.seed(2022)
         self.x = paddle.randn([4, 2, 6, 6], dtype="float32")
         self.x.stop_gradient = False
+        self.atol = 1e-3
+        self.rtol = 1e-3
+        if paddle.is_compiled_with_xpu():
+            self.atol = 5e-3
+            self.rtol = 5e-3
 
     def train(self, use_prim):
         core._set_prim_all_enabled(use_prim)
@@ -75,8 +80,8 @@ def test_amp_01(self):
         np.testing.assert_allclose(
             expected,
             actual,
-            rtol=1e-3,
-            atol=1e-3,
+            rtol=self.rtol,
+            atol=self.atol,
         )
 
     def test_amp_O1_infer(self):
@@ -101,8 +106,8 @@ def test_amp_O1_infer(self):
         np.testing.assert_allclose(
             res,
             res_amp,
-            rtol=1e-3,
-            atol=1e-3,
+            rtol=self.rtol,
+            atol=self.atol,
         )
 
test/xpu/get_test_cover_info.py

Lines changed: 15 additions & 0 deletions
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import contextlib
 import fcntl
 import inspect
 import os
@@ -362,6 +363,20 @@ def wrapper(cls):
     return wrapper
 
 
+@contextlib.contextmanager
+def xpu_matmul_quant_type_guard(dtype):
+    # only fp32 is supported now
+    assert dtype == "float"
+    env_name = "XPU_PADDLE_FC_FLOAT"
+    origin_env = os.getenv(env_name)
+    os.environ[env_name] = "1"
+    yield
+    if origin_env is not None:
+        os.environ[env_name] = origin_env
+    else:
+        del os.environ[env_name]
+
+
 def get_test_cover_info():
     xpu_version = core.get_xpu_device_version(0)
     version_str = get_version_str(xpu_version)
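
The new xpu_matmul_quant_type_guard temporarily exports XPU_PADDLE_FC_FLOAT=1 and restores (or removes) the variable on exit, so a single test can force float accumulation for fp32 matmuls without leaking the setting into other tests. A minimal usage sketch, assuming it runs from a test module under test/xpu; the matmul check itself is illustrative, not taken from this commit:

import numpy as np
import paddle

from get_test_cover_info import xpu_matmul_quant_type_guard


def check_fp32_matmul_with_float_quant():
    # Inside the guard XPU_PADDLE_FC_FLOAT=1, so FCCalcType<float>() resolves
    # to FC_FLOAT rather than the TF32/INT16 default.
    with xpu_matmul_quant_type_guard("float"):
        x = paddle.rand([8, 16], dtype="float32")
        y = paddle.rand([16, 4], dtype="float32")
        out = paddle.matmul(x, y)
        np.testing.assert_allclose(
            out.numpy(), np.dot(x.numpy(), y.numpy()), rtol=1e-5
        )
    # On exit the previous value of XPU_PADDLE_FC_FLOAT is restored.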

test/xpu/test_bmm_op_xpu.py

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ def set_xpu(self):
             self.__class__.op_type = self.in_type
 
         def test_check_output(self):
-            self.check_output_with_place(self.place)
+            self.check_output_with_place(self.place, atol=5e-3, rtol=1e-3)
 
         def test_check_grad_normal(self):
             self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')

test/xpu/test_conv2d_op_xpu.py

Lines changed: 4 additions & 2 deletions
@@ -256,7 +256,7 @@ def has_cuda(self):
         def test_check_output(self):
             if core.is_compiled_with_xpu():
                 paddle.enable_static()
-                self.check_output_with_place(self.place)
+                self.check_output_with_place(self.place, atol=0.005, rtol=0.005)
 
         def test_check_grad(self):
             if hasattr(self, "no_need_check_grad") and self.no_need_check_grad:
@@ -418,7 +418,9 @@ def test_check_output(self):
             # TODO(wangzhongpu): support onednn op in dygraph mode
             if core.is_compiled_with_xpu():
                 paddle.enable_static()
-                self.check_output_with_place(place=self.place)
+                self.check_output_with_place(
+                    place=self.place, atol=0.005, rtol=0.005
+                )
 
         def test_check_grad(self):
             # TODO(wangzhongpu): support onednn op in dygraph mode

test/xpu/test_conv3d_op_xpu.py

Lines changed: 6 additions & 6 deletions
@@ -225,8 +225,8 @@ def setUp(self):
             }
 
             np.random.seed(100)
-            input = np.random.random(self.input_size).astype(self.dtype)
-            filter = np.random.random(self.filter_size).astype(self.dtype)
+            input = np.random.random(self.input_size).astype(self.dtype) - 0.5
+            filter = np.random.random(self.filter_size).astype(self.dtype) - 0.5
             output = conv3d_forward_naive(
                 input,
                 filter,
@@ -251,7 +251,7 @@ def setUp(self):
 
         def test_check_output(self):
             place = paddle.XPUPlace(0)
-            self.check_output_with_place(place)
+            self.check_output_with_place(place, atol=0.005, rtol=0.005)
 
         def test_check_grad(self):
             place = paddle.XPUPlace(0)
@@ -397,8 +397,8 @@ def setUp(self):
             }
 
             np.random.seed(100)
-            input = np.random.random(self.input_size).astype(self.dtype)
-            filter = np.random.random(self.filter_size).astype(self.dtype)
+            input = np.random.random(self.input_size).astype(self.dtype) - 0.5
+            filter = np.random.random(self.filter_size).astype(self.dtype) - 0.5
             output = conv3d_forward_naive(
                 input,
                 filter,
@@ -426,7 +426,7 @@ def setUp(self):
 
         def test_check_output(self):
             place = paddle.XPUPlace(0)
-            self.check_output_with_place(place)
+            self.check_output_with_place(place, atol=0.005, rtol=0.005)
 
         def test_check_grad(self):
             place = paddle.XPUPlace(0)
