[Bug Fixes] Fix batch_norm launching InverseVariance on the default CUDA stream, and add sparse_conv_optim_pass (igemm sparse conv) to the TRT subgraph passes (#67443)

ming1753 · web-flow · commit 1f629f283f49 · 2024-08-19T12:39:51.000+08:00
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -108,7 +108,11 @@ const std::vector<std::string> kTRTSubgraphPasses({
       "trt_multihead_matmul_fuse_pass_v2",                        //
       "trt_multihead_matmul_fuse_pass_v3",                        //
       "multihead_matmul_roformer_fuse_pass",                      //
-      "constant_folding_pass",                                    //
+#if defined _WIN32  // Windows does not support sparse_conv3d_implicit_gemm
+#else
+      "sparse_conv_optim_pass",                //
+#endif
+      "constant_folding_pass",  //
 #ifdef PADDLE_WITH_TENSORRT
 #if !IS_TRT_VERSION_GE(8610)
       "trt_flash_multihead_matmul_fuse_pass",  //
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -825,7 +825,7 @@ void BatchNormKernel(const Context &ctx,
           auto *inv_var_ptr = inv_var.data<BatchNormParamType<T>>();
           const int threads = 512 > C ? C : 512;
           const int blocks = (C + 511) / 512;
-          InverseVariance<T><<<blocks, threads>>>(
+          InverseVariance<T><<<blocks, threads, 0, ctx.stream()>>>(
               est_var->template data<BatchNormParamType<T>>(),
               epsilon,
               C,
diff --git a/paddle/phi/kernels/sparse/gpu/conv_kernel_igemm.cu b/paddle/phi/kernels/sparse/gpu/conv_kernel_igemm.cu
@@ -186,7 +186,7 @@ void Conv3dImplicitGemmKernel(const Context& dev_ctx,
                               SparseCooTensor* out) {
 #ifdef PADDLE_WITH_CUDA
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.indices().dtype(), "Conv3dImplicitGemmGPUKernel", ([&] {
+      x.indices().dtype(), "Conv3dImplicitGemmGPUKernel's indices", ([&] {
         // Conv3dImplicitGemmGPUKernel<T, data_t>(dev_ctx,
         Conv3dImplicitGemmGPUKernel<T, int64_t>(dev_ctx,
                                                 x,