2020#include < c10/cuda/CUDAStream.h>
2121#include < c10/util/irange.h>
2222
23+ #include < cmath>
2324#include < fstream>
2425
2526namespace torch {
@@ -29,6 +30,16 @@ namespace cuda {
2930
3031int FusionExecutor::fusion_id_counter_ = 0 ; // NOLINT
3132
// Global debugging flag: when enabled, every tensor the fusion executor
// allocates (kernel outputs and intermediate global buffers) is immediately
// filled with NaN (or a sentinel value for non-floating-point dtypes) so
// that any read of uninitialized memory becomes loudly visible.
bool fill_allocation_with_nan_ = false;

// Query whether freshly-allocated tensors should be poisoned with NaN.
bool shouldFillAllocationWithNan() {
  return fill_allocation_with_nan_;
}

// Enable or disable NaN-poisoning of freshly-allocated tensors.
void setFillAllocationWithNan(bool value) {
  fill_allocation_with_nan_ = value;
}
3243namespace {
3344
3445static const char * defineIndexMode (KernelIndexMode index_mode) {
@@ -280,6 +291,42 @@ void FusionExecutor::compileFusion(
280291
281292namespace {
282293
294+ void fillTensorWithNan (at::Tensor& t) {
295+ switch (t.scalar_type ()) {
296+ case at::ScalarType::Byte:
297+ t.fill_ (0xFF );
298+ break ;
299+ case at::ScalarType::Char:
300+ t.fill_ (0x7F );
301+ break ;
302+ case at::ScalarType::Short:
303+ t.fill_ (0x7FFF );
304+ break ;
305+ case at::ScalarType::Int:
306+ t.fill_ (0x7FFFFFFF );
307+ break ;
308+ case at::ScalarType::Long:
309+ t.fill_ (0x7FFFFFFFFFFFFFFFL );
310+ break ;
311+ case at::ScalarType::Bool:
312+ t.fill_ (true );
313+ break ;
314+ case at::ScalarType::Half:
315+ case at::ScalarType::Float:
316+ case at::ScalarType::Double:
317+ case at::ScalarType::BFloat16:
318+ t.fill_ (std::nan (" " ));
319+ break ;
320+ case at::ScalarType::ComplexHalf:
321+ case at::ScalarType::ComplexFloat:
322+ case at::ScalarType::ComplexDouble:
323+ t.fill_ (c10::complex <double >(std::nan (" " ), std::nan (" " )));
324+ break ;
325+ default :
326+ TORCH_INTERNAL_ASSERT (false , " Unknown dtype" );
327+ }
328+ }
329+
283330at::Tensor inferAndAlloc (
284331 const TensorView* tv,
285332 const std::vector<Val*>& sizes,
@@ -349,6 +396,9 @@ at::Tensor inferAndAlloc(
349396 // Non Variable type guard for empty_cuda call
350397 at::AutoDispatchBelowADInplaceOrView non_variable_type_mode;
351398 auto empty = at::empty (isizes, tensor_options);
399+ if (shouldFillAllocationWithNan ()) {
400+ fillTensorWithNan (empty);
401+ }
352402 if (expanded_dim) {
353403 return empty.expand (expanded_sizes);
354404 }
@@ -892,6 +942,9 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
892942 c10::nullopt ,
893943 options_.device ,
894944 c10::nullopt ));
945+ if (shouldFillAllocationWithNan ()) {
946+ fillTensorWithNan (allocated_outputs.back ());
947+ }
895948 }
896949 // Note: aliased output is not returned as output. But we still need it
897950 // for kernel execution, so would need to push them to args
@@ -932,6 +985,9 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
932985 c10::nullopt ,
933986 options_.device ,
934987 c10::nullopt ));
988+ if (shouldFillAllocationWithNan ()) {
989+ fillTensorWithNan (global_buffers.buffers .back ());
990+ }
935991 global_buffers.zero_init .push_back (false );
936992 }
937993 }
0 commit comments