Skip to content

Commit 357ba22

Browse files
authored
Fill allocation with nan on tests (pytorch#1956)
1 parent 8eafc54 commit 357ba22

File tree

3 files changed

+61
-0
lines changed

3 files changed

+61
-0
lines changed

torch/csrc/jit/codegen/cuda/executor.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <c10/cuda/CUDAStream.h>
2121
#include <c10/util/irange.h>
2222

23+
#include <cmath>
2324
#include <fstream>
2425

2526
namespace torch {
@@ -29,6 +30,16 @@ namespace cuda {
2930

3031
int FusionExecutor::fusion_id_counter_ = 0; // NOLINT
3132

33+
// File-local flag: when true, freshly allocated tensors are poisoned with
// NaN (or an integral sentinel) so tests can detect reads of uninitialized
// memory. `static` gives the variable internal linkage — it is an
// implementation detail; callers use the exported accessors below
// (declared in executor.h), so the symbol itself must not leak out of
// this translation unit.
static bool fill_allocation_with_nan_ = false;

// Returns whether new allocations should be filled with NaN/sentinels.
bool shouldFillAllocationWithNan() {
  return fill_allocation_with_nan_;
}

// Enables/disables NaN-poisoning of new allocations (set by test setup).
void setFillAllocationWithNan(bool value) {
  fill_allocation_with_nan_ = value;
}
42+
3243
namespace {
3344

3445
static const char* defineIndexMode(KernelIndexMode index_mode) {
@@ -280,6 +291,42 @@ void FusionExecutor::compileFusion(
280291

281292
namespace {
282293

294+
void fillTensorWithNan(at::Tensor& t) {
295+
switch (t.scalar_type()) {
296+
case at::ScalarType::Byte:
297+
t.fill_(0xFF);
298+
break;
299+
case at::ScalarType::Char:
300+
t.fill_(0x7F);
301+
break;
302+
case at::ScalarType::Short:
303+
t.fill_(0x7FFF);
304+
break;
305+
case at::ScalarType::Int:
306+
t.fill_(0x7FFFFFFF);
307+
break;
308+
case at::ScalarType::Long:
309+
t.fill_(0x7FFFFFFFFFFFFFFFL);
310+
break;
311+
case at::ScalarType::Bool:
312+
t.fill_(true);
313+
break;
314+
case at::ScalarType::Half:
315+
case at::ScalarType::Float:
316+
case at::ScalarType::Double:
317+
case at::ScalarType::BFloat16:
318+
t.fill_(std::nan(""));
319+
break;
320+
case at::ScalarType::ComplexHalf:
321+
case at::ScalarType::ComplexFloat:
322+
case at::ScalarType::ComplexDouble:
323+
t.fill_(c10::complex<double>(std::nan(""), std::nan("")));
324+
break;
325+
default:
326+
TORCH_INTERNAL_ASSERT(false, "Unknown dtype");
327+
}
328+
}
329+
283330
at::Tensor inferAndAlloc(
284331
const TensorView* tv,
285332
const std::vector<Val*>& sizes,
@@ -349,6 +396,9 @@ at::Tensor inferAndAlloc(
349396
// Non Variable type guard for empty_cuda call
350397
at::AutoDispatchBelowADInplaceOrView non_variable_type_mode;
351398
auto empty = at::empty(isizes, tensor_options);
399+
if (shouldFillAllocationWithNan()) {
400+
fillTensorWithNan(empty);
401+
}
352402
if (expanded_dim) {
353403
return empty.expand(expanded_sizes);
354404
}
@@ -892,6 +942,9 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
892942
c10::nullopt,
893943
options_.device,
894944
c10::nullopt));
945+
if (shouldFillAllocationWithNan()) {
946+
fillTensorWithNan(allocated_outputs.back());
947+
}
895948
}
896949
// Note: aliased output is not returned as output. But we still need it
897950
// for kernel execution, so would need to push them to args
@@ -932,6 +985,9 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
932985
c10::nullopt,
933986
options_.device,
934987
c10::nullopt));
988+
if (shouldFillAllocationWithNan()) {
989+
fillTensorWithNan(global_buffers.buffers.back());
990+
}
935991
global_buffers.zero_init.push_back(false);
936992
}
937993
}

torch/csrc/jit/codegen/cuda/executor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ namespace jit {
1616
namespace fuser {
1717
namespace cuda {
1818

19+
TORCH_CUDA_CU_API bool shouldFillAllocationWithNan();
20+
TORCH_CUDA_CU_API void setFillAllocationWithNan(bool value);
21+
1922
// TODO: Should this actually be in launch params?
2023
struct TORCH_CUDA_CU_API CompileOptions {
2124
c10::Device device = c10::Device(c10::DeviceType::CUDA, 0);

torch/csrc/jit/codegen/cuda/test/test_gpu_validator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <torch/csrc/jit/codegen/cuda/executor.h>
12
#include <torch/csrc/jit/codegen/cuda/executor_utils.h>
23
#include <torch/csrc/jit/codegen/cuda/expr_evaluator.h>
34
#include <torch/csrc/jit/codegen/cuda/fusion.h>
@@ -36,6 +37,7 @@ class NVFuserTest : public ::testing::Test {
3637
if (!deviceMajorMinorCheck(6)) {
3738
GTEST_SKIP() << "skipping tests on pre-PASCAL GPUs";
3839
}
40+
setFillAllocationWithNan(true);
3941
}
4042

4143
void TearDown() override {

0 commit comments

Comments (0)