@@ -25747,6 +25747,51 @@ TEST_F(NVFuserTest, FusionMergeBroadcastingTrivialReduction2_CUDA) {
2574725747 fusion, {out}, {t0, t1}, {t1 + t0.squeeze(-1)}, __LINE__, __FILE__);
2574825748}
2574925749
25750+ TEST_F(NVFuserTest, FusionMappingRelation_CUDA) {
25751+ std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
25752+ auto fusion = fusion_ptr.get();
25753+ FusionGuard fg(fusion);
25754+
25755+ TensorView* tv0 = makeConcreteTensor({1, 1});
25756+ TensorView* tv1 = makeConcreteTensor({-1, 1, 1});
25757+ fusion->addInput(tv0);
25758+ fusion->addInput(tv1);
25759+ auto tv2 = set(tv0);
25760+ auto tv3 = broadcast(tv2, {true, false, false});
25761+ auto tv4 = add(tv3, tv1);
25762+
25763+ fusion->addOutput(tv4);
25764+
25765+ tv4->merge(-2);
25766+ tv4->merge(-1);
25767+
25768+ tv0->computeAt(tv4, -1);
25769+ tv1->computeAt(tv4, -1);
25770+
25771+ ComputeAtMap ca_map(fusion);
25772+
25773+ // FIXME: This is the concerning part that would motivate some
25774+ // more formalization on concrete/permissive mapping:
25775+ // exact mapping should ideally imply permissive mapping.
25776+ auto tv4_inner_node = tv4->axis(0)->definition()->input(1)->as<IterDomain>();
25777+ TORCH_CHECK(
25778+ ca_map.areMapped(tv2->axis(0), tv4_inner_node, IdMappingMode::EXACT));
25779+ TORCH_CHECK(!ca_map.areMapped(
25780+ tv2->axis(0), tv4_inner_node, IdMappingMode::PERMISSIVE));
25781+
25782+ auto options = at::TensorOptions().dtype(kFloat).device(at::kCUDA, 0);
25783+ at::Tensor t0 = at::randn({1, 1}, options);
25784+ at::Tensor t1 = at::randn({2, 1, 1}, options);
25785+
25786+ FusionExecutor fe;
25787+ fe.compileFusion(fusion, {t0, t1});
25788+ auto cg_outputs = fe.runFusion({t0, t1});
25789+ auto out = cg_outputs[0];
25790+
25791+ testValidate(
25792+ fusion, {out}, {t0, t1}, {t1 + t0.squeeze(0)}, __LINE__, __FILE__);
25793+ }
25794+
2575025795} // namespace jit
2575125796} // namespace torch
2575225797#endif // #if defined(USE_CUDA)
0 commit comments