[CIR][CIRGen] Fix intrinsic type signedness using frontend AST information

RiverDave · RiverDave · commit cd2fc0d060f5 · 2025-08-28T13:11:38.000-04:00
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -532,19 +532,56 @@ decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
 }
 
 // llvm::Intrinsics accepts only LLVMContext. We need to reimplement it here.
+// Takes AST information to infer correct signedness when
+// IIT returns signed but AST shows unsigned in Function Decl (indicating
+// intrinsic expects unsigned)
 static cir::FuncType getIntrinsicType(mlir::MLIRContext *context,
-                                      llvm::Intrinsic::ID id) {
+                                      llvm::Intrinsic::ID id,
+                                      const CallExpr *E) {
   using namespace llvm::Intrinsic;
 
+  // Get the FunctionDecl from the CallExpr
+  const FunctionDecl *FD = nullptr;
+  if (const auto *DRE =
+          dyn_cast<DeclRefExpr>(E->getCallee()->IgnoreImpCasts())) {
+    FD = dyn_cast<FunctionDecl>(DRE->getDecl());
+  }
+
   SmallVector<IITDescriptor, 8> table;
   getIntrinsicInfoTableEntries(id, table);
 
   ArrayRef<IITDescriptor> tableRef = table;
   mlir::Type resultTy = decodeFixedType(tableRef, context);
 
+  // Use FunctionDecl return type if available
+  if (auto intTy = dyn_cast<cir::IntType>(resultTy)) {
+    if (FD && FD->getReturnType()->isUnsignedIntegerType()) {
+      resultTy = IntType::get(context, intTy.getWidth(), /*signed=*/false);
+    }
+    // Otherwise keep IIT default (signed)
+  }
+
   SmallVector<mlir::Type, 8> argTypes;
-  while (!tableRef.empty())
-    argTypes.push_back(decodeFixedType(tableRef, context));
+  unsigned argIndex = 0;
+  while (!tableRef.empty()) {
+    mlir::Type argTy = decodeFixedType(tableRef, context);
+
+    // Adjust argument type signedness based on FunctionDecl parameter
+    // definition
+    if (auto intTy = dyn_cast<cir::IntType>(argTy)) {
+      if (FD && argIndex < FD->getNumParams()) {
+        QualType paramType = FD->getParamDecl(argIndex)->getType();
+        if (paramType->isUnsignedIntegerType()) {
+          argTy = IntType::get(context, intTy.getWidth(), /*signed=*/false);
+        }
+        // Otherwise keep IIT default (signed)
+      }
+      // If no FunctionDecl, keep IIT default (signed)
+    }
+
+    argTypes.push_back(argTy);
+    argIndex++;
+  }
 
   return FuncType::get(argTypes, resultTy);
 }
@@ -2726,16 +2763,34 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       assert(false && "bad intrinsic name!");
 
     cir::FuncType intrinsicType =
-        getIntrinsicType(&getMLIRContext(), intrinsicID);
+        getIntrinsicType(&getMLIRContext(), intrinsicID, E);
 
     SmallVector<mlir::Value> args;
     for (unsigned i = 0; i < E->getNumArgs(); i++) {
-      mlir::Value arg = emitScalarOrConstFoldImmArg(iceArguments, i, E);
-      mlir::Type argType = arg.getType();
-      if (argType != intrinsicType.getInput(i))
-        llvm_unreachable("NYI");
+      mlir::Value argValue = emitScalarOrConstFoldImmArg(iceArguments, i, E);
+      // If the intrinsic arg type is different from the builtin arg type
+      // we need to do a bit cast.
+      mlir::Type argType = argValue.getType();
+      mlir::Type expectedTy = intrinsicType.getInput(i);
+      if (argType != expectedTy) {
+        // XXX - vector of pointers?
+        if (cir::PointerType expectedPtrTy =
+                dyn_cast<cir::PointerType>(expectedTy)) {
+          if (cir::PointerType argPtrTy = dyn_cast<cir::PointerType>(argType)) {
+            if (expectedPtrTy.getAddrSpace() != argPtrTy.getAddrSpace()) {
+              argValue = builder.createAddrSpaceCast(
+                  getLoc(E->getExprLoc()), argValue,
+                  cir::PointerType::get(expectedPtrTy,
+                                        expectedPtrTy.getAddrSpace()));
+            }
+          }
+        }
+        // TODO(cir): Cast vector type (e.g., v256i32) to x86_amx, this only
+        // happens in amx intrinsics.
+        argValue = builder.createBitcast(argValue, expectedTy);
+      }
 
-      args.push_back(arg);
+      args.push_back(argValue);
     }
 
     auto intrinsicCall = builder.create<cir::LLVMIntrinsicCallOp>(
diff --git a/clang/test/CIR/CodeGen/CUDA/builtins-nvptx-ptx60.cu b/clang/test/CIR/CodeGen/CUDA/builtins-nvptx-ptx60.cu
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -emit-cir -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CIR %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx65 \
+// RUN:            -emit-cir -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CIR %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx70 \
+// RUN:            -emit-cir -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CIR %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx65 \
+// RUN:            -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx70 \
+// RUN:            -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=LLVM %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_70 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=OGCHECK %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx65 \
+// RUN:            -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=OGCHECK %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx70 \
+// RUN:            -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=OGCHECK %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+typedef unsigned long long uint64_t;
+
+__device__ void nvvm_sync(unsigned mask, int i, float f, int a, int b,
+                          bool pred, uint64_t i64) {
+
+  // CIR: cir.llvm.intrinsic "nvvm.bar.warp.sync" {{.*}} : (!u32i)
+  // LLVM: call void @llvm.nvvm.bar.warp.sync(i32
+  // OGCHECK: call void @llvm.nvvm.bar.warp.sync(i32
+  __nvvm_bar_warp_sync(mask);
+
+
+  // CHECK: ret void
+}