Commit f086705

dylandreimerink and lmb authored
asm: Add handling for atomic operations
As it stands, we only properly handle atomic add operations: `lock *(u32 *)(r1 + 0x1) += w2` was the only instruction that was decoded correctly. Atomic operations share the same opcode and are differentiated by the imm value. So far we have not been looking at the imm value, so all atomic operations look like atomic adds to us. This commit adds decoding for all current atomic operations. We handle them similarly to how we handle ISAv4 instructions, which use the offset to further specify the instruction.

In #1193 we expanded the opcode from a u8 to a u16, which is bigger than the actual size. This allows us to represent functionally different opcodes in Go, even though the kernel uses other bits of the instruction. So our opcode in Go is not identical to the opcode in the kernel; we translate during marshaling and unmarshaling. So far we have only needed a few additional bits, but the atomic ops need 9 bits of the imm to fully encode all possibilities. Since 9 + 8 > 16 (the 9 atomic bits sit above the kernel's 8-bit opcode), we have to grow the opcode to 32 bits.

During unmarshaling, we take the lower 9 bits of the imm, shift them left by 8 bits, and OR them with the opcode. During marshaling this process is reversed.

Signed-off-by: Dylan Reimerink <[email protected]>
Co-authored-by: Lorenz Bauer <[email protected]>
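To make the bit-packing concrete, here is a minimal standalone sketch of the scheme (the helper names are hypothetical; the mask value 0x0001_ff00 and the 8-bit shift come from the diff below):

package main

import "fmt"

// atomicMask covers the 9 imm-derived bits, placed at bits 8-16 of the
// widened 32-bit Go-side opcode, just above the kernel's 8-bit opcode.
const atomicMask uint32 = 0x0001_ff00

// packAtomic folds the low 9 bits of imm into the Go-side opcode (unmarshaling).
func packAtomic(opcode uint32, imm int32) uint32 {
	return opcode | (uint32(imm)<<8)&atomicMask
}

// unpackAtomic recovers the kernel opcode and the imm value (marshaling).
func unpackAtomic(opcode uint32) (kernelOp uint32, imm int32) {
	return opcode &^ atomicMask, int32((opcode & atomicMask) >> 8)
}

func main() {
	// 0xc3 is BPF_STX|BPF_W|BPF_ATOMIC; imm 0xe1 selects xchg32_32.
	packed := packAtomic(0xc3, 0xe1)
	kernelOp, imm := unpackAtomic(packed)
	fmt.Printf("packed=%#x kernelOp=%#x imm=%#x\n", packed, kernelOp, imm)
	// packed=0xe1c3 kernelOp=0xc3 imm=0xe1
}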
1 parent 2a329aa commit f086705

File tree

5 files changed: +259 -25 lines changed

asm/instruction.go

Lines changed: 15 additions & 6 deletions
@@ -62,6 +62,10 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder, platform str
 
 	ins.Offset = int16(bo.Uint16(data[2:4]))
 
+	// Convert to int32 before widening to int64
+	// to ensure the signed bit is carried over.
+	ins.Constant = int64(int32(bo.Uint32(data[4:8])))
+
 	if ins.IsBuiltinCall() {
 		fn, err := BuiltinFuncForPlatform(platform, uint32(ins.Constant))
 		if err != nil {
@@ -93,12 +97,14 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder, platform str
 				ins.Offset = 0
 			}
 		}
+	} else if ins.OpCode.Class() == StXClass &&
+		ins.OpCode.Mode() == AtomicMode {
+		// For atomic ops, part of the opcode is stored in the constant field.
+		// Shift left by 8 bits so we can OR with the actual opcode, and apply
+		// `atomicMask` to avoid merging unknown bits that may be added in the future.
+		ins.OpCode |= (OpCode((ins.Constant << 8)) & atomicMask)
 	}
 
-	// Convert to int32 before widening to int64
-	// to ensure the signed bit is carried over.
-	ins.Constant = int64(int32(bo.Uint32(data[4:8])))
-
 	if !ins.OpCode.IsDWordLoad() {
 		return nil
 	}
@@ -171,6 +177,9 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
 			return 0, fmt.Errorf("extended ALU opcodes should have an .Offset of 0: %s", ins)
 		}
 		ins.Offset = newOffset
+	} else if atomic := ins.OpCode.AtomicOp(); atomic != InvalidAtomic {
+		ins.OpCode = ins.OpCode &^ atomicMask
+		ins.Constant = int64(atomic >> 8)
 	}
 
 	op, err := ins.OpCode.bpfOpCode()
@@ -382,8 +391,8 @@ func (ins Instruction) Format(f fmt.State, c rune) {
 			fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
 		case MemMode, MemSXMode:
 			fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
-		case XAddMode:
-			fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
+		case AtomicMode:
+			fmt.Fprintf(f, "dst: %s src: %s off: %d", ins.Dst, ins.Src, ins.Offset)
 		}
 
 	case cls.IsALU():
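For illustration, a round trip through these new code paths could look like the following (a sketch, assuming this commit is against cilium/ebpf's asm package — the import path is not shown on this page; the expected output strings follow the tests below):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"

	"github.com/cilium/ebpf/asm"
)

func main() {
	// lock *(u64 *)(r1 + 0x1) += r2
	ins := asm.AddAtomic.Mem(asm.R1, asm.R2, asm.DWord, 1)
	fmt.Println(ins) // StXAtomicAddDW dst: r1 src: r2 off: 1

	// Marshal translates the Go-side opcode bits back into the imm field.
	var buf bytes.Buffer
	if _, err := ins.Marshal(&buf, binary.LittleEndian); err != nil {
		panic(err)
	}
	fmt.Printf("% x\n", buf.Bytes()) // db 21 01 00 00 00 00 00
}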

asm/instruction_test.go

Lines changed: 94 additions & 0 deletions
@@ -328,6 +328,82 @@ func (t testFDer) FD() int {
 	return int(t)
 }
 
+func TestAtomics(t *testing.T) {
+	rawInsns := []byte{
+		0xc3, 0x21, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) += w2
+		0xc3, 0x21, 0x01, 0x00, 0x50, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) &= w2
+		0xc3, 0x21, 0x01, 0x00, 0xa0, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) ^= w2
+		0xc3, 0x21, 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) |= w2
+
+		0xdb, 0x21, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) += r2
+		0xdb, 0x21, 0x01, 0x00, 0x50, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) &= r2
+		0xdb, 0x21, 0x01, 0x00, 0xa0, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) ^= r2
+		0xdb, 0x21, 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) |= r2
+
+		0xc3, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // w0 = atomic_fetch_add((u32 *)(r1 + 0x0), w0)
+		0xc3, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, // w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
+		0xc3, 0x01, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, // w0 = atomic_fetch_xor((u32 *)(r1 + 0x0), w0)
+		0xc3, 0x01, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, // w0 = atomic_fetch_or((u32 *)(r1 + 0x0), w0)
+
+		0xdb, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // r0 = atomic_fetch_add((u64 *)(r1 + 0x0), r0)
+		0xdb, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, // r0 = atomic_fetch_and((u64 *)(r1 + 0x0), r0)
+		0xdb, 0x01, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, // r0 = atomic_fetch_xor((u64 *)(r1 + 0x0), r0)
+		0xdb, 0x01, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, // r0 = atomic_fetch_or((u64 *)(r1 + 0x0), r0)
+
+		0xc3, 0x01, 0x00, 0x00, 0xe1, 0x00, 0x00, 0x00, // w0 = xchg32_32(r1 + 0x0, w0)
+		0xdb, 0x01, 0x00, 0x00, 0xe1, 0x00, 0x00, 0x00, // r0 = xchg_64(r1 + 0x0, r0)
+
+		0xc3, 0x11, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, // w0 = cmpxchg32_32(r1 + 0x0, w0, w1)
+		0xdb, 0x11, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, // r0 = cmpxchg_64(r1 + 0x0, r0, r1)
+	}
+
+	insns, err := AppendInstructions(nil, bytes.NewReader(rawInsns), binary.LittleEndian, platform.Linux)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	lines := []string{
+		"StXAtomicAddW dst: r1 src: r2 off: 1",
+		"StXAtomicAndW dst: r1 src: r2 off: 1",
+		"StXAtomicXorW dst: r1 src: r2 off: 1",
+		"StXAtomicOrW dst: r1 src: r2 off: 1",
+		"StXAtomicAddDW dst: r1 src: r2 off: 1",
+		"StXAtomicAndDW dst: r1 src: r2 off: 1",
+		"StXAtomicXorDW dst: r1 src: r2 off: 1",
+		"StXAtomicOrDW dst: r1 src: r2 off: 1",
+		"StXAtomicFetchAddW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchAndW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchXorW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchOrW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchAddDW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchAndDW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchXorDW dst: r1 src: r0 off: 0",
+		"StXAtomicFetchOrDW dst: r1 src: r0 off: 0",
+		"StXAtomicXchgW dst: r1 src: r0 off: 0",
+		"StXAtomicXchgDW dst: r1 src: r0 off: 0",
+		"StXAtomicCmpXchgW dst: r1 src: r1 off: 0",
+		"StXAtomicCmpXchgDW dst: r1 src: r1 off: 0",
+	}
+
+	for i, ins := range insns {
+		if want, got := lines[i], fmt.Sprint(ins); want != got {
+			t.Errorf("Expected %q, got %q", want, got)
+		}
+	}
+
+	// Marshal the instructions again to make sure they round-trip
+	// back to the original bytes.
+	var buf bytes.Buffer
+	err = insns.Marshal(&buf, binary.LittleEndian)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !bytes.Equal(buf.Bytes(), rawInsns) {
+		t.Error("Expected instructions to be equal after marshalling")
+	}
+}
+
 func TestISAv4(t *testing.T) {
 	rawInsns := []byte{
 		0xd7, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, // r1 = bswap16 r1
@@ -355,6 +431,16 @@ func TestISAv4(t *testing.T) {
 
 		0x3c, 0x31, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // w1 s/= w3
 		0x9c, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // w2 s%= w4
+
+		0xd3, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // w0 = load_acquire((u8 *)(r1 + 0x0))
+		0xcb, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // w0 = load_acquire((u16 *)(r1 + 0x0))
+		0xc3, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // w0 = load_acquire((u32 *)(r1 + 0x0))
+		0xdb, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // r0 = load_acquire((u64 *)(r1 + 0x0))
+
+		0xd3, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u8 *)(r1 + 0x0), w2)
+		0xcb, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u16 *)(r1 + 0x0), w2)
+		0xc3, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u32 *)(r1 + 0x0), w2)
+		0xdb, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u64 *)(r1 + 0x0), r2)
 	}
 
 	insns, err := AppendInstructions(nil, bytes.NewReader(rawInsns), binary.LittleEndian, platform.Linux)
@@ -381,6 +467,14 @@ func TestISAv4(t *testing.T) {
 		"SModReg dst: r2 src: r4",
 		"SDivReg32 dst: r1 src: r3",
 		"SModReg32 dst: r2 src: r4",
+		"StXAtomicLdAcqB dst: r0 src: r1 off: 0",
+		"StXAtomicLdAcqH dst: r0 src: r1 off: 0",
+		"StXAtomicLdAcqW dst: r0 src: r1 off: 0",
+		"StXAtomicLdAcqDW dst: r0 src: r1 off: 0",
+		"StXAtomicStRelB dst: r1 src: r2 off: 0",
+		"StXAtomicStRelH dst: r1 src: r2 off: 0",
+		"StXAtomicStRelW dst: r1 src: r2 off: 0",
+		"StXAtomicStRelDW dst: r1 src: r2 off: 0",
 	}
 
 	for i, ins := range insns {
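To see how the fixture bytes map onto instruction fields, here is a hand decoder for one of the raw instructions above (a sketch; it only splits the fixed 8-byte little-endian layout and does not depend on the asm package):

package main

import "fmt"

func main() {
	// w0 = cmpxchg32_32(r1 + 0x0, w0, w1), taken from rawInsns above.
	raw := []byte{0xc3, 0x11, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00}

	op := raw[0]         // BPF_STX | BPF_W | BPF_ATOMIC
	dst := raw[1] & 0x0f // destination register (low nibble)
	src := raw[1] >> 4   // source register (high nibble)
	off := int16(uint16(raw[2]) | uint16(raw[3])<<8)
	imm := int32(uint32(raw[4]) | uint32(raw[5])<<8 |
		uint32(raw[6])<<16 | uint32(raw[7])<<24)

	fmt.Printf("op=%#x dst=r%d src=r%d off=%d imm=%#x\n",
		op, dst, src, off, imm)
	// op=0xc3 dst=r1 src=r1 off=0 imm=0xf1, i.e. StXAtomicCmpXchgW
}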

asm/load_store.go

Lines changed: 115 additions & 8 deletions
@@ -1,5 +1,7 @@
 package asm
 
+import "fmt"
+
 //go:generate go run golang.org/x/tools/cmd/stringer@latest -output load_store_string.go -type=Mode,Size
 
 // Mode for load and store operations
@@ -26,10 +28,119 @@ const (
 	MemMode Mode = 0x60
 	// MemSXMode - load from memory, sign extension
 	MemSXMode Mode = 0x80
-	// XAddMode - add atomically across processors.
-	XAddMode Mode = 0xc0
+	// AtomicMode - atomic operations across processors.
+	AtomicMode Mode = 0xc0
 )
 
+const atomicMask OpCode = 0x0001_ff00
+
+// AtomicOp is the variant of an atomic memory operation, encoded in the
+// imm field of the instruction.
+type AtomicOp uint32
+
+const (
+	// InvalidAtomic - sentinel for OpCodes that do not encode an atomic operation
+	InvalidAtomic AtomicOp = 0xffff_ffff
+
+	// AddAtomic - add src to memory address dst atomically
+	AddAtomic AtomicOp = AtomicOp(Add) << 8
+	// FetchAdd - add src to memory address dst atomically, store result in src
+	FetchAdd AtomicOp = AddAtomic | fetch
+	// AndAtomic - bitwise AND src with memory address at dst atomically
+	AndAtomic AtomicOp = AtomicOp(And) << 8
+	// FetchAnd - bitwise AND src with memory address at dst atomically, store result in src
+	FetchAnd AtomicOp = AndAtomic | fetch
+	// OrAtomic - bitwise OR src with memory address at dst atomically
+	OrAtomic AtomicOp = AtomicOp(Or) << 8
+	// FetchOr - bitwise OR src with memory address at dst atomically, store result in src
+	FetchOr AtomicOp = OrAtomic | fetch
+	// XorAtomic - bitwise XOR src with memory address at dst atomically
+	XorAtomic AtomicOp = AtomicOp(Xor) << 8
+	// FetchXor - bitwise XOR src with memory address at dst atomically, store result in src
+	FetchXor AtomicOp = XorAtomic | fetch
+
+	// Xchg - atomically exchange the old value with the new value
+	//
+	// src gets populated with the old value of *(size *)(dst + offset).
+	Xchg AtomicOp = 0x0000_e000 | fetch
+	// CmpXchg - atomically compare and exchange the old value with the new value
+	//
+	// Compares R0 and *(size *)(dst + offset), writes src to *(size *)(dst + offset) on match.
+	// R0 gets populated with the old value of *(size *)(dst + offset), even if no exchange occurs.
+	CmpXchg AtomicOp = 0x0000_f000 | fetch
+
+	// fetch modifier for read-modify-write atomics
+	fetch AtomicOp = 0x0000_0100
+	// loadAcquire - atomically load with acquire semantics
+	loadAcquire AtomicOp = 0x0001_0000
+	// storeRelease - atomically store with release semantics
+	storeRelease AtomicOp = 0x0001_1000
+)
+
+func (op AtomicOp) String() string {
+	var name string
+	switch op {
+	case AddAtomic, AndAtomic, OrAtomic, XorAtomic:
+		name = ALUOp(op >> 8).String()
+	case FetchAdd, FetchAnd, FetchOr, FetchXor:
+		name = "Fetch" + ALUOp((op^fetch)>>8).String()
+	case Xchg:
+		name = "Xchg"
+	case CmpXchg:
+		name = "CmpXchg"
+	case loadAcquire:
+		name = "LdAcq"
+	case storeRelease:
+		name = "StRel"
+	default:
+		name = fmt.Sprintf("AtomicOp(%#x)", uint32(op))
+	}
+
+	return name
+}
+
+// OpCode returns the OpCode for the atomic operation at the given size,
+// or InvalidOpCode for unsupported combinations.
+func (op AtomicOp) OpCode(size Size) OpCode {
+	switch op {
+	case AddAtomic, AndAtomic, OrAtomic, XorAtomic,
+		FetchAdd, FetchAnd, FetchOr, FetchXor,
+		Xchg, CmpXchg:
+		switch size {
+		case Byte, Half:
+			// 8-bit and 16-bit read-modify-write atomics are not supported.
+			return InvalidOpCode
+		}
+	}
+
+	return OpCode(StXClass).SetMode(AtomicMode).SetSize(size).SetAtomicOp(op)
+}
+
+// Mem emits `*(size *)(dst + offset) (op) src`.
+func (op AtomicOp) Mem(dst, src Register, size Size, offset int16) Instruction {
+	return Instruction{
+		OpCode: op.OpCode(size),
+		Dst:    dst,
+		Src:    src,
+		Offset: offset,
+	}
+}
+
+// LoadAcquire emits `lock-acquire dst = *(size *)(src + offset)`.
+func LoadAcquire(dst, src Register, size Size, offset int16) Instruction {
+	return Instruction{
+		OpCode: loadAcquire.OpCode(size),
+		Dst:    dst,
+		Src:    src,
+		Offset: offset,
+	}
+}
+
+// StoreRelease emits `lock-release *(size *)(dst + offset) = src`.
+func StoreRelease(dst, src Register, size Size, offset int16) Instruction {
+	return Instruction{
+		OpCode: storeRelease.OpCode(size),
+		Dst:    dst,
+		Src:    src,
+		Offset: offset,
+	}
+}
+
 // Size of load and store operations
 //
 //	msb lsb
@@ -212,14 +323,10 @@ func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
 
 // StoreXAddOp returns the OpCode to atomically add a register to a value in memory.
 func StoreXAddOp(size Size) OpCode {
-	return OpCode(StXClass).SetMode(XAddMode).SetSize(size)
+	return AddAtomic.OpCode(size)
 }
 
 // StoreXAdd atomically adds src to *dst.
 func StoreXAdd(dst, src Register, size Size) Instruction {
-	return Instruction{
-		OpCode: StoreXAddOp(size),
-		Dst:    dst,
-		Src:    src,
-	}
+	return AddAtomic.Mem(dst, src, size, 0)
 }
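Taken together, the new helpers make every atomic form constructible directly. A short usage sketch (again assuming the cilium/ebpf import path; the comments show the BPF assembly each line emits):

package main

import (
	"fmt"

	"github.com/cilium/ebpf/asm"
)

func main() {
	insns := asm.Instructions{
		// r0 = atomic_fetch_add((u64 *)(r1 + 0x0), r0)
		asm.FetchAdd.Mem(asm.R1, asm.R0, asm.DWord, 0),
		// w0 = load_acquire((u32 *)(r1 + 0x0))
		asm.LoadAcquire(asm.R0, asm.R1, asm.Word, 0),
		// store_release((u64 *)(r1 + 0x0), r2)
		asm.StoreRelease(asm.R1, asm.R2, asm.DWord, 0),
		// lock *(u32 *)(r1 + 0x0) += w2 — StoreXAdd is now a thin
		// wrapper around AddAtomic.Mem.
		asm.StoreXAdd(asm.R1, asm.R2, asm.Word),
	}

	for _, ins := range insns {
		fmt.Println(ins)
	}
}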

asm/load_store_string.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default.
