Skip to content

Commit 9f66cb8

Browse files
authored
md4: Optimize compress to improve hash performance (#519)
1 parent 70a2b62 commit 9f66cb8

File tree

1 file changed

+52 -55 lines changed

md4/src/lib.rs

Lines changed: 52 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -30,11 +30,10 @@
3030
)]
3131
#![forbid(unsafe_code)]
3232
#![warn(rust_2018_idioms)]
33-
#![allow(clippy::many_single_char_names)]
3433

3534
pub use digest::{self, Digest};
3635

37-
use core::{convert::TryInto, fmt};
36+
use core::{convert::TryInto, fmt, num::Wrapping as W};
3837
#[cfg(feature = "oid")]
3938
use digest::const_oid::{AssociatedOid, ObjectIdentifier};
4039
use digest::{
@@ -47,10 +46,20 @@ use digest::{
4746
HashMarker, Output,
4847
};
4948

49+
type Wu32 = W<u32>;
50+
const S0: [Wu32; 4] = [
51+
W(0x6745_2301),
52+
W(0xEFCD_AB89),
53+
W(0x98BA_DCFE),
54+
W(0x1032_5476),
55+
];
56+
const K1: Wu32 = W(0x5A82_7999);
57+
const K2: Wu32 = W(0x6ED9_EBA1);
58+
5059
#[derive(Clone)]
5160
pub struct Md4Core {
52-
block_len: u64,
53-
state: [u32; 4],
61+
block_len: W<u64>,
62+
state: [Wu32; 4],
5463
}
5564

5665
impl HashMarker for Md4Core {}
@@ -70,7 +79,7 @@ impl OutputSizeUser for Md4Core {
7079
impl UpdateCore for Md4Core {
7180
#[inline]
7281
fn update_blocks(&mut self, blocks: &[Block<Self>]) {
73-
self.block_len = self.block_len.wrapping_add(blocks.len() as u64);
82+
self.block_len += W(blocks.len() as u64);
7483
for block in blocks {
7584
compress(&mut self.state, block);
7685
}
@@ -80,27 +89,25 @@ impl UpdateCore for Md4Core {
8089
impl FixedOutputCore for Md4Core {
8190
#[inline]
8291
fn finalize_fixed_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) {
83-
let bit_len = self
84-
.block_len
85-
.wrapping_mul(Self::BlockSize::U64)
86-
.wrapping_add(buffer.get_pos() as u64)
87-
.wrapping_mul(8);
92+
let tail_len = W(buffer.get_pos() as u64);
93+
let bytes_len = W(Self::BlockSize::U64) * self.block_len + tail_len;
94+
let bits_len = W(8) * bytes_len;
95+
8896
let mut state = self.state;
89-
buffer.len64_padding_le(bit_len, |block| compress(&mut state, block));
97+
buffer.len64_padding_le(bits_len.0, |block| compress(&mut state, block));
9098

9199
for (chunk, v) in out.chunks_exact_mut(4).zip(state.iter()) {
92-
chunk.copy_from_slice(&v.to_le_bytes());
100+
chunk.copy_from_slice(&v.0.to_le_bytes());
93101
}
94102
}
95103
}
96104

97105
impl Default for Md4Core {
98106
#[inline]
99107
fn default() -> Self {
100-
let state = [0x6745_2301, 0xEFCD_AB89, 0x98BA_DCFE, 0x1032_5476];
101108
Self {
102-
state,
103-
block_len: 0,
109+
state: S0,
110+
block_len: W(0),
104111
}
105112
}
106113
}
@@ -133,35 +140,25 @@ impl AssociatedOid for Md4Core {
133140
/// MD4 hasher state.
134141
pub type Md4 = CoreWrapper<Md4Core>;
135142

136-
fn compress(state: &mut [u32; 4], input: &Block<Md4Core>) {
137-
fn f(x: u32, y: u32, z: u32) -> u32 {
138-
(x & y) | (!x & z)
143+
fn compress(state: &mut [Wu32; 4], input: &Block<Md4Core>) {
144+
fn f(x: Wu32, y: Wu32, z: Wu32) -> Wu32 {
145+
z ^ (x & (y ^ z))
139146
}
140147

141-
fn g(x: u32, y: u32, z: u32) -> u32 {
148+
fn g(x: Wu32, y: Wu32, z: Wu32) -> Wu32 {
142149
(x & y) | (x & z) | (y & z)
143150
}
144151

145-
fn h(x: u32, y: u32, z: u32) -> u32 {
152+
fn h(x: Wu32, y: Wu32, z: Wu32) -> Wu32 {
146153
x ^ y ^ z
147154
}
148155

149-
fn op1(a: u32, b: u32, c: u32, d: u32, k: u32, s: u32) -> u32 {
150-
a.wrapping_add(f(b, c, d)).wrapping_add(k).rotate_left(s)
151-
}
152-
153-
fn op2(a: u32, b: u32, c: u32, d: u32, k: u32, s: u32) -> u32 {
154-
a.wrapping_add(g(b, c, d))
155-
.wrapping_add(k)
156-
.wrapping_add(0x5A82_7999)
157-
.rotate_left(s)
158-
}
159-
160-
fn op3(a: u32, b: u32, c: u32, d: u32, k: u32, s: u32) -> u32 {
161-
a.wrapping_add(h(b, c, d))
162-
.wrapping_add(k)
163-
.wrapping_add(0x6ED9_EBA1)
164-
.rotate_left(s)
156+
fn op<F>(f: F, a: Wu32, b: Wu32, c: Wu32, d: Wu32, k: Wu32, s: u32) -> Wu32
157+
where
158+
F: Fn(Wu32, Wu32, Wu32) -> Wu32,
159+
{
160+
let t = a + f(b, c, d) + k;
161+
W(t.0.rotate_left(s))
165162
}
166163

167164
let mut a = state[0];
@@ -170,37 +167,37 @@ fn compress(state: &mut [u32; 4], input: &Block<Md4Core>) {
170167
let mut d = state[3];
171168

172169
// load block to data
173-
let mut data = [0u32; 16];
170+
let mut data = [W(0u32); 16];
174171
for (o, chunk) in data.iter_mut().zip(input.chunks_exact(4)) {
175-
*o = u32::from_le_bytes(chunk.try_into().unwrap());
172+
*o = W(u32::from_le_bytes(chunk.try_into().unwrap()));
176173
}
177174

178175
// round 1
179176
for &i in &[0, 4, 8, 12] {
180-
a = op1(a, b, c, d, data[i], 3);
181-
d = op1(d, a, b, c, data[i + 1], 7);
182-
c = op1(c, d, a, b, data[i + 2], 11);
183-
b = op1(b, c, d, a, data[i + 3], 19);
177+
a = op(f, a, b, c, d, data[i], 3);
178+
d = op(f, d, a, b, c, data[i + 1], 7);
179+
c = op(f, c, d, a, b, data[i + 2], 11);
180+
b = op(f, b, c, d, a, data[i + 3], 19);
184181
}
185182

186183
// round 2
187-
for i in 0..4 {
188-
a = op2(a, b, c, d, data[i], 3);
189-
d = op2(d, a, b, c, data[i + 4], 5);
190-
c = op2(c, d, a, b, data[i + 8], 9);
191-
b = op2(b, c, d, a, data[i + 12], 13);
184+
for &i in &[0, 1, 2, 3] {
185+
a = op(g, a, b, c, d, data[i] + K1, 3);
186+
d = op(g, d, a, b, c, data[i + 4] + K1, 5);
187+
c = op(g, c, d, a, b, data[i + 8] + K1, 9);
188+
b = op(g, b, c, d, a, data[i + 12] + K1, 13);
192189
}
193190

194191
// round 3
195192
for &i in &[0, 2, 1, 3] {
196-
a = op3(a, b, c, d, data[i], 3);
197-
d = op3(d, a, b, c, data[i + 8], 9);
198-
c = op3(c, d, a, b, data[i + 4], 11);
199-
b = op3(b, c, d, a, data[i + 12], 15);
193+
a = op(h, a, b, c, d, data[i] + K2, 3);
194+
d = op(h, d, a, b, c, data[i + 8] + K2, 9);
195+
c = op(h, c, d, a, b, data[i + 4] + K2, 11);
196+
b = op(h, b, c, d, a, data[i + 12] + K2, 15);
200197
}
201198

202-
state[0] = state[0].wrapping_add(a);
203-
state[1] = state[1].wrapping_add(b);
204-
state[2] = state[2].wrapping_add(c);
205-
state[3] = state[3].wrapping_add(d);
199+
state[0] += a;
200+
state[1] += b;
201+
state[2] += c;
202+
state[3] += d;
206203
}

0 commit comments

Comments (0)