Skip to content

Commit 4e9adb7

Browse files
j-berman and jeffro256 committed
crypto: fast fe_batch_invert using Montgomery's trick
Reference: https://iacr.org/archive/pkc2004/29470042/29470042.pdf (section 2.2)
Co-authored-by: Jeffro <[email protected]>
1 parent 864430c commit 4e9adb7

File tree

6 files changed

+147
-0
lines changed

6 files changed

+147
-0
lines changed

src/crypto/crypto-ops.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,33 @@ void fe_invert(fe out, const fe z) {
313313
return;
314314
}
315315

316+
// Montgomery's trick
317+
// https://iacr.org/archive/pkc2004/29470042/29470042.pdf 2.2
318+
int fe_batch_invert(fe *out, const fe *in, const int n) {
319+
if (n == 0) {
320+
return 0;
321+
}
322+
323+
// Step 1: collect initial muls
324+
fe_copy(out[0], in[0]);
325+
for (int i = 1; i < n; ++i) {
326+
fe_mul(out[i], out[i-1], in[i]);
327+
}
328+
329+
// Step 2: get the inverse of all elems multiplied together
330+
fe a;
331+
fe_invert(a, out[n-1]);
332+
333+
// Step 3: get each inverse
334+
for (int i = n; i > 1; --i) {
335+
fe_mul(out[i-1], a, out[i-2]);
336+
fe_mul(a, a, in[i-1]);
337+
}
338+
fe_copy(out[0], a);
339+
340+
return 0;
341+
}
342+
316343
/* From fe_isnegative.c */
317344

318345
/*

src/crypto/crypto-ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
164164
void fe_add(fe h, const fe f, const fe g);
165165
void fe_tobytes(unsigned char *, const fe);
166166
void fe_invert(fe out, const fe z);
167+
int fe_batch_invert(fe *out, const fe *in, const int n);
167168
void fe_mul(fe out, const fe, const fe);
168169
void fe_0(fe h);
169170

tests/performance_tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ set(performance_tests_headers
3636
construct_tx.h
3737
derive_public_key.h
3838
derive_secret_key.h
39+
fe_batch_invert.h
3940
ge_frombytes_vartime.h
4041
generate_key_derivation.h
4142
generate_key_image.h
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright (c) 2025, The Monero Project
2+
//
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without modification, are
6+
// permitted provided that the following conditions are met:
7+
//
8+
// 1. Redistributions of source code must retain the above copyright notice, this list of
9+
// conditions and the following disclaimer.
10+
//
11+
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
12+
// of conditions and the following disclaimer in the documentation and/or other
13+
// materials provided with the distribution.
14+
//
15+
// 3. Neither the name of the copyright holder nor the names of its contributors may be
16+
// used to endorse or promote products derived from this software without specific
17+
// prior written permission.
18+
//
19+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
20+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21+
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22+
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26+
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
27+
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
//
29+
// Parts of this file are originally copyright (c) 2012-2013 The Cryptonote developers
30+
31+
#pragma once
32+
33+
#include "crypto/crypto.h"
34+
35+
template<bool batched>
36+
class test_fe_batch_invert
37+
{
38+
public:
39+
static const size_t loop_count = 50;
40+
static const size_t n_elems = 1000;
41+
42+
bool init()
43+
{
44+
m_fes = (fe *) malloc(n_elems * sizeof(fe));
45+
46+
for (std::size_t i = 0; i < n_elems; ++i)
47+
{
48+
crypto::secret_key r;
49+
crypto::random32_unbiased((unsigned char*)r.data);
50+
51+
ge_p3 point;
52+
ge_scalarmult_base(&point, (unsigned char*)r.data);
53+
54+
memcpy(m_fes[i], &point.Y, sizeof(fe));
55+
}
56+
57+
return true;
58+
}
59+
60+
bool test()
61+
{
62+
fe *inv_fes = (fe *) malloc(n_elems * sizeof(fe));
63+
64+
if (batched)
65+
fe_batch_invert(inv_fes, m_fes, n_elems);
66+
else
67+
{
68+
for (std::size_t i = 0; i < n_elems; ++i)
69+
fe_invert(inv_fes[i], m_fes[i]);
70+
}
71+
72+
free(inv_fes);
73+
74+
return true;
75+
}
76+
77+
private:
78+
fe *m_fes;
79+
};

tests/performance_tests/main.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "derive_public_key.h"
4444
#include "derive_secret_key.h"
4545
#include "derive_view_tag.h"
46+
#include "fe_batch_invert.h"
4647
#include "ge_frombytes_vartime.h"
4748
#include "ge_tobytes.h"
4849
#include "generate_key_derivation.h"
@@ -198,6 +199,8 @@ int main(int argc, char** argv)
198199
TEST_PERFORMANCE0(filter, p, test_generate_key_image);
199200
TEST_PERFORMANCE0(filter, p, test_derive_public_key);
200201
TEST_PERFORMANCE0(filter, p, test_derive_secret_key);
202+
TEST_PERFORMANCE1(filter, p, test_fe_batch_invert, true); // batched
203+
TEST_PERFORMANCE1(filter, p, test_fe_batch_invert, false); // individual inversions
201204
TEST_PERFORMANCE0(filter, p, test_ge_frombytes_vartime);
202205
TEST_PERFORMANCE0(filter, p, test_ge_tobytes);
203206
TEST_PERFORMANCE0(filter, p, test_generate_keypair);

tests/unit_tests/crypto.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,3 +345,39 @@ TEST(Crypto, generator_consistency)
345345
// ringct/rctTypes.h
346346
ASSERT_TRUE(memcmp(H.data, rct::H.bytes, 32) == 0);
347347
}
348+
349+
// Checks that fe_batch_invert agrees with element-wise fe_invert for every
// batch size from 1 to MAX_TEST_ELEMS, and that an empty batch succeeds.
TEST(Crypto, batch_inversion)
{
  const std::size_t MAX_TEST_ELEMS = 1000;

  // Owning buffer of field elements. A failing ASSERT_* returns from the test
  // body immediately, so the memory is released in the destructor rather than
  // by free() calls at the end of the test.
  struct fe_buffer
  {
    fe *elems;

    explicit fe_buffer(const std::size_t n): elems((fe *) malloc(n * sizeof(fe)))
    {
      if (!elems)
        throw std::runtime_error("failed to malloc fe *");
    }
    ~fe_buffer() { free(elems); }

    fe_buffer(const fe_buffer&) = delete;
    fe_buffer& operator=(const fe_buffer&) = delete;
  };

  // Init test elems and individual inversions
  fe_buffer init_elems(MAX_TEST_ELEMS);
  fe_buffer norm_inverted(MAX_TEST_ELEMS);
  for (std::size_t i = 0; i < MAX_TEST_ELEMS; ++i)
  {
    const cryptonote::keypair kp = cryptonote::keypair::generate(hw::get_device("default"));
    ASSERT_EQ(fe_frombytes_vartime(init_elems.elems[i], (unsigned char*)kp.pub.data), 0);
    fe_invert(norm_inverted.elems[i], init_elems.elems[i]);
  }

  // An empty batch is a successful no-op
  {
    fe_buffer unused_out(1);
    ASSERT_EQ(fe_batch_invert(unused_out.elems, init_elems.elems, 0), 0);
  }

  // Do batch inversions and compare to individual inversions
  for (std::size_t n_elems = 1; n_elems <= MAX_TEST_ELEMS; ++n_elems)
  {
    // Allocate exactly n_elems per round so that a write past the end of the
    // output is visible to memory checkers.
    fe_buffer batch_inverted(n_elems);
    ASSERT_EQ(fe_batch_invert(batch_inverted.elems, init_elems.elems, static_cast<int>(n_elems)), 0);

    // Compare the serialized encodings rather than the raw fe limbs, so the
    // check tests field-element equality and does not depend on both code
    // paths producing identical internal limb values.
    for (std::size_t i = 0; i < n_elems; ++i)
    {
      unsigned char batch_bytes[32];
      unsigned char norm_bytes[32];
      fe_tobytes(batch_bytes, batch_inverted.elems[i]);
      fe_tobytes(norm_bytes, norm_inverted.elems[i]);
      ASSERT_EQ(memcmp(batch_bytes, norm_bytes, sizeof(batch_bytes)), 0);
    }
  }
}

0 commit comments

Comments
 (0)