Skip to content

Commit 39cb448

Browse files
fix: ensure to compile asm only on x86_64
1 parent 5bfc9d6 commit 39cb448

File tree

3 files changed

+132
-84
lines changed

3 files changed

+132
-84
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ edition = "2018"
1212

1313
[dependencies]
1414
byteorder = "1"
15-
fff_derive = { version = "0.2.0", path = "ff_derive", optional = true }
15+
fff_derive = { version = "0.2.1", path = "ff_derive", optional = true }
1616
rand_core = "0.5"
1717
lazy_static = "1.4.0"
1818

@@ -23,5 +23,5 @@ derive = ["fff_derive"]
2323
[badges]
2424
maintenance = { status = "actively-developed" }
2525

26-
[build-dependencies]
26+
[target.'cfg(target_arch = "x86_64")'.build-dependencies]
2727
cc = "1.0.50"

build.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
#[cfg(target_arch = "x86_64")]
2-
extern crate cc;
3-
41
fn main() {
5-
if cfg!(target_arch = "x86_64") {
2+
let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap();
3+
if target_arch == "x86_64" {
64
cc::Build::new()
75
.flag("-c")
86
.file("./asm/mul_4.S")

ff_derive/src/lib.rs

Lines changed: 128 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ fn prime_field_impl(
743743
limbs: usize,
744744
modulus_raw: &str,
745745
) -> proc_macro2::TokenStream {
746-
if limbs == 4 && modulus_raw == BLS_381_FR_MODULUS && cfg!(target_arch = "x86_64") {
746+
if limbs == 4 && modulus_raw == BLS_381_FR_MODULUS {
747747
mul_impl_asm4(a, b)
748748
} else {
749749
mul_impl_default(a, b, limbs)
@@ -755,14 +755,20 @@ fn prime_field_impl(
755755
b: proc_macro2::TokenStream,
756756
) -> proc_macro2::TokenStream {
757757
// x86_64 asm for four limbs
758-
759758
let default_impl = mul_impl_default(a.clone(), b.clone(), 4);
760759

761760
let mut gen = proc_macro2::TokenStream::new();
762761
gen.extend(quote! {
763-
if *::fff::CPU_SUPPORTS_ADX_INSTRUCTION {
764-
::fff::mod_mul_4w_assign(&mut (#a.0).0, &(#b.0).0);
765-
} else {
762+
#[cfg(target_arch = "x86_64")]
763+
{
764+
if *::fff::CPU_SUPPORTS_ADX_INSTRUCTION {
765+
::fff::mod_mul_4w_assign(&mut (#a.0).0, &(#b.0).0);
766+
} else {
767+
#default_impl
768+
}
769+
}
770+
#[cfg(not(target_arch = "x86_64"))]
771+
{
766772
#default_impl
767773
}
768774
});
@@ -816,8 +822,124 @@ fn prime_field_impl(
816822
gen
817823
}
818824

825+
fn add_assign_impl(
826+
a: proc_macro2::TokenStream,
827+
b: proc_macro2::TokenStream,
828+
limbs: usize,
829+
) -> proc_macro2::TokenStream {
830+
if limbs == 4 {
831+
add_assign_asm_impl(a, b, limbs)
832+
} else {
833+
add_assign_default_impl(a, b, limbs)
834+
}
835+
}
836+
837+
/// Generates the body of `add_assign` for four-limb fields.
///
/// * `a` - tokens naming the value that is added to in place.
/// * `b` - tokens naming the value being added.
/// * `limbs` - forwarded to `add_assign_default_impl` for the fallback body.
///
/// The emitted tokens consist of two `cfg`-gated blocks, so the choice is made
/// when the *derived* code is compiled, not when the macro runs:
///
/// * `target_arch = "x86_64"`: a carry-chained addition of the four limbs
///   using `_addcarry_u64`, followed by a trial subtraction of `MODULUS`
///   using `_subborrow_u64`; the difference is kept only when the
///   subtraction did not borrow.
/// * any other architecture: the tokens from `add_assign_default_impl`.
///
/// The scratch array for the trial subtraction is zero-initialised instead of
/// being created with `mem::uninitialized()`, which is deprecated and is
/// undefined behaviour for integer arrays. Every element is overwritten
/// before it is read, so the result is unchanged.
fn add_assign_asm_impl(
    a: proc_macro2::TokenStream,
    b: proc_macro2::TokenStream,
    limbs: usize,
) -> proc_macro2::TokenStream {
    let default_impl = add_assign_default_impl(a.clone(), b.clone(), limbs);

    quote! {
        #[cfg(target_arch = "x86_64")]
        {
            use std::arch::x86_64::*;

            // SAFETY: the intrinsics take their operands by value and write
            // only through the `&mut u64` passed as the last argument; every
            // such reference points at a live limb of `a` or of `s_sub`.
            unsafe {
                // This cannot exceed the backing capacity, so the carry out
                // of the top limb is deliberately not inspected.
                let mut carry = _addcarry_u64(
                    0,
                    (#a.0).0[0],
                    (#b.0).0[0],
                    &mut (#a.0).0[0]
                );
                carry = _addcarry_u64(
                    carry,
                    (#a.0).0[1],
                    (#b.0).0[1],
                    &mut (#a.0).0[1]
                );
                carry = _addcarry_u64(
                    carry,
                    (#a.0).0[2],
                    (#b.0).0[2],
                    &mut (#a.0).0[2]
                );
                _addcarry_u64(
                    carry,
                    (#a.0).0[3],
                    (#b.0).0[3],
                    &mut (#a.0).0[3]
                );

                // Trial subtraction of the modulus into a scratch buffer.
                let mut s_sub: [u64; 4] = [0u64; 4];

                let mut borrow = _subborrow_u64(
                    0,
                    (#a.0).0[0],
                    MODULUS.0[0],
                    &mut s_sub[0]
                );
                borrow = _subborrow_u64(
                    borrow,
                    (#a.0).0[1],
                    MODULUS.0[1],
                    &mut s_sub[1]
                );
                borrow = _subborrow_u64(
                    borrow,
                    (#a.0).0[2],
                    MODULUS.0[2],
                    &mut s_sub[2]
                );
                borrow = _subborrow_u64(
                    borrow,
                    (#a.0).0[3],
                    MODULUS.0[3],
                    &mut s_sub[3]
                );

                // No borrow means the sum was >= MODULUS, so keep the
                // reduced value. Limbs are copied one by one; the original
                // note says assigning the whole array fails because the
                // backing array can have 4 or 6 limbs.
                if borrow == 0 {
                    (#a.0).0[0] = s_sub[0];
                    (#a.0).0[1] = s_sub[1];
                    (#a.0).0[2] = s_sub[2];
                    (#a.0).0[3] = s_sub[3];
                }
            }
        }
        #[cfg(not(target_arch = "x86_64"))]
        {
            #default_impl
        }
    }
}
922+
923+
/// Generates the portable body of `add_assign`.
///
/// * `a` - tokens naming the value that is added to in place.
/// * `b` - tokens naming the value being added.
/// * `_limbs` - unused; kept so the signature matches `add_assign_asm_impl`.
///
/// The emitted code calls `add_nocarry` on the inner representation of `a`
/// with the inner representation of `b`, then calls `reduce` on `a`.
fn add_assign_default_impl(
    a: proc_macro2::TokenStream,
    b: proc_macro2::TokenStream,
    _limbs: usize,
) -> proc_macro2::TokenStream {
    quote! {
        // This cannot exceed the backing capacity.
        #a.0.add_nocarry(&#b.0);

        // However, it may need to be reduced.
        #a.reduce();
    }
}
939+
819940
let squaring_impl = sqr_impl(quote! {self}, limbs);
820941
let multiply_impl = mul_impl(quote! {self}, quote! {other}, limbs, modulus_raw);
942+
let add_assign = add_assign_impl(quote! {self}, quote! {other}, limbs);
821943
let montgomery_impl = mont_impl(limbs);
822944

823945
// (self.0).0[0], (self.0).0[1], ..., 0, 0, 0, 0, ...
@@ -962,79 +1084,7 @@ fn prime_field_impl(
9621084

9631085
#[inline]
9641086
fn add_assign(&mut self, other: &#name) {
965-
if #limbs == 4 && cfg!(target_arch = "x86_64") {
966-
// This cannot exceed the backing capacity.
967-
use std::arch::x86_64::*;
968-
use std::mem;
969-
970-
unsafe {
971-
let mut carry = _addcarry_u64(
972-
0,
973-
(self.0).0[0],
974-
(other.0).0[0],
975-
&mut (self.0).0[0]
976-
);
977-
carry = _addcarry_u64(
978-
carry, (self.0).0[1],
979-
(other.0).0[1],
980-
&mut (self.0).0[1]
981-
);
982-
carry = _addcarry_u64(
983-
carry, (self.0).0[2],
984-
(other.0).0[2],
985-
&mut (self.0).0[2]
986-
);
987-
_addcarry_u64(
988-
carry,
989-
(self.0).0[3],
990-
(other.0).0[3],
991-
&mut (self.0).0[3]
992-
);
993-
994-
let mut s_sub: [u64; 4] = mem::uninitialized();
995-
996-
carry = _subborrow_u64(
997-
0,
998-
(self.0).0[0],
999-
MODULUS.0[0],
1000-
&mut s_sub[0]
1001-
);
1002-
carry = _subborrow_u64(
1003-
carry,
1004-
(self.0).0[1],
1005-
MODULUS.0[1],
1006-
&mut s_sub[1]
1007-
);
1008-
carry = _subborrow_u64(
1009-
carry,
1010-
(self.0).0[2],
1011-
MODULUS.0[2],
1012-
&mut s_sub[2]
1013-
);
1014-
carry = _subborrow_u64(
1015-
carry,
1016-
(self.0).0[3],
1017-
MODULUS.0[3],
1018-
&mut s_sub[3]
1019-
);
1020-
1021-
if carry == 0 {
1022-
// Direct assign fails since size can be 4 or 6
1023-
// Obviously code doesn't work at all for size 6
1024-
// (self.0).0 = s_sub;
1025-
(self.0).0[0] = s_sub[0];
1026-
(self.0).0[1] = s_sub[1];
1027-
(self.0).0[2] = s_sub[2];
1028-
(self.0).0[3] = s_sub[3];
1029-
}
1030-
}
1031-
} else {
1032-
// This cannot exceed the backing capacity.
1033-
self.0.add_nocarry(&other.0);
1034-
1035-
// However, it may need to be reduced.
1036-
self.reduce();
1037-
}
1087+
#add_assign
10381088
}
10391089

10401090
#[inline]

0 commit comments

Comments
 (0)