@@ -743,7 +743,7 @@ fn prime_field_impl(
743
743
limbs : usize ,
744
744
modulus_raw : & str ,
745
745
) -> proc_macro2:: TokenStream {
746
- if limbs == 4 && modulus_raw == BLS_381_FR_MODULUS && cfg ! ( target_arch = "x86_64" ) {
746
+ if limbs == 4 && modulus_raw == BLS_381_FR_MODULUS {
747
747
mul_impl_asm4 ( a, b)
748
748
} else {
749
749
mul_impl_default ( a, b, limbs)
@@ -755,14 +755,20 @@ fn prime_field_impl(
755
755
b : proc_macro2:: TokenStream ,
756
756
) -> proc_macro2:: TokenStream {
757
757
// x86_64 asm for four limbs
758
-
759
758
let default_impl = mul_impl_default ( a. clone ( ) , b. clone ( ) , 4 ) ;
760
759
761
760
let mut gen = proc_macro2:: TokenStream :: new ( ) ;
762
761
gen. extend ( quote ! {
763
- if * :: fff:: CPU_SUPPORTS_ADX_INSTRUCTION {
764
- :: fff:: mod_mul_4w_assign( & mut ( #a. 0 ) . 0 , & ( #b. 0 ) . 0 ) ;
765
- } else {
762
+ #[ cfg( target_arch = "x86_64" ) ]
763
+ {
764
+ if * :: fff:: CPU_SUPPORTS_ADX_INSTRUCTION {
765
+ :: fff:: mod_mul_4w_assign( & mut ( #a. 0 ) . 0 , & ( #b. 0 ) . 0 ) ;
766
+ } else {
767
+ #default_impl
768
+ }
769
+ }
770
+ #[ cfg( not( target_arch = "x86_64" ) ) ]
771
+ {
766
772
#default_impl
767
773
}
768
774
} ) ;
@@ -816,8 +822,124 @@ fn prime_field_impl(
816
822
gen
817
823
}
818
824
825
+ fn add_assign_impl (
826
+ a : proc_macro2:: TokenStream ,
827
+ b : proc_macro2:: TokenStream ,
828
+ limbs : usize ,
829
+ ) -> proc_macro2:: TokenStream {
830
+ if limbs == 4 {
831
+ add_assign_asm_impl ( a, b, limbs)
832
+ } else {
833
+ add_assign_default_impl ( a, b, limbs)
834
+ }
835
+ }
836
+
837
+ fn add_assign_asm_impl (
838
+ a : proc_macro2:: TokenStream ,
839
+ b : proc_macro2:: TokenStream ,
840
+ limbs : usize ,
841
+ ) -> proc_macro2:: TokenStream {
842
+ let mut gen = proc_macro2:: TokenStream :: new ( ) ;
843
+ let default_impl = add_assign_default_impl ( a. clone ( ) , b. clone ( ) , limbs) ;
844
+
845
+ gen. extend ( quote ! {
846
+ #[ cfg( target_arch = "x86_64" ) ]
847
+ {
848
+ // This cannot exceed the backing capacity.
849
+ use std:: arch:: x86_64:: * ;
850
+ use std:: mem;
851
+
852
+ unsafe {
853
+ let mut carry = _addcarry_u64(
854
+ 0 ,
855
+ ( #a. 0 ) . 0 [ 0 ] ,
856
+ ( #b. 0 ) . 0 [ 0 ] ,
857
+ & mut ( #a. 0 ) . 0 [ 0 ]
858
+ ) ;
859
+ carry = _addcarry_u64(
860
+ carry, ( #a. 0 ) . 0 [ 1 ] ,
861
+ ( #b. 0 ) . 0 [ 1 ] ,
862
+ & mut ( #a. 0 ) . 0 [ 1 ]
863
+ ) ;
864
+ carry = _addcarry_u64(
865
+ carry, ( #a. 0 ) . 0 [ 2 ] ,
866
+ ( #b. 0 ) . 0 [ 2 ] ,
867
+ & mut ( #a. 0 ) . 0 [ 2 ]
868
+ ) ;
869
+ _addcarry_u64(
870
+ carry,
871
+ ( #a. 0 ) . 0 [ 3 ] ,
872
+ ( #b. 0 ) . 0 [ 3 ] ,
873
+ & mut ( #a. 0 ) . 0 [ 3 ]
874
+ ) ;
875
+
876
+ let mut s_sub: [ u64 ; 4 ] = mem:: uninitialized( ) ;
877
+
878
+ carry = _subborrow_u64(
879
+ 0 ,
880
+ ( #a. 0 ) . 0 [ 0 ] ,
881
+ MODULUS . 0 [ 0 ] ,
882
+ & mut s_sub[ 0 ]
883
+ ) ;
884
+ carry = _subborrow_u64(
885
+ carry,
886
+ ( #a. 0 ) . 0 [ 1 ] ,
887
+ MODULUS . 0 [ 1 ] ,
888
+ & mut s_sub[ 1 ]
889
+ ) ;
890
+ carry = _subborrow_u64(
891
+ carry,
892
+ ( #a. 0 ) . 0 [ 2 ] ,
893
+ MODULUS . 0 [ 2 ] ,
894
+ & mut s_sub[ 2 ]
895
+ ) ;
896
+ carry = _subborrow_u64(
897
+ carry,
898
+ ( #a. 0 ) . 0 [ 3 ] ,
899
+ MODULUS . 0 [ 3 ] ,
900
+ & mut s_sub[ 3 ]
901
+ ) ;
902
+
903
+ if carry == 0 {
904
+ // Direct assign fails since size can be 4 or 6
905
+ // Obviously code doesn't work at all for size 6
906
+ // (#a).0 = s_sub;
907
+ ( #a. 0 ) . 0 [ 0 ] = s_sub[ 0 ] ;
908
+ ( #a. 0 ) . 0 [ 1 ] = s_sub[ 1 ] ;
909
+ ( #a. 0 ) . 0 [ 2 ] = s_sub[ 2 ] ;
910
+ ( #a. 0 ) . 0 [ 3 ] = s_sub[ 3 ] ;
911
+ }
912
+ }
913
+ }
914
+ #[ cfg( not( target_arch = "x86_64" ) ) ]
915
+ {
916
+ #default_impl
917
+ }
918
+ } ) ;
919
+
920
+ gen
921
+ }
922
+
923
+ fn add_assign_default_impl (
924
+ a : proc_macro2:: TokenStream ,
925
+ b : proc_macro2:: TokenStream ,
926
+ _limbs : usize ,
927
+ ) -> proc_macro2:: TokenStream {
928
+ let mut gen = proc_macro2:: TokenStream :: new ( ) ;
929
+
930
+ gen. extend ( quote ! {
931
+ // This cannot exceed the backing capacity.
932
+ #a. 0 . add_nocarry( & #b. 0 ) ;
933
+
934
+ // However, it may need to be reduced.
935
+ #a. reduce( ) ;
936
+ } ) ;
937
+ gen
938
+ }
939
+
819
940
let squaring_impl = sqr_impl ( quote ! { self } , limbs) ;
820
941
let multiply_impl = mul_impl ( quote ! { self } , quote ! { other} , limbs, modulus_raw) ;
942
+ let add_assign = add_assign_impl ( quote ! { self } , quote ! { other} , limbs) ;
821
943
let montgomery_impl = mont_impl ( limbs) ;
822
944
823
945
// (self.0).0[0], (self.0).0[1], ..., 0, 0, 0, 0, ...
@@ -962,79 +1084,7 @@ fn prime_field_impl(
962
1084
963
1085
#[ inline]
964
1086
fn add_assign( & mut self , other: & #name) {
965
- if #limbs == 4 && cfg!( target_arch = "x86_64" ) {
966
- // This cannot exceed the backing capacity.
967
- use std:: arch:: x86_64:: * ;
968
- use std:: mem;
969
-
970
- unsafe {
971
- let mut carry = _addcarry_u64(
972
- 0 ,
973
- ( self . 0 ) . 0 [ 0 ] ,
974
- ( other. 0 ) . 0 [ 0 ] ,
975
- & mut ( self . 0 ) . 0 [ 0 ]
976
- ) ;
977
- carry = _addcarry_u64(
978
- carry, ( self . 0 ) . 0 [ 1 ] ,
979
- ( other. 0 ) . 0 [ 1 ] ,
980
- & mut ( self . 0 ) . 0 [ 1 ]
981
- ) ;
982
- carry = _addcarry_u64(
983
- carry, ( self . 0 ) . 0 [ 2 ] ,
984
- ( other. 0 ) . 0 [ 2 ] ,
985
- & mut ( self . 0 ) . 0 [ 2 ]
986
- ) ;
987
- _addcarry_u64(
988
- carry,
989
- ( self . 0 ) . 0 [ 3 ] ,
990
- ( other. 0 ) . 0 [ 3 ] ,
991
- & mut ( self . 0 ) . 0 [ 3 ]
992
- ) ;
993
-
994
- let mut s_sub: [ u64 ; 4 ] = mem:: uninitialized( ) ;
995
-
996
- carry = _subborrow_u64(
997
- 0 ,
998
- ( self . 0 ) . 0 [ 0 ] ,
999
- MODULUS . 0 [ 0 ] ,
1000
- & mut s_sub[ 0 ]
1001
- ) ;
1002
- carry = _subborrow_u64(
1003
- carry,
1004
- ( self . 0 ) . 0 [ 1 ] ,
1005
- MODULUS . 0 [ 1 ] ,
1006
- & mut s_sub[ 1 ]
1007
- ) ;
1008
- carry = _subborrow_u64(
1009
- carry,
1010
- ( self . 0 ) . 0 [ 2 ] ,
1011
- MODULUS . 0 [ 2 ] ,
1012
- & mut s_sub[ 2 ]
1013
- ) ;
1014
- carry = _subborrow_u64(
1015
- carry,
1016
- ( self . 0 ) . 0 [ 3 ] ,
1017
- MODULUS . 0 [ 3 ] ,
1018
- & mut s_sub[ 3 ]
1019
- ) ;
1020
-
1021
- if carry == 0 {
1022
- // Direct assign fails since size can be 4 or 6
1023
- // Obviously code doesn't work at all for size 6
1024
- // (self.0).0 = s_sub;
1025
- ( self . 0 ) . 0 [ 0 ] = s_sub[ 0 ] ;
1026
- ( self . 0 ) . 0 [ 1 ] = s_sub[ 1 ] ;
1027
- ( self . 0 ) . 0 [ 2 ] = s_sub[ 2 ] ;
1028
- ( self . 0 ) . 0 [ 3 ] = s_sub[ 3 ] ;
1029
- }
1030
- }
1031
- } else {
1032
- // This cannot exceed the backing capacity.
1033
- self . 0 . add_nocarry( & other. 0 ) ;
1034
-
1035
- // However, it may need to be reduced.
1036
- self . reduce( ) ;
1037
- }
1087
+ #add_assign
1038
1088
}
1039
1089
1040
1090
#[ inline]
0 commit comments