@@ -81,7 +81,7 @@ func main() {
8181 o .maxSkip = 100
8282 o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm" , 17 , 14 , 7 , 7 , limit14B )
8383 o .maxSkip = 0
84- o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm64K" , 16 , 14 , 7 , 7 , 64 << 10 - 1 )
84+ o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm64K" , 16 , 13 , 7 , 7 , 64 << 10 - 1 )
8585 o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm12B" , 14 , 12 , 6 , 6 , limit12B )
8686 o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm10B" , 12 , 10 , 5 , 6 , limit10B )
8787 o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm8B" , 10 , 8 , 4 , 6 , limit8B )
@@ -146,6 +146,15 @@ func assert(fn func(ok LabelRef)) {
146146 }
147147}
148148
// regTable describes a table that is addressed through a base register
// plus a fixed displacement, instead of through a stack-allocated local.
// Several tables can share one base register by using different
// displacements.
149+ type regTable struct {
150+ r reg.Register // Base register used for every address formed by Idx.
151+ disp int // Constant displacement applied on top of the base register.
152+ }
153+
// Idx returns a Mem operand that uses the table's register as Base,
// idx as Index with the given scale, and the table's disp as Disp.
154+ func (r regTable ) Idx (idx reg.GPVirtual , scale uint8 ) Mem {
155+ return Mem {Base : r .r , Index : idx , Scale : scale , Disp : r .disp }
156+ }
157+
149158type options struct {
150159 snappy bool
151160 bmi1 bool
@@ -163,7 +172,15 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
163172 if o .skipOutput {
164173 dstTxt = ""
165174 }
166- TEXT (name , 0 , "func(" + dstTxt + "src []byte) int" )
175+
176+ var tableSize = 4 * (1 << tableBits )
177+ // Memzero needs at least 128 bytes.
178+ if tableSize < 128 {
179+ panic ("tableSize must be at least 128 bytes" )
180+ }
181+
182+ arrPtr := fmt .Sprintf (",tmp *[%d]byte" , tableSize )
183+ TEXT (name , 0 , "func(" + dstTxt + "src []byte" + arrPtr + ") int" )
167184 Doc (name + " encodes a non-empty src to a guaranteed-large-enough dst." ,
168185 fmt .Sprintf ("Maximum input %d bytes." , maxLen ),
169186 "It assumes that the varint-encoded length of the decompressed bytes has already been written." , "" )
@@ -173,7 +190,6 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
173190 o .maxOffset = maxLen - 1
174191 var literalMaxOverhead = maxLitOverheadFor (maxLen )
175192
176- var tableSize = 4 * (1 << tableBits )
177193 // Memzero needs at least 128 bytes.
178194 if tableSize < 128 {
179195 panic ("tableSize must be at least 128 bytes" )
@@ -209,8 +225,8 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
209225 // nextSTempL keeps nextS while other functions are being called.
210226 nextSTempL := AllocLocal (4 )
211227
212- // Alloc table last
213- table := AllocLocal ( tableSize )
228+ // Load pointer to temp table
229+ table := regTable { r : Load ( Param ( "tmp" ), GP64 ())}
214230
215231 dst := GP64 ()
216232 if ! o .skipOutput {
@@ -236,7 +252,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
236252 iReg := GP64 ()
237253 MOVQ (U32 (tableSize / 8 / 16 ), iReg )
238254 tablePtr := GP64 ()
239- LEAQ (table , tablePtr )
255+ MOVQ (table . r , tablePtr )
240256 zeroXmm := XMM ()
241257 PXOR (zeroXmm , zeroXmm )
242258
@@ -855,7 +871,17 @@ func maxLitOverheadFor(n int) int {
855871}
856872
857873func (o options ) genEncodeBetterBlockAsm (name string , lTableBits , sTableBits , skipLog , lHashBytes , maxLen int ) {
858- TEXT (name , 0 , "func(dst, src []byte) int" )
874+ var lTableSize = 4 * (1 << lTableBits )
875+ var sTableSize = 4 * (1 << sTableBits )
876+ tableSize := lTableSize + sTableSize
877+
878+ // Memzero needs at least 128 bytes.
879+ if tableSize < 128 {
880+ panic ("tableSize must be at least 128 bytes" )
881+ }
882+ arrPtr := fmt .Sprintf (", tmp *[%d]byte" , tableSize )
883+
884+ TEXT (name , 0 , "func(dst, src []byte" + arrPtr + ") int" )
859885 Doc (name + " encodes a non-empty src to a guaranteed-large-enough dst." ,
860886 fmt .Sprintf ("Maximum input %d bytes." , maxLen ),
861887 "It assumes that the varint-encoded length of the decompressed bytes has already been written." , "" )
@@ -870,9 +896,6 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
870896 o .maxLen = maxLen
871897 o .maxOffset = maxLen - 1
872898
873- var lTableSize = 4 * (1 << lTableBits )
874- var sTableSize = 4 * (1 << sTableBits )
875-
876899 // Memzero needs at least 128 bytes.
877900 if (lTableSize + sTableSize ) < 128 {
878901 panic ("tableSize must be at least 128 bytes" )
@@ -905,9 +928,9 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
905928 // nextSTempL keeps nextS while other functions are being called.
906929 nextSTempL := AllocLocal (4 )
907930
908- // Alloc table last, lTab must be before sTab.
909- lTab := AllocLocal ( lTableSize )
910- sTab := AllocLocal ( sTableSize )
931+ table := Load ( Param ( "tmp" ), GP64 ())
932+ lTab := regTable { r : table }
933+ sTab := regTable { r : table , disp : lTableSize }
911934
912935 dst := GP64 ()
913936 {
@@ -930,7 +953,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
930953 iReg := GP64 ()
931954 MOVQ (U32 ((sTableSize + lTableSize )/ 8 / 16 ), iReg )
932955 tablePtr := GP64 ()
933- LEAQ ( lTab , tablePtr )
956+ MOVQ ( table , tablePtr )
934957 zeroXmm := XMM ()
935958 PXOR (zeroXmm , zeroXmm )
936959
@@ -2916,7 +2939,7 @@ func (o options) cvtLZ4BlockAsm(lz4s bool) {
29162939 TEXT ("cvt" + srcAlgo + "Block" + snap , NOSPLIT , "func(dst, src []byte) (uncompressed int, dstUsed int)" )
29172940 Doc ("cvt" + srcAlgo + "Block converts an " + srcAlgo + " block to " + dstAlgo , "" )
29182941 Pragma ("noescape" )
2919- o .outputMargin = 10
2942+ o .outputMargin = 8
29202943 o .maxOffset = math .MaxUint16
29212944
29222945 const (
0 commit comments