@@ -149,11 +149,21 @@ class VlqBase128Le(KaitaiStruct):
149
149
<https://lucene.apache.org/core/3_5_0/fileformats.html#VInt>
150
150
* Apache Avro uses this as a basis for integer encoding, adding ZigZag on
151
151
top of it for signed ints
152
- <https://avro.apache.org/docs/current/spec.html#binary_encode_primitive >
152
+ <https://avro.apache.org/docs/1.12.0/specification/#primitive-types-1>
153
153
154
154
More information on this encoding is available at <https://en.wikipedia.org/wiki/LEB128>
155
155
156
- This particular implementation supports serialized values to up 8 bytes long.
156
+ This particular implementation supports integer values up to 64 bits (i.e. the
157
+ maximum unsigned value supported is `2**64 - 1`), which implies that serialized
158
+ values can be up to 10 bytes in length.
159
+
160
+ If the most significant 10th byte (`groups[9]`) is present, its `has_next`
161
+ must be `false` (otherwise we would have 11 or more bytes, which is not
162
+ supported) and its `value` can be only `0` or `1` (because a 9-byte VLQ can
163
+ represent `9 * 7 = 63` bits already, so the 10th byte can only add 1 bit,
164
+ since only integers up to 64 bits are supported). These restrictions are
165
+ enforced by this implementation. They were inspired by the Protoscope tool,
166
+ see <https://github.com/protocolbuffers/protoscope/blob/8e7a6aafa2c9958527b1e0747e66e1bfff045819/writer.go#L644-L648>.
157
167
"""
158
168
SEQ_FIELDS = ["groups" ]
159
169
def __init__ (self , _io , _parent = None , _root = None ):
@@ -170,7 +180,7 @@ def _read(self):
170
180
if not 'arr' in self ._debug ['groups' ]:
171
181
self ._debug ['groups' ]['arr' ] = []
172
182
self ._debug ['groups' ]['arr' ].append ({'start' : self ._io .pos ()})
173
- _t_groups = VlqBase128Le .Group (self ._io , self , self ._root )
183
+ _t_groups = VlqBase128Le .Group (i , ( self . groups [( i - 1 )]. interm_value if i != 0 else 0 ), (( 9223372036854775808 if i == 9 else ( self . groups [( i - 1 )]. multiplier * 128 )) if i != 0 else 1 ), self ._io , self , self ._root )
174
184
_t_groups ._read ()
175
185
_ = _t_groups
176
186
self .groups .append (_ )
@@ -184,19 +194,34 @@ class Group(KaitaiStruct):
184
194
"""One byte group, clearly divided into 7-bit "value" chunk and 1-bit "continuation" flag.
185
195
"""
186
196
SEQ_FIELDS = ["has_next" , "value" ]
187
def __init__(self, idx, prev_interm_value, multiplier, _io, _parent=None, _root=None):
    """Store the stream, the tree links and the per-group parameters.

    Nothing is read from the stream here; parsing happens in ``_read``.

    :param idx: zero-based index of this group within the parent's
        ``groups`` list (the parent passes its loop counter).
    :param prev_interm_value: ``interm_value`` of the preceding group,
        or ``0`` for the first group.
    :param multiplier: weight applied to this group's 7-bit ``value``;
        the parent passes ``1`` for the first group, ``2**63`` for the
        group at index 9 and the previous multiplier times 128 otherwise.
    :param _io: stream object the group is read from.
    :param _parent: parent structure, if any.
    :param _root: root structure; a falsy value makes this object its
        own root.
    """
    self._io = _io
    self._parent = _parent
    self._root = _root if _root else self
    # Type parameters supplied by the parent structure.
    self.idx = idx
    self.prev_interm_value = prev_interm_value
    self.multiplier = multiplier
    # Per-field debug info (start/end stream positions), filled by _read.
    self._debug = collections.defaultdict(dict)
192
205
193
206
def _read(self):
    """Parse one byte group: a 1-bit continuation flag and a 7-bit value.

    Stream positions before and after each field are recorded in
    ``self._debug``.

    The group at index 9 is the last one permitted, so for it the
    continuation flag must be ``False`` and the value must not exceed 1.
    For every other index the flag is unconstrained and the value may be
    anything up to 127.

    :raises kaitaistruct.ValidationNotEqualError: ``has_next`` is set on
        the group at index 9.
    :raises kaitaistruct.ValidationGreaterThanError: ``value`` exceeds
        the maximum allowed for this group.
    """
    is_tenth_group = self.idx == 9

    self._debug['has_next']['start'] = self._io.pos()
    self.has_next = self._io.read_bits_int_be(1) != 0
    self._debug['has_next']['end'] = self._io.pos()
    # Only the 10th group constrains the flag; otherwise any value passes.
    expected_has_next = False if is_tenth_group else self.has_next
    if self.has_next != expected_has_next:
        raise kaitaistruct.ValidationNotEqualError(expected_has_next, self.has_next, self._io, u"/types/group/seq/0")

    self._debug['value']['start'] = self._io.pos()
    self.value = self._io.read_bits_int_be(7)
    self._debug['value']['end'] = self._io.pos()
    # The 10th group may contribute a single bit only.
    max_value = 1 if is_tenth_group else 127
    if self.value > max_value:
        raise kaitaistruct.ValidationGreaterThanError(max_value, self.value, self._io, u"/types/group/seq/1")
217
+
218
@property
def interm_value(self):
    """Running total of the decoded integer up to and including this group.

    Computed as ``prev_interm_value + value * multiplier`` on first
    access and cached in ``_m_interm_value`` afterwards.
    """
    if not hasattr(self, '_m_interm_value'):
        self._m_interm_value = self.prev_interm_value + self.value * self.multiplier
    return self._m_interm_value
200
225
201
226
202
227
@property
@@ -213,27 +238,23 @@ def value(self):
213
238
if hasattr (self , '_m_value' ):
214
239
return self ._m_value
215
240
216
- self ._m_value = ((((((( self .groups [0 ]. value + (( self . groups [ 1 ].value << 7 ) if self . len >= 2 else 0 )) + (( self . groups [ 2 ]. value << 14 ) if self . len >= 3 else 0 )) + (( self . groups [ 3 ]. value << 21 ) if self . len >= 4 else 0 )) + (( self . groups [ 4 ]. value << 28 ) if self . len >= 5 else 0 )) + (( self . groups [ 5 ]. value << 35 ) if self . len >= 6 else 0 )) + (( self . groups [ 6 ]. value << 42 ) if self . len >= 7 else 0 )) + (( self . groups [ 7 ]. value << 49 ) if self . len >= 8 else 0 ))
241
+ self ._m_value = self .groups [- 1 ].interm_value
217
242
return getattr (self , '_m_value' , None )
218
243
219
244
@property
def sign_bit(self):
    """Weight of the most significant bit of the decoded value.

    When ``len`` is 10 this is fixed at ``2**63``. Otherwise it is the
    last group's ``multiplier`` times 64, i.e. the top bit of that
    group's 7-bit chunk. The result is cached in ``_m_sign_bit``.
    """
    if not hasattr(self, '_m_sign_bit'):
        if self.len == 10:
            # 9223372036854775808 == 2**63
            self._m_sign_bit = 9223372036854775808
        else:
            self._m_sign_bit = self.groups[-1].multiplier * 64
    return self._m_sign_bit
226
251
227
252
@property
def value_signed(self):
    """The decoded value reinterpreted as a signed integer.

    If ``value`` has the ``sign_bit`` set (``value >= sign_bit``), the
    result is ``value - 2 * sign_bit``; otherwise it is ``value``
    unchanged. The result is cached in ``_m_value_signed``.
    """
    if not hasattr(self, '_m_value_signed'):
        top_bit = self.sign_bit
        unsigned = self.value
        if top_bit > 0 and unsigned >= top_bit:
            # Written as nested subtractions of non-negative operands,
            # mirroring the generated expression.
            self._m_value_signed = -(top_bit - (unsigned - top_bit))
        else:
            self._m_value_signed = unsigned
    return self._m_value_signed
238
259
239
260
0 commit comments