@@ -180,10 +180,13 @@ def parse_to_unicode(
180180 return {}, space_code , []
181181 process_rg : bool = False
182182 process_char : bool = False
183+ multiline_rg : Union [
184+ None , Tuple [int , int ]
185+ ] = None # tuple = (next char code to map, last char code of the range), kept while a bfrange list spans several lines ; cf #1285 for an example file
183186 cm = prepare_cm (ft )
184187 for l in cm .split (b"\n " ):
185- process_rg , process_char = process_cm_line (
186- l .strip (b" " ), process_rg , process_char , map_dict , int_entry
188+ process_rg , process_char , multiline_rg = process_cm_line (
189+ l .strip (b" " ), process_rg , process_char , multiline_rg , map_dict , int_entry
187190 )
188191
189192 for a , value in map_dict .items ():
@@ -228,11 +231,12 @@ def process_cm_line(
228231 l : bytes ,
229232 process_rg : bool ,
230233 process_char : bool ,
234+ multiline_rg : Union [None , Tuple [int , int ]],
231235 map_dict : Dict [Any , Any ],
232236 int_entry : List [int ],
233- ) -> Tuple [bool , bool ]:
237+ ) -> Tuple [bool , bool , Union [ None , Tuple [ int , int ]] ]:
234238 if l in (b"" , b" " ) or l [0 ] == 37 : # 37 = %
235- return process_rg , process_char
239+ return process_rg , process_char , multiline_rg
236240 if b"beginbfrange" in l :
237241 process_rg = True
238242 elif b"endbfrange" in l :
@@ -242,22 +246,29 @@ def process_cm_line(
242246 elif b"endbfchar" in l :
243247 process_char = False
244248 elif process_rg :
245- parse_bfrange (l , map_dict , int_entry )
249+ multiline_rg = parse_bfrange (l , map_dict , int_entry , multiline_rg )
246250 elif process_char :
247251 parse_bfchar (l , map_dict , int_entry )
248- return process_rg , process_char
252+ return process_rg , process_char , multiline_rg
249253
250254
251- def parse_bfrange (l : bytes , map_dict : Dict [Any , Any ], int_entry : List [int ]) -> None :
255+ def parse_bfrange (
256+ l : bytes ,
257+ map_dict : Dict [Any , Any ],
258+ int_entry : List [int ],
259+ multiline_rg : Union [None , Tuple [int , int ]],
260+ ) -> Union [None , Tuple [int , int ]]:
252261 lst = [x for x in l .split (b" " ) if x ]
253- a = int (lst [0 ], 16 )
254- b = int (lst [1 ], 16 )
262+ closure_found = False
255263 nbi = len (lst [0 ])
256264 map_dict [- 1 ] = nbi // 2
257265 fmt = b"%%0%dX" % nbi
258- if lst [2 ] == b"[" :
259- for sq in lst [3 :]:
266+ if multiline_rg is not None :
267+ a = multiline_rg [0 ] # a, b not in the current line
268+ b = multiline_rg [1 ]
269+ for sq in lst [1 :]:
260270 if sq == b"]" :
271+ closure_found = True
261272 break
262273 map_dict [
263274 unhexlify (fmt % a ).decode (
@@ -268,18 +279,36 @@ def parse_bfrange(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> N
268279 int_entry .append (a )
269280 a += 1
270281 else :
271- c = int (lst [2 ], 16 )
272- fmt2 = b"%%0%dX" % max (4 , len (lst [2 ]))
273- while a <= b :
274- map_dict [
275- unhexlify (fmt % a ).decode (
276- "charmap" if map_dict [- 1 ] == 1 else "utf-16-be" ,
277- "surrogatepass" ,
278- )
279- ] = unhexlify (fmt2 % c ).decode ("utf-16-be" , "surrogatepass" )
280- int_entry .append (a )
281- a += 1
282- c += 1
282+ a = int (lst [0 ], 16 )
283+ b = int (lst [1 ], 16 )
284+ if lst [2 ] == b"[" :
285+ for sq in lst [3 :]:
286+ if sq == b"]" :
287+ closure_found = True
288+ break
289+ map_dict [
290+ unhexlify (fmt % a ).decode (
291+ "charmap" if map_dict [- 1 ] == 1 else "utf-16-be" ,
292+ "surrogatepass" ,
293+ )
294+ ] = unhexlify (sq ).decode ("utf-16-be" , "surrogatepass" )
295+ int_entry .append (a )
296+ a += 1
297+ else : # case without list
298+ c = int (lst [2 ], 16 )
299+ fmt2 = b"%%0%dX" % max (4 , len (lst [2 ]))
300+ closure_found = True
301+ while a <= b :
302+ map_dict [
303+ unhexlify (fmt % a ).decode (
304+ "charmap" if map_dict [- 1 ] == 1 else "utf-16-be" ,
305+ "surrogatepass" ,
306+ )
307+ ] = unhexlify (fmt2 % c ).decode ("utf-16-be" , "surrogatepass" )
308+ int_entry .append (a )
309+ a += 1
310+ c += 1
311+ return None if closure_found else (a , b )
283312
284313
285314def parse_bfchar (l : bytes , map_dict : Dict [Any , Any ], int_entry : List [int ]) -> None :
0 commit comments