Skip to content

Commit 75acae0

Browse files
authored
hint: Optimize byte ops in i/o and patch applying (#338)
Use a bytearray to avoid repeatedly copying buffers when concatenating chunks for every single patch. Store the vocabulary as bytes to avoid doing UTF-8 encoding on every patch with a replacement ("v"). Remove `if` checks from the inner loop of hint loading. Measurements show a 5-10% speedup on big inputs (>10 MB).
1 parent 845b538 commit 75acae0

18 files changed

+129
-121
lines changed

clang_delta/tests/test_clang_delta.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
import subprocess
55
import tempfile
6-
from typing import Union
6+
from typing import Optional, Union
77
import unittest
88

99

@@ -38,19 +38,19 @@ def run_clang_delta(testcase: str, arguments: str) -> str:
3838
return subprocess.check_output(cmd, shell=True, encoding='utf8')
3939

4040

41-
def run_apply_hints(hints_file: Path, begin_index: int, end_index: int, testcase: str) -> str:
41+
def run_apply_hints(hints_file: Path, begin_index: Optional[int], end_index: Optional[int], testcase: str) -> str:
4242
hints_tool = Path(__file__).parent.parent.parent / 'cvise-cli.py'
4343
cmd = [
4444
hints_tool,
4545
'--action=apply-hints',
4646
'--hints-file',
4747
hints_file,
48-
'--hint-begin-index',
49-
str(begin_index),
50-
'--hint-end-index',
51-
str(end_index),
5248
get_testcase_path(testcase),
5349
]
50+
if begin_index is not None:
51+
cmd += ['--hint-begin-index', str(begin_index)]
52+
if end_index is not None:
53+
cmd += ['--hint-end-index', str(end_index)]
5454
return subprocess.check_output(cmd, encoding='utf-8')
5555

5656

@@ -1165,8 +1165,8 @@ def test_replace_function_def_with_decl_simple_preserve_foo(self):
11651165
self.check_clang_delta_hints(
11661166
'replace-function-def-with-decl/simple.cpp',
11671167
'--transformation=replace-function-def-with-decl --preserve-routine=Moo::foo',
1168-
begin_index=0,
1169-
end_index=9999,
1168+
begin_index=None,
1169+
end_index=None,
11701170
output_file='replace-function-def-with-decl/simple.cpp.preserve_foo.output',
11711171
)
11721172

cvise-cli.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -544,11 +544,11 @@ def do_reduce(args):
544544
def do_apply_hints(args):
545545
if args.hints_file is None:
546546
sys.exit('--hints-file is mandatory for --action=apply-hints')
547-
if args.hint_begin_index is None:
548-
sys.exit('--hint-begin-index is mandatory for --action=apply-hints')
549-
if args.hint_end_index is None:
550-
sys.exit('--hint-end-index is mandatory for --action=apply-hints')
551-
if args.hint_begin_index >= args.hint_end_index:
547+
if (
548+
args.hint_begin_index is not None
549+
and args.hint_end_index is not None
550+
and args.hint_begin_index >= args.hint_end_index
551+
):
552552
sys.exit('HINT_BEGIN_INDEX must be smaller than HINT_END_INDEX')
553553
if len(args.test_cases) > 1:
554554
sys.exit('exactly one TEST_CASE must be supplied')

cvise/passes/balanced.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def generate_hints(self, test_case: Path, *args, **kwargs):
3636
vocabulary = []
3737
if config.replacement:
3838
assert config.to_delete == Deletion.ALL
39-
vocabulary.append(config.replacement)
39+
vocabulary.append(config.replacement.encode())
4040

4141
contents = test_case.read_bytes()
4242
prefixes = (

cvise/passes/blank.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88

99
class BlankPass(HintBasedPass):
1010
PATTERNS = {
11-
'blankline': rb'^\s*$',
12-
'hashline': rb'^#',
11+
b'blankline': rb'^\s*$',
12+
b'hashline': rb'^#',
1313
}
1414

1515
def check_prerequisites(self):
1616
return True
1717

18-
def output_hint_types(self) -> List[str]:
18+
def output_hint_types(self) -> List[bytes]:
1919
return list(self.PATTERNS.keys())
2020

2121
def generate_hints(self, test_case: Path, *args, **kwargs):

cvise/passes/clanghints.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def _generate_hints_for_standard(
139139
stdout = iter(stdout.splitlines())
140140
vocab_line = next(stdout, None)
141141
vocab_decoder = msgspec.json.Decoder(type=List[str])
142-
vocab = vocab_decoder.decode(vocab_line) if vocab_line else []
142+
vocab = [s.encode() for s in vocab_decoder.decode(vocab_line)] if vocab_line else []
143143

144144
hints = []
145145
hint_decoder = msgspec.json.Decoder(type=Hint)

cvise/passes/clangmodulemap.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
@unique
1818
class _Vocab(Enum):
1919
# Items must be listed in the index order; indices must be contiguous and start from zero.
20-
MAKE_HEADER_NON_MODULAR = (0, 'make-header-non-modular')
21-
DELETE_USE_DECL = (1, 'delete-use-decl')
22-
DELETE_EMPTY_SUBMODULE = (2, 'delete-empty-submodule')
23-
INLINE_SUBMODULE_CONTENTS = (3, 'inline-submodule-contents')
24-
DELETE_LINE = (4, 'delete-line')
20+
MAKE_HEADER_NON_MODULAR = (0, b'make-header-non-modular')
21+
DELETE_USE_DECL = (1, b'delete-use-decl')
22+
DELETE_EMPTY_SUBMODULE = (2, b'delete-empty-submodule')
23+
INLINE_SUBMODULE_CONTENTS = (3, b'inline-submodule-contents')
24+
DELETE_LINE = (4, b'delete-line')
2525

2626

2727
class ClangModuleMapPass(HintBasedPass):
@@ -36,19 +36,20 @@ def check_prerequisites(self):
3636
def supports_dir_test_cases(self):
3737
return True
3838

39-
def output_hint_types(self) -> List[str]:
39+
def output_hint_types(self) -> List[bytes]:
4040
return [v.value[1] for v in _Vocab]
4141

4242
def generate_hints(self, test_case: Path, *args, **kwargs):
4343
paths = list(test_case.rglob('*')) if test_case.is_dir() else [test_case]
4444
interesting_paths = [p for p in paths if _interesting_file(p)]
4545

46-
vocab: List[str] = [v.value[1] for v in _Vocab] # collect all strings used in hints
46+
vocab: List[bytes] = [v.value[1] for v in _Vocab] # collect all strings used in hints
4747
hints: List[Hint] = []
4848
for path in interesting_paths:
4949
file = _parse_file(path)
5050

51-
vocab.append(str(path.relative_to(test_case)))
51+
rel_path = path.relative_to(test_case)
52+
vocab.append(str(rel_path).encode())
5253
file_id = len(vocab) - 1
5354

5455
for mod in file.modules:

cvise/passes/clexhints.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def generate_hints(self, test_case: Path, process_event_notifier: ProcessEventNo
3535
work_dir = test_case
3636
paths = [p.relative_to(test_case) for p in test_case.rglob('*') if not p.is_dir()]
3737
stdin = b'\n'.join(bytes(p) for p in paths)
38-
files_vocab = [str(p) for p in paths]
38+
files_vocab = [str(p).encode() for p in paths]
3939
cmd_arg = '--'
4040
else:
4141
work_dir = '.'
@@ -52,7 +52,7 @@ def generate_hints(self, test_case: Path, process_event_notifier: ProcessEventNo
5252
stdout = iter(stdout.splitlines())
5353
vocab_line = next(stdout, None)
5454
vocab_decoder = msgspec.json.Decoder(type=List[str])
55-
orig_vocab = vocab_decoder.decode(vocab_line) if vocab_line else []
55+
orig_vocab = [s.encode() for s in vocab_decoder.decode(vocab_line)] if vocab_line else []
5656

5757
hints = []
5858
hint_decoder = msgspec.json.Decoder(type=Hint)

cvise/passes/comments.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
class CommentsPass(HintBasedPass):
1010
# The hints vocabulary - strings used by our hint.
11-
INITIAL_VOCAB = ('multi-line', 'single-line')
11+
INITIAL_VOCAB = (b'multi-line', b'single-line')
1212
# The indices must match the order in INITIAL_VOCAB.
1313
MULTI_LINE_VOCAB_ID = 0
1414
SINGLE_LINE_VOCAB_ID = 1
@@ -19,7 +19,7 @@ def check_prerequisites(self):
1919
def supports_dir_test_cases(self):
2020
return True
2121

22-
def output_hint_types(self) -> List[str]:
22+
def output_hint_types(self) -> List[bytes]:
2323
return list(self.INITIAL_VOCAB)
2424

2525
def generate_hints(self, test_case: Path, *args, **kwargs):
@@ -28,7 +28,8 @@ def generate_hints(self, test_case: Path, *args, **kwargs):
2828
if test_case.is_dir():
2929
for path in test_case.rglob('*'):
3030
if not path.is_dir():
31-
vocab.append(str(path.relative_to(test_case)))
31+
rel_path = path.relative_to(test_case)
32+
vocab.append(str(rel_path).encode())
3233
file_id = len(vocab) - 1
3334
hints += self._generate_hints_for_file(path, file_id)
3435
else:

cvise/passes/hint_based.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class PerTypeHintState:
2525
"""
2626

2727
# The hint type that this state is for; empty if a hint doesn't explicitly specify types.
28-
type: str
28+
type: bytes
2929
# Only the base name, not a full path - it's a small optimization (and we anyway need to store the tmp dir in the
3030
# HintState).
3131
hints_file_name: Path
@@ -62,7 +62,7 @@ class SpecialHintState:
6262
another - hence there's no underlying_state here.
6363
"""
6464

65-
type: str
65+
type: bytes
6666
hints_file_name: Path
6767
hint_count: int
6868

@@ -97,10 +97,10 @@ def __repr__(self):
9797
parts = []
9898
for i, s in enumerate(self.per_type_states):
9999
mark = '[*]' if i == self.ptr and len(self.per_type_states) > 1 else ''
100-
type_s = s.type + ': ' if s.type else ''
100+
type_s = s.type.decode() + ': ' if s.type else ''
101101
parts.append(f'{mark}{type_s}{s.underlying_state.compact_repr()}')
102102
for s in self.special_hints:
103-
parts.append(f'{s.type}: {s.hint_count}')
103+
parts.append(f'{s.type.decode()}: {s.hint_count}')
104104
return f'HintState({", ".join(parts)})'
105105

106106
def real_chunk(self) -> int:
@@ -134,7 +134,7 @@ def advance(self) -> Union[HintState, None]:
134134
special_hints=self.special_hints,
135135
)
136136

137-
def advance_on_success(self, type_to_bundle: Dict[str, HintBundle]):
137+
def advance_on_success(self, type_to_bundle: Dict[bytes, HintBundle]):
138138
sub_states = []
139139
# Advance all previously present hint types' substates. We ignore any newly appearing hint types because it's
140140
# nontrivial to distinguish genuinely new hints from those that we (unsuccessfully) checked.
@@ -152,7 +152,7 @@ def advance_on_success(self, type_to_bundle: Dict[str, HintBundle]):
152152
tmp_dir=self.tmp_dir, per_type_states=tuple(sub_states), ptr=0, special_hints=self.special_hints
153153
)
154154

155-
def hint_bundle_paths(self) -> Dict[str, Path]:
155+
def hint_bundle_paths(self) -> Dict[bytes, Path]:
156156
return {
157157
substate.type: self.tmp_dir / substate.hints_file_name
158158
for substate in self.per_type_states + self.special_hints
@@ -186,7 +186,7 @@ def generate_hints(
186186
) -> HintBundle:
187187
raise NotImplementedError(f"Class {type(self).__name__} has not implemented 'generate_hints'!")
188188

189-
def input_hint_types(self) -> List[str]:
189+
def input_hint_types(self) -> List[bytes]:
190190
"""Declares hint types that are consumed by this pass as inputs.
191191
192192
Intended to be overridden by subclasses, in cases where dependencies between passes need to be implemented:
@@ -196,7 +196,7 @@ def input_hint_types(self) -> List[str]:
196196
"""
197197
return []
198198

199-
def output_hint_types(self) -> List[str]:
199+
def output_hint_types(self) -> List[bytes]:
200200
"""Declares hint types that are produced by this pass.
201201
202202
A pass must override this method if it produces hints with a nonempty type (the "t" field).
@@ -304,16 +304,16 @@ def advance_on_success_from_hints(self, bundle: HintBundle, state: HintState) ->
304304
store_hints_per_type(state.tmp_dir, type_to_bundle)
305305
return state.advance_on_success(type_to_bundle)
306306

307-
def backfill_pass_names(self, type_to_bundle: Dict[str, HintBundle]) -> None:
307+
def backfill_pass_names(self, type_to_bundle: Dict[bytes, HintBundle]) -> None:
308308
for bundle in type_to_bundle.values():
309309
if not bundle.pass_name:
310310
bundle.pass_name = repr(self)
311311

312312

313-
def store_hints_per_type(tmp_dir: Path, type_to_bundle: Dict[str, HintBundle]) -> Dict[str, Path]:
313+
def store_hints_per_type(tmp_dir: Path, type_to_bundle: Dict[bytes, HintBundle]) -> Dict[bytes, Path]:
314314
type_to_file_name = {}
315315
for type, sub_bundle in type_to_bundle.items():
316-
file_name = Path(HINTS_FILE_NAME_TEMPLATE.format(type=type))
316+
file_name = Path(HINTS_FILE_NAME_TEMPLATE.format(type=type.decode()))
317317
store_hints(sub_bundle, tmp_dir / file_name)
318318
type_to_file_name[type] = file_name
319319
return type_to_file_name

cvise/passes/lines.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def generate_hints(self, test_case: Path, process_event_notifier: ProcessEventNo
2121
if test_case.is_dir():
2222
for path in test_case.rglob('*'):
2323
if not path.is_dir():
24-
vocab.append(str(path.relative_to(test_case)))
24+
rel_path = path.relative_to(test_case)
25+
vocab.append(str(rel_path).encode())
2526
file_id = len(vocab) - 1
2627
hints += self._generate_hints_for_file(path, decoder, process_event_notifier, file_id)
2728
else:

0 commit comments

Comments
 (0)