Skip to content

Commit f13a500

Browse files
authored
hints: Introduce "special" hint types (#336)
"Special" hints are intended to convey information from one pass to another; they shouldn't be attempted as individual reduction steps, since doing so would only waste time. For example, other passes could emit hints of a type like @FileRef as input for the unused file removal pass; we shouldn't waste time trying to delete those file references one by one.
1 parent 9abb87c commit f13a500

File tree

5 files changed

+140
-30
lines changed

5 files changed

+140
-30
lines changed

cvise/passes/hint_based.py

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,15 @@
44
from typing import Any, Dict, List, Sequence, Tuple, Union
55

66
from cvise.passes.abstract import AbstractPass, BinaryState, PassResult, ProcessEventNotifier
7-
from cvise.utils.hint import apply_hints, group_hints_by_type, HintBundle, HintApplicationStats, load_hints, store_hints
7+
from cvise.utils.hint import (
8+
apply_hints,
9+
group_hints_by_type,
10+
is_special_hint_type,
11+
HintBundle,
12+
HintApplicationStats,
13+
load_hints,
14+
store_hints,
15+
)
816

917
HINTS_FILE_NAME_TEMPLATE = 'hints{type}.jsonl.zst'
1018

@@ -46,6 +54,19 @@ def advance_on_success(self, new_hint_count: int) -> Union[PerTypeHintState, Non
4654
)
4755

4856

57+
@dataclass(frozen=True)
58+
class SpecialHintState:
59+
"""A sub-item of HintState for "special" hint types - those that start from "@".
60+
61+
Such hints aren't attempted as reduction attempts themselves, instead they convey information from one pass to
62+
another - hence there's no underlying_state here.
63+
"""
64+
65+
type: str
66+
hints_file_name: Path
67+
hint_count: int
68+
69+
4970
@dataclass(frozen=True)
5071
class HintState:
5172
"""Stores the current state of the HintBasedPass.
@@ -57,21 +78,29 @@ class HintState:
5778
tmp_dir: Path
5879
# The enumeration state for each hint type. Sorted by type (in order to have deterministic and repeatable
5980
# enumeration order).
60-
per_type_states: Tuple[PerTypeHintState]
81+
per_type_states: Tuple[PerTypeHintState, ...]
6182
# Pointer to the current per-type state in the round-robin enumeration.
6283
ptr: int
84+
# Information for "special" hint types (those that start with "@"). They're stored separately because we don't
85+
# attempt applying them during enumeration - they're only intended as inputs for other passes that depend on them.
86+
special_hints: Tuple[SpecialHintState, ...]
6387

6488
@staticmethod
65-
def create(tmp_dir: Path, per_type_states: List[PerTypeHintState]):
89+
def create(tmp_dir: Path, per_type_states: List[PerTypeHintState], special_hints: List[SpecialHintState]):
6690
sorted_states = sorted(per_type_states, key=lambda s: s.type)
67-
return HintState(tmp_dir=tmp_dir, per_type_states=tuple(sorted_states), ptr=0)
91+
sorted_special_hints = sorted(special_hints, key=lambda s: s.type)
92+
return HintState(
93+
tmp_dir=tmp_dir, per_type_states=tuple(sorted_states), ptr=0, special_hints=tuple(sorted_special_hints)
94+
)
6895

6996
def __repr__(self):
7097
parts = []
7198
for i, s in enumerate(self.per_type_states):
7299
mark = '[*]' if i == self.ptr and len(self.per_type_states) > 1 else ''
73100
type_s = s.type + ': ' if s.type else ''
74101
parts.append(f'{mark}{type_s}{s.underlying_state.compact_repr()}')
102+
for s in self.special_hints:
103+
parts.append(f'{s.type}: {s.hint_count}')
75104
return f'HintState({", ".join(parts)})'
76105

77106
def real_chunk(self) -> int:
@@ -98,7 +127,12 @@ def advance(self) -> Union[HintState, None]:
98127
new_ptr += 1
99128
new_ptr %= len(new_per_type_states)
100129

101-
return HintState(tmp_dir=self.tmp_dir, per_type_states=tuple(new_per_type_states), ptr=new_ptr)
130+
return HintState(
131+
tmp_dir=self.tmp_dir,
132+
per_type_states=tuple(new_per_type_states),
133+
ptr=new_ptr,
134+
special_hints=self.special_hints,
135+
)
102136

103137
def advance_on_success(self, type_to_bundle: Dict[str, HintBundle]):
104138
sub_states = []
@@ -114,10 +148,15 @@ def advance_on_success(self, type_to_bundle: Dict[str, HintBundle]):
114148
sub_states.append(new_substate)
115149
if not sub_states:
116150
return None
117-
return HintState(tmp_dir=self.tmp_dir, per_type_states=tuple(sub_states), ptr=0)
151+
return HintState(
152+
tmp_dir=self.tmp_dir, per_type_states=tuple(sub_states), ptr=0, special_hints=self.special_hints
153+
)
118154

119155
def hint_bundle_paths(self) -> Dict[str, Path]:
120-
return {substate.type: self.tmp_dir / substate.hints_file_name for substate in self.per_type_states}
156+
return {
157+
substate.type: self.tmp_dir / substate.hints_file_name
158+
for substate in self.per_type_states + self.special_hints
159+
}
121160

122161

123162
class HintBasedPass(AbstractPass):
@@ -186,6 +225,8 @@ def new(
186225
return self.new_from_hints(hints, tmp_dir)
187226

188227
def transform(self, test_case: Path, state: HintState, original_test_case: Path, *args, **kwargs):
228+
if not state.per_type_states: # possible if all hints produced by new() were "special"
229+
return PassResult.STOP, state
189230
self.load_and_apply_hints(original_test_case, test_case, [state])
190231
return PassResult.OK, state
191232

@@ -230,18 +271,27 @@ def new_from_hints(self, bundle: HintBundle, tmp_dir: Path) -> Union[HintState,
230271
type_to_bundle = group_hints_by_type(bundle)
231272
self.backfill_pass_names(type_to_bundle)
232273
type_to_file_name = store_hints_per_type(tmp_dir, type_to_bundle)
233-
sub_states = []
234-
# Initialize a separate enumeration for each group of hints sharing a particular type.
274+
sub_states: List[PerTypeHintState] = []
275+
special_states: List[SpecialHintState] = []
235276
for type, sub_bundle in type_to_bundle.items():
236-
underlying = self.create_elementary_state(len(sub_bundle.hints))
237-
if underlying is None:
238-
continue
239-
sub_states.append(
240-
PerTypeHintState(type=type, hints_file_name=type_to_file_name[type], underlying_state=underlying)
241-
)
242-
if not sub_states:
277+
if is_special_hint_type(type):
278+
# "Special" hints aren't attempted in transform() jobs - only store them to be consumed by other passes.
279+
special_states.append(
280+
SpecialHintState(
281+
type=type, hints_file_name=type_to_file_name[type], hint_count=len(sub_bundle.hints)
282+
)
283+
)
284+
else:
285+
# Initialize a separate enumeration for this group of hints sharing a particular type.
286+
underlying = self.create_elementary_state(len(sub_bundle.hints))
287+
if underlying is None:
288+
continue
289+
sub_states.append(
290+
PerTypeHintState(type=type, hints_file_name=type_to_file_name[type], underlying_state=underlying)
291+
)
292+
if not sub_states and not special_states:
243293
return None
244-
return HintState.create(tmp_dir, sub_states)
294+
return HintState.create(tmp_dir, sub_states, special_states)
245295

246296
def advance_on_success_from_hints(self, bundle: HintBundle, state: HintState) -> Union[HintState, None]:
247297
"""Advances the state after a successful reduction, given pre-generated hints.

cvise/tests/test_folding.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def create_stub_hint_state(type: str) -> HintState:
1212
tmp_dir=fake_path,
1313
per_type_states=(PerTypeHintState(type=type, hints_file_name=fake_path, underlying_state=underlying_state),),
1414
ptr=0,
15+
special_hints=(),
1516
)
1617

1718

cvise/tests/test_hint_based.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from cvise.passes.hint_based import HintBasedPass
55
from cvise.tests.testabstract import collect_all_transforms, iterate_pass, validate_hint_bundle
6-
from cvise.utils.hint import HintBundle
6+
from cvise.utils.hint import HintBundle, load_hints
77
from cvise.utils.process import ProcessEventNotifier
88

99

@@ -216,3 +216,50 @@ def test_hint_based_non_utf8(tmp_path: Path):
216216
assert b'f\0oo\xc3\x84' in all_transforms # hint34 applied
217217
assert b'f\0o\xffo' in all_transforms # hint57 applied
218218
assert b'fo' in all_transforms # all applied
219+
220+
221+
def test_hint_based_special_hints_not_attempted(tmp_path: Path):
222+
"""Test that special hints (whose type starts from "@") aren't attempted in the pass transform() calls."""
223+
input = b'foo'
224+
test_case = tmp_path / 'input.txt'
225+
vocab = ['sometype', '@specialtype']
226+
hint_regular = {'t': 0, 'p': [{'l': 0, 'r': 1}]}
227+
hint_special = {'t': 1, 'p': [{'l': 1, 'r': 2}]}
228+
pass_ = StubHintBasedPass(
229+
{
230+
input: [hint_regular, hint_special],
231+
},
232+
vocabulary=vocab,
233+
)
234+
test_case.write_bytes(input)
235+
236+
state = pass_.new(test_case, tmp_dir=tmp_path, process_event_notifier=ProcessEventNotifier(None), dependee_hints=[])
237+
all_transforms = collect_all_transforms(pass_, state, test_case)
238+
assert b'oo' in all_transforms # hint_regular applied
239+
assert b'fo' not in all_transforms # hint_special not applied
240+
241+
242+
def test_hint_based_special_hints_stored(tmp_path: Path):
243+
"""Test that special hints produced by a pass are stored on disk, even if no other hints are produced."""
244+
input = b'foo'
245+
test_case = tmp_path / 'input.txt'
246+
hint_type = '@specialtype'
247+
hint = {'t': 0, 'p': [{'l': 1, 'r': 2}]}
248+
pass_ = StubHintBasedPass(
249+
{
250+
input: [hint],
251+
},
252+
vocabulary=[hint_type],
253+
)
254+
test_case.write_bytes(input)
255+
256+
state = pass_.new(test_case, tmp_dir=tmp_path, process_event_notifier=ProcessEventNotifier(None), dependee_hints=[])
257+
assert state is not None
258+
bundle_paths = state.hint_bundle_paths()
259+
assert len(bundle_paths) == 1
260+
assert hint_type in bundle_paths
261+
bundle = load_hints(bundle_paths[hint_type], begin_index=None, end_index=None)
262+
assert bundle.hints == [hint]
263+
264+
all_transforms = collect_all_transforms(pass_, state, test_case)
265+
assert all_transforms == set()

cvise/tests/testabstract.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,14 @@ def collect_all_transforms(pass_: AbstractPass, state, input_path: Path) -> Set[
3030
tmp_path = Path(tmp_file.name)
3131
tmp_file.close()
3232
while state is not None:
33-
pass_.transform(
33+
result, _new_state = pass_.transform(
3434
tmp_path, state, process_event_notifier=ProcessEventNotifier(None), original_test_case=input_path
3535
)
36-
all_outputs.add(tmp_path.read_bytes())
37-
state = pass_.advance(input_path, state)
36+
if result == PassResult.OK:
37+
all_outputs.add(tmp_path.read_bytes())
38+
state = pass_.advance(input_path, state)
39+
elif result == PassResult.STOP:
40+
break
3841
return all_outputs
3942

4043

@@ -43,14 +46,17 @@ def collect_all_transforms_dir(pass_: AbstractPass, state, input_path: Path) ->
4346
while state is not None:
4447
with tempfile.TemporaryDirectory() as tmp_dir:
4548
tmp_path = Path(tmp_dir)
46-
pass_.transform(
49+
result, _new_state = pass_.transform(
4750
tmp_path, state, process_event_notifier=ProcessEventNotifier(None), original_test_case=input_path
4851
)
49-
contents = tuple(
50-
sorted((str(p.relative_to(tmp_dir)), p.read_bytes()) for p in tmp_path.rglob('*') if not p.is_dir())
51-
)
52-
all_outputs.add(contents)
53-
state = pass_.advance(input_path, state)
52+
if result == PassResult.OK:
53+
contents = tuple(
54+
sorted((str(p.relative_to(tmp_dir)), p.read_bytes()) for p in tmp_path.rglob('*') if not p.is_dir())
55+
)
56+
all_outputs.add(contents)
57+
state = pass_.advance(input_path, state)
58+
elif result == PassResult.STOP:
59+
break
5460
return all_outputs
5561

5662

cvise/utils/hint.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,11 @@ class HintBundle:
8989
},
9090
't': {
9191
'description': (
92-
'Indicates the type of the hint, as an index in the vocabulary. The purpose of the type is to let a '
93-
'pass split hints into distinct groups, to guide the generic logic that attempts taking consecutive '
94-
'ranges of same-typed hints.'
92+
'Indicates the type of the hint, as an index in the vocabulary. Types starting from "@" (the at sign) '
93+
'have special meaning - such hints are not attempted as reduction transformations, but are only '
94+
'intended to be consumed by other passes as input data. Types not starting from "@" are just used as a '
95+
'way to split hints from a particular pass into distinct groups, to guide the generic logic that '
96+
'attempts taking consecutive ranges of same-typed hints.'
9597
),
9698
'type': 'integer',
9799
'minimum': 0,
@@ -118,6 +120,10 @@ def get_passes_ordered_by_delta(self) -> List[str]:
118120
json_encoder: Union[msgspec.json.Encoder, None] = None
119121

120122

123+
def is_special_hint_type(type: str) -> bool:
124+
return type.startswith('@')
125+
126+
121127
def apply_hints(bundles: List[HintBundle], source_path: Path, destination_path: Path) -> HintApplicationStats:
122128
"""Creates the destination file/dir by applying the specified hints to the contents of the source file/dir."""
123129
# Take patches from all hints and group them by the file which they're applied to.

0 commit comments

Comments
 (0)