|
20 | 20 | distributions.
|
21 | 21 | """
|
22 | 22 |
|
| 23 | +import copy |
23 | 24 | from typing import Optional, Sequence
|
24 | 25 |
|
25 | 26 | import numpy as np
|
26 | 27 | from vizier import pyvizier as vz
|
27 | 28 | from vizier._src.benchmarks.experimenters import experimenter
|
28 | 29 |
|
29 | 30 |
|
def _default_multiarm_problem(
    num_arms: int, arms_as_chars: bool
) -> vz.ProblemStatement:
  """Builds the problem statement shared by the multi-arm experimenters.

  The problem has a single metric, "reward", to be maximized, and a single
  categorical parameter, "arm", with one feasible value per arm.

  Args:
    num_arms: Number of arms, i.e. number of feasible values of "arm".
    arms_as_chars: If True, arm `i` is named by the single character whose code
      point is `ord("a") + i` ("a", "b", "c", ...). Beyond 26 arms these
      characters are no longer letters (arm 26 is "{"). If False, arm `i` is
      named by its decimal string ("0", "1", "2", ...).

  Returns:
    A newly constructed problem statement.
  """
  if arms_as_chars:
    first_code_point = ord("a")
    arm_names = [chr(first_code_point + index) for index in range(num_arms)]
  else:
    arm_names = list(map(str, range(num_arms)))

  statement = vz.ProblemStatement()
  reward_metric = vz.MetricInformation(
      name="reward", goal=vz.ObjectiveMetricGoal.MAXIMIZE
  )
  statement.metric_information.append(reward_metric)
  statement.search_space.root.add_categorical_param(
      name="arm", feasible_values=arm_names
  )
  return statement
|
40 | 50 |
|
41 | 51 |
|
class BernoulliMultiArmExperimenter(experimenter.Experimenter):
  """Uses a collection of Bernoulli arms with given probabilities.

  Arm `i` yields reward 1 with probability `probs[i]` and reward 0 otherwise.
  """

  def __init__(
      self,
      probs: Sequence[float],
      arms_as_chars: bool = True,
      seed: Optional[int] = None,
  ):
    """Init.

    Args:
      probs: Success probability of each arm; `probs[i]` belongs to arm `i`.
      arms_as_chars: Passed to `_default_multiarm_problem` to select how arms
        are named: single characters starting at "a" if True, decimal strings
        ("0", "1", ...) if False.
      seed: Seed for the `np.random.RandomState` that draws the rewards.
    """
    self._probs = probs
    self._rng = np.random.RandomState(seed)
    self._problem = _default_multiarm_problem(len(self._probs), arms_as_chars)

    # Recover each arm's index from its name rather than from its position in
    # the search space's feasible-value list: that list's order is owned by the
    # search space and may not be the insertion order (e.g. if it is kept
    # sorted, "10" comes before "2"). The dict also gives O(1) lookups.
    if arms_as_chars:
      decode = lambda name: ord(name) - ord("a")
    else:
      decode = int
    feasibles = self._problem.search_space.parameters[0].feasible_values
    self._arm_to_index = {name: decode(name) for name in feasibles}

  def problem_statement(self) -> vz.ProblemStatement:
    """Returns a deep copy, so callers cannot mutate the stored problem."""
    return copy.deepcopy(self._problem)

  def evaluate(self, suggestions: Sequence[vz.Trial]) -> None:
    """Each arm has a fixed probability of outputting 0 or 1 reward.

    Sets `final_measurement` on every suggestion in place.

    Args:
      suggestions: Trials whose "arm" parameter names the arm to pull.

    Raises:
      ValueError: If a suggestion names an arm that is not feasible.
    """
    for suggestion in suggestions:
      arm = suggestion.parameters["arm"].value
      try:
        arm_index = self._arm_to_index[arm]
      except KeyError:
        raise ValueError(f"{arm!r} is not a feasible arm.") from None
      prob = self._probs[arm_index]
      reward = self._rng.choice([0, 1], p=[1 - prob, prob])
      suggestion.final_measurement = vz.Measurement(metrics={"reward": reward})
|
59 | 76 |
|
60 | 77 |
|
class FixedMultiArmExperimenter(experimenter.Experimenter):
  """Rewards are deterministic.

  Pulling arm `i` always yields `rewards[i]`.
  """

  def __init__(self, rewards: Sequence[float], arms_as_chars: bool = True):
    """Init.

    Args:
      rewards: Reward of each arm; `rewards[i]` belongs to arm `i`.
      arms_as_chars: Passed to `_default_multiarm_problem` to select how arms
        are named: single characters starting at "a" if True, decimal strings
        ("0", "1", ...) if False.
    """
    self._rewards = rewards
    self._problem = _default_multiarm_problem(len(self._rewards), arms_as_chars)

    # Recover each arm's index from its name rather than from its position in
    # the search space's feasible-value list: that list's order is owned by the
    # search space and may not be the insertion order (e.g. if it is kept
    # sorted, "10" comes before "2"). The dict also gives O(1) lookups.
    if arms_as_chars:
      decode = lambda name: ord(name) - ord("a")
    else:
      decode = int
    feasibles = self._problem.search_space.parameters[0].feasible_values
    self._arm_to_index = {name: decode(name) for name in feasibles}

  def problem_statement(self) -> vz.ProblemStatement:
    """Returns a deep copy, so callers cannot mutate the stored problem."""
    return copy.deepcopy(self._problem)

  def evaluate(self, suggestions: Sequence[vz.Trial]) -> None:
    """Assigns each suggestion the fixed reward of the arm it names.

    Sets `final_measurement` on every suggestion in place.

    Args:
      suggestions: Trials whose "arm" parameter names the arm to pull.

    Raises:
      ValueError: If a suggestion names an arm that is not feasible.
    """
    for suggestion in suggestions:
      arm = suggestion.parameters["arm"].value
      try:
        arm_index = self._arm_to_index[arm]
      except KeyError:
        raise ValueError(f"{arm!r} is not a feasible arm.") from None
      reward = self._rewards[arm_index]
      suggestion.final_measurement = vz.Measurement(metrics={"reward": reward})
|
0 commit comments