Documentation on using converters

xingyousong · copybara-github · commit b2d19ec291fa · 2024-02-15T09:34:17.000-08:00
PiperOrigin-RevId: 607360878
diff --git a/docs/guides/developer/designers.ipynb b/docs/guides/developer/designers.ipynb
@@ -10,9 +10,7 @@
         "\n",
         "\n",
         "# Designers\n",
-        "This documentation will allow a developer to use the Designer API for typical algorithm design.\n",
-        "\n",
-        "\n"
+        "This documentation will allow a developer to use the Designer API for typical algorithm design."
       ]
     },
     {
diff --git a/docs/guides/index.rst b/docs/guides/index.rst
@@ -9,6 +9,7 @@ For Users
 
     user/running_vizier
     user/search_spaces
+    user/converters
     Switching to Vertex <https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/vizier/conversions_vertex_vizier_and_open_source_vizier.ipynb>
     user/supported_algorithms
 
diff --git a/docs/guides/user/converters.ipynb b/docs/guides/user/converters.ipynb
@@ -0,0 +1,357 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "c8cEHaZVJXsU"
+      },
+      "source": [
+        "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/google/vizier/blob/main/docs/guides/developer/converters.ipynb)\n",
+        "\n",
+        "# Converters\n",
+        "This documentation demonstrates how to use converters for representing PyVizier objects as NumPy arrays and vice-versa."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "glNyultvKLVg"
+      },
+      "source": [
+        "## Installation and reference imports"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "plR-NKqFJOma"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install google-vizier"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "x1BGr_ZvKQoK"
+      },
+      "outputs": [],
+      "source": [
+        "from vizier import pyvizier as vz\n",
+        "from vizier.pyvizier import converters"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8Uhzgb5yYvkT"
+      },
+      "source": [
+        "Suppose we had a problem statement and some trials associated to the study."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "bHm5beYNMXOF"
+      },
+      "outputs": [],
+      "source": [
+        "# Setup search space\n",
+        "search_space = vz.SearchSpace()\n",
+        "root = search_space.root\n",
+        "root.add_float_param(name='double', min_value=0.0, max_value=1.0)\n",
+        "root.add_int_param(name='int', min_value=1, max_value=10)\n",
+        "root.add_discrete_param(name='discrete', feasible_values=[0.1, 0.3, 0.5])\n",
+        "root.add_categorical_param(name='categorical', feasible_values=['a', 'b', 'c'])\n",
+        "\n",
+        "# Setup metric configurations\n",
+        "m1 = vz.MetricInformation(name='m1', goal=vz.ObjectiveMetricGoal.MAXIMIZE)\n",
+        "m2 = vz.MetricInformation(name='m2', goal=vz.ObjectiveMetricGoal.MINIMIZE)\n",
+        "\n",
+        "# Final problem\n",
+        "problem = vz.ProblemStatement(search_space, metric_information=[m1, m2])\n",
+        "\n",
+        "# Example trials\n",
+        "trial1 = vz.Trial(\n",
+        "    parameters={'double': 0.6, 'int': 2, 'discrete': 0.1, 'categorical': 'a'},\n",
+        "    final_measurement=vz.Measurement(metrics={'m1': 0.1, 'm2': 0.2}),\n",
+        ")\n",
+        "trial2 = vz.Trial(\n",
+        "    parameters={'double': 0.1, 'int': 6, 'discrete': 0.3, 'categorical': 'b'},\n",
+        "    final_measurement=vz.Measurement(metrics={'m1': -1.0, 'm2': 0.8}),\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-xcnx2LQKes_"
+      },
+      "source": [
+        "## Quick Start\n",
+        "To use numerical models, both our `x` (parameters) and `y` (metrics) need to be formatted as numpy arrays. We can directly do so with `TrialToArrayConverter`:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "CONUnh92Yma3"
+      },
+      "outputs": [],
+      "source": [
+        "t2a_converter = converters.TrialToArrayConverter.from_study_config(problem)\n",
+        "xs, ys = t2a_converter.to_xy([trial1, trial2])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IocslOsLa8_i"
+      },
+      "source": [
+        "We can also convert the `xs` back into PyVizier `ParameterDict`s:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "w4o0gM0KaPw3"
+      },
+      "outputs": [],
+      "source": [
+        "t2a_converter.to_parameters(xs)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MOXYX_oAYm29"
+      },
+      "source": [
+        "Behind the scenes, the `TrialToArrayConverter` actually uses a `DefaultTrialConverter` which first converts both trial parameters and metrics into `dict[str, np.ndarray]` and then concatenates the arrays together."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "qiU_rMjfK45Q"
+      },
+      "outputs": [],
+      "source": [
+        "converter = converters.DefaultTrialConverter.from_study_config(problem)\n",
+        "xs_dict, ys_dict = converter.to_xy([trial1, trial2])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "UlT0t5KDi1OO"
+      },
+      "source": [
+        "Trials can be recovered too:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wrgpY0ANi73O"
+      },
+      "outputs": [],
+      "source": [
+        "original_trials = converter.to_trials(xs_dict, ys_dict)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dyjiYSXmddoN"
+      },
+      "source": [
+        "## Customization\n",
+        "There are multiple ways to convert parameters of specific types. For example,\n",
+        "some common methods to convert the `'categorical'` parameter (with feasible\n",
+        "values `['a', 'b', 'c']`) can be:\n",
+        "\n",
+        "*   Integer Index: `'b' -\u003e 1` since `b` has index 1 among feasible values.\n",
+        "*   One-Hot: `'b' -\u003e [0, 1, 0]` using one-hot encoding.\n",
+        "\n",
+        "Additional considerations can for example:\n",
+        "\n",
+        "*   Whether to scale continuous parameter values into `[0,1]`\n",
+        "*   Whether to always sign-flip metrics to assume maximization only.\n",
+        "\n",
+        "These options can be specified when constructing both `TrialToArrayConverter` and `DefaultTrialConverter` ([source code](https://github.com/google/vizier/blob/main/vizier/pyvizier/converters/core.py)):\n",
+        "\n",
+        "```python\n",
+        "@classmethod\n",
+        "def from_study_config(\n",
+        "    cls,\n",
+        "    study_config: pyvizier.ProblemStatement,\n",
+        "    *,\n",
+        "    scale: bool = True,\n",
+        "    pad_oovs: bool = True,\n",
+        "    max_discrete_indices: int = 0,\n",
+        "    flip_sign_for_minimization_metrics: bool = True,\n",
+        "    dtype=np.float64,\n",
+        "):\n",
+        "```"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "eflsNETlg8VT"
+      },
+      "source": [
+        "For more fine-grained control over specific `ParameterConfig`s and `MetricInformation`s, a user can specify individual arguments to each `DefaultModelInputConverter` and `DefaultModelOutputConverter` respectively."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eDeFdd3Op3f3"
+      },
+      "outputs": [],
+      "source": [
+        "# Only considers the 'double' parameter values.\n",
+        "double_pc = search_space.get('double')\n",
+        "double_converter = converters.DefaultModelInputConverter(double_pc, scale=True)\n",
+        "double_converter.convert([trial1, trial2])\n",
+        "\n",
+        "# Only considers the 'categorical' parameter values.\n",
+        "categorical_pc = search_space.get('categorical')\n",
+        "categorial_converter = converters.DefaultModelInputConverter(categorical_pc, onehot_embed=True)\n",
+        "categorial_converter.convert([trial1, trial2])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "qHbnUXeDqyhb"
+      },
+      "outputs": [],
+      "source": [
+        "# Only considers the 'm1' metric values.\n",
+        "m1_converter = converters.DefaultModelOutputConverter(m1)\n",
+        "m1_converter.convert([trial1.final_measurement, trial2.final_measurement])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "SIkZVMAWtdBt"
+      },
+      "source": [
+        "These can be inserted into the `DefaultTrialConverter`:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "_RWxP0Cgthf6"
+      },
+      "outputs": [],
+      "source": [
+        "parameter_converters = [double_converter, categorial_converter]\n",
+        "metric_converters = [m1_converter]\n",
+        "\n",
+        "custom_converter = converters.DefaultTrialConverter(parameter_converters, metric_converters)\n",
+        "custom_converter.to_xy([trial1, trial2])  # Same array outputs as above."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_r2RuW6oplD5"
+      },
+      "source": [
+        "For full customization, the user may create their own `ModelInputConverter`s and `ModelOutputConverter`s.\n",
+        "\n",
+        "```python\n",
+        "class ModelInputConverter(metaclass=abc.ABCMeta):\n",
+        "  \"\"\"Interface for extracting inputs to the model.\"\"\"\n",
+        "\n",
+        "  @abc.abstractmethod\n",
+        "  def convert(self, trials: Sequence[vz.TrialSuggestion]) -\u003e np.ndarray:\n",
+        "    \"\"\"Returns an array of shape (number of trials, feature dimension).\"\"\"\n",
+        "\n",
+        "  @property\n",
+        "  @abc.abstractmethod\n",
+        "  def output_spec(self) -\u003e NumpyArraySpec:\n",
+        "    \"\"\"Provides specification of the output from this converter.\"\"\"\n",
+        "\n",
+        "  @property\n",
+        "  @abc.abstractmethod\n",
+        "  def parameter_config(self):\n",
+        "    \"\"\"Original ParameterConfig that this converter acts on.\"\"\"\n",
+        "\n",
+        "  @abc.abstractmethod\n",
+        "  def to_parameter_values(\n",
+        "      self, array: np.ndarray\n",
+        "  ) -\u003e List[Optional[vz.ParameterValue]]:\n",
+        "    \"\"\"Convert and clip to the nearest feasible parameter values.\"\"\"\n",
+        "```\n",
+        "\n",
+        "```python\n",
+        "class ModelOutputConverter(metaclass=abc.ABCMeta):\n",
+        "  \"\"\"Metric converter interface.\"\"\"\n",
+        "\n",
+        "  @abc.abstractmethod\n",
+        "  def convert(self, measurements: Sequence[vz.Measurement]) -\u003e np.ndarray:\n",
+        "    \"\"\"Returns N x 1 array.\"\"\"\n",
+        "    pass\n",
+        "\n",
+        "  @abc.abstractmethod\n",
+        "  def to_metrics(self, labels: np.ndarray) -\u003e Sequence[Optional[vz.Metric]]:\n",
+        "    \"\"\"Returns a list of pyvizier metrics.\"\"\"\n",
+        "\n",
+        "  @property\n",
+        "  @abc.abstractmethod\n",
+        "  def metric_information(self) -\u003e vz.MetricInformation:\n",
+        "    \"\"\"Describes the semantics of the return value from convert() method.\"\"\"\n",
+        "\n",
+        "  @property\n",
+        "  def output_shape(self) -\u003e Tuple[None, int]:\n",
+        "    return (None, 1)\n",
+        "```"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "last_runtime": {
+        "build_target": "//learning/deepmind/dm_python:dm_notebook3",
+        "kind": "private"
+      },
+      "name": "Converters.ipynb",
+      "private_outputs": true,
+      "provenance": [
+        {
+          "file_id": "1nArqkCmNjB9-GwTg3nera6cQ9J4ws1FY",
+          "timestamp": 1707957172845
+        }
+      ]
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

Original file line number	Diff line number	Diff line change
`@@ -10,9 +10,7 @@`
`10`	`10`	`"\n",`
`11`	`11`	`"\n",`
`12`	`12`	`"# Designers\n",`
`13`		`- "This documentation will allow a developer to use the Designer API for typical algorithm design.\n",`
`14`		`- "\n",`
`15`		`- "\n"`
	`13`	`+ "This documentation will allow a developer to use the Designer API for typical algorithm design."`
`16`	`14`	`]`
`17`	`15`	`},`
`18`	`16`	`{`