[FEA] Allow setting *_pool_size with human-readable string (#1670)

Matt711 · web-flow · commit 687ed5c4b8b7 · 2024-09-09T22:47:39.000Z
Closes #173 Authors: - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Mark Harris (https://github.com/harrism) URL: #1670
diff --git a/README.md b/README.md
@@ -771,8 +771,8 @@ of 1 GiB and a maximum size of 4 GiB. The pool uses
 >>> import rmm
 >>> pool = rmm.mr.PoolMemoryResource(
 ...     rmm.mr.CudaMemoryResource(),
-...     initial_pool_size=2**30,
-...     maximum_pool_size=2**32
+...     initial_pool_size="1GiB", # equivalent to initial_pool_size=2**30
+...     maximum_pool_size="4GiB"
 ... )
 >>> rmm.mr.set_current_device_resource(pool)
 ```
diff --git a/python/rmm/docs/guide.md b/python/rmm/docs/guide.md
@@ -139,8 +139,8 @@ of 1 GiB and a maximum size of 4 GiB. The pool uses
 >>> import rmm
 >>> pool = rmm.mr.PoolMemoryResource(
 ...     rmm.mr.CudaMemoryResource(),
-...     initial_pool_size=2**30,
-...     maximum_pool_size=2**32
+...     initial_pool_size="1GiB", # equivalent to initial_pool_size=2**30
+...     maximum_pool_size="4GiB"
 ... )
 >>> rmm.mr.set_current_device_resource(pool)
 ```
@@ -151,8 +151,8 @@ Similarly, to use a pool of managed memory:
 >>> import rmm
 >>> pool = rmm.mr.PoolMemoryResource(
 ...     rmm.mr.ManagedMemoryResource(),
-...     initial_pool_size=2**30,
-...     maximum_pool_size=2**32
+...     initial_pool_size="1GiB",
+...     maximum_pool_size="4GiB"
 ... )
 >>> rmm.mr.set_current_device_resource(pool)
 ```
diff --git a/python/rmm/rmm/_lib/CMakeLists.txt b/python/rmm/rmm/_lib/CMakeLists.txt
@@ -12,7 +12,8 @@
 # the License.
 # =============================================================================
 
-set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx)
+set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx
+                   helper.pyx)
 set(linked_libraries rmm::rmm)
 
 # Build all of the Cython targets
diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd
@@ -0,0 +1,16 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+cdef object parse_bytes(object s) except *
diff --git a/python/rmm/rmm/_lib/helper.pyx b/python/rmm/rmm/_lib/helper.pyx
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helper functions for rmm"""
+
+import re
+
+
+cdef dict BYTE_SIZES = {  # lowercase unit suffix -> multiplier in bytes
+    'b': 1,
+    '': 1,  # no suffix given: the number is already a byte count
+    'kb': 1000,  # SI (decimal) prefixes: powers of 1000
+    'mb': 1000**2,
+    'gb': 1000**3,
+    'tb': 1000**4,
+    'pb': 1000**5,
+    'kib': 1024,  # binary prefixes: powers of 1024
+    'mib': 1024**2,
+    'gib': 1024**3,
+    'tib': 1024**4,
+    'pib': 1024**5,
+}
+
+# Group 1: "<digits>[.<digits>]"; spaces/tabs; group 2: optional unit, any case.
+pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$")
+
+cdef object parse_bytes(object s):
+    """Parse a string or integer into a number of bytes.
+
+    Parameters
+    ----------
+    s : int | str
+        Size in bytes. If an integer is provided, it is returned as-is.
+        A string is parsed as a floating point number with an (optional,
+        case-insensitive) byte-specifier, both SI prefixes (kb, mb, ..., pb)
+        and binary prefixes (kib, mib, ..., pib) are supported.
+
+    Returns
+    -------
+    Requested size in bytes as an integer; fractional bytes are truncated.
+
+    Raises
+    ------
+    ValueError
+        If it is not possible to parse the input as a byte specification.
+    """
+    cdef str suffix
+    cdef double n
+    # 64-bit on purpose: tb/tib/pb/pib multipliers overflow a 32-bit C int.
+    cdef long long multiplier
+
+    if isinstance(s, int):
+        return s
+
+    match = pattern.match(s)
+    if match is None:
+        raise ValueError(f"Could not parse {s} as a byte specification")
+
+    n = float(match.group(1))
+
+    suffix = match.group(2)
+    if suffix is None:
+        suffix = ""
+
+    multiplier = BYTE_SIZES[suffix.lower()]
+
+    return int(n*multiplier)
diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx
@@ -32,10 +32,13 @@ from libcpp.string cimport string
 from cuda.cudart import cudaError_t
 
 from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice
+
 from rmm._cuda.stream cimport Stream
+
 from rmm._cuda.stream import DEFAULT_STREAM
 
 from rmm._lib.cuda_stream_view cimport cuda_stream_view
+from rmm._lib.helper cimport parse_bytes
 from rmm._lib.memory_resource cimport (
     available_device_memory as c_available_device_memory,
     percent_of_free_device_memory as c_percent_of_free_device_memory,
@@ -44,6 +47,7 @@ from rmm._lib.per_device_resource cimport (
     cuda_device_id,
     set_per_device_resource as cpp_set_per_device_resource,
 )
+
 from rmm.statistics import Statistics
 
 # Transparent handle of a C++ exception
@@ -314,9 +318,9 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
 
     Parameters
     ----------
-    initial_pool_size : int, optional
+    initial_pool_size : int | str, optional
         Initial pool size in bytes. By default, half the available memory
-        on the device is used.
+        on the device is used. A string argument is parsed using `parse_bytes`.
     release_threshold: int, optional
         Release threshold in bytes. If the pool size grows beyond this
         value, unused memory held by the pool will be released at the
@@ -334,7 +338,7 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
         cdef optional[size_t] c_initial_pool_size = (
             optional[size_t]()
             if initial_pool_size is None
-            else optional[size_t](<size_t> initial_pool_size)
+            else optional[size_t](<size_t> parse_bytes(initial_pool_size))
         )
 
         cdef optional[size_t] c_release_threshold = (
@@ -426,12 +430,12 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
         c_initial_pool_size = (
             c_percent_of_free_device_memory(50) if
             initial_pool_size is None
-            else initial_pool_size
+            else parse_bytes(initial_pool_size)
         )
         c_maximum_pool_size = (
             optional[size_t]() if
             maximum_pool_size is None
-            else optional[size_t](<size_t> maximum_pool_size)
+            else optional[size_t](<size_t> parse_bytes(maximum_pool_size))
         )
         self.c_obj.reset(
             new pool_memory_resource[device_memory_resource](
@@ -456,10 +460,10 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
         upstream_mr : DeviceMemoryResource
             The DeviceMemoryResource from which to allocate blocks for the
             pool.
-        initial_pool_size : int, optional
+        initial_pool_size : int | str, optional
             Initial pool size in bytes. By default, half the available memory
             on the device is used.
-        maximum_pool_size : int, optional
+        maximum_pool_size : int | str, optional
             Maximum size in bytes, that the pool can grow to.
         """
         pass
@@ -1091,8 +1095,10 @@ cpdef void _initialize(
         typ = PoolMemoryResource
         args = (upstream(),)
         kwargs = dict(
-            initial_pool_size=initial_pool_size,
-            maximum_pool_size=maximum_pool_size
+            initial_pool_size=None if initial_pool_size is None
+            else parse_bytes(initial_pool_size),
+            maximum_pool_size=None if maximum_pool_size is None
+            else parse_bytes(maximum_pool_size)
         )
     else:
         typ = upstream
diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -45,14 +45,16 @@ def reinitialize(
         performance.
     managed_memory : bool, default False
         If True, use managed memory for device memory allocation
-    initial_pool_size : int, default None
+    initial_pool_size : int | str, default None
         When `pool_allocator` is True, this indicates the initial pool size in
         bytes. By default, 1/2 of the total GPU memory is used.
         When `pool_allocator` is False, this argument is ignored if provided.
-    maximum_pool_size : int, default None
+        A string argument is parsed using `parse_bytes`.
+    maximum_pool_size : int | str, default None
         When `pool_allocator` is True, this indicates the maximum pool size in
         bytes. By default, the total available memory on the GPU is used.
         When `pool_allocator` is False, this argument is ignored if provided.
+        A string argument is parsed using `parse_bytes`.
     devices : int or List[int], default 0
         GPU device  IDs to register. By default registers only GPU 0.
     logging : bool, default False
diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py
@@ -432,8 +432,8 @@ def test_rmm_pool_cupy_allocator_stream_lifetime():
 def test_pool_memory_resource(dtype, nelem, alloc):
     mr = rmm.mr.PoolMemoryResource(
         rmm.mr.CudaMemoryResource(),
-        initial_pool_size=1 << 22,
-        maximum_pool_size=1 << 23,
+        initial_pool_size="4MiB",
+        maximum_pool_size="8MiB",
     )
     rmm.mr.set_current_device_resource(mr)
     assert rmm.mr.get_current_device_resource_type() is type(mr)
@@ -507,7 +507,7 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr):
 
 def test_reinitialize_max_pool_size():
     rmm.reinitialize(
-        pool_allocator=True, initial_pool_size=0, maximum_pool_size=1 << 23
+        pool_allocator=True, initial_pool_size=0, maximum_pool_size="8MiB"
     )
     rmm.DeviceBuffer().resize((1 << 23) - 1)
 
@@ -530,6 +530,24 @@ def test_reinitialize_initial_pool_size_gt_max():
     assert "Initial pool size exceeds the maximum pool size" in str(e.value)
 
 
+def test_reinitialize_with_valid_str_arg_pool_size():
+    rmm.reinitialize(
+        pool_allocator=True,
+        initial_pool_size="2kib",  # lowercase suffix; equals 2 * 1024 bytes
+        maximum_pool_size="8kib",
+    )
+
+
+def test_reinitialize_with_invalid_str_arg_pool_size():
+    with pytest.raises(ValueError) as e:
+        rmm.reinitialize(
+            pool_allocator=True,
+            initial_pool_size="2k",  # unit must end in "b": "2kb", not "2k"
+            maximum_pool_size="8k",
+        )
+    assert "Could not parse" in str(e.value)
+
+
 @pytest.mark.parametrize("dtype", _dtypes)
 @pytest.mark.parametrize("nelem", _nelems)
 @pytest.mark.parametrize("alloc", _allocs)