Merge branch '22-11-0-1' into 'main'

leofang · leofang · commit b7f847c16413 · 2023-01-09T11:12:50.000-08:00
Prepare for 22.11.0.1 hotfix release

See merge request cuda-hpc-libraries/cuquantum-sdk/cuquantum-public!16
diff --git a/LICENSE b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 BSD-3-Clause
 
diff --git a/python/LICENSE b/python/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 BSD-3-Clause
 
diff --git a/python/cuquantum/_version.py b/python/cuquantum/_version.py
@@ -5,4 +5,4 @@
 # Note: cuQuantum Python follows the cuQuantum SDK version, which is now
 # switched to YY.MM and is different from individual libraries' (semantic)
 # versioning scheme.
-__version__ = '22.11.0'
+__version__ = '22.11.0.1'
diff --git a/python/cuquantum/cutensornet/_internal/circuit_converter_utils.py b/python/cuquantum/cutensornet/_internal/circuit_converter_utils.py
@@ -50,7 +50,8 @@ def infer_parser(circuit):
     Infer the package that defines the circuit object.
     """
     if qiskit and isinstance(circuit, qiskit.QuantumCircuit):
-        qiskit_version  = qiskit.__qiskit_version__['qiskit'] # qiskit metapackage version
+        import importlib.metadata
+        qiskit_version = importlib.metadata.version('qiskit') # qiskit metapackage version
         check_version('qiskit', qiskit_version, QISKIT_MIN_VERSION)
         return circuit_parser_utils_qiskit
     elif cirq and isinstance(circuit, cirq.Circuit):
diff --git a/python/cuquantum/cutensornet/_internal/utils.py b/python/cuquantum/cutensornet/_internal/utils.py
@@ -185,27 +185,22 @@ def create_empty_tensor(cls, extents, dtype, device_id, stream_ctx):
     return tensor
 
 
-def create_output_tensor(cls, package, output, size_dict, device_id, data_type):
+def create_output_tensor(cls, package, output, size_dict, device_id, stream, data_type):
     """
     Create output tensor and associated data (modes, extents, strides). This operation is
-    blocking and is safe to use with asynchronous memory pools.
+    ordered through events and is safe to use with asynchronous memory pools.
     """
     modes = tuple(m for m in output)
     extents = tuple(size_dict[m] for m in output)
 
-    package_ifc = package_wrapper.PACKAGE[package]
-
-    stream = package_ifc.create_stream(device_id)
-    stream, stream_ctx, _ = _create_stream_ctx_ptr_cupy_stream(package_ifc, stream)
+    stream, stream_ctx, _ = get_or_create_stream(device_id, stream, package)
 
     with device_ctx(device_id):
-        start = stream.record()
         output = create_empty_tensor(cls, extents, data_type, device_id, stream_ctx)
-        end = stream.record()
-        end.synchronize()
+        output_event = stream.record()
 
     strides = output.strides
-    return output, modes, extents, strides
+    return output, output_event, modes, extents, strides
 
 
 def get_network_device_id(operands):
@@ -494,16 +489,10 @@ def get_mpi_comm_pointer(comm):
     Returns:
         tuple: A pair of int values representing the address and the size.
     """
-    # We won't initialize MPI for users in any case
     try:
-        import mpi4py
-        init = mpi4py.rc.initialize
-        mpi4py.rc.initialize = False
-        from mpi4py import MPI
+        from mpi4py import MPI  # init!
     except ImportError as e:
         raise RuntimeError("please install mpi4py") from e
-    finally:
-        mpi4py.rc.initialize = init
 
     if not isinstance(comm, MPI.Comm):
         raise ValueError("invalid MPI communicator")
diff --git a/python/cuquantum/cutensornet/tensor_network.py b/python/cuquantum/cutensornet/tensor_network.py
@@ -233,8 +233,12 @@ def __init__(self, *operands, qualifiers=None, options=None):
         num_modes_in = tuple(len(m) for m in modes_in)
         self.qualifiers_in = utils.check_tensor_qualifiers(qualifiers, cutn.tensor_qualifiers_dtype, num_inputs)
 
-        self.contraction, modes_out, extents_out, strides_out = utils.create_output_tensor(
-                self.output_class, self.package, self.output, self.size_dict, self.device_id, self.data_type)
+        # Create the output in the context of the current stream to work around a performance issue with CuPy's memory pool.
+        stream = None
+        self.logger.debug("Beginning output tensor creation...")
+        self.contraction, self.contraction_output_event, modes_out, extents_out, strides_out = utils.create_output_tensor(
+                self.output_class, self.package, self.output, self.size_dict, self.device_id, stream, self.data_type)
+        self.logger.debug("The output tensor has been created.")
 
         # Create/set handle.
         if options.handle is not None:
@@ -631,7 +635,13 @@ def autotune(self, *, iterations=3, stream=None):
 
         # Check if we still hold an output tensor; if not, create a new one.
         if self.contraction is None:
+            self.logger.debug("Beginning output (empty) tensor creation...")
             self.contraction = utils.create_empty_tensor(self.output_class, self.extents_out, self.data_type, self.device_id, stream_ctx)
+            self.logger.debug("The output (empty) tensor has been created.")
+        elif self.contraction_output_event is not None:
+            stream.wait_event(self.contraction_output_event)
+            self.contraction_output_event = None
+            self.logger.debug("Established ordering with output tensor creation event.")
 
         timing =  bool(self.logger and self.logger.handlers)
         self.logger.info(f"Starting autotuning...")
@@ -716,7 +726,13 @@ def contract(self, *, slices=None, stream=None):
 
         # Check if we still hold an output tensor; if not, create a new one.
         if self.contraction is None:
+            self.logger.debug("Beginning output (empty) tensor creation...")
             self.contraction = utils.create_empty_tensor(self.output_class, self.extents_out, self.data_type, self.device_id, stream_ctx)
+            self.logger.debug("The output (empty) tensor has been created.")
+        elif self.contraction_output_event is not None:
+            stream.wait_event(self.contraction_output_event)
+            self.contraction_output_event = None
+            self.logger.debug("Established ordering with output tensor creation event.")
 
         # Create a slice group for contraction.
         slice_group = None
diff --git a/python/samples/cutensornet/circuit_converter/qiskit_basic.ipynb b/python/samples/cutensornet/circuit_converter/qiskit_basic.ipynb
@@ -128,8 +128,8 @@
     "print(type(sv))\n",
     "\n",
     "# check if the computed statevector is correct\n",
-    "circuit.save_statevector()\n",
     "simulator = qiskit.Aer.get_backend('aer_simulator_statevector')\n",
+    "circuit.save_statevector()\n",
     "circ = qiskit.transpile(circuit, simulator)\n",
     "result = simulator.run(circ).result()\n",
     "sv_qiskit = np.asarray(result.get_statevector()).reshape([2]*num_qubits)\n",
@@ -327,7 +327,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.9.15"
   }
  },
  "nbformat": 4,

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.` |
|  | `1` | `+Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.` |
| `2` | `2` |  |
| `3` | `3` | `BSD-3-Clause` |
| `4` | `4` |  |