Skip to content

Segmentation fault when contracting a tensor network with unit-length bonds #201

@YauheniTalochkaN

Description

@YauheniTalochkaN

Dear developers,

Could you please help me solve the following problem?
I initialized a state vector in PEPS form on a hexagonal lattice as a set of tensors, each with two or three auxiliary indices and one physical index. Initially, the extent of every auxiliary index is 1 (an unentangled state), and it grows during the calculation. At each step, I compute the norm of the state vector by contracting the PEPS with its conjugate tensor network using the cutensornetNetworkContract method. This works well when the extent of the auxiliary indices exceeds 1. However, when the extent is 1, I get "Segmentation fault (core dumped)" from
HANDLE_CUTN_ERROR(cutensornetContractionOptimize(local_handle, descNet, optimizerConfig, workspaceLimit, optimizerInfo)) at the very beginning of the calculation. I have double-checked all input modes, extents, and tensor data, so the problem appears to be in the cutensornetContractionOptimize method itself. The code for this part is attached below for more details.

void TensorNetwork::ContractTensors(const std::vector<int32_t>& numModesIn,
                                    const std::vector<const int64_t*>& extentsIn,
                                    const std::vector<const int32_t*>& modesIn,
                                    const std::vector<cutensornetTensorQualifiers_t>& qualifiersIn,
                                    const int32_t numModesOut,
                                    const int32_t* modesOut,
                                    const std::vector<const void*>& tensorsIn,
                                    void* tensorOut,
                                    size_t thread_num,
                                    const int32_t numAutotuningIterations,
                                    const int32_t numHyperSamples,
                                    const int32_t disableSlicing)
    {
        cutensornetHandle_t local_handle = handle_.at(thread_num);

        size_t numInputTensors = numModesIn.size();
        
        cutensornetNetworkDescriptor_t descNet;
        HANDLE_CUTN_ERROR(cutensornetCreateNetwork(local_handle, &descNet));

        std::vector<int64_t> tensorIDs(numInputTensors);

        for(size_t t = 0; t < numInputTensors; ++t)
        {
            HANDLE_CUTN_ERROR(cutensornetNetworkAppendTensor(local_handle,
                                                             descNet,
                                                             numModesIn[t],
                                                             extentsIn[t],
                                                             modesIn[t],
                                                             &qualifiersIn[t],
                                                             typeData_,
                                                             &tensorIDs[t]));
        }

        HANDLE_CUTN_ERROR(cutensornetNetworkSetOutputTensor(local_handle,
                                                            descNet,
                                                            numModesOut,
                                                            modesOut,
                                                            typeData_));


        HANDLE_CUTN_ERROR(cutensornetNetworkSetAttribute(local_handle,
                                                         descNet,
                                                         CUTENSORNET_NETWORK_COMPUTE_TYPE,
                                                         &typeCompute_,
                                                         sizeof(typeCompute_)));

        cutensornetContractionOptimizerConfig_t optimizerConfig;
        HANDLE_CUTN_ERROR(cutensornetCreateContractionOptimizerConfig(local_handle, &optimizerConfig));

        HANDLE_CUTN_ERROR(cutensornetContractionOptimizerConfigSetAttribute(local_handle,
                                                                            optimizerConfig,
                                                                            CUTENSORNET_CONTRACTION_OPTIMIZER_CONFIG_HYPER_NUM_SAMPLES,
                                                                            &numHyperSamples,
                                                                            sizeof(numHyperSamples)));

        if(disableSlicing == 1)
        {
            HANDLE_CUTN_ERROR(cutensornetContractionOptimizerConfigSetAttribute(local_handle, 
                                                                                optimizerConfig,
                                                                                CUTENSORNET_CONTRACTION_OPTIMIZER_CONFIG_SLICER_DISABLE_SLICING,
                                                                                &disableSlicing, 
                                                                                sizeof(disableSlicing)));
        }

        cutensornetContractionOptimizerInfo_t optimizerInfo;
        HANDLE_CUTN_ERROR(cutensornetCreateContractionOptimizerInfo(local_handle, descNet, &optimizerInfo));

        size_t freeMem, totalMem;
        HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
        uint64_t workspaceLimit = static_cast<uint64_t>(static_cast<double>(freeMem) * 0.8 / static_cast<double>(numStreams_));
        
        HANDLE_CUTN_ERROR(cutensornetContractionOptimize(local_handle, 
                                                         descNet, 
                                                         optimizerConfig, 
                                                         workspaceLimit, 
                                                         optimizerInfo));

        cutensornetWorkspaceDescriptor_t workDesc;
        HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(local_handle, &workDesc));
        
        HANDLE_CUTN_ERROR(cutensornetWorkspaceComputeContractionSizes(local_handle, 
                                                                      descNet, 
                                                                      optimizerInfo, 
                                                                      workDesc));
        
        int64_t worksize_scratch = 0, worksize_cache = 0;
        
        HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(local_handle, 
                                                            workDesc,
                                                            CUTENSORNET_WORKSIZE_PREF_MIN, 
                                                            CUTENSORNET_MEMSPACE_DEVICE, 
                                                            CUTENSORNET_WORKSPACE_SCRATCH, 
                                                            &worksize_scratch));
        
        HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(local_handle, 
                                                            workDesc,
                                                            CUTENSORNET_WORKSIZE_PREF_MIN, 
                                                            CUTENSORNET_MEMSPACE_DEVICE,
                                                            CUTENSORNET_WORKSPACE_CACHE, 
                                                            &worksize_cache));

        if(worksize_scratch + worksize_cache > workspaceLimit)
        {
            throw std::runtime_error("TensorNetwork::ContractTensors: "
                                     "The required size of scratch and cache exceeds the current workspace limit.");
        }

        void *scratch_ptr = nullptr, *cache_ptr = nullptr;

        if (worksize_scratch > 0) 
        {
            HANDLE_CUDA_ERROR(cudaMalloc(&scratch_ptr, worksize_scratch));

            HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(local_handle, 
                                                            workDesc,
                                                            CUTENSORNET_MEMSPACE_DEVICE, 
                                                            CUTENSORNET_WORKSPACE_SCRATCH, 
                                                            scratch_ptr, 
                                                            worksize_scratch));
        }

        if (worksize_cache > 0) 
        {
            HANDLE_CUDA_ERROR(cudaMalloc(&cache_ptr, worksize_cache));

            HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(local_handle, 
                                                            workDesc,
                                                            CUTENSORNET_MEMSPACE_DEVICE, 
                                                            CUTENSORNET_WORKSPACE_CACHE,
                                                            cache_ptr, 
                                                            worksize_cache));
        }
        
        HANDLE_CUTN_ERROR(cutensornetNetworkPrepareContraction(local_handle,
                                                               descNet,
                                                               workDesc));

        for (size_t t = 0; t < numInputTensors; ++t)
        {
            HANDLE_CUTN_ERROR(cutensornetNetworkSetInputTensorMemory(local_handle,
                                                                     descNet,
                                                                     tensorIDs[t],
                                                                     tensorsIn[t],
                                                                     nullptr));
        }

        HANDLE_CUTN_ERROR(cutensornetNetworkSetOutputTensorMemory(local_handle,
                                                                  descNet,
                                                                  tensorOut,
                                                                  nullptr));
        
        cutensornetNetworkAutotunePreference_t autotunePref;
        HANDLE_CUTN_ERROR(cutensornetCreateNetworkAutotunePreference(local_handle, &autotunePref));

        HANDLE_CUTN_ERROR(cutensornetNetworkAutotunePreferenceSetAttribute(local_handle,
                                                                           autotunePref,
                                                                           CUTENSORNET_NETWORK_AUTOTUNE_MAX_ITERATIONS,
                                                                           &numAutotuningIterations,
                                                                           sizeof(numAutotuningIterations)));

        HANDLE_CUTN_ERROR(cutensornetNetworkAutotuneContraction(local_handle,
                                                                descNet,
                                                                workDesc,
                                                                autotunePref,
                                                                streams_.at(thread_num)));

        cutensornetSliceGroup_t sliceGroup = nullptr;

        if(disableSlicing != 1)
        {
            int64_t numSlices = 0;
            
            HANDLE_CUTN_ERROR(cutensornetContractionOptimizerInfoGetAttribute(local_handle,
                                                                              optimizerInfo,
                                                                              CUTENSORNET_CONTRACTION_OPTIMIZER_INFO_NUM_SLICES,
                                                                              &numSlices,
                                                                              sizeof(numSlices)));
  
            HANDLE_CUTN_ERROR(cutensornetCreateSliceGroupFromIDRange(local_handle, 0, numSlices, 1, &sliceGroup));
        }
        
        HANDLE_CUTN_ERROR(cutensornetNetworkContract(local_handle,
                                                     descNet,
                                                     0,
                                                     workDesc,
                                                     sliceGroup,
                                                     streams_.at(thread_num)));

        SynchronizeStreams(std::vector<size_t>{thread_num});
        
        HANDLE_CUTN_ERROR(cutensornetDestroySliceGroup(sliceGroup));
        HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
        HANDLE_CUTN_ERROR(cutensornetDestroyContractionOptimizerInfo(optimizerInfo));
        HANDLE_CUTN_ERROR(cutensornetDestroyContractionOptimizerConfig(optimizerConfig));
        HANDLE_CUTN_ERROR(cutensornetDestroyNetworkAutotunePreference(autotunePref));
        HANDLE_CUTN_ERROR(cutensornetDestroyNetwork(descNet));
        HANDLE_CUDA_ERROR(cudaFree(scratch_ptr));
        HANDLE_CUDA_ERROR(cudaFree(cache_ptr));
    }

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions