-
Notifications
You must be signed in to change notification settings - Fork 91
Description
Dear developers,
Could you please help me solve the following problem?
I initialized a state vector in PEPS form on a hexagonal lattice as a set of tensors, each with two or three auxiliary indices and one physical index. Initially, the extent of every auxiliary index is 1 (an unentangled state), and it increases during the calculation. At each step, I calculate the norm of the state vector by contracting the PEPS with its conjugate tensor network using the cutensornetNetworkContract method. This works well when the extent of the auxiliary indices exceeds 1. However, when the extent is 1 (i.e., at the very beginning), I get "Segmentation fault (core dumped)" from
HANDLE_CUTN_ERROR(cutensornetContractionOptimize(local_handle, descNet, optimizerConfig, workspaceLimit, optimizerInfo)). I have double-checked all input modes, extents, and tensor data, so the problem appears to be in the cutensornetContractionOptimize method itself. The relevant code is included below for more details.
void TensorNetwork::ContractTensors(const std::vector<int32_t>& numModesIn,
const std::vector<const int64_t*>& extentsIn,
const std::vector<const int32_t*>& modesIn,
const std::vector<cutensornetTensorQualifiers_t>& qualifiersIn,
const int32_t numModesOut,
const int32_t* modesOut,
const std::vector<const void*>& tensorsIn,
void* tensorOut,
size_t thread_num,
const int32_t numAutotuningIterations,
const int32_t numHyperSamples,
const int32_t disableSlicing)
{
cutensornetHandle_t local_handle = handle_.at(thread_num);
size_t numInputTensors = numModesIn.size();
cutensornetNetworkDescriptor_t descNet;
HANDLE_CUTN_ERROR(cutensornetCreateNetwork(local_handle, &descNet));
std::vector<int64_t> tensorIDs(numInputTensors);
for(size_t t = 0; t < numInputTensors; ++t)
{
HANDLE_CUTN_ERROR(cutensornetNetworkAppendTensor(local_handle,
descNet,
numModesIn[t],
extentsIn[t],
modesIn[t],
&qualifiersIn[t],
typeData_,
&tensorIDs[t]));
}
HANDLE_CUTN_ERROR(cutensornetNetworkSetOutputTensor(local_handle,
descNet,
numModesOut,
modesOut,
typeData_));
HANDLE_CUTN_ERROR(cutensornetNetworkSetAttribute(local_handle,
descNet,
CUTENSORNET_NETWORK_COMPUTE_TYPE,
&typeCompute_,
sizeof(typeCompute_)));
cutensornetContractionOptimizerConfig_t optimizerConfig;
HANDLE_CUTN_ERROR(cutensornetCreateContractionOptimizerConfig(local_handle, &optimizerConfig));
HANDLE_CUTN_ERROR(cutensornetContractionOptimizerConfigSetAttribute(local_handle,
optimizerConfig,
CUTENSORNET_CONTRACTION_OPTIMIZER_CONFIG_HYPER_NUM_SAMPLES,
&numHyperSamples,
sizeof(numHyperSamples)));
if(disableSlicing == 1)
{
HANDLE_CUTN_ERROR(cutensornetContractionOptimizerConfigSetAttribute(local_handle,
optimizerConfig,
CUTENSORNET_CONTRACTION_OPTIMIZER_CONFIG_SLICER_DISABLE_SLICING,
&disableSlicing,
sizeof(disableSlicing)));
}
cutensornetContractionOptimizerInfo_t optimizerInfo;
HANDLE_CUTN_ERROR(cutensornetCreateContractionOptimizerInfo(local_handle, descNet, &optimizerInfo));
size_t freeMem, totalMem;
HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem));
uint64_t workspaceLimit = static_cast<uint64_t>(static_cast<double>(freeMem) * 0.8 / static_cast<double>(numStreams_));
HANDLE_CUTN_ERROR(cutensornetContractionOptimize(local_handle,
descNet,
optimizerConfig,
workspaceLimit,
optimizerInfo));
cutensornetWorkspaceDescriptor_t workDesc;
HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(local_handle, &workDesc));
HANDLE_CUTN_ERROR(cutensornetWorkspaceComputeContractionSizes(local_handle,
descNet,
optimizerInfo,
workDesc));
int64_t worksize_scratch = 0, worksize_cache = 0;
HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(local_handle,
workDesc,
CUTENSORNET_WORKSIZE_PREF_MIN,
CUTENSORNET_MEMSPACE_DEVICE,
CUTENSORNET_WORKSPACE_SCRATCH,
&worksize_scratch));
HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(local_handle,
workDesc,
CUTENSORNET_WORKSIZE_PREF_MIN,
CUTENSORNET_MEMSPACE_DEVICE,
CUTENSORNET_WORKSPACE_CACHE,
&worksize_cache));
if(worksize_scratch + worksize_cache > workspaceLimit)
{
throw std::runtime_error("TensorNetwork::ContractTensors: "
"The required size of scratch and cache exceeds the current workspace limit.");
}
void *scratch_ptr = nullptr, *cache_ptr = nullptr;
if (worksize_scratch > 0)
{
HANDLE_CUDA_ERROR(cudaMalloc(&scratch_ptr, worksize_scratch));
HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(local_handle,
workDesc,
CUTENSORNET_MEMSPACE_DEVICE,
CUTENSORNET_WORKSPACE_SCRATCH,
scratch_ptr,
worksize_scratch));
}
if (worksize_cache > 0)
{
HANDLE_CUDA_ERROR(cudaMalloc(&cache_ptr, worksize_cache));
HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(local_handle,
workDesc,
CUTENSORNET_MEMSPACE_DEVICE,
CUTENSORNET_WORKSPACE_CACHE,
cache_ptr,
worksize_cache));
}
HANDLE_CUTN_ERROR(cutensornetNetworkPrepareContraction(local_handle,
descNet,
workDesc));
for (size_t t = 0; t < numInputTensors; ++t)
{
HANDLE_CUTN_ERROR(cutensornetNetworkSetInputTensorMemory(local_handle,
descNet,
tensorIDs[t],
tensorsIn[t],
nullptr));
}
HANDLE_CUTN_ERROR(cutensornetNetworkSetOutputTensorMemory(local_handle,
descNet,
tensorOut,
nullptr));
cutensornetNetworkAutotunePreference_t autotunePref;
HANDLE_CUTN_ERROR(cutensornetCreateNetworkAutotunePreference(local_handle, &autotunePref));
HANDLE_CUTN_ERROR(cutensornetNetworkAutotunePreferenceSetAttribute(local_handle,
autotunePref,
CUTENSORNET_NETWORK_AUTOTUNE_MAX_ITERATIONS,
&numAutotuningIterations,
sizeof(numAutotuningIterations)));
HANDLE_CUTN_ERROR(cutensornetNetworkAutotuneContraction(local_handle,
descNet,
workDesc,
autotunePref,
streams_.at(thread_num)));
cutensornetSliceGroup_t sliceGroup = nullptr;
if(disableSlicing != 1)
{
int64_t numSlices = 0;
HANDLE_CUTN_ERROR(cutensornetContractionOptimizerInfoGetAttribute(local_handle,
optimizerInfo,
CUTENSORNET_CONTRACTION_OPTIMIZER_INFO_NUM_SLICES,
&numSlices,
sizeof(numSlices)));
HANDLE_CUTN_ERROR(cutensornetCreateSliceGroupFromIDRange(local_handle, 0, numSlices, 1, &sliceGroup));
}
HANDLE_CUTN_ERROR(cutensornetNetworkContract(local_handle,
descNet,
0,
workDesc,
sliceGroup,
streams_.at(thread_num)));
SynchronizeStreams(std::vector<size_t>{thread_num});
HANDLE_CUTN_ERROR(cutensornetDestroySliceGroup(sliceGroup));
HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
HANDLE_CUTN_ERROR(cutensornetDestroyContractionOptimizerInfo(optimizerInfo));
HANDLE_CUTN_ERROR(cutensornetDestroyContractionOptimizerConfig(optimizerConfig));
HANDLE_CUTN_ERROR(cutensornetDestroyNetworkAutotunePreference(autotunePref));
HANDLE_CUTN_ERROR(cutensornetDestroyNetwork(descNet));
HANDLE_CUDA_ERROR(cudaFree(scratch_ptr));
HANDLE_CUDA_ERROR(cudaFree(cache_ptr));
}