Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
8744ba7
add align for WorkQueue
liutiexing Sep 22, 2021
4759bc8
Merge branch 'develop' of https://github.com/liutiexing/Paddle into d…
liutiexing Sep 22, 2021
6f00ace
add spinlock
liutiexing Sep 23, 2021
2d6f1cf
merge develop
liutiexing Sep 23, 2021
f5099be
Merge branch 'develop' of https://github.com/liutiexing/Paddle into d…
liutiexing Sep 26, 2021
54aa332
merge develop
liutiexing Oct 12, 2021
1d1bd82
merge
liutiexing Oct 12, 2021
dfbf3e4
Merge remote-tracking branch 'upstream/develop' into develop
liutiexing Oct 12, 2021
a5392b3
Merge remote-tracking branch 'upstream/develop' into develop
liutiexing Oct 14, 2021
e206173
Add EventsWaiter
liutiexing Oct 15, 2021
0a3dcd9
Revert "Add EventsWaiter"
liutiexing Oct 15, 2021
4689bb5
Merge remote-tracking branch 'upstream/develop' into develop
liutiexing Oct 15, 2021
0cec99a
Merge remote-tracking branch 'upstream/develop' into develop
liutiexing Oct 20, 2021
481c4fa
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Oct 27, 2021
83db84e
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Oct 29, 2021
7010e0d
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Nov 16, 2021
ec2a363
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Nov 23, 2021
90a59ec
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Nov 26, 2021
1445bbe
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Nov 29, 2021
a2c74ab
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 1, 2021
1c09b4e
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 2, 2021
cb8cf7d
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 8, 2021
cf0dcd6
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 8, 2021
2f95801
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 14, 2021
14bec1b
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 15, 2021
8a5f7af
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 16, 2021
f0a5915
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 20, 2021
0fe35aa
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 21, 2021
f65eef2
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 23, 2021
b37e42d
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 28, 2021
cf5e240
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 29, 2021
b31869a
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Dec 30, 2021
fab2911
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 4, 2022
16b0903
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 6, 2022
074fea5
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 7, 2022
8f4a51c
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 8, 2022
09036ff
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 10, 2022
0e6a94f
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 13, 2022
d2293fd
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 14, 2022
b529801
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 17, 2022
ff55840
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 17, 2022
52684e7
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 21, 2022
e806789
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 22, 2022
e59a3f8
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 24, 2022
8fa5e17
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 25, 2022
5c8ffbd
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 25, 2022
e5586e9
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 26, 2022
e5731a4
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Jan 28, 2022
67cd2a6
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Feb 10, 2022
b624967
add log for Executor
Feb 10, 2022
df6298b
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Feb 11, 2022
1f1a720
Merge branch 'develop' of https://github.com/liutiexing/Paddle into d…
Feb 11, 2022
7edaab6
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Feb 11, 2022
f92599f
Merge branch 'develop' of https://github.com/liutiexing/Paddle into d…
Feb 11, 2022
bdec640
Merge branch 'PaddlePaddle:develop' into develop
liutiexing Feb 11, 2022
e14255f
Merge branch 'develop' of https://github.com/liutiexing/Paddle into d…
Feb 11, 2022
330682e
Add CudaTracer to trace CUDA events
Feb 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion paddle/fluid/platform/profiler/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Build targets for the profiler directory.
cc_library(host_tracer SRCS host_tracer.cc DEPS enforce)
# NOTE(review): new_profiler is declared twice in this view (once depending only
# on host_tracer, once also on cuda_tracer). This looks like the before/after
# lines of a diff rendered together -- confirm that only one declaration exists
# in the real CMakeLists.txt.
cc_library(new_profiler SRCS profiler.cc DEPS host_tracer)
# cuda_tracer is built from cuda_tracer.cc and cupti_data_process.cc.
cc_library(cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc DEPS workqueue_utils enforce glog)
cc_library(new_profiler SRCS profiler.cc DEPS host_tracer cuda_tracer)
cc_library(event_node SRCS event_node.cc DEPS enforce)
cc_library(chrometracinglogger SRCS chrometracing_logger.cc DEPS event_node)
# Test binary linking event_node and chrometracinglogger.
cc_test(test_event_node SRCS test_event_node.cc DEPS event_node chrometracinglogger)
Expand Down
191 changes: 191 additions & 0 deletions paddle/fluid/platform/profiler/cuda_tracer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/platform/profiler/cuda_tracer.h"
#include <string>
#include <unordered_map>
#include "glog/logging.h"
#include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h"
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/profiler/cupti_data_process.h"

// Evaluates the CUPTI expression `call` exactly once. If the result is not
// CUPTI_SUCCESS, logs the stringified expression together with the CUPTI error
// description at ERROR level and terminates the process with exit(-1).
//
// `errstr` is pre-initialized and re-checked after the lookup so that a failed
// cuptiGetResultString() can never cause an uninitialized or null pointer to be
// streamed into the log. The macro body is wrapped in do { } while (0) so it
// behaves like a single statement; it must be expanded where `dynload::`
// resolves (i.e. inside namespace paddle::platform).
#define CUPTI_CALL(call)                                                     \
  do {                                                                       \
    CUptiResult _status = (call);                                            \
    if (_status != CUPTI_SUCCESS) {                                          \
      const char* errstr = "<unknown CUPTI error>";                          \
      if (dynload::cuptiGetResultString(_status, &errstr) != CUPTI_SUCCESS || \
          errstr == nullptr) {                                               \
        errstr = "<unknown CUPTI error>";                                    \
      }                                                                      \
      LOG(ERROR) << "Function " << #call << " failed with error " << errstr; \
      exit(-1);                                                              \
    }                                                                        \
  } while (0)

namespace paddle {
namespace platform {

namespace details {
// Builds a lookup table from the CUPTI thread id to the system thread id for
// every thread reported by GetAllThreadIds(). If two entries share a CUPTI
// thread id, the one visited last wins.
std::unordered_map<uint32_t, uint64_t> CreateThreadIdMapping() {
  // Take a snapshot of the registered threads before iterating.
  const std::unordered_map<uint64_t, ThreadId> all_threads = GetAllThreadIds();

  std::unordered_map<uint32_t, uint64_t> cupti_to_sys;
  for (const auto& entry : all_threads) {
    const auto& thread_id = entry.second;
    cupti_to_sys[thread_id.cupti_tid] = thread_id.sys_tid;
  }
  return cupti_to_sys;
}
}  // namespace details

// Nothing to set up here: members carry in-class initializers.
CudaTracer::CudaTracer() = default;

// Enables CUPTI activity collection and moves the tracer to the READY state.
//
// Precondition: the tracer is either UNINITED or STOPED; any other state
// raises a PreconditionNotMet error. The error text now names both accepted
// states so that it matches the condition actually being checked.
void CudaTracer::PrepareTracing() {
  PADDLE_ENFORCE_EQ(
      state_ == TracerState::UNINITED || state_ == TracerState::STOPED, true,
      platform::errors::PreconditionNotMet(
          "Tracer must be UNINITED or STOPED"));
  EnableCuptiActivity();
  state_ = TracerState::READY;
}

void CudaTracer::StartTracing() {
PADDLE_ENFORCE_EQ(
state_ == TracerState::READY, true,
platform::errors::PreconditionNotMet("Tracer must be READY or STOPPED"));
ConsumeBuffers();
tracing_start_ns_ = PosixInNsec();
state_ = TracerState::STARTED;
}

// Ends the current tracing session: disables CUPTI activity collection and
// moves the tracer to the STOPED state.
//
// Precondition: the tracer is STARTED; otherwise a PreconditionNotMet error is
// raised.
void CudaTracer::StopTracing() {
PADDLE_ENFORCE_EQ(
state_, TracerState::STARTED,
platform::errors::PreconditionNotMet("Tracer must be STARTED"));
DisableCuptiActivity();
state_ = TracerState::STOPED;
}

// Feeds the activity records gathered so far into `collector` by delegating to
// ProcessCuptiActivity(). The record count returned by that call is ignored.
//
// Precondition: the tracer is STOPED; otherwise a PreconditionNotMet error is
// raised.
void CudaTracer::CollectTraceData(TraceEventCollector* collector) {
PADDLE_ENFORCE_EQ(
state_, TracerState::STOPED,
platform::errors::PreconditionNotMet("Tracer must be STOPED"));
ProcessCuptiActivity(collector);
}

// Flushes CUPTI, drains every completed activity buffer and forwards each
// record in it to `collector`.
//
// Returns the number of records that were processed. When PADDLE_WITH_CUPTI is
// not defined the function does nothing and returns 0.
//
// Every drained buffer with a non-null address is released, including buffers
// that contain no valid bytes; skipping those would leak their allocation.
int CudaTracer::ProcessCuptiActivity(TraceEventCollector* collector) {
  int record_cnt = 0;
#ifdef PADDLE_WITH_CUPTI
  CUPTI_CALL(dynload::cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED));
  auto mapping = details::CreateThreadIdMapping();
  std::vector<ActivityBuffer> buffers = ConsumeBuffers();
  for (auto& buffer : buffers) {
    if (buffer.addr == nullptr) {
      continue;  // nothing to read and nothing to free
    }

    if (buffer.valid_size != 0) {
      CUpti_Activity* record = nullptr;
      while (true) {
        CUptiResult status = dynload::cuptiActivityGetNextRecord(
            buffer.addr, buffer.valid_size, &record);
        if (status == CUPTI_SUCCESS) {
          details::ProcessCuptiActivityRecord(record, tracing_start_ns_,
                                              mapping, collector);
          ++record_cnt;
        } else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
          // No more records in this buffer.
          break;
        } else {
          // Any other status is an error: CUPTI_CALL logs it and exits.
          CUPTI_CALL(status);
        }
      }
    }

    ReleaseBuffer(buffer.addr);
  }
#endif
  return record_cnt;
}

// Registers the buffer-request / buffer-completed callbacks with CUPTI and
// enables collection of the MEMCPY, CONCURRENT_KERNEL, DRIVER, RUNTIME and
// MEMSET activity kinds. Any failing CUPTI call is handled by CUPTI_CALL
// (logs and exits). The body is compiled out when PADDLE_WITH_CUPTI is not
// defined, making this a no-op.
void CudaTracer::EnableCuptiActivity() {
#ifdef PADDLE_WITH_CUPTI
// Callbacks must be in place before any activity kind is enabled.
// NOTE(review): ordering requirement taken from CUPTI usage convention -- confirm.
CUPTI_CALL(dynload::cuptiActivityRegisterCallbacks(BufferRequestedCallback,
BufferCompletedCallback));

CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY));
CUPTI_CALL(
dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET));
VLOG(3) << "enable cupti activity";
#endif
}

// Disables collection of the same activity kinds that EnableCuptiActivity()
// turns on: MEMCPY, CONCURRENT_KERNEL, DRIVER, RUNTIME and MEMSET. Any failing
// CUPTI call is handled by CUPTI_CALL (logs and exits). The body is compiled
// out when PADDLE_WITH_CUPTI is not defined, making this a no-op.
void CudaTracer::DisableCuptiActivity() {
#ifdef PADDLE_WITH_CUPTI
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY));
CUPTI_CALL(
dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DRIVER));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_RUNTIME));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMSET));
VLOG(3) << "disable cupti activity";
#endif
}

#ifdef PADDLE_WITH_CUPTI
// Callback registered with CUPTI (see EnableCuptiActivity) to obtain an empty
// activity buffer. Forwards to the singleton's AllocateBuffer(), which fills
// in `*buffer` and `*size`.
void CUPTIAPI CudaTracer::BufferRequestedCallback(uint8_t** buffer,
size_t* size,
size_t* max_num_records) {
GetInstance().AllocateBuffer(buffer, size);
// NOTE(review): 0 presumably tells CUPTI not to cap the number of records
// per buffer -- confirm against the CUPTI Activity API documentation.
*max_num_records = 0;
}

// Callback registered with CUPTI (see EnableCuptiActivity) to hand back a
// buffer. The buffer is queued on the singleton for later processing, then the
// number of dropped records for (ctx, stream_id) is queried and, if non-zero,
// reported as a warning. `size` is not used.
void CUPTIAPI CudaTracer::BufferCompletedCallback(CUcontext ctx,
                                                  uint32_t stream_id,
                                                  uint8_t* buffer, size_t size,
                                                  size_t valid_size) {
  CudaTracer& tracer = GetInstance();
  tracer.ProduceBuffer(buffer, valid_size);

  size_t dropped = 0;
  CUPTI_CALL(
      dynload::cuptiActivityGetNumDroppedRecords(ctx, stream_id, &dropped));
  if (dropped == 0) {
    return;  // nothing was lost, stay quiet
  }
  LOG(WARNING) << "Stream " << stream_id << " Dropped " << dropped
               << " activity records";
}
#endif

// Allocates one activity buffer of fixed capacity via AlignedMalloc and
// reports its address through `*buffer` and its capacity through `*size`.
void CudaTracer::AllocateBuffer(uint8_t** buffer, size_t* size) {
  constexpr size_t kAlignmentBytes = 8;       // 8 B
  constexpr size_t kCapacityBytes = 1 << 23;  // 8 MB

  auto raw = paddle::framework::AlignedMalloc(kCapacityBytes, kAlignmentBytes);
  *size = kCapacityBytes;
  *buffer = reinterpret_cast<uint8_t*>(raw);
}

void CudaTracer::ProduceBuffer(uint8_t* buffer, size_t valid_size) {
std::lock_guard<std::mutex> guard(activity_buffer_lock_);
activity_buffers_.emplace_back(buffer, valid_size);
}

// Takes ownership of every pending buffer: the pending list is exchanged with
// an empty vector under activity_buffer_lock_ and the former contents are
// returned to the caller. The lock is dropped before returning.
std::vector<CudaTracer::ActivityBuffer> CudaTracer::ConsumeBuffers() {
  std::vector<ActivityBuffer> drained;
  {
    std::lock_guard<std::mutex> lock(activity_buffer_lock_);
    activity_buffers_.swap(drained);
  }
  return drained;
}

// Returns a buffer to the allocator via AlignedFree. The counterpart of
// AllocateBuffer(), which obtains buffers with AlignedMalloc.
void CudaTracer::ReleaseBuffer(uint8_t* buffer) {
paddle::framework::AlignedFree(buffer);
}

} // namespace platform
} // namespace paddle
87 changes: 87 additions & 0 deletions paddle/fluid/platform/profiler/cuda_tracer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <mutex>
#include <vector>
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/profiler/tracer_base.h"

namespace paddle {
namespace platform {

// Based on CUDA CUPTI
// Tracer that collects CUDA activity through CUPTI. Implements the TracerBase
// life cycle (PrepareTracing -> StartTracing -> StopTracing ->
// CollectTraceData). Buffers handed back by CUPTI are queued in
// activity_buffers_ under activity_buffer_lock_ until they are consumed.
class CudaTracer : public TracerBase {
public:
// Singleton. CUPTI imposes this restriction.
static CudaTracer& GetInstance() {
static CudaTracer instance;
return instance;
}

// Enables CUPTI activity collection and moves the tracer to READY.
void PrepareTracing() override;

// Discards previously queued buffers, records the start time and moves the
// tracer to STARTED.
void StartTracing() override;

// Disables CUPTI activity collection and moves the tracer to STOPED.
void StopTracing() override;

// Processes all queued activity buffers into `collector`.
void CollectTraceData(TraceEventCollector* collector) override;

private:
// A buffer returned by CUPTI: its start address and the number of bytes that
// hold valid activity records.
struct ActivityBuffer {
ActivityBuffer(uint8_t* addr, size_t size) : addr(addr), valid_size(size) {}
uint8_t* addr;
size_t valid_size;
};

// Private: instances are only created through GetInstance().
CudaTracer();

DISABLE_COPY_AND_ASSIGN(CudaTracer);

// Registers the CUPTI buffer callbacks and enables the traced activity kinds.
void EnableCuptiActivity();

// Disables the activity kinds enabled by EnableCuptiActivity().
void DisableCuptiActivity();

// Flushes CUPTI, feeds every queued record to `collector` and returns the
// number of records processed.
int ProcessCuptiActivity(TraceEventCollector* collector);

#ifdef PADDLE_WITH_CUPTI
// Used by CUPTI Activity API to request buffer
static void CUPTIAPI BufferRequestedCallback(uint8_t** buffer, size_t* size,
size_t* max_num_records);

// Used by CUPTI Activity API to commit a completed buffer
static void CUPTIAPI BufferCompletedCallback(CUcontext ctx,
uint32_t stream_id,
uint8_t* buffer, size_t size,
size_t valid_size);
#endif

// Allocates a fresh activity buffer and reports its address and capacity.
void AllocateBuffer(uint8_t** buffer, size_t* size);

// Queues a completed buffer for later consumption.
void ProduceBuffer(uint8_t* buffer, size_t valid_size);

// Removes and returns all queued buffers.
std::vector<ActivityBuffer> ConsumeBuffers();

// Frees a buffer obtained from AllocateBuffer().
void ReleaseBuffer(uint8_t* buffer);

// Timestamp taken in StartTracing(); UINT64_MAX until tracing has started.
uint64_t tracing_start_ns_ = UINT64_MAX;
// Guards activity_buffers_.
std::mutex activity_buffer_lock_;
// Completed buffers waiting to be consumed.
std::vector<ActivityBuffer> activity_buffers_;
};

} // namespace platform
} // namespace paddle
Loading