Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion cpp/src/arrow/util/io_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@
#define __EXTENSIONS__
#endif

// For memset_s
#define __STDC_WANT_LIB_EXT1__ 1

#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep

#include <algorithm>
#include <array>
#include <cerrno>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <mutex>
#include <random>
Expand All @@ -52,6 +55,7 @@
#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h> // IWYU pragma: keep

Expand Down Expand Up @@ -1867,5 +1871,34 @@ uint64_t GetOptionalThreadId() {
return (tid == 0) ? tid - 1 : tid;
}

// Overwrite `size` bytes starting at `data` with zeros, using a facility
// that the compiler is not supposed to optimize away even if the memory is
// never read again (e.g. just before it is freed).
//
// A null `data` or a non-positive `size` is a no-op.
void SecureZero(uint8_t* data, int64_t size) {
  // Heavily borrowed from libb2's `secure_zero_memory` at
  // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h

  // Nothing to wipe.  Bailing out here also prevents a negative size from
  // wrapping around to a huge size_t in the cast below, and avoids passing
  // a null pointer to the C library routines.
  if (data == nullptr || size <= 0) {
    return;
  }
  const auto n = static_cast<size_t>(size);
#if defined(_WIN32)
  SecureZeroMemory(data, n);
#elif defined(__STDC_LIB_EXT1__)
  // Prefer the generic C11 (Annex K) call when the C library provides it
  memset_s(data, n, 0, n);
#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))
  // glibc 2.25+ has explicit_bzero
  explicit_bzero(data, n);
#else
  // Try to ensure that a true library call to memset() will be generated
  // by the compiler: call it through a volatile function pointer, then
  // add a compiler barrier that names `data` as an input.
  static const volatile auto memset_v = &memset;
  memset_v(data, 0, n);
  __asm__ __volatile__("" ::"r"(data) : "memory");
#endif
}

void SecureZero(std::string* data) {
if (data->length() > 0) {
SecureZero(reinterpret_cast<uint8_t*>(&(*data)[0]),
static_cast<int64_t>(data->length()));
data->clear();
}
}

} // namespace internal
} // namespace arrow
5 changes: 5 additions & 0 deletions cpp/src/arrow/util/io_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,5 +346,10 @@ int64_t GetRandomSeed();
ARROW_EXPORT
uint64_t GetThreadId();

/// \brief Overwrite `size` bytes starting at `data` with zeros
///
/// The implementation selects a platform-specific zeroing routine
/// (or a fallback built on memset) that is meant not to be optimized
/// away by the compiler.
ARROW_EXPORT
void SecureZero(uint8_t* data, int64_t size);
/// \brief Zero the current contents of `*data`, then clear the string
///
/// Only the first `length()` bytes of the string are overwritten.
/// A string that is already empty is left untouched.
ARROW_EXPORT
void SecureZero(std::string* data);

} // namespace internal
} // namespace arrow
38 changes: 38 additions & 0 deletions cpp/src/arrow/util/io_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "arrow/util/bit_util.h"
#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/string_view.h"
#include "arrow/util/windows_compatibility.h"
#include "arrow/util/windows_fixup.h"

Expand Down Expand Up @@ -719,5 +720,42 @@ TEST(SendSignal, ToThread) {
#endif
}

// Fixture for checking that SecureZero(std::string*) overwrites the bytes
// that the string held before the call.
class TestSecureZero : public ::testing::Test {
public:
// Call SecureZero() on `data_`, then assert that every byte of the area
// `data_` pointed to beforehand reads as zero.
void CheckSecureZero() {
// NOTE(review): `copy` is not read again within this function.
const std::string copy = data_;
// Remember where the contents lived, and how long they were, before zeroing
const auto old_ptr = data_.c_str();
const auto old_size = data_.length();
SecureZero(&data_);
// Allocate new area without initializing it, to minimize the risk of
// dereferencing an invalid address.
std::string new_string;
new_string.reserve(old_size);
// The old data should not be there anymore
// NOTE(review): this reads through `old_ptr` after `data_` was cleared;
// presumably it relies on the string keeping its storage -- confirm.
for (auto c : util::string_view(old_ptr, old_size)) {
ASSERT_EQ(c, 0);
}
}

protected:
// String under test; set by the test cases before calling CheckSecureZero()
std::string data_;
};

TEST_F(TestSecureZero, SmallString) {
  // Short enough that the characters may be stored inside the std::string
  // object itself
  const char* const kShortValue = "123";
  data_ = kShortValue;
  CheckSecureZero();
}

TEST_F(TestSecureZero, LargeString) {
  // 200 copies of 'x'
  const std::string::size_type kLength = 200;
  data_.assign(kLength, 'x');
  CheckSecureZero();
}

TEST_F(TestSecureZero, EmptyString) {
  // `data_` is still default-constructed here; the call shouldn't crash
  // or misbehave
  std::string* const empty_string = &data_;
  SecureZero(empty_string);
}

} // namespace internal
} // namespace arrow