Skip to content

Commit 85ef58c

Browse files
committed
libexpr: Implement small string optimization for Value
This attempts to store small strings inline in the Value struct. This is possible on 64-bit little-endian systems, and on any other system where the pointer tagging optimization is not used. Another reason for this change is to start storing the string length explicitly (at least for the small-string case for now).
1 parent 49e9c14 commit 85ef58c

File tree

3 files changed

+136
-5
lines changed

3 files changed

+136
-5
lines changed

src/libexpr/eval.cc

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,12 @@ DebugTraceStacker::DebugTraceStacker(EvalState & evalState, DebugTrace t)
824824

825825
void Value::mkString(std::string_view s)
826826
{
827+
if constexpr (ValueStorage::maxSmallStringSize > 0) {
828+
if (s.size() <= ValueStorage::maxSmallStringSize) {
829+
ValueStorage::setSmallString(s);
830+
return;
831+
}
832+
}
827833
mkStringNoCopy(makeImmutableString(s));
828834
}
829835

@@ -843,7 +849,12 @@ static const char ** encodeContext(const NixStringContext & context)
843849

844850
void Value::mkString(std::string_view s, const NixStringContext & context)
845851
{
846-
mkStringNoCopy(makeImmutableString(s), encodeContext(context));
852+
auto encodedContext = encodeContext(context);
853+
if (encodedContext == nullptr) {
854+
mkString(s);
855+
return;
856+
}
857+
mkStringNoCopy(makeImmutableString(s), encodedContext);
847858
}
848859

849860
void Value::mkStringMove(const char * s, const NixStringContext & context)
@@ -1958,8 +1969,7 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
19581969
/* c_str() is not str().c_str() because we want to create a string
19591970
Value. Allocating a GC'd string directly and moving it into a
19601971
Value lets us avoid an allocation and copy. */
1961-
const auto c_str = [&] {
1962-
char * result = allocString(sSize + 1);
1972+
const auto c_str = [&](char * result) {
19631973
char * tmp = result;
19641974
for (const auto & part : s) {
19651975
memcpy(tmp, part->data(), part->size());
@@ -2041,8 +2051,23 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
20412051
.withFrame(env, *this)
20422052
.debugThrow();
20432053
v.mkPath(state.rootPath(CanonPath(str())));
2044-
} else
2045-
v.mkStringMove(c_str(), context);
2054+
} else {
2055+
if (sSize == 0) {
2056+
v.mkStringMove("", context);
2057+
return;
2058+
}
2059+
2060+
if (sSize <= Value::maxSmallStringSize) {
2061+
/* +1 is required for the NUL terminator. */
2062+
std::array<char, Value::maxSmallStringSize + 1> result;
2063+
v.mkString(c_str(result.data()), context);
2064+
return;
2065+
}
2066+
2067+
char * result = allocString(sSize + 1);
2068+
v.mkStringMove(c_str(result), context);
2069+
result[sSize] = 0;
2070+
}
20462071
}
20472072

20482073
void ExprPos::eval(EvalState & state, Env & env, Value & v)

src/libexpr/include/nix/expr/symbol-table.hh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ public:
123123
const auto & [v, idx] = key.store.add(SymbolValue{});
124124
if (size == 0) {
125125
v.mkStringNoCopy("", nullptr);
126+
} else if (size <= Value::maxSmallStringSize) {
127+
v.mkString(key.s);
126128
} else {
127129
auto s = key.alloc.allocate(size + 1);
128130
memcpy(s, key.s.data(), size);

src/libexpr/include/nix/expr/value.hh

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <span>
66
#include <type_traits>
77
#include <concepts>
8+
#include <bit>
89

910
#include "nix/expr/eval-gc.hh"
1011
#include "nix/expr/value/context.hh"
@@ -47,6 +48,7 @@ typedef enum {
4748
/* layout: Single untaggable field */
4849
tListN,
4950
tString,
51+
tSmallString,
5052
tPath,
5153
} InternalType;
5254

@@ -323,14 +325,23 @@ inline constexpr InternalType payloadTypeToInternalType = PayloadTypeToInternalT
323325
template<std::size_t ptrSize, typename Enable = void>
324326
class ValueStorage : public detail::ValueBase
325327
{
328+
static constexpr std::size_t smallStringStorageSize = std::max({
329+
#define NIX_VALUE_STORAGE_FIELD_SIZE(T, FIELD_NAME, DISCRIMINATOR) sizeof(T),
330+
NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_FIELD_SIZE)
331+
#undef NIX_VALUE_STORAGE_FIELD_SIZE
332+
});
333+
326334
protected:
327335
using Payload = union
328336
{
329337
#define NIX_VALUE_STORAGE_DEFINE_FIELD(T, FIELD_NAME, DISCRIMINATOR) T FIELD_NAME;
330338
NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_DEFINE_FIELD)
331339
#undef NIX_VALUE_STORAGE_DEFINE_FIELD
340+
std::array<char, smallStringStorageSize> smallString;
332341
};
333342

343+
static constexpr std::size_t maxSmallStringSize = smallStringStorageSize - 1;
344+
334345
private:
335346
InternalType internalType = tUninitialized;
336347
Payload payload;
@@ -357,6 +368,30 @@ protected:
357368
#undef NIX_VALUE_STORAGE_GET_IMPL
358369
#undef NIX_VALUE_STORAGE_FOR_EACH_FIELD
359370

371+
void setSmallString(std::string_view s)
372+
{
373+
assert(s.size() <= maxSmallStringSize);
374+
internalType = tSmallString;
375+
payload.smallString = {};
376+
/* Trick is the same as in Facebook's Folly string. Use the last byte
377+
of the string to store the remaining capacity. This way it naturally
378+
becomes the null terminator when the string has the size (smallStringStorageSize - 1). */
379+
payload.smallString.back() = maxSmallStringSize - s.size();
380+
std::memcpy(payload.smallString.data(), s.data(), s.size());
381+
}
382+
383+
std::size_t getSmallStringSize() const
384+
{
385+
std::size_t remainingCapacity = payload.smallString.back();
386+
return maxSmallStringSize - remainingCapacity;
387+
}
388+
389+
const char * getSmallStringData() const
390+
{
391+
/* This string is null terminated. See setSmallString. */
392+
return payload.smallString.data();
393+
}
394+
360395
/** Get internal type currently occupying the storage. */
361396
InternalType getInternalType() const noexcept
362397
{
@@ -434,6 +469,7 @@ class ValueStorage<ptrSize, std::enable_if_t<detail::useBitPackedValueStorage<pt
434469
/* The order of these enumerators must be the same as in InternalType. */
435470
pdListN, //< layout: Single untaggable field.
436471
pdString,
472+
pdSmallString,
437473
pdPath,
438474
pdPairOfPointers, //< layout: Pair of pointers payload
439475
};
@@ -513,6 +549,7 @@ protected:
513549
/* The order must match that of the enumerations defined in InternalType. */
514550
case pdListN:
515551
case pdString:
552+
case pdSmallString:
516553
case pdPath:
517554
return static_cast<InternalType>(tListN + (pd - pdListN));
518555
case pdPairOfPointers:
@@ -643,6 +680,56 @@ protected:
643680
{
644681
setUntaggablePayload<pdPath>(path.accessor, path.path);
645682
}
683+
684+
/**
685+
* Pointer tagging doesn't play well with big endian systems (because the tag will be in the middle
686+
* of the array), so we don't do this optimization on big endian systems.
687+
*
688+
* 14 = 8 + 8 - 1 (the type tag) - 1 (remaining-capacity byte, which doubles as the null terminator)
689+
*/
690+
static constexpr std::size_t maxSmallStringSize = std::endian::native == std::endian::little ? 14 : 0;
691+
692+
void setSmallString(std::string_view s)
693+
{
694+
assert(s.size() <= maxSmallStringSize);
695+
696+
std::size_t remainingCapacity = maxSmallStringSize - s.size();
697+
payload = {pdSmallString, remainingCapacity << 56};
698+
699+
/* Only 7 characters go into the first word: we skip its first byte, which holds the type tag (stored in the 3 least significant bits). */
700+
{
701+
auto firstDWord = s.substr(0, 7);
702+
std::size_t bitPos = 8;
703+
for (auto c : firstDWord) {
704+
payload[0] |= (PackedPointer{static_cast<unsigned char>(c)} << bitPos);
705+
bitPos += 8;
706+
}
707+
708+
s.remove_prefix(firstDWord.size());
709+
}
710+
711+
{
712+
auto secondDWord = s;
713+
assert(secondDWord.size() <= 7);
714+
std::size_t bitPos = 0;
715+
for (auto c : secondDWord) {
716+
payload[1] |= (PackedPointer{static_cast<unsigned char>(c)} << bitPos);
717+
bitPos += 8;
718+
}
719+
}
720+
}
721+
722+
std::size_t getSmallStringSize() const
723+
{
724+
std::size_t remainingCapacity = payload[1] >> 56;
725+
return maxSmallStringSize - remainingCapacity;
726+
}
727+
728+
const char * getSmallStringData() const
729+
{
730+
/* Skip the type tag byte. */
731+
return reinterpret_cast<const char *>(payload.data()) + 1;
732+
}
646733
};
647734

648735
/**
@@ -849,6 +936,10 @@ struct Value : public ValueStorage<sizeof(void *)>
849936
}
850937

851938
public:
939+
/**
940+
* Maximum size of a string that can be stored inline without allocations.
941+
*/
942+
using ValueStorage::maxSmallStringSize;
852943

853944
/**
854945
* Never modify the backing `Value` object!
@@ -907,6 +998,7 @@ public:
907998
case tBool:
908999
return nBool;
9091000
case tString:
1001+
case tSmallString:
9101002
return nString;
9111003
case tPath:
9121004
return nPath;
@@ -1071,16 +1163,28 @@ public:
10711163

10721164
std::string_view string_view() const noexcept
10731165
{
1166+
if constexpr (maxSmallStringSize > 0) {
1167+
if (isa<tSmallString>())
1168+
return std::string_view{getSmallStringData(), getSmallStringSize()};
1169+
}
10741170
return std::string_view(getStorage<StringWithContext>().c_str);
10751171
}
10761172

10771173
const char * c_str() const noexcept
10781174
{
1175+
if constexpr (maxSmallStringSize > 0) {
1176+
if (isa<tSmallString>())
1177+
return getSmallStringData();
1178+
}
10791179
return getStorage<StringWithContext>().c_str;
10801180
}
10811181

10821182
const char ** context() const noexcept
10831183
{
1184+
if constexpr (maxSmallStringSize > 0) {
1185+
if (isa<tSmallString>())
1186+
return nullptr;
1187+
}
10841188
return getStorage<StringWithContext>().context;
10851189
}
10861190

0 commit comments

Comments
 (0)