55#include < span>
66#include < type_traits>
77#include < concepts>
8+ #include < bit>
89
910#include " nix/expr/eval-gc.hh"
1011#include " nix/expr/value/context.hh"
@@ -47,6 +48,7 @@ typedef enum {
4748 /* layout: Single untaggable field */
4849 tListN,
4950 tString,
51+ tSmallString,
5052 tPath,
5153} InternalType;
5254
@@ -323,14 +325,23 @@ inline constexpr InternalType payloadTypeToInternalType = PayloadTypeToInternalT
323325template <std::size_t ptrSize, typename Enable = void >
324326class ValueStorage : public detail ::ValueBase
325327{
328+ static constexpr std::size_t smallStringStorageSize = std::max({
329+ #define NIX_VALUE_STORAGE_FIELD_SIZE (T, FIELD_NAME, DISCRIMINATOR ) sizeof (T),
330+ NIX_VALUE_STORAGE_FOR_EACH_FIELD (NIX_VALUE_STORAGE_FIELD_SIZE)
331+ #undef NIX_VALUE_STORAGE_FIELD_SIZE
332+ });
333+
326334protected:
327335 using Payload = union
328336 {
329337#define NIX_VALUE_STORAGE_DEFINE_FIELD (T, FIELD_NAME, DISCRIMINATOR ) T FIELD_NAME;
330338 NIX_VALUE_STORAGE_FOR_EACH_FIELD (NIX_VALUE_STORAGE_DEFINE_FIELD)
331339#undef NIX_VALUE_STORAGE_DEFINE_FIELD
340+ std::array<char , smallStringStorageSize> smallString;
332341 };
333342
343+ static constexpr std::size_t maxSmallStringSize = smallStringStorageSize - 1 ;
344+
334345private:
335346 InternalType internalType = tUninitialized;
336347 Payload payload;
@@ -357,6 +368,30 @@ protected:
357368#undef NIX_VALUE_STORAGE_GET_IMPL
358369#undef NIX_VALUE_STORAGE_FOR_EACH_FIELD
359370
371+ void setSmallString (std::string_view s)
372+ {
373+ assert (s.size () <= maxSmallStringSize);
374+ internalType = tSmallString;
375+ payload.smallString = {};
376+ /* Trick is the same as in Facebook's Folly string. Use the last byte
377+ of the string to store the remaining capacity. This was it naturally
378+ becomes the null terminator when string has the size (smallStringStorageSize - 1). */
379+ payload.smallString .back () = maxSmallStringSize - s.size ();
380+ std::memcpy (payload.smallString .data (), s.data (), s.size ());
381+ }
382+
383+ std::size_t getSmallStringSize () const
384+ {
385+ std::size_t remainingCapacity = payload.smallString .back ();
386+ return maxSmallStringSize - remainingCapacity;
387+ }
388+
389+ const char * getSmallStringData () const
390+ {
391+ /* This string is null terminated. See setSmallString. */
392+ return payload.smallString .data ();
393+ }
394+
360395 /* * Get internal type currently occupying the storage. */
361396 InternalType getInternalType () const noexcept
362397 {
@@ -434,6 +469,7 @@ class ValueStorage<ptrSize, std::enable_if_t<detail::useBitPackedValueStorage<pt
434469 /* The order of these enumerations must be the same as in InternalType. */
435470 pdListN, // < layout: Single untaggable field.
436471 pdString,
472+ pdSmallString,
437473 pdPath,
438474 pdPairOfPointers, // < layout: Pair of pointers payload
439475 };
@@ -513,6 +549,7 @@ protected:
513549 /* The order must match that of the enumerations defined in InternalType. */
514550 case pdListN:
515551 case pdString:
552+ case pdSmallString:
516553 case pdPath:
517554 return static_cast <InternalType>(tListN + (pd - pdListN));
518555 case pdPairOfPointers:
@@ -643,6 +680,56 @@ protected:
643680 {
644681 setUntaggablePayload<pdPath>(path.accessor , path.path );
645682 }
683+
684+ /* *
685+ * Pointer tagging doesn't play well with big endian systems (because the tag will be in the middle
686+ * of the array), so we don't do this optimization on big endian systems.
687+ *
688+ * 14 = 8 + 8 - 1 (the type tag) - 1 (string size + null terminator)
689+ */
690+ static constexpr std::size_t maxSmallStringSize = std::endian::native == std::endian::little ? 14 : 0 ;
691+
692+ void setSmallString (std::string_view s)
693+ {
694+ assert (s.size () <= maxSmallStringSize);
695+
696+ std::size_t remainingCapacity = maxSmallStringSize - s.size ();
697+ payload = {pdSmallString, remainingCapacity << 56 };
698+
699+ /* 7 - we are skipping the first tag byte (it's stored in the 3 least significant bits). */
700+ {
701+ auto firstDWord = s.substr (0 , 7 );
702+ std::size_t bitPos = 8 ;
703+ for (auto c : firstDWord) {
704+ payload[0 ] |= (PackedPointer{static_cast <unsigned char >(c)} << bitPos);
705+ bitPos += 8 ;
706+ }
707+
708+ s.remove_prefix (firstDWord.size ());
709+ }
710+
711+ {
712+ auto secondDWord = s;
713+ assert (secondDWord.size () <= 7 );
714+ std::size_t bitPos = 0 ;
715+ for (auto c : secondDWord) {
716+ payload[1 ] |= (PackedPointer{static_cast <unsigned char >(c)} << bitPos);
717+ bitPos += 8 ;
718+ }
719+ }
720+ }
721+
722+ std::size_t getSmallStringSize () const
723+ {
724+ std::size_t remainingCapacity = payload[1 ] >> 56 ;
725+ return maxSmallStringSize - remainingCapacity;
726+ }
727+
728+ const char * getSmallStringData () const
729+ {
730+ /* Skip the type tag byte. */
731+ return reinterpret_cast <const char *>(payload.data ()) + 1 ;
732+ }
646733};
647734
648735/* *
@@ -849,6 +936,10 @@ struct Value : public ValueStorage<sizeof(void *)>
849936 }
850937
851938public:
939+ /* *
940+ * Maximum size of a string that can be stored inline without allocations.
941+ */
942+ using ValueStorage::maxSmallStringSize;
852943
853944 /* *
854945 * Never modify the backing `Value` object!
@@ -907,6 +998,7 @@ public:
907998 case tBool:
908999 return nBool;
9091000 case tString:
1001+ case tSmallString:
9101002 return nString;
9111003 case tPath:
9121004 return nPath;
@@ -1071,16 +1163,28 @@ public:
10711163
10721164 std::string_view string_view () const noexcept
10731165 {
1166+ if constexpr (maxSmallStringSize > 0 ) {
1167+ if (isa<tSmallString>())
1168+ return std::string_view{getSmallStringData (), getSmallStringSize ()};
1169+ }
10741170 return std::string_view (getStorage<StringWithContext>().c_str );
10751171 }
10761172
10771173 const char * c_str () const noexcept
10781174 {
1175+ if constexpr (maxSmallStringSize > 0 ) {
1176+ if (isa<tSmallString>())
1177+ return getSmallStringData ();
1178+ }
10791179 return getStorage<StringWithContext>().c_str ;
10801180 }
10811181
10821182 const char ** context () const noexcept
10831183 {
1184+ if constexpr (maxSmallStringSize > 0 ) {
1185+ if (isa<tSmallString>())
1186+ return nullptr ;
1187+ }
10841188 return getStorage<StringWithContext>().context ;
10851189 }
10861190
0 commit comments