11#include < algorithm>
2+ #include < array>
23#include < cassert>
34#include < cctype>
4- #include < cinttypes>
55#include < cstdarg>
66#include < cstdio>
77#include < functional>
8- #include < iostream>
98#include < map>
10- #include < set>
119#include < string>
1210#include < string_view>
1311#include < vector>
@@ -72,42 +70,24 @@ size_t GetFileSize(const std::string& filename, int* error) {
7270 return result;
7371}
7472
75- bool EndsWith (const std::string& str, std::string_view suffix) {
76- size_t suffix_len = suffix.length ();
77- size_t str_len = str.length ();
78- if (str_len < suffix_len) {
79- return false ;
80- }
81- return str.compare (str_len - suffix_len, suffix_len, suffix) == 0 ;
82- }
83-
84- bool StartsWith (const std::string& str, std::string_view prefix) {
85- size_t prefix_len = prefix.length ();
86- size_t str_len = str.length ();
87- if (str_len < prefix_len) {
88- return false ;
89- }
90- return str.compare (0 , prefix_len, prefix) == 0 ;
91- }
92-
// Reports whether `path` refers to a config.gypi file: either the bare name
// "config.gypi" or any path that ends in "/config.gypi". A name that merely
// shares the trailing letters (e.g. "myconfig.gypi") does not match.
//
// Takes a std::string_view, so std::string, string literals and other views
// can all be passed without an allocation; usable in constant expressions.
constexpr bool FilenameIsConfigGypi(const std::string_view path) {
  constexpr std::string_view kSlashName = "/config.gypi";
  constexpr std::string_view kBareName = kSlashName.substr(1);
  if (path == kBareName) {
    return true;
  }
  // Suffix test spelled with size()/compare() so that it is valid constexpr
  // code under both C++17 and C++20. The size check keeps the compare()
  // start position in range.
  return path.size() >= kSlashName.size() &&
         path.compare(path.size() - kSlashName.size(),
                      kSlashName.size(),
                      kSlashName) == 0;
}
9676
// List of file paths.
using FileList = std::vector<std::string>;
// Maps a string key to the list of files collected for it; SearchFiles() keys
// its entries by the extension it was asked to look for.
using FileMap = std::map<std::string, FileList>;
9979
10080bool SearchFiles (const std::string& dir,
10181 FileMap* file_map,
102- const std::string& extension) {
82+ std::string_view extension) {
10383 uv_fs_t scan_req;
10484 int result = uv_fs_scandir (nullptr , &scan_req, dir.c_str (), 0 , nullptr );
10585 bool errored = false ;
10686 if (result < 0 ) {
10787 PrintUvError (" scandir" , dir.c_str (), result);
10888 errored = true ;
10989 } else {
110- auto it = file_map->insert ({extension, FileList ()}).first ;
90+ auto it = file_map->insert ({std::string ( extension) , FileList ()}).first ;
11191 FileList& files = it->second ;
11292 files.reserve (files.size () + result);
11393 uv_dirent_t dent;
@@ -124,7 +104,7 @@ bool SearchFiles(const std::string& dir,
124104 }
125105
126106 std::string path = dir + ' /' + dent.name ;
127- if (EndsWith ( path, extension)) {
107+ if (path. ends_with ( extension)) {
128108 files.emplace_back (path);
129109 continue ;
130110 }
@@ -153,12 +133,11 @@ constexpr std::string_view kJsSuffix = ".js";
153133constexpr std::string_view kGypiSuffix = " .gypi" ;
154134constexpr std::string_view depsPrefix = " deps/" ;
155135constexpr std::string_view libPrefix = " lib/" ;
156- std::set<std::string_view> kAllowedExtensions {
157- kGypiSuffix , kJsSuffix , kMjsSuffix };
158136
159- std::string_view HasAllowedExtensions (const std::string& filename) {
160- for (const auto & ext : kAllowedExtensions ) {
161- if (EndsWith (filename, ext)) {
137+ constexpr std::string_view HasAllowedExtensions (
138+ const std::string_view filename) {
139+ for (const auto & ext : {kGypiSuffix , kJsSuffix , kMjsSuffix }) {
140+ if (filename.ends_with (ext)) {
162141 return ext;
163142 }
164143 }
@@ -350,17 +329,17 @@ std::string GetFileId(const std::string& filename) {
350329 size_t start = 0 ;
351330 std::string prefix;
352331 // Strip .mjs and .js suffix
353- if (EndsWith ( filename, kMjsSuffix )) {
332+ if (filename. ends_with ( kMjsSuffix )) {
354333 end -= kMjsSuffix .size ();
355- } else if (EndsWith ( filename, kJsSuffix )) {
334+ } else if (filename. ends_with ( kJsSuffix )) {
356335 end -= kJsSuffix .size ();
357336 }
358337
359338 // deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn
360- if (StartsWith ( filename, depsPrefix)) {
339+ if (filename. starts_with ( depsPrefix)) {
361340 start = depsPrefix.size ();
362341 prefix = " internal/deps/" ;
363- } else if (StartsWith ( filename, libPrefix)) {
342+ } else if (filename. starts_with ( libPrefix)) {
364343 // lib/internal/url.js -> internal/url
365344 start = libPrefix.size ();
366345 prefix = " " ;
@@ -381,18 +360,52 @@ std::string GetVariableName(const std::string& id) {
381360 return result;
382361}
383362
// Builds the lookup data used by GetCode(): a character buffer and an array
// of offsets into it.
//
// The buffer is just "0,1,2,3,...,65535,". The offsets array contains the
// start of each substring ("0,", "1,", etc.) and one final offset that points
// just beyond the end of the buffer, so entry i always spans
// [off[i], off[i + 1]).
//
// 382106 is the length of the string "0,1,2,3,...,65535,"
//   (10 * 2 + 90 * 3 + 900 * 4 + 9000 * 5 + 55536 * 6).
// 65537 is 2**16 + 1.
//
// This function could be constexpr, but it might become too expensive to
// compile.
std::pair<std::array<char, 382106>, std::array<uint32_t, 65537>>
precompute_string() {
  // Fill the returned object in place. Building two separate local arrays and
  // copying them into the result would put roughly 644 KB on the stack and
  // copy it once more; returning a single named local lets the compiler
  // construct it directly in the caller's storage.
  std::pair<std::array<char, 382106>, std::array<uint32_t, 65537>> table;
  std::array<char, 382106>& str = table.first;
  std::array<uint32_t, 65537>& off = table.second;

  // Writes the decimal digits of `value` followed by ',' starting at `out`
  // and returns the number of characters written.
  const auto write_entry = [](uint16_t value, char* out) -> uint32_t {
    uint32_t len = 0;
    do {
      out[len++] = static_cast<char>('0' + value % 10);
      value /= 10;
    } while (value != 0);
    // The digits were produced least-significant first.
    std::reverse(out, out + len);
    out[len] = ',';
    return len + 1;
  };

  uint32_t pos = 0;
  for (uint32_t value = 0; value < 65536; ++value) {
    off[value] = pos;
    pos += write_entry(static_cast<uint16_t>(value), str.data() + pos);
  }
  off[65536] = pos;
  // The hard-coded buffer length must match what was written exactly.
  assert(pos == str.size());
  return table;
}

// Returns the text "<index>," (the decimal value followed by a comma) as a
// view into a table that is built on the first call. The view points into
// function-local static storage; it is not null-terminated, so always use it
// together with its size().
std::string_view GetCode(uint16_t index) {
  // We use about 644254 bytes of memory. An array of 65536 strings might use
  // 2097152 bytes so we save 3x the memory.
  static const auto table = precompute_string();
  const auto& backing_string = table.first;
  const auto& offsets = table.second;
  const uint32_t begin = offsets[index];
  // index + 1 is at most 65536, which is the final, past-the-end offset.
  const uint32_t end = offsets[static_cast<size_t>(index) + 1];
  return std::string_view(backing_string.data() + begin, end - begin);
}
397410
398411#ifdef NODE_JS2C_USE_STRING_LITERALS
@@ -532,8 +545,7 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
532545 // Avoid using snprintf on large chunks of data because it's much slower.
533546 // It's fine to use it on small amount of data though.
534547 if constexpr (is_two_byte) {
535- std::vector<uint16_t > utf16_codepoints;
536- utf16_codepoints.resize (count);
548+ std::vector<uint16_t > utf16_codepoints (count);
537549 size_t utf16_count = simdutf::convert_utf8_to_utf16 (
538550 code.data (),
539551 code.size (),
@@ -542,8 +554,8 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
542554 utf16_codepoints.resize (utf16_count);
543555 Debug (" static size %zu\n " , utf16_count);
544556 for (size_t i = 0 ; i < utf16_count; ++i) {
545- const std::string& str = GetCode (utf16_codepoints[i]);
546- memcpy (result.data () + cur, str.c_str (), str.size ());
557+ std::string_view str = GetCode (utf16_codepoints[i]);
558+ memcpy (result.data () + cur, str.data (), str.size ());
547559 cur += str.size ();
548560 }
549561 } else {
@@ -556,8 +568,8 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
556568 i,
557569 ch);
558570 }
559- const std::string& str = GetCode (ch);
560- memcpy (result.data () + cur, str.c_str (), str.size ());
571+ std::string_view str = GetCode (ch);
572+ memcpy (result.data () + cur, str.data (), str.size ());
561573 cur += str.size ();
562574 }
563575 }
@@ -895,8 +907,8 @@ int Main(int argc, char* argv[]) {
895907 int error = 0 ;
896908 const std::string& file = args[i];
897909 if (IsDirectory (file, &error)) {
898- if (!SearchFiles (file, &file_map, std::string ( kJsSuffix ) ) ||
899- !SearchFiles (file, &file_map, std::string ( kMjsSuffix ) )) {
910+ if (!SearchFiles (file, &file_map, kJsSuffix ) ||
911+ !SearchFiles (file, &file_map, kMjsSuffix )) {
900912 return 1 ;
901913 }
902914 } else if (error != 0 ) {
0 commit comments