@@ -190,6 +190,17 @@ const meta = [
190190 '' , '' , '' , '' , '' , '' , '' , '\\\\'
191191] ;
192192
// Pattern used to split strings on ANSI escape codes. It matches every ANSI
// escape code sequence contained in a string.
// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
// License: MIT, authors: @sindresorhus, Qix-, arjunmehta and LitoMore
const ansiPattern = [
  // Introducer (ESC or CSI) followed by any intermediate characters.
  '[\\u001B\\u009B][[\\]()#;?]*',
  // Alternative 1: a sequence terminated by BEL.
  '(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)',
  // Alternative 2: optional numeric parameters followed by a final byte.
  '|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))',
].join('');
const ansi = new RegExp(ansiPattern, 'g');

// Assigned further down, once it is known whether ICU support is available.
let getStringWidth;
203+
193204function getUserOptions ( ctx ) {
194205 return {
195206 stylize : ctx . stylize ,
@@ -1149,7 +1160,7 @@ function groupArrayElements(ctx, output, value) {
11491160 // entries length of all output entries. We have to remove colors first,
11501161 // otherwise the length would not be calculated properly.
11511162 for ( ; i < outputLength ; i ++ ) {
1152- const len = ctx . colors ? removeColors ( output [ i ] ) . length : output [ i ] . length ;
1163+ const len = getStringWidth ( output [ i ] , ctx . colors ) ;
11531164 dataLen [ i ] = len ;
11541165 totalLength += len + separatorSpace ;
11551166 if ( maxLength < len )
@@ -1192,8 +1203,6 @@ function groupArrayElements(ctx, output, value) {
11921203 if ( columns <= 1 ) {
11931204 return output ;
11941205 }
1195- // TODO(BridgeAR): Add unicode support. Use the readline getStringWidth
1196- // function.
11971206 const tmp = [ ] ;
11981207 const maxLineLength = [ ] ;
11991208 for ( let i = 0 ; i < columns ; i ++ ) {
@@ -1560,11 +1569,8 @@ function formatProperty(ctx, value, recurseTimes, key, type, desc) {
15601569 const diff = ( ctx . compact !== true || type !== kObjectType ) ? 2 : 3 ;
15611570 ctx . indentationLvl += diff ;
15621571 str = formatValue ( ctx , desc . value , recurseTimes ) ;
1563- if ( diff === 3 ) {
1564- const len = ctx . colors ? removeColors ( str ) . length : str . length ;
1565- if ( ctx . breakLength < len ) {
1566- extra = `\n${ ' ' . repeat ( ctx . indentationLvl ) } ` ;
1567- }
1572+ if ( diff === 3 && ctx . breakLength < getStringWidth ( str , ctx . colors ) ) {
1573+ extra = `\n${ ' ' . repeat ( ctx . indentationLvl ) } ` ;
15681574 }
15691575 ctx . indentationLvl -= diff ;
15701576 } else if ( desc . get !== undefined ) {
@@ -1884,9 +1890,116 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
18841890 return str ;
18851891}
18861892
1893+ if ( internalBinding ( 'config' ) . hasIntl ) {
1894+ const icu = internalBinding ( 'icu' ) ;
1895+ // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
1896+ // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
1897+ // TODO(BridgeAR): Expose the options to the user. That is probably the
1898+ // best thing possible at the moment, since it's difficult to know what
1899+ // the receiving end supports.
/**
 * Returns the number of columns required to display the given string.
 *
 * The leading ASCII part of the string is measured in JavaScript. As soon as
 * a code unit >= 127 is seen, the remainder of the string is handed to
 * `icu.getStringWidth()` and its result is added to the ASCII count.
 *
 * @param {string} str The string to measure.
 * @param {boolean} [removeControlChars=true] Strip ANSI escape codes with
 *   `stripVTControlCharacters()` before measuring.
 * @returns {number} The computed width.
 */
getStringWidth = function getStringWidth(str, removeControlChars = true) {
  const text = removeControlChars ? stripVTControlCharacters(str) : str;
  const length = text.length;
  let columns = 0;
  let index = 0;
  while (index < length) {
    const unit = text.charCodeAt(index);
    if (unit >= 127) {
      // Not plain ASCII anymore: let the native binding measure the rest.
      return columns + icu.getStringWidth(text.slice(index));
    }
    // Code units below 32 do not contribute to the width.
    if (unit >= 32) {
      columns++;
    }
    index++;
  }
  // The string was fully ASCII, so no native call was needed.
  return columns;
};
1916+ } else {
/**
 * Returns the number of columns required to display the given string.
 *
 * Every code point counts as two columns when `isFullWidthCodePoint()`
 * reports it as full-width, as zero columns when `isZeroWidthCodePoint()`
 * reports it as zero-width, and as one column otherwise.
 *
 * @param {string} str The string to measure.
 * @param {boolean} [removeControlChars=true] Strip ANSI escape codes with
 *   `stripVTControlCharacters()` before measuring.
 * @returns {number} The computed width.
 */
getStringWidth = function getStringWidth(str, removeControlChars = true) {
  let width = 0;

  if (removeControlChars)
    str = stripVTControlCharacters(str);

  // Measure the canonically composed form. Without this, decomposed text is
  // over-counted: e.g. a Hangul syllable written as conjoining jamo
  // (U+1112 U+1161 U+11AB) would add up to 4 columns instead of 2.
  str = str.normalize('NFC');

  // Iterating with for-of walks code points, not UTF-16 code units.
  for (const char of str) {
    const code = char.codePointAt(0);
    if (isFullWidthCodePoint(code)) {
      width += 2;
    } else if (!isZeroWidthCodePoint(code)) {
      width++;
    }
  }

  return width;
};
1937+
/**
 * Tells whether the character represented by the given Unicode code point is
 * full-width.
 *
 * The code points are partially derived from:
 * http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
 *
 * @param {number} code The Unicode code point to check.
 * @returns {boolean} `true` for full-width code points, `false` otherwise.
 */
const isFullWidthCodePoint = (code) => {
  // Nothing below U+1100 is treated as full-width.
  if (!(code >= 0x1100))
    return false;

  // Hangul Jamo
  if (code <= 0x115f)
    return true;
  // LEFT-POINTING ANGLE BRACKET and RIGHT-POINTING ANGLE BRACKET
  if (code === 0x2329 || code === 0x232a)
    return true;
  // CJK Radicals Supplement .. Enclosed CJK Letters and Months, with the
  // exception of U+303F.
  if (code >= 0x2e80 && code <= 0x3247)
    return code !== 0x303f;
  // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
  if (code >= 0x3250 && code <= 0x4dbf)
    return true;
  // CJK Unified Ideographs .. Yi Radicals
  if (code >= 0x4e00 && code <= 0xa4c6)
    return true;
  // Hangul Jamo Extended-A
  if (code >= 0xa960 && code <= 0xa97c)
    return true;
  // Hangul Syllables
  if (code >= 0xac00 && code <= 0xd7a3)
    return true;
  // CJK Compatibility Ideographs
  if (code >= 0xf900 && code <= 0xfaff)
    return true;
  // Vertical Forms
  if (code >= 0xfe10 && code <= 0xfe19)
    return true;
  // CJK Compatibility Forms .. Small Form Variants
  if (code >= 0xfe30 && code <= 0xfe6b)
    return true;
  // Halfwidth and Fullwidth Forms
  if (code >= 0xff01 && code <= 0xff60)
    return true;
  if (code >= 0xffe0 && code <= 0xffe6)
    return true;
  // Kana Supplement
  if (code >= 0x1b000 && code <= 0x1b001)
    return true;
  // Enclosed Ideographic Supplement
  if (code >= 0x1f200 && code <= 0x1f251)
    return true;
  // Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
  // Emoticons 0x1f600 - 0x1f64f
  if (code >= 0x1f300 && code <= 0x1f64f)
    return true;
  // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
  return code >= 0x20000 && code <= 0x3fffd;
};
1979+
/**
 * Tells whether the character represented by the given Unicode code point
 * occupies no column when displayed.
 *
 * @param {number} code The Unicode code point to check.
 * @returns {boolean} `true` for zero-width code points, `false` otherwise.
 */
const isZeroWidthCodePoint = (code) => {
  return code <= 0x1F || // C0 control codes
    // DEL (0x7F) is a non-printing control character as well, so the range
    // has to start at 0x7F and not right after it.
    (code >= 0x7F && code <= 0x9F) || // DEL and C1 control codes
    (code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
    (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
    // Combining Diacritical Marks for Symbols
    (code >= 0x20D0 && code <= 0x20FF) ||
    (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
    (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
    (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
};
1989+ }
1990+
/**
 * Removes every sequence matched by the module-level `ansi` pattern (ANSI /
 * VT control sequences) from the given string. Useful to estimate the
 * displayed width of a string.
 *
 * @param {string} str The string to strip.
 * @returns {string} `str` without the matched escape sequences.
 */
function stripVTControlCharacters(str) {
  const withoutEscapes = str.replace(ansi, '');
  return withoutEscapes;
}
1997+
18871998module . exports = {
18881999 inspect,
18892000 format,
18902001 formatWithOptions,
1891- inspectDefaultOptions
2002+ getStringWidth,
2003+ inspectDefaultOptions,
2004+ stripVTControlCharacters
18922005} ;
0 commit comments