11'use strict' ;
22
3- const { SafeSet, MathMax } = primordials ;
3+ const { SafeSet, MathMax, StringPrototypeIncludes } = primordials ;
44const {
55 codes : { ERR_TAP_LEXER_ERROR } ,
66} = require ( 'internal/errors' ) ;
77
8+ const kEOL = '' ; // Value given to EOL tokens: the empty string (used by the constructor's initial token and by #scanEOL).
9+ const kEOF = '' ; // Value given to EOF tokens by #scanEOF: also the empty string.
10+
811const TokenKind = {
912 EOF : 'EOF' ,
1013 EOL : 'EOL' ,
14+ NEWLINE : 'NewLine' ,
1115 NUMERIC : 'Numeric' ,
1216 LITERAL : 'Literal' ,
1317 KEYWORD : 'Keyword' ,
@@ -43,7 +47,7 @@ class Token {
4347 line : stream . line ,
4448 column : MathMax ( stream . column - valueLength + 1 , 1 ) , // 1 based
4549 start : MathMax ( stream . pos - valueLength , 0 ) , // zero based
46- end : stream . pos - 1 , // zero based
50+ end : stream . pos - ( value === '' ? 0 : 1 ) , // zero based
4751 } ;
4852
4953 // EOF is a special case
@@ -116,7 +120,7 @@ class TapLexer {
116120 this . #source = new InputStream ( source ) ;
117121 this . #lastScannedToken = new Token ( {
118122 kind : TokenKind . EOL ,
119- value : TokenKind . EOL ,
123+ value : kEOL ,
120124 stream : this . #source,
121125 } ) ;
122126 }
@@ -132,8 +136,8 @@ class TapLexer {
132136 this . #lastScannedToken = token ;
133137 }
134138
135- if ( token . kind === TokenKind . EOL ) {
136- // Store the current chunk + EOL token
139+ if ( token . kind === TokenKind . NEWLINE ) {
140+ // Store the current chunk + NEWLINE token
137141 tokens . push ( [ ...chunk , token ] ) ;
138142 chunk = [ ] ;
139143 } else {
@@ -142,14 +146,11 @@ class TapLexer {
142146 }
143147
144148 if ( chunk . length > 0 ) {
145- tokens . push ( chunk ) ;
149+ tokens . push ( [ ... chunk , this . #scanEOL ( ) ] ) ;
146150 }
147151
148- if ( tokens . length > 0 ) {
149- tokens . at ( - 1 ) . push ( this . #scanEOF( ) ) ;
150- } else {
151- tokens . push ( [ this . #scanEOF( ) ] ) ;
152- }
152+ // send EOF as a separate chunk
153+ tokens . push ( [ this . #scanEOF( ) ] ) ;
153154
154155 return tokens ;
155156 }
@@ -171,8 +172,8 @@ class TapLexer {
171172
172173 if ( this . #isEOFSymbol( char ) ) {
173174 return this . #scanEOF( ) ;
174- } else if ( this . #isEOLSymbol ( char ) ) {
175- return this . #scanEOL ( char ) ;
175+ } else if ( this . #isNewLineSymbol ( char ) ) {
176+ return this . #scanNewLine ( char ) ;
176177 } else if ( this . #isNumericSymbol( char ) ) {
177178 return this . #scanNumeric( char ) ;
178179 } else if ( this . #isDashSymbol( char ) ) {
@@ -196,7 +197,7 @@ class TapLexer {
196197 ) ;
197198 }
198199
199- #scanEOL ( char ) {
200+ #scanNewLine ( char ) {
200201 // In case of odd number of ESCAPE symbols, we need to clear the remaining
201202 // escape chars from the stack and start fresh for the next line.
202203 this . #escapeStack = [ ] ;
@@ -205,18 +206,26 @@ class TapLexer {
205206 this . #isComment = false ;
206207
207208 return new Token ( {
208- kind : TokenKind . EOL ,
209+ kind : TokenKind . NEWLINE ,
209210 value : char ,
210211 stream : this . #source,
211212 } ) ;
212213 }
213214
215+ #scanEOL( ) { // Returns a new EOL token; takes no character and reads nothing from the input here.
216+ return new Token ( {
217+ kind : TokenKind . EOL ,
218+ value : kEOL , // kEOL is the empty string
219+ stream : this . #source, // Token derives line/column/start/end from the stream's current position
220+ } ) ;
221+ }
222+
214223 #scanEOF( ) { // Returns a new EOF token built from the current stream position.
215224 this . #isComment = false ; // Clear the comment flag before emitting EOF.
216225
217226 return new Token ( {
218227 kind : TokenKind . EOF ,
219- value : TokenKind . EOF , // removed line: the value used to be the kind name 'EOF'
228+ value : kEOF , // added line: the value is now kEOF, the empty string
220229 stream : this . #source,
221230 } ) ;
222231 }
@@ -292,7 +301,7 @@ class TapLexer {
292301
293302 // If we encounter a hash symbol at the beginning of a line,
294303 // we consider it as a comment
295- if ( ! lastCharacter || this . #isEOLSymbol ( lastCharacter ) ) {
304+ if ( ! lastCharacter || this . #isNewLineSymbol ( lastCharacter ) ) {
296305 this . #isComment = true ;
297306 return new Token ( {
298307 kind : TokenKind . COMMENT ,
@@ -371,7 +380,12 @@ class TapLexer {
371380 }
372381
373382 #scanTAPKeyword( word ) {
374- if ( word === 'TAP' && this . #lastScannedToken. kind === TokenKind . EOL ) {
383+ const isLastScannedTokenEOLorNewLine = StringPrototypeIncludes (
384+ [ TokenKind . EOL , TokenKind . NEWLINE ] ,
385+ this . #lastScannedToken. kind
386+ ) ;
387+
388+ if ( word === 'TAP' && isLastScannedTokenEOLorNewLine ) {
375389 return new Token ( {
376390 kind : TokenKind . TAP ,
377391 value : word ,
@@ -395,7 +409,7 @@ class TapLexer {
395409 } ) ;
396410 }
397411
398- if ( word === 'not' && this . #lastScannedToken . kind === TokenKind . EOL ) {
412+ if ( word === 'not' && isLastScannedTokenEOLorNewLine ) {
399413 return new Token ( {
400414 kind : TokenKind . TAP_TEST_NOTOK ,
401415 value : word ,
@@ -406,7 +420,7 @@ class TapLexer {
406420 if (
407421 word === 'ok' &&
408422 ( this . #lastScannedToken. kind === TokenKind . TAP_TEST_NOTOK ||
409- this . #lastScannedToken . kind === TokenKind . EOL )
423+ isLastScannedTokenEOLorNewLine )
410424 ) {
411425 return new Token ( {
412426 kind : TokenKind . TAP_TEST_OK ,
@@ -415,7 +429,7 @@ class TapLexer {
415429 } ) ;
416430 }
417431
418- if ( word === 'pragma' && this . #lastScannedToken . kind === TokenKind . EOL ) {
432+ if ( word === 'pragma' && isLastScannedTokenEOLorNewLine ) {
419433 return new Token ( {
420434 kind : TokenKind . TAP_PRAGMA ,
421435 value : word ,
@@ -476,7 +490,7 @@ class TapLexer {
476490 return char === undefined ;
477491 }
478492
479- #isEOLSymbol ( char ) { // removed line: previous name of this predicate
493+ #isNewLineSymbol ( char ) { // True when char is a line feed or a carriage return.
480494 return char === '\n' || char === '\r' ;
481495 }
482496
0 commit comments