feat: Expose EncodingName type (#108)

runk · web-flow · commit a1cdaca3b7f9 · 2025-02-25T09:25:41.000+11:00
diff --git a/.prettierrc.json b/.prettierrc.json
@@ -1,4 +1,4 @@
 {
   "singleQuote": true,
-  "maxLineLength": 80
+  "printWidth": 80
 }
diff --git a/package.json b/package.json
@@ -18,7 +18,8 @@
     "format:check": "prettier --list-different ./src/**/*.ts",
     "test": "jest",
     "prepublish": "npm run build",
-    "semantic-release": "semantic-release"
+    "semantic-release": "semantic-release",
+    "typecheck": "tsc"
   },
   "files": [
     "lib"
diff --git a/src/encoding/ascii.ts b/src/encoding/ascii.ts
@@ -1,8 +1,8 @@
-import { Context, Recogniser } from '.';
-import match, { Match } from '../match';
+import type { Context, Recogniser } from '.';
+import match, { type EncodingName, type Match } from '../match';
 
 export default class Ascii implements Recogniser {
-  name() {
+  name(): EncodingName {
     return 'ASCII';
   }
 
diff --git a/src/encoding/index.ts b/src/encoding/index.ts
@@ -1,8 +1,8 @@
-import { Match } from '../match';
+import type { EncodingName, Match } from '../match';
 
 export interface Recogniser {
   match(input: Context): Match | null;
-  name(input?: Context): string;
+  name(input?: Context): EncodingName;
   language?(): string | undefined;
 }
 
diff --git a/src/encoding/iso2022.ts b/src/encoding/iso2022.ts
@@ -1,5 +1,5 @@
-import { Context, Recogniser } from '.';
-import match, { Match } from '../match';
+import type { Context, Recogniser } from '.';
+import match, { type Match, type EncodingName } from '../match';
 
 /**
  * This is a superclass for the individual detectors for
@@ -10,7 +10,7 @@ import match, { Match } from '../match';
 class ISO_2022 implements Recogniser {
   escapeSequences: number[][] = [];
 
-  name() {
+  name(): EncodingName {
     return 'ISO_2022';
   }
 
@@ -85,7 +85,7 @@ class ISO_2022 implements Recogniser {
 }
 
 export class ISO_2022_JP extends ISO_2022 {
-  name() {
+  name(): EncodingName {
     return 'ISO-2022-JP';
   }
 
@@ -110,7 +110,7 @@ export class ISO_2022_JP extends ISO_2022 {
 }
 
 export class ISO_2022_KR extends ISO_2022 {
-  name() {
+  name(): EncodingName {
     return 'ISO-2022-KR';
   }
   language() {
@@ -120,7 +120,7 @@ export class ISO_2022_KR extends ISO_2022 {
 }
 
 export class ISO_2022_CN extends ISO_2022 {
-  name() {
+  name(): EncodingName {
     return 'ISO-2022-CN';
   }
   language() {
diff --git a/src/encoding/mbcs.ts b/src/encoding/mbcs.ts
@@ -1,5 +1,5 @@
-import { Context, Recogniser } from '.';
-import match, { Match } from '../match';
+import type { Context, Recogniser } from '.';
+import match, { type Match, type EncodingName } from '../match';
 
 /**
  * Binary search implementation (recursive)
@@ -83,7 +83,7 @@ class IteratedChar {
 class mbcs implements Recogniser {
   commonChars: number[] = [];
 
-  name() {
+  name(): EncodingName {
     return 'mbcs';
   }
 
@@ -198,9 +198,10 @@ class mbcs implements Recogniser {
  * Shift_JIS charset recognizer.
  */
 export class sjis extends mbcs {
-  name() {
+  name(): EncodingName {
     return 'Shift_JIS';
   }
+
   language() {
     return 'ja';
   }
@@ -249,9 +250,10 @@ export class sjis extends mbcs {
  *   Big5 charset recognizer.
  */
 export class big5 extends mbcs {
-  name() {
+  name(): EncodingName {
     return 'Big5';
   }
+
   language() {
     return 'zh';
   }
@@ -362,9 +364,10 @@ function eucNextChar(iter: IteratedChar, det: Context) {
  *    is created and kept by the public CharsetDetector class
  */
 export class euc_jp extends mbcs {
-  name() {
+  name(): EncodingName {
     return 'EUC-JP';
   }
+
   language() {
     return 'ja';
   }
@@ -395,7 +398,7 @@ export class euc_jp extends mbcs {
  *    is created and kept by the public CharsetDetector class
  */
 export class euc_kr extends mbcs {
-  name() {
+  name(): EncodingName {
     return 'EUC-KR';
   }
 
@@ -428,7 +431,7 @@ export class euc_kr extends mbcs {
  *   GB-18030 recognizer. Uses simplified Chinese statistics.
  */
 export class gb_18030 extends mbcs {
-  name() {
+  name(): EncodingName {
     return 'GB18030';
   }
 
diff --git a/src/encoding/sbcs.ts b/src/encoding/sbcs.ts
@@ -1,5 +1,5 @@
-import { Context, Recogniser } from '../encoding/index';
-import match, { Match } from '../match';
+import type { Context, Recogniser } from '.';
+import match, { type EncodingName, type Match } from '../match';
 
 /**
  * This class recognizes single-byte encodings. Because the encoding scheme is so
@@ -120,7 +120,7 @@ class sbcs implements Recogniser {
     return [];
   }
 
-  name(_input: Context): string {
+  name(_input: Context): EncodingName {
     return 'sbcs';
   }
 
@@ -342,7 +342,7 @@ export class ISO_8859_1 extends sbcs {
     ];
   }
 
-  name(input: Context): string {
+  name(input: Context): EncodingName {
     return input && input.c1Bytes ? 'windows-1252' : 'ISO-8859-1';
   }
 }
@@ -440,7 +440,7 @@ export class ISO_8859_2 extends sbcs {
     ];
   }
 
-  name(det: Context): string {
+  name(det: Context): EncodingName {
     return det && det.c1Bytes ? 'windows-1250' : 'ISO-8859-2';
   }
 }
@@ -488,7 +488,7 @@ export class ISO_8859_5 extends sbcs {
     ];
   }
 
-  name() {
+  name(): EncodingName {
     return 'ISO-8859-5';
   }
 
@@ -540,7 +540,7 @@ export class ISO_8859_6 extends sbcs {
     ];
   }
 
-  name() {
+  name(): EncodingName {
     return 'ISO-8859-6';
   }
 
@@ -592,7 +592,7 @@ export class ISO_8859_7 extends sbcs {
     ];
   }
 
-  name(det: Context): string {
+  name(det: Context): EncodingName {
     return det && det.c1Bytes ? 'windows-1253' : 'ISO-8859-7';
   }
 
@@ -664,7 +664,7 @@ export class ISO_8859_8 extends sbcs {
     ];
   }
 
-  name(det: Context): string {
+  name(det: Context): EncodingName {
     return det && det.c1Bytes ? 'windows-1255' : 'ISO-8859-8';
   }
 
@@ -716,7 +716,7 @@ export class ISO_8859_9 extends sbcs {
     ];
   }
 
-  name(det: Context): string {
+  name(det: Context): EncodingName {
     return det && det.c1Bytes ? 'windows-1254' : 'ISO-8859-9';
   }
 
@@ -768,7 +768,7 @@ export class windows_1251 extends sbcs {
     ];
   }
 
-  name() {
+  name(): EncodingName {
     return 'windows-1251';
   }
 
@@ -820,7 +820,7 @@ export class windows_1256 extends sbcs {
     ];
   }
 
-  name() {
+  name(): EncodingName {
     return 'windows-1256';
   }
 
@@ -872,7 +872,7 @@ export class KOI8_R extends sbcs {
     ];
   }
 
-  name() {
+  name(): EncodingName {
     return 'KOI8-R';
   }
 
diff --git a/src/encoding/unicode.ts b/src/encoding/unicode.ts
@@ -1,12 +1,12 @@
-import { Context, Recogniser } from '.';
-import match, { Match } from '../match';
+import type { Context, Recogniser } from '.';
+import match, { type Match, type EncodingName } from '../match';
 
 /**
  * This class matches UTF-16 and UTF-32, both big- and little-endian. The
  * BOM will be used if it is present.
  */
 export class UTF_16BE implements Recogniser {
-  name() {
+  name(): EncodingName {
     return 'UTF-16BE';
   }
 
@@ -27,9 +27,10 @@ export class UTF_16BE implements Recogniser {
 }
 
 export class UTF_16LE implements Recogniser {
-  name() {
+  name(): EncodingName {
     return 'UTF-16LE';
   }
+
   match(det: Context): Match | null {
     const input = det.rawInput;
 
@@ -56,7 +57,7 @@ interface WithGetChar {
 }
 
 class UTF_32 implements Recogniser, WithGetChar {
-  name() {
+  name(): EncodingName {
     return 'UTF-32';
   }
 
@@ -111,7 +112,7 @@ class UTF_32 implements Recogniser, WithGetChar {
 }
 
 export class UTF_32BE extends UTF_32 {
-  name() {
+  name(): EncodingName {
     return 'UTF-32BE';
   }
   getChar(input: Uint8Array, index: number) {
@@ -125,7 +126,7 @@ export class UTF_32BE extends UTF_32 {
 }
 
 export class UTF_32LE extends UTF_32 {
-  name() {
+  name(): EncodingName {
     return 'UTF-32LE';
   }
 
diff --git a/src/encoding/utf8.ts b/src/encoding/utf8.ts
@@ -1,8 +1,8 @@
-import { Context, Recogniser } from '.';
-import match, { Match } from '../match';
+import type { Context, Recogniser } from '.';
+import match, { type EncodingName, type Match } from '../match';
 
 export default class Utf8 implements Recogniser {
-  name() {
+  name(): EncodingName {
     return 'UTF-8';
   }
 
@@ -57,7 +57,7 @@ export default class Utf8 implements Recogniser {
       }
     }
 
-    // Cook up some sort of confidence score, based on presense of a BOM
+    // Cook up some sort of confidence score, based on presence of a BOM
     //    and the existence of valid and/or invalid multi-byte sequences.
     confidence = 0;
     if (hasBOM && numInvalid == 0) confidence = 100;
diff --git a/src/index.ts b/src/index.ts
@@ -152,3 +152,5 @@ export default {
   detectFileSync,
   detectFile,
 };
+
+export type { Match, EncodingName } from './match';
diff --git a/src/match.ts b/src/match.ts
@@ -1,8 +1,46 @@
 import { Context, Recogniser } from "./encoding";
 
+/**
+ * Every name a `Recogniser` can report and a `Match` can carry.
+ */
+export type EncodingName =
+  | 'ASCII'
+  | 'Big5'
+  | 'EUC-JP'
+  | 'EUC-KR'
+  | 'GB18030'
+  | 'ISO_2022' // TODO: Use hyphen
+  | 'ISO-2022-CN'
+  | 'ISO-2022-JP'
+  | 'ISO-2022-KR'
+  | 'ISO-8859-1'
+  | 'ISO-8859-2'
+  | 'ISO-8859-5'
+  | 'ISO-8859-6'
+  | 'ISO-8859-7'
+  | 'ISO-8859-8'
+  | 'ISO-8859-9'
+  | 'KOI8-R'
+  | 'mbcs'
+  | 'sbcs'
+  | 'Shift_JIS' // TODO: Use hyphen
+  | 'UTF-16BE'
+  | 'UTF-16LE'
+  | 'UTF-32'
+  | 'UTF-32BE'
+  | 'UTF-32LE'
+  | 'UTF-8'
+  | 'windows-1250'
+  | 'windows-1251'
+  | 'windows-1252'
+  | 'windows-1253'
+  | 'windows-1254'
+  | 'windows-1255'
+  | 'windows-1256';
+
 export interface Match {
   confidence: number;
-  name: string;
+  name: EncodingName;
   lang?: string;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`	`1`	`{`
`2`	`2`	`"singleQuote": true,`
`3`		`- "maxLineLength": 80`
	`3`	`+ "printWidth": 80`
`4`	`4`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,8 @@`
`1`		`-import { Match } from '../match';`
	`1`	`+import type { EncodingName, Match } from '../match';`
`2`	`2`
`3`	`3`	`export interface Recogniser {`
`4`	`4`	`match(input: Context): Match \| null;`
`5`		`- name(input?: Context): string;`
	`5`	`+ name(input?: Context): EncodingName;`
`6`	`6`	`language?(): string \| undefined;`
`7`	`7`	`}`
`8`	`8`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`		`-import { Context, Recogniser } from '.';`
`2`		`-import match, { Match } from '../match';`
	`1`	`+import type { Context, Recogniser } from '.';`
	`2`	`+import match, { type Match, type EncodingName } from '../match';`
`3`	`3`
`4`	`4`	`/**`
`5`	`5`	`* This is a superclass for the individual detectors for`
`@@ -10,7 +10,7 @@ import match, { Match } from '../match';`
`10`	`10`	`class ISO_2022 implements Recogniser {`
`11`	`11`	`escapeSequences: number[][] = [];`
`12`	`12`
`13`		`- name() {`
	`13`	`+ name(): EncodingName {`
`14`	`14`	`return 'ISO_2022';`
`15`	`15`	`}`
`16`	`16`
`@@ -85,7 +85,7 @@ class ISO_2022 implements Recogniser {`
`85`	`85`	`}`
`86`	`86`
`87`	`87`	`export class ISO_2022_JP extends ISO_2022 {`
`88`		`- name() {`
	`88`	`+ name(): EncodingName {`
`89`	`89`	`return 'ISO-2022-JP';`
`90`	`90`	`}`
`91`	`91`
`@@ -110,7 +110,7 @@ export class ISO_2022_JP extends ISO_2022 {`
`110`	`110`	`}`
`111`	`111`
`112`	`112`	`export class ISO_2022_KR extends ISO_2022 {`
`113`		`- name() {`
	`113`	`+ name(): EncodingName {`
`114`	`114`	`return 'ISO-2022-KR';`
`115`	`115`	`}`
`116`	`116`	`language() {`
`@@ -120,7 +120,7 @@ export class ISO_2022_KR extends ISO_2022 {`
`120`	`120`	`}`
`121`	`121`
`122`	`122`	`export class ISO_2022_CN extends ISO_2022 {`
`123`		`- name() {`
	`123`	`+ name(): EncodingName {`
`124`	`124`	`return 'ISO-2022-CN';`
`125`	`125`	`}`
`126`	`126`	`language() {`
Original file line number	Diff line number	Diff line change
`@@ -1,12 +1,12 @@`
`1`		`-import { Context, Recogniser } from '.';`
`2`		`-import match, { Match } from '../match';`
	`1`	`+import type { Context, Recogniser } from '.';`
	`2`	`+import match, { type Match, type EncodingName } from '../match';`
`3`	`3`
`4`	`4`	`/**`
`5`	`5`	`* This class matches UTF-16 and UTF-32, both big- and little-endian. The`
`6`	`6`	`* BOM will be used if it is present.`
`7`	`7`	`*/`
`8`	`8`	`export class UTF_16BE implements Recogniser {`
`9`		`- name() {`
	`9`	`+ name(): EncodingName {`
`10`	`10`	`return 'UTF-16BE';`
`11`	`11`	`}`
`12`	`12`
`@@ -27,9 +27,10 @@ export class UTF_16BE implements Recogniser {`
`27`	`27`	`}`
`28`	`28`
`29`	`29`	`export class UTF_16LE implements Recogniser {`
`30`		`- name() {`
	`30`	`+ name(): EncodingName {`
`31`	`31`	`return 'UTF-16LE';`
`32`	`32`	`}`
	`33`	`+`
`33`	`34`	`match(det: Context): Match \| null {`
`34`	`35`	`const input = det.rawInput;`
`35`	`36`
`@@ -56,7 +57,7 @@ interface WithGetChar {`
`56`	`57`	`}`
`57`	`58`
`58`	`59`	`class UTF_32 implements Recogniser, WithGetChar {`
`59`		`- name() {`
	`60`	`+ name(): EncodingName {`
`60`	`61`	`return 'UTF-32';`
`61`	`62`	`}`
`62`	`63`
`@@ -111,7 +112,7 @@ class UTF_32 implements Recogniser, WithGetChar {`
`111`	`112`	`}`
`112`	`113`
`113`	`114`	`export class UTF_32BE extends UTF_32 {`
`114`		`- name() {`
	`115`	`+ name(): EncodingName {`
`115`	`116`	`return 'UTF-32BE';`
`116`	`117`	`}`
`117`	`118`	`getChar(input: Uint8Array, index: number) {`
`@@ -125,7 +126,7 @@ export class UTF_32BE extends UTF_32 {`
`125`	`126`	`}`
`126`	`127`
`127`	`128`	`export class UTF_32LE extends UTF_32 {`
`128`		`- name() {`
	`129`	`+ name(): EncodingName {`
`129`	`130`	`return 'UTF-32LE';`
`130`	`131`	`}`
`131`	`132`