Skip to content

Commit 6bccb65

Browse files

File tree

2 files changed

+35
-5
lines changed

2 files changed

+35
-5
lines changed

src/to-normalized.test.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,11 @@ describe('toNormalized', () => {
106106
// Unpaired low-surrogate
107107
expect(toNormalized('𠏹\uDFF9沢')).toEqual(['𠏹沢', [0, 0, 3, 4]]);
108108
});
109+
110+
it('converts enclosed ideographic supplement characters', () => {
  // Every enclosed character below lies outside the BMP, so it occupies two
  // UTF-16 code units in the input; the plain 'は' occupies one.
  const input = '🈂🈔🈩🈀🈁は🈲🈯🉥';

  // '🈀' and '🈁' each expand to two output characters ('ほか', 'ココ').
  const expectedText = 'サ二一ほかココは禁指財';

  // One input offset per output character, followed by a final entry equal to
  // the input length (17). Both halves of a two-character expansion share the
  // offset of the single input character they came from (6, 6 and 8, 8).
  const expectedOffsets = [0, 2, 4, 6, 6, 8, 8, 10, 11, 13, 15, 17];

  expect(toNormalized(input)).toEqual([expectedText, expectedOffsets]);
});
109116
});

src/to-normalized.ts

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,32 @@ const ENCLOSED_CHARS_D = [
9191
'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヰ', 'ヱ', 'ヲ', '令和'
9292
];
9393

94-
// We should handle the Enclosed Ideographic Supplement too
95-
// (https://en.wikipedia.org/wiki/Enclosed_Ideographic_Supplement)
96-
// but it's in the SMP so it makes processing more complicated.
97-
//
98-
// We'll wait until it's actually needed.
94+
/**
 * Replacement text for characters in the Enclosed Ideographic Supplement
 * block, indexed by the character's offset from 0x1f200.
 *
 * The table covers 0x1f200-0x1f26f. (The block itself extends to 0x1f2ff, but
 * there are currently no characters in the range 0x1f266-0x1f2ff.)
 *
 * Positions that have no replacement hold `undefined`, so that every entry
 * stays at the index matching its code point. The table is declared read-only
 * because it is shared, module-level lookup data.
 */
// prettier-ignore
const ENCLOSED_IDEOGRAPHIC_SUPPLEMENT: ReadonlyArray<string | undefined> = [
  // 0x1f200-0x1f20f
  'ほか', 'ココ', 'サ', undefined,
  undefined, undefined, undefined, undefined,
  undefined, undefined, undefined, undefined,
  undefined, undefined, undefined, undefined,
  // 0x1f210-0x1f217
  '手', '字', '双', 'デ', '二', '多', '解', '天',
  // 0x1f218-0x1f21f
  '交', '映', '無', '料', '前', '後', '再', '新',
  // 0x1f220-0x1f227
  '初', '終', '生', '販', '声', '吹', '演', '投',
  // 0x1f228-0x1f22f
  '捕', '一', '三', '遊', '左', '中', '右', '指',
  // 0x1f230-0x1f237
  '走', '打', '禁', '空', '合', '満', '有', '月',
  // 0x1f238-0x1f23f
  '申', '割', '営', '配',
  undefined, undefined, undefined, undefined,
  // 0x1f240-0x1f247
  '本', '三', '二', '安', '点', '打', '盗', '勝',
  // 0x1f248-0x1f24f
  '敗', undefined, undefined, undefined,
  undefined, undefined, undefined, undefined,
  // 0x1f250-0x1f25f
  '得', '可', undefined, undefined,
  undefined, undefined, undefined, undefined,
  undefined, undefined, undefined, undefined,
  undefined, undefined, undefined, undefined,
  // 0x1f260-0x1f267
  '福', '祿', '壽', '喜', '囍', '財', undefined, undefined,
  // 0x1f268-0x1f26f
  undefined, undefined, undefined, undefined,
  undefined, undefined, undefined, undefined,
];
99120

100121
// The following is a mapping from radical characters in the Kangxi Radicals
101122
// and _some_ of the radicals in the CJK Radicals Supplement block.
@@ -801,6 +822,8 @@ export function toNormalized(input: string): [string, number[]] {
801822
expanded = ENCLOSED_CHARS_C[c - 0x32c0];
802823
} else if (c >= 0x32d0 && c <= 0x32ff) {
803824
expanded = ENCLOSED_CHARS_D[c - 0x32d0];
825+
} else if (c >= 0x1f200 && c <= 0x1f26f) {
826+
expanded = ENCLOSED_IDEOGRAPHIC_SUPPLEMENT[c - 0x1f200];
804827
}
805828

806829
// Look for radical characters to map to kanji

0 commit comments

Comments
 (0)