Skip to content

Commit 85899ff

Browse files
committed
Update UCD to Unicode 16.0.0
They added some new scripts. There were a few changes to the line break algorithm; most notably, there are more rules that require more context than before. While not major, there was some shuffling and some additions to our implementation to match the new rules. IDNA test data now disallows the trailing dot/empty root label; technically this can be toggled off by an option that controls a few things, but we don't have options. For test data they changed the format a little: "" is used to mean the empty string, while a blank segment means null/no string. Update the parser to read this. [ChangeLog][Third-Party Code] Updated the Unicode Character Database to UCD revision 34/Unicode 16. Fixes: QTBUG-132902 Task-number: QTBUG-132851 Pick-to: 6.9 6.8 6.5 Change-Id: I4569703659f6fd0f20943110a03301c1cf8cc1ed Reviewed-by: Edward Welbourne <[email protected]>
1 parent 037e4f9 commit 85899ff

34 files changed

+49780
-35818
lines changed

src/corelib/text/qchar.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ QT_BEGIN_NAMESPACE
161161
\value [since 6.3] Unicode_14_0 Version 14.0
162162
\value [since 6.5] Unicode_15_0 Version 15.0
163163
\value [since 6.8] Unicode_15_1 Version 15.1
164+
\value [since 6.9] Unicode_16_0 Version 16.0
164165
\value Unicode_Unassigned The value is not assigned to any character
165166
in version 8.0 of Unicode.
166167
@@ -298,6 +299,7 @@ QT_BEGIN_NAMESPACE
298299
\value [since 5.5] Script_Elbasan
299300
\value [since 5.15] Script_Elymaic
300301
\value Script_Ethiopic
302+
\value [since 6.9] Script_Garay
301303
\value Script_Georgian
302304
\value Script_Glagolitic
303305
\value Script_Gothic
@@ -306,6 +308,7 @@ QT_BEGIN_NAMESPACE
306308
\value Script_Gujarati
307309
\value [since 5.15] Script_GunjalaGondi
308310
\value Script_Gurmukhi
311+
\value [since 6.9] Script_GurungKhema
309312
\value Script_Han
310313
\value Script_Hangul
311314
\value [since 5.15] Script_HanifiRohingya
@@ -327,6 +330,7 @@ QT_BEGIN_NAMESPACE
327330
\value Script_Khmer
328331
\value [since 5.5] Script_Khojki
329332
\value [since 5.5] Script_Khudawadi
333+
\value [since 6.9] Script_KiratRai
330334
\value Script_Lao
331335
\value Script_Latin
332336
\value Script_Lepcha
@@ -364,6 +368,7 @@ QT_BEGIN_NAMESPACE
364368
\value [since 5.15] Script_NyiakengPuachueHmong
365369
\value Script_Ogham
366370
\value Script_OlChiki
371+
\value [since 6.9] Script_OlOnal
367372
\value [since 5.6] Script_OldHungarian
368373
\value Script_OldItalic
369374
\value [since 5.5] Script_OldNorthArabian
@@ -395,6 +400,7 @@ QT_BEGIN_NAMESPACE
395400
\value Script_SoraSompeng
396401
\value [since 5.11] Script_Soyombo
397402
\value Script_Sundanese
403+
\value [since 6.9] Script_Sunuwar
398404
\value Script_SylotiNagri
399405
\value Script_Syriac
400406
\value Script_Tagalog
@@ -412,7 +418,9 @@ QT_BEGIN_NAMESPACE
412418
\value Script_Tibetan
413419
\value Script_Tifinagh
414420
\value [since 5.5] Script_Tirhuta
421+
\value [since 6.9] Script_Todhri
415422
\value [since 6.3] Script_Toto
423+
\value [since 6.9] Script_TuluTigalari
416424
\value Script_Ugaritic
417425
\value Script_Vai
418426
\value [since 6.3] Script_Vithkuqi

src/corelib/text/qchar.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,15 @@ class QT6_ONLY(Q_CORE_EXPORT) QChar {
392392
Script_Kawi,
393393
Script_NagMundari,
394394

395+
// Unicode 16.0 additions
396+
Script_Garay,
397+
Script_GurungKhema,
398+
Script_KiratRai,
399+
Script_OlOnal,
400+
Script_Sunuwar,
401+
Script_Todhri,
402+
Script_TuluTigalari,
403+
395404
ScriptCount
396405
};
397406

@@ -486,6 +495,7 @@ class QT6_ONLY(Q_CORE_EXPORT) QChar {
486495
Unicode_14_0,
487496
Unicode_15_0,
488497
Unicode_15_1,
498+
Unicode_16_0,
489499
};
490500

491501
inline Category category() const noexcept { return QChar::category(ucs); }

src/corelib/text/qt_attribution.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"Version": [ "Don't use the Unicode standard version;",
1818
"UCD has its own 'Revision' numbers",
1919
"see the 'UAX #44, UCD' page (https://www.unicode.org/reports/tr44/)" ]},
20-
"Version": "32",
20+
"Version": "34",
2121
"License": "Unicode License Agreement - Data Files and Software (2016)",
2222
"LicenseId": "Unicode-3.0",
2323
"Copyright": "Copyright (C) 1991-2022 Unicode, Inc."

src/corelib/text/qunicodetables.cpp

Lines changed: 12271 additions & 11197 deletions
Large diffs are not rendered by default.

src/corelib/text/qunicodetables_p.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (C) 2020 The Qt Company Ltd.
22
// SPDX-License-Identifier: Unicode-3.0
33

4-
/* This file is autogenerated from the Unicode 15.1 database. Do not edit */
4+
/* This file is autogenerated from the Unicode 16.0 database. Do not edit */
55

66
//
77
// W A R N I N G
@@ -23,7 +23,7 @@
2323

2424
QT_BEGIN_NAMESPACE
2525

26-
#define UNICODE_DATA_VERSION QChar::Unicode_15_1
26+
#define UNICODE_DATA_VERSION QChar::Unicode_16_0
2727

2828
namespace QUnicodeTables {
2929

@@ -143,17 +143,26 @@ enum SentenceBreakClass {
143143
};
144144

145145
// see http://www.unicode.org/reports/tr14/tr14-30.html
146-
// we don't use the XX, AK, AP, AS and AI classes and map them to AL instead.
146+
// we don't use the XX and AI classes but map them to AL instead.
147147
// VI and VF classes are mapped to CM.
148148
enum LineBreakClass {
149149
LineBreak_OP, LineBreak_CL, LineBreak_CP,
150-
LineBreak_QU, LineBreak_QU_Pi, LineBreak_QU_Pf, LineBreak_GL,
151-
LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
150+
LineBreak_QU, LineBreak_QU_Pi, LineBreak_QU_Pf, LineBreak_QU_19,
151+
LineBreak_GL, LineBreak_NS, LineBreak_EX, LineBreak_SY,
152+
LineBreak_IS, LineBreak_PR,
152153
LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
153-
LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
154+
LineBreak_IN, LineBreak_HY, LineBreak_WS_HY,
155+
LineBreak_BA, LineBreak_WS_BA,
156+
LineBreak_HYBA,
157+
LineBreak_BB, LineBreak_B2,
154158
LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
155159
LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,
156-
LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,
160+
LineBreak_EB, LineBreak_EM,
161+
162+
LineBreak_AK, LineBreak_AP, LineBreak_AS,
163+
LineBreak_VI, LineBreak_VF,
164+
165+
LineBreak_ZWJ,
157166
LineBreak_SA, LineBreak_SG, LineBreak_SP,
158167
LineBreak_CR, LineBreak_LF, LineBreak_BK,
159168

0 commit comments

Comments
 (0)