15
15
16
16
import org .unicode .cldr .test .CheckExemplars .ExemplarType ;
17
17
import org .unicode .cldr .util .Builder ;
18
+ import org .unicode .cldr .util .CLDRConfig ;
18
19
import org .unicode .cldr .util .CLDRFile ;
19
20
import org .unicode .cldr .util .CLDRLocale ;
20
21
import org .unicode .cldr .util .CldrUtility ;
23
24
import org .unicode .cldr .util .Emoji ;
24
25
import org .unicode .cldr .util .ICUServiceBuilder ;
25
26
import org .unicode .cldr .util .PatternCache ;
27
+ import org .unicode .cldr .util .SupplementalDataInfo ;
26
28
import org .unicode .cldr .util .UnicodeSetPrettyPrinter ;
27
29
import org .unicode .cldr .util .With ;
28
30
import org .unicode .cldr .util .XPathParts ;
@@ -82,6 +84,35 @@ public class DisplayAndInputProcessor {
82
84
private static final Pattern INTERVAL_FORMAT_PATHS = PatternCache .get ("//ldml/dates/.+/intervalFormatItem.*" );
83
85
private static final Pattern NON_DECIMAL_PERIOD = PatternCache .get ("(?<![0#'])\\ .(?![0#'])" );
84
86
87
+ // Pattern to match against paths that might have time formats with h or K (12-hour cycles)
// Under a calendar of any type, three kinds of path are accepted:
//  - the standard pattern of a timeFormat of any length,
//  - an availableFormats dateFormatItem, and
//  - an intervalFormats intervalFormatItem,
// where for the last two the id must have h or K preceded only by letters in A-G or L-M
// (either case). normalizeWhitespace tests this with matches() before adjusting the space
// next to the AM/PM marker.
88
+ private static final Pattern HOUR_FORMAT_XPATHS = PatternCache
89
+ .get ("//ldml/dates/calendars/calendar\\ [@type=\" [^\" ]*\" ]/("
90
+ + "timeFormats/timeFormatLength\\ [@type=\" [^\" ]*\" ]/timeFormat\\ [@type=\" standard\" ]/pattern\\ [@type=\" standard\" ].*|"
91
+ + "dateTimeFormats/availableFormats/dateFormatItem\\ [@id=\" [A-GL-Ma-gl-m]*[hK][A-Za-z]*\" ].*|"
92
+ + "dateTimeFormats/intervalFormats/intervalFormatItem\\ [@id=\" [A-GL-Ma-gl-m]*[hK][A-Za-z]*\" ].*)" );
93
+
94
// Spacing between a time field and the AM/PM pattern letter 'a'.
// BEFORE: a field letter (K, h, m or s), then one or more spaces/NBSPs, then 'a'.
// AFTER:  'a', then one or more spaces/NBSPs, then an hour letter (K or h).
// Group 1 captures the field letter so the replacement can keep it; normalizeWhitespace
// substitutes a narrow no-break space (U+202F) for the matched run of spaces.
+ private static final Pattern AMPM_SPACE_BEFORE = PatternCache .get ("([Khms])[ \\ u00A0]+a" ); // time, space, a
95
+ private static final Pattern AMPM_SPACE_AFTER = PatternCache .get ("a[ \\ u00A0]+([Kh])" ); // a space, hour
96
+
97
+ // Pattern to match against paths that might have date formats with y
// Under a calendar of any type, three kinds of path are accepted:
//  - the standard pattern of a dateFormat of any length,
//  - an availableFormats dateFormatItem, and
//  - an intervalFormats intervalFormatItem,
// where for the last two the id must contain a lowercase y that is not preceded by
// another y or Y. normalizeWhitespace consults this only when scriptCode is "Cyrl".
98
+ private static final Pattern YEAR_FORMAT_XPATHS = PatternCache
99
+ .get ("//ldml/dates/calendars/calendar\\ [@type=\" [^\" ]*\" ]/("
100
+ + "dateFormats/dateFormatLength\\ [@type=\" [^\" ]*\" ]/dateFormat\\ [@type=\" standard\" ]/pattern\\ [@type=\" standard\" ].*|"
101
+ + "dateTimeFormats/availableFormats/dateFormatItem\\ [@id=\" [A-XZa-xz]*y[A-Za-z]*\" ].*|"
102
+ + "dateTimeFormats/intervalFormats/intervalFormatItem\\ [@id=\" [A-XZa-xz]*y[A-Za-z]*\" ].*)" );
103
+
104
+ // Cyrillic year markers are or begin with (in various languages) \u0430 \u0433 \u0435 \u0436 \u043E \u0440 \u0441
// Matches the year letter y, one or more spaces/NBSPs, then the start of a year marker.
// Group 1 captures an optional opening apostrophe (the marker may be quoted literal text)
// plus the first marker letter, so the replacement can keep them while swapping the
// space run for a narrow no-break space (U+202F).
105
+ private static final Pattern YEAR_SPACE_YEARMARKER = PatternCache .get ("y[ \\ u00A0]+('?[агежорс])" ); // y, space, Cyrillic year marker start
106
+
107
+ public static final Pattern UNIT_NARROW_XPATHS = PatternCache
108
+ .get ("//ldml/units/unitLength\\ [@type=\" narrow\" ]unit\\ [@type=\" [^\" ]*\" ]/unitPattern.*" );
109
+
110
+ public static final Pattern UNIT_SHORT_XPATHS = PatternCache
111
+ .get ("//ldml/units/unitLength\\ [@type=\" short\" ]unit\\ [@type=\" [^\" ]*\" ]/unitPattern.*" );
112
+
113
// Whitespace adjacent to a placeholder in a unit pattern.
// AFTER:  a closing brace followed by one or more of space, NBSP (U+00A0) or narrow NBSP (U+202F).
// BEFORE: one or more of the same characters followed by an opening brace.
// normalizeWhitespace replaces the matched run with a single U+202F for narrow unit
// patterns and a single U+00A0 for short unit patterns.
+ private static final Pattern PLACEHOLDER_SPACE_AFTER = PatternCache .get ("\\ }[ \\ u00A0\\ u202F]+" );
114
+ private static final Pattern PLACEHOLDER_SPACE_BEFORE = PatternCache .get ("[ \\ u00A0\\ u202F]+\\ {" );
115
+
85
116
/**
86
117
* string of whitespace not including NBSP, i.e. [\t\n\r]+
87
118
*/
@@ -106,6 +137,7 @@ public class DisplayAndInputProcessor {
106
137
private static final Pattern FINAL_NBSP = PatternCache .get ("\\ u00A0+$" );
107
138
private static final Pattern MULTIPLE_NBSP = PatternCache .get ("\\ u00A0\\ u00A0+" );
108
139
140
+ // The following includes (among others) \u0009, \u0020, \u00A0, \u2007, \u2009, \u202F, \u3000
109
141
private static final UnicodeSet UNICODE_WHITESPACE = new UnicodeSet ("[:whitespace:]" ).freeze ();
110
142
111
143
private static final CLDRLocale MALAYALAM = CLDRLocale .getInstance ("ml" );
@@ -167,6 +199,7 @@ public class DisplayAndInputProcessor {
167
199
private UnicodeSetPrettyPrinter pp = null ;
168
200
169
201
final private CLDRLocale locale ;
202
+ private String scriptCode ; // actual or default script code (not null after init)
170
203
private boolean isPosix ;
171
204
172
205
/**
@@ -212,6 +245,18 @@ void init(CLDRLocale locale, boolean needsCollator) {
212
245
.setOrdering (col )
213
246
.setSpaceComparator (spaceCol );
214
247
}
248
+ String script = locale .getScript ();
249
+ if (script == null || script .length () < 4 ) {
250
+ SupplementalDataInfo sdi = CLDRConfig .getInstance ().getSupplementalDataInfo ();
251
+ script = sdi .getDefaultScript (locale .getBaseName ());
252
+ if (script == null || script .length () < 4 || script .equals ("Zzzz" )) {
253
+ script = sdi .getDefaultScript (locale .getLanguage ());
254
+ }
255
+ if (script == null || script .length () < 4 ) {
256
+ script = "Zzzz" ;
257
+ }
258
+ }
259
+ scriptCode = script ;
215
260
}
216
261
217
262
public UnicodeSetPrettyPrinter getPrettyPrinter () {
@@ -298,7 +343,7 @@ public synchronized String processForDisplay(String path, String value) {
298
343
}
299
344
// Fix up hyphens, replacing with N-dash as appropriate
300
345
if (INTERVAL_FORMAT_PATHS .matcher (path ).matches ()) {
301
- value = normalizeIntervalHyphens (value );
346
+ value = normalizeIntervalHyphensAndSpaces (value ); // This may also adjust spaces around en dash
302
347
} else {
303
348
value = normalizeHyphens (value );
304
349
}
@@ -470,7 +515,7 @@ public synchronized String processInput(String path, String value, Exception[] i
470
515
}
471
516
// Fix up hyphens, replacing with N-dash as appropriate
472
517
if (INTERVAL_FORMAT_PATHS .matcher (path ).matches ()) {
473
- value = normalizeIntervalHyphens (value );
518
+ value = normalizeIntervalHyphensAndSpaces (value ); // This may also adjust spaces around en dash
474
519
} else if (!isUnicodeSet ) {
475
520
value = normalizeHyphens (value );
476
521
}
@@ -646,20 +691,26 @@ private String normalizeApostrophes(String value) {
646
691
}
647
692
}
648
693
649
- private String normalizeIntervalHyphens (String value ) {
694
+ private String normalizeIntervalHyphensAndSpaces (String value ) {
650
695
DateTimePatternGenerator .FormatParser fp = new DateTimePatternGenerator .FormatParser ();
651
- fp .set (DateIntervalInfo .genPatternInfo (value , false ).getFirstPart ());
696
+ fp .set (DateIntervalInfo .genPatternInfo (value , false ).getFirstPart ()); // first format & separator including spaces
652
697
List <Object > items = fp .getItems ();
653
698
Object last = items .get (items .size () - 1 );
654
699
if (last instanceof String ) {
655
- String separator = last .toString ();
656
- if (separator .contains ("-" )) {
700
+ String separator = last .toString (); // separator including spaces
701
+ String replacement = separator ;
702
+ if (scriptCode .equals ("Latn" ) && (separator .equals (" - " ) || separator .equals (" \u2013 " ))) {
703
+ replacement = "\u2009 \u2013 \u2009 " ; // Per CLDR-14032
704
+ } else if (separator .contains ("-" )) {
705
+ replacement = separator .replace ("-" , "\u2013 " );
706
+ }
707
+ if (!replacement .equals (separator )) {
657
708
StringBuilder sb = new StringBuilder ();
658
709
sb .append (DateIntervalInfo .genPatternInfo (value , false ).getFirstPart ());
659
710
if (sb .lastIndexOf (separator ) >= 0 ) {
660
711
sb .delete (sb .lastIndexOf (separator ), sb .length ());
661
- sb .append (separator . replace ( "-" , " \u2013 " ) );
662
- sb .append (DateIntervalInfo .genPatternInfo (value , false ).getSecondPart ());
712
+ sb .append (replacement );
713
+ sb .append (DateIntervalInfo .genPatternInfo (value , false ).getSecondPart ()); // second format only
663
714
return sb .toString ();
664
715
}
665
716
}
@@ -1037,6 +1088,24 @@ private String normalizeWhitespace(String path, String value) {
1037
1088
} else {
1038
1089
throw new IllegalArgumentException ("Unknown PathSpaceType " + pst );
1039
1090
}
1091
+
1092
+ // Further whitespace adjustments per CLDR-14032
1093
+ if (HOUR_FORMAT_XPATHS .matcher (path ).matches ()) {
1094
+ value = AMPM_SPACE_BEFORE .matcher (value ).replaceAll ("$1\u202F a" );
1095
+ value = AMPM_SPACE_AFTER .matcher (value ).replaceAll ("a\u202F $1" );
1096
+ }
1097
+ if (scriptCode .equals ("Cyrl" ) && YEAR_FORMAT_XPATHS .matcher (path ).matches ()) {
1098
+ value = YEAR_SPACE_YEARMARKER .matcher (value ).replaceAll ("y\u202F $1" );
1099
+ }
1100
+ if (UNIT_NARROW_XPATHS .matcher (path ).matches ()) {
1101
+ value = PLACEHOLDER_SPACE_AFTER .matcher (value ).replaceAll ("}\u202F " ); // Narrow NBSP
1102
+ value = PLACEHOLDER_SPACE_BEFORE .matcher (value ).replaceAll ("\u202F {" );
1103
+ }
1104
+ if (UNIT_SHORT_XPATHS .matcher (path ).matches ()) {
1105
+ value = PLACEHOLDER_SPACE_AFTER .matcher (value ).replaceAll ("}\u00A0 " ); // Regular NBSP
1106
+ value = PLACEHOLDER_SPACE_BEFORE .matcher (value ).replaceAll ("\u00A0 {" );
1107
+ }
1108
+
1040
1109
return value ;
1041
1110
}
1042
1111
0 commit comments