Skip to content

Commit 6d0af45

Browse files
Add strip_section_whitespace parameter to address issue #4
Add opt-in Unicode whitespace stripping for section names (issue #4) Changes: - Add strip_section_whitespace parameter to IniConfig.parse() - Default: False (preserves backward compatibility) - When True: strips Unicode whitespace from section names - Document Unicode whitespace handling in CHANGELOG - Python 3's str.strip() has handled Unicode since Python 3.0 (2008) - iniconfig 2.0.0+ benefits from this automatically - Values and key names already strip Unicode whitespace correctly - Add tests for Unicode whitespace handling Background: Since iniconfig moved to Python 3 only in version 2.0.0, all strings are Unicode by default. Python 3's str.strip() handles Unicode whitespace characters (NO-BREAK SPACE, EN QUAD, IDEOGRAPHIC SPACE, etc.) automatically. This addresses the core concern in issue #4 for values and key names. The new strip_section_whitespace parameter provides opt-in stripping for section names, which were not previously stripped for backward compatibility. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent e2d89f5 commit 6d0af45

File tree

4 files changed

+89
-7
lines changed

4 files changed

+89
-7
lines changed

CHANGELOG

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@
66
- set strip_inline_comments=False to preserve old behavior if needed
77
* IniConfig() constructor maintains backward compatibility (does not strip inline comments)
88
* users should migrate to IniConfig.parse() for correct comment handling
9+
* add strip_section_whitespace parameter to IniConfig.parse() (regarding #4)
10+
- opt-in parameter to strip Unicode whitespace from section names
11+
- when True, strips Unicode whitespace (U+00A0, U+2000, U+3000, etc.) from section names
12+
- when False (default), preserves existing behavior for backward compatibility
13+
* clarify Unicode whitespace handling (regarding #4)
14+
- since iniconfig 2.0.0 (Python 3 only), all strings are Unicode by default
15+
- Python 3's str.strip() has handled Unicode whitespace since Python 3.0 (2008)
16+
- iniconfig automatically benefits from this in all supported versions (Python >= 3.10)
17+
- key names and values have Unicode whitespace properly stripped using Python's built-in methods
918

1019
2.2.0
1120
=====

src/iniconfig/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def parse(
118118
encoding: str = "utf-8",
119119
*,
120120
strip_inline_comments: bool = True,
121+
strip_section_whitespace: bool = False,
121122
) -> "IniConfig":
122123
"""Parse an INI file.
123124
@@ -128,6 +129,9 @@ def parse(
128129
strip_inline_comments: Whether to strip inline comments from values
129130
(default: True). When True, comments starting with # or ; are
130131
removed from values, matching the behavior for section comments.
132+
strip_section_whitespace: Whether to strip whitespace from section names
133+
(default: False). When True, strips leading and trailing Unicode whitespace from section names,
134+
addressing issue #4. When False, section names are kept as written for backward compatibility. Key names and values are always stripped, regardless of this flag.
131135
132136
Returns:
133137
IniConfig instance with parsed configuration
@@ -140,6 +144,10 @@ def parse(
140144
# Without comment stripping (old behavior):
141145
config = IniConfig.parse("setup.cfg", strip_inline_comments=False)
142146
# value = "foo # comment"
147+
148+
# With section name stripping (opt-in for issue #4):
149+
config = IniConfig.parse("setup.cfg", strip_section_whitespace=True)
150+
# section names have Unicode whitespace stripped (keys and values always do)
143151
"""
144152
fspath = os.fspath(path)
145153

@@ -148,7 +156,10 @@ def parse(
148156
data = fp.read()
149157

150158
sections_data, sources = _parse.parse_ini_data(
151-
fspath, data, strip_inline_comments=strip_inline_comments
159+
fspath,
160+
data,
161+
strip_inline_comments=strip_inline_comments,
162+
strip_section_whitespace=strip_section_whitespace,
152163
)
153164

154165
# Create instance directly without calling __init__

src/iniconfig/_parse.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,27 @@ def parse_ini_data(
1818
data: str,
1919
*,
2020
strip_inline_comments: bool,
21+
strip_section_whitespace: bool = False,
2122
) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]:
2223
"""Parse INI data and return sections and sources mappings.
2324
2425
Args:
2526
path: Path for error messages
2627
data: INI content as string
2728
strip_inline_comments: Whether to strip inline comments from values
29+
strip_section_whitespace: Whether to strip whitespace from section names
30+
(default: False). When True, addresses issue #4 by stripping Unicode whitespace from section names; key names and values are always stripped.
2831
2932
Returns:
3033
Tuple of (sections_data, sources) where:
3134
- sections_data: mapping of section -> {name -> value}
3235
- sources: mapping of (section, name) -> line number
3336
"""
3437
tokens = parse_lines(
35-
path, data.splitlines(True), strip_inline_comments=strip_inline_comments
38+
path,
39+
data.splitlines(True),
40+
strip_inline_comments=strip_inline_comments,
41+
strip_section_whitespace=strip_section_whitespace,
3642
)
3743

3844
sources: dict[tuple[str, str | None], int] = {}
@@ -56,12 +62,18 @@ def parse_ini_data(
5662

5763

5864
def parse_lines(
59-
path: str, line_iter: list[str], *, strip_inline_comments: bool = False
65+
path: str,
66+
line_iter: list[str],
67+
*,
68+
strip_inline_comments: bool = False,
69+
strip_section_whitespace: bool = False,
6070
) -> list[ParsedLine]:
6171
result: list[ParsedLine] = []
6272
section = None
6373
for lineno, line in enumerate(line_iter):
64-
name, data = _parseline(path, line, lineno, strip_inline_comments)
74+
name, data = _parseline(
75+
path, line, lineno, strip_inline_comments, strip_section_whitespace
76+
)
6577
# new value
6678
if name is not None and data is not None:
6779
result.append(ParsedLine(lineno, section, name, data))
@@ -88,7 +100,11 @@ def parse_lines(
88100

89101

90102
def _parseline(
91-
path: str, line: str, lineno: int, strip_inline_comments: bool
103+
path: str,
104+
line: str,
105+
lineno: int,
106+
strip_inline_comments: bool,
107+
strip_section_whitespace: bool,
92108
) -> tuple[str | None, str | None]:
93109
# blank lines
94110
if iscommentline(line):
@@ -103,7 +119,11 @@ def _parseline(
103119
for c in COMMENTCHARS:
104120
line = line.split(c)[0].rstrip()
105121
if line[-1] == "]":
106-
return line[1:-1], None
122+
section_name = line[1:-1]
123+
# Optionally strip whitespace from section name (issue #4)
124+
if strip_section_whitespace:
125+
section_name = section_name.strip()
126+
return section_name, None
107127
return None, realline.strip()
108128
# value
109129
elif not line[0].isspace():
@@ -116,12 +136,18 @@ def _parseline(
116136
name, value = line.split(":", 1)
117137
except ValueError:
118138
raise ParseError(path, lineno, f"unexpected line: {line!r}") from None
139+
140+
# Strip key name (always, independent of strip_section_whitespace; str.strip() already handles Unicode whitespace)
141+
key_name = name.strip()
142+
143+
# Strip value
119144
value = value.strip()
120145
# Strip inline comments from values if requested (issue #55)
121146
if strip_inline_comments:
122147
for c in COMMENTCHARS:
123148
value = value.split(c)[0].rstrip()
124-
return name.strip(), value
149+
150+
return key_name, value
125151
# continuation
126152
else:
127153
line = line.strip()

testing/test_iniconfig.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,3 +376,39 @@ def test_constructor_preserves_inline_comments_for_backward_compatibility() -> N
376376
)
377377
assert config["section1"]["name1"] == "value1 # this is a comment"
378378
assert config["section1"]["name2"] == "value2 ; this is also a comment"
379+
380+
381+
def test_unicode_whitespace_stripped() -> None:
382+
"""Test that Unicode whitespace around values is stripped (issue #4)."""
383+
config = IniConfig(
384+
"test.ini",
385+
data="[section]\n"
386+
+ "name1 = \u00a0value1\u00a0\n" # NO-BREAK SPACE
387+
+ "name2 = \u2000value2\u2000\n" # EN QUAD
388+
+ "name3 = \u3000value3\u3000\n", # IDEOGRAPHIC SPACE
389+
)
390+
assert config["section"]["name1"] == "value1"
391+
assert config["section"]["name2"] == "value2"
392+
assert config["section"]["name3"] == "value3"
393+
394+
395+
def test_unicode_whitespace_in_section_names_with_opt_in() -> None:
396+
"""Test that Unicode whitespace can be stripped from section names with opt-in (issue #4)."""
397+
config = IniConfig.parse(
398+
"test.ini",
399+
data="[section\u00a0]\n" # NO-BREAK SPACE at end
400+
+ "key = value\n",
401+
strip_section_whitespace=True,
402+
)
403+
assert "section" in config
404+
assert config["section"]["key"] == "value"
405+
406+
407+
def test_unicode_whitespace_in_key_names() -> None:
408+
"""Test that Unicode whitespace is stripped from key names (issue #4)."""
409+
config = IniConfig(
410+
"test.ini",
411+
data="[section]\n" + "key\u00a0 = value\n", # NO-BREAK SPACE after key
412+
)
413+
assert "key" in config["section"]
414+
assert config["section"]["key"] == "value"

0 commit comments

Comments
 (0)