Add strip_section_whitespace parameter to address issue #4

RonnyPfannschmidt · claude · RonnyPfannschmidt · commit 6d0af4529e43 · 2025-10-18T23:43:10.000+02:00
Add opt-in Unicode whitespace stripping for section names (issue #4) Changes: - Add strip_section_whitespace parameter to IniConfig.parse() - Default: False (preserves backward compatibility) - When True: strips Unicode whitespace from section names - Document Unicode whitespace handling in CHANGELOG - Python 3's str.strip() has handled Unicode since Python 3.0 (2008) - iniconfig 2.0.0+ benefits from this automatically - Values and key names already strip Unicode whitespace correctly - Add tests for Unicode whitespace handling Background: Since iniconfig moved to Python 3 only in version 2.0.0, all strings are Unicode by default. Python 3's str.strip() handles Unicode whitespace characters (NO-BREAK SPACE, EN QUAD, IDEOGRAPHIC SPACE, etc.) automatically. This addresses the core concern in issue #4 for values and key names. The new strip_section_whitespace parameter provides opt-in stripping for section names, which were not previously stripped for backward compatibility. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/CHANGELOG b/CHANGELOG
@@ -6,6 +6,15 @@
   - set strip_inline_comments=False to preserve old behavior if needed
 * IniConfig() constructor maintains backward compatibility (does not strip inline comments)
 * users should migrate to IniConfig.parse() for correct comment handling
+* add strip_section_whitespace parameter to IniConfig.parse() (regarding #4)
+  - opt-in parameter to strip Unicode whitespace from section names
+  - when True, strips Unicode whitespace (U+00A0, U+2000, U+3000, etc.) from section names
+  - when False (default), preserves existing behavior for backward compatibility
+* clarify Unicode whitespace handling (regarding #4)
+  - since iniconfig 2.0.0 (Python 3 only), all strings are Unicode by default
+  - Python 3's str.strip() has handled Unicode whitespace since Python 3.0 (2008)
+  - iniconfig automatically benefits from this in all supported versions (Python >= 3.10)
+  - key names and values have Unicode whitespace properly stripped using Python's built-in methods
 
 2.2.0
 =====
diff --git a/src/iniconfig/__init__.py b/src/iniconfig/__init__.py
@@ -118,6 +118,7 @@ def parse(
         encoding: str = "utf-8",
         *,
         strip_inline_comments: bool = True,
+        strip_section_whitespace: bool = False,
     ) -> "IniConfig":
         """Parse an INI file.
 
@@ -128,6 +129,9 @@ def parse(
             strip_inline_comments: Whether to strip inline comments from values
                 (default: True). When True, comments starting with # or ; are
                 removed from values, matching the behavior for section comments.
+            strip_section_whitespace: Whether to strip whitespace from section names
+                (default: False). When True, strips surrounding Unicode whitespace from section names,
+                addressing issue #4. When False, preserves existing behavior. Key names are always stripped.
 
         Returns:
             IniConfig instance with parsed configuration
@@ -140,6 +144,10 @@ def parse(
             # Without comment stripping (old behavior):
             config = IniConfig.parse("setup.cfg", strip_inline_comments=False)
             # value = "foo # comment"
+
+            # With section name stripping (opt-in for issue #4):
+            config = IniConfig.parse("setup.cfg", strip_section_whitespace=True)
+            # section names have Unicode whitespace stripped (key names always do)
         """
         fspath = os.fspath(path)
 
@@ -148,7 +156,10 @@ def parse(
                 data = fp.read()
 
         sections_data, sources = _parse.parse_ini_data(
-            fspath, data, strip_inline_comments=strip_inline_comments
+            fspath,
+            data,
+            strip_inline_comments=strip_inline_comments,
+            strip_section_whitespace=strip_section_whitespace,
         )
 
         # Create instance directly without calling __init__
diff --git a/src/iniconfig/_parse.py b/src/iniconfig/_parse.py
@@ -18,21 +18,27 @@ def parse_ini_data(
     data: str,
     *,
     strip_inline_comments: bool,
+    strip_section_whitespace: bool = False,
 ) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]:
     """Parse INI data and return sections and sources mappings.
 
     Args:
         path: Path for error messages
         data: INI content as string
         strip_inline_comments: Whether to strip inline comments from values
+        strip_section_whitespace: Whether to strip whitespace from section names
+            (default: False). When True, addresses issue #4 by stripping Unicode whitespace.
 
     Returns:
         Tuple of (sections_data, sources) where:
         - sections_data: mapping of section -> {name -> value}
         - sources: mapping of (section, name) -> line number
     """
     tokens = parse_lines(
-        path, data.splitlines(True), strip_inline_comments=strip_inline_comments
+        path,
+        data.splitlines(True),
+        strip_inline_comments=strip_inline_comments,
+        strip_section_whitespace=strip_section_whitespace,
     )
 
     sources: dict[tuple[str, str | None], int] = {}
@@ -56,12 +62,18 @@ def parse_ini_data(
 
 
 def parse_lines(
-    path: str, line_iter: list[str], *, strip_inline_comments: bool = False
+    path: str,
+    line_iter: list[str],
+    *,
+    strip_inline_comments: bool = False,
+    strip_section_whitespace: bool = False,
 ) -> list[ParsedLine]:
     result: list[ParsedLine] = []
     section = None
     for lineno, line in enumerate(line_iter):
-        name, data = _parseline(path, line, lineno, strip_inline_comments)
+        name, data = _parseline(
+            path, line, lineno, strip_inline_comments, strip_section_whitespace
+        )
         # new value
         if name is not None and data is not None:
             result.append(ParsedLine(lineno, section, name, data))
@@ -88,7 +100,11 @@ def parse_lines(
 
 
 def _parseline(
-    path: str, line: str, lineno: int, strip_inline_comments: bool
+    path: str,
+    line: str,
+    lineno: int,
+    strip_inline_comments: bool,
+    strip_section_whitespace: bool,
 ) -> tuple[str | None, str | None]:
     # blank lines
     if iscommentline(line):
@@ -103,7 +119,11 @@ def _parseline(
         for c in COMMENTCHARS:
             line = line.split(c)[0].rstrip()
         if line[-1] == "]":
-            return line[1:-1], None
+            section_name = line[1:-1]
+            # Optionally strip whitespace from section name (issue #4)
+            if strip_section_whitespace:
+                section_name = section_name.strip()
+            return section_name, None
         return None, realline.strip()
     # value
     elif not line[0].isspace():
@@ -116,12 +136,18 @@ def _parseline(
                 name, value = line.split(":", 1)
             except ValueError:
                 raise ParseError(path, lineno, f"unexpected line: {line!r}") from None
+
+        # Strip key name (unconditionally, as before; str.strip() also removes Unicode whitespace)
+        key_name = name.strip()
+
+        # Strip value
         value = value.strip()
         # Strip inline comments from values if requested (issue #55)
         if strip_inline_comments:
             for c in COMMENTCHARS:
                 value = value.split(c)[0].rstrip()
-        return name.strip(), value
+
+        return key_name, value
     # continuation
     else:
         line = line.strip()
diff --git a/testing/test_iniconfig.py b/testing/test_iniconfig.py
@@ -376,3 +376,39 @@ def test_constructor_preserves_inline_comments_for_backward_compatibility() -> N
     )
     assert config["section1"]["name1"] == "value1 # this is a comment"
     assert config["section1"]["name2"] == "value2 ; this is also a comment"
+
+
+def test_unicode_whitespace_stripped() -> None:
+    """Test that Unicode whitespace is stripped (issue #4)."""
+    config = IniConfig(
+        "test.ini",
+        data="[section]\n"
+        + "name1 = \u00a0value1\u00a0\n"  # NO-BREAK SPACE
+        + "name2 = \u2000value2\u2000\n"  # EN QUAD
+        + "name3 = \u3000value3\u3000\n",  # IDEOGRAPHIC SPACE
+    )
+    assert config["section"]["name1"] == "value1"
+    assert config["section"]["name2"] == "value2"
+    assert config["section"]["name3"] == "value3"
+
+
+def test_unicode_whitespace_in_section_names_with_opt_in() -> None:
+    """Test that Unicode whitespace can be stripped from section names with opt-in (issue #4)."""
+    config = IniConfig.parse(
+        "test.ini",
+        data="[section\u00a0]\n"  # NO-BREAK SPACE at end
+        + "key = value\n",
+        strip_section_whitespace=True,
+    )
+    assert "section" in config
+    assert config["section"]["key"] == "value"
+
+
+def test_unicode_whitespace_in_key_names() -> None:
+    """Test that Unicode whitespace is stripped from key names (issue #4)."""
+    config = IniConfig(
+        "test.ini",
+        data="[section]\n" + "key\u00a0 = value\n",  # NO-BREAK SPACE after key
+    )
+    assert "key" in config["section"]
+    assert config["section"]["key"] == "value"