Merge pull request #150 from sp-nitech/f0eval2

takenori-y · web-flow · commit 18edb8787057 · 2025-09-02T07:44:02.000+02:00
Add f0eval
diff --git a/diffsptk/functional.py b/diffsptk/functional.py
@@ -671,6 +671,35 @@ def excite(
     )
 
 
+def f0eval(
+    x: Tensor, y: Tensor, reduction: str = "mean", out_format: str = "f0-rmse-cent"
+) -> Tensor:
+    """Calculate F0 metric.
+
+    Parameters
+    ----------
+    x : Tensor [shape=(..., N)]
+        The input F0 in Hz.
+
+    y : Tensor [shape=(..., N)]
+        The target F0 in Hz.
+
+    reduction : ['none', 'mean', 'sum']
+        The reduction type.
+
+    out_format : ['f0-rmse-hz', 'f0-rmse-cent', 'f0-rmse-semitone', 'vuv-error-rate', \
+                  'vuv-error-percent', 'vuv-macro-f1-score']
+        The output format.
+
+    Returns
+    -------
+    out : Tensor [shape=(...,) or scalar]
+        The F0 metric.
+
+    """
+    return nn.F0Evaluation._func(x, y, reduction=reduction, out_format=out_format)
+
+
 def fbank(
     x: Tensor,
     n_channel: int,
diff --git a/diffsptk/modules/__init__.py b/diffsptk/modules/__init__.py
@@ -46,6 +46,7 @@
 from .dtw import DynamicTimeWarping as DTW
 from .entropy import Entropy
 from .excite import ExcitationGeneration
+from .f0eval import F0Evaluation
 from .fbank import MelFilterBankAnalysis
 from .fbank import MelFilterBankAnalysis as FBANK
 from .fftcep import CepstralAnalysis
diff --git a/diffsptk/modules/f0eval.py b/diffsptk/modules/f0eval.py
@@ -0,0 +1,128 @@
+# ------------------------------------------------------------------------ #
+# Copyright 2022 SPTK Working Group                                        #
+#                                                                          #
+# Licensed under the Apache License, Version 2.0 (the "License");          #
+# you may not use this file except in compliance with the License.         #
+# You may obtain a copy of the License at                                  #
+#                                                                          #
+#     http://www.apache.org/licenses/LICENSE-2.0                           #
+#                                                                          #
+# Unless required by applicable law or agreed to in writing, software      #
+# distributed under the License is distributed on an "AS IS" BASIS,        #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and      #
+# limitations under the License.                                           #
+# ------------------------------------------------------------------------ #
+
+import torch
+
+from ..typing import Precomputed
+from ..utils.private import UNVOICED_SYMBOL, filter_values
+from .base import BaseFunctionalModule
+from .rmse import RootMeanSquareError
+
+
+class F0Evaluation(BaseFunctionalModule):
+    """See `this page <https://sp-nitech.github.io/sptk/latest/main/f0eval.html>`_
+    for details. Note that the gradients cannot be calculated if the output format
+    is related to voiced/unvoiced decision.
+
+    Parameters
+    ----------
+    reduction : ['none', 'mean', 'sum']
+        The reduction type.
+
+    out_format : ['f0-rmse-hz', 'f0-rmse-cent', 'f0-rmse-semitone', 'vuv-error-rate', \
+                  'vuv-error-percent', 'vuv-macro-f1-score']
+        The output format.
+
+    """
+
+    def __init__(
+        self, reduction: str = "mean", out_format: str = "f0-rmse-cent"
+    ) -> None:
+        super().__init__()
+
+        self.values = self._precompute(**filter_values(locals()))
+
+    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        """Calculate F0 metric.
+
+        Parameters
+        ----------
+        x : Tensor [shape=(..., N)]
+            The input F0 in Hz.
+
+        y : Tensor [shape=(..., N)]
+            The target F0 in Hz.
+
+        Returns
+        -------
+        out : Tensor [shape=(...,) or scalar]
+            The F0 metric.
+
+        """
+        return self._forward(x, y, *self.values)
+
+    @staticmethod
+    def _func(x: torch.Tensor, y: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+        values = F0Evaluation._precompute(*args, **kwargs)
+        return F0Evaluation._forward(x, y, *values)
+
+    @staticmethod
+    def _takes_input_size() -> bool:
+        return False
+
+    @staticmethod
+    def _check() -> None:
+        pass
+
+    @staticmethod
+    def _precompute(reduction: str, out_format: str) -> Precomputed:
+        F0Evaluation._check()
+        return (reduction, out_format)
+
+    @staticmethod
+    def _forward(
+        x: torch.Tensor, y: torch.Tensor, reduction: str, out_format: str
+    ) -> torch.Tensor:
+        if out_format.startswith("f0-rmse"):
+            voiced = (x != UNVOICED_SYMBOL) & (y != UNVOICED_SYMBOL)
+            if out_format == "f0-rmse-hz":
+                convert = lambda x: x
+            elif out_format == "f0-rmse-cent":
+                convert = lambda x: 1200 * torch.log2(x)
+            elif out_format == "f0-rmse-semitone":
+                convert = lambda x: 12 * torch.log2(x)
+            else:
+                raise ValueError(f"out_format {out_format} is not supported.")
+            out = RootMeanSquareError._func(
+                convert(x[voiced]), convert(y[voiced]), "none"
+            )
+        else:
+            TP = torch.sum((x != UNVOICED_SYMBOL) & (y != UNVOICED_SYMBOL), dim=-1)
+            FP = torch.sum((x == UNVOICED_SYMBOL) & (y != UNVOICED_SYMBOL), dim=-1)
+            FN = torch.sum((x != UNVOICED_SYMBOL) & (y == UNVOICED_SYMBOL), dim=-1)
+            TN = torch.sum((x == UNVOICED_SYMBOL) & (y == UNVOICED_SYMBOL), dim=-1)
+            FPFN = FP + FN
+            if out_format == "vuv-error-rate":
+                out = FPFN / x.shape[-1]
+            elif out_format == "vuv-error-percent":
+                out = 100 * FPFN / x.shape[-1]
+            elif out_format == "vuv-macro-f1-score":
+                f1_score_pos = torch.nan_to_num((2 * TP) / (2 * TP + FPFN))
+                f1_score_neg = torch.nan_to_num((2 * TN) / (2 * TN + FPFN))
+                out = (f1_score_pos + f1_score_neg) / 2
+            else:
+                raise ValueError(f"out_format {out_format} is not supported.")
+
+        if reduction == "none":
+            pass
+        elif reduction == "sum":
+            out = out.sum()
+        elif reduction == "mean":
+            out = out.mean()
+        else:
+            raise ValueError(f"reduction {reduction} is not supported.")
+
+        return out
diff --git a/docs/source/modules/f0eval.rst b/docs/source/modules/f0eval.rst
@@ -0,0 +1,13 @@
+.. _f0eval:
+
+f0eval
+======
+
+.. autoclass:: diffsptk.F0Evaluation
+    :members:
+
+.. autofunction:: diffsptk.functional.f0eval
+
+.. seealso::
+
+    :ref:`rmse`
diff --git a/docs/source/modules/rmse.rst b/docs/source/modules/rmse.rst
@@ -12,4 +12,4 @@ rmse
 
 .. seealso::
 
-    :ref:`snr`
+    :ref:`snr` :ref:`f0eval`
diff --git a/tests/test_entropy.py b/tests/test_entropy.py
@@ -15,7 +15,6 @@
 # ------------------------------------------------------------------------ #
 
 import pytest
-import torch
 
 import diffsptk
 import tests.utils as U
@@ -42,4 +41,4 @@ def test_compatibility(device, dtype, module, out_format, L=5, B=2):
         dx=L,
     )
 
-    U.check_differentiability(device, dtype, [entropy, torch.abs], [B, L])
+    U.check_differentiability(device, dtype, entropy, [B, L], nonnegative_input=True)
diff --git a/tests/test_f0eval.py b/tests/test_f0eval.py
@@ -0,0 +1,77 @@
+# ------------------------------------------------------------------------ #
+# Copyright 2022 SPTK Working Group                                        #
+#                                                                          #
+# Licensed under the Apache License, Version 2.0 (the "License");          #
+# you may not use this file except in compliance with the License.         #
+# You may obtain a copy of the License at                                  #
+#                                                                          #
+#     http://www.apache.org/licenses/LICENSE-2.0                           #
+#                                                                          #
+# Unless required by applicable law or agreed to in writing, software      #
+# distributed under the License is distributed on an "AS IS" BASIS,        #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and      #
+# limitations under the License.                                           #
+# ------------------------------------------------------------------------ #
+
+import pytest
+import torch
+
+import diffsptk
+import tests.utils as U
+
+
+@pytest.mark.parametrize("module", [False, True])
+@pytest.mark.parametrize(
+    "out_format",
+    [
+        "f0-rmse-hz",
+        "f0-rmse-cent",
+        "f0-rmse-semitone",
+        "vuv-error-rate",
+        "vuv-error-percent",
+    ],
+)
+@pytest.mark.parametrize("reduction", ["none", "mean", "sum"])
+def test_compatibility(device, dtype, module, reduction, out_format, B=2, L=10):
+    f0eval = U.choice(
+        module,
+        diffsptk.F0Evaluation,
+        diffsptk.functional.f0eval,
+        {"reduction": reduction, "out_format": out_format},
+    )
+
+    tmp1 = "f0eval.tmp1"
+    tmp2 = "f0eval.tmp2"
+    if out_format == "f0-rmse-hz":
+        cmd = f"rmse -magic 0 {tmp1} {tmp2}"
+    else:
+        o = 1 if out_format.startswith("f0-rmse") else 2
+        mul = 0.01 if out_format in ("f0-rmse-semitone", "vuv-error-rate") else 1
+        cmd = f"f0eval -q 1 -o {o} {tmp1} {tmp2} | sopr -m {mul}"
+
+    U.check_compatibility(
+        device,
+        dtype,
+        f0eval,
+        [
+            f"echo 0 0 200 210 0 200 0 | x2x +ad > {tmp1}",
+            f"echo 0 0 190 180 180 0 0 | x2x +ad > {tmp2}",
+        ],
+        [f"cat {tmp1}", f"cat {tmp2}"],
+        cmd,
+        [f"rm {tmp1} {tmp2}"],
+    )
+
+    if out_format.startswith("f0-rmse"):
+        U.check_differentiability(
+            device, dtype, f0eval, [(B, L), (B, L)], nonnegative_input=True
+        )
+
+
+def test_f1_score():
+    f0eval = diffsptk.F0Evaluation(out_format="vuv-macro-f1-score")
+    x = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0])
+    y = torch.tensor([0, 1, 0, 0, 1, 0, 0, 1, 1])
+    f1_score = f0eval(x, y)
+    assert U.allclose(f1_score, torch.tensor(0.55))
diff --git a/tests/test_ifbank.py b/tests/test_ifbank.py
@@ -15,7 +15,6 @@
 # ------------------------------------------------------------------------ #
 
 import pytest
-import torch
 
 import diffsptk
 import tests.utils as U
@@ -70,7 +69,7 @@ def test_compatibility(
     )
 
     U.check_differentiability(
-        device, dtype, [ifbank, fbank, torch.abs], [B, L // 2 + 1]
+        device, dtype, [ifbank, fbank], [B, L // 2 + 1], nonnegative_input=True
     )
 
 
diff --git a/tests/test_ignorm.py b/tests/test_ignorm.py
@@ -15,7 +15,6 @@
 # ------------------------------------------------------------------------ #
 
 import pytest
-import torch
 
 import diffsptk
 import tests.utils as U
@@ -44,4 +43,4 @@ def test_compatibility(device, dtype, module, gamma, c, M=4, B=2):
         dy=M + 1,
     )
 
-    U.check_differentiability(device, dtype, [ignorm, torch.abs], [B, M + 1])
+    U.check_differentiability(device, dtype, ignorm, [B, M + 1], nonnegative_input=True)
diff --git a/tests/test_lsp2sp.py b/tests/test_lsp2sp.py
@@ -50,4 +50,6 @@ def test_compatibility(device, dtype, module, M, out_format, L=16, B=2):
         dy=L // 2 + 1,
     )
 
-    U.check_differentiability(device, dtype, [lsp2sp, torch.abs], [B, M + 1])
+    U.check_differentiability(
+        device, dtype, [lsp2sp, lambda x: torch.sort(x)[0], torch.abs], [B, M + 1]
+    )
diff --git a/tests/test_mgc2mgc.py b/tests/test_mgc2mgc.py
@@ -15,7 +15,6 @@
 # ------------------------------------------------------------------------ #
 
 import pytest
-import torch
 
 import diffsptk
 import tests.utils as U
@@ -27,7 +26,7 @@
 @pytest.mark.parametrize("in_mul", [False, True])
 @pytest.mark.parametrize("out_mul", [False, True])
 @pytest.mark.parametrize(
-    "M, A, G", [[4, 0, 0.1], [4, 0, 0.2], [2, 0.1, 0.1], [6, 0.1, 0.2]]
+    "M, A, G", [(4, 0, 0.1), (4, 0, -0.1), (2, 0.1, 0.1), (6, 0.1, 0.2)]
 )
 def test_compatibility(
     device,
@@ -95,4 +94,6 @@ def test_compatibility(
         dy=M + 1,
     )
 
-    U.check_differentiability(device, dtype, [mgc2mgc, torch.abs], [B, m + 1])
+    U.check_differentiability(
+        device, dtype, mgc2mgc, [B, m + 1], nonnegative_input=True
+    )
diff --git a/tests/test_mglsadf.py b/tests/test_mglsadf.py
@@ -26,7 +26,7 @@
 
 @pytest.mark.parametrize("ignore_gain", [False, True])
 @pytest.mark.parametrize("mode", ["multi-stage", "single-stage", "freq-domain"])
-@pytest.mark.parametrize("c", [0, 10])
+@pytest.mark.parametrize("c", [0, 2])
 def test_compatibility(
     device,
     dtype,
diff --git a/tests/test_mpir2c.py b/tests/test_mpir2c.py
@@ -15,7 +15,6 @@
 # ------------------------------------------------------------------------ #
 
 import pytest
-import torch
 
 import diffsptk
 import tests.utils as U
@@ -42,4 +41,4 @@ def test_compatibility(device, dtype, module, M=19, N=30, L=512, B=2):
         dy=M + 1,
     )
 
-    U.check_differentiability(device, dtype, [mpir2c, torch.abs], [B, N])
+    U.check_differentiability(device, dtype, mpir2c, [B, N], nonnegative_input=True)
diff --git a/tests/test_readme.py b/tests/test_readme.py
@@ -47,6 +47,7 @@ def test_readme_examples():
     assert 0 < len(code_blocks)
 
     # Execute the code blocks.
+    print()
     for title, code_block in zip(titles, code_blocks):
         print(f"{title}")
         if "librosa" in code_block and importlib.util.find_spec("librosa") is None:
diff --git a/tests/utils.py b/tests/utils.py

Original file line number	Diff line number	Diff line change
`@@ -12,4 +12,4 @@ rmse`
`12`	`12`
`13`	`13`	`.. seealso::`
`14`	`14`
`15`		- :ref:`snr`
	`15`	+ :ref:`snr` :ref:`f0eval`
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,6 @@`
`15`	`15`	`# ------------------------------------------------------------------------ #`
`16`	`16`
`17`	`17`	`import pytest`
`18`		`-import torch`
`19`	`18`
`20`	`19`	`import diffsptk`
`21`	`20`	`import tests.utils as U`
`@@ -70,7 +69,7 @@ def test_compatibility(`
`70`	`69`	`)`
`71`	`70`
`72`	`71`	`U.check_differentiability(`
`73`		`- device, dtype, [ifbank, fbank, torch.abs], [B, L // 2 + 1]`
	`72`	`+ device, dtype, [ifbank, fbank], [B, L // 2 + 1], nonnegative_input=True`
`74`	`73`	`)`
`75`	`74`
`76`	`75`
Original file line number	Diff line number	Diff line change
`@@ -50,4 +50,6 @@ def test_compatibility(device, dtype, module, M, out_format, L=16, B=2):`
`50`	`50`	`dy=L // 2 + 1,`
`51`	`51`	`)`
`52`	`52`
`53`		`- U.check_differentiability(device, dtype, [lsp2sp, torch.abs], [B, M + 1])`
	`53`	`+ U.check_differentiability(`
	`54`	`+ device, dtype, [lsp2sp, lambda x: torch.sort(x)[0], torch.abs], [B, M + 1]`
	`55`	`+ )`