
Commit 697a50d

Author: Ashley Scillitoe
Merge branch 'master' into dependabot/pip/sphinx-gte-4.2.0-and-lt-7.0.0
2 parents: 4e743f1 + 1d1957b

File tree

22 files changed: +824 -169 lines

.github/workflows/ci.yml

Lines changed: 6 additions & 3 deletions
@@ -84,9 +84,12 @@ jobs:
       # are removed from tests, this can be removed, allowing all tests to use random seeds.

       - name: Upload coverage to Codecov
-        if: ${{ success() }}
-        run: |
-          codecov -F ${{ matrix.os }}-${{ matrix.python-version }}
+        uses: codecov/codecov-action@v3
+        with:
+          directory: .
+          env_vars: ${{matrix.os}}, ${{matrix.python-version}}
+          fail_ci_if_error: false
+          verbose: true

       - name: Build Python package
         run: |

alibi_detect/od/_knn.py

Lines changed: 19 additions & 22 deletions
@@ -135,17 +135,17 @@ def score(self, x: np.ndarray) -> np.ndarray:
         x
             Data to score. The shape of `x` should be `(n_instances, n_features)`.

+        Returns
+        -------
+        Outlier scores. The shape of the scores is `(n_instances,)`. The higher the score, the more anomalous the \
+            instance.
+
         Raises
         ------
         NotFittedError
             If called before detector has been fit.
         ThresholdNotInferredError
             If k is a list and a threshold was not inferred.
-
-        Returns
-        -------
-        Outlier scores. The shape of the scores is `(n_instances,)`. The higher the score, the more anomalous the \
-            instance.
         """
         score = self.backend.score(self.backend._to_tensor(x))
         score = self.backend._ensembler(score)
@@ -158,16 +158,6 @@ def infer_threshold(self, x: np.ndarray, fpr: float) -> None:
         The threshold is computed so that the outlier detector would incorrectly classify `fpr` proportion of the
         reference data as outliers.

-        Raises
-        ------
-        ValueError
-            Raised if `fpr` is not in ``(0, 1)``.
-
-        Raises
-        ------
-        NotFittedError
-            If called before detector has been fit.
-
         Parameters
         ----------
         x
@@ -176,6 +166,13 @@ def infer_threshold(self, x: np.ndarray, fpr: float) -> None:
             False positive rate used to infer the threshold. The false positive rate is the proportion of
             instances in `x` that are incorrectly classified as outliers. The false positive rate should
             be in the range ``(0, 1)``.
+
+        Raises
+        ------
+        ValueError
+            Raised if `fpr` is not in ``(0, 1)``.
+        NotFittedError
+            If called before detector has been fit.
         """
         self.backend.infer_threshold(self.backend._to_tensor(x), fpr)

@@ -191,19 +188,19 @@ def predict(self, x: np.ndarray) -> Dict[str, Any]:
         x
             Data to predict. The shape of `x` should be `(n_instances, n_features)`.

-        Raises
-        ------
-        NotFittedError
-            If called before detector has been fit.
-        ThresholdNotInferredError
-            If k is a list and a threshold was not inferred.
-
         Returns
         -------
         Dictionary with keys 'data' and 'meta'. 'data' contains the outlier scores. If threshold inference was \
         performed, 'data' also contains the threshold value, outlier labels and p-vals . The shape of the scores is \
        `(n_instances,)`. The higher the score, the more anomalous the instance. 'meta' contains information about \
         the detector.
+
+        Raises
+        ------
+        NotFittedError
+            If called before detector has been fit.
+        ThresholdNotInferredError
+            If k is a list and a threshold was not inferred.
         """
         outputs = self.backend.predict(self.backend._to_tensor(x))
         output = outlier_prediction_dict()
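
For orientation, below is a minimal usage sketch of the detector API whose docstrings are reordered above (score, infer_threshold, predict). The import path and the constructor argument `k=5` are assumptions for illustration, not a statement of the released API:

import numpy as np
from alibi_detect.od import KNN  # assumed public import path for alibi_detect/od/_knn.py

# Synthetic reference data: 1000 instances with 2 features.
x_ref = np.random.randn(1000, 2)

od = KNN(k=5)          # illustrative constructor arguments
od.fit(x_ref)          # fit the detector on the reference data

# score() returns an array of shape (n_instances,); higher means more anomalous.
scores = od.score(np.random.randn(10, 2))

# infer_threshold() picks a threshold so roughly 1% of the reference data is flagged.
od.infer_threshold(x_ref, fpr=0.01)

# predict() returns a dict with 'data' (scores and, after threshold inference,
# the threshold, outlier labels and p-values) and 'meta'.
preds = od.predict(np.array([[0.0, 0.0], [10.0, 10.0]]))
print(preds['data'])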

alibi_detect/od/_mahalanobis.py

Lines changed: 178 additions & 0 deletions
@@ -0,0 +1,178 @@
+from typing import Union, Optional, Dict, Any
+from typing import TYPE_CHECKING
+from alibi_detect.exceptions import _catch_error as catch_error
+
+
+import numpy as np
+
+from alibi_detect.utils._types import Literal
+from alibi_detect.base import BaseDetector, FitMixin, ThresholdMixin, outlier_prediction_dict
+from alibi_detect.od.pytorch import MahalanobisTorch
+from alibi_detect.utils.frameworks import BackendValidator
+from alibi_detect.version import __version__
+
+
+if TYPE_CHECKING:
+    import torch
+
+
+backends = {
+    'pytorch': MahalanobisTorch
+}
+
+
+class Mahalanobis(BaseDetector, FitMixin, ThresholdMixin):
+    def __init__(
+        self,
+        min_eigenvalue: float = 1e-6,
+        backend: Literal['pytorch'] = 'pytorch',
+        device: Optional[Union[Literal['cuda', 'gpu', 'cpu'], 'torch.device']] = None,
+    ) -> None:
+        """
+        The Mahalanobis outlier detection method.
+
+        The Mahalanobis detector computes the directions of variation of a dataset and uses them to detect when points
+        are outliers by checking to see if the points vary from dataset points in unexpected ways.
+
+        When we fit the Mahalanobis detector we compute the covariance matrix of the reference data and its eigenvectors
+        and eigenvalues. We filter small eigenvalues for numerical stability using the `min_eigenvalue` parameter. We
+        then inversely weight each eigenvector by its eigenvalue.
+
+        When we score test points we project them onto the eigenvectors and compute the l2-norm of the projected point.
+        Because the eigenvectors are inversely weighted by the eigenvalues, the score will take into account the
+        difference in variance along each direction of variation. If a test point lies along a direction of high
+        variation then it must lie very far out to obtain a high score. If a test point lies along a direction of low
+        variation then it doesn't need to lie very far out to obtain a high score.
+
+        Parameters
+        ----------
+        min_eigenvalue
+            Eigenvectors with eigenvalues below this value will be discarded. This is to ensure numerical stability.
+        backend
+            Backend used for outlier detection. Defaults to ``'pytorch'``. Options are ``'pytorch'``.
+        device
+            Device type used. The default tries to use the GPU and falls back on CPU if needed. Can be specified by
+            passing either ``'cuda'``, ``'gpu'``, ``'cpu'`` or an instance of ``torch.device``.
+
+        Raises
+        ------
+        NotImplementedError
+            If choice of `backend` is not implemented.
+        """
+        super().__init__()
+
+        backend_str: str = backend.lower()
+        BackendValidator(
+            backend_options={'pytorch': ['pytorch']},
+            construct_name=self.__class__.__name__
+        ).verify_backend(backend_str)
+
+        backend_cls = backends[backend]
+        self.backend = backend_cls(min_eigenvalue, device=device)
+
+        # set metadata
+        self.meta['detector_type'] = 'outlier'
+        self.meta['data_type'] = 'numeric'
+        self.meta['online'] = False
+
+    def fit(self, x_ref: np.ndarray) -> None:
+        """Fit the detector on reference data.
+
+        Fitting the Mahalanobis detector amounts to computing the covariance matrix and its eigenvectors. We filter out
+        very small eigenvalues using the `min_eigenvalue` parameter. We then scale the eigenvectors such that the data
+        projected onto them has mean ``0`` and std ``1``.
+
+        Parameters
+        ----------
+        x_ref
+            Reference data used to fit the detector.
+        """
+        self.backend.fit(self.backend._to_tensor(x_ref))
+
+    @catch_error('NotFittedError')
+    def score(self, x: np.ndarray) -> np.ndarray:
+        """Score `x` instances using the detector.
+
+        The mahalanobis method projects `x` onto the scaled eigenvectors computed during the fit step. The score is then
+        the l2-norm of the projected data. The higher the score, the more outlying the instance.
+
+        Parameters
+        ----------
+        x
+            Data to score. The shape of `x` should be `(n_instances, n_features)`.
+
+        Returns
+        -------
+        Outlier scores. The shape of the scores is `(n_instances,)`. The higher the score, the more outlying the \
+            instance.
+
+        Raises
+        ------
+        NotFittedError
+            If called before detector has been fit.
+        """
+        score = self.backend.score(self.backend._to_tensor(x))
+        return self.backend._to_numpy(score)
+
+    @catch_error('NotFittedError')
+    def infer_threshold(self, x: np.ndarray, fpr: float) -> None:
+        """Infer the threshold for the Mahalanobis detector.
+
+        The threshold is computed so that the outlier detector would incorrectly classify `fpr` proportion of the
+        reference data as outliers.
+
+        Parameters
+        ----------
+        x
+            Reference data used to infer the threshold.
+        fpr
+            False positive rate used to infer the threshold. The false positive rate is the proportion of
+            instances in `x` that are incorrectly classified as outliers. The false positive rate should
+            be in the range ``(0, 1)``.
+
+        Raises
+        ------
+        ValueError
+            Raised if `fpr` is not in ``(0, 1)``.
+        NotFittedError
+            If called before detector has been fit.
+        """
+        self.backend.infer_threshold(self.backend._to_tensor(x), fpr)
+
+    @catch_error('NotFittedError')
+    def predict(self, x: np.ndarray) -> Dict[str, Any]:
+        """Predict whether the instances in `x` are outliers or not.
+
+        Scores the instances in `x` and if the threshold was inferred, returns the outlier labels and p-values as well.
+
+        Parameters
+        ----------
+        x
+            Data to predict. The shape of `x` should be `(n_instances, n_features)`.
+
+        Returns
+        -------
+        Dictionary with keys 'data' and 'meta'. 'data' contains the outlier scores. If threshold inference was \
+        performed, 'data' also contains the threshold value, outlier labels and p-vals . The shape of the scores is \
+        `(n_instances,)`. The higher the score, the more anomalous the instance. 'meta' contains information about \
+        the detector.
+
+        Raises
+        ------
+        NotFittedError
+            If called before detector has been fit.
+        """
+        outputs = self.backend.predict(self.backend._to_tensor(x))
+        output = outlier_prediction_dict()
+        output['data'] = {
+            **output['data'],
+            **self.backend._to_numpy(outputs)
+        }
+        output['meta'] = {
+            **output['meta'],
+            'name': self.__class__.__name__,
+            'detector_type': 'outlier',
+            'online': False,
+            'version': __version__,
+        }
+        return output
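
The docstrings of the new `Mahalanobis` detector above describe fitting as an eigendecomposition of the reference covariance matrix and scoring as the l2-norm of the projection onto inversely-weighted eigenvectors. The following NumPy sketch illustrates that computation under those stated assumptions; it is an illustration of the idea, not the `MahalanobisTorch` backend itself:

import numpy as np

def fit_mahalanobis(x_ref: np.ndarray, min_eigenvalue: float = 1e-6):
    """Compute scaled eigenvectors of the reference covariance matrix."""
    mean = x_ref.mean(axis=0)
    cov = np.cov(x_ref - mean, rowvar=False)
    eigvals, eigvecs = np.linalg.eigh(cov)      # eigenvalues in ascending order
    keep = eigvals > min_eigenvalue             # drop near-zero directions for stability
    # Scale each kept eigenvector by 1/sqrt(eigenvalue): projections of the
    # reference data onto them then have approximately unit variance.
    scaled_eigvecs = eigvecs[:, keep] / np.sqrt(eigvals[keep])
    return mean, scaled_eigvecs

def score_mahalanobis(x: np.ndarray, mean: np.ndarray, scaled_eigvecs: np.ndarray) -> np.ndarray:
    """Project onto the scaled eigenvectors and take the l2-norm per instance."""
    proj = (x - mean) @ scaled_eigvecs
    return np.linalg.norm(proj, axis=1)

# Example: points far from the reference distribution receive higher scores.
rng = np.random.default_rng(0)
x_ref = rng.normal(size=(1000, 3))
mean, vecs = fit_mahalanobis(x_ref)
print(score_mahalanobis(np.array([[0.0, 0.0, 0.0], [8.0, 8.0, 8.0]]), mean, vecs))

Because each direction is weighted by the inverse of its variance, a point aligned with a low-variance direction needs to move only a little from the reference mean to receive a large score, which matches the behaviour described in the class docstring.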
alibi_detect/od/pytorch/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -1,6 +1,5 @@
 from alibi_detect.utils.missing_optional_dependency import import_optional

 KNNTorch = import_optional('alibi_detect.od.pytorch.knn', ['KNNTorch'])
+MahalanobisTorch = import_optional('alibi_detect.od.pytorch.mahalanobis', ['MahalanobisTorch'])
 Ensembler = import_optional('alibi_detect.od.pytorch.ensemble', ['Ensembler'])
-
-to_numpy = import_optional('alibi_detect.od.pytorch.base', ['to_numpy'])

alibi_detect/od/pytorch/base.py

Lines changed: 4 additions & 4 deletions
@@ -216,14 +216,14 @@ def predict(self, x: torch.Tensor) -> TorchOutlierDetectorOutput:
         x
             Data to predict.

+        Returns
+        -------
+        Output of the outlier detector. Includes the p-values, outlier labels, instance scores and threshold.
+
         Raises
         ------
         ValueError
             Raised if the detector is not fit on reference data.
-
-        Returns
-        -------
-        Output of the outlier detector. Includes the p-values, outlier labels, instance scores and threshold.
         """
         self.check_fitted()  # type: ignore
         raw_scores = self.score(x)

alibi_detect/od/pytorch/ensemble.py

Lines changed: 4 additions & 5 deletions
@@ -27,7 +27,6 @@ def transform(self, x: torch.Tensor):
         """
         raise NotImplementedError()

-    @torch.no_grad()
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.transform(x)

@@ -46,7 +45,7 @@ def fit(self, x: torch.Tensor) -> Self:
         """
         pass

-    def set_fitted(self) -> Self:
+    def _set_fitted(self) -> Self:
         """Sets the fitted attribute to True.

         Should be called within each transform method.
@@ -92,7 +91,7 @@ def fit(self, val_scores: torch.Tensor) -> Self:
             score outputs of ensemble of detectors applied to reference data.
         """
         self.val_scores = val_scores
-        return self.set_fitted()
+        return self._set_fitted()

     def transform(self, scores: torch.Tensor) -> torch.Tensor:
         """Transform scores to 1 - p-values.
@@ -133,7 +132,7 @@ def fit(self, val_scores: torch.Tensor) -> Self:
         """
         self.val_means = val_scores.mean(0)[None, :]
         self.val_scales = val_scores.std(0)[None, :]
-        return self.set_fitted()
+        return self._set_fitted()

     def transform(self, scores: torch.Tensor) -> torch.Tensor:
         """Transform scores to normalized values. Subtracts the mean and scales by the standard deviation.
@@ -312,4 +311,4 @@ def fit(self, x: torch.Tensor) -> Self:
         """
         if self.normalizer is not None:
             self.normalizer.fit(x)  # type: ignore
-        return self.set_fitted()
+        return self._set_fitted()
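
The `set_fitted` to `_set_fitted` rename above marks the fitted-state helper as internal to the PyTorch backends. As a rough sketch of the pattern (the mixin name and the `RuntimeError` below are stand-ins, not the `alibi_detect` classes), `fit()` records that fitting happened and methods that need fitted state call a checker first:

import torch

class FittedStateMixin:
    """Minimal sketch of a fitted-state flag; not the alibi_detect implementation."""
    fitted: bool = False

    def _set_fitted(self):
        # Called at the end of fit(); returning self allows `return self._set_fitted()`.
        self.fitted = True
        return self

    def check_fitted(self):
        # Guard used by scoring/prediction methods before touching fitted state.
        if not self.fitted:
            raise RuntimeError(f'{self.__class__.__name__} has not been fit!')

class ToyDetector(FittedStateMixin):
    def fit(self, x_ref: torch.Tensor):
        self.x_ref = x_ref
        return self._set_fitted()

    def score(self, x: torch.Tensor) -> torch.Tensor:
        self.check_fitted()
        # Toy score: distance to the closest reference point.
        return torch.cdist(x, self.x_ref).min(dim=1).values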

alibi_detect/od/pytorch/knn.py

Lines changed: 1 addition & 3 deletions
@@ -42,7 +42,6 @@ def __init__(
         self.ks = torch.tensor(k) if self.ensemble else torch.tensor([k], device=self.device)
         self.ensembler = ensembler

-    @torch.no_grad()
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Detect if `x` is an outlier.

@@ -67,7 +66,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         preds = scores > self.threshold
         return preds

-    @torch.no_grad()
     def score(self, x: torch.Tensor) -> torch.Tensor:
         """Computes the score of `x`

@@ -100,4 +98,4 @@ def fit(self, x_ref: torch.Tensor):
             The Dataset tensor.
         """
         self.x_ref = x_ref
-        self.set_fitted()
+        self._set_fitted()
