Skip to content

Commit 54cab15

Browse files
authored
Make Jitter do something by default (#3066)
* Apply some jitter by default in Jitter
* Add Jitter API examples and update release notes
* Fix docstring
1 parent a23cf31 commit 54cab15

File tree

6 files changed

+236
-21
lines changed

6 files changed

+236
-21
lines changed
Lines changed: 178 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,178 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "f2e5a85d-c710-492b-a4fc-09b45ae26471",
7+
"metadata": {
8+
"tags": [
9+
"hide"
10+
]
11+
},
12+
"outputs": [],
13+
"source": [
14+
"import seaborn.objects as so\n",
15+
"from seaborn import load_dataset\n",
16+
"penguins = load_dataset(\"penguins\")"
17+
]
18+
},
19+
{
20+
"cell_type": "raw",
21+
"id": "14b5927c-42f1-4934-adee-3d380b8b3228",
22+
"metadata": {},
23+
"source": [
24+
"When used without any arguments, a small amount of jitter will be applied along the orientation axis:"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": null,
30+
"id": "bc1b4941-bbe6-4afc-b51a-0ac67cbe417d",
31+
"metadata": {},
32+
"outputs": [],
33+
"source": [
34+
"(\n",
35+
" so.Plot(penguins, \"species\", \"body_mass_g\")\n",
36+
" .add(so.Dots(), so.Jitter())\n",
37+
")"
38+
]
39+
},
40+
{
41+
"cell_type": "raw",
42+
"id": "1101690e-6c19-4219-aa4e-180798454df1",
43+
"metadata": {},
44+
"source": [
45+
"The `width` parameter controls the amount of jitter relative to the spacing between the marks:"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": null,
51+
"id": "c4251b9d-8b11-4c2c-905c-2f3b523dee70",
52+
"metadata": {},
53+
"outputs": [],
54+
"source": [
55+
"(\n",
56+
" so.Plot(penguins, \"species\", \"body_mass_g\")\n",
57+
" .add(so.Dots(), so.Jitter(.5))\n",
58+
")"
59+
]
60+
},
61+
{
62+
"cell_type": "raw",
63+
"id": "38aa639a-356e-4674-970b-53d55379b2b7",
64+
"metadata": {},
65+
"source": [
66+
"The `width` parameter always applies to the orientation axis, so the direction of jitter will adapt along with the orientation:"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"id": "1cfe1c07-7e81-45a0-a989-240503046133",
73+
"metadata": {},
74+
"outputs": [],
75+
"source": [
76+
"(\n",
77+
" so.Plot(penguins, \"body_mass_g\", \"species\")\n",
78+
" .add(so.Dots(), so.Jitter(.5))\n",
79+
")"
80+
]
81+
},
82+
{
83+
"cell_type": "raw",
84+
"id": "0f5de4cc-3383-4503-8b59-9c48230a12a5",
85+
"metadata": {},
86+
"source": [
87+
"Because the `width` jitter is relative, it can be used when the orientation axis is numeric without further tweaking:"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"id": "c94c41e8-29c4-4439-a5d1-0b8ffb244890",
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"(\n",
98+
" so.Plot(penguins[\"body_mass_g\"].round(-3), penguins[\"flipper_length_mm\"])\n",
99+
" .add(so.Dots(), so.Jitter())\n",
100+
")"
101+
]
102+
},
103+
{
104+
"cell_type": "raw",
105+
"id": "dd982dfa-fd9f-4edc-8190-18f0e101ae1a",
106+
"metadata": {},
107+
"source": [
108+
"In contrast to `width`, the `x` and `y` parameters always refer to specific axes and control the jitter in data units:"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"id": "b0f2e5ca-68ad-4439-a4ee-f32f65682e95",
115+
"metadata": {},
116+
"outputs": [],
117+
"source": [
118+
"(\n",
119+
" so.Plot(penguins[\"body_mass_g\"].round(-3), penguins[\"flipper_length_mm\"])\n",
120+
" .add(so.Dots(), so.Jitter(x=100))\n",
121+
")"
122+
]
123+
},
124+
{
125+
"cell_type": "raw",
126+
"id": "a90ba526-8043-42ed-8f57-36445c163c0d",
127+
"metadata": {},
128+
"source": [
129+
"Both `x` and `y` can be used in a single transform:"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": null,
135+
"id": "6c07ed1d-ac77-4b30-90a8-e1b8760f9fad",
136+
"metadata": {},
137+
"outputs": [],
138+
"source": [
139+
"(\n",
140+
" so.Plot(\n",
141+
" penguins[\"body_mass_g\"].round(-3),\n",
142+
" penguins[\"flipper_length_mm\"].round(-1),\n",
143+
" )\n",
144+
" .add(so.Dots(), so.Jitter(x=200, y=5))\n",
145+
")"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": null,
151+
"id": "bb04c7a2-93f0-44cf-aacf-0eb436d0f14b",
152+
"metadata": {},
153+
"outputs": [],
154+
"source": []
155+
}
156+
],
157+
"metadata": {
158+
"kernelspec": {
159+
"display_name": "py310",
160+
"language": "python",
161+
"name": "py310"
162+
},
163+
"language_info": {
164+
"codemirror_mode": {
165+
"name": "ipython",
166+
"version": 3
167+
},
168+
"file_extension": ".py",
169+
"mimetype": "text/x-python",
170+
"name": "python",
171+
"nbconvert_exporter": "python",
172+
"pygments_lexer": "ipython3",
173+
"version": "3.10.0"
174+
}
175+
},
176+
"nbformat": 4,
177+
"nbformat_minor": 5
178+
}

doc/whatsnew/v0.12.1.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ v0.12.1 (Unreleased)
88

99
- |Feature| The :class:`Band` and :class:`Range` marks will now cover the full extent of the data if `min` / `max` variables are not explicitly assigned or added in a transform (:pr:`3056`).
1010

11+
- |Enhancement| The :class:`Jitter` move now applies a small amount of jitter by default (:pr:`3066`).
12+
1113
- |Enhancement| Marks that sort along the orient axis (e.g. :class:`Line`) now use a stable algorithm (:pr:`3064`).
1214

1315
- |Fix| Make :class:`objects.PolyFit` robust to missing data (:pr:`3010`).

seaborn/_core/moves.py

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
11
from __future__ import annotations
22
from dataclasses import dataclass
3-
from typing import ClassVar, Callable, Optional, Union
3+
from typing import ClassVar, Callable, Optional, Union, cast
44

55
import numpy as np
66
from pandas import DataFrame
77

88
from seaborn._core.groupby import GroupBy
99
from seaborn._core.scales import Scale
10+
from seaborn._core.typing import Default
11+
12+
default = Default()
1013

1114

1215
@dataclass
@@ -24,35 +27,48 @@ def __call__(
2427
@dataclass
2528
class Jitter(Move):
2629
"""
27-
Random displacement of marks along either or both axes to reduce overplotting.
30+
Random displacement along one or both axes to reduce overplotting.
31+
32+
Parameters
33+
----------
34+
width : float
35+
Magnitude of jitter, relative to mark width, along the orientation axis.
36+
If not provided, the default value will be 0 when `x` or `y` are set, otherwise
37+
there will be a small amount of jitter applied by default.
38+
x : float
39+
Magnitude of jitter, in data units, along the x axis.
40+
y : float
41+
Magnitude of jitter, in data units, along the y axis.
42+
43+
Examples
44+
--------
45+
.. include:: ../docstrings/objects.Jitter.rst
46+
2847
"""
29-
width: float = 0
48+
width: float | Default = default
3049
x: float = 0
3150
y: float = 0
32-
33-
seed: Optional[int] = None
34-
35-
# TODO what is the best way to have a reasonable default?
36-
# The problem is that "reasonable" seems dependent on the mark
51+
seed: int | None = None
3752

3853
def __call__(
3954
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
4055
) -> DataFrame:
4156

42-
# TODO is it a problem that GroupBy is not used for anything here?
43-
# Should we type it as optional?
44-
4557
data = data.copy()
46-
4758
rng = np.random.default_rng(self.seed)
4859

4960
def jitter(data, col, scale):
5061
noise = rng.uniform(-.5, +.5, len(data))
5162
offsets = noise * scale
5263
return data[col] + offsets
5364

65+
if self.width is default:
66+
width = 0.0 if self.x or self.y else 0.2
67+
else:
68+
width = cast(float, self.width)
69+
5470
if self.width:
55-
data[orient] = jitter(data, orient, self.width * data["width"])
71+
data[orient] = jitter(data, orient, width * data["width"])
5672
if self.x:
5773
data["x"] = jitter(data, "x", self.x)
5874
if self.y:

seaborn/_core/plot.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,13 @@
2929
from seaborn._core.subplots import Subplots
3030
from seaborn._core.groupby import GroupBy
3131
from seaborn._core.properties import PROPERTIES, Property
32-
from seaborn._core.typing import DataSource, VariableSpec, VariableSpecList, OrderSpec
32+
from seaborn._core.typing import (
33+
DataSource,
34+
VariableSpec,
35+
VariableSpecList,
36+
OrderSpec,
37+
Default,
38+
)
3339
from seaborn._core.rules import categorical_order
3440
from seaborn._compat import set_scale_obj, set_layout_engine
3541
from seaborn.rcmod import axes_style, plotting_context
@@ -47,6 +53,9 @@
4753
from typing_extensions import TypedDict
4854

4955

56+
default = Default()
57+
58+
5059
# ---- Definitions for internal specs --------------------------------- #
5160

5261

@@ -79,13 +88,6 @@ class PairSpec(TypedDict, total=False):
7988

8089
# --- Local helpers ----------------------------------------------------------------
8190

82-
class Default:
83-
def __repr__(self):
84-
return "<default>"
85-
86-
87-
default = Default()
88-
8991

9092
@contextmanager
9193
def theme_context(params: dict[str, Any]) -> Generator:

seaborn/_core/typing.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,3 +29,11 @@
2929
ContinuousValueSpec = Union[
3030
Tuple[float, float], List[float], Dict[Any, float], None,
3131
]
32+
33+
34+
class Default:
35+
def __repr__(self):
36+
return "<default>"
37+
38+
39+
default = Default()

tests/_core/test_moves.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,15 @@ def check_pos(self, res, df, var, limit):
7878
assert (res[var] < df[var] + limit / 2).all()
7979
assert (res[var] > df[var] - limit / 2).all()
8080

81+
def test_default(self, df):
82+
83+
orient = "x"
84+
groupby = self.get_groupby(df, orient)
85+
res = Jitter()(df, groupby, orient, {})
86+
self.check_same(res, df, "y", "grp2", "width")
87+
self.check_pos(res, df, "x", 0.2 * df["width"])
88+
assert (res["x"] - df["x"]).abs().min() > 0
89+
8190
def test_width(self, df):
8291

8392
width = .4

0 commit comments

Comments
 (0)