Skip to content

Commit 1ea4a96

Browse files
authored
ENH: LineStats aggregation (#380)
* Stat aggregation

  line_profiler/__init__.py
      Now also importing `line_profiler.line_profiler.LineStats`

  line_profiler/line_profiler.py[i]
      pathlib
          Removed unused import in stub file
      LineStats
          New subclass of `line_profiler._line_profiler.LineStats` helping with utilization and aggregation, with the following added methods:
          - `__repr__()`
          - `__add__()`, `__iadd__()`
          - `print()`
          - `from_files()`, `to_file()`
          - `from_stats_objects()`
      LineProfiler
          get_stats()
              Now returning a `line_profiler.line_profiler.LineStats` object instead of a `line_profiler._line_profiler.LineStats`
          dump_stats(), print_stats()
              Now deferring to the respective `LineStats` methods
      load_stats()
          Now an alias to `LineStats.from_files()`
      main()
          Updated to be able to take multiple positional arguments

* Tests

  tests/test_cli.py::test_multiple_lprof_files()
      New test for using `python -m line_profiler` with multiple `.lprof` files
  tests/test_line_profiler.py::test_load_stats_files()
      New test for loading stats files (old and new, single and multiple)

* Speed up test

  tests/test_cli.py::test_multiple_lprof_files()
      Sped up by avoiding spawning new Python processes

* CHANGELOG entry

* Fix lint
1 parent d05582d commit 1ea4a96

File tree

6 files changed

+339
-31
lines changed

6 files changed

+339
-31
lines changed

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Changes
5454
* ``kernprof`` and ``python -m line_profiler`` CLI options
5555
* ``GlobalProfiler`` configurations, and
5656
* profiler output (e.g. ``LineProfiler.print_stats()``) formatting
57+
* ENH: Added capability to combine profiling data both programmatically (``LineStats.__add__()``) and via the CLI (``python -m line_profiler``) (#380, originally proposed in #219)
5758

5859
4.2.0
5960
~~~~~

line_profiler/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,14 +251,14 @@ def main():
251251
# NOTE: This needs to be in sync with ../kernprof.py and line_profiler.py
252252
__version__ = '5.0.1'
253253

254-
from .line_profiler import (LineProfiler,
254+
from .line_profiler import (LineProfiler, LineStats,
255255
load_ipython_extension, load_stats, main,
256256
show_func, show_text,)
257257

258258

259259
from .explicit_profiler import profile
260260

261261

262-
__all__ = ['LineProfiler', 'line_profiler',
262+
__all__ = ['LineProfiler', 'LineStats', 'line_profiler',
263263
'load_ipython_extension', 'load_stats', 'main', 'show_func',
264264
'show_text', '__version__', 'profile']

line_profiler/line_profiler.py

Lines changed: 178 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import functools
88
import inspect
99
import linecache
10+
import operator
1011
import os
1112
import pickle
1213
import sys
@@ -17,7 +18,8 @@
1718
from datetime import datetime
1819

1920
try:
20-
from ._line_profiler import LineProfiler as CLineProfiler
21+
from ._line_profiler import (LineProfiler as CLineProfiler,
22+
LineStats as CLineStats)
2123
except ImportError as ex:
2224
raise ImportError(
2325
'The line_profiler._line_profiler c-extension is not importable. '
@@ -186,6 +188,169 @@ def __init__(self, func, profiler_id):
186188
self.profiler_id = profiler_id
187189

188190

191+
class LineStats(CLineStats):
    """
    Profiling results that can be compared, combined, printed, and
    written to / read from files.

    The methods here rely on two attributes of the instance:

    * ``timings``: mapping from a 3-tuple key to a list of
      ``(lineno, nhits, time)`` entries
    * ``unit``: the scale that the ``time`` values are expressed in;
      objects with different units are rescaled to a common unit when
      they are combined
    """
    def __repr__(self):
        """
        Return ``<class name>(<timings>, <unit>)``, with the unit
        formatted as ``.2G``.
        """
        return '{}({}, {:.2G})'.format(
            type(self).__name__, self.timings, self.unit)

    def __eq__(self, other):
        """
        Two stats objects are equal if both their ``.timings`` and
        their ``.unit`` compare equal; ``NotImplemented`` is returned
        if ``other`` lacks either attribute or cannot be compared.

        Example:
            >>> from copy import deepcopy
            >>>
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = deepcopy(stats1)
            >>> assert stats1 == stats2 is not stats1
            >>> stats2.unit = 1E-7
            >>> assert stats2 != stats1
            >>> stats3 = deepcopy(stats1)
            >>> assert stats1 == stats3 is not stats1
            >>> stats3.timings['foo', 1, 'spam.py'][:] = [(2, 11, 330)]
            >>> assert stats3 != stats1
        """
        for attr in 'timings', 'unit':
            getter = operator.attrgetter(attr)
            try:
                if getter(self) != getter(other):
                    return False
            except (AttributeError, TypeError):
                return NotImplemented
        return True

    def __add__(self, other):
        """
        Return a new instance holding the combined timings of ``self``
        and ``other``, expressed in the larger of the two units.

        Example:
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = LineStats(
            ...     {('bar', 10, 'spam.py'):
            ...      [(11, 10, 20000), (12, 5, 1000)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 5000)]},
            ...     1E-7)
            >>> stats_sum = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 12, 3000), (12, 6, 600)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 500)]},
            ...     1E-6)
            >>> assert stats1 + stats2 == stats2 + stats1 == stats_sum
        """
        timings, unit = self._get_aggregated_timings([self, other])
        return type(self)(timings, unit)

    def __iadd__(self, other):
        """
        Combine the timings of ``other`` into ``self`` in place
        (replacing ``.timings`` and ``.unit``) and return ``self``.

        Example:
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = LineStats(
            ...     {('bar', 10, 'spam.py'):
            ...      [(11, 10, 20000), (12, 5, 1000)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 5000)]},
            ...     1E-7)
            >>> stats_sum = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 12, 3000), (12, 6, 600)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 500)]},
            ...     1E-6)
            >>> address = id(stats2)
            >>> stats2 += stats1
            >>> assert id(stats2) == address
            >>> assert stats2 == stats_sum
        """
        self.timings, self.unit = self._get_aggregated_timings([self, other])
        return self

    def print(self, stream=None, **kwargs):
        """
        Show the stats as text by passing ``.timings`` and ``.unit``
        to :py:func:`show_text`; ``stream`` and any other keyword
        arguments are forwarded to it unchanged.
        """
        show_text(self.timings, self.unit, stream=stream, **kwargs)

    def to_file(self, filename):
        """ Pickle the instance to the given filename.
        """
        with open(filename, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    @classmethod
    def from_files(cls, file, /, *files):
        """
        Utility function to load an instance from the given filenames.

        Each file is unpickled and the resulting stats objects are
        combined via :py:meth:`~.from_stats_objects`.

        Warning:
            The files are read with :py:func:`pickle.load`, which can
            execute arbitrary code while loading; only pass files that
            come from a trusted source.
        """
        stats_objs = []
        # Use a separate loop variable so that the `file` parameter is
        # not rebound while iterating
        for filename in (file, *files):
            with open(filename, 'rb') as f:
                stats_objs.append(pickle.load(f))
        return cls.from_stats_objects(*stats_objs)

    @classmethod
    def from_stats_objects(cls, stats, /, *more_stats):
        """
        Build a new instance from one or more objects having
        ``.timings`` and ``.unit`` attributes, combining their timings.

        Example:
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = LineStats(
            ...     {('bar', 10, 'spam.py'):
            ...      [(11, 10, 20000), (12, 5, 1000)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 5000)]},
            ...     1E-7)
            >>> stats_combined = LineStats.from_stats_objects(
            ...     stats1, stats2)
            >>> assert stats_combined.unit == 1E-6
            >>> assert stats_combined.timings == {
            ...     ('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...     ('bar', 10, 'spam.py'):
            ...     [(11, 12, 3000), (12, 6, 600)],
            ...     ('baz', 5, 'eggs.py'): [(5, 2, 500)]}
        """
        timings, unit = cls._get_aggregated_timings([stats, *more_stats])
        return cls(timings, unit)

    @staticmethod
    def _get_aggregated_timings(stats_objs):
        """
        Combine the timings of the given stats objects.

        Args:
            stats_objs: iterable of objects with ``.timings`` and
                ``.unit`` attributes.

        Returns:
            A ``(timings, unit)`` pair.  With a single object, the
            timings are a copy (new dict, copied entry lists) and the
            unit is unchanged.  With several, hit counts and times are
            summed per key and line number, the times being rescaled
            to the largest unit among the objects and rounded to
            integers; the entries of each key are sorted by line
            number.

        Raises:
            ValueError: if ``stats_objs`` is empty.
        """
        # Materialize once so that the emptiness check, the count and
        # the sort all see the same items, whatever iterable is passed
        stats_list = list(stats_objs)
        if not stats_list:
            raise ValueError(f'stats_objs = {stats_objs!r}: empty')
        if len(stats_list) == 1:
            stats, = stats_list
            timings = {key: entries.copy()
                       for key, entries in stats.timings.items()}
            return timings, stats.unit
        # Add from small scaling factors to large to minimize
        # rounding errors
        ordered = sorted(stats_list, key=operator.attrgetter('unit'))
        unit = ordered[-1].unit
        # Maps each timing key to `{lineno: (nhits, time)}`, where the
        # (float) times are already rescaled to `unit`
        timing_dict = {}
        for stats in ordered:
            factor = stats.unit / unit
            for key, entries in stats.timings.items():
                entry_dict = timing_dict.setdefault(key, {})
                for lineno, nhits, time in entries:
                    prev_nhits, prev_time = entry_dict.get(lineno, (0, 0))
                    entry_dict[lineno] = (prev_nhits + nhits,
                                          prev_time + factor * time)
        timings = {
            key: [(lineno, nhits, int(round(time, 0)))
                  for lineno, (nhits, time) in sorted(entry_dict.items())]
            for key, entry_dict in timing_dict.items()}
        return timings, unit
352+
353+
189354
class LineProfiler(CLineProfiler, ByCountProfilerMixin):
190355
"""
191356
A profiler that records the execution times of individual lines.
@@ -296,24 +461,24 @@ def _debug(self, msg):
296461
msg = f'{self_repr}: {msg}'
297462
logger.debug(msg)
298463

464+
def get_stats(self):
465+
return LineStats.from_stats_objects(super().get_stats())
466+
299467
def dump_stats(self, filename):
300468
""" Dump a representation of the data to a file as a pickled
301469
:py:class:`~.LineStats` object from :py:meth:`~.get_stats()`.
302470
"""
303-
lstats = self.get_stats()
304-
with open(filename, 'wb') as f:
305-
pickle.dump(lstats, f, pickle.HIGHEST_PROTOCOL)
471+
self.get_stats().to_file(filename)
306472

307473
def print_stats(self, stream=None, output_unit=None, stripzeros=False,
308474
details=True, summarize=False, sort=False, rich=False, *,
309475
config=None):
310476
""" Show the gathered statistics.
311477
"""
312-
lstats = self.get_stats()
313-
show_text(lstats.timings, lstats.unit, output_unit=output_unit,
314-
stream=stream, stripzeros=stripzeros,
315-
details=details, summarize=summarize, sort=sort, rich=rich,
316-
config=config)
478+
self.get_stats().print(
479+
stream=stream, output_unit=output_unit,
480+
stripzeros=stripzeros, details=details, summarize=summarize,
481+
sort=sort, rich=rich, config=config)
317482

318483
def _add_namespace(
319484
self, namespace, *,
@@ -799,12 +964,7 @@ def show_text(stats, unit, output_unit=None, stream=None, stripzeros=False,
799964
stream.write(line + '\n')
800965

801966

802-
def load_stats(filename):
803-
""" Utility function to load a pickled :py:class:`~.LineStats`
804-
object from a given filename.
805-
"""
806-
with open(filename, 'rb') as f:
807-
return pickle.load(f)
967+
load_stats = LineStats.from_files
808968

809969

810970
def main():
@@ -846,7 +1006,8 @@ def main():
8461006
help='Print a summary of total function time. '
8471007
f'(Default: {default.conf_dict["summarize"]})')
8481008
add_argument(parser, 'profile_output',
849-
help="'*.lprof' file created by `kernprof`")
1009+
nargs='+',
1010+
help="'*.lprof' file(s) created by `kernprof`")
8501011

8511012
args = parser.parse_args()
8521013
if args.config:
@@ -856,7 +1017,7 @@ def main():
8561017
if getattr(args, key, None) is None:
8571018
setattr(args, key, default)
8581019

859-
lstats = load_stats(args.profile_output)
1020+
lstats = LineStats.from_files(*args.profile_output)
8601021
show_text(lstats.timings, lstats.unit,
8611022
output_unit=args.unit,
8621023
stripzeros=args.skip_zero,

line_profiler/line_profiler.pyi

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
import io
2-
import pathlib
32
from functools import cached_property, partial, partialmethod
43
from os import PathLike
54
from types import FunctionType, ModuleType
6-
from typing import TYPE_CHECKING, overload, Callable, Literal, Mapping, TypeVar
5+
from typing import (TYPE_CHECKING,
6+
overload,
7+
Callable, Mapping,
8+
Literal, Self,
9+
Protocol, TypeVar)
710
try:
811
from typing import ( # type: ignore[attr-defined] # noqa: F401
912
ParamSpec)
1013
except ImportError:
1114
from typing_extensions import ParamSpec # noqa: F401
1215
from _typeshed import Incomplete
13-
from ._line_profiler import LineProfiler as CLineProfiler
16+
from ._line_profiler import (LineProfiler as CLineProfiler,
17+
LineStats as CLineStats)
1418
from .profiler_mixin import ByCountProfilerMixin, CLevelCallable
1519
from .scoping_policy import ScopingPolicy, ScopingPolicyDict
1620

@@ -33,6 +37,42 @@ def load_ipython_extension(ip) -> None:
3337
...
3438

3539

40+
class _StatsLike(Protocol):
41+
timings: Mapping[tuple[str, int, str], # funcname, lineno, filename
42+
list[tuple[int, int, int]]] # lineno, nhits, time
43+
unit: float
44+
45+
46+
class LineStats(CLineStats):
47+
def to_file(self, filename: PathLike[str] | str) -> None:
48+
...
49+
50+
def print(self, stream: Incomplete | None = None, **kwargs) -> None:
51+
...
52+
53+
@classmethod
54+
def from_files(cls, file: PathLike[str] | str, /,
55+
*files: PathLike[str] | str) -> Self:
56+
...
57+
58+
@classmethod
59+
def from_stats_objects(cls, stats: _StatsLike, /,
60+
*more_stats: _StatsLike) -> Self:
61+
...
62+
63+
def __repr__(self) -> str:
64+
...
65+
66+
def __eq__(self, other) -> bool:
67+
...
68+
69+
def __add__(self, other: _StatsLike) -> Self:
70+
...
71+
72+
def __iadd__(self, other: _StatsLike) -> Self:
73+
...
74+
75+
3676
class LineProfiler(CLineProfiler, ByCountProfilerMixin):
3777
@overload
3878
def __call__(self, # type: ignore[overload-overlap]
@@ -86,6 +126,9 @@ class LineProfiler(CLineProfiler, ByCountProfilerMixin):
86126
name: str | None = None) -> Literal[0, 1]:
87127
...
88128

129+
def get_stats(self) -> LineStats:
130+
...
131+
89132
def dump_stats(self, filename) -> None:
90133
...
91134

@@ -148,8 +191,7 @@ def show_text(stats,
148191
...
149192

150193

151-
def load_stats(filename):
152-
...
194+
load_stats = LineStats.from_files
153195

154196

155197
def main():

0 commit comments

Comments
 (0)