Skip to content

Commit eb7df5f

Browse files
authored
some small repairs (#7)
- Updated readme to use the actual names I ended up going with in #1
- Fix bug when passing `Pair` object with `on`
- Fix bug in method definition for `quantile_windows`
- Test various keyword arguments for `interval_join`
1 parent bae33e4 commit eb7df5f

File tree

5 files changed

+33
-13
lines changed

5 files changed

+33
-13
lines changed

.JuliaFormatter.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
style = "yas"

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "DataFrameIntervals"
22
uuid = "33b79e07-adbe-4034-b8be-6bacde625d75"
33
authors = ["Beacon Biosignals, Inc."]
4-
version = "0.0.1"
4+
version = "0.0.2"
55

66
[deps]
77
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

README.md

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@
77
[![Docs: Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/DataFrameIntervals.jl/dev)
88

99
DataFrameIntervals provides two functions that are handy for computing joins over intervals
10-
of time: split_into and split_into_combine, and a helper function called `quantile_windows`.
10+
of time: `interval_join` and `groupby_interval_join`, and a helper function called `quantile_windows`.
1111

12-
Rows match in this join if their time spans overlap. The time spans can be represented as i[`TimeSpan`](https://juliapackages.com/p/timespans) objects or [`Interval`](https://juliapackages.com/p/intervals) objects.
12+
Rows match in this join if their time spans overlap. The time spans can be represented as
1313

14-
Currently this requires an unreleased version of `Intervals.jl` (which should be version 1.8 when released). Make sure to add the following to your project before adding `DataFrameIntervals`.
14+
- [`TimeSpan`](https://juliapackages.com/p/timespans) objects
15+
- [`Interval`](https://juliapackages.com/p/intervals) objects.
16+
- `NamedTuple`s with `start` and `stop` fields.
17+
18+
Currently this requires an unreleased version of `Intervals.jl` (which should be version 1.8 when released). If you don't use the manifest, make sure to add the following to your project before adding `DataFrameIntervals`.
1519

1620
```
1721
julia> ]add https://github.com/invenia/Intervals.jl#rf/intervalset-type
@@ -53,13 +57,13 @@ df = DataFrame(label = rand(('a':'d'), n), x = rand(n), span = spans)
5357
```julia
5458
quarters = quantile_windows(4, df, label=:quarter)
5559

56-
split_into(df, quarters)
60+
interval_join(df, quarters, on=:span)
5761
```
5862

5963
```
6064
103×6 DataFrame
61-
Row │ quarter label x left_span right_span span
62-
│ Int64 Char Float64 TimeSpan TimeSpan TimeSpan
65+
Row │ quarter label x span_left span_right span
66+
│ Int64 Char Float64 TimeSpan TimeSpan TimeSpan
6367
─────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
6468
1 │ 1 b 0.0606309 TimeSpan(00:00:05.164631882, 00:… TimeSpan(00:00:05.164631882, 00:… TimeSpan(00:00:05.164631882, 00:…
6569
2 │ 1 a 0.961599 TimeSpan(00:00:08.853504418, 00:… TimeSpan(00:00:05.164631882, 00:… TimeSpan(00:00:08.853504418, 00:…

src/DataFrameIntervals.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ forright(x::Pair) = last(x)
5959

6060
function setup_column_names!(left, right; on, renamecols=identity => identity,
6161
renameon=:_left => :_right)
62-
if !(on isa Symbol || on isa AbstractString)
62+
if !(on isa Union{Symbol,AbstractString,Pair{Symbol,Symbol},
63+
Pair{<:AbstractString,<:AbstractString}})
6364
error("Interval joins support only one `on` column; iterables are not allowed.")
6465
end
6566

@@ -335,7 +336,7 @@ function quantile_windows(n, span_; spancol=:span, label=:index, min_duration=no
335336
df = DataFrame(; (spancol => splits, label_helper(label) => value_helper(label, n))...)
336337
return df
337338
end
338-
function quantile_windows(n, span::DataFrame; spancol=:span, kwds...)
339+
function quantile_windows(n, span::AbstractDataFrame; spancol=:span, kwds...)
339340
return quantile_windows(n, dfspan(span, spancol); spancol, kwds...)
340341
end
341342

test/runtests.jl

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,14 @@ Base.isapprox(a::TimePeriod, b::TimePeriod; atol=period) = return abs(a - b) ≤
2020
df1 = DataFrame(; label=rand(('a':'d'), n), x=rand(n), span=spans)
2121
quarters = quantile_windows(4, df1; label=:quarter)
2222
@test nrow(quarters) == 4
23-
@test isapprox(duration(quarters.span[1]), duration(quarters.span[2]),
23+
@test isapprox(duration(quarters.span[1]), duration(quarters.span[2]);
2424
atol=Nanosecond(1))
25-
@test isapprox(duration(quarters.span[2]), duration(quarters.span[3]),
25+
@test isapprox(duration(quarters.span[2]), duration(quarters.span[3]);
2626
atol=Nanosecond(1))
2727
@test isapprox(duration(quarters.span[2]), duration(quarters.span[3]);
2828
atol=Nanosecond(1)) ||
2929
duration(quarters.span[4]) < duration(quarters.span[3])
30-
31-
# TODO: test various column renaming bevhariors
30+
@test nrow(quantile_windows(4, subset(df1, :label => ByRow(in('a':'b'))))) == 4
3231

3332
# NOTE: the bulk of the correctness testing for interval intersections
3433
# has already been handled by calling out to `Intervals.find_intervals`
@@ -41,6 +40,21 @@ Base.isapprox(a::TimePeriod, b::TimePeriod; atol=period) = return abs(a - b) ≤
4140
DataFrameIntervals.interval.(df1.span))
4241
@test df_result.span_left == mapreduce(ix -> df1.span[ix], vcat, ixs)
4342

43+
# test column renaming
44+
rename!(quarters, :span => :time_span)
45+
df_result2 = interval_join(df1, quarters; on=:span => :time_span,
46+
renameon=:_a => :_b,
47+
renamecols=:_left => :_right)
48+
rename!(quarters, :time_span => :span)
49+
@test issetequal(names(df_result2),
50+
["time_span_b", "quarter_right", "label_left", "x_left", "span_a",
51+
"span"])
52+
quarters_2 = insertcols!(copy(quarters), :label => rand('y':'z', 4))
53+
df_result3 = interval_join(df1, quarters_2; on=:span, makeunique=true)
54+
@test issetequal(names(df_result3),
55+
["span_right", "quarter", "label", "label_1", "x",
56+
"span_left", "span"])
57+
4458
# test interval joins with named tuples
4559
nt_spans = [(; start=start(x), stop=stop(x)) for x in spans]
4660
df1_nt = hcat(df1[!, Not(:span)], DataFrame(; span=nt_spans))

0 commit comments

Comments
 (0)