From d29a44efa734b3a54e6b4c2c444598d53bd222de Mon Sep 17 00:00:00 2001 From: Michael Krabbe Borregaard Date: Sun, 13 Jan 2019 12:44:59 +0100 Subject: [PATCH 1/4] fix histograms with NaN --- src/recipes.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/recipes.jl b/src/recipes.jl index 5ae3cebf..f86bad70 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -590,7 +590,7 @@ wand_edges(x...) = (@warn("Load the StatPlots package in order to use :wand bins function _auto_binning_nbins(vs::NTuple{N,AbstractVector}, dim::Integer; mode::Symbol = :auto) where N _cl(x) = ceil(Int, NaNMath.max(x, one(x))) - _iqr(v) = (q = quantile(v, 0.75) - quantile(v, 0.25); q > 0 ? q : oftype(q, 1)) + _iqr(v) = (q = quantile(filter(!isnan,v), 0.75) - quantile(filter(!isnan,v), 0.25); q > 0 ? q : oftype(q, 1)) _span(v) = ignorenan_maximum(v) - ignorenan_minimum(v) n_samples = length(LinearIndices(first(vs))) @@ -622,7 +622,7 @@ function _auto_binning_nbins(vs::NTuple{N,AbstractVector}, dim::Integer; mode::S end end -_hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Integer) where {N} = StatsBase.histrange(vs[dim], binning, :left) +_hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Integer) where {N} = StatsBase.histrange(filter(!isnan,vs[dim]), binning, :left) _hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Symbol) where {N} = _hist_edge(vs, dim, _auto_binning_nbins(vs, dim, mode = binning)) _hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::AbstractVector) where {N} = binning @@ -635,11 +635,17 @@ _hist_edges(vs::NTuple{N,AbstractVector}, binning::Union{Integer, Symbol, Abstra _hist_norm_mode(mode::Symbol) = mode _hist_norm_mode(mode::Bool) = mode ? :pdf : :none +function _filternans(vs::NTuple{N,AbstractVector}) where N + _invertedindex(v, not) = [j for (i,j) in enumerate(v) if !(i ∈ not)] + nots = union(Set.(findall.(isnan, vs))...) + _invertedindex.(vs, Ref(nots)) +end + function _make_hist(vs::NTuple{N,AbstractVector}, binning; normed = false, weights = nothing) where N edges = _hist_edges(vs, binning) h = float( weights == nothing ? - StatsBase.fit(StatsBase.Histogram, vs, edges, closed = :left) : - StatsBase.fit(StatsBase.Histogram, vs, StatsBase.Weights(weights), edges, closed = :left) + StatsBase.fit(StatsBase.Histogram, _filternans(vs), edges, closed = :left) : + StatsBase.fit(StatsBase.Histogram, _filternans(vs), StatsBase.Weights(weights), edges, closed = :left) ) normalize!(h, mode = _hist_norm_mode(normed)) end From 979f9495f104c524b79f6cee1efeead165c8d704 Mon Sep 17 00:00:00 2001 From: Michael Krabbe Borregaard Date: Sun, 13 Jan 2019 14:17:54 +0100 Subject: [PATCH 2/4] add faster method for 1-d histogram --- src/recipes.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/recipes.jl b/src/recipes.jl index f86bad70..0b729b3d 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -635,6 +635,7 @@ _hist_edges(vs::NTuple{N,AbstractVector}, binning::Union{Integer, Symbol, Abstra _hist_norm_mode(mode::Symbol) = mode _hist_norm_mode(mode::Bool) = mode ? :pdf : :none +_filternans(vs::NTuple{1,AbstractVector}) = filter!.(!isnan, vs) function _filternans(vs::NTuple{N,AbstractVector}) where N _invertedindex(v, not) = [j for (i,j) in enumerate(v) if !(i ∈ not)] nots = union(Set.(findall.(isnan, vs))...) From 667ed161c0d1710fdc1c287254f17590ab0e106e Mon Sep 17 00:00:00 2001 From: Michael Krabbe Borregaard Date: Mon, 14 Jan 2019 10:21:35 +0100 Subject: [PATCH 3/4] move filtering to makebins --- src/recipes.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/recipes.jl b/src/recipes.jl index 0b729b3d..0bef95c7 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -589,9 +589,9 @@ Plots.@deps stepbins path wand_edges(x...) = (@warn("Load the StatPlots package in order to use :wand bins. Defaulting to :auto", once = true); :auto) function _auto_binning_nbins(vs::NTuple{N,AbstractVector}, dim::Integer; mode::Symbol = :auto) where N - _cl(x) = ceil(Int, NaNMath.max(x, one(x))) - _iqr(v) = (q = quantile(filter(!isnan,v), 0.75) - quantile(filter(!isnan,v), 0.25); q > 0 ? q : oftype(q, 1)) - _span(v) = ignorenan_maximum(v) - ignorenan_minimum(v) + _cl(x) = ceil(Int, max(x, one(x))) + _iqr(v) = (q = quantile(v, 0.75) - quantile(v, 0.25); q > 0 ? q : oftype(q, 1)) + _span(v) = maximum(v) - minimum(v) n_samples = length(LinearIndices(first(vs))) @@ -622,7 +622,7 @@ function _auto_binning_nbins(vs::NTuple{N,AbstractVector}, dim::Integer; mode::S end end -_hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Integer) where {N} = StatsBase.histrange(filter(!isnan,vs[dim]), binning, :left) +_hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Integer) where {N} = StatsBase.histrange(vs[dim], binning, :left) _hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Symbol) where {N} = _hist_edge(vs, dim, _auto_binning_nbins(vs, dim, mode = binning)) _hist_edge(vs::NTuple{N,AbstractVector}, dim::Integer, binning::AbstractVector) where {N} = binning @@ -643,10 +643,11 @@ function _filternans(vs::NTuple{N,AbstractVector}) where N end function _make_hist(vs::NTuple{N,AbstractVector}, binning; normed = false, weights = nothing) where N + vs = _filternans(vs) edges = _hist_edges(vs, binning) h = float( weights == nothing ? - StatsBase.fit(StatsBase.Histogram, _filternans(vs), edges, closed = :left) : - StatsBase.fit(StatsBase.Histogram, _filternans(vs), StatsBase.Weights(weights), edges, closed = :left) + StatsBase.fit(StatsBase.Histogram, vs, edges, closed = :left) : + StatsBase.fit(StatsBase.Histogram, vs, StatsBase.Weights(weights), edges, closed = :left) ) normalize!(h, mode = _hist_norm_mode(normed)) end From 3d77c59c92209902e30e81538c34cbc1df6d8cbf Mon Sep 17 00:00:00 2001 From: Michael Krabbe Borregaard Date: Mon, 14 Jan 2019 11:44:42 +0100 Subject: [PATCH 4/4] don't rebind vs --- src/recipes.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/recipes.jl b/src/recipes.jl index 0bef95c7..d0e9e7e6 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -643,11 +643,11 @@ function _filternans(vs::NTuple{N,AbstractVector}) where N end function _make_hist(vs::NTuple{N,AbstractVector}, binning; normed = false, weights = nothing) where N - vs = _filternans(vs) - edges = _hist_edges(vs, binning) + localvs = _filternans(vs) + edges = _hist_edges(localvs, binning) h = float( weights == nothing ? - StatsBase.fit(StatsBase.Histogram, vs, edges, closed = :left) : - StatsBase.fit(StatsBase.Histogram, vs, StatsBase.Weights(weights), edges, closed = :left) + StatsBase.fit(StatsBase.Histogram, localvs, edges, closed = :left) : + StatsBase.fit(StatsBase.Histogram, localvs, StatsBase.Weights(weights), edges, closed = :left) ) normalize!(h, mode = _hist_norm_mode(normed)) end