From d6e5b57a089ba236c7df4d1ba1d20669bf809ad3 Mon Sep 17 00:00:00 2001 From: "Michael K. Borregaard" Date: Tue, 5 Sep 2017 14:01:59 +0200 Subject: [PATCH 1/3] fix-2dhistogram-bins --- src/recipes.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/recipes.jl b/src/recipes.jl index 208a4360..aeee2bc9 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -506,7 +506,8 @@ function _auto_binning_nbins{N}(vs::NTuple{N,AbstractVector}, dim::Integer; mode n_samples = length(linearindices(first(vs))) # Estimator for number of samples in one row/column of bins along each axis: - n = max(1, n_samples^(1/N)) + nd = n_samples^(1/(2+N)) + v = vs[dim] @@ -515,15 +516,15 @@ function _auto_binning_nbins{N}(vs::NTuple{N,AbstractVector}, dim::Integer; mode end if mode == :sqrt # Square-root choice - _cl(sqrt(n)) + _cl(sqrt(n_samples)) elseif mode == :sturges # Sturges' formula - _cl(log2(n)) + 1 + _cl(log2(n_samples)) + 1 elseif mode == :rice # Rice Rule - _cl(2 * n^(1/3)) + _cl(2 * nd) elseif mode == :scott # Scott's normal reference rule - _cl(_span(v) / (3.5 * std(v) / n^(1/3))) + _cl(_span(v) / (3.5 * std(v) / nd)) elseif mode == :fd # Freedman–Diaconis rule - _cl(_span(v) / (2 * _iqr(v) / n^(1/3))) + _cl(_span(v) / (2 * _iqr(v) / nd)) elseif mode == :wand wand_edges(v) # this makes this function not type stable, but the type instability does not propagate else From cd9f00300cacbf91cd5ca3526666be3da22a1211 Mon Sep 17 00:00:00 2001 From: "Michael K. 
Borregaard" Date: Tue, 5 Sep 2017 15:40:11 +0200 Subject: [PATCH 2/3] Add correction for correlations --- src/recipes.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/recipes.jl b/src/recipes.jl index aeee2bc9..4105f7c5 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -505,9 +505,10 @@ function _auto_binning_nbins{N}(vs::NTuple{N,AbstractVector}, dim::Integer; mode _span(v) = ignorenan_maximum(v) - ignorenan_minimum(v) n_samples = length(linearindices(first(vs))) - # Estimator for number of samples in one row/column of bins along each axis: - nd = n_samples^(1/(2+N)) + # The nd estimator is the key to most automatic binning methods, and is modified for two-dimensional histograms to include correlation + nd = n_samples^(1/(2+N)) + nd = N == 2 ? nd / (1-cor(first(vs), last(vs))^2)^(3//8) : nd # the >2-dimensional case does not have a nice solution to correlations v = vs[dim] From 20ef846c38d33b44cbbf36d6ba0ab86362d9ad76 Mon Sep 17 00:00:00 2001 From: "Michael K. Borregaard" Date: Tue, 5 Sep 2017 22:42:05 +0200 Subject: [PATCH 3/3] move bracket in Sturges --- src/recipes.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recipes.jl b/src/recipes.jl index 4105f7c5..61d3de87 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -519,7 +519,7 @@ function _auto_binning_nbins{N}(vs::NTuple{N,AbstractVector}, dim::Integer; mode if mode == :sqrt # Square-root choice _cl(sqrt(n_samples)) elseif mode == :sturges # Sturges' formula - _cl(log2(n_samples)) + 1 + _cl(log2(n_samples) + 1) elseif mode == :rice # Rice Rule _cl(2 * nd) elseif mode == :scott # Scott's normal reference rule