From f99957770a2575845fc33ae73a726fbc0146e857 Mon Sep 17 00:00:00 2001 From: "Michael K. Borregaard" Date: Tue, 21 Feb 2017 10:06:20 +0100 Subject: [PATCH 1/2] base histograms on StatsBase --- REQUIRE | 1 + src/Plots.jl | 1 + src/recipes.jl | 107 ++++++++++++++++++++++--------------------------- 3 files changed, 50 insertions(+), 59 deletions(-) diff --git a/REQUIRE b/REQUIRE index 67e5f781..7397cbc0 100644 --- a/REQUIRE +++ b/REQUIRE @@ -7,3 +7,4 @@ Reexport FixedSizeArrays Measures Showoff +StatsBase diff --git a/src/Plots.jl b/src/Plots.jl index 337880da..1d622e0b 100644 --- a/src/Plots.jl +++ b/src/Plots.jl @@ -9,6 +9,7 @@ using Base.Meta @reexport using PlotUtils @reexport using PlotThemes import Showoff +import StatsBase export grid, diff --git a/src/recipes.jl b/src/recipes.jl index 156bda3e..36a7e990 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -381,51 +381,37 @@ end # --------------------------------------------------------------------------- # Histograms -# edges from number of bins -function calc_edges(v, bins::Integer) - vmin, vmax = extrema(v) - linspace(vmin, vmax, bins+1) +function my_hist(v, bins::Int, weights::Void) + h = StatsBase.fit(StatsBase.Histogram, v, nbins = bins) + h.edges[1], h.weights end -# just pass through arrays -calc_edges(v, bins::AVec) = bins - -# find the bucket index of this value -function bucket_index(vi, edges) - for (i,e) in enumerate(edges) - if vi <= e - return max(1,i-1) - end - end - return length(edges)-1 +function my_hist(v, bins::AVec, weights::Void) + h = StatsBase.fit(StatsBase.Histogram, v, bins) + h.edges[1], h.weights end -function my_hist(v, bins; normed = false, weights = nothing) - edges = calc_edges(v, bins) - counts = zeros(length(edges)-1) +function my_hist(v, bins::Int, weights::AVec) + h = StatsBase.fit(StatsBase.Histogram, v, StatsBase.weights(weights), nbins = bins) + h.edges[1], h.weights +end - # add a weighted count - for (i,vi) in enumerate(v) - idx = bucket_index(vi, edges) - counts[idx] += (weights == nothing ? 1.0 : weights[i]) - end +function my_hist(v, bins::AVec, weights::AVec) + h = StatsBase.fit(StatsBase.Histogram, v, bins, StatsBase.weights(weights)) + h.edges[1], h.weights +end + +@recipe function f(::Type{Val{:histogram}}, x, y, z) + edges, counts = my_hist(y, d[:bins], d[:weights]) # normalize by bar area? - norm_denom = normed ? sum(diff(edges) .* counts) : 1.0 + norm_denom = d[:normalize] ? sum(diff(edges) .* counts) : 1.0 if norm_denom == 0 norm_denom = 1.0 end - edges, counts ./ norm_denom -end - - -@recipe function f(::Type{Val{:histogram}}, x, y, z) - edges, counts = my_hist(y, d[:bins], - normed = d[:normalize], - weights = d[:weights]) x := edges - y := counts + y := counts ./ norm_denom seriestype := :bar () end @@ -434,52 +420,55 @@ end # --------------------------------------------------------------------------- # Histogram 2D -# if tuple, map out bins, otherwise use the same for both -calc_edges_2d(x, y, bins) = calc_edges(x, bins), calc_edges(y, bins) -calc_edges_2d{X,Y}(x, y, bins::Tuple{X,Y}) = calc_edges(x, bins[1]), calc_edges(y, bins[2]) +centers(v::AVec) = 0.5 * (v[1:end-1] + v[2:end]) -# the 2D version -function my_hist_2d(x, y, bins; normed = false, weights = nothing) - xedges, yedges = calc_edges_2d(x, y, bins) - counts = zeros(length(yedges)-1, length(xedges)-1) +function my_hist_2d(v1, v2, bins::Union{AVec, Tuple{AVec, AVec}}, weights::AVec) + h = StatsBase.fit(StatsBase.Histogram, (v1, v2), bins, StatsBase.weights(weights)) + h.edges..., Float64.(h.weights) +end - # add a weighted count - for i=1:length(x) - r = bucket_index(y[i], yedges) - c = bucket_index(x[i], xedges) - counts[r,c] += (weights == nothing ? 1.0 : weights[i]) - end +function my_hist_2d(v1, v2, bins::Union{AVec, Tuple{AVec, AVec}}, weights::Void) + h = StatsBase.fit(StatsBase.Histogram, (v1, v2), bins) + h.edges..., Float64.(h.weights) +end - # normalize to cubic area of the imaginary surface towers - norm_denom = normed ? sum((diff(yedges) * diff(xedges)') .* counts) : 1.0 +function my_hist_2d(v1, v2, bins::Union{Int, Tuple{Int, Int}}, weights::Void) + h = StatsBase.fit(StatsBase.Histogram, (v1, v2), nbins = bins) + h.edges..., Float64.(h.weights) +end + +function my_hist_2d(v1, v2, bins::Union{Int, Tuple{Int, Int}}, weights::AVec) + h = StatsBase.fit(StatsBase.Histogram, (v1, v2), StatsBase.weights(weights), nbins = bins) + h.edges..., Float64.(h.weights) +end + +@recipe function f(::Type{Val{:histogram2d}}, x, y, z) + yedges, xedges, counts = my_hist_2d(x, y, d[:bins], d[:weights]) + + norm_denom = d[:normalize] ? sum((diff(yedges) * diff(xedges)') .* counts) : 1.0 if norm_denom == 0 norm_denom = 1.0 end - xedges, yedges, counts ./ norm_denom -end - -centers(v::AVec) = 0.5 * (v[1:end-1] + v[2:end]) - -@recipe function f(::Type{Val{:histogram2d}}, x, y, z) - xedges, yedges, counts = my_hist_2d(x, y, d[:bins], - normed = d[:normalize], - weights = d[:weights]) for (i,c) in enumerate(counts) if c == 0 counts[i] = NaN end end + x := centers(xedges) y := centers(yedges) - z := Surface(counts) + z := Surface(counts ./ norm_denom) linewidth := 0 seriestype := :heatmap () end @deps histogram2d heatmap - +@recipe function f{T, E}(h::StatsBase.Histogram{T, 2, E}) + seriestype := :bar + h.edges[1], h.weights +end # --------------------------------------------------------------------------- # scatter 3d From 075e5b96e2dd2cbe78f3f8458857c83b84dfd26e Mon Sep 17 00:00:00 2001 From: "Michael K. Borregaard" Date: Tue, 21 Feb 2017 10:14:44 +0100 Subject: [PATCH 2/2] wait for the StatPlots PR to merge --- src/recipes.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/recipes.jl b/src/recipes.jl index 36a7e990..fdd6c525 100644 --- a/src/recipes.jl +++ b/src/recipes.jl @@ -465,10 +465,6 @@ end end @deps histogram2d heatmap -@recipe function f{T, E}(h::StatsBase.Histogram{T, 2, E}) - seriestype := :bar - h.edges[1], h.weights -end # --------------------------------------------------------------------------- # scatter 3d