Compare commits

...

3 Commits

Author SHA1 Message Date
Michael Krabbe Borregaard
45a04d5309 Merge pull request #713 from oschulz/new-hist-dev
Change histogram implementation, use StatsPlots, add new histogram st…
2017-03-01 22:32:06 +01:00
Oliver Schulz
6420f6fdc9 Conform to Plots.jl coding style 2017-03-01 17:33:22 +01:00
Oliver Schulz
19a9726e61 Change histogram implementation, use StatsPlots, add new histogram styles
New series recipes for binned data:

* barbins
* scatterbins
* stepbins

New series recipes for histogram:

* barhist (histogram is now an alias for this)
* scatterhist
* stephist

Supports plotting 1D and 2D StatsBase histograms, seriestype can be set to
bar(bins), scatter(bins) or step(bins).

Also adds support for some common auto-binning modes:

* :sturges, :auto - Sturges' formula
* :sqrt - Square-root choice
* :rice - Rice Rule
* :scott - Scott's normal reference rule
* :fd - Freedman–Diaconis rule

Maybe these could be contributed to StatsBase at some point.

Error bars currently don't work correctly for scatterbins and scatterhist,
due to problem with manipulating error bars in a series recipe, but do work
for "plot(h::StatsBase.Histogram, seriestype = :scatter)" (works around
the problem by calling scatter directly, it seems that error bars can be
manipulated correctly in a type recipe).
2017-03-01 14:24:00 +01:00
4 changed files with 225 additions and 73 deletions

View File

@ -7,3 +7,4 @@ Reexport
FixedSizeArrays FixedSizeArrays
Measures Measures
Showoff Showoff
StatsBase

View File

@ -9,6 +9,7 @@ using Base.Meta
@reexport using PlotUtils @reexport using PlotUtils
@reexport using PlotThemes @reexport using PlotThemes
import Showoff import Showoff
import StatsBase
export export
grid, grid,
@ -148,6 +149,9 @@ end
@shorthands bar @shorthands bar
@shorthands barh @shorthands barh
@shorthands histogram @shorthands histogram
@shorthands barhist
@shorthands stephist
@shorthands scatterhist
@shorthands histogram2d @shorthands histogram2d
@shorthands density @shorthands density
@shorthands heatmap @shorthands heatmap

View File

@ -77,7 +77,7 @@ const _typeAliases = Dict{Symbol,Symbol}(
add_non_underscore_aliases!(_typeAliases) add_non_underscore_aliases!(_typeAliases)
like_histogram(seriestype::Symbol) = seriestype in (:histogram, :density) like_histogram(seriestype::Symbol) = seriestype in (:histogram, :barhist, :barbins, :density)
like_line(seriestype::Symbol) = seriestype in (:line, :path, :steppre, :steppost) like_line(seriestype::Symbol) = seriestype in (:line, :path, :steppre, :steppost)
like_surface(seriestype::Symbol) = seriestype in (:contour, :contourf, :contour3d, :heatmap, :surface, :wireframe, :image) like_surface(seriestype::Symbol) = seriestype in (:contour, :contourf, :contour3d, :heatmap, :surface, :wireframe, :image)
@ -1260,7 +1260,7 @@ function _add_defaults!(d::KW, plt::Plot, sp::Subplot, commandIndex::Int)
end end
# scatter plots don't have a line, but must have a shape # scatter plots don't have a line, but must have a shape
if d[:seriestype] in (:scatter, :scatter3d) if d[:seriestype] in (:scatter, :scatterbins, :scatterhist, :scatter3d)
d[:linewidth] = 0 d[:linewidth] = 0
if d[:markershape] == :none if d[:markershape] == :none
d[:markershape] = :circle d[:markershape] = :circle

View File

@ -378,109 +378,256 @@ end
end end
@deps bar shape @deps bar shape
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Histograms # Histograms
# edges from number of bins _bin_centers(v::AVec) = (v[1:end-1] + v[2:end]) / 2
function calc_edges(v, bins::Integer)
vmin, vmax = extrema(v)
linspace(vmin, vmax, bins+1) @recipe function f(::Type{Val{:barbins}}, x, y, z)
edge, weights = x, y
if (d[:bar_width] == nothing)
bar_width := diff(edge)
end
x := _bin_centers(edge)
y := weights
seriestype := :bar
()
end
@deps barbins bins
@recipe function f(::Type{Val{:scatterbins}}, x, y, z)
edge, weights = x, y
xerror := diff(edge)/2
x := _bin_centers(edge)
y := weights
seriestype := :scatter
()
end
@deps scatterbins scatter
function _stepbins_path(edge, weights)
nbins = length(linearindices(weights))
if length(linearindices(edge)) != nbins + 1
error("Edge vector must be 1 longer than weight vector")
end
it_e, it_w = start(edge), start(weights)
px, it_e = next(edge, it_e)
py = zero(eltype(weights))
npathpts = 2 * nbins + 2
x = Vector{eltype(px)}(npathpts)
y = Vector{eltype(py)}(npathpts)
x[1], y[1] = px, py
i = 2
while (i < npathpts - 1)
py, it_w = next(weights, it_w)
x[i], y[i] = px, py
i += 1
px, it_e = next(edge, it_e)
x[i], y[i] = px, py
i += 1
end
assert(i == npathpts)
x[end], y[end] = px, zero(py)
(x, y)
end end
# just pass through arrays @recipe function f(::Type{Val{:stepbins}}, x, y, z)
calc_edges(v, bins::AVec) = bins edge, weights = x, y
# find the bucket index of this value axis = d[:subplot][Plots.isvertical(d) ? :xaxis : :yaxis]
function bucket_index(vi, edges)
for (i,e) in enumerate(edges) xpts, ypts = _stepbins_path(edge, weights)
if vi <= e if !Plots.isvertical(d)
return max(1,i-1) xpts, ypts = ypts, xpts
end end
# create a secondary series for the markers
if d[:markershape] != :none
@series begin
seriestype := :scatter
x := Plots._bin_centers(edge)
y := weights
fillrange := nothing
label := ""
primary := false
()
end
markershape := :none
xerror := :none
yerror := :none
end
x := xpts
y := ypts
seriestype := :path
ylims --> [0, 1.1 * maximum(weights)]
()
end
Plots.@deps stepbins path
function _auto_binning_nbins{N}(vs::NTuple{N,AbstractVector}, dim::Integer; mode::Symbol = :auto)
_cl(x) = max(ceil(Int, x), 1)
_iqr(v) = quantile(v, 0.75) - quantile(v, 0.25)
_span(v) = maximum(v) - minimum(v)
n_samples = length(linearindices(first(vs)))
# Estimator for number of samples in one row/column of bins along each axis:
n = max(1, n_samples^(1/N))
v = vs[dim]
if mode == :sqrt # Square-root choice
_cl(sqrt(n))
elseif mode == :sturges || mode ==:auto # Sturges' formula
_cl(log2(n)) + 1
elseif mode == :rice # Rice Rule
_cl(2 * n^(1/3))
elseif mode == :scott # Scott's normal reference rule
_cl(_span(v) / (3.5 * std(v) / n^(1/3)))
elseif mode == :fd # FreedmanDiaconis rule
_cl(_span(v) / (2 * _iqr(v) / n^(1/3)))
else
error("Unknown auto-binning mode $mode")
end end
return length(edges)-1
end end
function my_hist(v, bins; normed = false, weights = nothing) _hist_edge{N}(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Integer) = StatsBase.histrange(vs[dim], binning, :left)
edges = calc_edges(v, bins) _hist_edge{N}(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Symbol) = _hist_edge(vs, dim, _auto_binning_nbins(vs, dim, mode = binning))
counts = zeros(length(edges)-1) _hist_edge{N}(vs::NTuple{N,AbstractVector}, dim::Integer, binning::AbstractVector) = binning
# add a weighted count _hist_edges{N}(vs::NTuple{N,AbstractVector}, binning::NTuple{N}) =
for (i,vi) in enumerate(v) map(dim -> _hist_edge(vs, dim, binning[dim]), (1:N...))
idx = bucket_index(vi, edges)
counts[idx] += (weights == nothing ? 1.0 : weights[i])
end
counts = isapprox(extrema(diff(edges))...) ? counts : counts ./ diff(edges) # for uneven bins, normalize to area. _hist_edges{N}(vs::NTuple{N,AbstractVector}, binning::Union{Integer, Symbol, AbstractVector}) =
map(dim -> _hist_edge(vs, dim, binning), (1:N...))
# normalize by bar area? _hist_norm_mode(mode::Symbol) = mode
norm_denom = normed ? sum(diff(edges) .* counts) : 1.0 _hist_norm_mode(mode::Bool) = mode ? :norm : :none
if norm_denom == 0
norm_denom = 1.0
end
edges, counts ./ norm_denom function _make_hist{N}(vs::NTuple{N,AbstractVector}, binning; normed = false, weights = nothing)
edges = _hist_edges(vs, binning)
h = float( weights == nothing ?
StatsBase.fit(StatsBase.Histogram, vs, edges) :
StatsBase.fit(StatsBase.Histogram, vs, weights, edges)
)
normalize!(h, mode = _hist_norm_mode(normed))
end end
@recipe function f(::Type{Val{:histogram}}, x, y, z) @recipe function f(::Type{Val{:histogram}}, x, y, z)
edges, counts = my_hist(y, d[:bins], seriestype := :barhist
normed = d[:normalize],
weights = d[:weights])
bar_width := diff(edges)
x := centers(edges)
y := counts
seriestype := :bar
() ()
end end
@deps histogram bar @deps histogram barhist
@recipe function f(::Type{Val{:barhist}}, x, y, z)
h = _make_hist((y,), d[:bins], normed = d[:normalize], weights = d[:weights])
x := h.edges[1]
y := h.weights
seriestype := :barbins
()
end
@deps barhist barbins
@recipe function f(::Type{Val{:stephist}}, x, y, z)
h = _make_hist((y,), d[:bins], normed = d[:normalize], weights = d[:weights])
x := h.edges[1]
y := h.weights
seriestype := :stepbins
()
end
@deps stephist stepbins
@recipe function f(::Type{Val{:scatterhist}}, x, y, z)
h = _make_hist((y,), d[:bins], normed = d[:normalize], weights = d[:weights])
x := h.edges[1]
y := h.weights
seriestype := :scatterbins
()
end
@deps scatterhist scatterbins
@recipe function f{T, E}(h::StatsBase.Histogram{T, 1, E})
seriestype --> :barbins
st_map = Dict(
:bar => :barbins, :scatter => :scatterbins, :step => :stepbins,
:steppost => :stepbins # :step can be mapped to :steppost in pre-processing
)
seriestype := get(st_map, d[:seriestype], d[:seriestype])
if d[:seriestype] == :scatterbins
# Workaround, error bars currently not set correctly by scatterbins
xerror --> diff(h.edges[1])/2
seriestype := :scatter
(Plots._bin_centers(h.edges[1]), h.weights)
else
(h.edges[1], h.weights)
end
end
@recipe function f{H <: StatsBase.Histogram}(hv::AbstractVector{H})
for h in hv
@series begin
h
end
end
end
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Histogram 2D # Histogram 2D
# if tuple, map out bins, otherwise use the same for both @recipe function f(::Type{Val{:bins2d}}, x, y, z)
calc_edges_2d(x, y, bins) = calc_edges(x, bins), calc_edges(y, bins) edge_x, edge_y, weights = x, y, z.surf
calc_edges_2d{X,Y}(x, y, bins::Tuple{X,Y}) = calc_edges(x, bins[1]), calc_edges(y, bins[2])
# the 2D version float_weights = float(weights)
function my_hist_2d(x, y, bins; normed = false, weights = nothing) if is(float_weights, weights)
xedges, yedges = calc_edges_2d(x, y, bins) float_weights = deepcopy(float_weights)
counts = zeros(length(yedges)-1, length(xedges)-1)
# add a weighted count
for i=1:length(x)
r = bucket_index(y[i], yedges)
c = bucket_index(x[i], xedges)
counts[r,c] += (weights == nothing ? 1.0 : weights[i])
end end
for (i, c) in enumerate(float_weights)
# normalize to cubic area of the imaginary surface towers
norm_denom = normed ? sum((diff(yedges) * diff(xedges)') .* counts) : 1.0
if norm_denom == 0
norm_denom = 1.0
end
xedges, yedges, counts ./ norm_denom
end
centers(v::AVec) = 0.5 * (v[1:end-1] + v[2:end])
@recipe function f(::Type{Val{:histogram2d}}, x, y, z)
xedges, yedges, counts = my_hist_2d(x, y, d[:bins],
normed = d[:normalize],
weights = d[:weights])
for (i,c) in enumerate(counts)
if c == 0 if c == 0
counts[i] = NaN float_weights[i] = NaN
end end
end end
x := centers(xedges)
y := centers(yedges) x := Plots._bin_centers(edge_x)
z := Surface(counts) y := Plots._bin_centers(edge_y)
linewidth := 0 z := Surface(float_weights)
match_dimensions := true
seriestype := :heatmap seriestype := :heatmap
() ()
end end
@deps histogram2d heatmap Plots.@deps bins2d heatmap
@recipe function f(::Type{Val{:histogram2d}}, x, y, z)
h = _make_hist((x, y), d[:bins], normed = d[:normalize], weights = d[:weights])
x := h.edges[1]
y := h.edges[2]
z := Surface(h.weights)
seriestype := :bins2d
()
end
@deps histogram2d bins2d
@recipe function f{T, E}(h::StatsBase.Histogram{T, 2, E})
seriestype --> :bins2d
(h.edges[1], h.edges[2], Surface(h.weights))
end
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------