diff --git a/REQUIRE b/REQUIRE
index 67e5f781..14bcddc8 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -7,3 +7,4 @@ Reexport
 FixedSizeArrays
 Measures
 Showoff
+StatsBase 0.14.0
diff --git a/src/Plots.jl b/src/Plots.jl
index 337880da..113242e9 100644
--- a/src/Plots.jl
+++ b/src/Plots.jl
@@ -9,6 +9,7 @@ using Base.Meta
 @reexport using PlotUtils
 @reexport using PlotThemes
 import Showoff
+import StatsBase
 
 export
     grid,
@@ -148,6 +149,9 @@ end
 @shorthands bar
 @shorthands barh
 @shorthands histogram
+@shorthands barhist
+@shorthands stephist
+@shorthands scatterhist
 @shorthands histogram2d
 @shorthands density
 @shorthands heatmap
diff --git a/src/arg_desc.jl b/src/arg_desc.jl
index 4efbf1af..b233d6d7 100644
--- a/src/arg_desc.jl
+++ b/src/arg_desc.jl
@@ -21,7 +21,7 @@ const _arg_desc = KW(
 :markerstrokewidth => "Number. Width of the marker stroke (border. in pixels)",
 :markerstrokecolor => "Color Type. Color of the marker stroke (border). `:match` will take the value from `:foreground_color_subplot`.",
 :markerstrokealpha => "Number in [0,1]. The alpha/opacity override for the marker stroke (border). `nothing` (the default) means it will take the alpha value of markerstrokecolor.",
-:bins => "Integer, NTuple{2,Integer}, AbstractVector. For histogram-types, defines the number of bins, or the edges, of the histogram.",
+:bins => "Integer, NTuple{2,Integer}, AbstractVector or Symbol. For histogram-types, defines the number of bins, or the edges, of the histogram, or the auto-binning algorithm to use (:sturges, :sqrt, :rice, :scott or :fd)",
 :smooth => "Bool. Add a regression line?",
 :group => "AbstractVector. Data is split into a separate series, one for each unique value in `group`.",
 :x => "Various. Input data. First Dimension",
@@ -40,7 +40,7 @@ const _arg_desc = KW(
 :ribbon => "Number or AbstractVector. Creates a fillrange around the data points.",
 :quiver => "AbstractVector or 2-Tuple of vectors. The directional vectors U,V which specify velocity/gradient vectors for a quiver plot.",
 :arrow => "nothing (no arrows), Bool (if true, default arrows), Arrow object, or arg(s) that could be style or head length/widths. Defines arrowheads that should be displayed at the end of path line segments (just before a NaN and the last non-NaN point). Used in quiverplot, streamplot, or similar.",
-:normalize => "Bool. Should normalize histogram types? Trying for area == 1.",
+:normalize => "Bool or Symbol. Histogram normalization mode. Possible values are: false/:none (no normalization, default), true/:pdf (normalize to a PDF with integral of 1) and :density (only normalize in respect to bin sizes).",
 :weights => "AbstractVector. Used in histogram types for weighted counts.",
 :contours => "Bool. Add contours to the side-grids of 3D plots? Used in surface/wireframe.",
 :match_dimensions => "Bool. For heatmap types... should the first dimension of a matrix (rows) correspond to the first dimension of the plot (x-axis)? The default is false, which matches the behavior of Matplotlib, Plotly, and others. Note: when passing a function for z, the function should still map `(x,y) -> z`.",
diff --git a/src/args.jl b/src/args.jl
index fa5a8e2f..4f4bba59 100644
--- a/src/args.jl
+++ b/src/args.jl
@@ -35,7 +35,9 @@ const _3dTypes = [
 ]
 const _allTypes = vcat([
     :none, :line, :path, :steppre, :steppost, :sticks, :scatter,
-    :heatmap, :hexbin, :histogram, :histogram2d, :histogram3d, :density, :bar, :hline, :vline,
+    :heatmap, :hexbin, :barbins, :barhist, :histogram, :scatterbins,
+    :scatterhist, :stepbins, :stephist, :bins2d, :histogram2d, :histogram3d,
+    :density, :bar, :hline, :vline,
     :contour, :pie, :shape, :image
 ], _3dTypes)
 
@@ -78,7 +80,7 @@ const _typeAliases = Dict{Symbol,Symbol}(
 
 add_non_underscore_aliases!(_typeAliases)
 
-like_histogram(seriestype::Symbol) = seriestype == :histogram
+like_histogram(seriestype::Symbol) = seriestype in (:histogram, :barhist, :barbins)
 like_line(seriestype::Symbol) = seriestype in (:line, :path, :steppre, :steppost)
 like_surface(seriestype::Symbol) = seriestype in (:contour, :contourf, :contour3d, :heatmap, :surface, :wireframe, :image)
 
@@ -154,6 +156,8 @@ const _markerAliases = Dict{Symbol,Symbol}(
 )
 
 const _allScales = [:identity, :ln, :log2, :log10, :asinh, :sqrt]
+const _logScales = [:ln, :log2, :log10]
+const _logScaleBases = Dict(:ln => e, :log2 => 2.0, :log10 => 10.0)
 const _scaleAliases = Dict{Symbol,Symbol}(
     :none => :identity,
     :log => :log10,
@@ -1261,7 +1265,7 @@ function _add_defaults!(d::KW, plt::Plot, sp::Subplot, commandIndex::Int)
     end
 
     # scatter plots don't have a line, but must have a shape
-    if d[:seriestype] in (:scatter, :scatter3d)
+    if d[:seriestype] in (:scatter, :scatterbins, :scatterhist, :scatter3d)
         d[:linewidth] = 0
         if d[:markershape] == :none
             d[:markershape] = :circle
diff --git a/src/pipeline.jl b/src/pipeline.jl
index 6c2e05c3..70644055 100644
--- a/src/pipeline.jl
+++ b/src/pipeline.jl
@@ -277,6 +277,13 @@ function _subplot_setup(plt::Plot, d::KW, kw_list::Vector{KW})
                     attr[Symbol(letter,k)] = v
                 end
             end
+            for k in (:scale,), letter in (:x,:y,:z)
+                # Series recipes may need access to this information
+                lk = Symbol(letter,k)
+                if haskey(attr, lk)
+                    kw[lk] = attr[lk]
+                end
+            end
         end
         sp_attrs[sp] = attr
     end
@@ -357,7 +364,7 @@ function _expand_subplot_extrema(sp::Subplot, d::KW, st::Symbol)
         expand_extrema!(sp[:xaxis], (0,w))
         expand_extrema!(sp[:yaxis], (0,h))
         sp[:yaxis].d[:flip] = true
-    elseif !(st in (:pie, :histogram, :histogram2d))
+    elseif !(st in (:pie, :histogram, :bins2d, :histogram2d))
         expand_extrema!(sp, d)
     end
 end
diff --git a/src/recipes.jl b/src/recipes.jl
index 5be5b6e0..cb89fbc0 100644
--- a/src/recipes.jl
+++ b/src/recipes.jl
@@ -323,10 +323,11 @@ end
 
 # create a bar plot as a filled step function
 @recipe function f(::Type{Val{:bar}}, x, y, z)
-    nx, ny = length(x), length(y)
+    procx, procy, xscale, yscale, baseline = _preprocess_binlike(d, x, y)
+    nx, ny = length(procx), length(procy)
     axis = d[:subplot][isvertical(d) ? :xaxis : :yaxis]
-    cv = [discrete_value!(axis, xi)[1] for xi=x]
-    x = if nx == ny
+    cv = [discrete_value!(axis, xi)[1] for xi=procx]
+    procx = if nx == ny
         cv
     elseif nx == ny + 1
         0.5diff(cv) + cv[1:end-1]
@@ -337,9 +338,9 @@ end
     # compute half-width of bars
     bw = d[:bar_width]
     hw = if bw == nothing
-        0.5mean(diff(x))
+        0.5mean(diff(procx))
     else
-        Float64[0.5cycle(bw,i) for i=1:length(x)]
+        Float64[0.5cycle(bw,i) for i=1:length(procx)]
     end
 
     # make fillto a vector... default fills to 0
@@ -347,16 +348,21 @@ end
     if fillto == nothing
         fillto = 0
     end
+    if (yscale in _logScales) && !all(_is_positive, fillto)
+        fillto = map(x -> _is_positive(x) ? typeof(baseline)(x) : baseline, fillto)
+    end
 
     # create the bar shapes by adding x/y segments
     xseg, yseg = Segments(), Segments()
     for i=1:ny
-        center = x[i]
-        hwi = cycle(hw,i)
-        yi = y[i]
-        fi = cycle(fillto,i)
-        push!(xseg, center-hwi, center-hwi, center+hwi, center+hwi, center-hwi)
-        push!(yseg, yi, fi, fi, yi, yi)
+        yi = procy[i]
+        if !isnan(yi)
+            center = procx[i]
+            hwi = cycle(hw,i)
+            fi = cycle(fillto,i)
+            push!(xseg, center-hwi, center-hwi, center+hwi, center+hwi, center-hwi)
+            push!(yseg, yi, fi, fi, yi, yi)
+        end
     end
 
     # widen limits out a bit
@@ -378,109 +384,323 @@ end
 end
 @deps bar shape
 
+
 # ---------------------------------------------------------------------------
 # Histograms
 
-# edges from number of bins
-function calc_edges(v, bins::Integer)
-    vmin, vmax = extrema(v)
-    linspace(vmin, vmax, bins+1)
+_bin_centers(v::AVec) = (v[1:end-1] + v[2:end]) / 2
+
+_is_positive(x) = (x > 0) && !(x ≈ 0)
+
+_positive_else_nan{T}(::Type{T}, x::Real) = _is_positive(x) ? T(x) : T(NaN)
+
+function _scale_adjusted_values{T<:AbstractFloat}(::Type{T}, V::AbstractVector, scale::Symbol)
+    if scale in _logScales
+        [_positive_else_nan(T, x) for x in V]
+    else
+        [T(x) for x in V]
+    end
 end
 
-# just pass through arrays
-calc_edges(v, bins::AVec) = bins
+function _hist_ylim_lo{T<:Real}(ymin::T, yscale::Symbol)
+    if (yscale in _logScales)
+        ymin / T(_logScaleBases[yscale]^log10(2))
+    else
+        zero(T)
+    end
+end
 
-# find the bucket index of this value
-function bucket_index(vi, edges)
-    for (i,e) in enumerate(edges)
-        if vi <= e
-            return max(1,i-1)
+function _hist_ylim_hi{T<:Real}(ymax::T, yscale::Symbol)
+    if (yscale in _logScales)
+        ymax * T(_logScaleBases[yscale]^log10(2))
+    else
+        ymax * T(1.1)
+    end
+end
+
+
+function _preprocess_binlike(d, x, y)
+    xscale = get(d, :xscale, :identity)
+    yscale = get(d, :yscale, :identity)
+
+    T = float(eltype(x) <: Real ? promote_type(eltype(x), eltype(y)) : eltype(y))
+    edge = eltype(x) <: Real ? map(T, x) : x    # categorical x (bar plots) is passed through untouched
+    weights = _scale_adjusted_values(T, y, yscale)
+    w_min = minimum(weights)
+    baseline = _hist_ylim_lo(isnan(w_min) ? one(T) : w_min, yscale)
+    edge, weights, xscale, yscale, baseline
+end
+
+
+
+@recipe function f(::Type{Val{:barbins}}, x, y, z)
+    edge, weights, xscale, yscale, baseline = _preprocess_binlike(d, x, y)
+    if (d[:bar_width] === nothing)
+        bar_width := diff(edge)
+    end
+    x := _bin_centers(edge)
+    y := weights
+    seriestype := :bar
+    ()
+end
+@deps barbins bar
+
+
+@recipe function f(::Type{Val{:scatterbins}}, x, y, z)
+    edge, weights, xscale, yscale, baseline = _preprocess_binlike(d, x, y)
+    xerror := diff(edge)/2
+    x := _bin_centers(edge)
+    y := weights
+    seriestype := :scatter
+    ()
+end
+@deps scatterbins scatter
+
+
+function _stepbins_path(edge, weights, baseline::Real, xscale::Symbol, yscale::Symbol)
+    log_scale_x = xscale in _logScales
+    log_scale_y = yscale in _logScales
+
+    nbins = length(linearindices(weights))
+    if length(linearindices(edge)) != nbins + 1
+        error("Edge vector must be 1 longer than weight vector")
+    end
+
+    x = eltype(edge)[]
+    y = eltype(weights)[]
+
+    it_e, it_w = start(edge), start(weights)
+    a, it_e = next(edge, it_e)
+    last_w = eltype(weights)(NaN)
+    i = 1
+    while (!done(edge, it_e) && !done(weights, it_w))    # stop as soon as either iterator is exhausted
+        b, it_e = next(edge, it_e)
+        w, it_w = next(weights, it_w)
+
+        if (log_scale_x && a ≈ 0)
+            a = b/_logScaleBases[xscale]^3
         end
+
+        if isnan(w)
+            if !isnan(last_w)
+                push!(x, a)
+                push!(y, baseline)
+            end
+        else
+            if isnan(last_w)
+                push!(x, a)
+                push!(y, baseline)
+            end
+            push!(x, a)
+            push!(y, w)
+            push!(x, b)
+            push!(y, w)
+        end
+
+        a = b
+        last_w = w
     end
-    return length(edges)-1
+    if (last_w != baseline)
+        push!(x, a)
+        push!(y, baseline)
+    end
+
+    (x, y)
 end
 
-function my_hist(v, bins; normed = false, weights = nothing)
-    edges = calc_edges(v, bins)
-    counts = zeros(length(edges)-1)
 
-    # add a weighted count
-    for (i,vi) in enumerate(v)
-        idx = bucket_index(vi, edges)
-        counts[idx] += (weights == nothing ? 1.0 : weights[i])
+@recipe function f(::Type{Val{:stepbins}}, x, y, z)
+    axis = d[:subplot][Plots.isvertical(d) ? :xaxis : :yaxis]
+
+    edge, weights, xscale, yscale, baseline = _preprocess_binlike(d, x, y)
+
+    xpts, ypts = _stepbins_path(edge, weights, baseline, xscale, yscale)
+    if !isvertical(d)
+        xpts, ypts = ypts, xpts
     end
 
-    counts = isapprox(extrema(diff(edges))...) ? counts : counts ./ diff(edges) # for uneven bins, normalize to area.
-
-    # normalize by bar area?
-    norm_denom = normed ? sum(diff(edges) .* counts) : 1.0
-    if norm_denom == 0
-        norm_denom = 1.0
+    # create a secondary series for the markers
+    if d[:markershape] != :none
+        @series begin
+            seriestype := :scatter
+            x := _bin_centers(edge)
+            y := weights
+            fillrange := nothing
+            label := ""
+            primary := false
+            ()
+        end
+        markershape := :none
+        xerror := :none
+        yerror := :none
     end
 
-    edges, counts ./ norm_denom
+    x := xpts
+    y := ypts
+    seriestype := :path
+
+    ylims --> [baseline, _hist_ylim_hi(maximum(weights), yscale)]
+    ()
+end
+Plots.@deps stepbins path
+
+
+function _auto_binning_nbins{N}(vs::NTuple{N,AbstractVector}, dim::Integer; mode::Symbol = :auto)
+    _cl(x) = max(ceil(Int, x), 1)
+    _iqr(v) = quantile(v, 0.75) - quantile(v, 0.25)
+    _span(v) = maximum(v) - minimum(v)
+
+    n_samples = length(linearindices(first(vs)))
+    # Estimator for number of samples in one row/column of bins along each axis:
+    n = max(1, n_samples^(1/N))
+
+    v = vs[dim]
+
+    if mode == :sqrt # Square-root choice
+        _cl(sqrt(n))
+    elseif mode == :sturges || mode ==:auto # Sturges' formula
+        _cl(log2(n)) + 1
+    elseif mode == :rice # Rice Rule
+        _cl(2 * n^(1/3))
+    elseif mode == :scott # Scott's normal reference rule
+        _cl(_span(v) / (3.5 * std(v) / n^(1/3)))
+    elseif mode == :fd # Freedman–Diaconis rule
+        _cl(_span(v) / (2 * _iqr(v) / n^(1/3)))
+    else
+        error("Unknown auto-binning mode $mode")
+    end
+end
+
+_hist_edge{N}(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Integer) = StatsBase.histrange(vs[dim], binning, :left)
+_hist_edge{N}(vs::NTuple{N,AbstractVector}, dim::Integer, binning::Symbol) = _hist_edge(vs, dim, _auto_binning_nbins(vs, dim, mode = binning))
+_hist_edge{N}(vs::NTuple{N,AbstractVector}, dim::Integer, binning::AbstractVector) = binning
+
+_hist_edges{N}(vs::NTuple{N,AbstractVector}, binning::NTuple{N}) =
+    map(dim -> _hist_edge(vs, dim, binning[dim]), (1:N...))
+
+_hist_edges{N}(vs::NTuple{N,AbstractVector}, binning::Union{Integer, Symbol, AbstractVector}) =
+    map(dim -> _hist_edge(vs, dim, binning), (1:N...))
+
+_hist_norm_mode(mode::Symbol) = mode
+_hist_norm_mode(mode::Bool) = mode ? :pdf : :none
+
+function _make_hist{N}(vs::NTuple{N,AbstractVector}, binning; normed = false, weights = nothing)
+    edges = _hist_edges(vs, binning)
+    h = float( weights === nothing ?
+        StatsBase.fit(StatsBase.Histogram, vs, edges, closed = :left) :
+        StatsBase.fit(StatsBase.Histogram, vs, weights, edges, closed = :left)
+    )
+    normalize!(h, mode = _hist_norm_mode(normed))
 end
 
 @recipe function f(::Type{Val{:histogram}}, x, y, z)
-    edges, counts = my_hist(y, d[:bins],
-                            normed = d[:normalize],
-                            weights = d[:weights])
-    bar_width := diff(edges)
-    x := centers(edges)
-    y := counts
-    seriestype := :bar
+    seriestype := :barhist
     ()
 end
-@deps histogram bar
+@deps histogram barhist
+
+@recipe function f(::Type{Val{:barhist}}, x, y, z)
+    h = _make_hist((y,), d[:bins], normed = d[:normalize], weights = d[:weights])
+    x := h.edges[1]
+    y := h.weights
+    seriestype := :barbins
+    ()
+end
+@deps barhist barbins
+
+@recipe function f(::Type{Val{:stephist}}, x, y, z)
+    h = _make_hist((y,), d[:bins], normed = d[:normalize], weights = d[:weights])
+    x := h.edges[1]
+    y := h.weights
+    seriestype := :stepbins
+    ()
+end
+@deps stephist stepbins
+
+@recipe function f(::Type{Val{:scatterhist}}, x, y, z)
+    h = _make_hist((y,), d[:bins], normed = d[:normalize], weights = d[:weights])
+    x := h.edges[1]
+    y := h.weights
+    seriestype := :scatterbins
+    ()
+end
+@deps scatterhist scatterbins
+
+
+@recipe function f{T, E}(h::StatsBase.Histogram{T, 1, E})
+    seriestype --> :barbins
+
+    st_map = Dict(
+        :bar => :barbins, :scatter => :scatterbins, :step => :stepbins,
+        :steppost => :stepbins # :step can be mapped to :steppost in pre-processing
+    )
+    seriestype := get(st_map, d[:seriestype], d[:seriestype])
+
+    if d[:seriestype] == :scatterbins
+        # Workaround, error bars currently not set correctly by scatterbins:
+        # pre-process the bins here and emit a plain :scatter series at the bin centers
+        edge, weights, xscale, yscale, baseline = _preprocess_binlike(d, h.edges[1], h.weights)
+        xerror --> diff(h.edges[1])/2
+        seriestype := :scatter
+        (Plots._bin_centers(edge), weights)
+    else
+        (h.edges[1], h.weights)
+    end
+end
+
+
+@recipe function f{H <: StatsBase.Histogram}(hv::AbstractVector{H})
+    for h in hv
+        @series begin
+            h
+        end
+    end
+end
+
 
 
 # ---------------------------------------------------------------------------
 # Histogram 2D
 
-# if tuple, map out bins, otherwise use the same for both
-calc_edges_2d(x, y, bins) = calc_edges(x, bins), calc_edges(y, bins)
-calc_edges_2d{X,Y}(x, y, bins::Tuple{X,Y}) = calc_edges(x, bins[1]), calc_edges(y, bins[2])
+@recipe function f(::Type{Val{:bins2d}}, x, y, z)
+    edge_x, edge_y, weights = x, y, z.surf
 
-# the 2D version
-function my_hist_2d(x, y, bins; normed = false, weights = nothing)
-    xedges, yedges = calc_edges_2d(x, y, bins)
-    counts = zeros(length(yedges)-1, length(xedges)-1)
-
-    # add a weighted count
-    for i=1:length(x)
-        r = bucket_index(y[i], yedges)
-        c = bucket_index(x[i], xedges)
-        counts[r,c] += (weights == nothing ? 1.0 : weights[i])
+    float_weights = float(weights)
+    if float_weights === weights    # float() returned the input itself: copy before writing NaNs into it
+        float_weights = deepcopy(float_weights)
     end
-
-    # normalize to cubic area of the imaginary surface towers
-    norm_denom = normed ? sum((diff(yedges) * diff(xedges)') .* counts) : 1.0
-    if norm_denom == 0
-        norm_denom = 1.0
-    end
-
-    xedges, yedges, counts ./ norm_denom
-end
-
-centers(v::AVec) = 0.5 * (v[1:end-1] + v[2:end])
-
-@recipe function f(::Type{Val{:histogram2d}}, x, y, z)
-    xedges, yedges, counts = my_hist_2d(x, y, d[:bins],
-                                        normed = d[:normalize],
-                                        weights = d[:weights])
-    for (i,c) in enumerate(counts)
+    for (i, c) in enumerate(float_weights)
         if c == 0
-            counts[i] = NaN
+            float_weights[i] = NaN
         end
     end
-    x := centers(xedges)
-    y := centers(yedges)
-    z := Surface(counts)
-    linewidth := 0
+
+    x := Plots._bin_centers(edge_x)
+    y := Plots._bin_centers(edge_y)
+    z := Surface(float_weights)
+
+    match_dimensions := true
     seriestype := :heatmap
     ()
 end
-@deps histogram2d heatmap
+Plots.@deps bins2d heatmap
+
+
+@recipe function f(::Type{Val{:histogram2d}}, x, y, z)
+    h = _make_hist((x, y), d[:bins], normed = d[:normalize], weights = d[:weights])
+    x := h.edges[1]
+    y := h.edges[2]
+    z := Surface(h.weights)
+    seriestype := :bins2d
+    ()
+end
+@deps histogram2d bins2d
+
+
+@recipe function f{T, E}(h::StatsBase.Histogram{T, 2, E})
+    seriestype --> :bins2d
+    (h.edges[1], h.edges[2], Surface(h.weights))
+end
 
 
 # ---------------------------------------------------------------------------
diff --git a/src/subplots.jl b/src/subplots.jl
index 3400874c..924cc6c0 100644
--- a/src/subplots.jl
+++ b/src/subplots.jl
@@ -39,7 +39,7 @@ series_list(sp::Subplot) = sp.series_list # filter(series -> series.d[:subplot]
 function should_add_to_legend(series::Series)
     series.d[:primary] && series.d[:label] != "" &&
         !(series.d[:seriestype] in (
-            :hexbin,:histogram2d,:hline,:vline,
+            :hexbin,:bins2d,:histogram2d,:hline,:vline,
             :contour,:contourf,:contour3d,:surface,:wireframe,
             :heatmap, :pie, :image
         ))