Skip to content

Binned Scatter Plots

1
binscatter(x,y);

example_binscatter_1

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#include <matplot/matplot.h>
#include <random>
#include <tuple>

// Example 1: an ordinary scatter plot of 1000 points next to a binned
// scatter plot of 1000000 points, both in the same figure.
int main() {
    using namespace matplot;

    // Double the figure width so the two panels sit side by side.
    auto fig = figure();
    const auto doubled_width = fig->width() * 2;
    fig->width(doubled_width);

    // Both data sets follow the same relation: y = 2 * x + randn(0, 1).
    const auto noisy_line = [](double value) {
        return 2 * value + randn(0, 1);
    };

    // Left panel: plain scatter plot of the small sample.
    auto small_x = randn(1000, 0, 1);
    auto small_y = transform(small_x, noisy_line);
    auto left_axes = subplot(1, 2, 0);
    scatter(left_axes, small_x, small_y);
    title(left_axes, "Scatter plot (n=1000)");

    // Right panel: binned scatter plot of the large sample.
    auto large_x = randn(1000000, 0., 1.);
    auto large_y = transform(large_x, noisy_line);
    auto right_axes = subplot(1, 2, 1);
    binscatter(right_axes, large_x, large_y);
    axis(tight);
    title(right_axes, "Binned Scatter plot (n=1000000)");

    show();
    return 0;
}

More examples

example_binscatter_2

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include <matplot/matplot.h>

// Example 2: a 2x3 grid showing a plain scatter plot followed by one binned
// scatter plot per bin_scatter_style (point size, point alpha, colormap,
// heatmap, jitter).
int main() {
    using namespace matplot;

    auto fig = figure(true);

    // Double both figure dimensions, then place the window.
    const auto doubled_width = fig->width() * 2;
    fig->width(doubled_width);
    const auto doubled_height = fig->height() * 2;
    fig->height(doubled_height);
    fig->x_position(200);
    fig->y_position(100);

    // Full data set: y = 2 * x + randn(0, 1).
    auto xs = randn(1000000, 0., 1.);
    auto ys = transform(xs, [](double value) {
        return 2 * value + randn(0, 1);
    });

    // Only the first 1000 points are used for the plain scatter panel.
    std::vector<double> head_x(xs.begin(), xs.begin() + 1000);
    std::vector<double> head_y(ys.begin(), ys.begin() + 1000);

    // Panel 0: plain scatter plot of the first 1000 points.
    subplot(2, 3, 0);
    scatter(head_x, head_y);
    title("Scatter");

    // Panel 1: point size style.
    subplot(2, 3, 1);
    binscatter(xs, ys, bin_scatter_style::point_size);
    title("Bin Scatter: Point size");

    // Panel 2: point alpha style.
    subplot(2, 3, 2);
    binscatter(xs, ys, bin_scatter_style::point_alpha);
    title("Bin Scatter: Point alpha");

    // Panel 3: point colormap style.
    subplot(2, 3, 3);
    binscatter(xs, ys, bin_scatter_style::point_colormap);
    title("Bin Scatter: Colormap");

    // Panel 4: heatmap style; this is the only panel that calls axis(tight).
    subplot(2, 3, 4);
    binscatter(xs, ys, bin_scatter_style::heatmap);
    axis(tight);
    title("Bin Scatter: Heatmap");

    // Panel 5: jitter style.
    subplot(2, 3, 5);
    binscatter(xs, ys, bin_scatter_style::jitter);
    title("Bin Scatter: Jitter");

    fig->show();

    return 0;
}

example_binscatter_3

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <matplot/matplot.h>
#include <random>
#include <tuple>

int main() {
    using namespace matplot;

    auto f = figure(true);
    f->width(f->width() * 2);
    f->height(f->height() * 2);
    f->x_position(200);
    f->y_position(100);

    auto x = randn(1000000, 0., 1.);
    auto y = transform(x, [](double x) { return 2 * x + randn(0, 1); });

    bin_scatter_style b = bin_scatter_style::automatic;

    subplot(2, 3, 0);
    binscatter(x, y, histogram::binning_algorithm::automatic, b);
    axis(tight);
    title("Automatic");

    subplot(2, 3, 1);
    binscatter(x, y, histogram::binning_algorithm::integers, b);
    axis(tight);
    title("Integers");

    subplot(2, 3, 2);
    binscatter(x, y, histogram::binning_algorithm::scott, b);
    axis(tight);
    title("Scott's rule");

    subplot(2, 3, 3);
    binscatter(x, y, histogram::binning_algorithm::fd, b);
    axis(tight);
    title("Freedman-Diaconis rule");

    subplot(2, 3, 4);
    binscatter(x, y, histogram::binning_algorithm::sqrt, b);
    axis(tight);
    title("Square root rule");

    subplot(2, 3, 5);
    binscatter(x, y, histogram::binning_algorithm::sturges, b);
    axis(tight);
    title("Sturges' rule");

    f->show();

    return 0;
}

example_binscatter_4

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include <matplot/matplot.h>
#include <random>
#include <tuple>

// Example 4: a 2x3 grid of heatmap-style binned scatter plots of the same
// data, one per histogram::normalization value, all using the automatic
// binning algorithm.
int main() {
    using namespace matplot;

    // Double both figure dimensions, then place the window.
    auto f = figure(true);
    f->width(f->width() * 2);
    f->height(f->height() * 2);
    f->x_position(200);
    f->y_position(100);

    // Data set: y = 2 * x + randn(0, 1).
    auto x = randn(1000000, 0., 1.);
    auto y = transform(x, [](double x) { return 2 * x + randn(0, 1); });

    // Style and binning algorithm shared by every panel.
    bin_scatter_style b = bin_scatter_style::heatmap;
    histogram::binning_algorithm a = histogram::binning_algorithm::automatic;

    subplot(2, 3, 0);
    binscatter(x, y, a, b, histogram::normalization::count);
    axis(tight);
    title("Normalization: Count");

    subplot(2, 3, 1);
    binscatter(x, y, a, b, histogram::normalization::probability);
    axis(tight);
    title("Normalization: Probability");

    // The enumerator keeps the spelling this example already uses
    // (`cummulative_count`); only the displayed title is corrected.
    subplot(2, 3, 2);
    binscatter(x, y, a, b, histogram::normalization::cummulative_count);
    axis(tight);
    title("Normalization: Cumulative count");

    subplot(2, 3, 3);
    binscatter(x, y, a, b, histogram::normalization::count_density);
    axis(tight);
    title("Normalization: Count density");

    subplot(2, 3, 4);
    binscatter(x, y, a, b, histogram::normalization::pdf);
    axis(tight);
    title("Normalization: PDF");

    subplot(2, 3, 5);
    binscatter(x, y, a, b, histogram::normalization::cdf);
    axis(tight);
    title("Normalization: CDF");

    f->show();

    return 0;
}

example_binscatter_5

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <matplot/matplot.h>
#include <random>
#include <tuple>

// Example 5: a 2x3 grid with a plain scatter plot followed by binned scatter
// plots called with the explicit arguments 30 and 10, one per
// bin_scatter_style.
int main() {
    using namespace matplot;

    auto fig = figure(true);

    // Double both figure dimensions, then place the window.
    const auto doubled_width = fig->width() * 2;
    fig->width(doubled_width);
    const auto doubled_height = fig->height() * 2;
    fig->height(doubled_height);
    fig->x_position(200);
    fig->y_position(100);

    // Quiet mode is switched on before any panel is added; the figure is
    // drawn explicitly with draw() once all panels exist.
    fig->quiet_mode(true);

    // Two separately generated samples of 10000 values each.
    auto xs = randn(10000, 0., 1.);
    auto ys = randn(10000, 0., 1.);

    // Panel 0: plain scatter plot.
    subplot(2, 3, 0);
    scatter(xs, ys);
    title("Scatter plot");

    // Panel 1: point size style.
    subplot(2, 3, 1);
    binscatter(xs, ys, 30, 10, bin_scatter_style::point_size);
    title("Binned scatter plot: Point size");

    // Panel 2: point alpha style.
    subplot(2, 3, 2);
    binscatter(xs, ys, 30, 10, bin_scatter_style::point_alpha);
    title("Binned scatter plot: Point alpha");

    // Panel 3: jitter style.
    subplot(2, 3, 3);
    binscatter(xs, ys, 30, 10, bin_scatter_style::jitter);
    title("Binned scatter plot: Jitter");

    // Panel 4: point colormap style.
    subplot(2, 3, 4);
    binscatter(xs, ys, 30, 10, bin_scatter_style::point_colormap);
    title("Binned scatter plot: Colormap");

    // Panel 5: heatmap style; the title is set before axis(tight) here.
    subplot(2, 3, 5);
    binscatter(xs, ys, 30, 10, bin_scatter_style::heatmap);
    title("Binned scatter plot: Heatmap");
    axis(tight);

    fig->draw();

    show();
    return 0;
}

example_binscatter_6

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
#include <matplot/matplot.h>
#include <random>
#include <tuple>

// Example 6: a single binned scatter plot in point_colormap style, with the
// parula palette applied to the current axes.
int main() {
    using namespace matplot;

    // Two separately generated samples of 100000 values each.
    auto xs = randn(100000, 0., 1.);
    auto ys = randn(100000, 0., 1.);

    binscatter(xs, ys, bin_scatter_style::point_colormap);

    // Apply the palette to whatever gca() returns after the plot is created.
    auto current_axes = gca();
    colormap(current_axes, palette::parula());

    show();
    return 0;
}

example_binscatter_7

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
#include <matplot/matplot.h>
#include <random>
#include <tuple>

// Example 7: a single heatmap-style binned scatter plot called with the
// explicit arguments 20 and 30, followed by axis(tight).
int main() {
    using namespace matplot;

    // Two separately generated samples of 100000 values each.
    auto xs = randn(100000, 0., 1.);
    auto ys = randn(100000, 0., 1.);

    binscatter(xs, ys, 20, 30, bin_scatter_style::heatmap);
    axis(tight);

    show();
    return 0;
}

Binned scatter plots use variations of the histogram algorithms of the previous section as an extra step to place all the data into two-dimensional bins that can be represented with varying colors or sizes. This is useful when there are so many data points that a scatter plot would be impractical for visualizing the data.