Skip to content

Word Cloud

1
wordcloud(text, black_list);

example_wordcloud_1

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
#include <matplot/matplot.h>

// Example: build a word cloud straight from raw text plus a black list of
// words that should not appear in the cloud.
int main() {
    using namespace matplot;

    std::string text = fileread("shakespeare_sonnets.txt");

    // Lowercase the text in place. The lambda takes `unsigned char` on
    // purpose: handing std::tolower a negative value (any non-ASCII byte on
    // platforms where `char` is signed) is undefined behavior.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) {
                       return static_cast<char>(std::tolower(c));
                   });

    // Words to exclude, tokenized from the contents of the black-list file.
    std::vector<std::string> black_list =
        tokenize(fileread("en_blacklist.txt"));

    wordcloud(text, black_list);
    title("Shakespeare's sonnets");

    show();
    return 0;
}

More examples

example_wordcloud_3

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#include <matplot/matplot.h>

int main() {
    using namespace matplot;

    std::string text = fileread("shakespeare_sonnets.txt");
    std::transform(text.begin(), text.end(), text.begin(),
                   [](char c) { return static_cast<char>(std::tolower(static_cast<int>(c))); });

    std::vector<std::string> black_list =
        tokenize(fileread("en_blacklist.txt"));

    std::vector<std::string> text_tokens = tokenize(text);
    auto it = std::remove_if(text_tokens.begin(), text_tokens.end(),
                             [](const std::string &s) { return s.size() < 5; });
    text_tokens.erase(it, text_tokens.end());

    auto [tokens, count] = wordcount(text_tokens, black_list);

    wordcloud(tokens, count);
    title("Shakespeare's sonnets - Big words");

    matplot::show();
    return 0;
}

example_wordcloud_4

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <matplot/matplot.h>

// Example: build a word cloud with custom colors by passing a third argument
// to wordcloud.
int main() {
    using namespace matplot;

    std::string text = fileread("shakespeare_sonnets.txt");

    // Lowercase the text in place. The lambda takes `unsigned char` on
    // purpose: handing std::tolower a negative value (any non-ASCII byte on
    // platforms where `char` is signed) is undefined behavior.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) {
                       return static_cast<char>(std::tolower(c));
                   });

    // Words to exclude, tokenized from the contents of the black-list file.
    std::vector<std::string> black_list =
        tokenize(fileread("en_blacklist.txt"));

    // Count the words of the text, skipping the black-listed ones.
    auto [tokens, count] = wordcount(text, black_list);

    // One color value per token, generated by rand with bounds 0 and 100
    // (NOTE(review): presumably uniform in that range; confirm against the
    // matplot::rand documentation).
    std::vector<double> custom_colors = rand(tokens.size(), 0., 100.);

    wordcloud(tokens, count, custom_colors);
    title("Shakespeare's sonnets - Random colors");

    show();
    return 0;
}

example_wordcloud_4

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <matplot/matplot.h>

// Example: build a word cloud whose colors come from a caller-supplied
// vector instead of the default frequency-based coloring.
int main() {
    using namespace matplot;

    std::string text = fileread("shakespeare_sonnets.txt");

    // Lowercase the text in place. The lambda takes `unsigned char` on
    // purpose: handing std::tolower a negative value (any non-ASCII byte on
    // platforms where `char` is signed) is undefined behavior.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) {
                       return static_cast<char>(std::tolower(c));
                   });

    // Words to exclude, tokenized from the contents of the black-list file.
    std::vector<std::string> black_list =
        tokenize(fileread("en_blacklist.txt"));

    // Count the words of the text, skipping the black-listed ones.
    auto [tokens, count] = wordcount(text, black_list);

    // One color value per token, generated by rand with bounds 0 and 100
    // (NOTE(review): presumably uniform in that range; confirm against the
    // matplot::rand documentation).
    std::vector<double> custom_colors = rand(tokens.size(), 0., 100.);

    wordcloud(tokens, count, custom_colors);
    title("Shakespeare's sonnets - Random colors");

    show();
    return 0;
}

Word clouds are generated from text or from pairs of words and their frequencies. After assigning each word a size proportional to its frequency, the label-positioning algorithm iterates over the words from the largest to the smallest. For each word, it spins the word in polar coordinates (converted to Cartesian coordinates) until it no longer overlaps any other word.

By default, both the colors and the sizes depend on the word frequencies. We can customize the colors by passing a third parameter to the `wordcloud` function.