boost::fast_pool_allocator makes std::forward_list slower

I wrote code to measure sorting performance of forward_list, and counterintuitively it seems that using memory pool allocator with it actually makes it slower:

// clang++ -O3 -std=c++11 -I/usr/local/include -lboost_system sort-test.cc -o sort-test

#include <boost/pool/pool_alloc.hpp>

#include <algorithm>
#include <cmath>
#include <chrono>
#include <forward_list>
#include <fstream>
#include <iostream>
#include <vector>

using namespace std;
using namespace std::chrono;
using namespace boost;

class TSampler {
public:
    void AddSample(double value) {
        NumSamples++;

        if (NumSamples == 1) {
            Mean = value;
            M = value;
            M2 = value * value;
            return;
        }

        double dillutionFactor = 1.0 / NumSamples;

        // variance recurrence
        double cm = value - M;
        M += dillutionFactor * cm;
        M2 += cm * (value - M);

        // mean recurrence
        Mean += dillutionFactor * (value - Mean);
    }

    double GetMean() const {
        return Mean;
    }

    double GetStdDev() const {
        Recalc();
        return StdDev;
    }

    double GetConfidenceRadius() const {
        Recalc();
        return Radius;
    }

    double GetConfidenceLo() const {
        Recalc();
        return Mean - Radius;
    }

    double GetConfidenceHi() const {
        Recalc();
        return Mean + Radius;
    }

private:
    void Recalc() const {
        StdDev = sqrt(M2 / (NumSamples - 1.0));
        Radius = 1.96 * StdDev / sqrt(NumSamples + 0.0);
    }

private:
    size_t NumSamples = 0;
    double Mean = NAN, M = NAN, M2 = NAN;
    mutable double StdDev = NAN, Radius = NAN;
};

void FillSamples(int *samples, size_t size) {
    ifstream r("/dev/urandom");
    r.read(reinterpret_cast<char *>(samples), size * sizeof(*samples));
}

using TList = forward_list<int>;
using TPooledList = forward_list<int, fast_pool_allocator<int>>;
using TVector = vector<int>;

void InitContainer(TList &xs, int *input, size_t size) {
    for (int i = size - 1; i >= 0; --i) {
        xs.push_front(input[i]);
    }
}

void InitContainer(TPooledList &xs, int *input, size_t size) {
    for (int i = size - 1; i >= 0; --i) {
        xs.push_front(input[i]);
    }
}

void InitContainer(TVector &xs, int *input, size_t size) {
    xs.reserve(size);
    copy(input, input + size, back_inserter(xs));
}

void Sort(TPooledList &xs) {
    xs.sort();
}

void Sort(TList &xs) {
    xs.sort();
}

void Sort(TVector &xs) {
    sort(xs.begin(), xs.end());
}

template<typename TContainer>
void RunSort(TSampler &sampler, int *input, size_t size) {
    TContainer xs;
    InitContainer(xs, input, size);

    steady_clock::time_point before = steady_clock::now();
    Sort(xs);
    steady_clock::time_point after = steady_clock::now();

    auto delta = duration_cast<duration<double>>(after - before);
    sampler.AddSample(delta.count());
}

int main(int argc, char **argv) {
    argc--; argv++;
    if (argc != 2) {
        cerr << "Usage: sort-test T N\nT: number of trials\nN: size of the vector/list" << endl;
        exit(1);
    }

    int t = atoi(argv[0]);
    int n = atoi(argv[1]);
    if (t <= 0 || n <= 0) {
        cerr << "Invalid arguments" << endl;
        exit(1);
    }

    TSampler listSampler;
    TSampler pooledListSampler;
    TSampler vectorSampler;
    TVector input;
    input.resize(n);
    for (int trial = 0; trial < t; ++trial) {
        FillSamples(&input[0], n);
        RunSort<TList>(listSampler, &input[0], n);
        RunSort<TPooledList>(pooledListSampler, &input[0], n);
        RunSort<TVector>(vectorSampler, &input[0], n);
    }

    cout << "List:        " << listSampler.GetMean()
         << " ± " << listSampler.GetConfidenceRadius()
         << " (95% confidence)" << endl;
    cout << "Pooled list: " << pooledListSampler.GetMean()
         << " ± " << pooledListSampler.GetConfidenceRadius()
         << " (95% confidence)" << endl;
    cout << "Vector:      " << vectorSampler.GetMean()
         << " ± " << vectorSampler.GetConfidenceRadius()
         << " (95% confidence)" << endl;
}

On my 2013 MacBook Air:

[03:04:59 dev]$ ./sort-test 10 10000
List:        0.00082735 ± 0.000189312 (95% confidence)
Pooled list: 0.00091835 ± 0.000201482 (95% confidence)
Vector:      0.000484268 ± 0.000107911 (95% confidence)
[03:05:03 dev]$ ./sort-test 10 10000
List:        0.000764033 ± 0.000188491 (95% confidence)
Pooled list: 0.00103925 ± 0.00038322 (95% confidence)
Vector:      0.000482046 ± 0.000111277 (95% confidence)
[03:05:08 dev]$ ./sort-test 30 10000
List:        0.000747641 ± 6.93474e-05 (95% confidence)
Pooled list: 0.000912401 ± 9.43897e-05 (95% confidence)
Vector:      0.000526488 ± 9.34585e-05 (95% confidence)
[03:05:13 dev]$ ./sort-test 30 10000
List:        0.000810124 ± 8.71386e-05 (95% confidence)
Pooled list: 0.000946477 ± 0.000115813 (95% confidence)
Vector:      0.000495523 ± 5.44617e-05 (95% confidence)
[03:05:18 dev]$ ./sort-test 30 100000
List:        0.0107134 ± 0.00110686 (95% confidence)
Pooled list: 0.018314 ± 0.00102368 (95% confidence)
Vector:      0.00583989 ± 0.000514794 (95% confidence)

Why is this the case? One reason may be that memory locality is actually worse when using a memory pool because the list nodes are allocated using normal operator ::new, but then I have to ask how one would use fast_pool_allocator properly.

Answers


Using https://github.com/rmartinho/nonius instead for the benchmarking, I get these results:

libstdc++, gcc 5, -O3
clock resolution: mean is 75.9223 ns (5120002 iterations)

benchmarking list
collecting 100 samples, 1 iterations each, in estimated 381.414 ms
mean: 479.875 μs, lb 448.911 μs, ub 629.948 μs, ci 0.95
std dev: 300.223 μs, lb 13.0769 μs, ub 715.676 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers

benchmarking pooledList
collecting 100 samples, 1 iterations each, in estimated 432.377 ms
mean: 326.049 μs, lb 311.569 μs, ub 393.567 μs, ci 0.95
std dev: 136.228 μs, lb 13.3412 μs, ub 323.482 μs, ci 0.95
found 4 outliers among 100 samples (4%)
variance is severely inflated by outliers

benchmarking vector
collecting 100 samples, 1 iterations each, in estimated 311.1 ms
mean: 82.4482 μs, lb 81.3539 μs, ub 84.4259 μs, ci 0.95
std dev: 7.31431 μs, lb 4.91517 μs, ub 12.4147 μs, ci 0.95
found 20 outliers among 100 samples (20%)
variance is severely inflated by outliers

benchmarking boost.slist
collecting 100 samples, 1 iterations each, in estimated 400.889 ms
mean: 492.731 μs, lb 489.809 μs, ub 495.912 μs, ci 0.95
std dev: 15.5061 μs, lb 13.6999 μs, ub 17.8827 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is moderately inflated by outliers

libstdc++, clang 3.6, -O3
clock resolution: mean is 75.8616 ns (5120002 iterations)

benchmarking list
collecting 100 samples, 1 iterations each, in estimated 390.905 ms
mean: 488.722 μs, lb 475.528 μs, ub 532.547 μs, ci 0.95
std dev: 109.262 μs, lb 33.3498 μs, ub 242.045 μs, ci 0.95
found 3 outliers among 100 samples (3%)
variance is severely inflated by outliers

benchmarking pooledList
collecting 100 samples, 1 iterations each, in estimated 460.04 ms
mean: 371.775 μs, lb 359.049 μs, ub 421.965 μs, ci 0.95
std dev: 119.361 μs, lb 12.3961 μs, ub 283.336 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers

benchmarking vector
collecting 100 samples, 1 iterations each, in estimated 312.925 ms
mean: 74.4602 μs, lb 71.1761 μs, ub 80.5051 μs, ci 0.95
std dev: 21.8959 μs, lb 14.3879 μs, ub 36.7203 μs, ci 0.95
found 18 outliers among 100 samples (18%)
variance is severely inflated by outliers

benchmarking boost.slist
collecting 100 samples, 1 iterations each, in estimated 412.003 ms
mean: 542.015 μs, lb 538.517 μs, ub 548.913 μs, ci 0.95
std dev: 24.0988 μs, lb 14.6781 μs, ub 46.3466 μs, ci 0.95
found 4 outliers among 100 samples (4%)
variance is moderately inflated by outliers

libc++, clang 3.6, -O3
clock resolution: mean is 80.7181 ns (5120002 iterations)

benchmarking list
collecting 100 samples, 1 iterations each, in estimated 379.471 ms
mean: 245.714 μs, lb 234.609 μs, ub 296.895 μs, ci 0.95
std dev: 103.979 μs, lb 11.1603 μs, ub 246.781 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers

benchmarking pooledList
collecting 100 samples, 1 iterations each, in estimated 403.439 ms
mean: 196.096 μs, lb 194.302 μs, ub 198.409 μs, ci 0.95
std dev: 10.3566 μs, lb 8.65354 μs, ub 14.8478 μs, ci 0.95
found 1 outliers among 100 samples (1%)
variance is severely inflated by outliers

benchmarking vector
collecting 100 samples, 1 iterations each, in estimated 328.543 ms
mean: 13.8026 μs, lb 13.2373 μs, ub 15.107 μs, ci 0.95
std dev: 4.12654 μs, lb 2.22994 μs, ub 8.16186 μs, ci 0.95
found 23 outliers among 100 samples (23%)
variance is severely inflated by outliers

benchmarking boost.slist
collecting 100 samples, 1 iterations each, in estimated 419.92 ms
mean: 552.445 μs, lb 544.444 μs, ub 568.603 μs, ci 0.95
std dev: 55.5666 μs, lb 31.562 μs, ub 94.2535 μs, ci 0.95
found 5 outliers among 100 samples (5%)
variance is severely inflated by outliers

Code
#include <boost/pool/pool_alloc.hpp>
#include <boost/container/slist.hpp>

#include <nonius/benchmark.h++>
#include <forward_list>
#include <iostream>
#include <fstream>
#include <vector>

void FillSamples(int *samples, size_t size) {
    std::ifstream r("/dev/urandom");
    r.read(reinterpret_cast<char *>(samples), size * sizeof(*samples));
}

using TList       = std::forward_list<int>;
using TPooledList = std::forward_list<int, boost::fast_pool_allocator<int>>;
using TBoostList  = boost::container::slist<int, boost::fast_pool_allocator<int>>;
using TVector     = std::vector<int>;

void InitContainer(TVector &xs, int *input, size_t size) {
    xs.reserve(size);
    copy(input, input + size, back_inserter(xs));
}

template <typename SomeList>
void InitContainer(SomeList &xs, int *input, size_t size) {
    for (int i = size - 1; i >= 0; --i) {
        xs.push_front(input[i]);
    }
}

template <typename SomeList>
void Sort(SomeList &xs) {
    xs.sort();
}

void Sort(TVector &xs) {
    sort(xs.begin(), xs.end());
}

#ifndef VECSIZE
#define VECSIZE 10000
#endif

template<typename TContainer>
void RunSort(nonius::chronometer &sampler, int *input, size_t size) {
    TContainer xs;
    InitContainer(xs, input, size);
    FillSamples(&input[0], VECSIZE);

    sampler.measure([&]{ 
       Sort(xs); 
   });
}

NONIUS_BENCHMARK("list", [](nonius::chronometer cm) {
    TVector input;
    input.resize(VECSIZE);

    RunSort<TList>(cm, &input[0], VECSIZE);
})

NONIUS_BENCHMARK("pooledList", [](nonius::chronometer cm) {
    TVector input;
    input.resize(VECSIZE);

    RunSort<TPooledList>(cm, &input[0], VECSIZE);
})

NONIUS_BENCHMARK("vector", [](nonius::chronometer cm) {
    TVector input;
    input.resize(VECSIZE);

    RunSort<TVector>(cm, &input[0], VECSIZE);
})

NONIUS_BENCHMARK("boost.slist", [](nonius::chronometer cm) {
    TVector input;
    input.resize(VECSIZE);

    RunSort<TBoostList>(cm, &input[0], VECSIZE);
})

#include <nonius/main.h++>

Need Your Help

How to like a facebook post from android app

android facebook permissions facebook-like publish

I have implemented the login authentication in my app, i know how to share, but im getting crazy about the like, because im never getting a positive response. So...how can i like a post from my and...

does a wrong src in the img tag overloads the server?

html lighttpd performance

I have this on a very big number of pages: src="//avatar/images/...";