array<int, SIZE> t
如前所述,C++17 您可使用孔径。
vector<int> countBits(int num) {
static constexpr int SIZE = 100000;
static constexpr array<int, SIZE> t {[]() constexpr {
constexpr uint32_t size = SIZE;
array<int, size> v{};
for (int i = 0; i < size; i++)
v[i] = v[i>>1] + (i & 1); // or simply v[i] = __builtin_popcount(i);
return v;}()};
vector<int> v(t.begin(), t.begin() + num + 1);
return v;
}
然而,你必须使用++阵列类型。
int t[SIZE]
如果你真想使用一个C阵列int [SIZE]
,不同于array<int, SIZE>
宣布全球阵列,然后计算主体内的数值,以便在汇编时间建立静态阵列:
int w[100000] = {0};
vector<int> countBits(int num) {
vector<int> v(w, w + num + 1);
return v;
}
int main(void) {
for (int i = 0; i < 100000; i++)
w[i] = __builtin_popcount(i);
}
Results
运行时间的产出(确实是:)
OK ( 591 cycles) 0,1,1, -> 0,1,1,
OK ( 453 cycles) 0,1,1,2,1,2, -> 0,1,1,2,1,2,
OK ( 455 cycles) 0,1,1,2,1,2,2,3,1,2,... -> 0,1,1,2,1,2,2,3,1,2,...
2. 与科斯特阵列的平均产出:
OK ( 1 cycles) 0,1,1, -> 0,1,1,
OK ( 2 cycles) 0,1,1,2,1,2, -> 0,1,1,2,1,2,
OK ( 24 cycles) 0,1,1,2,1,2,2,3,1,2,... -> 0,1,1,2,1,2,2,3,1,2,...
采用第二种方法的平均产出(在我们摆脱C++阵列的间接费用时,略为加快):
OK ( 0 cycles) 0,1,1, -> 0,1,1,
OK ( 1 cycles) 0,1,1,2,1,2, -> 0,1,1,2,1,2,
OK ( 23 cycles) 0,1,1,2,1,2,2,3,1,2,... -> 0,1,1,2,1,2,2,3,1,2,...
Benchmark
我的基准是:
#include <vector>
#include <string>
#include <cstdint>
#include <array>
#include <iostream>
#include <ctime>
#include <iterator>
#include <sstream>
using namespace std;
vector<int> nums = {2, 5};
vector<vector<int>> expected = {{0,1,1}, {0,1,1,2,1,2}}; // feel free to add more tests
for (int i = 0; i < expected.size(); i++) {
clock_t start = clock();
vector<int> res = countBits(nums[i]);
double elapsedTime = (clock() - start);
printf("%s 33[30m(%4.0lf cycles) 33[0m %s -> %s
", (expected[i] == res) ? " 33[34mOK" : " 33[31mKO", elapsedTime, toString(res).c_str(), toString(expected[i]).c_str());
}