1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
| #include <iostream> #include <vector> #include <thread> #include <cmath> #include <algorithm> #include <numeric> #include <chrono>
template<typename T, typename BinaryOp> T serial_reduce(const std::vector<T>& data, T init, BinaryOp op) { T result = init; for (const auto& item : data) { result = op(result, item); } return result; }
template<typename T, typename BinaryOp> T parallel_reduce(const std::vector<T>& data, T init, BinaryOp op, int num_threads) { int n = data.size(); if (n == 0) return init; std::vector<T> partial_results(num_threads, init); std::vector<std::thread> threads; int elements_per_thread = std::ceil(static_cast<double>(n) / num_threads); for (int i = 0; i < num_threads; i++) { threads.emplace_back([i, &data, &partial_results, &op, elements_per_thread, n]() { int start = i * elements_per_thread; int end = std::min((i + 1) * elements_per_thread, n); if (start < n) { T local_result = data[start]; for (int j = start + 1; j < end; j++) { local_result = op(local_result, data[j]); } partial_results[i] = local_result; } }); } for (auto& thread : threads) { thread.join(); } T final_result = init; for (const auto& partial : partial_results) { final_result = op(final_result, partial); } return final_result; }
template<typename T, typename BinaryOp> T tree_reduce(const std::vector<T>& data, int start, int end, T init, BinaryOp op) { if (start == end) return init; if (start + 1 == end) return data[start]; int mid = (start + end) / 2; T left = tree_reduce(data, start, mid, init, op); T right = tree_reduce(data, mid, end, init, op); return op(left, right); }
int main() { const int size = 1000000; std::vector<int> data(size); for (int i = 0; i < size; i++) { data[i] = i % 100; } auto start = std::chrono::high_resolution_clock::now(); int serial_sum = serial_reduce(data, 0, [](int a, int b) { return a + b; }); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> serial_duration = end - start; start = std::chrono::high_resolution_clock::now(); int parallel_sum = parallel_reduce(data, 0, [](int a, int b) { return a + b; }, 4); end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> parallel_duration = end - start; start = std::chrono::high_resolution_clock::now(); int tree_sum = tree_reduce(data, 0, data.size(), 0, [](int a, int b) { return a + b; }); end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> tree_duration = end - start; std::cout << "Serial reduction result: " << serial_sum << ", time: " << serial_duration.count() << "s\n"; std::cout << "Parallel reduction result: " << parallel_sum << ", time: " << parallel_duration.count() << "s\n"; std::cout << "Tree reduction result: " << tree_sum << ", time: " << tree_duration.count() << "s\n"; return 0; }
|