Skip to content

Commit 1b70062

Browse files
authored
some more optimizations (elalish#503)
* simple speedup from STL algorithms * fix build * std::list allocator optimization * added documentation * fix weird formatting issues * try fixing macOS build; it seems that Apple Clang 14 does not support PSTL * fix again * fix macOS build * fix Apple * clean up using macro * fix Apple? * small fix... * fix Apple (again...)
1 parent 84432c9 commit 1b70062

File tree

3 files changed

+124
-37
lines changed

3 files changed

+124
-37
lines changed

src/collider/src/collider.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ SparseIndices Collider::Collisions(const VecDH<T>& queriesIn) const {
287287
{thrust::pair<int*, int*>(nullptr, nullptr), counts.ptrD(),
288288
nodeBBox_.ptrD(), internalChildren_.ptrD()}));
289289
// compute start index for each query and total count
290-
exclusive_scan(policy, counts.begin(), counts.end(), counts.begin());
290+
exclusive_scan(policy, counts.begin(), counts.end(), counts.begin(), 0,
291+
std::plus<int>());
291292
SparseIndices queryTri(counts.back());
292293
// actually recording collisions
293294
for_each_n(policy, zip(queriesIn.cbegin(), countAt(0)), queriesIn.size(),

src/polygon/src/polygon.cpp

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,14 @@
1515
#include "polygon.h"
1616

1717
#include <algorithm>
18+
#if MANIFOLD_PAR == 'T'
19+
#include <execution>
20+
#endif
1821
#include <list>
1922
#include <map>
23+
#if !__APPLE__
24+
#include <memory_resource>
25+
#endif
2026
#include <queue>
2127
#include <set>
2228
#include <stack>
@@ -303,14 +309,25 @@ class Monotones {
303309

304310
private:
305311
struct VertAdj;
306-
typedef std::list<VertAdj>::iterator VertItr;
307312
struct EdgePair;
308-
typedef std::list<EdgePair>::iterator PairItr;
309313
enum VertType { Start, WestSide, EastSide, Merge, End, Skip };
314+
#if __APPLE__
315+
typedef std::list<VertAdj>::iterator VertItr;
316+
typedef std::list<EdgePair>::iterator PairItr;
310317

311-
std::list<VertAdj> monotones_; // sweep-line list of verts
312-
std::list<EdgePair> activePairs_; // west to east list of monotone edge pairs
318+
std::list<VertAdj> monotones_; // sweep-line list of verts
319+
std::list<EdgePair> activePairs_; // west to east monotone edges
313320
std::list<EdgePair> inactivePairs_; // completed monotones
321+
#else
322+
typedef std::pmr::list<VertAdj>::iterator VertItr;
323+
typedef std::pmr::list<EdgePair>::iterator PairItr;
324+
325+
std::pmr::monotonic_buffer_resource mbr;
326+
std::pmr::polymorphic_allocator<int> pa{&mbr};
327+
std::pmr::list<VertAdj> monotones_{pa}; // sweep-line list of verts
328+
std::pmr::list<EdgePair> activePairs_{pa}; // west to east monotone edges
329+
std::pmr::list<EdgePair> inactivePairs_{pa}; // completed monotones
330+
#endif
314331
float precision_; // a triangle of this height or less is degenerate
315332

316333
/**
@@ -392,7 +409,7 @@ class Monotones {
392409
class Triangulator {
393410
public:
394411
Triangulator(VertItr vert, float precision) : precision_(precision) {
395-
reflex_chain_.push(vert);
412+
reflex_chain_.push_back(vert);
396413
other_side_ = vert;
397414
}
398415
int NumTriangles() const { return triangles_output_; }
@@ -407,14 +424,14 @@ class Monotones {
407424
*/
408425
void ProcessVert(const VertItr vi, bool onRight, bool last,
409426
std::vector<glm::ivec3> &triangles) {
410-
VertItr v_top = reflex_chain_.top();
427+
VertItr v_top = reflex_chain_.back();
411428
if (reflex_chain_.size() < 2) {
412-
reflex_chain_.push(vi);
429+
reflex_chain_.push_back(vi);
413430
onRight_ = onRight;
414431
return;
415432
}
416-
reflex_chain_.pop();
417-
VertItr vj = reflex_chain_.top();
433+
reflex_chain_.pop_back();
434+
VertItr vj = reflex_chain_.back();
418435
if (onRight_ == onRight && !last) {
419436
// This only creates enough triangles to ensure the reflex chain is
420437
// still reflex.
@@ -423,13 +440,13 @@ class Monotones {
423440
while (ccw == (onRight_ ? 1 : -1) || ccw == 0) {
424441
AddTriangle(triangles, vi, vj, v_top);
425442
v_top = vj;
426-
reflex_chain_.pop();
443+
reflex_chain_.pop_back();
427444
if (reflex_chain_.empty()) break;
428-
vj = reflex_chain_.top();
445+
vj = reflex_chain_.back();
429446
ccw = CCW(vi->pos, vj->pos, v_top->pos, precision_);
430447
}
431-
reflex_chain_.push(v_top);
432-
reflex_chain_.push(vi);
448+
reflex_chain_.push_back(v_top);
449+
reflex_chain_.push_back(vi);
433450
} else {
434451
// This branch empties the reflex chain and switches sides. It must be
435452
// used for the last point, as it will output all the triangles
@@ -438,19 +455,19 @@ class Monotones {
438455
onRight_ = !onRight_;
439456
VertItr v_last = v_top;
440457
while (!reflex_chain_.empty()) {
441-
vj = reflex_chain_.top();
458+
vj = reflex_chain_.back();
442459
AddTriangle(triangles, vi, v_last, vj);
443460
v_last = vj;
444-
reflex_chain_.pop();
461+
reflex_chain_.pop_back();
445462
}
446-
reflex_chain_.push(v_top);
447-
reflex_chain_.push(vi);
463+
reflex_chain_.push_back(v_top);
464+
reflex_chain_.push_back(vi);
448465
other_side_ = v_top;
449466
}
450467
}
451468

452469
private:
453-
std::stack<VertItr> reflex_chain_;
470+
std::vector<VertItr> reflex_chain_;
454471
VertItr other_side_; // The end vertex across from the reflex chain
455472
bool onRight_; // The side the reflex chain is on
456473
int triangles_output_ = 0;
@@ -784,7 +801,11 @@ class Monotones {
784801
starts.push_back(v);
785802
}
786803
}
804+
#if MANIFOLD_PAR == 'T' && !(__APPLE__)
805+
std::sort(std::execution::par_unseq, starts.begin(), starts.end(), cmp);
806+
#else
787807
std::sort(starts.begin(), starts.end(), cmp);
808+
#endif
788809

789810
std::vector<VertItr> skipped;
790811
VertItr insertAt = monotones_.begin();

src/utilities/include/par.h

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
#define MANIFOLD_PAR_NS omp
3131
#elif MANIFOLD_PAR == 'T'
3232
#include <thrust/system/tbb/execution_policy.h>
33+
34+
#include <algorithm>
35+
#include <execution>
3336
#define MANIFOLD_PAR_NS tbb
3437
#else
3538
#define MANIFOLD_PAR_NS cpp
@@ -136,43 +139,105 @@ inline ExecutionPolicy autoPolicy(int size) {
136139
thrust::NAME(thrust::cpp::par, args...); \
137140
}
138141

142+
#if MANIFOLD_PAR == 'T' && !(__APPLE__)
143+
// Sometimes the STL variant of an algorithm is faster than the Thrust one, so
// selected algorithms are routed to std:: through the macros below.
//
// STL_DYNAMIC_BACKEND(NAME, RET) defines a function template `NAME` that takes
// an ExecutionPolicy followed by the arguments of std::NAME (received by
// value) and returns the result of std::NAME converted to `Ret`.
//  - ParUnseq and Par both call std::NAME with std::execution::par_unseq.
//  - Seq leaves the switch and calls the overload without an execution policy.
// `Ret` defaults to RET and is not deducible from the call arguments, so a
// caller that needs a different return type must name it explicitly, e.g.
// NAME<Iterator>(policy, ...).
144+
#define STL_DYNAMIC_BACKEND(NAME, RET) \
145+
template <typename Ret = RET, typename... Args> \
146+
Ret NAME(ExecutionPolicy policy, Args... args) { \
147+
switch (policy) { \
148+
case ExecutionPolicy::ParUnseq: \
149+
case ExecutionPolicy::Par: \
150+
return std::NAME(std::execution::par_unseq, args...); \
151+
case ExecutionPolicy::Seq: \
152+
break; \
153+
} \
154+
return std::NAME(args...); \
155+
}
156+
// Void-returning counterpart of STL_DYNAMIC_BACKEND: defines a function
// template `NAME` that takes an ExecutionPolicy followed by the arguments of
// std::NAME (received by value) and discards any result.
//  - ParUnseq and Par both call std::NAME with std::execution::par_unseq.
//  - Seq calls the overload without an execution policy.
#define STL_DYNAMIC_BACKEND_VOID(NAME) \
157+
template <typename... Args> \
158+
void NAME(ExecutionPolicy policy, Args... args) { \
159+
switch (policy) { \
160+
case ExecutionPolicy::ParUnseq: \
161+
case ExecutionPolicy::Par: \
162+
std::NAME(std::execution::par_unseq, args...); \
163+
break; \
164+
case ExecutionPolicy::Seq: \
165+
std::NAME(args...); \
166+
break; \
167+
} \
168+
}
169+
170+
// exclusive_scan wrapper for the STL backend.
//
// `policy` is accepted so the call shape matches the other policy-taking
// wrappers, but it is ignored: every call goes to the std::exclusive_scan
// overload that takes no execution policy, whatever policy was requested.
// `args` are forwarded by value exactly as given.
template <typename... Args>
171+
void exclusive_scan(ExecutionPolicy policy, Args... args) {
172+
// NOTE(review): presumably the parallel overload is avoided because of the
// upstream issue linked below — confirm before re-enabling it.
// https://github.com/llvm/llvm-project/issues/59810
173+
std::exclusive_scan(args...);
174+
}
175+
// copy_if overload that takes an extra `stencil` iterator; it always forwards
// to thrust::copy_if and returns that call's result.
//
// `DerivedPolicy` appears in no function parameter, so it cannot be deduced
// and has no default: callers must supply it as an explicit template argument.
// NOTE(review): presumably it exists so call sites written as
// copy_if<T>(policy, ...) keep compiling — confirm against the callers.
//
//  - policy == Seq: runs thrust::copy_if under the thrust::cpp::par policy.
//  - otherwise:     runs thrust::copy_if with no execution policy argument.
template <typename DerivedPolicy, typename InputIterator1,
176+
typename InputIterator2, typename OutputIterator, typename Predicate>
177+
OutputIterator copy_if(ExecutionPolicy policy, InputIterator1 first,
178+
InputIterator1 last, InputIterator2 stencil,
179+
OutputIterator result, Predicate pred) {
180+
if (policy == ExecutionPolicy::Seq)
181+
return thrust::copy_if(thrust::cpp::par, first, last, stencil, result,
182+
pred);
183+
else
184+
// note: this is not a typo, see
185+
// https://github.com/NVIDIA/thrust/issues/1977
186+
return thrust::copy_if(first, last, stencil, result, pred);
187+
}
188+
// copy_if overload without a stencil; it forwards to std::copy_if and returns
// that call's result.
//
// `DerivedPolicy` appears in no function parameter, so it cannot be deduced
// and has no default: callers must supply it as an explicit template argument.
// NOTE(review): presumably it exists so call sites written as
// copy_if<T>(policy, ...) keep compiling — confirm against the callers.
//
//  - policy == Seq: calls std::copy_if without an execution policy.
//  - otherwise:     calls std::copy_if with std::execution::par_unseq.
template <typename DerivedPolicy, typename InputIterator1,
189+
typename OutputIterator, typename Predicate>
190+
OutputIterator copy_if(ExecutionPolicy policy, InputIterator1 first,
191+
InputIterator1 last, OutputIterator result,
192+
Predicate pred) {
193+
if (policy == ExecutionPolicy::Seq)
194+
return std::copy_if(first, last, result, pred);
195+
else
196+
return std::copy_if(std::execution::par_unseq, first, last, result, pred);
197+
}
198+
#else
199+
#define STL_DYNAMIC_BACKEND(NAME, RET) THRUST_DYNAMIC_BACKEND(NAME, RET)
200+
#define STL_DYNAMIC_BACKEND_VOID(NAME) THRUST_DYNAMIC_BACKEND_VOID(NAME)
201+
202+
THRUST_DYNAMIC_BACKEND_VOID(exclusive_scan)
203+
THRUST_DYNAMIC_BACKEND(copy_if, void)
204+
#endif
205+
139206
THRUST_DYNAMIC_BACKEND_HOST_VOID(for_each)
140207
THRUST_DYNAMIC_BACKEND_HOST_VOID(for_each_n)
141208

142209
THRUST_DYNAMIC_BACKEND_VOID(gather)
143210
THRUST_DYNAMIC_BACKEND_VOID(scatter)
144211
THRUST_DYNAMIC_BACKEND_VOID(for_each)
145212
THRUST_DYNAMIC_BACKEND_VOID(for_each_n)
146-
THRUST_DYNAMIC_BACKEND_VOID(sort)
147-
THRUST_DYNAMIC_BACKEND_VOID(stable_sort)
148-
THRUST_DYNAMIC_BACKEND_VOID(fill)
149213
THRUST_DYNAMIC_BACKEND_VOID(sequence)
150214
THRUST_DYNAMIC_BACKEND_VOID(sort_by_key)
151215
THRUST_DYNAMIC_BACKEND_VOID(stable_sort_by_key)
152-
THRUST_DYNAMIC_BACKEND_VOID(copy)
153216
THRUST_DYNAMIC_BACKEND_VOID(transform)
154-
THRUST_DYNAMIC_BACKEND_VOID(inclusive_scan)
155-
THRUST_DYNAMIC_BACKEND_VOID(exclusive_scan)
156217
THRUST_DYNAMIC_BACKEND_VOID(uninitialized_fill)
157218
THRUST_DYNAMIC_BACKEND_VOID(uninitialized_copy)
219+
THRUST_DYNAMIC_BACKEND_VOID(stable_sort)
220+
THRUST_DYNAMIC_BACKEND_VOID(fill)
221+
THRUST_DYNAMIC_BACKEND_VOID(copy)
222+
THRUST_DYNAMIC_BACKEND_VOID(inclusive_scan)
158223
THRUST_DYNAMIC_BACKEND_VOID(copy_n)
224+
STL_DYNAMIC_BACKEND_VOID(sort)
159225

160-
THRUST_DYNAMIC_BACKEND(all_of, bool)
161-
THRUST_DYNAMIC_BACKEND(is_sorted, bool)
162-
THRUST_DYNAMIC_BACKEND(reduce, void)
163-
THRUST_DYNAMIC_BACKEND(count_if, int)
164-
THRUST_DYNAMIC_BACKEND(binary_search, bool)
165226
// void implies that the user has to specify the return type in the template
166227
// argument, as we are unable to deduce it
228+
THRUST_DYNAMIC_BACKEND(transform_reduce, void)
229+
THRUST_DYNAMIC_BACKEND(gather_if, void)
230+
THRUST_DYNAMIC_BACKEND(reduce_by_key, void)
231+
THRUST_DYNAMIC_BACKEND(lower_bound, void)
167232
THRUST_DYNAMIC_BACKEND(remove, void)
168-
THRUST_DYNAMIC_BACKEND(copy_if, void)
169-
THRUST_DYNAMIC_BACKEND(remove_if, void)
170-
THRUST_DYNAMIC_BACKEND(unique, void)
171233
THRUST_DYNAMIC_BACKEND(find, void)
172234
THRUST_DYNAMIC_BACKEND(find_if, void)
173-
THRUST_DYNAMIC_BACKEND(reduce_by_key, void)
174-
THRUST_DYNAMIC_BACKEND(transform_reduce, void)
175-
THRUST_DYNAMIC_BACKEND(lower_bound, void)
176-
THRUST_DYNAMIC_BACKEND(gather_if, void)
235+
THRUST_DYNAMIC_BACKEND(all_of, bool)
236+
THRUST_DYNAMIC_BACKEND(is_sorted, bool)
237+
THRUST_DYNAMIC_BACKEND(reduce, void)
238+
THRUST_DYNAMIC_BACKEND(count_if, int)
239+
THRUST_DYNAMIC_BACKEND(binary_search, bool)
240+
STL_DYNAMIC_BACKEND(remove_if, void)
241+
STL_DYNAMIC_BACKEND(unique, void)
177242

178243
} // namespace manifold

0 commit comments

Comments
 (0)