AutoPas  3.0.0
Loading...
Searching...
No Matches
SlicedBalancedBasedTraversal.h
Go to the documentation of this file.
1
7#pragma once
8
9#include <array>
10#include <vector>
11
12#include "BalancedTraversal.h"
14#include "autopas/utils/Timer.h"
16
17namespace autopas {
18
29template <class ParticleCell, class Functor>
30class SlicedBalancedBasedTraversal : public SlicedLockBasedTraversal<ParticleCell, Functor>, public BalancedTraversal {
31 public:
36 explicit SlicedBalancedBasedTraversal(const std::array<unsigned long, 3> &dims, Functor *functor,
37 const double interactionLength, const std::array<double, 3> &cellLength,
38 DataLayoutOption dataLayout, bool useNewton3, bool spaciallyForward)
39 : SlicedLockBasedTraversal<ParticleCell, Functor>(dims, functor, interactionLength, cellLength, dataLayout,
40 useNewton3, spaciallyForward) {
41 // As we create exactly one slice per thread, dynamic scheduling makes little sense.
42 this->_dynamic = false;
43 }
44
49 void initSliceThickness(unsigned long minSliceThickness) override {
50 // make thicknesses are empty
51 this->_sliceThickness.clear();
52
53 // estimate loads along longest axis
54 auto maxDimension = this->_dimsPerLength[0];
55 auto maxDimensionLength = this->_cellsPerDimension[this->_dimsPerLength[0]];
56
57 std::vector<unsigned long> loads;
58 utils::Timer timer;
59 timer.start();
60 loads.resize(maxDimensionLength);
61 AUTOPAS_OPENMP(parallel for schedule(static, 1))
62 for (auto x = 0; x < maxDimensionLength; x++) {
63 std::array<unsigned long, 3> lowerCorner = {0, 0, 0};
64 std::array<unsigned long, 3> upperCorner = this->_cellsPerDimension;
65 // upper corner is inclusive, so subtract 1 from each coordinate
66 --upperCorner[0];
67 --upperCorner[1];
68 --upperCorner[2];
69 lowerCorner[maxDimension] = x;
70 upperCorner[maxDimension] = x;
71 if (not this->_loadEstimator) {
73 "AutoPas internal error: SlicedBalancedBasedTraversal's _loadEstimator is null.");
74 }
75 auto load = this->_loadEstimator(this->_cellsPerDimension, lowerCorner, upperCorner);
76 loads[x] = load;
77 }
78 for (auto i = 1; i < loads.size(); i++) {
79 loads[i] += loads[i - 1];
80 }
81 auto fullLoad = loads.back();
82 auto loadEstimationTime = timer.stop();
83 AutoPasLog(DEBUG, "load estimation took {} nanoseconds", loadEstimationTime);
84
85 auto numSlices = (size_t)autopas_get_max_threads();
86 AutoPasLog(DEBUG, "{} threads available.", numSlices);
87 // using greedy algorithm to assign slice thicknesses. May lead to less slices being used.
88 unsigned int totalThickness = 0;
89 // avg load per slice
90 auto avg = fullLoad / numSlices;
91 auto lastLoad = 0;
92 for (auto s = 0; s < numSlices; s++) {
93 unsigned int thickness;
94 if (s == numSlices - 1) {
95 thickness = maxDimensionLength - totalThickness;
96 } else {
97 thickness = minSliceThickness;
98 while (totalThickness + thickness + 1 < maxDimensionLength and
99 loads[totalThickness + thickness - 1] - lastLoad < avg) {
100 auto load1 = loads[totalThickness + thickness - 1] - lastLoad;
101 auto load2 = loads[totalThickness + thickness] - lastLoad;
102 // if (abs(avg-load1) < abs(avg-load2))
103 // doing this manually as we are using unsigned longs and would have to cast otherwise
104 if (((avg > load1) ? (avg - load1) : (load1 - avg)) < ((avg > load2) ? (avg - load2) : (load2 - avg))) {
105 break;
106 }
107 thickness++;
108 }
109 }
110 if (totalThickness + thickness > maxDimensionLength || thickness < minSliceThickness) {
111 // if minSlicethickness can no longer be satisfied, add remaining space to last slice
112 this->_sliceThickness[s - 1] += maxDimensionLength - totalThickness;
113 AutoPasLog(DEBUG, "Balanced Sliced traversal only using {} threads because of greedy algorithm.", s);
114 numSlices = s;
115 break;
116
117 } else {
118 totalThickness += thickness;
120 this->_sliceThickness.push_back(thickness);
121 if (s != numSlices - 1) {
122 // avg of remaining load over remaining threads
123 avg = (fullLoad - loads[totalThickness - 1]) / (numSlices - s - 1);
124 lastLoad = loads[totalThickness - 1];
125 }
126 }
127 }
128 std::string thicknessStr;
129 std::string loadStr;
130 lastLoad = 0;
131 totalThickness = 0;
132 for (auto t : this->_sliceThickness) {
133 thicknessStr += std::to_string(t) + ", ";
134 totalThickness += t;
135 loadStr += std::to_string(loads[totalThickness - 1] - lastLoad) + ", ";
136 lastLoad = loads[totalThickness - 1];
137 }
138
139 // some analysis output that is only relevant when logger is set to debug
140 if (autopas::Logger::get()->level() <= autopas::Logger::LogLevel::debug) {
141 std::string thicknessStr;
142 std::string loadStr;
143 auto lastLoad = 0;
144 totalThickness = 0;
145 for (auto t : this->_sliceThickness) {
146 thicknessStr += std::to_string(t) + ", ";
147 totalThickness += t;
148 loadStr += std::to_string(loads[totalThickness - 1] - lastLoad) + ", ";
149 lastLoad = loads[totalThickness - 1];
150 }
151
153 AutoPasLog(DEBUG, "Slice Thicknesses: [{}]", thicknessStr);
154 AutoPasLog(DEBUG, "Slice loads: [{}]", loadStr);
155 }
156
157 if (this->_spaciallyForward) {
158 // decreases last _sliceThickness by _overlapLongestAxis to account for the way we handle base cells
159 this->_sliceThickness.back() -= this->_overlapLongestAxis;
160 }
161 }
162};
163
164} // namespace autopas
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
#define AUTOPAS_OPENMP(args)
Empty macro to throw away any arguments.
Definition: WrapOpenMP.h:126
Base class for traversals utilising load balancing.
Definition: BalancedTraversal.h:19
EstimatorFunction _loadEstimator
Algorithm to use for estimating load.
Definition: BalancedTraversal.h:41
std::array< unsigned long, 3 > _cellsPerDimension
The dimensions of the cellblock.
Definition: CellTraversal.h:55
Functor base class.
Definition: Functor.h:40
static auto get()
Get a pointer to the actual logger object.
Definition: Logger.h:90
Class for Cells of Particles.
Definition: ParticleCell.h:51
This class provides a load balanced version of the base sliced traversal.
Definition: SlicedBalancedBasedTraversal.h:30
void initSliceThickness(unsigned long minSliceThickness) override
Calculates slice thickness according to estimated loads.
Definition: SlicedBalancedBasedTraversal.h:49
SlicedBalancedBasedTraversal(const std::array< unsigned long, 3 > &dims, Functor *functor, const double interactionLength, const std::array< double, 3 > &cellLength, DataLayoutOption dataLayout, bool useNewton3, bool spaciallyForward)
Constructor of the balanced sliced traversal.
Definition: SlicedBalancedBasedTraversal.h:36
std::array< int, 3 > _dimsPerLength
Store ids of dimensions ordered by number of cells per dimensions.
Definition: SlicedBasedTraversal.h:147
std::vector< unsigned long > _sliceThickness
The number of cells per slice in the dimension that was sliced.
Definition: SlicedBasedTraversal.h:157
bool _spaciallyForward
Whether the base step only covers neighboring cells that are spatially forward (for example c08).
Definition: SlicedBasedTraversal.h:162
unsigned long _overlapLongestAxis
Overlap of interacting cells along the longest axis.
Definition: SlicedBasedTraversal.h:152
This class provides the sliced traversal.
Definition: SlicedLockBasedTraversal.h:32
bool _dynamic
whether to use static or dynamic scheduling.
Definition: SlicedLockBasedTraversal.h:48
static void exception(const Exception e)
Handle an exception derived from std::exception.
Definition: ExceptionHandler.h:63
Timer class to stop times.
Definition: Timer.h:20
void start()
start the timer.
Definition: Timer.cpp:17
long stop()
Stops the timer and returns the time elapsed in nanoseconds since the last call to start.
Definition: Timer.cpp:25
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144