AutoPas  3.0.0
Loading...
Searching...
No Matches
SlicedLockBasedTraversal.h
Go to the documentation of this file.
1
8#pragma once
9
10#include <numeric>
11
14#include "autopas/utils/Timer.h"
16
17namespace autopas {
18
31template <class ParticleCell, class Functor>
32class SlicedLockBasedTraversal : public SlicedBasedTraversal<ParticleCell, Functor> {
33 public:
38 explicit SlicedLockBasedTraversal(const std::array<unsigned long, 3> &dims, Functor *functor,
39 const double interactionLength, const std::array<double, 3> &cellLength,
40 DataLayoutOption dataLayout, bool useNewton3, bool spaciallyForward)
41 : SlicedBasedTraversal<ParticleCell, Functor>(dims, functor, interactionLength, cellLength, dataLayout,
42 useNewton3, spaciallyForward) {}
43
44 protected:
48 bool _dynamic = true;
49
56 template <typename LoopBody>
57 inline void slicedTraversal(LoopBody &&loopBody);
58};
59
60template <class ParticleCell, class Functor>
61template <typename LoopBody>
63 using std::array;
64
65 auto numSlices = this->_sliceThickness.size();
66 std::vector<AutoPasLock> locks;
67 locks.resize((numSlices - 1) * this->_overlapLongestAxis);
68
69 // 0) check if applicable
70 const auto overLapps23 = [&]() -> std::array<size_t, 2> {
71 if (this->_spaciallyForward) {
72 return {this->_overlap[this->_dimsPerLength[1]], this->_overlap[this->_dimsPerLength[2]]};
73 } else {
74 return {0ul, 0ul};
75 }
76 }();
77
78 std::vector<utils::Timer> timers;
79 std::vector<double> threadTimes;
80
81 timers.resize(numSlices);
82 threadTimes.resize(numSlices);
83
84#ifdef AUTOPAS_USE_OPENMP
85 if (this->_dynamic) {
86 omp_set_schedule(omp_sched_dynamic, 1);
87 } else {
88 omp_set_schedule(omp_sched_static, 1);
89 }
90#endif
91 AUTOPAS_OPENMP(parallel for schedule(runtime))
92 for (size_t slice = 0; slice < numSlices; ++slice) {
93 timers[slice].start();
94 array<unsigned long, 3> myStartArray{0, 0, 0};
95 for (size_t i = 0; i < slice; ++i) {
96 myStartArray[this->_dimsPerLength[0]] += this->_sliceThickness[i];
97 }
98
99 // all but the first slice need to lock their starting layers.
100 const unsigned long lockBaseIndex = (slice - 1) * this->_overlapLongestAxis;
101 if (slice > 0) {
102 for (unsigned long i = 0ul; i < this->_overlapLongestAxis; i++) {
103 locks[lockBaseIndex + i].lock();
104 }
105 }
106 const auto lastLayer = myStartArray[this->_dimsPerLength[0]] + this->_sliceThickness[slice];
107 for (unsigned long sliceOffset = 0ul; sliceOffset < this->_sliceThickness[slice]; ++sliceOffset) {
108 const auto dimSlice = myStartArray[this->_dimsPerLength[0]] + sliceOffset;
109 // at the last layers request lock for the starting layer of the next
110 // slice. Does not apply for the last slice.
111 if (slice != numSlices - 1 and dimSlice >= lastLayer - this->_overlapLongestAxis) {
112 locks[((slice + 1) * this->_overlapLongestAxis) - (lastLayer - dimSlice)].lock();
113 }
114 for (unsigned long dimMedium = 0; dimMedium < this->_cellsPerDimension[this->_dimsPerLength[1]] - overLapps23[0];
115 ++dimMedium) {
116 for (unsigned long dimShort = 0; dimShort < this->_cellsPerDimension[this->_dimsPerLength[2]] - overLapps23[1];
117 ++dimShort) {
118 array<unsigned long, 3> idArray = {};
119 idArray[this->_dimsPerLength[0]] = dimSlice;
120 idArray[this->_dimsPerLength[1]] = dimMedium;
121 idArray[this->_dimsPerLength[2]] = dimShort;
122
123 loopBody(idArray[0], idArray[1], idArray[2]);
124 }
125 }
126 // at the end of the first layers release the lock
127 if (slice > 0 and dimSlice < myStartArray[this->_dimsPerLength[0]] + this->_overlapLongestAxis) {
128 locks[lockBaseIndex + sliceOffset].unlock();
129 // if lastLayer is reached within overlap area, unlock all following locks
130 // this should never be the case if slice thicknesses are set up properly; thickness should always be
131 // greater than the overlap along the longest axis, or the slices won't be processed in parallel.
132 if (dimSlice == lastLayer - 1) {
133 for (unsigned long i = sliceOffset + 1; i < this->_overlapLongestAxis; ++i) {
134 locks[lockBaseIndex + i].unlock();
135 }
136 }
137 } else if (slice != numSlices - 1 and dimSlice == lastLayer - 1) {
138 // clearing of the locks set on the last layers of each slice
139 for (size_t i = (slice * this->_overlapLongestAxis); i < (slice + 1) * this->_overlapLongestAxis; ++i) {
140 locks[i].unlock();
141 }
142 }
143 }
144 threadTimes[slice] = timers[slice].stop();
145 }
146
147 std::string timesStr;
148 for (auto t : threadTimes) {
149 timesStr += std::to_string(t) + ", ";
150 }
151 auto minMax = std::minmax_element(threadTimes.begin(), threadTimes.end());
152 auto avg = std::accumulate(threadTimes.begin(), threadTimes.end(), 0.0) / numSlices;
153 auto variance = std::accumulate(threadTimes.cbegin(), threadTimes.cend(), 0.0,
154 [avg](double a, double b) -> double { return a + std::pow(avg - b, 2.0); }) /
155 numSlices;
156 auto stddev = std::sqrt(variance);
157
158 AutoPasLog(DEBUG, "times per slice: [{}].", timesStr);
159 AutoPasLog(DEBUG, "Difference between longest and shortest time: {:.3G}", *minMax.second - *minMax.first);
160 AutoPasLog(DEBUG, "Ratio between longest and shortest time: {:.3G}", (float)*minMax.second / *minMax.first);
161 AutoPasLog(DEBUG, "avg: {:.3G}, std-deviation: {:.3G} ({:.3G}%)", avg, stddev, 100 * stddev / avg);
162}
163
164} // namespace autopas
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
#define AUTOPAS_OPENMP(args)
Empty macro to throw away any arguments.
Definition: WrapOpenMP.h:126
Functor base class.
Definition: Functor.h:40
Class for Cells of Particles.
Definition: ParticleCell.h:51
This class provides the base for locked- and colored sliced traversals.
Definition: SlicedBasedTraversal.h:30
This class provides the sliced traversal.
Definition: SlicedLockBasedTraversal.h:32
bool _dynamic
whether to use static or dynamic scheduling.
Definition: SlicedLockBasedTraversal.h:48
SlicedLockBasedTraversal(const std::array< unsigned long, 3 > &dims, Functor *functor, const double interactionLength, const std::array< double, 3 > &cellLength, DataLayoutOption dataLayout, bool useNewton3, bool spaciallyForward)
Constructor of the sliced traversal.
Definition: SlicedLockBasedTraversal.h:38
void slicedTraversal(LoopBody &&loopBody)
The main traversal of the sliced traversal.
Definition: SlicedLockBasedTraversal.h:62
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32