AutoPas  3.0.0
Loading...
Searching...
No Matches
RemainderPairwiseInteractionHandler.h
Go to the documentation of this file.
1
7#pragma once
8
9#include <array>
10#include <cmath>
11#include <memory>
12#include <mutex>
13#include <vector>
14
21
22namespace autopas {
23
30template <typename Particle_T>
32 public:
38 std::vector<std::vector<std::vector<std::unique_ptr<std::mutex>>>> &spatialLocks)
39 : _spatialLocks(spatialLocks) {}
40
62 template <bool newton3, class ContainerType, class PairwiseFunctor>
63 void computeRemainderInteractions(PairwiseFunctor *f, ContainerType &container,
64 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
65 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers, bool useSoA) {
66 // Balance buffers. This makes processing them with static scheduling quite efficient.
67 // Also, if particles were not inserted in parallel, this enables us to process them in parallel now.
68 // Cost is at max O(2N) worst O(N) per buffer collection and negligible compared to interacting them.
69 auto cellToVec = [](auto &cell) -> std::vector<Particle_T> & { return cell._particles; };
70 utils::ArrayUtils::balanceVectors(particleBuffers, cellToVec);
71 utils::ArrayUtils::balanceVectors(haloParticleBuffers, cellToVec);
72
73 // The following part performs the main remainder traversal. The actual calculation is done in 4 steps carried out
74 // in three helper functions.
75
76 // TraceTimers only run with log level TRACE
77 utils::TraceTimer timerBufferContainer;
78 utils::TraceTimer timerPBufferPBuffer;
79 utils::TraceTimer timerPBufferHBuffer;
80 utils::TraceTimer timerBufferSoAConversion;
81 timerBufferContainer.start();
82
83 // steps 1 & 2.
84 // particleBuffer with all particles close in container
85 // and haloParticleBuffer with owned, close particles in container.
86 // This is always AoS-based because the container particles are found with region iterators,
87 // which don't have an SoA interface.
88 remainderHelperBufferContainerAoS<newton3>(f, container, particleBuffers, haloParticleBuffers);
89
90 timerBufferContainer.stop();
91 timerBufferSoAConversion.start();
92
93 if (useSoA) {
94 // All (halo-)buffer interactions shall happen vectorized, hence, load all buffer data into SoAs
95 for (auto &buffer : particleBuffers) {
96 f->SoALoader(buffer, buffer._particleSoABuffer, 0, false);
97 }
98 for (auto &buffer : haloParticleBuffers) {
99 f->SoALoader(buffer, buffer._particleSoABuffer, 0, false);
100 }
101 }
102
103 timerBufferSoAConversion.stop();
104 timerPBufferPBuffer.start();
105
106 // step 3. particleBuffer with itself and all other buffers
107 remainderHelperBufferBuffer<newton3>(f, particleBuffers, useSoA);
108
109 timerPBufferPBuffer.stop();
110 timerPBufferHBuffer.start();
111
112 // step 4. particleBuffer with haloParticleBuffer
113 remainderHelperBufferHaloBuffer(f, particleBuffers, haloParticleBuffers, useSoA);
114
115 timerPBufferHBuffer.stop();
116 timerBufferSoAConversion.start();
117
118 // unpack particle SoAs. Halo data is not interesting
119 if (useSoA) {
120 for (auto &buffer : particleBuffers) f->SoAExtractor(buffer, buffer._particleSoABuffer, 0);
121 }
122
123 timerBufferSoAConversion.stop();
124
125 AutoPasLog(TRACE, "Timer Buffers <-> Container (1+2): {}", timerBufferContainer.getTotalTime());
126 AutoPasLog(TRACE, "Timer PBuffers<-> PBuffer ( 3): {}", timerPBufferPBuffer.getTotalTime());
127 AutoPasLog(TRACE, "Timer PBuffers<-> HBuffer ( 4): {}", timerPBufferHBuffer.getTotalTime());
128 AutoPasLog(TRACE, "Timer Load and extract SoA buffers: {}", timerBufferSoAConversion.getTotalTime());
129
130 // Note: haloParticleBuffer with itself is NOT needed, as interactions between halo particles are unneeded!
131 }
132
133 private:
137 std::vector<std::vector<std::vector<std::unique_ptr<std::mutex>>>> &_spatialLocks;
138
152 template <bool newton3, class ContainerType, class PairwiseFunctor>
153 void remainderHelperBufferContainerAoS(PairwiseFunctor *f, ContainerType &container,
154 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
155 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers) {
157 using namespace autopas::utils::ArrayMath::literals;
158
159 // Bunch of shorthands
160 const auto cutoff = container.getCutoff();
161 const auto interactionLength = container.getInteractionLength();
162 const auto haloBoxMin = container.getBoxMin() - interactionLength;
163 const auto totalBoxLengthInv = 1. / (container.getBoxMax() + interactionLength - haloBoxMin);
164 const std::array<size_t, 3> spacialLocksPerDim{_spatialLocks.size(), _spatialLocks[0].size(),
165 _spatialLocks[0][0].size()};
166
167 // Helper function to obtain the lock responsible for a given position.
168 // Implemented as lambda because it can reuse a lot of information that is known in this context.
169 const auto getSpacialLock = [&](const std::array<double, 3> &pos) -> std::mutex & {
170 const auto posDistFromLowerCorner = pos - haloBoxMin;
171 const auto relativePos = posDistFromLowerCorner * totalBoxLengthInv;
172 // Lock coordinates are the position scaled by the number of locks
173 const auto lockCoords =
174 static_cast_copy_array<size_t>(static_cast_copy_array<double>(spacialLocksPerDim) * relativePos);
175 return *_spatialLocks[lockCoords[0]][lockCoords[1]][lockCoords[2]];
176 };
177
178 // one halo and particle buffer pair per thread
179 AUTOPAS_OPENMP(parallel for schedule(static, 1) default(shared))
180 for (int bufferId = 0; bufferId < particleBuffers.size(); ++bufferId) {
181 auto &particleBuffer = particleBuffers[bufferId];
182 auto &haloParticleBuffer = haloParticleBuffers[bufferId];
183
184 // 1. particleBuffer with all close particles in container
185 for (auto &&p1 : particleBuffer) {
186 const auto min = p1.getR() - cutoff;
187 const auto max = p1.getR() + cutoff;
188 container.forEachInRegion(
189 [&](auto &p2) {
190 if constexpr (newton3) {
191 const std::lock_guard<std::mutex> lock(getSpacialLock(p2.getR()));
192 f->AoSFunctor(p1, p2, true);
193 } else {
194 f->AoSFunctor(p1, p2, false);
195 // no need to calculate force enacted on a halo
196 if (not p2.isHalo()) {
197 const std::lock_guard<std::mutex> lock(getSpacialLock(p2.getR()));
198 f->AoSFunctor(p2, p1, false);
199 }
200 }
201 },
202 min, max, IteratorBehavior::ownedOrHalo);
203 }
204
205 // 2. haloParticleBuffer with owned, close particles in container
206 for (auto &&p1halo : haloParticleBuffer) {
207 const auto min = p1halo.getR() - cutoff;
208 const auto max = p1halo.getR() + cutoff;
209 container.forEachInRegion(
210 [&](auto &p2) {
211 // No need to apply anything to p1halo
212 // -> AoSFunctor(p1, p2, false) not needed as it neither adds force nor Upot (potential energy)
213 // -> newton3 argument needed for correct globals
214 const std::lock_guard<std::mutex> lock(getSpacialLock(p2.getR()));
215 f->AoSFunctor(p2, p1halo, newton3);
216 },
217 min, max, IteratorBehavior::owned);
218 }
219 }
220 }
221
231 template <bool newton3, class PairwiseFunctor>
232 void remainderHelperBufferBuffer(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers,
233 bool useSoA) {
234 if (useSoA)
235 remainderHelperBufferBufferSoA<newton3>(f, particleBuffers);
236 else
237 remainderHelperBufferBufferAoS<newton3>(f, particleBuffers);
238 }
239
247 template <bool newton3, class PairwiseFunctor>
248 void remainderHelperBufferBufferAoS(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers) {
249 // For all interactions between different buffers we turn newton3 always off,
250 // which ensures that only one thread at a time is writing to a buffer.
251 // This saves expensive locks.
252
253 // We can not use collapse here without locks, otherwise races would occur.
254 AUTOPAS_OPENMP(parallel for)
255 for (size_t bufferIdxI = 0; bufferIdxI < particleBuffers.size(); ++bufferIdxI) {
256 for (size_t bufferIdxJOffset = 0; bufferIdxJOffset < particleBuffers.size(); ++bufferIdxJOffset) {
257 // Let each bufferI use a different starting point for bufferJ to minimize false sharing
258 const auto bufferIdxJ = (bufferIdxI + bufferIdxJOffset) % particleBuffers.size();
259
260 // interact the two buffers
261 if (bufferIdxI == bufferIdxJ) {
262 // CASE Same buffer
263 // Only use Newton3 if it is allowed, and we are working on only one buffer. This avoids data races.
264 const bool useNewton3 = newton3;
265 auto &bufferRef = particleBuffers[bufferIdxI];
266 const auto bufferSize = bufferRef.size();
267 for (auto i = 0; i < bufferSize; ++i) {
268 auto &p1 = bufferRef[i];
269 // If Newton3 is disabled run over the whole buffer, otherwise only what is ahead
270 for (auto j = useNewton3 ? i + 1 : 0; j < bufferSize; ++j) {
271 if (i == j) {
272 continue;
273 }
274 auto &p2 = bufferRef[j];
275 f->AoSFunctor(p1, p2, useNewton3);
276 }
277 }
278 } else {
279 // CASE: Two buffers
280 for (auto &p1 : particleBuffers[bufferIdxI]) {
281 for (auto &p2 : particleBuffers[bufferIdxJ]) {
282 f->AoSFunctor(p1, p2, false);
283 }
284 }
285 }
286 }
287 }
288 }
289
297 template <bool newton3, class PairwiseFunctor>
298 void remainderHelperBufferBufferSoA(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers) {
299 // we can not use collapse here without locks, otherwise races would occur.
300 AUTOPAS_OPENMP(parallel for)
301 for (size_t i = 0; i < particleBuffers.size(); ++i) {
302 for (size_t jj = 0; jj < particleBuffers.size(); ++jj) {
303 auto *particleBufferSoAA = &particleBuffers[i]._particleSoABuffer;
304 // instead of starting every (parallel) iteration i at j == 0 offset them by i to minimize waiting times at
305 // locks
306 const auto j = (i + jj) % particleBuffers.size();
307 if (i == j) {
308 // For buffer interactions where bufferA == bufferB we can always enable newton3 if it is allowed.
309 f->SoAFunctorSingle(*particleBufferSoAA, newton3);
310 } else {
311 // For all interactions between different buffers we turn newton3 always off,
312 // which ensures that only one thread at a time is writing to a buffer. This saves expensive locks.
313 auto *particleBufferSoAB = &particleBuffers[j]._particleSoABuffer;
314 f->SoAFunctorPair(*particleBufferSoAA, *particleBufferSoAB, false);
315 }
316 }
317 }
318 }
319
333 template <class PairwiseFunctor>
334 void remainderHelperBufferHaloBuffer(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers,
335 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers, bool useSoA) {
336 if (useSoA)
337 remainderHelperBufferHaloBufferSoA(f, particleBuffers, haloParticleBuffers);
338 else
339 remainderHelperBufferHaloBufferAoS(f, particleBuffers, haloParticleBuffers);
340 }
341
349 template <class PairwiseFunctor>
350 void remainderHelperBufferHaloBufferAoS(PairwiseFunctor *f,
351 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
352 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers) {
353 // Here, phase / color based parallelism turned out to be more efficient than tasks
354 AUTOPAS_OPENMP(parallel)
355 for (int interactionOffset = 0; interactionOffset < haloParticleBuffers.size(); ++interactionOffset) {
356 AUTOPAS_OPENMP(for)
357 for (size_t i = 0; i < particleBuffers.size(); ++i) {
358 auto &particleBuffer = particleBuffers[i];
359 auto &haloBuffer = haloParticleBuffers[(i + interactionOffset) % haloParticleBuffers.size()];
360
361 for (auto &p1 : particleBuffer) {
362 for (auto &p2 : haloBuffer) {
363 f->AoSFunctor(p1, p2, false);
364 }
365 }
366 }
367 }
368 }
369
377 template <class PairwiseFunctor>
378 void remainderHelperBufferHaloBufferSoA(PairwiseFunctor *f,
379 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
380 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers) {
381 // Here, phase / color based parallelism turned out to be more efficient than tasks
382 AUTOPAS_OPENMP(parallel)
383 for (int interactionOffset = 0; interactionOffset < haloParticleBuffers.size(); ++interactionOffset) {
384 AUTOPAS_OPENMP(for)
385 for (size_t i = 0; i < particleBuffers.size(); ++i) {
386 auto &particleBufferSoA = particleBuffers[i]._particleSoABuffer;
387 auto &haloBufferSoA =
388 haloParticleBuffers[(i + interactionOffset) % haloParticleBuffers.size()]._particleSoABuffer;
389 f->SoAFunctorPair(particleBufferSoA, haloBufferSoA, false);
390 }
391 }
392 }
393};
394} // namespace autopas
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
#define AUTOPAS_OPENMP(args)
Empty macro to throw away any arguments.
Definition: WrapOpenMP.h:126
This class handles the storage of particles in their full form.
Definition: FullParticleCell.h:26
void SoALoader(ParticleCell &cell, SoA< SoAArraysType > &soa, size_t offset, bool skipSoAResize)
Copies the AoS data of the given cell in the given soa.
Definition: Functor.h:112
void SoAExtractor(ParticleCell &cell, SoA< SoAArraysType > &soa, size_t offset)
Copies the data stored in the soa back into the cell.
Definition: Functor.h:126
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
virtual void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3)
PairwiseFunctor for arrays of structures (AoS).
Definition: PairwiseFunctor.h:56
virtual void SoAFunctorPair(SoAView< SoAArraysType > soa1, SoAView< SoAArraysType > soa2, bool newton3)
PairwiseFunctor for structure of arrays (SoA)
Definition: PairwiseFunctor.h:102
virtual void SoAFunctorSingle(SoAView< SoAArraysType > soa, bool newton3)
PairwiseFunctor for structure of arrays (SoA)
Definition: PairwiseFunctor.h:70
Handles pairwise interactions involving particle buffers (particles not yet inserted into the main co...
Definition: RemainderPairwiseInteractionHandler.h:31
void computeRemainderInteractions(PairwiseFunctor *f, ContainerType &container, std::vector< FullParticleCell< Particle_T > > &particleBuffers, std::vector< FullParticleCell< Particle_T > > &haloParticleBuffers, bool useSoA)
Performs the interactions ParticleContainer::computeInteractions() did not cover.
Definition: RemainderPairwiseInteractionHandler.h:63
RemainderPairwiseInteractionHandler(std::vector< std::vector< std::vector< std::unique_ptr< std::mutex > > > > &spatialLocks)
Constructor for RemainderPairwiseInteractionHandler.
Definition: RemainderPairwiseInteractionHandler.h:37
A wrapper around autopas::utils::Timer that only compiles implementation logic if the SPDLOG_ACTIVE_L...
Definition: TraceTimer.h:20
long getTotalTime() const
Get total accumulated time.
Definition: TraceTimer.h:63
void start()
start the timer.
Definition: TraceTimer.h:25
long stop()
Stops the timer and returns the time elapsed in nanoseconds since the last call to start.
Definition: TraceTimer.h:34
constexpr std::array< T, SIZE > max(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Takes elementwise maximum and returns the result.
Definition: ArrayMath.h:96
constexpr std::array< T, SIZE > min(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Takes elementwise minimum, returns the result.
Definition: ArrayMath.h:62
void balanceVectors(OuterContainerT &vecvec)
Given a collection of vectors, redistributes the elements of the vectors so they all have the same (o...
Definition: ArrayUtils.h:151
constexpr std::array< output_t, SIZE > static_cast_copy_array(const std::array< input_t, SIZE > &a)
Creates a new array by performing an element-wise static_cast<>.
Definition: ArrayUtils.h:33
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32