AutoPas  3.0.0
Loading...
Searching...
No Matches
RemainderPairwiseInteractionHandler.h
Go to the documentation of this file.
1
7#pragma once
8
9#include <array>
10#include <cmath>
11#include <memory>
12#include <mutex>
13#include <vector>
14
18#include "autopas/utils/Timer.h"
21
22namespace autopas {
23
30template <typename Particle_T>
32 public:
38 std::vector<std::vector<std::vector<std::unique_ptr<std::mutex>>>> &spatialLocks)
39 : _spatialLocks(spatialLocks) {}
40
62 template <bool newton3, class ContainerType, class PairwiseFunctor>
63 void computeRemainderInteractions(PairwiseFunctor *f, ContainerType &container,
64 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
65 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers, bool useSoA) {
66 // Balance buffers. This makes processing them with static scheduling quite efficient.
67 // Also, if particles were not inserted in parallel, this enables us to process them in parallel now.
68 // Cost is at max O(2N) worst O(N) per buffer collection and negligible compared to interacting them.
69 auto cellToVec = [](auto &cell) -> std::vector<Particle_T> & { return cell._particles; };
70 utils::ArrayUtils::balanceVectors(particleBuffers, cellToVec);
71 utils::ArrayUtils::balanceVectors(haloParticleBuffers, cellToVec);
72
73 // The following part performs the main remainder traversal. The actual calculation is done in 4 steps carried out
74 // in three helper functions.
75
76 // only activate time measurements if it will actually be logged
77#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE
78 autopas::utils::Timer timerBufferContainer, timerPBufferPBuffer, timerPBufferHBuffer, timerBufferSoAConversion;
79 timerBufferContainer.start();
80#endif
81 // steps 1 & 2.
82 // particleBuffer with all particles close in container
83 // and haloParticleBuffer with owned, close particles in container.
84 // This is always AoS-based because the container particles are found with region iterators,
85 // which don't have an SoA interface.
86 remainderHelperBufferContainerAoS<newton3>(f, container, particleBuffers, haloParticleBuffers);
87
88#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE
89 timerBufferContainer.stop();
90 timerBufferSoAConversion.start();
91#endif
92
93 if (useSoA) {
94 // All (halo-)buffer interactions shall happen vectorized, hence, load all buffer data into SoAs
95 for (auto &buffer : particleBuffers) {
96 f->SoALoader(buffer, buffer._particleSoABuffer, 0, false);
97 }
98 for (auto &buffer : haloParticleBuffers) {
99 f->SoALoader(buffer, buffer._particleSoABuffer, 0, false);
100 }
101 }
102#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE
103 timerBufferSoAConversion.stop();
104 timerPBufferPBuffer.start();
105#endif
106
107 // step 3. particleBuffer with itself and all other buffers
108 remainderHelperBufferBuffer<newton3>(f, particleBuffers, useSoA);
109
110#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE
111 timerPBufferPBuffer.stop();
112 timerPBufferHBuffer.start();
113#endif
114
115 // step 4. particleBuffer with haloParticleBuffer
116 remainderHelperBufferHaloBuffer(f, particleBuffers, haloParticleBuffers, useSoA);
117
118#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE
119 timerPBufferHBuffer.stop();
120 timerBufferSoAConversion.start();
121#endif
122
123 // unpack particle SoAs. Halo data is not interesting
124 if (useSoA) {
125 for (auto &buffer : particleBuffers) f->SoAExtractor(buffer, buffer._particleSoABuffer, 0);
126 }
127
128#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE
129 timerBufferSoAConversion.stop();
130#endif
131
132 AutoPasLog(TRACE, "Timer Buffers <-> Container (1+2): {}", timerBufferContainer.getTotalTime());
133 AutoPasLog(TRACE, "Timer PBuffers<-> PBuffer ( 3): {}", timerPBufferPBuffer.getTotalTime());
134 AutoPasLog(TRACE, "Timer PBuffers<-> HBuffer ( 4): {}", timerPBufferHBuffer.getTotalTime());
135 AutoPasLog(TRACE, "Timer Load and extract SoA buffers: {}", timerBufferSoAConversion.getTotalTime());
136
137 // Note: haloParticleBuffer with itself is NOT needed, as interactions between halo particles are unneeded!
138 }
139
140 private:
144 std::vector<std::vector<std::vector<std::unique_ptr<std::mutex>>>> &_spatialLocks;
145
159 template <bool newton3, class ContainerType, class PairwiseFunctor>
160 void remainderHelperBufferContainerAoS(PairwiseFunctor *f, ContainerType &container,
161 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
162 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers) {
164 using namespace autopas::utils::ArrayMath::literals;
165
166 // Bunch of shorthands
167 const auto cutoff = container.getCutoff();
168 const auto interactionLength = container.getInteractionLength();
169 const auto haloBoxMin = container.getBoxMin() - interactionLength;
170 const auto totalBoxLengthInv = 1. / (container.getBoxMax() + interactionLength - haloBoxMin);
171 const std::array<size_t, 3> spacialLocksPerDim{_spatialLocks.size(), _spatialLocks[0].size(),
172 _spatialLocks[0][0].size()};
173
174 // Helper function to obtain the lock responsible for a given position.
175 // Implemented as lambda because it can reuse a lot of information that is known in this context.
176 const auto getSpacialLock = [&](const std::array<double, 3> &pos) -> std::mutex & {
177 const auto posDistFromLowerCorner = pos - haloBoxMin;
178 const auto relativePos = posDistFromLowerCorner * totalBoxLengthInv;
179 // Lock coordinates are the position scaled by the number of locks
180 const auto lockCoords =
181 static_cast_copy_array<size_t>(static_cast_copy_array<double>(spacialLocksPerDim) * relativePos);
182 return *_spatialLocks[lockCoords[0]][lockCoords[1]][lockCoords[2]];
183 };
184
185 // one halo and particle buffer pair per thread
186 AUTOPAS_OPENMP(parallel for schedule(static, 1) default(shared))
187 for (int bufferId = 0; bufferId < particleBuffers.size(); ++bufferId) {
188 auto &particleBuffer = particleBuffers[bufferId];
189 auto &haloParticleBuffer = haloParticleBuffers[bufferId];
190
191 // 1. particleBuffer with all close particles in container
192 for (auto &&p1 : particleBuffer) {
193 const auto min = p1.getR() - cutoff;
194 const auto max = p1.getR() + cutoff;
195 container.forEachInRegion(
196 [&](auto &p2) {
197 if constexpr (newton3) {
198 const std::lock_guard<std::mutex> lock(getSpacialLock(p2.getR()));
199 f->AoSFunctor(p1, p2, true);
200 } else {
201 f->AoSFunctor(p1, p2, false);
202 // no need to calculate force enacted on a halo
203 if (not p2.isHalo()) {
204 const std::lock_guard<std::mutex> lock(getSpacialLock(p2.getR()));
205 f->AoSFunctor(p2, p1, false);
206 }
207 }
208 },
209 min, max, IteratorBehavior::ownedOrHalo);
210 }
211
212 // 2. haloParticleBuffer with owned, close particles in container
213 for (auto &&p1halo : haloParticleBuffer) {
214 const auto min = p1halo.getR() - cutoff;
215 const auto max = p1halo.getR() + cutoff;
216 container.forEachInRegion(
217 [&](auto &p2) {
218 // No need to apply anything to p1halo
219 // -> AoSFunctor(p1, p2, false) not needed as it neither adds force nor Upot (potential energy)
220 // -> newton3 argument needed for correct globals
221 const std::lock_guard<std::mutex> lock(getSpacialLock(p2.getR()));
222 f->AoSFunctor(p2, p1halo, newton3);
223 },
224 min, max, IteratorBehavior::owned);
225 }
226 }
227 }
228
238 template <bool newton3, class PairwiseFunctor>
239 void remainderHelperBufferBuffer(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers,
240 bool useSoA) {
241 if (useSoA)
242 remainderHelperBufferBufferSoA<newton3>(f, particleBuffers);
243 else
244 remainderHelperBufferBufferAoS<newton3>(f, particleBuffers);
245 }
246
254 template <bool newton3, class PairwiseFunctor>
255 void remainderHelperBufferBufferAoS(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers) {
256 // For all interactions between different buffers we turn newton3 always off,
257 // which ensures that only one thread at a time is writing to a buffer.
258 // This saves expensive locks.
259
260 // We can not use collapse here without locks, otherwise races would occur.
261 AUTOPAS_OPENMP(parallel for)
262 for (size_t bufferIdxI = 0; bufferIdxI < particleBuffers.size(); ++bufferIdxI) {
263 for (size_t bufferIdxJOffset = 0; bufferIdxJOffset < particleBuffers.size(); ++bufferIdxJOffset) {
264 // Let each bufferI use a different starting point for bufferJ to minimize false sharing
265 const auto bufferIdxJ = (bufferIdxI + bufferIdxJOffset) % particleBuffers.size();
266
267 // interact the two buffers
268 if (bufferIdxI == bufferIdxJ) {
269 // CASE Same buffer
270 // Only use Newton3 if it is allowed, and we are working on only one buffer. This avoids data races.
271 const bool useNewton3 = newton3;
272 auto &bufferRef = particleBuffers[bufferIdxI];
273 const auto bufferSize = bufferRef.size();
274 for (auto i = 0; i < bufferSize; ++i) {
275 auto &p1 = bufferRef[i];
276 // If Newton3 is disabled run over the whole buffer, otherwise only what is ahead
277 for (auto j = useNewton3 ? i + 1 : 0; j < bufferSize; ++j) {
278 if (i == j) {
279 continue;
280 }
281 auto &p2 = bufferRef[j];
282 f->AoSFunctor(p1, p2, useNewton3);
283 }
284 }
285 } else {
286 // CASE: Two buffers
287 for (auto &p1 : particleBuffers[bufferIdxI]) {
288 for (auto &p2 : particleBuffers[bufferIdxJ]) {
289 f->AoSFunctor(p1, p2, false);
290 }
291 }
292 }
293 }
294 }
295 }
296
304 template <bool newton3, class PairwiseFunctor>
305 void remainderHelperBufferBufferSoA(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers) {
306 // we can not use collapse here without locks, otherwise races would occur.
307 AUTOPAS_OPENMP(parallel for)
308 for (size_t i = 0; i < particleBuffers.size(); ++i) {
309 for (size_t jj = 0; jj < particleBuffers.size(); ++jj) {
310 auto *particleBufferSoAA = &particleBuffers[i]._particleSoABuffer;
311 // instead of starting every (parallel) iteration i at j == 0 offset them by i to minimize waiting times at
312 // locks
313 const auto j = (i + jj) % particleBuffers.size();
314 if (i == j) {
315 // For buffer interactions where bufferA == bufferB we can always enable newton3 if it is allowed.
316 f->SoAFunctorSingle(*particleBufferSoAA, newton3);
317 } else {
318 // For all interactions between different buffers we turn newton3 always off,
319 // which ensures that only one thread at a time is writing to a buffer. This saves expensive locks.
320 auto *particleBufferSoAB = &particleBuffers[j]._particleSoABuffer;
321 f->SoAFunctorPair(*particleBufferSoAA, *particleBufferSoAB, false);
322 }
323 }
324 }
325 }
326
340 template <class PairwiseFunctor>
341 void remainderHelperBufferHaloBuffer(PairwiseFunctor *f, std::vector<FullParticleCell<Particle_T>> &particleBuffers,
342 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers, bool useSoA) {
343 if (useSoA)
344 remainderHelperBufferHaloBufferSoA(f, particleBuffers, haloParticleBuffers);
345 else
346 remainderHelperBufferHaloBufferAoS(f, particleBuffers, haloParticleBuffers);
347 }
348
356 template <class PairwiseFunctor>
357 void remainderHelperBufferHaloBufferAoS(PairwiseFunctor *f,
358 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
359 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers) {
360 // Here, phase / color based parallelism turned out to be more efficient than tasks
361 AUTOPAS_OPENMP(parallel)
362 for (int interactionOffset = 0; interactionOffset < haloParticleBuffers.size(); ++interactionOffset) {
363 AUTOPAS_OPENMP(for)
364 for (size_t i = 0; i < particleBuffers.size(); ++i) {
365 auto &particleBuffer = particleBuffers[i];
366 auto &haloBuffer = haloParticleBuffers[(i + interactionOffset) % haloParticleBuffers.size()];
367
368 for (auto &p1 : particleBuffer) {
369 for (auto &p2 : haloBuffer) {
370 f->AoSFunctor(p1, p2, false);
371 }
372 }
373 }
374 }
375 }
376
384 template <class PairwiseFunctor>
385 void remainderHelperBufferHaloBufferSoA(PairwiseFunctor *f,
386 std::vector<FullParticleCell<Particle_T>> &particleBuffers,
387 std::vector<FullParticleCell<Particle_T>> &haloParticleBuffers) {
388 // Here, phase / color based parallelism turned out to be more efficient than tasks
389 AUTOPAS_OPENMP(parallel)
390 for (int interactionOffset = 0; interactionOffset < haloParticleBuffers.size(); ++interactionOffset) {
391 AUTOPAS_OPENMP(for)
392 for (size_t i = 0; i < particleBuffers.size(); ++i) {
393 auto &particleBufferSoA = particleBuffers[i]._particleSoABuffer;
394 auto &haloBufferSoA =
395 haloParticleBuffers[(i + interactionOffset) % haloParticleBuffers.size()]._particleSoABuffer;
396 f->SoAFunctorPair(particleBufferSoA, haloBufferSoA, false);
397 }
398 }
399 }
400};
401} // namespace autopas
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
#define AUTOPAS_OPENMP(args)
Empty macro to throw away any arguments.
Definition: WrapOpenMP.h:126
This class handles the storage of particles in their full form.
Definition: FullParticleCell.h:26
void SoALoader(ParticleCell &cell, SoA< SoAArraysType > &soa, size_t offset, bool skipSoAResize)
Copies the AoS data of the given cell in the given soa.
Definition: Functor.h:112
void SoAExtractor(ParticleCell &cell, SoA< SoAArraysType > &soa, size_t offset)
Copies the data stored in the soa back into the cell.
Definition: Functor.h:126
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
virtual void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3)
PairwiseFunctor for arrays of structures (AoS).
Definition: PairwiseFunctor.h:56
virtual void SoAFunctorPair(SoAView< SoAArraysType > soa1, SoAView< SoAArraysType > soa2, bool newton3)
PairwiseFunctor for structure of arrays (SoA)
Definition: PairwiseFunctor.h:102
virtual void SoAFunctorSingle(SoAView< SoAArraysType > soa, bool newton3)
PairwiseFunctor for structure of arrays (SoA)
Definition: PairwiseFunctor.h:70
Handles pairwise interactions involving particle buffers (particles not yet inserted into the main co...
Definition: RemainderPairwiseInteractionHandler.h:31
void computeRemainderInteractions(PairwiseFunctor *f, ContainerType &container, std::vector< FullParticleCell< Particle_T > > &particleBuffers, std::vector< FullParticleCell< Particle_T > > &haloParticleBuffers, bool useSoA)
Performs the interactions ParticleContainer::computeInteractions() did not cover.
Definition: RemainderPairwiseInteractionHandler.h:63
RemainderPairwiseInteractionHandler(std::vector< std::vector< std::vector< std::unique_ptr< std::mutex > > > > &spatialLocks)
Constructor for RemainderPairwiseInteractionHandler.
Definition: RemainderPairwiseInteractionHandler.h:37
Timer class to stop times.
Definition: Timer.h:20
void start()
start the timer.
Definition: Timer.cpp:17
long getTotalTime() const
Get total accumulated time.
Definition: Timer.h:53
long stop()
Stops the timer and returns the time elapsed in nanoseconds since the last call to start.
Definition: Timer.cpp:25
constexpr std::array< T, SIZE > max(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Takes elementwise maximum and returns the result.
Definition: ArrayMath.h:96
constexpr std::array< T, SIZE > min(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Takes elementwise minimum, returns the result.
Definition: ArrayMath.h:62
void balanceVectors(OuterContainerT &vecvec)
Given a collection of vectors, redistributes the elements of the vectors so they all have the same (o...
Definition: ArrayUtils.h:151
constexpr std::array< output_t, SIZE > static_cast_copy_array(const std::array< input_t, SIZE > &a)
Creates a new array by performing an element-wise static_cast<>.
Definition: ArrayUtils.h:33
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32