AutoPas  3.0.0
Loading...
Searching...
No Matches
LJFunctor.h
Go to the documentation of this file.
1
8#pragma once
9
16#include "autopas/utils/SoA.h"
19#include "autopas/utils/inBox.h"
20
21namespace mdLib {
22
36template <class Particle_T, bool applyShift = false, bool useMixing = false,
37 autopas::FunctorN3Modes useNewton3 = autopas::FunctorN3Modes::Both, bool calculateGlobals = false,
38 bool countFLOPs = false, bool relevantForTuning = true>
40 : public autopas::PairwiseFunctor<Particle_T, LJFunctor<Particle_T, applyShift, useMixing, useNewton3,
41 calculateGlobals, countFLOPs, relevantForTuning>> {
45 using SoAArraysType = typename Particle_T::SoAArraysType;
46
50 using SoAFloatPrecision = typename Particle_T::ParticleSoAFloatPrecision;
51
52 public:
56 LJFunctor() = delete;
57
58 private:
64 explicit LJFunctor(double cutoff, void * /*dummy*/)
65 : autopas::PairwiseFunctor<Particle_T, LJFunctor<Particle_T, applyShift, useMixing, useNewton3, calculateGlobals,
66 countFLOPs, relevantForTuning>>(cutoff),
67 _cutoffSquared{cutoff * cutoff},
68 _potentialEnergySum{0.},
69 _virialSum{0., 0., 0.},
70 _postProcessed{false} {
71 if constexpr (calculateGlobals) {
72 _aosThreadDataGlobals.resize(autopas::autopas_get_max_threads());
73 }
74 if constexpr (countFLOPs) {
75 _aosThreadDataFLOPs.resize(autopas::autopas_get_max_threads());
76 }
77 }
78
79 public:
88 explicit LJFunctor(double cutoff) : LJFunctor(cutoff, nullptr) {
89 static_assert(not useMixing,
90 "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "
91 "mixing to false.");
92 }
93
100 explicit LJFunctor(double cutoff, ParticlePropertiesLibrary<double, size_t> &particlePropertiesLibrary)
101 : LJFunctor(cutoff, nullptr) {
102 static_assert(useMixing,
103 "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "
104 "or set mixing to true.");
105 _PPLibrary = &particlePropertiesLibrary;
106 }
107
108 std::string getName() final { return "LJFunctorAutoVec"; }
109
110 bool isRelevantForTuning() final { return relevantForTuning; }
111
112 bool allowsNewton3() final {
113 return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
114 }
115
116 bool allowsNonNewton3() final {
117 return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
118 }
119
120 void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final {
121 using namespace autopas::utils::ArrayMath::literals;
122
123 if (i.isDummy() or j.isDummy()) {
124 return;
125 }
126
127 const auto threadnum = autopas::autopas_get_thread_num();
128
129 if constexpr (countFLOPs) {
130 ++_aosThreadDataFLOPs[threadnum].numDistCalls;
131 }
132
133 auto sigmaSquared = _sigmaSquared;
134 auto epsilon24 = _epsilon24;
135 auto shift6 = _shift6;
136 if constexpr (useMixing) {
137 sigmaSquared = _PPLibrary->getMixingSigmaSquared(i.getTypeId(), j.getTypeId());
138 epsilon24 = _PPLibrary->getMixing24Epsilon(i.getTypeId(), j.getTypeId());
139 if constexpr (applyShift) {
140 shift6 = _PPLibrary->getMixingShift6(i.getTypeId(), j.getTypeId());
141 }
142 }
143 auto dr = i.getR() - j.getR();
144 double dr2 = autopas::utils::ArrayMath::dot(dr, dr);
145
146 if (dr2 > _cutoffSquared) {
147 return;
148 }
149
150 double invdr2 = 1. / dr2;
151 double lj6 = sigmaSquared * invdr2;
152 lj6 = lj6 * lj6 * lj6;
153 double lj12 = lj6 * lj6;
154 double lj12m6 = lj12 - lj6;
155 double fac = epsilon24 * (lj12 + lj12m6) * invdr2;
156 auto f = dr * fac;
157 i.addF(f);
158 if (newton3) {
159 // only if we use newton 3 here, we want to
160 j.subF(f);
161 }
162
163 if constexpr (countFLOPs) {
164 if (newton3) {
165 ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
166 } else {
167 ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
168 }
169 }
170
171 if constexpr (calculateGlobals) {
172 // We always add the full contribution for each owned particle and divide the sums by 2 in endTraversal().
173 // Potential energy has an additional factor of 6, which is also handled in endTraversal().
174
175 auto virial = dr * f;
176 double potentialEnergy6 = epsilon24 * lj12m6 + shift6;
177
178 if (i.isOwned()) {
179 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
180 _aosThreadDataGlobals[threadnum].virialSum += virial;
181 }
182 // for non-newton3 the second particle will be considered in a separate calculation
183 if (newton3 and j.isOwned()) {
184 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
185 _aosThreadDataGlobals[threadnum].virialSum += virial;
186 }
187 if constexpr (countFLOPs) {
188 if (newton3) {
189 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
190 } else {
191 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
192 }
193 }
194 }
195 }
196
201 void SoAFunctorSingle(autopas::SoAView<SoAArraysType> soa, bool newton3) final {
202 if (soa.size() == 0) return;
203
204 const auto threadnum = autopas::autopas_get_thread_num();
205
206 const auto *const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
207 const auto *const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
208 const auto *const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
209 const auto *const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
210
211 SoAFloatPrecision *const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
212 SoAFloatPrecision *const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
213 SoAFloatPrecision *const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
214
215 [[maybe_unused]] auto *const __restrict typeptr = soa.template begin<Particle_T::AttributeNames::typeId>();
216 // the local redeclaration of the following values helps the SoAFloatPrecision-generation of various compilers.
217 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
218
219 SoAFloatPrecision potentialEnergySum = 0.; // Note: This is not the potential energy but some fixed multiple of it.
220 SoAFloatPrecision virialSumX = 0.;
221 SoAFloatPrecision virialSumY = 0.;
222 SoAFloatPrecision virialSumZ = 0.;
223
224 size_t numDistanceCalculationSum = 0;
225 size_t numKernelCallsN3Sum = 0;
226 size_t numKernelCallsNoN3Sum = 0;
227 size_t numGlobalCalcsSum = 0;
228
229 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
230 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
231 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
232 if constexpr (useMixing) {
233 // Preload all sigma and epsilons for next vectorized region.
234 // Not preloading and directly using the values, will produce worse results.
235 sigmaSquareds.resize(soa.size());
236 epsilon24s.resize(soa.size());
237 // if no mixing or mixing but no shift shift6 is constant therefore we do not need this vector.
238 if constexpr (applyShift) {
239 shift6s.resize(soa.size());
240 }
241 }
242
243 const SoAFloatPrecision const_shift6 = _shift6;
244 const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
245 const SoAFloatPrecision const_epsilon24 = _epsilon24;
246
247 for (unsigned int i = 0; i < soa.size(); ++i) {
248 const auto ownedStateI = ownedStatePtr[i];
249 if (ownedStateI == autopas::OwnershipState::dummy) {
250 continue;
251 }
252
253 SoAFloatPrecision fxacc = 0.;
254 SoAFloatPrecision fyacc = 0.;
255 SoAFloatPrecision fzacc = 0.;
256
257 if constexpr (useMixing) {
258 for (unsigned int j = 0; j < soa.size(); ++j) {
259 auto mixingData = _PPLibrary->getLJMixingData(typeptr[i], typeptr[j]);
260 sigmaSquareds[j] = mixingData.sigmaSquared;
261 epsilon24s[j] = mixingData.epsilon24;
262 if constexpr (applyShift) {
263 shift6s[j] = mixingData.shift6;
264 }
265 }
266 }
267
268// icpc vectorizes this.
269// g++ only with -ffast-math or -funsafe-math-optimizations
270#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsSum)
271 for (unsigned int j = i + 1; j < soa.size(); ++j) {
272 SoAFloatPrecision shift6 = const_shift6;
273 SoAFloatPrecision sigmaSquared = const_sigmaSquared;
274 SoAFloatPrecision epsilon24 = const_epsilon24;
275 if constexpr (useMixing) {
276 sigmaSquared = sigmaSquareds[j];
277 epsilon24 = epsilon24s[j];
278 if constexpr (applyShift) {
279 shift6 = shift6s[j];
280 }
281 }
282
283 const auto ownedStateJ = ownedStatePtr[j];
284
285 const SoAFloatPrecision drx = xptr[i] - xptr[j];
286 const SoAFloatPrecision dry = yptr[i] - yptr[j];
287 const SoAFloatPrecision drz = zptr[i] - zptr[j];
288
289 const SoAFloatPrecision drx2 = drx * drx;
290 const SoAFloatPrecision dry2 = dry * dry;
291 const SoAFloatPrecision drz2 = drz * drz;
292
293 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
294
295 // Mask away if distance is too large or any particle is a dummy.
296 // Particle ownedStateI was already checked previously.
297 const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;
298
299 const SoAFloatPrecision invdr2 = 1. / dr2;
300 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
301 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
302 const SoAFloatPrecision lj12 = lj6 * lj6;
303 const SoAFloatPrecision lj12m6 = lj12 - lj6;
304 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
305
306 const SoAFloatPrecision fx = drx * fac;
307 const SoAFloatPrecision fy = dry * fac;
308 const SoAFloatPrecision fz = drz * fac;
309
310 fxacc += fx;
311 fyacc += fy;
312 fzacc += fz;
313
314 // newton 3
315 fxptr[j] -= fx;
316 fyptr[j] -= fy;
317 fzptr[j] -= fz;
318
319 if constexpr (countFLOPs) {
320 numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
321 numKernelCallsN3Sum += mask;
322 }
323
324 if (calculateGlobals) {
325 const SoAFloatPrecision virialx = drx * fx;
326 const SoAFloatPrecision virialy = dry * fy;
327 const SoAFloatPrecision virialz = drz * fz;
328 const SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
329
330 // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
331 SoAFloatPrecision energyFactor = (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
332 (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.);
333 potentialEnergySum += potentialEnergy6 * energyFactor;
334
335 virialSumX += virialx * energyFactor;
336 virialSumY += virialy * energyFactor;
337 virialSumZ += virialz * energyFactor;
338
339 if constexpr (countFLOPs) {
340 numGlobalCalcsSum += mask;
341 }
342 }
343 }
344
345 fxptr[i] += fxacc;
346 fyptr[i] += fyacc;
347 fzptr[i] += fzacc;
348 }
349 if constexpr (countFLOPs) {
350 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
351 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
352 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
353 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsSum; // Always N3 in Single SoAFunctor
354 }
355 if (calculateGlobals) {
356 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
357 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
358 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
359 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
360 }
361 }
362
367 const bool newton3) final {
368 if (newton3) {
369 SoAFunctorPairImpl<true>(soa1, soa2);
370 } else {
371 SoAFunctorPairImpl<false>(soa1, soa2);
372 }
373 }
374
375 private:
383 template <bool newton3>
384 void SoAFunctorPairImpl(autopas::SoAView<SoAArraysType> soa1, autopas::SoAView<SoAArraysType> soa2) {
385 if (soa1.size() == 0 || soa2.size() == 0) return;
386
387 const auto threadnum = autopas::autopas_get_thread_num();
388
389 const auto *const __restrict x1ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
390 const auto *const __restrict y1ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
391 const auto *const __restrict z1ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
392 const auto *const __restrict x2ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
393 const auto *const __restrict y2ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
394 const auto *const __restrict z2ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
395 const auto *const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
396 const auto *const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();
397
398 auto *const __restrict fx1ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
399 auto *const __restrict fy1ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
400 auto *const __restrict fz1ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
401 auto *const __restrict fx2ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
402 auto *const __restrict fy2ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
403 auto *const __restrict fz2ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
404 [[maybe_unused]] auto *const __restrict typeptr1 = soa1.template begin<Particle_T::AttributeNames::typeId>();
405 [[maybe_unused]] auto *const __restrict typeptr2 = soa2.template begin<Particle_T::AttributeNames::typeId>();
406
407 // Checks whether the cells are halo cells.
408 SoAFloatPrecision potentialEnergySum = 0.;
409 SoAFloatPrecision virialSumX = 0.;
410 SoAFloatPrecision virialSumY = 0.;
411 SoAFloatPrecision virialSumZ = 0.;
412
413 size_t numDistanceCalculationSum = 0;
414 size_t numKernelCallsN3Sum = 0;
415 size_t numKernelCallsNoN3Sum = 0;
416 size_t numGlobalCalcsN3Sum = 0;
417 size_t numGlobalCalcsNoN3Sum = 0;
418
419 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
420 SoAFloatPrecision shift6 = _shift6;
421 SoAFloatPrecision sigmaSquared = _sigmaSquared;
422 SoAFloatPrecision epsilon24 = _epsilon24;
423
424 // preload all sigma and epsilons for next vectorized region
425 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
426 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
427 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
428 if constexpr (useMixing) {
429 sigmaSquareds.resize(soa2.size());
430 epsilon24s.resize(soa2.size());
431 // if no mixing or mixing but no shift shift6 is constant therefore we do not need this vector.
432 if constexpr (applyShift) {
433 shift6s.resize(soa2.size());
434 }
435 }
436
437 for (unsigned int i = 0; i < soa1.size(); ++i) {
438 SoAFloatPrecision fxacc = 0;
439 SoAFloatPrecision fyacc = 0;
440 SoAFloatPrecision fzacc = 0;
441
442 const auto ownedStateI = ownedStatePtr1[i];
443 if (ownedStateI == autopas::OwnershipState::dummy) {
444 continue;
445 }
446
447 // preload all sigma and epsilons for next vectorized region
448 if constexpr (useMixing) {
449 for (unsigned int j = 0; j < soa2.size(); ++j) {
450 sigmaSquareds[j] = _PPLibrary->getMixingSigmaSquared(typeptr1[i], typeptr2[j]);
451 epsilon24s[j] = _PPLibrary->getMixing24Epsilon(typeptr1[i], typeptr2[j]);
452 if constexpr (applyShift) {
453 shift6s[j] = _PPLibrary->getMixingShift6(typeptr1[i], typeptr2[j]);
454 }
455 }
456 }
457
458// icpc vectorizes this.
459// g++ only with -ffast-math or -funsafe-math-optimizations
460#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum)
461 for (unsigned int j = 0; j < soa2.size(); ++j) {
462 if constexpr (useMixing) {
463 sigmaSquared = sigmaSquareds[j];
464 epsilon24 = epsilon24s[j];
465 if constexpr (applyShift) {
466 shift6 = shift6s[j];
467 }
468 }
469
470 const auto ownedStateJ = ownedStatePtr2[j];
471
472 const SoAFloatPrecision drx = x1ptr[i] - x2ptr[j];
473 const SoAFloatPrecision dry = y1ptr[i] - y2ptr[j];
474 const SoAFloatPrecision drz = z1ptr[i] - z2ptr[j];
475
476 const SoAFloatPrecision drx2 = drx * drx;
477 const SoAFloatPrecision dry2 = dry * dry;
478 const SoAFloatPrecision drz2 = drz * drz;
479
480 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
481
482 // Mask away if distance is too large or any particle is a dummy.
483 // Particle ownedStateI was already checked previously.
484 const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;
485
486 const SoAFloatPrecision invdr2 = 1. / dr2;
487 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
488 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
489 const SoAFloatPrecision lj12 = lj6 * lj6;
490 const SoAFloatPrecision lj12m6 = lj12 - lj6;
491 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
492
493 const SoAFloatPrecision fx = drx * fac;
494 const SoAFloatPrecision fy = dry * fac;
495 const SoAFloatPrecision fz = drz * fac;
496
497 fxacc += fx;
498 fyacc += fy;
499 fzacc += fz;
500 if (newton3) {
501 fx2ptr[j] -= fx;
502 fy2ptr[j] -= fy;
503 fz2ptr[j] -= fz;
504 }
505
506 if constexpr (countFLOPs) {
507 numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
508 if constexpr (newton3) {
509 numKernelCallsN3Sum += mask;
510 } else {
511 numKernelCallsNoN3Sum += mask;
512 }
513 }
514
515 if constexpr (calculateGlobals) {
516 SoAFloatPrecision virialx = drx * fx;
517 SoAFloatPrecision virialy = dry * fy;
518 SoAFloatPrecision virialz = drz * fz;
519 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
520
521 // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
522 const SoAFloatPrecision energyFactor =
523 (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
524 (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
525 potentialEnergySum += potentialEnergy6 * energyFactor;
526 virialSumX += virialx * energyFactor;
527 virialSumY += virialy * energyFactor;
528 virialSumZ += virialz * energyFactor;
529
530 if constexpr (countFLOPs) {
531 if constexpr (newton3) {
532 numGlobalCalcsN3Sum += mask;
533 } else {
534 numGlobalCalcsNoN3Sum += mask;
535 }
536 }
537 }
538 }
539 fx1ptr[i] += fxacc;
540 fy1ptr[i] += fyacc;
541 fz1ptr[i] += fzacc;
542 }
543 if constexpr (countFLOPs) {
544 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
545 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
546 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
547 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
548 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
549 }
550 if (calculateGlobals) {
551 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
552 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
553 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
554 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
555 }
556 }
557
558 public:
559 // clang-format off
565 // clang-format on
566 void SoAFunctorVerlet(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,
567 const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList,
568 bool newton3) final {
569 if (soa.size() == 0 or neighborList.empty()) return;
570 if (newton3) {
571 SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
572 } else {
573 SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
574 }
575 }
576
585 void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared) {
586 _epsilon24 = epsilon24;
587 _sigmaSquared = sigmaSquared;
588 if (applyShift) {
589 _shift6 = ParticlePropertiesLibrary<double, size_t>::calcShift6(_epsilon24, _sigmaSquared, _cutoffSquared);
590 } else {
591 _shift6 = 0.;
592 }
593 }
594
598 constexpr static auto getNeededAttr() {
599 return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
600 Particle_T::AttributeNames::posX,
601 Particle_T::AttributeNames::posY,
602 Particle_T::AttributeNames::posZ,
603 Particle_T::AttributeNames::forceX,
604 Particle_T::AttributeNames::forceY,
605 Particle_T::AttributeNames::forceZ,
606 Particle_T::AttributeNames::typeId,
607 Particle_T::AttributeNames::ownershipState};
608 }
609
613 constexpr static auto getNeededAttr(std::false_type) {
614 return std::array<typename Particle_T::AttributeNames, 6>{
615 Particle_T::AttributeNames::id, Particle_T::AttributeNames::posX,
616 Particle_T::AttributeNames::posY, Particle_T::AttributeNames::posZ,
617 Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
618 }
619
623 constexpr static auto getComputedAttr() {
624 return std::array<typename Particle_T::AttributeNames, 3>{
625 Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
626 }
627
632 constexpr static bool getMixing() { return useMixing; }
633
638 void initTraversal() final {
639 _potentialEnergySum = 0.;
640 _virialSum = {0., 0., 0.};
641 _postProcessed = false;
642 if constexpr (calculateGlobals) {
643 for (auto &data : _aosThreadDataGlobals) {
644 data.setZero();
645 }
646 }
647 if constexpr (countFLOPs) {
648 for (auto &data : _aosThreadDataFLOPs) {
649 data.setZero();
650 }
651 }
652 }
653
658 void endTraversal(bool newton3) final {
659 using namespace autopas::utils::ArrayMath::literals;
660
661 if (_postProcessed) {
663 "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
664 }
665 if (calculateGlobals) {
666 for (const auto &data : _aosThreadDataGlobals) {
667 _potentialEnergySum += data.potentialEnergySum;
668 _virialSum += data.virialSum;
669 }
670 // For each interaction, we added the full contribution for both particles. Divide by 2 here, so that each
671 // contribution is only counted once per pair.
672 _potentialEnergySum *= 0.5;
673 _virialSum *= 0.5;
674
675 // We have always calculated 6*potentialEnergy, so we divide by 6 here!
676 _potentialEnergySum /= 6.;
677 _postProcessed = true;
678
679 AutoPasLog(DEBUG, "Final potential energy {}", _potentialEnergySum);
680 AutoPasLog(DEBUG, "Final virial {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
681 }
682 }
683
689 if (not calculateGlobals) {
691 "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
692 "global "
693 "values, please specify calculateGlobals to be true.");
694 }
695 if (not _postProcessed) {
697 "Cannot get potential energy, because endTraversal was not called.");
698 }
699 return _potentialEnergySum;
700 }
701
706 double getVirial() {
707 if (not calculateGlobals) {
709 "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
710 "values, please specify calculateGlobals to be true.");
711 }
712 if (not _postProcessed) {
714 "Cannot get virial, because endTraversal was not called.");
715 }
716 return _virialSum[0] + _virialSum[1] + _virialSum[2];
717 }
718
755 [[nodiscard]] size_t getNumFLOPs() const override {
756 if constexpr (countFLOPs) {
757 const size_t numDistCallsAcc =
758 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
759 [](size_t sum, const auto &data) { return sum + data.numDistCalls; });
760 const size_t numKernelCallsN3Acc =
761 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
762 [](size_t sum, const auto &data) { return sum + data.numKernelCallsN3; });
763 const size_t numKernelCallsNoN3Acc =
764 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
765 [](size_t sum, const auto &data) { return sum + data.numKernelCallsNoN3; });
766 const size_t numGlobalCalcsN3Acc =
767 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
768 [](size_t sum, const auto &data) { return sum + data.numGlobalCalcsN3; });
769 const size_t numGlobalCalcsNoN3Acc =
770 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
771 [](size_t sum, const auto &data) { return sum + data.numGlobalCalcsNoN3; });
772
773 constexpr size_t numFLOPsPerDistanceCall = 8;
774 constexpr size_t numFLOPsPerN3KernelCall = 18;
775 constexpr size_t numFLOPsPerNoN3KernelCall = 15;
776 constexpr size_t numFLOPsPerN3GlobalCalc = applyShift ? 13 : 12;
777 constexpr size_t numFLOPsPerNoN3GlobalCalc = applyShift ? 9 : 8;
778
779 return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
780 numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
781 numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
782 } else {
783 // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
784 return std::numeric_limits<size_t>::max();
785 }
786 }
787
788 [[nodiscard]] double getHitRate() const override {
789 if constexpr (countFLOPs) {
790 const size_t numDistCallsAcc =
791 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
792 [](size_t sum, const auto &data) { return sum + data.numDistCalls; });
793 const size_t numKernelCallsN3Acc =
794 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
795 [](size_t sum, const auto &data) { return sum + data.numKernelCallsN3; });
796 const size_t numKernelCallsNoN3Acc =
797 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
798 [](size_t sum, const auto &data) { return sum + data.numKernelCallsNoN3; });
799
800 return (static_cast<double>(numKernelCallsNoN3Acc) + static_cast<double>(numKernelCallsN3Acc)) /
801 (static_cast<double>(numDistCallsAcc));
802 } else {
803 // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
804 return std::numeric_limits<double>::quiet_NaN();
805 }
806 }
807
808 private:
809 template <bool newton3>
810 void SoAFunctorVerletImpl(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,
811 const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList) {
812 const auto *const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
813 const auto *const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
814 const auto *const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
815
816 auto *const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
817 auto *const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
818 auto *const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
819 [[maybe_unused]] auto *const __restrict typeptr1 = soa.template begin<Particle_T::AttributeNames::typeId>();
820 [[maybe_unused]] auto *const __restrict typeptr2 = soa.template begin<Particle_T::AttributeNames::typeId>();
821
822 const auto *const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
823
824 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
825 SoAFloatPrecision shift6 = _shift6;
826 SoAFloatPrecision sigmaSquared = _sigmaSquared;
827 SoAFloatPrecision epsilon24 = _epsilon24;
828
829 SoAFloatPrecision potentialEnergySum = 0.;
830 SoAFloatPrecision virialSumX = 0.;
831 SoAFloatPrecision virialSumY = 0.;
832 SoAFloatPrecision virialSumZ = 0.;
833
834 // Counters for when countFLOPs is activated
835 size_t numDistanceCalculationSum = 0;
836 size_t numKernelCallsN3Sum = 0;
837 size_t numKernelCallsNoN3Sum = 0;
838 size_t numGlobalCalcsN3Sum = 0;
839 size_t numGlobalCalcsNoN3Sum = 0;
840
841 SoAFloatPrecision fxacc = 0;
842 SoAFloatPrecision fyacc = 0;
843 SoAFloatPrecision fzacc = 0;
844 const size_t neighborListSize = neighborList.size();
845 const size_t *const __restrict neighborListPtr = neighborList.data();
846
847 // checks whether particle i is owned.
848 const auto ownedStateI = ownedStatePtr[indexFirst];
849 if (ownedStateI == autopas::OwnershipState::dummy) {
850 return;
851 }
852
853 const auto threadnum = autopas::autopas_get_thread_num();
854
855 // this is a magic number, that should correspond to at least
856 // vectorization width*N have testet multiple sizes:
857 // 4: does not give a speedup, slower than original AoSFunctor
858 // 8: small speedup compared to AoS
859 // 12: highest speedup compared to Aos
860 // 16: smaller speedup
861 // in theory this is a variable, we could auto-tune over...
862#ifdef __AVX512F__
863 // use a multiple of 8 for avx
864 constexpr size_t vecsize = 16;
865#else
866 // for everything else 12 is faster
867 constexpr size_t vecsize = 12;
868#endif
869 size_t joff = 0;
870
871 // if the size of the verlet list is larger than the given size vecsize,
872 // we will use a vectorized version.
873 if (neighborListSize >= vecsize) {
874 alignas(64) std::array<SoAFloatPrecision, vecsize> xtmp, ytmp, ztmp, xArr, yArr, zArr, fxArr, fyArr, fzArr;
875 alignas(64) std::array<autopas::OwnershipState, vecsize> ownedStateArr{};
876 // broadcast of the position of particle i
877 for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {
878 xtmp[tmpj] = xptr[indexFirst];
879 ytmp[tmpj] = yptr[indexFirst];
880 ztmp[tmpj] = zptr[indexFirst];
881 }
882 // loop over the verlet list from 0 to x*vecsize
883 for (; joff < neighborListSize - vecsize + 1; joff += vecsize) {
884 // in each iteration we calculate the interactions of particle i with
885 // vecsize particles in the neighborlist of particle i starting at
886 // particle joff
887
888 [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> sigmaSquareds;
889 [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> epsilon24s;
890 [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> shift6s;
891 if constexpr (useMixing) {
892 for (size_t j = 0; j < vecsize; j++) {
893 sigmaSquareds[j] =
894 _PPLibrary->getMixingSigmaSquared(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
895 epsilon24s[j] = _PPLibrary->getMixing24Epsilon(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
896 if constexpr (applyShift) {
897 shift6s[j] = _PPLibrary->getMixingShift6(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
898 }
899 }
900 }
901
902 // gather position of particle j
903#pragma omp simd safelen(vecsize)
904 for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {
905 xArr[tmpj] = xptr[neighborListPtr[joff + tmpj]];
906 yArr[tmpj] = yptr[neighborListPtr[joff + tmpj]];
907 zArr[tmpj] = zptr[neighborListPtr[joff + tmpj]];
908 ownedStateArr[tmpj] = ownedStatePtr[neighborListPtr[joff + tmpj]];
909 }
910 // do omp simd with reduction of the interaction
911#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) safelen(vecsize)
912 for (size_t j = 0; j < vecsize; j++) {
913 if constexpr (useMixing) {
914 sigmaSquared = sigmaSquareds[j];
915 epsilon24 = epsilon24s[j];
916 if constexpr (applyShift) {
917 shift6 = shift6s[j];
918 }
919 }
920 // const size_t j = currentList[jNeighIndex];
921
922 const auto ownedStateJ = ownedStateArr[j];
923
924 const SoAFloatPrecision drx = xtmp[j] - xArr[j];
925 const SoAFloatPrecision dry = ytmp[j] - yArr[j];
926 const SoAFloatPrecision drz = ztmp[j] - zArr[j];
927
928 const SoAFloatPrecision drx2 = drx * drx;
929 const SoAFloatPrecision dry2 = dry * dry;
930 const SoAFloatPrecision drz2 = drz * drz;
931
932 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
933
934 // Mask away if distance is too large or any particle is a dummy.
935 // Particle ownedStateI was already checked previously.
936 const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;
937
938 const SoAFloatPrecision invdr2 = 1. / dr2;
939 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
940 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
941 const SoAFloatPrecision lj12 = lj6 * lj6;
942 const SoAFloatPrecision lj12m6 = lj12 - lj6;
943 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
944
945 const SoAFloatPrecision fx = drx * fac;
946 const SoAFloatPrecision fy = dry * fac;
947 const SoAFloatPrecision fz = drz * fac;
948
949 fxacc += fx;
950 fyacc += fy;
951 fzacc += fz;
952 if (newton3) {
953 fxArr[j] = fx;
954 fyArr[j] = fy;
955 fzArr[j] = fz;
956 }
957
958 if constexpr (countFLOPs) {
959 numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
960 if constexpr (newton3) {
961 numKernelCallsN3Sum += mask;
962 } else {
963 numKernelCallsNoN3Sum += mask;
964 }
965 }
966
967 if (calculateGlobals) {
968 SoAFloatPrecision virialx = drx * fx;
969 SoAFloatPrecision virialy = dry * fy;
970 SoAFloatPrecision virialz = drz * fz;
971 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
972
973 // We add 6 times the potential energy for each owned particle. The total sum is corrected in
974 // endTraversal().
975 const SoAFloatPrecision energyFactor =
976 (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
977 (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
978 potentialEnergySum += potentialEnergy6 * energyFactor;
979 virialSumX += virialx * energyFactor;
980 virialSumY += virialy * energyFactor;
981 virialSumZ += virialz * energyFactor;
982
983 if constexpr (countFLOPs) {
984 if constexpr (newton3) {
985 numGlobalCalcsN3Sum += mask;
986 } else {
987 numGlobalCalcsNoN3Sum += mask;
988 }
989 }
990 }
991 }
992 // scatter the forces to where they belong, this is only needed for newton3
993 if (newton3) {
994#pragma omp simd safelen(vecsize)
995 for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {
996 const size_t j = neighborListPtr[joff + tmpj];
997 fxptr[j] -= fxArr[tmpj];
998 fyptr[j] -= fyArr[tmpj];
999 fzptr[j] -= fzArr[tmpj];
1000 }
1001 }
1002 }
1003 }
1004 // this loop goes over the remainder and uses no optimizations
1005 for (size_t jNeighIndex = joff; jNeighIndex < neighborListSize; ++jNeighIndex) {
1006 size_t j = neighborList[jNeighIndex];
1007 if (indexFirst == j) continue;
1008 if constexpr (useMixing) {
1009 sigmaSquared = _PPLibrary->getMixingSigmaSquared(typeptr1[indexFirst], typeptr2[j]);
1010 epsilon24 = _PPLibrary->getMixing24Epsilon(typeptr1[indexFirst], typeptr2[j]);
1011 if constexpr (applyShift) {
1012 shift6 = _PPLibrary->getMixingShift6(typeptr1[indexFirst], typeptr2[j]);
1013 }
1014 }
1015
1016 const auto ownedStateJ = ownedStatePtr[j];
1017 if (ownedStateJ == autopas::OwnershipState::dummy) {
1018 continue;
1019 }
1020
1021 const SoAFloatPrecision drx = xptr[indexFirst] - xptr[j];
1022 const SoAFloatPrecision dry = yptr[indexFirst] - yptr[j];
1023 const SoAFloatPrecision drz = zptr[indexFirst] - zptr[j];
1024
1025 const SoAFloatPrecision drx2 = drx * drx;
1026 const SoAFloatPrecision dry2 = dry * dry;
1027 const SoAFloatPrecision drz2 = drz * drz;
1028
1029 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
1030
1031 if constexpr (countFLOPs) {
1032 numDistanceCalculationSum += 1;
1033 }
1034
1035 if (dr2 > cutoffSquared) {
1036 continue;
1037 }
1038
1039 const SoAFloatPrecision invdr2 = 1. / dr2;
1040 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
1041 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
1042 const SoAFloatPrecision lj12 = lj6 * lj6;
1043 const SoAFloatPrecision lj12m6 = lj12 - lj6;
1044 const SoAFloatPrecision fac = epsilon24 * (lj12 + lj12m6) * invdr2;
1045
1046 const SoAFloatPrecision fx = drx * fac;
1047 const SoAFloatPrecision fy = dry * fac;
1048 const SoAFloatPrecision fz = drz * fac;
1049
1050 fxacc += fx;
1051 fyacc += fy;
1052 fzacc += fz;
1053 if (newton3) {
1054 fxptr[j] -= fx;
1055 fyptr[j] -= fy;
1056 fzptr[j] -= fz;
1057 }
1058
1059 if constexpr (countFLOPs) {
1060 if constexpr (newton3) {
1061 numKernelCallsN3Sum += 1;
1062 } else {
1063 numKernelCallsNoN3Sum += 1;
1064 }
1065 }
1066
1067 if (calculateGlobals) {
1068 SoAFloatPrecision virialx = drx * fx;
1069 SoAFloatPrecision virialy = dry * fy;
1070 SoAFloatPrecision virialz = drz * fz;
1071 SoAFloatPrecision potentialEnergy6 = (epsilon24 * lj12m6 + shift6);
1072
1073 // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
1074 const SoAFloatPrecision energyFactor =
1075 (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
1076 (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
1077 potentialEnergySum += potentialEnergy6 * energyFactor;
1078 virialSumX += virialx * energyFactor;
1079 virialSumY += virialy * energyFactor;
1080 virialSumZ += virialz * energyFactor;
1081
1082 if constexpr (countFLOPs) {
1083 if constexpr (newton3) {
1084 ++numGlobalCalcsN3Sum;
1085 } else {
1086 ++numGlobalCalcsNoN3Sum;
1087 }
1088 }
1089 }
1090 }
1091
1092 if (fxacc != 0 or fyacc != 0 or fzacc != 0) {
1093 fxptr[indexFirst] += fxacc;
1094 fyptr[indexFirst] += fyacc;
1095 fzptr[indexFirst] += fzacc;
1096 }
1097
1098 if constexpr (countFLOPs) {
1099 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
1100 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
1101 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
1102 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
1103 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
1104 }
1105
1106 if (calculateGlobals) {
1107 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
1108 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
1109 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
1110 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
1111 }
1112 }
1113
1118 class AoSThreadDataGlobals {
1119 public:
1120 AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
1121 void setZero() {
1122 virialSum = {0., 0., 0.};
1123 potentialEnergySum = 0.;
1124 }
1125
1126 // variables
1127 std::array<double, 3> virialSum;
1128 double potentialEnergySum;
1129
1130 private:
1131 // dummy parameter to get the right size (64 bytes)
1132 double __remainingTo64[(64 - 4 * sizeof(double)) / sizeof(double)];
1133 };
1134
1142 class AoSThreadDataFLOPs {
1143 public:
1144 AoSThreadDataFLOPs() : __remainingTo64{} {}
1145
1149 void setZero() {
1150 numKernelCallsNoN3 = 0;
1151 numKernelCallsN3 = 0;
1152 numDistCalls = 0;
1153 numGlobalCalcsNoN3 = 0;
1154 numGlobalCalcsN3 = 0;
1155 }
1156
1161 size_t numKernelCallsNoN3 = 0;
1162
1167 size_t numKernelCallsN3 = 0;
1168
1173 size_t numDistCalls = 0;
1174
1179 size_t numGlobalCalcsN3 = 0;
1180
1185 size_t numGlobalCalcsNoN3 = 0;
1186
1187 private:
1191 double __remainingTo64[(64 - 5 * sizeof(size_t)) / sizeof(size_t)];
1192 };
1193
1194 // make sure of the size of AoSThreadDataGlobals and AoSThreadDataFLOPs
1195 static_assert(sizeof(AoSThreadDataGlobals) % 64 == 0, "AoSThreadDataGlobals has wrong size");
1196 static_assert(sizeof(AoSThreadDataFLOPs) % 64 == 0, "AoSThreadDataFLOPs has wrong size");
1197
1198 const double _cutoffSquared;
1199 // not const because they might be reset through PPL
1200 double _epsilon24, _sigmaSquared, _shift6 = 0;
1201
1203
1204 // sum of the potential energy, only calculated if calculateGlobals is true
1205 double _potentialEnergySum;
1206
1207 // sum of the virial, only calculated if calculateGlobals is true
1208 std::array<double, 3> _virialSum;
1209
1210 // thread buffer for aos
1211 std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals{};
1212 std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
1213
1214 // defines whether or whether not the global values are already preprocessed
1215 bool _postProcessed;
1216};
1217} // namespace mdLib
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingShift6(intType i, intType j) const
Returns precomputed mixed shift * 6 for one pair of site types.
Definition: ParticlePropertiesLibrary.h:262
floatType getMixingSigmaSquared(intType i, intType j) const
Returns precomputed mixed squared sigma for one pair of site types.
Definition: ParticlePropertiesLibrary.h:252
floatType getMixing24Epsilon(intType i, intType j) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:224
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:575
auto getLJMixingData(intType i, intType j) const
Get complete mixing data for one pair of LJ site types.
Definition: ParticlePropertiesLibrary.h:234
AlignedAllocator class.
Definition: AlignedAllocator.h:29
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115
A functor to handle lennard-jones interactions between two particles (molecules).
Definition: LJFunctor.h:41
static constexpr bool getMixing()
Definition: LJFunctor.h:632
double getHitRate() const override
Get the hit rate.
Definition: LJFunctor.h:788
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctor.h:120
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctor.h:112
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctor.h:116
void initTraversal() final
Reset the global values.
Definition: LJFunctor.h:638
LJFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: LJFunctor.h:88
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctor.h:598
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: LJFunctor.h:658
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctor.h:110
LJFunctor()=delete
Deleted default constructor.
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctor.h:613
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:201
std::string getName() final
Returns name of functor.
Definition: LJFunctor.h:108
LJFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: LJFunctor.h:100
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:366
void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared)
Sets the particle properties constants for this functor.
Definition: LJFunctor.h:585
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctor.h:566
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: LJFunctor.h:755
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctor.h:623
double getPotentialEnergy()
Get the potential Energy.
Definition: LJFunctor.h:688
double getVirial()
Get the virial.
Definition: LJFunctor.h:706
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132
constexpr unsigned int DEFAULT_CACHE_LINE_SIZE
Default size for a cache line.
Definition: AlignedAllocator.h:21