39template <
class Particle_T,
bool applyShift =
false,
bool useMixing =
false,
41 bool countFLOPs =
false,
bool relevantForTuning =
true>
44 calculateGlobals, countFLOPs, relevantForTuning>> {
48 using SoAArraysType =
typename Particle_T::SoAArraysType;
53 using SoAFloatPrecision =
typename Particle_T::ParticleSoAFloatPrecision;
67 explicit LJFunctor(
double cutoff,
void * )
69 countFLOPs, relevantForTuning>>(cutoff),
70 _cutoffSquared{cutoff * cutoff},
71 _potentialEnergySum{0.},
72 _virialSum{0., 0., 0.},
73 _postProcessed{false} {
74 if constexpr (calculateGlobals) {
77 if constexpr (countFLOPs) {
92 static_assert(not useMixing,
93 "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "
105 static_assert(useMixing,
106 "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "
107 "or set mixing to true.");
108 _PPLibrary = &particlePropertiesLibrary;
111 std::string
getName() final {
return "LJFunctorAutoVec"; }
116 return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
120 return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
123 void AoSFunctor(Particle_T &i, Particle_T &j,
bool newton3)
final {
124 using namespace autopas::utils::ArrayMath::literals;
126 if (i.isDummy() or j.isDummy()) {
132 if constexpr (countFLOPs) {
133 ++_aosThreadDataFLOPs[threadnum].numDistCalls;
136 auto sigmaSquared = _sigmaSquared;
137 auto epsilon24 = _epsilon24;
138 auto shift6 = _shift6;
139 if constexpr (useMixing) {
142 if constexpr (applyShift) {
146 auto dr = i.getR() - j.getR();
149 if (dr2 > _cutoffSquared) {
153 double invdr2 = 1. / dr2;
154 double lj6 = sigmaSquared * invdr2;
155 lj6 = lj6 * lj6 * lj6;
156 double lj12 = lj6 * lj6;
157 double lj12m6 = lj12 - lj6;
158 double fac = epsilon24 * (lj12 + lj12m6) * invdr2;
166 if constexpr (countFLOPs) {
168 ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
170 ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
174 if constexpr (calculateGlobals) {
178 auto virial = dr * f;
179 double potentialEnergy6 = epsilon24 * lj12m6 + shift6;
182 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
183 _aosThreadDataGlobals[threadnum].virialSum += virial;
186 if (newton3 and j.isOwned()) {
187 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
188 _aosThreadDataGlobals[threadnum].virialSum += virial;
190 if constexpr (countFLOPs) {
192 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
194 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
205 if (soa.size() == 0)
return;
209 const auto *
const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
210 const auto *
const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
211 const auto *
const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
212 const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
214 SoAFloatPrecision *
const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
215 SoAFloatPrecision *
const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
216 SoAFloatPrecision *
const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
218 [[maybe_unused]]
auto *
const __restrict typeptr = soa.template begin<Particle_T::AttributeNames::typeId>();
220 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
222 SoAFloatPrecision potentialEnergySum = 0.;
223 SoAFloatPrecision virialSumX = 0.;
224 SoAFloatPrecision virialSumY = 0.;
225 SoAFloatPrecision virialSumZ = 0.;
227 size_t numDistanceCalculationSum = 0;
228 size_t numKernelCallsN3Sum = 0;
229 size_t numKernelCallsNoN3Sum = 0;
230 size_t numGlobalCalcsSum = 0;
232 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
233 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
234 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
235 if constexpr (useMixing) {
238 sigmaSquareds.resize(soa.size());
239 epsilon24s.resize(soa.size());
241 if constexpr (applyShift) {
242 shift6s.resize(soa.size());
246 const SoAFloatPrecision const_shift6 = _shift6;
247 const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
248 const SoAFloatPrecision const_epsilon24 = _epsilon24;
250 for (
unsigned int i = 0; i < soa.size(); ++i) {
251 const auto ownedStateI = ownedStatePtr[i];
256 SoAFloatPrecision fxacc = 0.;
257 SoAFloatPrecision fyacc = 0.;
258 SoAFloatPrecision fzacc = 0.;
260 if constexpr (useMixing) {
261 for (
unsigned int j = 0; j < soa.size(); ++j) {
263 sigmaSquareds[j] = mixingData.sigmaSquared;
264 epsilon24s[j] = mixingData.epsilon24;
265 if constexpr (applyShift) {
266 shift6s[j] = mixingData.shift6;
273#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsSum)
274 for (
unsigned int j = i + 1; j < soa.size(); ++j) {
275 SoAFloatPrecision shift6 = const_shift6;
276 SoAFloatPrecision sigmaSquared = const_sigmaSquared;
277 SoAFloatPrecision epsilon24 = const_epsilon24;
278 if constexpr (useMixing) {
279 sigmaSquared = sigmaSquareds[j];
280 epsilon24 = epsilon24s[j];
281 if constexpr (applyShift) {
286 const auto ownedStateJ = ownedStatePtr[j];
288 const SoAFloatPrecision drx = xptr[i] - xptr[j];
289 const SoAFloatPrecision dry = yptr[i] - yptr[j];
290 const SoAFloatPrecision drz = zptr[i] - zptr[j];
292 const SoAFloatPrecision drx2 = drx * drx;
293 const SoAFloatPrecision dry2 = dry * dry;
294 const SoAFloatPrecision drz2 = drz * drz;
296 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
302 const SoAFloatPrecision invdr2 = 1. / dr2;
303 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
304 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
305 const SoAFloatPrecision lj12 = lj6 * lj6;
306 const SoAFloatPrecision lj12m6 = lj12 - lj6;
307 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
309 const SoAFloatPrecision fx = drx * fac;
310 const SoAFloatPrecision fy = dry * fac;
311 const SoAFloatPrecision fz = drz * fac;
322 if constexpr (countFLOPs) {
324 numKernelCallsN3Sum += mask;
327 if (calculateGlobals) {
328 const SoAFloatPrecision virialx = drx * fx;
329 const SoAFloatPrecision virialy = dry * fy;
330 const SoAFloatPrecision virialz = drz * fz;
331 const SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
336 potentialEnergySum += potentialEnergy6 * energyFactor;
338 virialSumX += virialx * energyFactor;
339 virialSumY += virialy * energyFactor;
340 virialSumZ += virialz * energyFactor;
342 if constexpr (countFLOPs) {
343 numGlobalCalcsSum += mask;
352 if constexpr (countFLOPs) {
353 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
354 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
355 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
356 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsSum;
358 if (calculateGlobals) {
359 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
360 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
361 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
362 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
370 const bool newton3)
final {
372 SoAFunctorPairImpl<true>(soa1, soa2);
374 SoAFunctorPairImpl<false>(soa1, soa2);
386 template <
bool newton3>
388 if (soa1.
size() == 0 || soa2.
size() == 0)
return;
392 const auto *
const __restrict x1ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
393 const auto *
const __restrict y1ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
394 const auto *
const __restrict z1ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
395 const auto *
const __restrict x2ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
396 const auto *
const __restrict y2ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
397 const auto *
const __restrict z2ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
398 const auto *
const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
399 const auto *
const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();
401 auto *
const __restrict fx1ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
402 auto *
const __restrict fy1ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
403 auto *
const __restrict fz1ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
404 auto *
const __restrict fx2ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
405 auto *
const __restrict fy2ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
406 auto *
const __restrict fz2ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
407 [[maybe_unused]]
auto *
const __restrict typeptr1 = soa1.template begin<Particle_T::AttributeNames::typeId>();
408 [[maybe_unused]]
auto *
const __restrict typeptr2 = soa2.template begin<Particle_T::AttributeNames::typeId>();
411 SoAFloatPrecision potentialEnergySum = 0.;
412 SoAFloatPrecision virialSumX = 0.;
413 SoAFloatPrecision virialSumY = 0.;
414 SoAFloatPrecision virialSumZ = 0.;
416 size_t numDistanceCalculationSum = 0;
417 size_t numKernelCallsN3Sum = 0;
418 size_t numKernelCallsNoN3Sum = 0;
419 size_t numGlobalCalcsN3Sum = 0;
420 size_t numGlobalCalcsNoN3Sum = 0;
422 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
423 SoAFloatPrecision shift6 = _shift6;
424 SoAFloatPrecision sigmaSquared = _sigmaSquared;
425 SoAFloatPrecision epsilon24 = _epsilon24;
428 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
429 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
430 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
431 if constexpr (useMixing) {
432 sigmaSquareds.resize(soa2.
size());
433 epsilon24s.resize(soa2.
size());
435 if constexpr (applyShift) {
436 shift6s.resize(soa2.
size());
440 for (
unsigned int i = 0; i < soa1.
size(); ++i) {
441 SoAFloatPrecision fxacc = 0;
442 SoAFloatPrecision fyacc = 0;
443 SoAFloatPrecision fzacc = 0;
445 const auto ownedStateI = ownedStatePtr1[i];
451 if constexpr (useMixing) {
452 for (
unsigned int j = 0; j < soa2.
size(); ++j) {
455 if constexpr (applyShift) {
463#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum)
464 for (
unsigned int j = 0; j < soa2.
size(); ++j) {
465 if constexpr (useMixing) {
466 sigmaSquared = sigmaSquareds[j];
467 epsilon24 = epsilon24s[j];
468 if constexpr (applyShift) {
473 const auto ownedStateJ = ownedStatePtr2[j];
475 const SoAFloatPrecision drx = x1ptr[i] - x2ptr[j];
476 const SoAFloatPrecision dry = y1ptr[i] - y2ptr[j];
477 const SoAFloatPrecision drz = z1ptr[i] - z2ptr[j];
479 const SoAFloatPrecision drx2 = drx * drx;
480 const SoAFloatPrecision dry2 = dry * dry;
481 const SoAFloatPrecision drz2 = drz * drz;
483 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
489 const SoAFloatPrecision invdr2 = 1. / dr2;
490 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
491 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
492 const SoAFloatPrecision lj12 = lj6 * lj6;
493 const SoAFloatPrecision lj12m6 = lj12 - lj6;
494 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
496 const SoAFloatPrecision fx = drx * fac;
497 const SoAFloatPrecision fy = dry * fac;
498 const SoAFloatPrecision fz = drz * fac;
509 if constexpr (countFLOPs) {
511 if constexpr (newton3) {
512 numKernelCallsN3Sum += mask;
514 numKernelCallsNoN3Sum += mask;
518 if constexpr (calculateGlobals) {
519 SoAFloatPrecision virialx = drx * fx;
520 SoAFloatPrecision virialy = dry * fy;
521 SoAFloatPrecision virialz = drz * fz;
522 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
525 const SoAFloatPrecision energyFactor =
528 potentialEnergySum += potentialEnergy6 * energyFactor;
529 virialSumX += virialx * energyFactor;
530 virialSumY += virialy * energyFactor;
531 virialSumZ += virialz * energyFactor;
533 if constexpr (countFLOPs) {
534 if constexpr (newton3) {
535 numGlobalCalcsN3Sum += mask;
537 numGlobalCalcsNoN3Sum += mask;
546 if constexpr (countFLOPs) {
547 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
548 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
549 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
550 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
551 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
553 if (calculateGlobals) {
554 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
555 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
556 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
557 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
571 bool newton3)
final {
572 if (soa.size() == 0 or neighborList.empty())
return;
574 SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
576 SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
589 _epsilon24 = epsilon24;
590 _sigmaSquared = sigmaSquared;
602 return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
603 Particle_T::AttributeNames::posX,
604 Particle_T::AttributeNames::posY,
605 Particle_T::AttributeNames::posZ,
606 Particle_T::AttributeNames::forceX,
607 Particle_T::AttributeNames::forceY,
608 Particle_T::AttributeNames::forceZ,
609 Particle_T::AttributeNames::typeId,
610 Particle_T::AttributeNames::ownershipState};
617 return std::array<typename Particle_T::AttributeNames, 6>{
618 Particle_T::AttributeNames::id, Particle_T::AttributeNames::posX,
619 Particle_T::AttributeNames::posY, Particle_T::AttributeNames::posZ,
620 Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
627 return std::array<typename Particle_T::AttributeNames, 3>{
628 Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
642 _potentialEnergySum = 0.;
643 _virialSum = {0., 0., 0.};
644 _postProcessed =
false;
645 if constexpr (calculateGlobals) {
646 for (
auto &data : _aosThreadDataGlobals) {
650 if constexpr (countFLOPs) {
651 for (
auto &data : _aosThreadDataFLOPs) {
662 using namespace autopas::utils::ArrayMath::literals;
664 if (_postProcessed) {
666 "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
668 if (calculateGlobals) {
669 for (
const auto &data : _aosThreadDataGlobals) {
670 _potentialEnergySum += data.potentialEnergySum;
671 _virialSum += data.virialSum;
675 _potentialEnergySum *= 0.5;
679 _potentialEnergySum /= 6.;
680 _postProcessed =
true;
682 AutoPasLog(DEBUG,
"Final potential energy {}", _potentialEnergySum);
683 AutoPasLog(DEBUG,
"Final virial {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
692 if (not calculateGlobals) {
694 "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
696 "values, please specify calculateGlobals to be true.");
698 if (not _postProcessed) {
700 "Cannot get potential energy, because endTraversal was not called.");
702 return _potentialEnergySum;
710 if (not calculateGlobals) {
712 "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
713 "values, please specify calculateGlobals to be true.");
715 if (not _postProcessed) {
717 "Cannot get virial, because endTraversal was not called.");
719 return _virialSum[0] + _virialSum[1] + _virialSum[2];
759 if constexpr (countFLOPs) {
760 const size_t numDistCallsAcc =
761 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
762 [](
size_t sum,
const auto &data) { return sum + data.numDistCalls; });
763 const size_t numKernelCallsN3Acc =
764 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
765 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsN3; });
766 const size_t numKernelCallsNoN3Acc =
767 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
768 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsNoN3; });
769 const size_t numGlobalCalcsN3Acc =
770 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
771 [](
size_t sum,
const auto &data) { return sum + data.numGlobalCalcsN3; });
772 const size_t numGlobalCalcsNoN3Acc =
773 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
774 [](
size_t sum,
const auto &data) { return sum + data.numGlobalCalcsNoN3; });
776 constexpr size_t numFLOPsPerDistanceCall = 8;
777 constexpr size_t numFLOPsPerN3KernelCall = 18;
778 constexpr size_t numFLOPsPerNoN3KernelCall = 15;
779 constexpr size_t numFLOPsPerN3GlobalCalc = applyShift ? 13 : 12;
780 constexpr size_t numFLOPsPerNoN3GlobalCalc = applyShift ? 9 : 8;
782 return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
783 numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
784 numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
787 return std::numeric_limits<size_t>::max();
792 if constexpr (countFLOPs) {
793 const size_t numDistCallsAcc =
794 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
795 [](
size_t sum,
const auto &data) { return sum + data.numDistCalls; });
796 const size_t numKernelCallsN3Acc =
797 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
798 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsN3; });
799 const size_t numKernelCallsNoN3Acc =
800 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
801 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsNoN3; });
803 return (
static_cast<double>(numKernelCallsNoN3Acc) +
static_cast<double>(numKernelCallsN3Acc)) /
804 (
static_cast<double>(numDistCallsAcc));
807 return std::numeric_limits<double>::quiet_NaN();
812 template <
bool newton3>
815 const auto *
const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
816 const auto *
const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
817 const auto *
const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
819 auto *
const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
820 auto *
const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
821 auto *
const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
822 [[maybe_unused]]
auto *
const __restrict typeptr1 = soa.template begin<Particle_T::AttributeNames::typeId>();
823 [[maybe_unused]]
auto *
const __restrict typeptr2 = soa.template begin<Particle_T::AttributeNames::typeId>();
825 const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
827 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
828 SoAFloatPrecision shift6 = _shift6;
829 SoAFloatPrecision sigmaSquared = _sigmaSquared;
830 SoAFloatPrecision epsilon24 = _epsilon24;
832 SoAFloatPrecision potentialEnergySum = 0.;
833 SoAFloatPrecision virialSumX = 0.;
834 SoAFloatPrecision virialSumY = 0.;
835 SoAFloatPrecision virialSumZ = 0.;
838 size_t numDistanceCalculationSum = 0;
839 size_t numKernelCallsN3Sum = 0;
840 size_t numKernelCallsNoN3Sum = 0;
841 size_t numGlobalCalcsN3Sum = 0;
842 size_t numGlobalCalcsNoN3Sum = 0;
844 SoAFloatPrecision fxacc = 0;
845 SoAFloatPrecision fyacc = 0;
846 SoAFloatPrecision fzacc = 0;
847 const size_t neighborListSize = neighborList.size();
848 const size_t *
const __restrict neighborListPtr = neighborList.data();
851 const auto ownedStateI = ownedStatePtr[indexFirst];
867 constexpr size_t vecsize = 16;
870 constexpr size_t vecsize = 12;
876 if (neighborListSize >= vecsize) {
877 alignas(64) std::array<SoAFloatPrecision, vecsize> xtmp, ytmp, ztmp, xArr, yArr, zArr, fxArr, fyArr, fzArr;
878 alignas(64) std::array<autopas::OwnershipState, vecsize> ownedStateArr{};
880 for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
881 xtmp[tmpj] = xptr[indexFirst];
882 ytmp[tmpj] = yptr[indexFirst];
883 ztmp[tmpj] = zptr[indexFirst];
886 for (; joff < neighborListSize - vecsize + 1; joff += vecsize) {
894 if constexpr (useMixing) {
895 for (
size_t j = 0; j < vecsize; j++) {
898 epsilon24s[j] = _PPLibrary->
getMixing24Epsilon(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
899 if constexpr (applyShift) {
900 shift6s[j] = _PPLibrary->
getMixingShift6(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
906#pragma omp simd safelen(vecsize)
907 for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
908 xArr[tmpj] = xptr[neighborListPtr[joff + tmpj]];
909 yArr[tmpj] = yptr[neighborListPtr[joff + tmpj]];
910 zArr[tmpj] = zptr[neighborListPtr[joff + tmpj]];
911 ownedStateArr[tmpj] = ownedStatePtr[neighborListPtr[joff + tmpj]];
914#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) safelen(vecsize)
915 for (
size_t j = 0; j < vecsize; j++) {
916 if constexpr (useMixing) {
917 sigmaSquared = sigmaSquareds[j];
918 epsilon24 = epsilon24s[j];
919 if constexpr (applyShift) {
925 const auto ownedStateJ = ownedStateArr[j];
927 const SoAFloatPrecision drx = xtmp[j] - xArr[j];
928 const SoAFloatPrecision dry = ytmp[j] - yArr[j];
929 const SoAFloatPrecision drz = ztmp[j] - zArr[j];
931 const SoAFloatPrecision drx2 = drx * drx;
932 const SoAFloatPrecision dry2 = dry * dry;
933 const SoAFloatPrecision drz2 = drz * drz;
935 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
941 const SoAFloatPrecision invdr2 = 1. / dr2;
942 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
943 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
944 const SoAFloatPrecision lj12 = lj6 * lj6;
945 const SoAFloatPrecision lj12m6 = lj12 - lj6;
946 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
948 const SoAFloatPrecision fx = drx * fac;
949 const SoAFloatPrecision fy = dry * fac;
950 const SoAFloatPrecision fz = drz * fac;
961 if constexpr (countFLOPs) {
963 if constexpr (newton3) {
964 numKernelCallsN3Sum += mask;
966 numKernelCallsNoN3Sum += mask;
970 if (calculateGlobals) {
971 SoAFloatPrecision virialx = drx * fx;
972 SoAFloatPrecision virialy = dry * fy;
973 SoAFloatPrecision virialz = drz * fz;
974 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
978 const SoAFloatPrecision energyFactor =
981 potentialEnergySum += potentialEnergy6 * energyFactor;
982 virialSumX += virialx * energyFactor;
983 virialSumY += virialy * energyFactor;
984 virialSumZ += virialz * energyFactor;
986 if constexpr (countFLOPs) {
987 if constexpr (newton3) {
988 numGlobalCalcsN3Sum += mask;
990 numGlobalCalcsNoN3Sum += mask;
997#pragma omp simd safelen(vecsize)
998 for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
999 const size_t j = neighborListPtr[joff + tmpj];
1000 fxptr[j] -= fxArr[tmpj];
1001 fyptr[j] -= fyArr[tmpj];
1002 fzptr[j] -= fzArr[tmpj];
1008 for (
size_t jNeighIndex = joff; jNeighIndex < neighborListSize; ++jNeighIndex) {
1009 size_t j = neighborList[jNeighIndex];
1010 if (indexFirst == j)
continue;
1011 if constexpr (useMixing) {
1014 if constexpr (applyShift) {
1015 shift6 = _PPLibrary->
getMixingShift6(typeptr1[indexFirst], typeptr2[j]);
1019 const auto ownedStateJ = ownedStatePtr[j];
1024 const SoAFloatPrecision drx = xptr[indexFirst] - xptr[j];
1025 const SoAFloatPrecision dry = yptr[indexFirst] - yptr[j];
1026 const SoAFloatPrecision drz = zptr[indexFirst] - zptr[j];
1028 const SoAFloatPrecision drx2 = drx * drx;
1029 const SoAFloatPrecision dry2 = dry * dry;
1030 const SoAFloatPrecision drz2 = drz * drz;
1032 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
1034 if constexpr (countFLOPs) {
1035 numDistanceCalculationSum += 1;
1038 if (dr2 > cutoffSquared) {
1042 const SoAFloatPrecision invdr2 = 1. / dr2;
1043 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
1044 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
1045 const SoAFloatPrecision lj12 = lj6 * lj6;
1046 const SoAFloatPrecision lj12m6 = lj12 - lj6;
1047 const SoAFloatPrecision fac = epsilon24 * (lj12 + lj12m6) * invdr2;
1049 const SoAFloatPrecision fx = drx * fac;
1050 const SoAFloatPrecision fy = dry * fac;
1051 const SoAFloatPrecision fz = drz * fac;
1062 if constexpr (countFLOPs) {
1063 if constexpr (newton3) {
1064 numKernelCallsN3Sum += 1;
1066 numKernelCallsNoN3Sum += 1;
1070 if (calculateGlobals) {
1071 SoAFloatPrecision virialx = drx * fx;
1072 SoAFloatPrecision virialy = dry * fy;
1073 SoAFloatPrecision virialz = drz * fz;
1074 SoAFloatPrecision potentialEnergy6 = (epsilon24 * lj12m6 + shift6);
1077 const SoAFloatPrecision energyFactor =
1080 potentialEnergySum += potentialEnergy6 * energyFactor;
1081 virialSumX += virialx * energyFactor;
1082 virialSumY += virialy * energyFactor;
1083 virialSumZ += virialz * energyFactor;
1085 if constexpr (countFLOPs) {
1086 if constexpr (newton3) {
1087 ++numGlobalCalcsN3Sum;
1089 ++numGlobalCalcsNoN3Sum;
1095 if (fxacc != 0 or fyacc != 0 or fzacc != 0) {
1096 fxptr[indexFirst] += fxacc;
1097 fyptr[indexFirst] += fyacc;
1098 fzptr[indexFirst] += fzacc;
1101 if constexpr (countFLOPs) {
1102 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
1103 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
1104 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
1105 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
1106 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
1109 if (calculateGlobals) {
1110 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
1111 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
1112 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
1113 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
1121 class AoSThreadDataGlobals {
1123 AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
1125 virialSum = {0., 0., 0.};
1126 potentialEnergySum = 0.;
1130 std::array<double, 3> virialSum;
1131 double potentialEnergySum;
1135 double __remainingTo64[(64 - 4 *
sizeof(double)) /
sizeof(double)];
1145 class AoSThreadDataFLOPs {
1147 AoSThreadDataFLOPs() : __remainingTo64{} {}
1153 numKernelCallsNoN3 = 0;
1154 numKernelCallsN3 = 0;
1156 numGlobalCalcsNoN3 = 0;
1157 numGlobalCalcsN3 = 0;
1164 size_t numKernelCallsNoN3 = 0;
1170 size_t numKernelCallsN3 = 0;
1176 size_t numDistCalls = 0;
1182 size_t numGlobalCalcsN3 = 0;
1188 size_t numGlobalCalcsNoN3 = 0;
1194 double __remainingTo64[(64 - 5 *
sizeof(size_t)) /
sizeof(size_t)];
1198 static_assert(
sizeof(AoSThreadDataGlobals) % 64 == 0,
"AoSThreadDataGlobals has wrong size");
1199 static_assert(
sizeof(AoSThreadDataFLOPs) % 64 == 0,
"AoSThreadDataFLOPs has wrong size");
1201 const double _cutoffSquared;
1203 double _epsilon24, _sigmaSquared, _shift6 = 0;
1208 double _potentialEnergySum;
1211 std::array<double, 3> _virialSum;
1214 std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals{};
1215 std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
1218 bool _postProcessed;
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingShift6(intType i, intType j) const
Returns precomputed mixed shift * 6 for one pair of site types.
Definition: ParticlePropertiesLibrary.h:262
floatType getMixingSigmaSquared(intType i, intType j) const
Returns precomputed mixed squared sigma for one pair of site types.
Definition: ParticlePropertiesLibrary.h:252
floatType getMixing24Epsilon(intType i, intType j) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:224
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:576
auto getLJMixingData(intType i, intType j) const
Get complete mixing data for one pair of LJ site types.
Definition: ParticlePropertiesLibrary.h:234
AlignedAllocator class.
Definition: AlignedAllocator.h:29
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:116
A functor to handle lennard-jones interactions between two particles (molecules).
Definition: LJFunctor.h:44
static constexpr bool getMixing()
Definition: LJFunctor.h:635
double getHitRate() const override
Get the hit rate.
Definition: LJFunctor.h:791
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctor.h:123
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctor.h:115
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctor.h:119
void initTraversal() final
Reset the global values.
Definition: LJFunctor.h:641
LJFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: LJFunctor.h:91
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctor.h:601
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: LJFunctor.h:661
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctor.h:113
LJFunctor()=delete
Deleted default constructor.
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctor.h:616
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:204
std::string getName() final
Returns name of functor.
Definition: LJFunctor.h:111
LJFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: LJFunctor.h:103
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:369
void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared)
Sets the particle properties constants for this functor.
Definition: LJFunctor.h:588
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctor.h:569
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: LJFunctor.h:758
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctor.h:626
double getPotentialEnergy()
Get the potential Energy.
Definition: LJFunctor.h:691
double getVirial()
Get the virial.
Definition: LJFunctor.h:709
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:34
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:23
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132
constexpr unsigned int DEFAULT_CACHE_LINE_SIZE
Default size for a cache line.
Definition: AlignedAllocator.h:21