36template <
class Particle_T,
bool applyShift =
false,
bool useMixing =
false,
38 bool countFLOPs =
false,
bool relevantForTuning =
true>
41 calculateGlobals, countFLOPs, relevantForTuning>> {
45 using SoAArraysType =
typename Particle_T::SoAArraysType;
50 using SoAFloatPrecision =
typename Particle_T::ParticleSoAFloatPrecision;
64 explicit LJFunctor(
double cutoff,
void * )
66 countFLOPs, relevantForTuning>>(cutoff),
67 _cutoffSquared{cutoff * cutoff},
68 _potentialEnergySum{0.},
69 _virialSum{0., 0., 0.},
70 _postProcessed{false} {
71 if constexpr (calculateGlobals) {
74 if constexpr (countFLOPs) {
89 static_assert(not useMixing,
90 "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "
102 static_assert(useMixing,
103 "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "
104 "or set mixing to true.");
105 _PPLibrary = &particlePropertiesLibrary;
108 std::string
getName() final {
return "LJFunctorAutoVec"; }
113 return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
117 return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
120 void AoSFunctor(Particle_T &i, Particle_T &j,
bool newton3)
final {
121 using namespace autopas::utils::ArrayMath::literals;
123 if (i.isDummy() or j.isDummy()) {
129 if constexpr (countFLOPs) {
130 ++_aosThreadDataFLOPs[threadnum].numDistCalls;
133 auto sigmaSquared = _sigmaSquared;
134 auto epsilon24 = _epsilon24;
135 auto shift6 = _shift6;
136 if constexpr (useMixing) {
139 if constexpr (applyShift) {
143 auto dr = i.getR() - j.getR();
146 if (dr2 > _cutoffSquared) {
150 double invdr2 = 1. / dr2;
151 double lj6 = sigmaSquared * invdr2;
152 lj6 = lj6 * lj6 * lj6;
153 double lj12 = lj6 * lj6;
154 double lj12m6 = lj12 - lj6;
155 double fac = epsilon24 * (lj12 + lj12m6) * invdr2;
163 if constexpr (countFLOPs) {
165 ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
167 ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
171 if constexpr (calculateGlobals) {
175 auto virial = dr * f;
176 double potentialEnergy6 = epsilon24 * lj12m6 + shift6;
179 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
180 _aosThreadDataGlobals[threadnum].virialSum += virial;
183 if (newton3 and j.isOwned()) {
184 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
185 _aosThreadDataGlobals[threadnum].virialSum += virial;
187 if constexpr (countFLOPs) {
189 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
191 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
202 if (soa.size() == 0)
return;
206 const auto *
const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
207 const auto *
const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
208 const auto *
const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
209 const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
211 SoAFloatPrecision *
const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
212 SoAFloatPrecision *
const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
213 SoAFloatPrecision *
const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
215 [[maybe_unused]]
auto *
const __restrict typeptr = soa.template begin<Particle_T::AttributeNames::typeId>();
217 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
219 SoAFloatPrecision potentialEnergySum = 0.;
220 SoAFloatPrecision virialSumX = 0.;
221 SoAFloatPrecision virialSumY = 0.;
222 SoAFloatPrecision virialSumZ = 0.;
224 size_t numDistanceCalculationSum = 0;
225 size_t numKernelCallsN3Sum = 0;
226 size_t numKernelCallsNoN3Sum = 0;
227 size_t numGlobalCalcsSum = 0;
229 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
230 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
231 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
232 if constexpr (useMixing) {
235 sigmaSquareds.resize(soa.size());
236 epsilon24s.resize(soa.size());
238 if constexpr (applyShift) {
239 shift6s.resize(soa.size());
243 const SoAFloatPrecision const_shift6 = _shift6;
244 const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
245 const SoAFloatPrecision const_epsilon24 = _epsilon24;
247 for (
unsigned int i = 0; i < soa.size(); ++i) {
248 const auto ownedStateI = ownedStatePtr[i];
253 SoAFloatPrecision fxacc = 0.;
254 SoAFloatPrecision fyacc = 0.;
255 SoAFloatPrecision fzacc = 0.;
257 if constexpr (useMixing) {
258 for (
unsigned int j = 0; j < soa.size(); ++j) {
260 sigmaSquareds[j] = mixingData.sigmaSquared;
261 epsilon24s[j] = mixingData.epsilon24;
262 if constexpr (applyShift) {
263 shift6s[j] = mixingData.shift6;
270#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsSum)
271 for (
unsigned int j = i + 1; j < soa.size(); ++j) {
272 SoAFloatPrecision shift6 = const_shift6;
273 SoAFloatPrecision sigmaSquared = const_sigmaSquared;
274 SoAFloatPrecision epsilon24 = const_epsilon24;
275 if constexpr (useMixing) {
276 sigmaSquared = sigmaSquareds[j];
277 epsilon24 = epsilon24s[j];
278 if constexpr (applyShift) {
283 const auto ownedStateJ = ownedStatePtr[j];
285 const SoAFloatPrecision drx = xptr[i] - xptr[j];
286 const SoAFloatPrecision dry = yptr[i] - yptr[j];
287 const SoAFloatPrecision drz = zptr[i] - zptr[j];
289 const SoAFloatPrecision drx2 = drx * drx;
290 const SoAFloatPrecision dry2 = dry * dry;
291 const SoAFloatPrecision drz2 = drz * drz;
293 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
299 const SoAFloatPrecision invdr2 = 1. / dr2;
300 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
301 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
302 const SoAFloatPrecision lj12 = lj6 * lj6;
303 const SoAFloatPrecision lj12m6 = lj12 - lj6;
304 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
306 const SoAFloatPrecision fx = drx * fac;
307 const SoAFloatPrecision fy = dry * fac;
308 const SoAFloatPrecision fz = drz * fac;
319 if constexpr (countFLOPs) {
321 numKernelCallsN3Sum += mask;
324 if (calculateGlobals) {
325 const SoAFloatPrecision virialx = drx * fx;
326 const SoAFloatPrecision virialy = dry * fy;
327 const SoAFloatPrecision virialz = drz * fz;
328 const SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
333 potentialEnergySum += potentialEnergy6 * energyFactor;
335 virialSumX += virialx * energyFactor;
336 virialSumY += virialy * energyFactor;
337 virialSumZ += virialz * energyFactor;
339 if constexpr (countFLOPs) {
340 numGlobalCalcsSum += mask;
349 if constexpr (countFLOPs) {
350 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
351 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
352 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
353 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsSum;
355 if (calculateGlobals) {
356 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
357 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
358 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
359 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
367 const bool newton3)
final {
369 SoAFunctorPairImpl<true>(soa1, soa2);
371 SoAFunctorPairImpl<false>(soa1, soa2);
383 template <
bool newton3>
385 if (soa1.
size() == 0 || soa2.
size() == 0)
return;
389 const auto *
const __restrict x1ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
390 const auto *
const __restrict y1ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
391 const auto *
const __restrict z1ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
392 const auto *
const __restrict x2ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
393 const auto *
const __restrict y2ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
394 const auto *
const __restrict z2ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
395 const auto *
const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
396 const auto *
const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();
398 auto *
const __restrict fx1ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
399 auto *
const __restrict fy1ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
400 auto *
const __restrict fz1ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
401 auto *
const __restrict fx2ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
402 auto *
const __restrict fy2ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
403 auto *
const __restrict fz2ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
404 [[maybe_unused]]
auto *
const __restrict typeptr1 = soa1.template begin<Particle_T::AttributeNames::typeId>();
405 [[maybe_unused]]
auto *
const __restrict typeptr2 = soa2.template begin<Particle_T::AttributeNames::typeId>();
408 SoAFloatPrecision potentialEnergySum = 0.;
409 SoAFloatPrecision virialSumX = 0.;
410 SoAFloatPrecision virialSumY = 0.;
411 SoAFloatPrecision virialSumZ = 0.;
413 size_t numDistanceCalculationSum = 0;
414 size_t numKernelCallsN3Sum = 0;
415 size_t numKernelCallsNoN3Sum = 0;
416 size_t numGlobalCalcsN3Sum = 0;
417 size_t numGlobalCalcsNoN3Sum = 0;
419 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
420 SoAFloatPrecision shift6 = _shift6;
421 SoAFloatPrecision sigmaSquared = _sigmaSquared;
422 SoAFloatPrecision epsilon24 = _epsilon24;
425 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
426 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
427 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
428 if constexpr (useMixing) {
429 sigmaSquareds.resize(soa2.
size());
430 epsilon24s.resize(soa2.
size());
432 if constexpr (applyShift) {
433 shift6s.resize(soa2.
size());
437 for (
unsigned int i = 0; i < soa1.
size(); ++i) {
438 SoAFloatPrecision fxacc = 0;
439 SoAFloatPrecision fyacc = 0;
440 SoAFloatPrecision fzacc = 0;
442 const auto ownedStateI = ownedStatePtr1[i];
448 if constexpr (useMixing) {
449 for (
unsigned int j = 0; j < soa2.
size(); ++j) {
452 if constexpr (applyShift) {
460#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum)
461 for (
unsigned int j = 0; j < soa2.
size(); ++j) {
462 if constexpr (useMixing) {
463 sigmaSquared = sigmaSquareds[j];
464 epsilon24 = epsilon24s[j];
465 if constexpr (applyShift) {
470 const auto ownedStateJ = ownedStatePtr2[j];
472 const SoAFloatPrecision drx = x1ptr[i] - x2ptr[j];
473 const SoAFloatPrecision dry = y1ptr[i] - y2ptr[j];
474 const SoAFloatPrecision drz = z1ptr[i] - z2ptr[j];
476 const SoAFloatPrecision drx2 = drx * drx;
477 const SoAFloatPrecision dry2 = dry * dry;
478 const SoAFloatPrecision drz2 = drz * drz;
480 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
486 const SoAFloatPrecision invdr2 = 1. / dr2;
487 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
488 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
489 const SoAFloatPrecision lj12 = lj6 * lj6;
490 const SoAFloatPrecision lj12m6 = lj12 - lj6;
491 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
493 const SoAFloatPrecision fx = drx * fac;
494 const SoAFloatPrecision fy = dry * fac;
495 const SoAFloatPrecision fz = drz * fac;
506 if constexpr (countFLOPs) {
508 if constexpr (newton3) {
509 numKernelCallsN3Sum += mask;
511 numKernelCallsNoN3Sum += mask;
515 if constexpr (calculateGlobals) {
516 SoAFloatPrecision virialx = drx * fx;
517 SoAFloatPrecision virialy = dry * fy;
518 SoAFloatPrecision virialz = drz * fz;
519 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
522 const SoAFloatPrecision energyFactor =
525 potentialEnergySum += potentialEnergy6 * energyFactor;
526 virialSumX += virialx * energyFactor;
527 virialSumY += virialy * energyFactor;
528 virialSumZ += virialz * energyFactor;
530 if constexpr (countFLOPs) {
531 if constexpr (newton3) {
532 numGlobalCalcsN3Sum += mask;
534 numGlobalCalcsNoN3Sum += mask;
543 if constexpr (countFLOPs) {
544 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
545 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
546 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
547 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
548 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
550 if (calculateGlobals) {
551 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
552 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
553 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
554 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
568 bool newton3)
final {
569 if (soa.size() == 0 or neighborList.empty())
return;
571 SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
573 SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
586 _epsilon24 = epsilon24;
587 _sigmaSquared = sigmaSquared;
599 return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
600 Particle_T::AttributeNames::posX,
601 Particle_T::AttributeNames::posY,
602 Particle_T::AttributeNames::posZ,
603 Particle_T::AttributeNames::forceX,
604 Particle_T::AttributeNames::forceY,
605 Particle_T::AttributeNames::forceZ,
606 Particle_T::AttributeNames::typeId,
607 Particle_T::AttributeNames::ownershipState};
614 return std::array<typename Particle_T::AttributeNames, 6>{
615 Particle_T::AttributeNames::id, Particle_T::AttributeNames::posX,
616 Particle_T::AttributeNames::posY, Particle_T::AttributeNames::posZ,
617 Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
624 return std::array<typename Particle_T::AttributeNames, 3>{
625 Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
639 _potentialEnergySum = 0.;
640 _virialSum = {0., 0., 0.};
641 _postProcessed =
false;
642 if constexpr (calculateGlobals) {
643 for (
auto &data : _aosThreadDataGlobals) {
647 if constexpr (countFLOPs) {
648 for (
auto &data : _aosThreadDataFLOPs) {
659 using namespace autopas::utils::ArrayMath::literals;
661 if (_postProcessed) {
663 "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
665 if (calculateGlobals) {
666 for (
const auto &data : _aosThreadDataGlobals) {
667 _potentialEnergySum += data.potentialEnergySum;
668 _virialSum += data.virialSum;
672 _potentialEnergySum *= 0.5;
676 _potentialEnergySum /= 6.;
677 _postProcessed =
true;
679 AutoPasLog(DEBUG,
"Final potential energy {}", _potentialEnergySum);
680 AutoPasLog(DEBUG,
"Final virial {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
689 if (not calculateGlobals) {
691 "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
693 "values, please specify calculateGlobals to be true.");
695 if (not _postProcessed) {
697 "Cannot get potential energy, because endTraversal was not called.");
699 return _potentialEnergySum;
707 if (not calculateGlobals) {
709 "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
710 "values, please specify calculateGlobals to be true.");
712 if (not _postProcessed) {
714 "Cannot get virial, because endTraversal was not called.");
716 return _virialSum[0] + _virialSum[1] + _virialSum[2];
756 if constexpr (countFLOPs) {
757 const size_t numDistCallsAcc =
758 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
759 [](
size_t sum,
const auto &data) { return sum + data.numDistCalls; });
760 const size_t numKernelCallsN3Acc =
761 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
762 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsN3; });
763 const size_t numKernelCallsNoN3Acc =
764 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
765 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsNoN3; });
766 const size_t numGlobalCalcsN3Acc =
767 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
768 [](
size_t sum,
const auto &data) { return sum + data.numGlobalCalcsN3; });
769 const size_t numGlobalCalcsNoN3Acc =
770 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
771 [](
size_t sum,
const auto &data) { return sum + data.numGlobalCalcsNoN3; });
773 constexpr size_t numFLOPsPerDistanceCall = 8;
774 constexpr size_t numFLOPsPerN3KernelCall = 18;
775 constexpr size_t numFLOPsPerNoN3KernelCall = 15;
776 constexpr size_t numFLOPsPerN3GlobalCalc = applyShift ? 13 : 12;
777 constexpr size_t numFLOPsPerNoN3GlobalCalc = applyShift ? 9 : 8;
779 return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
780 numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
781 numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
784 return std::numeric_limits<size_t>::max();
789 if constexpr (countFLOPs) {
790 const size_t numDistCallsAcc =
791 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
792 [](
size_t sum,
const auto &data) { return sum + data.numDistCalls; });
793 const size_t numKernelCallsN3Acc =
794 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
795 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsN3; });
796 const size_t numKernelCallsNoN3Acc =
797 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
798 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsNoN3; });
800 return (
static_cast<double>(numKernelCallsNoN3Acc) +
static_cast<double>(numKernelCallsN3Acc)) /
801 (
static_cast<double>(numDistCallsAcc));
804 return std::numeric_limits<double>::quiet_NaN();
809 template <
bool newton3>
812 const auto *
const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
813 const auto *
const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
814 const auto *
const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
816 auto *
const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
817 auto *
const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
818 auto *
const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
819 [[maybe_unused]]
auto *
const __restrict typeptr1 = soa.template begin<Particle_T::AttributeNames::typeId>();
820 [[maybe_unused]]
auto *
const __restrict typeptr2 = soa.template begin<Particle_T::AttributeNames::typeId>();
822 const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
824 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
825 SoAFloatPrecision shift6 = _shift6;
826 SoAFloatPrecision sigmaSquared = _sigmaSquared;
827 SoAFloatPrecision epsilon24 = _epsilon24;
829 SoAFloatPrecision potentialEnergySum = 0.;
830 SoAFloatPrecision virialSumX = 0.;
831 SoAFloatPrecision virialSumY = 0.;
832 SoAFloatPrecision virialSumZ = 0.;
835 size_t numDistanceCalculationSum = 0;
836 size_t numKernelCallsN3Sum = 0;
837 size_t numKernelCallsNoN3Sum = 0;
838 size_t numGlobalCalcsN3Sum = 0;
839 size_t numGlobalCalcsNoN3Sum = 0;
841 SoAFloatPrecision fxacc = 0;
842 SoAFloatPrecision fyacc = 0;
843 SoAFloatPrecision fzacc = 0;
844 const size_t neighborListSize = neighborList.size();
845 const size_t *
const __restrict neighborListPtr = neighborList.data();
848 const auto ownedStateI = ownedStatePtr[indexFirst];
864 constexpr size_t vecsize = 16;
867 constexpr size_t vecsize = 12;
873 if (neighborListSize >= vecsize) {
874 alignas(64) std::array<SoAFloatPrecision, vecsize> xtmp, ytmp, ztmp, xArr, yArr, zArr, fxArr, fyArr, fzArr;
875 alignas(64) std::array<autopas::OwnershipState, vecsize> ownedStateArr{};
877 for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
878 xtmp[tmpj] = xptr[indexFirst];
879 ytmp[tmpj] = yptr[indexFirst];
880 ztmp[tmpj] = zptr[indexFirst];
883 for (; joff < neighborListSize - vecsize + 1; joff += vecsize) {
891 if constexpr (useMixing) {
892 for (
size_t j = 0; j < vecsize; j++) {
895 epsilon24s[j] = _PPLibrary->
getMixing24Epsilon(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
896 if constexpr (applyShift) {
897 shift6s[j] = _PPLibrary->
getMixingShift6(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
903#pragma omp simd safelen(vecsize)
904 for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
905 xArr[tmpj] = xptr[neighborListPtr[joff + tmpj]];
906 yArr[tmpj] = yptr[neighborListPtr[joff + tmpj]];
907 zArr[tmpj] = zptr[neighborListPtr[joff + tmpj]];
908 ownedStateArr[tmpj] = ownedStatePtr[neighborListPtr[joff + tmpj]];
911#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) safelen(vecsize)
912 for (
size_t j = 0; j < vecsize; j++) {
913 if constexpr (useMixing) {
914 sigmaSquared = sigmaSquareds[j];
915 epsilon24 = epsilon24s[j];
916 if constexpr (applyShift) {
922 const auto ownedStateJ = ownedStateArr[j];
924 const SoAFloatPrecision drx = xtmp[j] - xArr[j];
925 const SoAFloatPrecision dry = ytmp[j] - yArr[j];
926 const SoAFloatPrecision drz = ztmp[j] - zArr[j];
928 const SoAFloatPrecision drx2 = drx * drx;
929 const SoAFloatPrecision dry2 = dry * dry;
930 const SoAFloatPrecision drz2 = drz * drz;
932 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
938 const SoAFloatPrecision invdr2 = 1. / dr2;
939 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
940 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
941 const SoAFloatPrecision lj12 = lj6 * lj6;
942 const SoAFloatPrecision lj12m6 = lj12 - lj6;
943 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
945 const SoAFloatPrecision fx = drx * fac;
946 const SoAFloatPrecision fy = dry * fac;
947 const SoAFloatPrecision fz = drz * fac;
958 if constexpr (countFLOPs) {
960 if constexpr (newton3) {
961 numKernelCallsN3Sum += mask;
963 numKernelCallsNoN3Sum += mask;
967 if (calculateGlobals) {
968 SoAFloatPrecision virialx = drx * fx;
969 SoAFloatPrecision virialy = dry * fy;
970 SoAFloatPrecision virialz = drz * fz;
971 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
975 const SoAFloatPrecision energyFactor =
978 potentialEnergySum += potentialEnergy6 * energyFactor;
979 virialSumX += virialx * energyFactor;
980 virialSumY += virialy * energyFactor;
981 virialSumZ += virialz * energyFactor;
983 if constexpr (countFLOPs) {
984 if constexpr (newton3) {
985 numGlobalCalcsN3Sum += mask;
987 numGlobalCalcsNoN3Sum += mask;
994#pragma omp simd safelen(vecsize)
995 for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
996 const size_t j = neighborListPtr[joff + tmpj];
997 fxptr[j] -= fxArr[tmpj];
998 fyptr[j] -= fyArr[tmpj];
999 fzptr[j] -= fzArr[tmpj];
1005 for (
size_t jNeighIndex = joff; jNeighIndex < neighborListSize; ++jNeighIndex) {
1006 size_t j = neighborList[jNeighIndex];
1007 if (indexFirst == j)
continue;
1008 if constexpr (useMixing) {
1011 if constexpr (applyShift) {
1012 shift6 = _PPLibrary->
getMixingShift6(typeptr1[indexFirst], typeptr2[j]);
1016 const auto ownedStateJ = ownedStatePtr[j];
1021 const SoAFloatPrecision drx = xptr[indexFirst] - xptr[j];
1022 const SoAFloatPrecision dry = yptr[indexFirst] - yptr[j];
1023 const SoAFloatPrecision drz = zptr[indexFirst] - zptr[j];
1025 const SoAFloatPrecision drx2 = drx * drx;
1026 const SoAFloatPrecision dry2 = dry * dry;
1027 const SoAFloatPrecision drz2 = drz * drz;
1029 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
1031 if constexpr (countFLOPs) {
1032 numDistanceCalculationSum += 1;
1035 if (dr2 > cutoffSquared) {
1039 const SoAFloatPrecision invdr2 = 1. / dr2;
1040 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
1041 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
1042 const SoAFloatPrecision lj12 = lj6 * lj6;
1043 const SoAFloatPrecision lj12m6 = lj12 - lj6;
1044 const SoAFloatPrecision fac = epsilon24 * (lj12 + lj12m6) * invdr2;
1046 const SoAFloatPrecision fx = drx * fac;
1047 const SoAFloatPrecision fy = dry * fac;
1048 const SoAFloatPrecision fz = drz * fac;
1059 if constexpr (countFLOPs) {
1060 if constexpr (newton3) {
1061 numKernelCallsN3Sum += 1;
1063 numKernelCallsNoN3Sum += 1;
1067 if (calculateGlobals) {
1068 SoAFloatPrecision virialx = drx * fx;
1069 SoAFloatPrecision virialy = dry * fy;
1070 SoAFloatPrecision virialz = drz * fz;
1071 SoAFloatPrecision potentialEnergy6 = (epsilon24 * lj12m6 + shift6);
1074 const SoAFloatPrecision energyFactor =
1077 potentialEnergySum += potentialEnergy6 * energyFactor;
1078 virialSumX += virialx * energyFactor;
1079 virialSumY += virialy * energyFactor;
1080 virialSumZ += virialz * energyFactor;
1082 if constexpr (countFLOPs) {
1083 if constexpr (newton3) {
1084 ++numGlobalCalcsN3Sum;
1086 ++numGlobalCalcsNoN3Sum;
1092 if (fxacc != 0 or fyacc != 0 or fzacc != 0) {
1093 fxptr[indexFirst] += fxacc;
1094 fyptr[indexFirst] += fyacc;
1095 fzptr[indexFirst] += fzacc;
1098 if constexpr (countFLOPs) {
1099 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
1100 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
1101 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
1102 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
1103 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
1106 if (calculateGlobals) {
1107 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
1108 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
1109 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
1110 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
1118 class AoSThreadDataGlobals {
1120 AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
1122 virialSum = {0., 0., 0.};
1123 potentialEnergySum = 0.;
1127 std::array<double, 3> virialSum;
1128 double potentialEnergySum;
1132 double __remainingTo64[(64 - 4 *
sizeof(double)) /
sizeof(double)];
1142 class AoSThreadDataFLOPs {
1144 AoSThreadDataFLOPs() : __remainingTo64{} {}
1150 numKernelCallsNoN3 = 0;
1151 numKernelCallsN3 = 0;
1153 numGlobalCalcsNoN3 = 0;
1154 numGlobalCalcsN3 = 0;
1161 size_t numKernelCallsNoN3 = 0;
1167 size_t numKernelCallsN3 = 0;
1173 size_t numDistCalls = 0;
1179 size_t numGlobalCalcsN3 = 0;
1185 size_t numGlobalCalcsNoN3 = 0;
1191 double __remainingTo64[(64 - 5 *
sizeof(size_t)) /
sizeof(size_t)];
1195 static_assert(
sizeof(AoSThreadDataGlobals) % 64 == 0,
"AoSThreadDataGlobals has wrong size");
1196 static_assert(
sizeof(AoSThreadDataFLOPs) % 64 == 0,
"AoSThreadDataFLOPs has wrong size");
1198 const double _cutoffSquared;
1200 double _epsilon24, _sigmaSquared, _shift6 = 0;
1205 double _potentialEnergySum;
1208 std::array<double, 3> _virialSum;
1211 std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals{};
1212 std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
1215 bool _postProcessed;
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingShift6(intType i, intType j) const
Returns precomputed mixed shift * 6 for one pair of site types.
Definition: ParticlePropertiesLibrary.h:262
floatType getMixingSigmaSquared(intType i, intType j) const
Returns precomputed mixed squared sigma for one pair of site types.
Definition: ParticlePropertiesLibrary.h:252
floatType getMixing24Epsilon(intType i, intType j) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:224
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:575
auto getLJMixingData(intType i, intType j) const
Get complete mixing data for one pair of LJ site types.
Definition: ParticlePropertiesLibrary.h:234
AlignedAllocator class.
Definition: AlignedAllocator.h:29
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115
A functor to handle lennard-jones interactions between two particles (molecules).
Definition: LJFunctor.h:41
static constexpr bool getMixing()
Definition: LJFunctor.h:632
double getHitRate() const override
Get the hit rate.
Definition: LJFunctor.h:788
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctor.h:120
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctor.h:112
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctor.h:116
void initTraversal() final
Reset the global values.
Definition: LJFunctor.h:638
LJFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: LJFunctor.h:88
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctor.h:598
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: LJFunctor.h:658
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctor.h:110
LJFunctor()=delete
Deleted default constructor.
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctor.h:613
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:201
std::string getName() final
Returns name of functor.
Definition: LJFunctor.h:108
LJFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: LJFunctor.h:100
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:366
void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared)
Sets the particle properties constants for this functor.
Definition: LJFunctor.h:585
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctor.h:566
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: LJFunctor.h:755
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctor.h:623
double getPotentialEnergy()
Get the potential Energy.
Definition: LJFunctor.h:688
double getVirial()
Get the virial.
Definition: LJFunctor.h:706
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132
constexpr unsigned int DEFAULT_CACHE_LINE_SIZE
Default size for a cache line.
Definition: AlignedAllocator.h:21