10#include <hwy/highway.h>
24namespace highway = hwy::HWY_NAMESPACE;
28constexpr highway::ScalableTag<int64_t>
tag_long;
44using MaskDouble =
decltype(highway::FirstN(tag_double, 1));
46using MaskLong =
decltype(highway::FirstN(tag_long, 2));
61template <
class Particle_T,
bool applyShift =
false,
bool useMixing =
false,
63 bool countFLOPs =
false,
bool relevantForTuning =
true>
67 calculateGlobals, countFLOPs, relevantForTuning>> {
68 using SoAArraysType = Particle_T::SoAArraysType;
83 particlePropertiesLibrary = std::nullopt)
85 _cutoffSquareAoS{cutoff * cutoff},
86 _PPLibrary{particlePropertiesLibrary} {
87 if (calculateGlobals) {
91 if constexpr (countFLOPs) {
92 AutoPasLog(DEBUG,
"Using LJFunctorHWY with countFLOPs but FLOP counting is not implemented.");
95 if constexpr (useMixing) {
96 if (not _PPLibrary.has_value()) {
97 throw std::runtime_error(
"Mixing is enabled but no ParticlePropertiesLibrary was provided!");
100 if (_PPLibrary.has_value()) {
101 throw std::runtime_error(
"Mixing is disabled but a ParticlePropertiesLibrary was provided!");
106 std::string
getName() final {
return "LJFunctorHWY"; }
111 return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
115 return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
126 return std::ranges::find(_vecPatternsAllowed, vecPattern) != _vecPatternsAllowed.end();
132 inline void AoSFunctor(Particle_T &i, Particle_T &j,
bool newton3)
final {
133 using namespace autopas::utils::ArrayMath::literals;
134 if (i.isDummy() or j.isDummy()) {
137 auto sigmaSquare = _sigmaSquareAoS;
138 auto epsilon24 = _epsilon24AoS;
139 auto shift6 = _shift6AoS;
140 if constexpr (useMixing) {
141 sigmaSquare = _PPLibrary->get().getMixingSigmaSquared(i.getTypeId(), j.getTypeId());
142 epsilon24 = _PPLibrary->get().getMixing24Epsilon(i.getTypeId(), j.getTypeId());
143 if constexpr (applyShift) {
144 shift6 = _PPLibrary->get().getMixingShift6(i.getTypeId(), j.getTypeId());
147 const auto dr = i.getR() - j.getR();
150 if (dr2 > _cutoffSquareAoS) {
154 const double invdr2 = 1. / dr2;
155 double lj6 = sigmaSquare * invdr2;
156 lj6 = lj6 * lj6 * lj6;
157 const double lj12 = lj6 * lj6;
158 const double lj12m6 = lj12 - lj6;
159 const double fac = epsilon24 * (lj12 + lj12m6) * invdr2;
160 const auto f = dr * fac;
166 if (calculateGlobals) {
167 const auto virial = dr * f;
168 const double potentialEnergy6 = epsilon24 * lj12m6 + shift6;
173 _aosThreadData[threadnum].potentialEnergySum += potentialEnergy6;
174 _aosThreadData[threadnum].virialSum += virial;
177 if (newton3 and j.isOwned()) {
179 _aosThreadData[threadnum].potentialEnergySum += potentialEnergy6;
180 _aosThreadData[threadnum].virialSum += virial;
190 if (soa.size() == 0)
return;
193 const auto *
const __restrict xPtr = soa.template begin<Particle_T::AttributeNames::posX>();
194 const auto *
const __restrict yPtr = soa.template begin<Particle_T::AttributeNames::posY>();
195 const auto *
const __restrict zPtr = soa.template begin<Particle_T::AttributeNames::posZ>();
197 const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
199 auto *
const __restrict fxPtr = soa.template begin<Particle_T::AttributeNames::forceX>();
200 auto *
const __restrict fyPtr = soa.template begin<Particle_T::AttributeNames::forceY>();
201 auto *
const __restrict fzPtr = soa.template begin<Particle_T::AttributeNames::forceZ>();
203 const auto *
const __restrict typeIDptr = soa.template begin<Particle_T::AttributeNames::typeId>();
211 for (std::ptrdiff_t i =
static_cast<std::ptrdiff_t
>(soa.size()) - 1;
212 checkFirstLoopCondition<true, VectorizationPattern::p1xVec>(i, 0);
213 decrementFirstLoop<VectorizationPattern::p1xVec>(i)) {
214 static_assert(std::is_same_v<std::underlying_type_t<autopas::OwnershipState>, int64_t>,
215 "OwnershipStates underlying type should be int64_t!");
217 handleILoopBody<true, true, false, VectorizationPattern::p1xVec>(
218 i, xPtr, yPtr, zPtr, ownedStatePtr, xPtr, yPtr, zPtr, ownedStatePtr, fxPtr, fyPtr, fzPtr, fxPtr, fyPtr, fzPtr,
219 typeIDptr, typeIDptr, virialSumX, virialSumY, virialSumZ, uPotSum, 0, i);
222 if constexpr (calculateGlobals) {
223 computeGlobals(virialSumX, virialSumY, virialSumZ, uPotSum);
233 bool newton3)
final {
234 switch (_vecPattern) {
235 case VectorizationPattern::p1xVec: {
237 SoAFunctorPairImpl<true, VectorizationPattern::p1xVec>(soa1, soa2);
239 SoAFunctorPairImpl<false, VectorizationPattern::p1xVec>(soa1, soa2);
243 case VectorizationPattern::p2xVecDiv2: {
245 SoAFunctorPairImpl<true, VectorizationPattern::p2xVecDiv2>(soa1, soa2);
247 SoAFunctorPairImpl<false, VectorizationPattern::p2xVecDiv2>(soa1, soa2);
251 case VectorizationPattern::pVecDiv2x2: {
253 SoAFunctorPairImpl<true, VectorizationPattern::pVecDiv2x2>(soa1, soa2);
255 SoAFunctorPairImpl<false, VectorizationPattern::pVecDiv2x2>(soa1, soa2);
259 case VectorizationPattern::pVecx1: {
261 SoAFunctorPairImpl<true, VectorizationPattern::pVecx1>(soa1, soa2);
263 SoAFunctorPairImpl<false, VectorizationPattern::pVecx1>(soa1, soa2);
282 template <
bool reversed, VectorizationPattern vecPattern>
283 static constexpr bool checkFirstLoopCondition(
const std::ptrdiff_t i,
const long vecEnd) {
284 if constexpr (reversed) {
285 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
287 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
289 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
291 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
297 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
299 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
300 return i < (vecEnd - 1);
301 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
306 return i < criterion;
307 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
312 return i < criterion;
326 template <VectorizationPattern vecPattern>
327 static constexpr void decrementFirstLoop(std::ptrdiff_t &i) {
328 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
330 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
332 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
334 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
346 template <VectorizationPattern vecPattern>
347 static constexpr void incrementFirstLoop(std::ptrdiff_t &i) {
348 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
350 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
352 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
354 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
368 template <
bool reversed>
369 static constexpr int obtainILoopRemainderLength(std::ptrdiff_t i,
const int vecEnd) {
370 return reversed ? (i < 0 ? 0 : i + 1) : vecEnd - i;
381 template <VectorizationPattern vecPattern>
382 static constexpr bool checkSecondLoopCondition(std::ptrdiff_t i,
size_t j) {
383 std::ptrdiff_t limit = 0;
385 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
388 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
391 limit = i - (i % block);
392 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
395 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
403 return j < static_cast<size_t>(limit);
413 template <VectorizationPattern vecPattern>
414 static constexpr void incrementSecondLoop(std::ptrdiff_t &j) {
415 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
417 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
419 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
421 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
434 template <VectorizationPattern vecPattern>
435 static constexpr int obtainJLoopRemainderLength(
const std::ptrdiff_t j) {
436 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
438 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
440 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
441 return static_cast<int>(j & (1));
442 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
467 template <
bool remainder,
bool reversed, VectorizationPattern vecPattern>
468 static void fillIRegisters(
const size_t i,
const double *
const __restrict xPtr,
const double *
const __restrict yPtr,
469 const double *
const __restrict zPtr,
471 VectorDouble &y1, VectorDouble &z1, MaskDouble &ownedMaskI,
const size_t restI) {
472 VectorLong ownedStateILong = highway::Zero(tag_long);
474 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
475 const auto owned =
static_cast<int64_t
>(ownedStatePtr[i]);
476 ownedStateILong = highway::Set(tag_long, owned);
478 x1 = highway::Set(tag_double, xPtr[i]);
479 y1 = highway::Set(tag_double, yPtr[i]);
480 z1 = highway::Set(tag_double, zPtr[i]);
481 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
482 const auto ownedFirst =
static_cast<int64_t
>(ownedStatePtr[i]);
483 ownedStateILong = highway::Set(tag_long, ownedFirst);
485 x1 = highway::Set(tag_double, xPtr[i]);
486 y1 = highway::Set(tag_double, yPtr[i]);
487 z1 = highway::Set(tag_double, zPtr[i]);
489 VectorLong tmpOwnedI = highway::Zero(tag_long);
494 if constexpr (not remainder) {
495 const auto index = reversed ? i - 1 : i + 1;
496 const auto ownedSecond =
static_cast<int64_t
>(ownedStatePtr[index]);
497 tmpOwnedI = highway::Set(tag_long, ownedSecond);
498 tmpX1 = highway::Set(tag_double, xPtr[index]);
499 tmpY1 = highway::Set(tag_double, yPtr[index]);
500 tmpZ1 = highway::Set(tag_double, zPtr[index]);
503 ownedStateILong = highway::ConcatLowerLower(tag_long, tmpOwnedI, ownedStateILong);
504 x1 = highway::ConcatLowerLower(tag_double, tmpX1, x1);
505 y1 = highway::ConcatLowerLower(tag_double, tmpY1, y1);
506 z1 = highway::ConcatLowerLower(tag_double, tmpZ1, z1);
507 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
508 const int index = reversed ? (remainder ? 0 : i -
_vecLengthDouble / 2 + 1) : i;
511 ownedStateILong = highway::LoadN(tag_long,
reinterpret_cast<const int64_t *
>(&ownedStatePtr[index]), lanes);
513 x1 = highway::LoadN(tag_double, &xPtr[index], lanes);
514 y1 = highway::LoadN(tag_double, &yPtr[index], lanes);
515 z1 = highway::LoadN(tag_double, &zPtr[index], lanes);
517 ownedStateILong = highway::ConcatLowerLower(tag_long, ownedStateILong, ownedStateILong);
518 x1 = highway::ConcatLowerLower(tag_double, x1, x1);
519 y1 = highway::ConcatLowerLower(tag_double, y1, y1);
520 z1 = highway::ConcatLowerLower(tag_double, z1, z1);
521 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
522 const auto index = reversed ? (remainder ? 0 : i -
_vecLengthDouble + 1) : i;
524 if constexpr (remainder) {
525 x1 = highway::LoadN(tag_double, &xPtr[index], restI);
526 y1 = highway::LoadN(tag_double, &yPtr[index], restI);
527 z1 = highway::LoadN(tag_double, &zPtr[index], restI);
529 ownedStateILong = highway::LoadN(tag_long,
reinterpret_cast<const int64_t *
>(&ownedStatePtr[index]), restI);
531 x1 = highway::LoadU(tag_double, &xPtr[index]);
532 y1 = highway::LoadU(tag_double, &yPtr[index]);
533 z1 = highway::LoadU(tag_double, &zPtr[index]);
535 ownedStateILong = highway::LoadU(tag_long,
reinterpret_cast<const int64_t *
>(&ownedStatePtr[index]));
539 MaskLong ownedMaskILong = highway::Ne(ownedStateILong, highway::Zero(tag_long));
542 ownedMaskI = highway::RebindMask(tag_double, ownedMaskILong);
545 template <
bool remainder, VectorizationPattern vecPattern>
546 static void handleNewton3Reduction(
const VectorDouble &fx,
const VectorDouble &fy,
const VectorDouble &fz,
547 double *
const __restrict fx2Ptr,
double *
const __restrict fy2Ptr,
548 double *
const __restrict fz2Ptr,
const size_t j,
const size_t rest) {
549 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
551 remainder ? highway::LoadN(tag_double, &fx2Ptr[j], rest) : highway::LoadU(
tag_double, &fx2Ptr[j]);
553 remainder ? highway::LoadN(tag_double, &fy2Ptr[j], rest) : highway::LoadU(
tag_double, &fy2Ptr[j]);
555 remainder ? highway::LoadN(tag_double, &fz2Ptr[j], rest) : highway::LoadU(
tag_double, &fz2Ptr[j]);
561 remainder ? highway::StoreN(fx2New, tag_double, &fx2Ptr[j], rest)
562 : highway::StoreU(fx2New,
tag_double, &fx2Ptr[j]);
563 remainder ? highway::StoreN(fy2New, tag_double, &fy2Ptr[j], rest)
564 : highway::StoreU(fy2New,
tag_double, &fy2Ptr[j]);
565 remainder ? highway::StoreN(fz2New, tag_double, &fz2Ptr[j], rest)
566 : highway::StoreU(fz2New,
tag_double, &fz2Ptr[j]);
567 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
568 const auto lowerFx = highway::LowerHalf(tag_double_half, fx);
569 const auto lowerFy = highway::LowerHalf(tag_double_half, fy);
570 const auto lowerFz = highway::LowerHalf(tag_double_half, fz);
572 const auto upperFx = highway::UpperHalf(tag_double_half, fx);
573 const auto upperFy = highway::UpperHalf(tag_double_half, fy);
574 const auto upperFz = highway::UpperHalf(tag_double_half, fz);
576 const auto fxCombined = highway::Add(lowerFx, upperFx);
577 const auto fyCombined = highway::Add(lowerFy, upperFy);
578 const auto fzCombined = highway::Add(lowerFz, upperFz);
582 const auto fx2 = highway::LoadN(tag_double_half, &fx2Ptr[j], lanes);
583 const auto fy2 = highway::LoadN(tag_double_half, &fy2Ptr[j], lanes);
584 const auto fz2 = highway::LoadN(tag_double_half, &fz2Ptr[j], lanes);
586 const auto newFx = highway::Sub(fx2, fxCombined);
587 const auto newFy = highway::Sub(fy2, fyCombined);
588 const auto newFz = highway::Sub(fz2, fzCombined);
590 highway::StoreN(newFx, tag_double_half, &fx2Ptr[j], lanes);
591 highway::StoreN(newFy, tag_double_half, &fy2Ptr[j], lanes);
592 highway::StoreN(newFz, tag_double_half, &fz2Ptr[j], lanes);
593 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
594 const auto lowerFx = highway::LowerHalf(tag_double_half, fx);
595 const auto lowerFy = highway::LowerHalf(tag_double_half, fy);
596 const auto lowerFz = highway::LowerHalf(tag_double_half, fz);
598 fx2Ptr[j] -= highway::ReduceSum(tag_double_half, lowerFx);
599 fy2Ptr[j] -= highway::ReduceSum(tag_double_half, lowerFy);
600 fz2Ptr[j] -= highway::ReduceSum(tag_double_half, lowerFz);
602 if constexpr (not remainder) {
603 const auto upperFx = highway::UpperHalf(tag_double_half, fx);
604 const auto upperFy = highway::UpperHalf(tag_double_half, fy);
605 const auto upperFz = highway::UpperHalf(tag_double_half, fz);
607 fx2Ptr[j + 1] -= highway::ReduceSum(tag_double_half, upperFx);
608 fy2Ptr[j + 1] -= highway::ReduceSum(tag_double_half, upperFy);
609 fz2Ptr[j + 1] -= highway::ReduceSum(tag_double_half, upperFz);
611 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
612 fx2Ptr[j] -= highway::ReduceSum(tag_double, fx);
613 fy2Ptr[j] -= highway::ReduceSum(tag_double, fy);
614 fz2Ptr[j] -= highway::ReduceSum(tag_double, fz);
618 template <
bool reversed,
bool remainder, VectorizationPattern vecPattern>
619 static void reduceAccumulatedForce(
const size_t i,
double *
const __restrict fxPtr,
double *
const __restrict fyPtr,
620 double *
const __restrict fzPtr,
const VectorDouble &fxAcc,
621 const VectorDouble &fyAcc,
const VectorDouble &fzAcc,
const int restI) {
622 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
623 fxPtr[i] += highway::ReduceSum(tag_double, fxAcc);
624 fyPtr[i] += highway::ReduceSum(tag_double, fyAcc);
625 fzPtr[i] += highway::ReduceSum(tag_double, fzAcc);
626 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
627 const auto lowerFxAcc = highway::LowerHalf(tag_double_half, fxAcc);
628 const auto lowerFyAcc = highway::LowerHalf(tag_double_half, fyAcc);
629 const auto lowerFzAcc = highway::LowerHalf(tag_double_half, fzAcc);
631 fxPtr[i] += highway::ReduceSum(tag_double_half, lowerFxAcc);
632 fyPtr[i] += highway::ReduceSum(tag_double_half, lowerFyAcc);
633 fzPtr[i] += highway::ReduceSum(tag_double_half, lowerFzAcc);
635 if constexpr (not remainder) {
636 const auto upperFxAcc = highway::UpperHalf(tag_double_half, fxAcc);
637 const auto upperFyAcc = highway::UpperHalf(tag_double_half, fyAcc);
638 const auto upperFzAcc = highway::UpperHalf(tag_double_half, fzAcc);
640 const auto index = reversed ? i - 1 : i + 1;
641 fxPtr[index] += highway::ReduceSum(tag_double_half, upperFxAcc);
642 fyPtr[index] += highway::ReduceSum(tag_double_half, upperFyAcc);
643 fzPtr[index] += highway::ReduceSum(tag_double_half, upperFzAcc);
645 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
646 const auto lowerFxAcc = highway::LowerHalf(tag_double_half, fxAcc);
647 const auto lowerFyAcc = highway::LowerHalf(tag_double_half, fyAcc);
648 const auto lowerFzAcc = highway::LowerHalf(tag_double_half, fzAcc);
650 const auto upperFxAcc = highway::UpperHalf(tag_double_half, fxAcc);
651 const auto upperFyAcc = highway::UpperHalf(tag_double_half, fyAcc);
652 const auto upperFzAcc = highway::UpperHalf(tag_double_half, fzAcc);
654 const auto fxAccCombined = highway::Add(lowerFxAcc, upperFxAcc);
655 const auto fyAccCombined = highway::Add(lowerFyAcc, upperFyAcc);
656 const auto fzAccCombined = highway::Add(lowerFzAcc, upperFzAcc);
658 const int index = reversed ? (remainder ? 0 : i -
_vecLengthDouble / 2 + 1) : i;
662 const auto oldFx = highway::LoadN(tag_double_half, &fxPtr[index], lanes);
663 const auto oldFy = highway::LoadN(tag_double_half, &fyPtr[index], lanes);
664 const auto oldFz = highway::LoadN(tag_double_half, &fzPtr[index], lanes);
666 const auto newFx = highway::Add(oldFx, fxAccCombined);
667 const auto newFy = highway::Add(oldFy, fyAccCombined);
668 const auto newFz = highway::Add(oldFz, fzAccCombined);
670 highway::StoreN(newFx, tag_double_half, &fxPtr[index], lanes);
671 highway::StoreN(newFy, tag_double_half, &fyPtr[index], lanes);
672 highway::StoreN(newFz, tag_double_half, &fzPtr[index], lanes);
673 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
675 remainder ? highway::LoadN(tag_double, &fxPtr[i], restI) : highway::LoadU(
tag_double, &fxPtr[i]);
677 remainder ? highway::LoadN(tag_double, &fyPtr[i], restI) : highway::LoadU(
tag_double, &fyPtr[i]);
679 remainder ? highway::LoadN(tag_double, &fzPtr[i], restI) : highway::LoadU(
tag_double, &fzPtr[i]);
685 remainder ? highway::StoreN(fxNew, tag_double, &fxPtr[i], restI) : highway::StoreU(fxNew,
tag_double, &fxPtr[i]);
686 remainder ? highway::StoreN(fyNew, tag_double, &fyPtr[i], restI) : highway::StoreU(fyNew,
tag_double, &fyPtr[i]);
687 remainder ? highway::StoreN(fzNew, tag_double, &fzPtr[i], restI) : highway::StoreU(fzNew,
tag_double, &fzPtr[i]);
691 inline void computeGlobals(
const VectorDouble &virialSumX,
const VectorDouble &virialSumY,
692 const VectorDouble &virialSumZ,
const VectorDouble &uPotSum) {
695 _aosThreadData[threadnum].virialSum[0] += highway::ReduceSum(tag_double, virialSumX);
696 _aosThreadData[threadnum].virialSum[1] += highway::ReduceSum(tag_double, virialSumY);
697 _aosThreadData[threadnum].virialSum[2] += highway::ReduceSum(tag_double, virialSumZ);
698 _aosThreadData[threadnum].potentialEnergySum += highway::ReduceSum(tag_double, uPotSum);
734 template <
bool reversed,
bool newton3,
bool remainderI, VectorizationPattern vecPattern>
735 inline void handleILoopBody(
736 const size_t i,
const double *
const __restrict xPtr1,
const double *
const __restrict yPtr1,
738 const double *
const __restrict xPtr2,
const double *
const __restrict yPtr2,
const double *
const __restrict zPtr2,
740 double *
const __restrict fyPtr1,
double *
const __restrict fzPtr1,
double *
const __restrict fxPtr2,
741 double *
const __restrict fyPtr2,
double *
const __restrict fzPtr2,
const size_t *
const __restrict typeIDptr1,
742 const size_t *
const __restrict typeIDptr2, VectorDouble &virialSumX, VectorDouble &virialSumY,
743 VectorDouble &virialSumZ, VectorDouble &uPotSum,
const size_t restI,
const size_t jVecEnd) {
754 fillIRegisters<remainderI, reversed, vecPattern>(i, xPtr1, yPtr1, zPtr1, ownedStatePtr1, x1, y1, z1, ownedMaskI,
757 std::ptrdiff_t j = 0;
758 for (; checkSecondLoopCondition<vecPattern>(jVecEnd, j); incrementSecondLoop<vecPattern>(j)) {
759 SoAKernel<newton3, remainderI, false, reversed, vecPattern>(
760 i, j, ownedMaskI,
reinterpret_cast<const int64_t *
>(ownedStatePtr2), x1, y1, z1, xPtr2, yPtr2, zPtr2, fxPtr2,
761 fyPtr2, fzPtr2, &typeIDptr1[i], &typeIDptr2[j], fxAcc, fyAcc, fzAcc, virialSumX, virialSumY, virialSumZ,
765 const int restJ = obtainJLoopRemainderLength<vecPattern>(jVecEnd);
767 SoAKernel<newton3, remainderI, true, reversed, vecPattern>(
768 i, j, ownedMaskI,
reinterpret_cast<const int64_t *
>(ownedStatePtr2), x1, y1, z1, xPtr2, yPtr2, zPtr2, fxPtr2,
769 fyPtr2, fzPtr2, &typeIDptr1[i], &typeIDptr2[j], fxAcc, fyAcc, fzAcc, virialSumX, virialSumY, virialSumZ,
770 uPotSum, restI, restJ);
773 reduceAccumulatedForce<reversed, remainderI, vecPattern>(i, fxPtr1, fyPtr1, fzPtr1, fxAcc, fyAcc, fzAcc, restI);
783 template <
bool newton3, VectorizationPattern vecPattern>
785 if (soa1.
size() == 0 || soa2.
size() == 0) {
789 const auto *
const __restrict x1Ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
790 const auto *
const __restrict y1Ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
791 const auto *
const __restrict z1Ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
792 const auto *
const __restrict x2Ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
793 const auto *
const __restrict y2Ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
794 const auto *
const __restrict z2Ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
796 const auto *
const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
797 const auto *
const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();
799 auto *
const __restrict fx1Ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
800 auto *
const __restrict fy1Ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
801 auto *
const __restrict fz1Ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
802 auto *
const __restrict fx2Ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
803 auto *
const __restrict fy2Ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
804 auto *
const __restrict fz2Ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
806 const auto *
const __restrict typeID1ptr = soa1.template begin<Particle_T::AttributeNames::typeId>();
807 const auto *
const __restrict typeID2ptr = soa2.template begin<Particle_T::AttributeNames::typeId>();
814 std::ptrdiff_t i = 0;
815 for (; checkFirstLoopCondition<false, vecPattern>(i, soa1.
size()); incrementFirstLoop<vecPattern>(i)) {
816 handleILoopBody<false, newton3, false, vecPattern>(
817 i, x1Ptr, y1Ptr, z1Ptr, ownedStatePtr1, x2Ptr, y2Ptr, z2Ptr, ownedStatePtr2, fx1Ptr, fy1Ptr, fz1Ptr, fx2Ptr,
818 fy2Ptr, fz2Ptr, typeID1ptr, typeID2ptr, virialSumX, virialSumY, virialSumZ, uPotSum, 0, soa2.
size());
821 if constexpr (vecPattern != VectorizationPattern::p1xVec) {
823 const int restI = obtainILoopRemainderLength<false>(i, soa1.
size());
825 handleILoopBody<false, newton3, true, vecPattern>(
826 i, x1Ptr, y1Ptr, z1Ptr, ownedStatePtr1, x2Ptr, y2Ptr, z2Ptr, ownedStatePtr2, fx1Ptr, fy1Ptr, fz1Ptr, fx2Ptr,
827 fy2Ptr, fz2Ptr, typeID1ptr, typeID2ptr, virialSumX, virialSumY, virialSumZ, uPotSum, restI, soa2.
size());
831 if constexpr (calculateGlobals) {
832 computeGlobals(virialSumX, virialSumY, virialSumZ, uPotSum);
853 template <
bool remainder, VectorizationPattern vecPattern>
854 static void fillJRegisters(
const size_t j,
const double *
const __restrict x2Ptr,
const double *
const __restrict y2Ptr,
855 const double *
const __restrict z2Ptr,
const int64_t *
const __restrict ownedStatePtr2,
856 VectorDouble &x2, VectorDouble &y2, VectorDouble &z2, MaskDouble &ownedMaskJ,
857 const unsigned int rest) {
858 VectorLong ownedStateJLong = highway::Zero(tag_long);
860 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
861 if constexpr (remainder) {
862 x2 = highway::LoadN(tag_double, &x2Ptr[j], rest);
863 y2 = highway::LoadN(tag_double, &y2Ptr[j], rest);
864 z2 = highway::LoadN(tag_double, &z2Ptr[j], rest);
866 ownedStateJLong = highway::LoadN(tag_long, &ownedStatePtr2[j], rest);
868 x2 = highway::LoadU(tag_double, &x2Ptr[j]);
869 y2 = highway::LoadU(tag_double, &y2Ptr[j]);
870 z2 = highway::LoadU(tag_double, &z2Ptr[j]);
872 ownedStateJLong = highway::LoadU(tag_long, &ownedStatePtr2[j]);
874 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
877 VectorLong ownedStateJ = highway::LoadN(tag_long, &ownedStatePtr2[j], lanes);
878 x2 = highway::LoadN(tag_double, &x2Ptr[j], lanes);
879 y2 = highway::LoadN(tag_double, &y2Ptr[j], lanes);
880 z2 = highway::LoadN(tag_double, &z2Ptr[j], lanes);
883 ownedStateJLong = highway::ConcatLowerLower(tag_long, ownedStateJ, ownedStateJ);
884 x2 = highway::ConcatLowerLower(tag_double, x2, x2);
885 y2 = highway::ConcatLowerLower(tag_double, y2, y2);
886 z2 = highway::ConcatLowerLower(tag_double, z2, z2);
887 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
888 VectorLong ownedStateJ = highway::Set(tag_long, ownedStatePtr2[j]);
889 x2 = highway::Set(tag_double, x2Ptr[j]);
890 y2 = highway::Set(tag_double, y2Ptr[j]);
891 z2 = highway::Set(tag_double, z2Ptr[j]);
893 if constexpr (remainder) {
894 ownedStateJLong = highway::ConcatLowerLower(tag_long, highway::Zero(tag_long), ownedStateJ);
895 x2 = highway::ConcatLowerLower(tag_double, highway::Zero(tag_double), x2);
896 y2 = highway::ConcatLowerLower(tag_double, highway::Zero(tag_double), y2);
897 z2 = highway::ConcatLowerLower(tag_double, highway::Zero(tag_double), z2);
899 const auto tmpOwnedJ = highway::Set(tag_long, ownedStatePtr2[j + 1]);
900 const auto tmpX2 = highway::Set(tag_double, x2Ptr[j + 1]);
901 const auto tmpY2 = highway::Set(tag_double, y2Ptr[j + 1]);
902 const auto tmpZ2 = highway::Set(tag_double, z2Ptr[j + 1]);
904 ownedStateJLong = highway::ConcatLowerLower(tag_long, tmpOwnedJ, ownedStateJ);
905 x2 = highway::ConcatLowerLower(tag_double, tmpX2, x2);
906 y2 = highway::ConcatLowerLower(tag_double, tmpY2, y2);
907 z2 = highway::ConcatLowerLower(tag_double, tmpZ2, z2);
909 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
910 ownedStateJLong = highway::Set(tag_long, ownedStatePtr2[j]);
911 x2 = highway::Set(tag_double, x2Ptr[j]);
912 y2 = highway::Set(tag_double, y2Ptr[j]);
913 z2 = highway::Set(tag_double, z2Ptr[j]);
916 MaskLong ownedMaskJLong = highway::Ne(ownedStateJLong, highway::Zero(tag_long));
919 ownedMaskJ = highway::RebindMask(tag_double, ownedMaskJLong);
922 template <
bool remainderI,
bool remainderJ,
bool reversed, VectorizationPattern vecPattern>
923 inline void fillPhysicsRegisters(
const size_t *
const typeID1Ptr,
const size_t *
const typeID2Ptr,
924 VectorDouble &epsilon24s, VectorDouble &sigmaSquareds, VectorDouble &shift6s,
925 const unsigned int restI,
const unsigned int restJ)
const {
928 HWY_ALIGN std::array<double, _maxVecLengthDouble> epsilons{};
929 HWY_ALIGN std::array<double, _maxVecLengthDouble> sigmas{};
930 HWY_ALIGN std::array<double, _maxVecLengthDouble> shifts{};
932 if constexpr (vecPattern == VectorizationPattern::p1xVec) {
934 epsilons[j] = _PPLibrary->get().getMixing24Epsilon(*typeID1Ptr, *(typeID2Ptr + j));
935 sigmas[j] = _PPLibrary->get().getMixingSigmaSquared(*typeID1Ptr, *(typeID2Ptr + j));
936 if constexpr (applyShift) {
937 shifts[j] = _PPLibrary->get().getMixingShift6(*typeID1Ptr, *(typeID2Ptr + j));
940 }
else if constexpr (vecPattern == VectorizationPattern::p2xVecDiv2) {
941 for (
int i = 0; i < (remainderI ? 1 : 2); ++i) {
944 const auto typeID1 = reversed ? typeID1Ptr - i : typeID1Ptr + i;
945 epsilons[index] = _PPLibrary->get().getMixing24Epsilon(*typeID1, *(typeID2Ptr + j));
946 sigmas[index] = _PPLibrary->get().getMixingSigmaSquared(*typeID1, *(typeID2Ptr + j));
948 if constexpr (applyShift) {
949 shifts[index] = _PPLibrary->get().getMixingShift6(*typeID1, *(typeID2Ptr + j));
953 }
else if constexpr (vecPattern == VectorizationPattern::pVecDiv2x2) {
955 for (
int j = 0; j < (remainderJ ? 1 : 2); ++j) {
957 const auto typeID1 = reversed ? typeID1Ptr - i : typeID1Ptr + i;
959 epsilons[index] = _PPLibrary->get().getMixing24Epsilon(*typeID1, *(typeID2Ptr + j));
960 sigmas[index] = _PPLibrary->get().getMixingSigmaSquared(*typeID1, *(typeID2Ptr + j));
962 if constexpr (applyShift) {
963 shifts[index] = _PPLibrary->get().getMixingShift6(*typeID1, *(typeID2Ptr + j));
967 }
else if constexpr (vecPattern == VectorizationPattern::pVecx1) {
969 auto typeID1 = reversed ? typeID1Ptr - i : typeID1Ptr + i;
970 epsilons[i] = _PPLibrary->get().getMixing24Epsilon(*typeID1, *typeID2Ptr);
971 sigmas[i] = _PPLibrary->get().getMixingSigmaSquared(*typeID1, *typeID2Ptr);
973 if constexpr (applyShift) {
974 shifts[i] = _PPLibrary->get().getMixingShift6(*typeID1, *typeID2Ptr);
979 epsilon24s = highway::Load(tag_double, epsilons.data());
980 sigmaSquareds = highway::Load(tag_double, sigmas.data());
981 if constexpr (applyShift) {
982 shift6s = highway::Load(tag_double, shifts.data());
1019 template <
bool newton3,
bool remainderI,
bool remainderJ,
bool reversed, VectorizationPattern vecPattern>
1020 inline void SoAKernel(
const size_t i,
const size_t j,
const MaskDouble &ownedMaskI,
1021 const int64_t *
const __restrict ownedStatePtr2,
const VectorDouble &x1,
const VectorDouble &y1,
1022 const VectorDouble &z1,
const double *
const __restrict x2Ptr,
1023 const double *
const __restrict y2Ptr,
const double *
const __restrict z2Ptr,
1024 double *
const __restrict fx2Ptr,
double *
const __restrict fy2Ptr,
1025 double *
const __restrict fz2Ptr,
const size_t *
const typeID1Ptr,
const size_t *
const typeID2Ptr,
1026 VectorDouble &fxAcc, VectorDouble &fyAcc, VectorDouble &fzAcc, VectorDouble &virialSumX,
1027 VectorDouble &virialSumY, VectorDouble &virialSumZ, VectorDouble &uPotSum,
1028 const unsigned int restI,
const unsigned int restJ) {
1029 VectorDouble epsilon24s = highway::Undefined(tag_double);
1030 VectorDouble sigmaSquareds = highway::Undefined(tag_double);
1033 if constexpr (useMixing) {
1034 fillPhysicsRegisters<remainderI, remainderJ, reversed, vecPattern>(typeID1Ptr, typeID2Ptr, epsilon24s,
1035 sigmaSquareds, shift6s, restI, restJ);
1037 epsilon24s = highway::Set(tag_double, _epsilon24AoS);
1038 sigmaSquareds = highway::Set(tag_double, _sigmaSquareAoS);
1039 if constexpr (applyShift) {
1040 shift6s = highway::Set(tag_double, _shift6AoS);
1042 shift6s = highway::Zero(tag_double);
1051 fillJRegisters<remainderJ, vecPattern>(j, x2Ptr, y2Ptr, z2Ptr, ownedStatePtr2, x2, y2, z2, ownedMaskJ, restJ);
1054 const auto drX = highway::Sub(x1, x2);
1055 const auto drY = highway::Sub(y1, y2);
1056 const auto drZ = highway::Sub(z1, z2);
1058 const auto drX2 = highway::Mul(drX, drX);
1059 const auto drY2 = highway::Mul(drY, drY);
1060 const auto drZ2 = highway::Mul(drZ, drZ);
1062 const auto dr2 = highway::Add(highway::Add(drX2, drY2), drZ2);
1064 VectorDouble cutoffSquared = highway::Set(tag_double, _cutoffSquareAoS);
1066 const auto dummyMask = highway::And(ownedMaskI, ownedMaskJ);
1067 const auto cutoffDummyMask = highway::MaskedLe(dummyMask, dr2, cutoffSquared);
1069 if (highway::AllFalse(tag_double, cutoffDummyMask)) {
1074 const auto invDr2 = highway::Div(highway::Set(tag_double, 1.0), dr2);
1075 const auto lj2 = highway::Mul(sigmaSquareds, invDr2);
1076 const auto lj4 = highway::Mul(lj2, lj2);
1077 const auto lj6 = highway::Mul(lj2, lj4);
1078 const auto lj12 = highway::Mul(lj6, lj6);
1079 const auto lj12m6 = highway::Sub(lj12, lj6);
1080 const auto lj12m6alj12 = highway::Add(lj12m6, lj12);
1081 const auto lj12m6alj12e = highway::Mul(lj12m6alj12, epsilon24s);
1082 const auto fac = highway::Mul(lj12m6alj12e, invDr2);
1084 const auto facMasked = highway::IfThenElseZero(cutoffDummyMask, fac);
1090 fxAcc = highway::Add(fxAcc, fx);
1091 fyAcc = highway::Add(fyAcc, fy);
1092 fzAcc = highway::Add(fzAcc, fz);
1094 if constexpr (newton3) {
1095 handleNewton3Reduction<remainderJ, vecPattern>(fx, fy, fz, fx2Ptr, fy2Ptr, fz2Ptr, j, restJ);
1098 if constexpr (calculateGlobals) {
1099 auto virialX = highway::Mul(fx, drX);
1100 auto virialY = highway::Mul(fy, drY);
1101 auto virialZ = highway::Mul(fz, drZ);
1103 auto uPot = highway::MulAdd(epsilon24s, lj12m6, shift6s);
1104 auto uPotMasked = highway::IfThenElseZero(cutoffDummyMask, uPot);
1106 auto energyFactor = highway::MaskedSet(tag_double, dummyMask, 1.0);
1108 if constexpr (newton3) {
1109 energyFactor = highway::Add(energyFactor, highway::MaskedSet(tag_double, dummyMask, 1.0));
1112 uPotSum = highway::MulAdd(energyFactor, uPotMasked, uPotSum);
1113 virialSumX = highway::MulAdd(energyFactor, virialX, virialSumX);
1114 virialSumY = highway::MulAdd(energyFactor, virialY, virialSumY);
1115 virialSumZ = highway::MulAdd(energyFactor, virialZ, virialSumZ);
1129 bool newton3)
final {
1130 if (soa.size() == 0 or neighborList.empty())
return;
1132 SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
1134 SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
1139 template <
bool newton3>
1142 const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
1147 const auto *
const __restrict xPtr = soa.template begin<Particle_T::AttributeNames::posX>();
1148 const auto *
const __restrict yPtr = soa.template begin<Particle_T::AttributeNames::posY>();
1149 const auto *
const __restrict zPtr = soa.template begin<Particle_T::AttributeNames::posZ>();
1151 auto *
const __restrict fxPtr = soa.template begin<Particle_T::AttributeNames::forceX>();
1152 auto *
const __restrict fyPtr = soa.template begin<Particle_T::AttributeNames::forceY>();
1153 auto *
const __restrict fzPtr = soa.template begin<Particle_T::AttributeNames::forceZ>();
1155 const auto *
const __restrict typeIDPtr = soa.template begin<Particle_T::AttributeNames::typeId>();
1157 VectorDouble virialSumX = highway::Zero(tag_double);
1158 VectorDouble virialSumY = highway::Zero(tag_double);
1159 VectorDouble virialSumZ = highway::Zero(tag_double);
1160 VectorDouble uPotSum = highway::Zero(tag_double);
1161 VectorDouble fxAcc = highway::Zero(tag_double);
1162 VectorDouble fyAcc = highway::Zero(tag_double);
1163 VectorDouble fzAcc = highway::Zero(tag_double);
1165 const VectorDouble x1 = highway::Set(tag_double, xPtr[indexFirst]);
1166 const VectorDouble y1 = highway::Set(tag_double, yPtr[indexFirst]);
1167 const VectorDouble z1 = highway::Set(tag_double, zPtr[indexFirst]);
1168 const auto ownedI =
static_cast<int64_t
>(ownedStatePtr[indexFirst]);
1169 const VectorDouble ownedStateI = highway::Set(tag_double,
static_cast<double>(ownedI));
1170 const MaskDouble ownedMaskI = highway::Ne(ownedStateI, highway::Zero(tag_double));
1174 HWY_ALIGN std::array<double, _maxVecLengthDouble> x2Tmp{};
1175 HWY_ALIGN std::array<double, _maxVecLengthDouble> y2Tmp{};
1176 HWY_ALIGN std::array<double, _maxVecLengthDouble> z2Tmp{};
1177 HWY_ALIGN std::array<double, _maxVecLengthDouble> fx2Tmp{};
1178 HWY_ALIGN std::array<double, _maxVecLengthDouble> fy2Tmp{};
1179 HWY_ALIGN std::array<double, _maxVecLengthDouble> fz2Tmp{};
1180 HWY_ALIGN std::array<size_t, _maxVecLengthDouble> typeID2Tmp{};
1183 HWY_ALIGN std::array<int64_t, _maxVecLengthDouble> ownedStates2Tmp;
1187 const size_t vecEnd = (neighborList.size() /
_vecLengthDouble) * _vecLengthDouble;
1192 x2Tmp[vecIndex] = xPtr[neighborList[j + vecIndex]];
1193 y2Tmp[vecIndex] = yPtr[neighborList[j + vecIndex]];
1194 z2Tmp[vecIndex] = zPtr[neighborList[j + vecIndex]];
1195 if constexpr (newton3) {
1196 fx2Tmp[vecIndex] = fxPtr[neighborList[j + vecIndex]];
1197 fy2Tmp[vecIndex] = fyPtr[neighborList[j + vecIndex]];
1198 fz2Tmp[vecIndex] = fzPtr[neighborList[j + vecIndex]];
1200 typeID2Tmp[vecIndex] = typeIDPtr[neighborList[j + vecIndex]];
1201 const auto ownedState = ownedStatePtr[neighborList[j + vecIndex]];
1202 ownedStates2Tmp[vecIndex] =
static_cast<int64_t
>(ownedState);
1205 SoAKernel<newton3, false, false, false, VectorizationPattern::p1xVec>(
1206 0, 0, ownedMaskI, ownedStates2Tmp.data(), x1, y1, z1, x2Tmp.data(), y2Tmp.data(), z2Tmp.data(), fx2Tmp.data(),
1207 fy2Tmp.data(), fz2Tmp.data(), &typeIDPtr[indexFirst], typeID2Tmp.data(), fxAcc, fyAcc, fzAcc, virialSumX,
1208 virialSumY, virialSumZ, uPotSum, 0, 0);
1210 if constexpr (newton3) {
1212 fxPtr[neighborList[j + vecIndex]] = fx2Tmp[vecIndex];
1213 fyPtr[neighborList[j + vecIndex]] = fy2Tmp[vecIndex];
1214 fzPtr[neighborList[j + vecIndex]] = fz2Tmp[vecIndex];
1219 const int rest =
static_cast<int>(neighborList.size() & (
_vecLengthDouble - 1));
1222 for (
size_t vecIndex = 0; vecIndex < rest; ++vecIndex) {
1223 x2Tmp[vecIndex] = xPtr[neighborList[j + vecIndex]];
1224 y2Tmp[vecIndex] = yPtr[neighborList[j + vecIndex]];
1225 z2Tmp[vecIndex] = zPtr[neighborList[j + vecIndex]];
1226 if constexpr (newton3) {
1227 fx2Tmp[vecIndex] = fxPtr[neighborList[j + vecIndex]];
1228 fy2Tmp[vecIndex] = fyPtr[neighborList[j + vecIndex]];
1229 fz2Tmp[vecIndex] = fzPtr[neighborList[j + vecIndex]];
1231 typeID2Tmp[vecIndex] = typeIDPtr[neighborList[j + vecIndex]];
1232 const auto ownedState = ownedStatePtr[neighborList[j + vecIndex]];
1233 ownedStates2Tmp[vecIndex] =
static_cast<int64_t
>(ownedState);
1236 SoAKernel<newton3, false, true, false, VectorizationPattern::p1xVec>(
1237 0, 0, ownedMaskI, ownedStates2Tmp.data(), x1, y1, z1, x2Tmp.data(), y2Tmp.data(), z2Tmp.data(), fx2Tmp.data(),
1238 fy2Tmp.data(), fz2Tmp.data(), &typeIDPtr[indexFirst], typeID2Tmp.data(), fxAcc, fyAcc, fzAcc, virialSumX,
1239 virialSumY, virialSumZ, uPotSum, 0, rest);
1241 if constexpr (newton3) {
1242 for (
long vecIndex = 0; vecIndex <
_vecLengthDouble && vecIndex < rest; ++vecIndex) {
1243 fxPtr[neighborList[j + vecIndex]] = fx2Tmp[vecIndex];
1244 fyPtr[neighborList[j + vecIndex]] = fy2Tmp[vecIndex];
1245 fzPtr[neighborList[j + vecIndex]] = fz2Tmp[vecIndex];
1250 fxPtr[indexFirst] += highway::ReduceSum(tag_double, fxAcc);
1251 fyPtr[indexFirst] += highway::ReduceSum(tag_double, fyAcc);
1252 fzPtr[indexFirst] += highway::ReduceSum(tag_double, fzAcc);
1254 if constexpr (calculateGlobals) {
1255 computeGlobals(virialSumX, virialSumY, virialSumZ, uPotSum);
1264 return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
1265 Particle_T::AttributeNames::posX,
1266 Particle_T::AttributeNames::posY,
1267 Particle_T::AttributeNames::posZ,
1268 Particle_T::AttributeNames::forceX,
1269 Particle_T::AttributeNames::forceY,
1270 Particle_T::AttributeNames::forceZ,
1271 Particle_T::AttributeNames::typeId,
1272 Particle_T::AttributeNames::ownershipState};
1279 return std::array<typename Particle_T::AttributeNames, 6>{
1280 Particle_T::AttributeNames::id, Particle_T::AttributeNames::posX,
1281 Particle_T::AttributeNames::posY, Particle_T::AttributeNames::posZ,
1282 Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
1289 return std::array<typename Particle_T::AttributeNames, 3>{
1290 Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
1304 _potentialEnergySum = 0.;
1305 _virialSum = {0., 0., 0.};
1306 _postProcessed =
false;
1307 for (
size_t i = 0; i < _aosThreadData.size(); ++i) {
1308 _aosThreadData[i].setZero();
1317 using namespace autopas::utils::ArrayMath::literals;
1319 if (_postProcessed) {
1321 "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
1324 if (calculateGlobals) {
1325 for (
size_t i = 0; i < _aosThreadData.size(); ++i) {
1326 _potentialEnergySum += _aosThreadData[i].potentialEnergySum;
1327 _virialSum += _aosThreadData[i].virialSum;
1331 _potentialEnergySum *= 0.5;
1335 _potentialEnergySum /= 6.;
1336 _postProcessed =
true;
1338 AutoPasLog(DEBUG,
"Final potential energy {}", _potentialEnergySum);
1339 AutoPasLog(DEBUG,
"Final virial {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
1348 if (not calculateGlobals) {
1350 "Trying to get upot even though calculateGlobals is false. If you want this functor to calculate global "
1351 "values, please specify calculateGlobals to be true.");
1353 if (not _postProcessed) {
1356 return _potentialEnergySum;
1364 if (not calculateGlobals) {
1366 "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
1367 "values, please specify calculateGlobals to be true.");
1369 if (not _postProcessed) {
1371 "Cannot get virial, because endTraversal was not called.");
1373 return _virialSum[0] + _virialSum[1] + _virialSum[2];
1385 _epsilon24AoS = epsilon24;
1386 _sigmaSquareAoS = sigmaSquare;
1387 if constexpr (applyShift) {
1403 class AoSThreadData {
1405 AoSThreadData() : virialSum{0., 0., 0.}, potentialEnergySum{0.} {}
1408 virialSum = {0., 0., 0.};
1409 potentialEnergySum = 0.;
1412 std::array<double, 3> virialSum{};
1413 double potentialEnergySum{};
1416 double __remainingTo64[(64 - 4 *
sizeof(double)) /
sizeof(double)];
1418 static_assert(
sizeof(AoSThreadData) % 64 == 0,
"AoSThreadData has wrong size");
1421 const double _cutoffSquareAoS{0.};
1423 double _epsilon24AoS{0.}, _sigmaSquareAoS{0.}, _shift6AoS{0.};
1427 std::optional<std::reference_wrapper<ParticlePropertiesLibrary<double, size_t>>> _PPLibrary;
1430 double _potentialEnergySum{0.};
1431 std::array<double, 3> _virialSum{0., 0., 0.};
1432 std::vector<AoSThreadData> _aosThreadData{};
1435 bool _postProcessed{
false};
1441 static constexpr std::array<VectorizationPattern, 4> _vecPatternsAllowed = {
1442 VectorizationPattern::p1xVec, VectorizationPattern::p2xVecDiv2, VectorizationPattern::pVecDiv2x2,
1443 VectorizationPattern::pVecx1};
decltype(highway::FirstN(tag_long, 2)) MaskLong
Type for a Long Mask.
Definition: LJFunctorHWY.h:46
constexpr highway::Half< highway::DFromV< VectorDouble > > tag_double_half
Highway tag for a half-filled double register.
Definition: LJFunctorHWY.h:42
decltype(highway::Zero(tag_double)) VectorDouble
Type for a Double vector register.
Definition: LJFunctorHWY.h:38
constexpr highway::ScalableTag< double > tag_double
Highway tag for full double register.
Definition: LJFunctorHWY.h:26
constexpr size_t _maxVecLengthDouble
Upper bound on the number of double values in a full register.
Definition: LJFunctorHWY.h:36
autopas::VectorizationPatternOption::Value VectorizationPattern
Vectorization Pattern Type.
Definition: LJFunctorHWY.h:48
decltype(highway::Zero(tag_long)) VectorLong
Type for a Long vector register.
Definition: LJFunctorHWY.h:40
HWY_LANES_CONSTEXPR size_t _vecLengthDouble
Number of double values in a full register.
Definition: LJFunctorHWY.h:32
constexpr highway::ScalableTag< int64_t > tag_long
Highway tag for full long register.
Definition: LJFunctorHWY.h:28
decltype(highway::FirstN(tag_double, 1)) MaskDouble
Type for a Double Mask.
Definition: LJFunctorHWY.h:44
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:576
AlignedAllocator class.
Definition: AlignedAllocator.h:29
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:116
static void exception(const Exception e)
Handle an exception derived by std::exception.
Definition: ExceptionHandler.h:64
A functor to handle lennard-jones interactions between two particles (molecules) This functor uses th...
Definition: LJFunctorHWY.h:67
double getVirial() const
Get the virial.
Definition: LJFunctorHWY.h:1363
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctorHWY.h:108
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctorHWY.h:232
LJFunctorHWY(double cutoff, std::optional< std::reference_wrapper< ParticlePropertiesLibrary< double, size_t > > > particlePropertiesLibrary=std::nullopt)
Constructor for Functor with mixing enabled/disabled.
Definition: LJFunctorHWY.h:82
void setVecPattern(const VectorizationPattern vecPattern) final
Setter for the vectorization pattern to be used.
Definition: LJFunctorHWY.h:1397
void initTraversal() final
Reset the global values.
Definition: LJFunctorHWY.h:1303
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctorHWY.h:189
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctorHWY.h:132
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctorHWY.h:114
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctorHWY.h:1263
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctorHWY.h:1278
LJFunctorHWY()=delete
Deleted default constructor.
static constexpr bool getMixing()
Definition: LJFunctorHWY.h:1297
std::string getName() final
Returns name of functor.
Definition: LJFunctorHWY.h:106
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctorHWY.h:1288
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctorHWY.h:110
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctorHWY.h:1127
double getPotentialEnergy() const
Get the potential Energy.
Definition: LJFunctorHWY.h:1347
bool isVecPatternAllowed(const VectorizationPattern vecPattern) final
Specifies whether the functor is capable of using the specified Vectorization Pattern in the SoA func...
Definition: LJFunctorHWY.h:125
void setParticleProperties(const double epsilon24, const double sigmaSquare)
Sets the particle properties constants for this functor.
Definition: LJFunctorHWY.h:1384
void endTraversal(const bool newton3) final
Accumulates global values, e.g.
Definition: LJFunctorHWY.h:1316
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:34
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
OwnershipState
Enum that specifies the state of ownership.
Definition: OwnershipState.h:20
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:23
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132