97template <
class Particle_T,
bool useMixing =
false,
autopas::FunctorN3Modes useNewton3 = autopas::FunctorN3Modes::Both,
98 bool calculateGlobals =
false,
bool countFLOPs =
false>
101 Particle_T, AxilrodTellerMutoFunctor<Particle_T, useMixing, useNewton3, calculateGlobals, countFLOPs>> {
105 using SoAArraysType =
typename Particle_T::SoAArraysType;
110 using SoAFloatPrecision =
typename Particle_T::ParticleSoAFloatPrecision;
128 _cutoffSquared{cutoff * cutoff},
129 _potentialEnergySum{0.},
130 _virialSum{0., 0., 0.},
131 _aosThreadDataGlobals(),
132 _postProcessed{false} {
133 if constexpr (calculateGlobals) {
136 if constexpr (countFLOPs) {
151 static_assert(not useMixing,
152 "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "
164 static_assert(useMixing,
165 "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "
166 "or set mixing to true.");
167 _PPLibrary = &particlePropertiesLibrary;
170 std::string
getName() final {
return "AxilrodTellerMutoFunctorAutoVec"; }
175 return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
179 return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
182 void AoSFunctor(Particle_T &i, Particle_T &j, Particle_T &k,
bool newton3)
final {
183 using namespace autopas::utils::ArrayMath::literals;
185 if (i.isDummy() or j.isDummy() or k.isDummy()) {
191 if constexpr (countFLOPs) {
192 ++_aosThreadDataFLOPs[threadnum].numDistCalls;
196 if constexpr (useMixing) {
197 nu = _PPLibrary->
getMixingNu(i.getTypeId(), j.getTypeId(), k.getTypeId());
200 const auto displacementIJ = j.getR() - i.getR();
201 const auto displacementJK = k.getR() - j.getR();
202 const auto displacementKI = i.getR() - k.getR();
209 if (distSquaredIJ > _cutoffSquared or distSquaredJK > _cutoffSquared or distSquaredKI > _cutoffSquared) {
214 const double allDistsSquared = distSquaredIJ * distSquaredJK * distSquaredKI;
215 const double allDistsTo5 = allDistsSquared * allDistsSquared * std::sqrt(allDistsSquared);
216 const double factor = 3.0 * nu / allDistsTo5;
223 const double allDotProducts = IJDotKI * IJDotJK * JKDotKI;
225 const auto forceIDirectionJK = displacementJK * IJDotKI * (IJDotJK - JKDotKI);
226 const auto forceIDirectionIJ =
227 displacementIJ * (IJDotJK * JKDotKI - distSquaredJK * distSquaredKI + 5.0 * allDotProducts / distSquaredIJ);
228 const auto forceIDirectionKI =
229 displacementKI * (-IJDotJK * JKDotKI + distSquaredIJ * distSquaredJK - 5.0 * allDotProducts / distSquaredKI);
231 const auto forceI = (forceIDirectionJK + forceIDirectionIJ + forceIDirectionKI) * factor;
234 auto forceJ = forceI;
235 auto forceK = forceI;
237 const auto forceJDirectionKI = displacementKI * IJDotJK * (JKDotKI - IJDotKI);
238 const auto forceJDirectionIJ =
239 displacementIJ * (-IJDotKI * JKDotKI + distSquaredJK * distSquaredKI - 5.0 * allDotProducts / distSquaredIJ);
240 const auto forceJDirectionJK =
241 displacementJK * (IJDotKI * JKDotKI - distSquaredIJ * distSquaredKI + 5.0 * allDotProducts / distSquaredJK);
243 forceJ = (forceJDirectionKI + forceJDirectionIJ + forceJDirectionJK) * factor;
246 forceK = (forceI + forceJ) * (-1.0);
250 if constexpr (countFLOPs) {
252 ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
254 ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
258 if constexpr (calculateGlobals) {
261 const double potentialEnergy3 = factor * (allDistsSquared - 3.0 * allDotProducts);
265 const auto virialI = forceI * i.getR();
267 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
268 _aosThreadDataGlobals[threadnum].virialSum += virialI;
271 if (newton3 and j.isOwned()) {
272 const auto virialJ = forceJ * j.getR();
273 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
274 _aosThreadDataGlobals[threadnum].virialSum += virialJ;
276 if (newton3 and k.isOwned()) {
277 const auto virialK = forceK * k.getR();
278 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
279 _aosThreadDataGlobals[threadnum].virialSum += virialK;
281 if constexpr (countFLOPs) {
283 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
285 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
304 return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
305 Particle_T::AttributeNames::posX,
306 Particle_T::AttributeNames::posY,
307 Particle_T::AttributeNames::posZ,
308 Particle_T::AttributeNames::forceX,
309 Particle_T::AttributeNames::forceY,
310 Particle_T::AttributeNames::forceZ,
311 Particle_T::AttributeNames::typeId,
312 Particle_T::AttributeNames::ownershipState};
319 return std::array<typename Particle_T::AttributeNames, 6>{
320 Particle_T::AttributeNames::id, Particle_T::AttributeNames::posX,
321 Particle_T::AttributeNames::posY, Particle_T::AttributeNames::posZ,
322 Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
329 return std::array<typename Particle_T::AttributeNames, 3>{
330 Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
344 _potentialEnergySum = 0.;
345 _virialSum = {0., 0., 0.};
346 _postProcessed =
false;
347 for (
size_t i = 0; i < _aosThreadDataGlobals.size(); ++i) {
348 _aosThreadDataGlobals[i].setZero();
357 using namespace autopas::utils::ArrayMath::literals;
359 if (_postProcessed) {
361 "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
363 if (calculateGlobals) {
365 for (
const auto &data : _aosThreadDataGlobals) {
366 _potentialEnergySum += data.potentialEnergySum;
367 _virialSum += data.virialSum;
372 _potentialEnergySum /= 3.;
375 _potentialEnergySum /= 3.;
377 _postProcessed =
true;
379 AutoPasLog(TRACE,
"Final potential energy {}", _potentialEnergySum);
380 AutoPasLog(TRACE,
"Final virial {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
389 if (not calculateGlobals) {
391 "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
393 "values, please specify calculateGlobals to be true.");
395 if (not _postProcessed) {
397 "Cannot get potential energy, because endTraversal was not called.");
399 return _potentialEnergySum;
407 if (not calculateGlobals) {
409 "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
410 "values, please specify calculateGlobals to be true.");
412 if (not _postProcessed) {
414 "Cannot get virial, because endTraversal was not called.");
416 return _virialSum[0] + _virialSum[1] + _virialSum[2];
455 if constexpr (countFLOPs) {
456 const size_t numDistCallsAcc =
457 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
458 [](
size_t sum,
const auto &data) { return sum + data.numDistCalls; });
459 const size_t numKernelCallsN3Acc =
460 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
461 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsN3; });
462 const size_t numKernelCallsNoN3Acc =
463 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
464 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsNoN3; });
465 const size_t numGlobalCalcsN3Acc =
466 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
467 [](
size_t sum,
const auto &data) { return sum + data.numGlobalCalcsN3; });
468 const size_t numGlobalCalcsNoN3Acc =
469 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
470 [](
size_t sum,
const auto &data) { return sum + data.numGlobalCalcsNoN3; });
472 constexpr size_t numFLOPsPerDistanceCall = 24;
473 constexpr size_t numFLOPsPerN3KernelCall = 100;
474 constexpr size_t numFLOPsPerNoN3KernelCall = 59;
475 constexpr size_t numFLOPsPerN3GlobalCalc = 24;
476 constexpr size_t numFLOPsPerNoN3GlobalCalc = 10;
478 return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
479 numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
480 numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
483 return std::numeric_limits<size_t>::max();
488 if constexpr (countFLOPs) {
489 const size_t numDistCallsAcc =
490 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
491 [](
size_t sum,
const auto &data) { return sum + data.numDistCalls; });
492 const size_t numKernelCallsN3Acc =
493 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
494 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsN3; });
495 const size_t numKernelCallsNoN3Acc =
496 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
497 [](
size_t sum,
const auto &data) { return sum + data.numKernelCallsNoN3; });
499 return (
static_cast<double>(numKernelCallsNoN3Acc) +
static_cast<double>(numKernelCallsN3Acc)) /
500 (
static_cast<double>(numDistCallsAcc));
503 return std::numeric_limits<double>::quiet_NaN();
508 template <
bool newton3>
517 class AoSThreadDataGlobals {
519 AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
521 virialSum = {0., 0., 0.};
522 potentialEnergySum = 0.;
526 std::array<double, 3> virialSum;
527 double potentialEnergySum;
531 double __remainingTo64[(64 - 4 *
sizeof(double)) /
sizeof(double)];
541 class AoSThreadDataFLOPs {
543 AoSThreadDataFLOPs() : __remainingTo64{} {}
549 numKernelCallsNoN3 = 0;
550 numKernelCallsN3 = 0;
552 numGlobalCalcsN3 = 0;
553 numGlobalCalcsNoN3 = 0;
560 size_t numKernelCallsNoN3 = 0;
566 size_t numKernelCallsN3 = 0;
572 size_t numDistCalls = 0;
577 size_t numGlobalCalcsN3 = 0;
582 size_t numGlobalCalcsNoN3 = 0;
588 double __remainingTo64[(64 - 5 *
sizeof(size_t)) /
sizeof(size_t)];
592 static_assert(
sizeof(AoSThreadDataGlobals) % 64 == 0,
"AoSThreadDataGlobals has wrong size");
593 static_assert(
sizeof(AoSThreadDataFLOPs) % 64 == 0,
"AoSThreadDataFLOPs has wrong size");
595 const double _cutoffSquared;
604 double _potentialEnergySum;
607 std::array<double, 3> _virialSum;
610 std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals;
611 std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingNu(intType i, intType j, intType k) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:283
AlignedAllocator class.
Definition: AlignedAllocator.h:29
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
TriwiseFunctor class.
Definition: TriwiseFunctor.h:28
TriwiseFunctor(double cutoff)
Constructor.
Definition: TriwiseFunctor.h:39
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115
static void exception(const Exception e)
Handle an exception derived by std::exception.
Definition: ExceptionHandler.h:63
A functor to handle Axilrod-Teller-Muto interactions between three particles (molecules).
Definition: AxilrodTellerMutoFunctor.h:101
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: AxilrodTellerMutoFunctor.h:328
void AoSFunctor(Particle_T &i, Particle_T &j, Particle_T &k, bool newton3) final
TriwiseFunctor for arrays of structures (AoS).
Definition: AxilrodTellerMutoFunctor.h:182
static constexpr bool getMixing()
Definition: AxilrodTellerMutoFunctor.h:337
double getHitRate() const override
Get the hit rate.
Definition: AxilrodTellerMutoFunctor.h:487
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: AxilrodTellerMutoFunctor.h:454
AxilrodTellerMutoFunctor()=delete
Deleted default constructor.
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: AxilrodTellerMutoFunctor.h:174
void initTraversal() final
Reset the global values.
Definition: AxilrodTellerMutoFunctor.h:343
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: AxilrodTellerMutoFunctor.h:318
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: AxilrodTellerMutoFunctor.h:172
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: AxilrodTellerMutoFunctor.h:178
AxilrodTellerMutoFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: AxilrodTellerMutoFunctor.h:162
double getVirial()
Get the virial.
Definition: AxilrodTellerMutoFunctor.h:406
void setParticleProperties(SoAFloatPrecision nu)
Sets the particle properties constants for this functor.
Definition: AxilrodTellerMutoFunctor.h:298
double getPotentialEnergy()
Get the potential Energy.
Definition: AxilrodTellerMutoFunctor.h:388
std::string getName() final
Returns name of functor.
Definition: AxilrodTellerMutoFunctor.h:170
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: AxilrodTellerMutoFunctor.h:356
AxilrodTellerMutoFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: AxilrodTellerMutoFunctor.h:150
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: AxilrodTellerMutoFunctor.h:303
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132