96template <
class Particle_T, 
bool useMixing = 
false, 
autopas::FunctorN3Modes useNewton3 = autopas::FunctorN3Modes::Both,
 
   97          bool calculateGlobals = 
false, 
bool countFLOPs = 
false>
 
  100          Particle_T, AxilrodTellerFunctor<Particle_T, useMixing, useNewton3, calculateGlobals, countFLOPs>> {
 
  104  using SoAArraysType = 
typename Particle_T::SoAArraysType;
 
  109  using SoAFloatPrecision = 
typename Particle_T::ParticleSoAFloatPrecision;
 
  127        _cutoffSquared{cutoff * cutoff},
 
  128        _potentialEnergySum{0.},
 
  129        _virialSum{0., 0., 0.},
 
  130        _aosThreadDataGlobals(),
 
  131        _postProcessed{false} {
 
  132    if constexpr (calculateGlobals) {
 
  135    if constexpr (countFLOPs) {
 
  150    static_assert(not useMixing,
 
  151                  "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set " 
  163    static_assert(useMixing,
 
  164                  "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor " 
  165                  "or set mixing to true.");
 
  166    _PPLibrary = &particlePropertiesLibrary;
 
  169  std::string 
getName() final { 
return "AxilrodTellerFunctorAutoVec"; }
 
  174    return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
 
  178    return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
 
  181  void AoSFunctor(Particle_T &i, Particle_T &j, Particle_T &k, 
bool newton3) 
final {
 
  182    using namespace autopas::utils::ArrayMath::literals;
 
  184    if (i.isDummy() or j.isDummy() or k.isDummy()) {
 
  190    if constexpr (countFLOPs) {
 
  191      ++_aosThreadDataFLOPs[threadnum].numDistCalls;
 
  195    if constexpr (useMixing) {
 
  196      nu = _PPLibrary->
getMixingNu(i.getTypeId(), j.getTypeId(), k.getTypeId());
 
  199    const auto displacementIJ = j.getR() - i.getR();
 
  200    const auto displacementJK = k.getR() - j.getR();
 
  201    const auto displacementKI = i.getR() - k.getR();
 
  208    if (distSquaredIJ > _cutoffSquared or distSquaredJK > _cutoffSquared or distSquaredKI > _cutoffSquared) {
 
  213    const double allDistsSquared = distSquaredIJ * distSquaredJK * distSquaredKI;
 
  214    const double allDistsTo5 = allDistsSquared * allDistsSquared * std::sqrt(allDistsSquared);
 
  215    const double factor = 3.0 * nu / allDistsTo5;
 
  222    const double allDotProducts = IJDotKI * IJDotJK * JKDotKI;
 
  224    const auto forceIDirectionJK = displacementJK * IJDotKI * (IJDotJK - JKDotKI);
 
  225    const auto forceIDirectionIJ =
 
  226        displacementIJ * (IJDotJK * JKDotKI - distSquaredJK * distSquaredKI + 5.0 * allDotProducts / distSquaredIJ);
 
  227    const auto forceIDirectionKI =
 
  228        displacementKI * (-IJDotJK * JKDotKI + distSquaredIJ * distSquaredJK - 5.0 * allDotProducts / distSquaredKI);
 
  230    const auto forceI = (forceIDirectionJK + forceIDirectionIJ + forceIDirectionKI) * factor;
 
  233    auto forceJ = forceI;
 
  234    auto forceK = forceI;
 
  236      const auto forceJDirectionKI = displacementKI * IJDotJK * (JKDotKI - IJDotKI);
 
  237      const auto forceJDirectionIJ =
 
  238          displacementIJ * (-IJDotKI * JKDotKI + distSquaredJK * distSquaredKI - 5.0 * allDotProducts / distSquaredIJ);
 
  239      const auto forceJDirectionJK =
 
  240          displacementJK * (IJDotKI * JKDotKI - distSquaredIJ * distSquaredKI + 5.0 * allDotProducts / distSquaredJK);
 
  242      forceJ = (forceJDirectionKI + forceJDirectionIJ + forceJDirectionJK) * factor;
 
  245      forceK = (forceI + forceJ) * (-1.0);
 
  249    if constexpr (countFLOPs) {
 
  251        ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
 
  253        ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
 
  257    if constexpr (calculateGlobals) {
 
  260      const double potentialEnergy3 = factor * (allDistsSquared - 3.0 * allDotProducts);
 
  264      const auto virialI = forceI * i.getR();
 
  266        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
 
  267        _aosThreadDataGlobals[threadnum].virialSum += virialI;
 
  270      if (newton3 and j.isOwned()) {
 
  271        const auto virialJ = forceJ * j.getR();
 
  272        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
 
  273        _aosThreadDataGlobals[threadnum].virialSum += virialJ;
 
  275      if (newton3 and k.isOwned()) {
 
  276        const auto virialK = forceK * k.getR();
 
  277        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
 
  278        _aosThreadDataGlobals[threadnum].virialSum += virialK;
 
  280      if constexpr (countFLOPs) {
 
  282          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
 
  284          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
 
  303    return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
 
  304                                                              Particle_T::AttributeNames::posX,
 
  305                                                              Particle_T::AttributeNames::posY,
 
  306                                                              Particle_T::AttributeNames::posZ,
 
  307                                                              Particle_T::AttributeNames::forceX,
 
  308                                                              Particle_T::AttributeNames::forceY,
 
  309                                                              Particle_T::AttributeNames::forceZ,
 
  310                                                              Particle_T::AttributeNames::typeId,
 
  311                                                              Particle_T::AttributeNames::ownershipState};
 
  318    return std::array<typename Particle_T::AttributeNames, 6>{
 
  319        Particle_T::AttributeNames::id,     Particle_T::AttributeNames::posX,
 
  320        Particle_T::AttributeNames::posY,   Particle_T::AttributeNames::posZ,
 
  321        Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
 
  328    return std::array<typename Particle_T::AttributeNames, 3>{
 
  329        Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
 
  343    _potentialEnergySum = 0.;
 
  344    _virialSum = {0., 0., 0.};
 
  345    _postProcessed = 
false;
 
  346    for (
size_t i = 0; i < _aosThreadDataGlobals.size(); ++i) {
 
  347      _aosThreadDataGlobals[i].setZero();
 
  356    using namespace autopas::utils::ArrayMath::literals;
 
  358    if (_postProcessed) {
 
  360          "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
 
  362    if (calculateGlobals) {
 
  364      for (
const auto &data : _aosThreadDataGlobals) {
 
  365        _potentialEnergySum += data.potentialEnergySum;
 
  366        _virialSum += data.virialSum;
 
  371      _potentialEnergySum /= 3.;
 
  374      _potentialEnergySum /= 3.;
 
  376      _postProcessed = 
true;
 
  378      AutoPasLog(TRACE, 
"Final potential energy {}", _potentialEnergySum);
 
  379      AutoPasLog(TRACE, 
"Final virial           {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
 
  388    if (not calculateGlobals) {
 
  390          "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate " 
  392          "values, please specify calculateGlobals to be true.");
 
  394    if (not _postProcessed) {
 
  396          "Cannot get potential energy, because endTraversal was not called.");
 
  398    return _potentialEnergySum;
 
  406    if (not calculateGlobals) {
 
  408          "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global " 
  409          "values, please specify calculateGlobals to be true.");
 
  411    if (not _postProcessed) {
 
  413          "Cannot get virial, because endTraversal was not called.");
 
  415    return _virialSum[0] + _virialSum[1] + _virialSum[2];
 
  454    if constexpr (countFLOPs) {
 
  455      const size_t numDistCallsAcc =
 
  456          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  457                          [](
size_t sum, 
const auto &data) { return sum + data.numDistCalls; });
 
  458      const size_t numKernelCallsN3Acc =
 
  459          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  460                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsN3; });
 
  461      const size_t numKernelCallsNoN3Acc =
 
  462          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  463                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsNoN3; });
 
  464      const size_t numGlobalCalcsN3Acc =
 
  465          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  466                          [](
size_t sum, 
const auto &data) { return sum + data.numGlobalCalcsN3; });
 
  467      const size_t numGlobalCalcsNoN3Acc =
 
  468          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  469                          [](
size_t sum, 
const auto &data) { return sum + data.numGlobalCalcsNoN3; });
 
  471      constexpr size_t numFLOPsPerDistanceCall = 24;
 
  472      constexpr size_t numFLOPsPerN3KernelCall = 100;
 
  473      constexpr size_t numFLOPsPerNoN3KernelCall = 59;
 
  474      constexpr size_t numFLOPsPerN3GlobalCalc = 24;
 
  475      constexpr size_t numFLOPsPerNoN3GlobalCalc = 10;
 
  477      return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
 
  478             numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
 
  479             numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
 
  482      return std::numeric_limits<size_t>::max();
 
  487    if constexpr (countFLOPs) {
 
  488      const size_t numDistCallsAcc =
 
  489          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  490                          [](
size_t sum, 
const auto &data) { return sum + data.numDistCalls; });
 
  491      const size_t numKernelCallsN3Acc =
 
  492          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  493                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsN3; });
 
  494      const size_t numKernelCallsNoN3Acc =
 
  495          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  496                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsNoN3; });
 
  498      return (
static_cast<double>(numKernelCallsNoN3Acc) + 
static_cast<double>(numKernelCallsN3Acc)) /
 
  499             (
static_cast<double>(numDistCallsAcc));
 
  502      return std::numeric_limits<double>::quiet_NaN();
 
  507  template <
bool newton3>
 
  516  class AoSThreadDataGlobals {
 
  518    AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
 
  520      virialSum = {0., 0., 0.};
 
  521      potentialEnergySum = 0.;
 
  525    std::array<double, 3> virialSum;
 
  526    double potentialEnergySum;
 
  530    double __remainingTo64[(64 - 4 * 
sizeof(double)) / 
sizeof(double)];
 
  540  class AoSThreadDataFLOPs {
 
  542    AoSThreadDataFLOPs() : __remainingTo64{} {}
 
  548      numKernelCallsNoN3 = 0;
 
  549      numKernelCallsN3 = 0;
 
  551      numGlobalCalcsN3 = 0;
 
  552      numGlobalCalcsNoN3 = 0;
 
  559    size_t numKernelCallsNoN3 = 0;
 
  565    size_t numKernelCallsN3 = 0;
 
  571    size_t numDistCalls = 0;
 
  576    size_t numGlobalCalcsN3 = 0;
 
  581    size_t numGlobalCalcsNoN3 = 0;
 
  587    double __remainingTo64[(64 - 5 * 
sizeof(size_t)) / 
sizeof(size_t)];
 
  591  static_assert(
sizeof(AoSThreadDataGlobals) % 64 == 0, 
"AoSThreadDataGlobals has wrong size");
 
  592  static_assert(
sizeof(AoSThreadDataFLOPs) % 64 == 0, 
"AoSThreadDataFLOPs has wrong size");
 
  594  const double _cutoffSquared;
 
  603  double _potentialEnergySum;
 
  606  std::array<double, 3> _virialSum;
 
  609  std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals;
 
  610  std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
 
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingNu(intType i, intType j, intType k) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:283
AlignedAllocator class.
Definition: AlignedAllocator.h:29
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
TriwiseFunctor class.
Definition: TriwiseFunctor.h:28
TriwiseFunctor(double cutoff)
Constructor.
Definition: TriwiseFunctor.h:39
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115
static void exception(const Exception e)
Handle an exception derived by std::exception.
Definition: ExceptionHandler.h:63
A functor to handle Axilrod-Teller(-Muto) interactions between three particles (molecules).
Definition: AxilrodTellerFunctor.h:100
double getVirial()
Get the virial.
Definition: AxilrodTellerFunctor.h:405
AxilrodTellerFunctor()=delete
Deleted default constructor.
double getHitRate() const override
Get the hit rate.
Definition: AxilrodTellerFunctor.h:486
AxilrodTellerFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: AxilrodTellerFunctor.h:149
AxilrodTellerFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: AxilrodTellerFunctor.h:161
void setParticleProperties(SoAFloatPrecision nu)
Sets the particle properties constants for this functor.
Definition: AxilrodTellerFunctor.h:297
static constexpr bool getMixing()
Definition: AxilrodTellerFunctor.h:336
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: AxilrodTellerFunctor.h:302
std::string getName() final
Returns name of functor.
Definition: AxilrodTellerFunctor.h:169
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: AxilrodTellerFunctor.h:317
void initTraversal() final
Reset the global values.
Definition: AxilrodTellerFunctor.h:342
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: AxilrodTellerFunctor.h:171
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: AxilrodTellerFunctor.h:177
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: AxilrodTellerFunctor.h:173
void AoSFunctor(Particle_T &i, Particle_T &j, Particle_T &k, bool newton3) final
TriwiseFunctor for arrays of structures (AoS).
Definition: AxilrodTellerFunctor.h:181
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: AxilrodTellerFunctor.h:327
double getPotentialEnergy()
Get the potential Energy.
Definition: AxilrodTellerFunctor.h:387
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: AxilrodTellerFunctor.h:453
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: AxilrodTellerFunctor.h:355
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132