doxygen_documentation/git-master/AxilrodTellerFunctor_8h_source.html

#pragma once


#include <array>
#include <cmath>
#include <cstddef>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

#include "ParticlePropertiesLibrary.h"
#include "autopas/baseFunctors/TriwiseFunctor.h"
#include "autopas/particles/OwnershipState.h"
#include "autopas/utils/AlignedAllocator.h"
#include "autopas/utils/ArrayMath.h"
#include "autopas/utils/ExceptionHandler.h"
#include "autopas/utils/SoA.h"
#include "autopas/utils/StaticBoolSelector.h"
#include "autopas/utils/WrapOpenMP.h"
#include "autopas/utils/inBox.h"


namespace mdLib {


/**
 * A functor to handle Axilrod-Teller(-Muto) interactions between three particles (molecules).
 *
 * Only the AoS kernel (AoSFunctor()) is implemented in this class; SoAFunctorVerletImpl() merely reports that it is
 * not implemented.
 *
 * @tparam Particle_T Particle type. Must provide SoAArraysType, ParticleSoAFloatPrecision and AttributeNames.
 * @tparam useMixing If true, nu is looked up per triplet of type ids in a ParticlePropertiesLibrary; otherwise the
 * value set through setParticleProperties() is used for every triplet.
 * @tparam useNewton3 Newton-3 modes that allowsNewton3() / allowsNonNewton3() report as supported.
 * @tparam calculateGlobals If true, potential energy and virial are accumulated during a traversal.
 * @tparam countFLOPs If true, per-thread call counters are maintained for getNumFLOPs() and getHitRate().
 */
template <class Particle_T, bool useMixing = false, autopas::FunctorN3Modes useNewton3 = autopas::FunctorN3Modes::Both,
          bool calculateGlobals = false, bool countFLOPs = false>
class AxilrodTellerFunctor
    : public autopas::TriwiseFunctor<
          Particle_T, AxilrodTellerFunctor<Particle_T, useMixing, useNewton3, calculateGlobals, countFLOPs>> {
  /**
   * Structure of the SoAs defined by the particle.
   */
  using SoAArraysType = typename Particle_T::SoAArraysType;

  /**
   * Precision of SoA entries.
   */
  using SoAFloatPrecision = typename Particle_T::ParticleSoAFloatPrecision;

 public:
  /**
   * Deleted default constructor.
   */
  AxilrodTellerFunctor() = delete;

 private:
  /**
   * Internal constructor that the public constructors delegate to.
   * Sizes the per-thread buffers (one entry per possible thread) for the features enabled via template parameters.
   * @param cutoff Cutoff radius; its square is stored for the distance checks.
   * @note The unnamed second parameter only distinguishes this overload from the public single-argument constructor.
   */
  explicit AxilrodTellerFunctor(double cutoff, void * /*dummy*/)
      : autopas::TriwiseFunctor<Particle_T,
                                AxilrodTellerFunctor<Particle_T, useMixing, useNewton3, calculateGlobals, countFLOPs>>(
            cutoff),
        _cutoffSquared{cutoff * cutoff},
        _potentialEnergySum{0.},
        _virialSum{0., 0., 0.},
        _aosThreadDataGlobals(),
        _postProcessed{false} {
    if constexpr (calculateGlobals) {
      _aosThreadDataGlobals.resize(autopas::autopas_get_max_threads());
    }
    if constexpr (countFLOPs) {
      _aosThreadDataFLOPs.resize(autopas::autopas_get_max_threads());
    }
  }

 public:
  /**
   * Constructor for Functor with mixing disabled.
   * Fails to compile (static_assert) if useMixing is true.
   * @param cutoff Cutoff radius.
   */
  explicit AxilrodTellerFunctor(double cutoff) : AxilrodTellerFunctor(cutoff, nullptr) {
    static_assert(not useMixing,
                  "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "
                  "mixing to false.");
  }

  /**
   * Constructor for Functor with mixing active.
   * Fails to compile (static_assert) if useMixing is false.
   * @param cutoff Cutoff radius.
   * @param particlePropertiesLibrary Library used to look up the mixed nu. Only its address is stored, so it must
   * outlive this functor.
   */
  explicit AxilrodTellerFunctor(double cutoff, ParticlePropertiesLibrary<double, size_t> &particlePropertiesLibrary)
      : AxilrodTellerFunctor(cutoff, nullptr) {
    static_assert(useMixing,
                  "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "
                  "or set mixing to true.");
    _PPLibrary = &particlePropertiesLibrary;
  }

  /**
   * Returns name of functor.
   * @return The fixed string "AxilrodTellerFunctorAutoVec".
   */
  std::string getName() final { return "AxilrodTellerFunctorAutoVec"; }

  /**
   * Specifies whether the functor should be considered for the auto-tuning process.
   * @return Always true.
   */
  bool isRelevantForTuning() final { return true; }

  /**
   * Specifies whether the functor is capable of Newton3-like functors.
   * @return True if useNewton3 is Newton3Only or Both.
   */
  bool allowsNewton3() final {
    return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
  }

  /**
   * Specifies whether the functor is capable of non-Newton3-like functors.
   * @return True if useNewton3 is Newton3Off or Both.
   */
  bool allowsNonNewton3() final {
    return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
  }

  /**
   * TriwiseFunctor for arrays of structures (AoS).
   *
   * Returns without any effect if one of the particles is a dummy or if any of the three pairwise distances exceeds
   * the cutoff. Otherwise the force on i is always applied; forces on j and k are only applied if newton3 is true.
   *
   * @param i First particle.
   * @param j Second particle.
   * @param k Third particle.
   * @param newton3 Whether forces on j and k shall be applied by this call as well.
   * @note No special handling exists for coinciding particles (a pairwise distance of zero).
   */
  void AoSFunctor(Particle_T &i, Particle_T &j, Particle_T &k, bool newton3) final {
    using namespace autopas::utils::ArrayMath::literals;

    if (i.isDummy() or j.isDummy() or k.isDummy()) {
      return;
    }

    // Index into the per-thread buffers.
    const auto threadnum = autopas::autopas_get_thread_num();

    if constexpr (countFLOPs) {
      // Counted for every non-dummy triplet, regardless of whether it passes the cutoff check below.
      ++_aosThreadDataFLOPs[threadnum].numDistCalls;
    }

    auto nu = _nu;
    if constexpr (useMixing) {
      nu = _PPLibrary->getMixingNu(i.getTypeId(), j.getTypeId(), k.getTypeId());
    }

    // Displacements are chained i -> j -> k -> i.
    const auto displacementIJ = j.getR() - i.getR();
    const auto displacementJK = k.getR() - j.getR();
    const auto displacementKI = i.getR() - k.getR();

    const double distSquaredIJ = autopas::utils::ArrayMath::dot(displacementIJ, displacementIJ);
    const double distSquaredJK = autopas::utils::ArrayMath::dot(displacementJK, displacementJK);
    const double distSquaredKI = autopas::utils::ArrayMath::dot(displacementKI, displacementKI);

    // Check cutoff for every distance
    if (distSquaredIJ > _cutoffSquared or distSquaredJK > _cutoffSquared or distSquaredKI > _cutoffSquared) {
      return;
    }

    // Calculate prefactor: 3 * nu / (product of the three distances)^5
    const double allDistsSquared = distSquaredIJ * distSquaredJK * distSquaredKI;
    const double allDistsTo5 = allDistsSquared * allDistsSquared * std::sqrt(allDistsSquared);
    const double factor = 3.0 * nu / allDistsTo5;

    // Dot products of both distance vectors going from one particle
    const double IJDotKI = autopas::utils::ArrayMath::dot(displacementIJ, displacementKI);
    const double IJDotJK = autopas::utils::ArrayMath::dot(displacementIJ, displacementJK);
    const double JKDotKI = autopas::utils::ArrayMath::dot(displacementJK, displacementKI);

    const double allDotProducts = IJDotKI * IJDotJK * JKDotKI;

    // The force on i is composed of one contribution along each of the three displacement vectors.
    const auto forceIDirectionJK = displacementJK * IJDotKI * (IJDotJK - JKDotKI);
    const auto forceIDirectionIJ =
        displacementIJ * (IJDotJK * JKDotKI - distSquaredJK * distSquaredKI + 5.0 * allDotProducts / distSquaredIJ);
    const auto forceIDirectionKI =
        displacementKI * (-IJDotJK * JKDotKI + distSquaredIJ * distSquaredJK - 5.0 * allDotProducts / distSquaredKI);

    const auto forceI = (forceIDirectionJK + forceIDirectionIJ + forceIDirectionKI) * factor;
    i.addF(forceI);

    // Placeholder values; only overwritten and read when newton3 is true.
    auto forceJ = forceI;
    auto forceK = forceI;
    if (newton3) {
      const auto forceJDirectionKI = displacementKI * IJDotJK * (JKDotKI - IJDotKI);
      const auto forceJDirectionIJ =
          displacementIJ * (-IJDotKI * JKDotKI + distSquaredJK * distSquaredKI - 5.0 * allDotProducts / distSquaredIJ);
      const auto forceJDirectionJK =
          displacementJK * (IJDotKI * JKDotKI - distSquaredIJ * distSquaredKI + 5.0 * allDotProducts / distSquaredJK);

      forceJ = (forceJDirectionKI + forceJDirectionIJ + forceJDirectionJK) * factor;
      j.addF(forceJ);

      // The three forces sum to zero, so the force on k follows from the other two.
      forceK = (forceI + forceJ) * (-1.0);
      k.addF(forceK);
    }

    if constexpr (countFLOPs) {
      if (newton3) {
        ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
      } else {
        ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
      }
    }

    if constexpr (calculateGlobals) {
      // Add 3 * potential energy to every owned particle of the interaction.
      // Division to the correct value is handled in endTraversal().
      const double potentialEnergy3 = factor * (allDistsSquared - 3.0 * allDotProducts);

      // Virial is calculated as f_i * r_i
      // see Thompson et al.: https://doi.org/10.1063/1.3245303
      const auto virialI = forceI * i.getR();
      if (i.isOwned()) {
        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
        _aosThreadDataGlobals[threadnum].virialSum += virialI;
      }
      // for non-newton3 particles j and/or k will be considered in a separate calculation
      if (newton3 and j.isOwned()) {
        const auto virialJ = forceJ * j.getR();
        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
        _aosThreadDataGlobals[threadnum].virialSum += virialJ;
      }
      if (newton3 and k.isOwned()) {
        const auto virialK = forceK * k.getR();
        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy3;
        _aosThreadDataGlobals[threadnum].virialSum += virialK;
      }
      if constexpr (countFLOPs) {
        if (newton3) {
          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
        } else {
          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
        }
      }
    }
  }

  /**
   * Sets the particle properties constants for this functor.
   * The value is only used by AoSFunctor() when useMixing is false.
   * @param nu Parameter of the Axilrod-Teller potential.
   */
  void setParticleProperties(SoAFloatPrecision nu) { _nu = nu; }

  /**
   * Get attributes needed for computation.
   * @return Array of the nine attributes: id, position, force, typeId and ownershipState.
   */
  constexpr static auto getNeededAttr() {
    return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
                                                              Particle_T::AttributeNames::posX,
                                                              Particle_T::AttributeNames::posY,
                                                              Particle_T::AttributeNames::posZ,
                                                              Particle_T::AttributeNames::forceX,
                                                              Particle_T::AttributeNames::forceY,
                                                              Particle_T::AttributeNames::forceZ,
                                                              Particle_T::AttributeNames::typeId,
                                                              Particle_T::AttributeNames::ownershipState};
  }

  /**
   * Get attributes needed for computation without N3 optimization.
   * @return Array of the six attributes: id, position, typeId and ownershipState (no force components).
   */
  constexpr static auto getNeededAttr(std::false_type) {
    return std::array<typename Particle_T::AttributeNames, 6>{
        Particle_T::AttributeNames::id,     Particle_T::AttributeNames::posX,
        Particle_T::AttributeNames::posY,   Particle_T::AttributeNames::posZ,
        Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
  }

  /**
   * Get attributes computed by this functor.
   * @return Array of the three force components.
   */
  constexpr static auto getComputedAttr() {
    return std::array<typename Particle_T::AttributeNames, 3>{
        Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
  }

  /**
   * @return The value of the useMixing template parameter.
   */
  constexpr static bool getMixing() { return useMixing; }

  /**
   * Reset the global values.
   * Clears the accumulated potential energy and virial, the post-processing flag and all per-thread buffers, so that
   * every traversal starts from zero.
   */
  void initTraversal() final {
    _potentialEnergySum = 0.;
    _virialSum = {0., 0., 0.};
    _postProcessed = false;
    // Both vectors are empty if the corresponding feature is disabled, so no compile-time guard is required.
    for (auto &threadData : _aosThreadDataGlobals) {
      threadData.setZero();
    }
    // Without this reset, getNumFLOPs() / getHitRate() would report totals over all previous traversals.
    for (auto &threadData : _aosThreadDataFLOPs) {
      threadData.setZero();
    }
  }

  /**
   * Accumulates global values, e.g. potential energy and virial.
   * Does nothing besides the double-call check if calculateGlobals is false.
   * @param newton3 Not used by this implementation.
   * @throws autopas::utils::ExceptionHandler::AutoPasException if the globals have already been post-processed since
   * the last call to initTraversal().
   */
  void endTraversal(bool newton3) final {
    using namespace autopas::utils::ArrayMath::literals;

    if (_postProcessed) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
    }
    if constexpr (calculateGlobals) {
      // Accumulate potential energy and virial values.
      for (const auto &data : _aosThreadDataGlobals) {
        _potentialEnergySum += data.potentialEnergySum;
        _virialSum += data.virialSum;
      }

      // For each interaction, we added the full contribution for all three particles. Divide by 3 here, so that each
      // contribution is only counted once per triplet.
      _potentialEnergySum /= 3.;

      // Additionally, we have always calculated 3*potentialEnergy, so we divide by 3 again.
      _potentialEnergySum /= 3.;

      _postProcessed = true;

      AutoPasLog(TRACE, "Final potential energy {}", _potentialEnergySum);
      AutoPasLog(TRACE, "Final virial           {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
    }
  }

  /**
   * Get the potential Energy.
   * @return The potential energy accumulated by the last traversal.
   * @throws autopas::utils::ExceptionHandler::AutoPasException if calculateGlobals is false or endTraversal() has not
   * been called since the last initTraversal().
   */
  double getPotentialEnergy() {
    if constexpr (not calculateGlobals) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
          "global "
          "values, please specify calculateGlobals to be true.");
    }
    if (not _postProcessed) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Cannot get potential energy, because endTraversal was not called.");
    }
    return _potentialEnergySum;
  }

  /**
   * Get the virial.
   * @return Sum of the three accumulated virial components.
   * @throws autopas::utils::ExceptionHandler::AutoPasException if calculateGlobals is false or endTraversal() has not
   * been called since the last initTraversal().
   */
  double getVirial() {
    if constexpr (not calculateGlobals) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
          "values, please specify calculateGlobals to be true.");
    }
    if (not _postProcessed) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Cannot get virial, because endTraversal was not called.");
    }
    return _virialSum[0] + _virialSum[1] + _virialSum[2];
  }

  /**
   * Gets the number of useful FLOPs.
   * Each per-thread counter is summed over all threads and weighted with a fixed per-call FLOP cost.
   * @return Weighted FLOP count, or the maximum value of size_t if countFLOPs is false.
   */
  [[nodiscard]] size_t getNumFLOPs() const override {
    if constexpr (countFLOPs) {
      const size_t numDistCallsAcc = sumOverThreads(&AoSThreadDataFLOPs::numDistCalls);
      const size_t numKernelCallsN3Acc = sumOverThreads(&AoSThreadDataFLOPs::numKernelCallsN3);
      const size_t numKernelCallsNoN3Acc = sumOverThreads(&AoSThreadDataFLOPs::numKernelCallsNoN3);
      const size_t numGlobalCalcsN3Acc = sumOverThreads(&AoSThreadDataFLOPs::numGlobalCalcsN3);
      const size_t numGlobalCalcsNoN3Acc = sumOverThreads(&AoSThreadDataFLOPs::numGlobalCalcsNoN3);

      // FLOP cost assigned to each kind of counted event.
      constexpr size_t numFLOPsPerDistanceCall = 24;
      constexpr size_t numFLOPsPerN3KernelCall = 100;
      constexpr size_t numFLOPsPerNoN3KernelCall = 59;
      constexpr size_t numFLOPsPerN3GlobalCalc = 24;
      constexpr size_t numFLOPsPerNoN3GlobalCalc = 10;

      return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
             numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
             numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
    } else {
      // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
      return std::numeric_limits<size_t>::max();
    }
  }

  /**
   * Get the hit rate.
   * @return Number of kernel calls divided by the number of distance calls, both summed over all threads. NaN if
   * countFLOPs is false; with zero distance calls the result is the floating-point quotient 0/0.
   */
  [[nodiscard]] double getHitRate() const override {
    if constexpr (countFLOPs) {
      const size_t numDistCallsAcc = sumOverThreads(&AoSThreadDataFLOPs::numDistCalls);
      const size_t numKernelCallsN3Acc = sumOverThreads(&AoSThreadDataFLOPs::numKernelCallsN3);
      const size_t numKernelCallsNoN3Acc = sumOverThreads(&AoSThreadDataFLOPs::numKernelCallsNoN3);

      return (static_cast<double>(numKernelCallsNoN3Acc) + static_cast<double>(numKernelCallsN3Acc)) /
             (static_cast<double>(numDistCallsAcc));
    } else {
      // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
      return std::numeric_limits<double>::quiet_NaN();
    }
  }

 private:
  /**
   * Placeholder for an SoA Verlet-list kernel. Not implemented: only reports an exception via the ExceptionHandler.
   * @tparam newton3 Unused.
   * @param soa Unused.
   * @param indexFirst Unused.
   * @param neighborList Unused.
   */
  template <bool newton3>
  void SoAFunctorVerletImpl(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,
                            const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList) {
    autopas::utils::ExceptionHandler::exception("AxilrodTellerFunctor::SoAFunctorVerletImpl() is not implemented.");
  }

  /**
   * Per-thread accumulator for the globals (potential energy and virial), padded to 64 bytes.
   */
  class AoSThreadDataGlobals {
   public:
    AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, _remainingTo64{} {}

    /**
     * Resets both accumulators to zero.
     */
    void setZero() {
      virialSum = {0., 0., 0.};
      potentialEnergySum = 0.;
    }

    // variables
    std::array<double, 3> virialSum;
    double potentialEnergySum;

   private:
    // dummy parameter to get the right size (64 bytes)
    double _remainingTo64[(64 - 4 * sizeof(double)) / sizeof(double)];
  };

  /**
   * Per-thread counters for the FLOP and hit-rate statistics, padded to 64 bytes.
   */
  class AoSThreadDataFLOPs {
   public:
    AoSThreadDataFLOPs() : _remainingTo64{} {}

    /**
     * Resets all counters to zero.
     */
    void setZero() {
      numKernelCallsNoN3 = 0;
      numKernelCallsN3 = 0;
      numDistCalls = 0;
      numGlobalCalcsN3 = 0;
      numGlobalCalcsNoN3 = 0;
    }

    // Number of AoSFunctor() calls with newton3 == false that passed the cutoff check.
    size_t numKernelCallsNoN3 = 0;

    // Number of AoSFunctor() calls with newton3 == true that passed the cutoff check.
    size_t numKernelCallsN3 = 0;

    // Number of AoSFunctor() calls on non-dummy triplets, i.e. number of cutoff checks.
    size_t numDistCalls = 0;

    // Number of globals calculations performed with newton3 == true.
    size_t numGlobalCalcsN3 = 0;

    // Number of globals calculations performed with newton3 == false.
    size_t numGlobalCalcsNoN3 = 0;

   private:
    // dummy parameter to get the right size (64 bytes); element type matches the counters so that the computed
    // element count is correct independent of sizeof(size_t)
    size_t _remainingTo64[(64 - 5 * sizeof(size_t)) / sizeof(size_t)];
  };

  // make sure of the size of AoSThreadDataGlobals and AoSThreadDataFLOPs
  static_assert(sizeof(AoSThreadDataGlobals) % 64 == 0, "AoSThreadDataGlobals has wrong size");
  static_assert(sizeof(AoSThreadDataFLOPs) % 64 == 0, "AoSThreadDataFLOPs has wrong size");

  /**
   * Sums one counter of AoSThreadDataFLOPs over all per-thread entries.
   * @param counter Pointer to the counter member to sum.
   * @return The sum, accumulated as size_t.
   */
  [[nodiscard]] size_t sumOverThreads(size_t AoSThreadDataFLOPs::*counter) const {
    return std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), size_t{0},
                           [counter](size_t sum, const AoSThreadDataFLOPs &data) { return sum + data.*counter; });
  }

  // squared cutoff radius, compared against the squared pairwise distances
  const double _cutoffSquared;

  // Parameter of the Axilrod-Teller potential
  // not const because they might be reset through PPL
  double _nu = 0.0;

  // only set by the mixing constructor; not owned by the functor
  ParticlePropertiesLibrary<SoAFloatPrecision, size_t> *_PPLibrary = nullptr;

  // sum of the potential energy, only calculated if calculateGlobals is true
  double _potentialEnergySum;

  // sum of the virial, only calculated if calculateGlobals is true
  std::array<double, 3> _virialSum;

  // thread buffer for aos
  std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals;
  std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};

  // defines whether or not the global values are already postprocessed
  bool _postProcessed;
};

}  // namespace mdLib

AlignedAllocator.h

ArrayMath.h

ExceptionHandler.h

AutoPasLog
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24

OwnershipState.h

ParticlePropertiesLibrary.h

SoA.h

StaticBoolSelector.h

TriwiseFunctor.h

WrapOpenMP.h

ParticlePropertiesLibrary
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28

ParticlePropertiesLibrary::getMixingNu
floatType getMixingNu(intType i, intType j, intType k) const
Returns the precomputed mixed nu for the given triplet of type ids.
Definition: ParticlePropertiesLibrary.h:283

autopas::AlignedAllocator
AlignedAllocator class.
Definition: AlignedAllocator.h:29

autopas::SoAView
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23

autopas::TriwiseFunctor
TriwiseFunctor class.
Definition: TriwiseFunctor.h:28

autopas::TriwiseFunctor::TriwiseFunctor
TriwiseFunctor(double cutoff)
Constructor.
Definition: TriwiseFunctor.h:39

autopas::utils::ExceptionHandler::AutoPasException
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115

autopas::utils::ExceptionHandler::exception
static void exception(const Exception e)
Handle an exception derived by std::exception.
Definition: ExceptionHandler.h:63

mdLib::AxilrodTellerFunctor
A functor to handle Axilrod-Teller(-Muto) interactions between three particles (molecules).
Definition: AxilrodTellerFunctor.h:100

mdLib::AxilrodTellerFunctor::getVirial
double getVirial()
Get the virial.
Definition: AxilrodTellerFunctor.h:405

mdLib::AxilrodTellerFunctor::AxilrodTellerFunctor
AxilrodTellerFunctor()=delete
Deleted default constructor.

mdLib::AxilrodTellerFunctor::getHitRate
double getHitRate() const override
Get the hit rate.
Definition: AxilrodTellerFunctor.h:486

mdLib::AxilrodTellerFunctor::AxilrodTellerFunctor
AxilrodTellerFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: AxilrodTellerFunctor.h:149

mdLib::AxilrodTellerFunctor::AxilrodTellerFunctor
AxilrodTellerFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: AxilrodTellerFunctor.h:161

mdLib::AxilrodTellerFunctor::setParticleProperties
void setParticleProperties(SoAFloatPrecision nu)
Sets the particle properties constants for this functor.
Definition: AxilrodTellerFunctor.h:297

mdLib::AxilrodTellerFunctor::getMixing
static constexpr bool getMixing()
Definition: AxilrodTellerFunctor.h:336

mdLib::AxilrodTellerFunctor::getNeededAttr
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: AxilrodTellerFunctor.h:302

mdLib::AxilrodTellerFunctor::getName
std::string getName() final
Returns name of functor.
Definition: AxilrodTellerFunctor.h:169

mdLib::AxilrodTellerFunctor::getNeededAttr
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: AxilrodTellerFunctor.h:317

mdLib::AxilrodTellerFunctor::initTraversal
void initTraversal() final
Reset the global values.
Definition: AxilrodTellerFunctor.h:342

mdLib::AxilrodTellerFunctor::isRelevantForTuning
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: AxilrodTellerFunctor.h:171

mdLib::AxilrodTellerFunctor::allowsNonNewton3
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: AxilrodTellerFunctor.h:177

mdLib::AxilrodTellerFunctor::allowsNewton3
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: AxilrodTellerFunctor.h:173

mdLib::AxilrodTellerFunctor::AoSFunctor
void AoSFunctor(Particle_T &i, Particle_T &j, Particle_T &k, bool newton3) final
TriwiseFunctor for arrays of structures (AoS).
Definition: AxilrodTellerFunctor.h:181

mdLib::AxilrodTellerFunctor::getComputedAttr
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: AxilrodTellerFunctor.h:327

mdLib::AxilrodTellerFunctor::getPotentialEnergy
double getPotentialEnergy()
Get the potential Energy.
Definition: AxilrodTellerFunctor.h:387

mdLib::AxilrodTellerFunctor::getNumFLOPs
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: AxilrodTellerFunctor.h:453

mdLib::AxilrodTellerFunctor::endTraversal
void endTraversal(bool newton3) final
Accumulates global values, e.g. potential energy and virial.
Definition: AxilrodTellerFunctor.h:355

inBox.h

autopas::utils::ArrayMath::dot
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233

autopas
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32

autopas::autopas_get_max_threads
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144

autopas::FunctorN3Modes
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22

autopas::autopas_get_thread_num
int autopas_get_thread_num()
Dummy for omp_get_thread_num() when no OpenMP is available.
Definition: WrapOpenMP.h:132