// Extracted from: doxygen_documentation/git-master/LJFunctor_8h_source.html

#pragma once


#include "ParticlePropertiesLibrary.h"

#include "autopas/baseFunctors/PairwiseFunctor.h"

#include "autopas/particles/OwnershipState.h"

#include "autopas/utils/AlignedAllocator.h"

#include "autopas/utils/ArrayMath.h"

#include "autopas/utils/ExceptionHandler.h"

#include "autopas/utils/SoA.h"

#include "autopas/utils/StaticBoolSelector.h"

#include "autopas/utils/WrapOpenMP.h"

#include "autopas/utils/inBox.h"


namespace mdLib {


template <class Particle_T, bool applyShift = false, bool useMixing = false,

          autopas::FunctorN3Modes useNewton3 = autopas::FunctorN3Modes::Both, bool calculateGlobals = false,

          bool countFLOPs = false, bool relevantForTuning = true>

class LJFunctor

    : public autopas::PairwiseFunctor<Particle_T, LJFunctor<Particle_T, applyShift, useMixing, useNewton3,

                                                            calculateGlobals, countFLOPs, relevantForTuning>> {

  using SoAArraysType = typename Particle_T::SoAArraysType;


  using SoAFloatPrecision = typename Particle_T::ParticleSoAFloatPrecision;


 public:

  /**
   * Deleted default constructor: every usable constructor of this functor takes a cutoff.
   */
  LJFunctor() = delete;


 private:

  /**
   * Internal constructor that the public constructors delegate to.
   *
   * Forwards the cutoff to the PairwiseFunctor base, caches the squared cutoff, zeroes the accumulated globals and
   * sizes the per-thread buffers that the enabled template options require.
   *
   * @param cutoff Interaction cutoff; pairs with a squared distance above cutoff * cutoff are ignored by the kernels.
   * @note The unnamed void* parameter is never read; it only distinguishes this overload from the public ones.
   */
  explicit LJFunctor(double cutoff, void * /*dummy*/)
      : autopas::PairwiseFunctor<Particle_T, LJFunctor>(cutoff),
        _cutoffSquared{cutoff * cutoff},
        _potentialEnergySum{0.},
        _virialSum{0., 0., 0.},
        _postProcessed{false} {
    // One buffer entry per thread, indexed by autopas_get_thread_num() inside the kernels.
    if constexpr (countFLOPs) {
      _aosThreadDataFLOPs.resize(autopas::autopas_get_max_threads());
    }
    if constexpr (calculateGlobals) {
      _aosThreadDataGlobals.resize(autopas::autopas_get_max_threads());
    }
  }


 public:

  explicit LJFunctor(double cutoff) : LJFunctor(cutoff, nullptr) {

    static_assert(not useMixing,

                  "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "

                  "mixing to false.");

  }


  explicit LJFunctor(double cutoff, ParticlePropertiesLibrary<double, size_t> &particlePropertiesLibrary)

      : LJFunctor(cutoff, nullptr) {

    static_assert(useMixing,

                  "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "

                  "or set mixing to true.");

    _PPLibrary = &particlePropertiesLibrary;

  }


  /**
   * Returns the name of this functor.
   * @return The string "LJFunctorAutoVec".
   */
  std::string getName() final {
    return "LJFunctorAutoVec";
  }


  /**
   * Reports the value of the relevantForTuning template parameter.
   * @return true iff relevantForTuning is true.
   */
  bool isRelevantForTuning() final {
    constexpr bool tuningRelevant = relevantForTuning;
    return tuningRelevant;
  }


  /**
   * Tells whether this functor may be used with Newton3 enabled.
   * @return true iff useNewton3 is Newton3Only or Both.
   */
  bool allowsNewton3() final {
    constexpr bool newton3Only = useNewton3 == autopas::FunctorN3Modes::Newton3Only;
    constexpr bool bothModes = useNewton3 == autopas::FunctorN3Modes::Both;
    return newton3Only or bothModes;
  }


  /**
   * Tells whether this functor may be used with Newton3 disabled.
   * @return true iff useNewton3 is Newton3Off or Both.
   */
  bool allowsNonNewton3() final {
    constexpr bool newton3Off = useNewton3 == autopas::FunctorN3Modes::Newton3Off;
    constexpr bool bothModes = useNewton3 == autopas::FunctorN3Modes::Both;
    return newton3Off or bothModes;
  }


  /**
   * Computes the Lennard-Jones interaction of a single particle pair.
   *
   * The force is added to particle i and, if newton3 is set, subtracted from particle j. Pairs that contain a dummy
   * particle or whose squared distance exceeds the squared cutoff are skipped. If calculateGlobals is enabled, the
   * virial and six times the potential energy are accumulated in the calling thread's buffer; the scaling is undone
   * in endTraversal().
   *
   * @param i First particle; always receives the force.
   * @param j Second particle; receives the opposite force only if newton3 is true.
   * @param newton3 Whether to apply the force to j as well.
   */
  void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final {
    using namespace autopas::utils::ArrayMath::literals;

    // Dummies never interact and are not counted as distance calculations either.
    const bool involvesDummy = i.isDummy() or j.isDummy();
    if (involvesDummy) {
      return;
    }

    const auto threadnum = autopas::autopas_get_thread_num();
    if constexpr (countFLOPs) {
      ++_aosThreadDataFLOPs[threadnum].numDistCalls;
    }

    // Start from the functor-wide parameters and replace them by the per-type-pair values when mixing.
    auto sigmaSquared = _sigmaSquared;
    auto epsilon24 = _epsilon24;
    auto shift6 = _shift6;
    if constexpr (useMixing) {
      sigmaSquared = _PPLibrary->getMixingSigmaSquared(i.getTypeId(), j.getTypeId());
      epsilon24 = _PPLibrary->getMixing24Epsilon(i.getTypeId(), j.getTypeId());
      if constexpr (applyShift) {
        shift6 = _PPLibrary->getMixingShift6(i.getTypeId(), j.getTypeId());
      }
    }

    const auto displacement = i.getR() - j.getR();
    const double distanceSquared = autopas::utils::ArrayMath::dot(displacement, displacement);
    if (distanceSquared > _cutoffSquared) {
      return;
    }

    // lj2 = sigma^2 / r^2, from which the sixth and twelfth power terms follow.
    const double invDistanceSquared = 1. / distanceSquared;
    const double lj2 = sigmaSquared * invDistanceSquared;
    const double lj6 = lj2 * lj2 * lj2;
    const double lj12 = lj6 * lj6;
    const double lj12m6 = lj12 - lj6;
    const double forceFactor = epsilon24 * (lj12 + lj12m6) * invDistanceSquared;
    const auto force = displacement * forceFactor;

    i.addF(force);
    if (newton3) {
      // The reaction force is only applied here when Newton3 is used.
      j.subF(force);
    }

    if constexpr (countFLOPs) {
      if (newton3) {
        ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
      } else {
        ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
      }
    }

    if constexpr (calculateGlobals) {
      // The full contribution is added once per owned particle; endTraversal() halves the sums again.
      // The potential energy additionally carries a factor of 6 that endTraversal() removes.
      const auto virial = displacement * force;
      const double potentialEnergy6 = epsilon24 * lj12m6 + shift6;

      auto &threadGlobals = _aosThreadDataGlobals[threadnum];
      const auto addPairContribution = [&]() {
        threadGlobals.potentialEnergySum += potentialEnergy6;
        threadGlobals.virialSum += virial;
      };

      if (i.isOwned()) {
        addPairContribution();
      }
      // Without Newton3 the second particle is handled by a separate call with swapped roles.
      if (newton3 and j.isOwned()) {
        addPairContribution();
      }

      if constexpr (countFLOPs) {
        if (newton3) {
          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
        } else {
          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
        }
      }
    }
  }


  /**
   * Computes the Lennard-Jones interactions between all particles of one SoA.
   *
   * Every pair (i, j) with i < j is visited once and the force is applied to both particles, i.e. this kernel always
   * works in Newton3 fashion. Dummy particles and pairs beyond the cutoff are masked out by multiplying with a 0/1
   * mask rather than by branching. If calculateGlobals is enabled, the virial and six times the potential energy are
   * accumulated in the calling thread's buffer; the scaling is undone in endTraversal().
   *
   * @param soa View on the structure of arrays to process.
   * @param newton3 Not read by this implementation.
   */
  void SoAFunctorSingle(autopas::SoAView<SoAArraysType> soa, bool newton3) final {
    if (soa.size() == 0) return;

    const auto threadnum = autopas::autopas_get_thread_num();

    const auto *const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
    const auto *const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
    const auto *const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
    const auto *const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();

    SoAFloatPrecision *const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
    SoAFloatPrecision *const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
    SoAFloatPrecision *const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();

    [[maybe_unused]] auto *const __restrict typeptr = soa.template begin<Particle_T::AttributeNames::typeId>();

    // Member values are copied into locals on purpose: according to the original note this helps the code
    // generation of various compilers (presumably their auto-vectorization).
    const SoAFloatPrecision cutoffSquared = _cutoffSquared;

    SoAFloatPrecision potentialEnergySum = 0.;  // Note: This is not the potential energy but some fixed multiple of it.
    SoAFloatPrecision virialSumX = 0.;
    SoAFloatPrecision virialSumY = 0.;
    SoAFloatPrecision virialSumZ = 0.;

    // Counters for when countFLOPs is activated. numKernelCallsNoN3Sum is never incremented in this kernel.
    size_t numDistanceCalculationSum = 0;
    size_t numKernelCallsN3Sum = 0;
    size_t numKernelCallsNoN3Sum = 0;
    size_t numGlobalCalcsSum = 0;

    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
    if constexpr (useMixing) {
      // Preload all sigma and epsilons for next vectorized region.
      // Not preloading and directly using the values, will produce worse results.
      sigmaSquareds.resize(soa.size());
      epsilon24s.resize(soa.size());
      // Without mixing, or with mixing but without shift, shift6 is constant, so this vector is not needed.
      if constexpr (applyShift) {
        shift6s.resize(soa.size());
      }
    }

    const SoAFloatPrecision const_shift6 = _shift6;
    const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
    const SoAFloatPrecision const_epsilon24 = _epsilon24;

    for (unsigned int i = 0; i < soa.size(); ++i) {
      const auto ownedStateI = ownedStatePtr[i];
      if (ownedStateI == autopas::OwnershipState::dummy) {
        continue;
      }

      SoAFloatPrecision fxacc = 0.;
      SoAFloatPrecision fyacc = 0.;
      SoAFloatPrecision fzacc = 0.;

      if constexpr (useMixing) {
        // Only the entries j > i are read by the vectorized loop below, so only those are refreshed.
        for (unsigned int j = i + 1; j < soa.size(); ++j) {
          auto mixingData = _PPLibrary->getLJMixingData(typeptr[i], typeptr[j]);
          sigmaSquareds[j] = mixingData.sigmaSquared;
          epsilon24s[j] = mixingData.epsilon24;
          if constexpr (applyShift) {
            shift6s[j] = mixingData.shift6;
          }
        }
      }

// icpc vectorizes this.
// g++ only with -ffast-math or -funsafe-math-optimizations
#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsSum)
      for (unsigned int j = i + 1; j < soa.size(); ++j) {
        // Per-iteration copies so that no variable outside the simd loop is written inside it.
        SoAFloatPrecision shift6 = const_shift6;
        SoAFloatPrecision sigmaSquared = const_sigmaSquared;
        SoAFloatPrecision epsilon24 = const_epsilon24;
        if constexpr (useMixing) {
          sigmaSquared = sigmaSquareds[j];
          epsilon24 = epsilon24s[j];
          if constexpr (applyShift) {
            shift6 = shift6s[j];
          }
        }

        const auto ownedStateJ = ownedStatePtr[j];

        const SoAFloatPrecision drx = xptr[i] - xptr[j];
        const SoAFloatPrecision dry = yptr[i] - yptr[j];
        const SoAFloatPrecision drz = zptr[i] - zptr[j];

        const SoAFloatPrecision drx2 = drx * drx;
        const SoAFloatPrecision dry2 = dry * dry;
        const SoAFloatPrecision drz2 = drz * drz;

        const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;

        // Mask away if distance is too large or any particle is a dummy.
        // Particle ownedStateI was already checked previously.
        const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;

        const SoAFloatPrecision invdr2 = 1. / dr2;
        const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
        const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
        const SoAFloatPrecision lj12 = lj6 * lj6;
        const SoAFloatPrecision lj12m6 = lj12 - lj6;
        const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;

        const SoAFloatPrecision fx = drx * fac;
        const SoAFloatPrecision fy = dry * fac;
        const SoAFloatPrecision fz = drz * fac;

        fxacc += fx;
        fyacc += fy;
        fzacc += fz;

        // newton 3
        fxptr[j] -= fx;
        fyptr[j] -= fy;
        fzptr[j] -= fz;

        if constexpr (countFLOPs) {
          numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
          numKernelCallsN3Sum += mask;
        }

        if constexpr (calculateGlobals) {
          const SoAFloatPrecision virialx = drx * fx;
          const SoAFloatPrecision virialy = dry * fy;
          const SoAFloatPrecision virialz = drz * fz;
          const SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);

          // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
          SoAFloatPrecision energyFactor = (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
                                           (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.);
          potentialEnergySum += potentialEnergy6 * energyFactor;

          virialSumX += virialx * energyFactor;
          virialSumY += virialy * energyFactor;
          virialSumZ += virialz * energyFactor;

          if constexpr (countFLOPs) {
            numGlobalCalcsSum += mask;
          }
        }
      }

      // The force on particle i is written back once, after its whole row has been accumulated.
      fxptr[i] += fxacc;
      fyptr[i] += fyacc;
      fzptr[i] += fzacc;
    }
    if constexpr (countFLOPs) {
      _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
      _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
      _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
      _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsSum;  // Always N3 in Single SoAFunctor
    }
    if constexpr (calculateGlobals) {
      _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
      _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
      _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
      _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
    }
  }


  /**
   * Computes the Lennard-Jones interactions between the particles of two SoAs.
   *
   * Dispatches the runtime newton3 flag to the matching compile-time instantiation of SoAFunctorPairImpl().
   *
   * @param soa1 First SoA; its particles always receive forces.
   * @param soa2 Second SoA; its particles receive forces only if newton3 is true.
   * @param newton3 Whether Newton3 is used.
   */
  void SoAFunctorPair(autopas::SoAView<SoAArraysType> soa1, autopas::SoAView<SoAArraysType> soa2,
                      const bool newton3) final {
    if (not newton3) {
      SoAFunctorPairImpl<false>(soa1, soa2);
      return;
    }
    SoAFunctorPairImpl<true>(soa1, soa2);
  }


 private:

  /**
   * Implementation of SoAFunctorPair() with the Newton3 mode fixed at compile time.
   *
   * Every particle of soa1 interacts with every particle of soa2. Forces are always accumulated for soa1 and, if
   * newton3 is true, subtracted from soa2. Dummy particles and pairs beyond the cutoff are masked out by multiplying
   * with a 0/1 mask rather than by branching. If calculateGlobals is enabled, the virial and six times the potential
   * energy are accumulated in the calling thread's buffer; the scaling is undone in endTraversal().
   *
   * @tparam newton3 Whether forces are also applied to the particles of soa2.
   * @param soa1 First SoA.
   * @param soa2 Second SoA.
   */
  template <bool newton3>
  void SoAFunctorPairImpl(autopas::SoAView<SoAArraysType> soa1, autopas::SoAView<SoAArraysType> soa2) {
    if (soa1.size() == 0 || soa2.size() == 0) return;

    const auto threadnum = autopas::autopas_get_thread_num();

    const auto *const __restrict x1ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
    const auto *const __restrict y1ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
    const auto *const __restrict z1ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
    const auto *const __restrict x2ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
    const auto *const __restrict y2ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
    const auto *const __restrict z2ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
    const auto *const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
    const auto *const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();

    auto *const __restrict fx1ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
    auto *const __restrict fy1ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
    auto *const __restrict fz1ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
    auto *const __restrict fx2ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
    auto *const __restrict fy2ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
    auto *const __restrict fz2ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
    [[maybe_unused]] auto *const __restrict typeptr1 = soa1.template begin<Particle_T::AttributeNames::typeId>();
    [[maybe_unused]] auto *const __restrict typeptr2 = soa2.template begin<Particle_T::AttributeNames::typeId>();

    // Local accumulators for the globals. potentialEnergySum holds a fixed multiple of the potential energy.
    SoAFloatPrecision potentialEnergySum = 0.;
    SoAFloatPrecision virialSumX = 0.;
    SoAFloatPrecision virialSumY = 0.;
    SoAFloatPrecision virialSumZ = 0.;

    // Counters for when countFLOPs is activated
    size_t numDistanceCalculationSum = 0;
    size_t numKernelCallsN3Sum = 0;
    size_t numKernelCallsNoN3Sum = 0;
    size_t numGlobalCalcsN3Sum = 0;
    size_t numGlobalCalcsNoN3Sum = 0;

    // Loop-invariant copies of the members. The simd loop below works on per-iteration copies of these, as
    // SoAFunctorSingle() does, so that no variable declared outside the simd loop is written inside it.
    const SoAFloatPrecision cutoffSquared = _cutoffSquared;
    const SoAFloatPrecision const_shift6 = _shift6;
    const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
    const SoAFloatPrecision const_epsilon24 = _epsilon24;

    // preload all sigma and epsilons for next vectorized region
    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
    if constexpr (useMixing) {
      sigmaSquareds.resize(soa2.size());
      epsilon24s.resize(soa2.size());
      // Without mixing, or with mixing but without shift, shift6 is constant, so this vector is not needed.
      if constexpr (applyShift) {
        shift6s.resize(soa2.size());
      }
    }

    for (unsigned int i = 0; i < soa1.size(); ++i) {
      SoAFloatPrecision fxacc = 0;
      SoAFloatPrecision fyacc = 0;
      SoAFloatPrecision fzacc = 0;

      const auto ownedStateI = ownedStatePtr1[i];
      if (ownedStateI == autopas::OwnershipState::dummy) {
        continue;
      }

      // preload all sigma and epsilons for next vectorized region
      if constexpr (useMixing) {
        for (unsigned int j = 0; j < soa2.size(); ++j) {
          sigmaSquareds[j] = _PPLibrary->getMixingSigmaSquared(typeptr1[i], typeptr2[j]);
          epsilon24s[j] = _PPLibrary->getMixing24Epsilon(typeptr1[i], typeptr2[j]);
          if constexpr (applyShift) {
            shift6s[j] = _PPLibrary->getMixingShift6(typeptr1[i], typeptr2[j]);
          }
        }
      }

// icpc vectorizes this.
// g++ only with -ffast-math or -funsafe-math-optimizations
#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum)
      for (unsigned int j = 0; j < soa2.size(); ++j) {
        SoAFloatPrecision shift6 = const_shift6;
        SoAFloatPrecision sigmaSquared = const_sigmaSquared;
        SoAFloatPrecision epsilon24 = const_epsilon24;
        if constexpr (useMixing) {
          sigmaSquared = sigmaSquareds[j];
          epsilon24 = epsilon24s[j];
          if constexpr (applyShift) {
            shift6 = shift6s[j];
          }
        }

        const auto ownedStateJ = ownedStatePtr2[j];

        const SoAFloatPrecision drx = x1ptr[i] - x2ptr[j];
        const SoAFloatPrecision dry = y1ptr[i] - y2ptr[j];
        const SoAFloatPrecision drz = z1ptr[i] - z2ptr[j];

        const SoAFloatPrecision drx2 = drx * drx;
        const SoAFloatPrecision dry2 = dry * dry;
        const SoAFloatPrecision drz2 = drz * drz;

        const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;

        // Mask away if distance is too large or any particle is a dummy.
        // Particle ownedStateI was already checked previously.
        const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;

        const SoAFloatPrecision invdr2 = 1. / dr2;
        const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
        const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
        const SoAFloatPrecision lj12 = lj6 * lj6;
        const SoAFloatPrecision lj12m6 = lj12 - lj6;
        const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;

        const SoAFloatPrecision fx = drx * fac;
        const SoAFloatPrecision fy = dry * fac;
        const SoAFloatPrecision fz = drz * fac;

        fxacc += fx;
        fyacc += fy;
        fzacc += fz;
        if constexpr (newton3) {
          fx2ptr[j] -= fx;
          fy2ptr[j] -= fy;
          fz2ptr[j] -= fz;
        }

        if constexpr (countFLOPs) {
          numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
          if constexpr (newton3) {
            numKernelCallsN3Sum += mask;
          } else {
            numKernelCallsNoN3Sum += mask;
          }
        }

        if constexpr (calculateGlobals) {
          SoAFloatPrecision virialx = drx * fx;
          SoAFloatPrecision virialy = dry * fy;
          SoAFloatPrecision virialz = drz * fz;
          SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);

          // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
          // Without Newton3 only particle i is credited here.
          const SoAFloatPrecision energyFactor =
              (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
              (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
          potentialEnergySum += potentialEnergy6 * energyFactor;
          virialSumX += virialx * energyFactor;
          virialSumY += virialy * energyFactor;
          virialSumZ += virialz * energyFactor;

          if constexpr (countFLOPs) {
            if constexpr (newton3) {
              numGlobalCalcsN3Sum += mask;
            } else {
              numGlobalCalcsNoN3Sum += mask;
            }
          }
        }
      }
      // The force on particle i is written back once, after its whole row has been accumulated.
      fx1ptr[i] += fxacc;
      fy1ptr[i] += fyacc;
      fz1ptr[i] += fzacc;
    }
    if constexpr (countFLOPs) {
      _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
      _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
      _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
      _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
      _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
    }
    if constexpr (calculateGlobals) {
      _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
      _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
      _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
      _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
    }
  }


 public:

  // clang-format off

  // clang-format on

  /**
   * Computes the Lennard-Jones interactions of one particle with the particles in its neighbor list.
   *
   * Returns immediately for an empty SoA or an empty neighbor list; otherwise dispatches the runtime newton3 flag to
   * the matching compile-time instantiation of SoAFunctorVerletImpl().
   *
   * @param soa View on the structure of arrays holding all involved particles.
   * @param indexFirst Index (into soa) of the particle whose neighbors are processed.
   * @param neighborList Indices (into soa) of the neighbors of particle indexFirst.
   * @param newton3 Whether Newton3 is used.
   */
  void SoAFunctorVerlet(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,
                        const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList,
                        bool newton3) final {
    const bool nothingToDo = soa.size() == 0 or neighborList.empty();
    if (nothingToDo) {
      return;
    }
    if (not newton3) {
      SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
      return;
    }
    SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
  }


  /**
   * Sets the functor-wide Lennard-Jones parameters.
   *
   * The shift is derived from the new parameters and the stored squared cutoff if applyShift is enabled, and is
   * zero otherwise. The kernels overwrite these values with ParticlePropertiesLibrary lookups when useMixing is true.
   *
   * @param epsilon24 Value stored as _epsilon24 (by its name, 24 * epsilon).
   * @param sigmaSquared Value stored as _sigmaSquared (by its name, sigma * sigma).
   */
  void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared) {
    _sigmaSquared = sigmaSquared;
    _epsilon24 = epsilon24;
    if (not applyShift) {
      _shift6 = 0.;
      return;
    }
    _shift6 = ParticlePropertiesLibrary<double, size_t>::calcShift6(_epsilon24, _sigmaSquared, _cutoffSquared);
  }


  constexpr static auto getNeededAttr() {

    return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,

                                                              Particle_T::AttributeNames::posX,

                                                              Particle_T::AttributeNames::posY,

                                                              Particle_T::AttributeNames::posZ,

                                                              Particle_T::AttributeNames::forceX,

                                                              Particle_T::AttributeNames::forceY,

                                                              Particle_T::AttributeNames::forceZ,

                                                              Particle_T::AttributeNames::typeId,

                                                              Particle_T::AttributeNames::ownershipState};

  }


  constexpr static auto getNeededAttr(std::false_type) {

    return std::array<typename Particle_T::AttributeNames, 6>{

        Particle_T::AttributeNames::id,     Particle_T::AttributeNames::posX,

        Particle_T::AttributeNames::posY,   Particle_T::AttributeNames::posZ,

        Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};

  }


  constexpr static auto getComputedAttr() {

    return std::array<typename Particle_T::AttributeNames, 3>{

        Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};

  }


  /**
   * Reports the value of the useMixing template parameter.
   * @return true iff useMixing is true.
   */
  constexpr static bool getMixing() {
    return useMixing;
  }


  /**
   * Resets all accumulated values so that a new traversal starts from zero.
   *
   * Clears the global sums and the post-processing flag and zeroes the per-thread buffers that are in use.
   */
  void initTraversal() final {
    _postProcessed = false;
    _potentialEnergySum = 0.;
    _virialSum = {0., 0., 0.};

    if constexpr (countFLOPs) {
      for (auto &threadFLOPs : _aosThreadDataFLOPs) {
        threadFLOPs.setZero();
      }
    }
    if constexpr (calculateGlobals) {
      for (auto &threadGlobals : _aosThreadDataGlobals) {
        threadGlobals.setZero();
      }
    }
  }


  /**
   * Reduces the per-thread globals into the final potential energy and virial.
   *
   * Does nothing beyond the double-call check when calculateGlobals is false; in that case the post-processing flag
   * is not set either.
   *
   * @param newton3 Not read by this implementation.
   * @throws autopas::utils::ExceptionHandler::AutoPasException if the globals were already post-processed since the
   * last initTraversal().
   */
  void endTraversal(bool newton3) final {
    using namespace autopas::utils::ArrayMath::literals;

    if (_postProcessed) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
    }
    if (not calculateGlobals) {
      return;
    }

    for (const auto &threadGlobals : _aosThreadDataGlobals) {
      _potentialEnergySum += threadGlobals.potentialEnergySum;
      _virialSum += threadGlobals.virialSum;
    }

    // The kernels add the full contribution for both particles of an interaction, so halve the sums to count
    // every pair only once.
    _virialSum *= 0.5;
    _potentialEnergySum *= 0.5;
    // The kernels accumulate 6 * potentialEnergy, so divide by 6 here.
    _potentialEnergySum /= 6.;

    _postProcessed = true;

    AutoPasLog(DEBUG, "Final potential energy {}", _potentialEnergySum);
    AutoPasLog(DEBUG, "Final virial           {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
  }


  /**
   * Returns the potential energy computed during the last traversal.
   * @return The post-processed potential energy sum.
   * @throws autopas::utils::ExceptionHandler::AutoPasException if calculateGlobals is false or if endTraversal()
   * has not post-processed the values yet.
   */
  double getPotentialEnergy() {
    const bool globalsDisabled = not calculateGlobals;
    if (globalsDisabled) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
          "global "
          "values, please specify calculateGlobals to be true.");
    }
    const bool resultsPending = not _postProcessed;
    if (resultsPending) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Cannot get potential energy, because endTraversal was not called.");
    }
    return _potentialEnergySum;
  }


  /**
   * Returns the virial computed during the last traversal.
   * @return The sum of the three post-processed virial components.
   * @throws autopas::utils::ExceptionHandler::AutoPasException if calculateGlobals is false or if endTraversal()
   * has not post-processed the values yet.
   */
  double getVirial() {
    const bool globalsDisabled = not calculateGlobals;
    if (globalsDisabled) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
          "values, please specify calculateGlobals to be true.");
    }
    const bool resultsPending = not _postProcessed;
    if (resultsPending) {
      throw autopas::utils::ExceptionHandler::AutoPasException(
          "Cannot get virial, because endTraversal was not called.");
    }
    return _virialSum[0] + _virialSum[1] + _virialSum[2];
  }


  /**
   * Estimates the number of floating point operations performed since the last initTraversal().
   *
   * The per-thread counters are summed up and each counter is weighted with a fixed cost per event.
   *
   * @return The weighted sum of all counters if countFLOPs is enabled; otherwise
   * std::numeric_limits<size_t>::max() as a placeholder.
   */
  [[nodiscard]] size_t getNumFLOPs() const override {
    if constexpr (countFLOPs) {
      // Sum in size_t. Seeding std::accumulate with 0ul would accumulate in unsigned long, which is only 32 bits
      // wide on LLP64 platforms and would truncate the counters there.
      size_t numDistCallsAcc = 0;
      size_t numKernelCallsN3Acc = 0;
      size_t numKernelCallsNoN3Acc = 0;
      size_t numGlobalCalcsN3Acc = 0;
      size_t numGlobalCalcsNoN3Acc = 0;
      for (const auto &data : _aosThreadDataFLOPs) {
        numDistCallsAcc += data.numDistCalls;
        numKernelCallsN3Acc += data.numKernelCallsN3;
        numKernelCallsNoN3Acc += data.numKernelCallsNoN3;
        numGlobalCalcsN3Acc += data.numGlobalCalcsN3;
        numGlobalCalcsNoN3Acc += data.numGlobalCalcsNoN3;
      }

      // Fixed cost per counted event; the global calculation costs one FLOP more when the shift is applied.
      constexpr size_t numFLOPsPerDistanceCall = 8;
      constexpr size_t numFLOPsPerN3KernelCall = 18;
      constexpr size_t numFLOPsPerNoN3KernelCall = 15;
      constexpr size_t numFLOPsPerN3GlobalCalc = applyShift ? 13 : 12;
      constexpr size_t numFLOPsPerNoN3GlobalCalc = applyShift ? 9 : 8;

      return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
             numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
             numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
    } else {
      // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
      return std::numeric_limits<size_t>::max();
    }
  }


  /**
   * Computes the fraction of distance calculations that led to a kernel call since the last initTraversal().
   *
   * @return (kernel calls with and without Newton3) / (distance calculations) if countFLOPs is enabled. If no
   * distance calculation was counted, the division 0 / 0 yields NaN. With countFLOPs disabled a quiet NaN is
   * returned as a placeholder.
   */
  [[nodiscard]] double getHitRate() const override {
    if constexpr (countFLOPs) {
      // Sum in size_t. Seeding std::accumulate with 0ul would accumulate in unsigned long, which is only 32 bits
      // wide on LLP64 platforms and would truncate the counters there.
      size_t numDistCallsAcc = 0;
      size_t numKernelCallsN3Acc = 0;
      size_t numKernelCallsNoN3Acc = 0;
      for (const auto &data : _aosThreadDataFLOPs) {
        numDistCallsAcc += data.numDistCalls;
        numKernelCallsN3Acc += data.numKernelCallsN3;
        numKernelCallsNoN3Acc += data.numKernelCallsNoN3;
      }

      return (static_cast<double>(numKernelCallsNoN3Acc) + static_cast<double>(numKernelCallsN3Acc)) /
             (static_cast<double>(numDistCallsAcc));
    } else {
      // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
      return std::numeric_limits<double>::quiet_NaN();
    }
  }


 private:

  template <bool newton3>

  void SoAFunctorVerletImpl(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,

                            const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList) {

    const auto *const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();

    const auto *const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();

    const auto *const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();


    auto *const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();

    auto *const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();

    auto *const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();

    [[maybe_unused]] auto *const __restrict typeptr1 = soa.template begin<Particle_T::AttributeNames::typeId>();

    [[maybe_unused]] auto *const __restrict typeptr2 = soa.template begin<Particle_T::AttributeNames::typeId>();


    const auto *const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();


    const SoAFloatPrecision cutoffSquared = _cutoffSquared;

    SoAFloatPrecision shift6 = _shift6;

    SoAFloatPrecision sigmaSquared = _sigmaSquared;

    SoAFloatPrecision epsilon24 = _epsilon24;


    SoAFloatPrecision potentialEnergySum = 0.;

    SoAFloatPrecision virialSumX = 0.;

    SoAFloatPrecision virialSumY = 0.;

    SoAFloatPrecision virialSumZ = 0.;


    // Counters for when countFLOPs is activated

    size_t numDistanceCalculationSum = 0;

    size_t numKernelCallsN3Sum = 0;

    size_t numKernelCallsNoN3Sum = 0;

    size_t numGlobalCalcsN3Sum = 0;

    size_t numGlobalCalcsNoN3Sum = 0;


    SoAFloatPrecision fxacc = 0;

    SoAFloatPrecision fyacc = 0;

    SoAFloatPrecision fzacc = 0;

    const size_t neighborListSize = neighborList.size();

    const size_t *const __restrict neighborListPtr = neighborList.data();


    // checks whether particle i is owned.

    const auto ownedStateI = ownedStatePtr[indexFirst];

    if (ownedStateI == autopas::OwnershipState::dummy) {

      return;

    }


    const auto threadnum = autopas::autopas_get_thread_num();


    // this is a magic number, that should correspond to at least

    // vectorization width*N have testet multiple sizes:

    // 4: does not give a speedup, slower than original AoSFunctor

    // 8: small speedup compared to AoS

    // 12: highest speedup compared to Aos

    // 16: smaller speedup

    // in theory this is a variable, we could auto-tune over...

#ifdef __AVX512F__

    // use a multiple of 8 for avx

    constexpr size_t vecsize = 16;

#else

    // for everything else 12 is faster

    constexpr size_t vecsize = 12;

#endif

    size_t joff = 0;


    // if the size of the verlet list is larger than the given size vecsize,

    // we will use a vectorized version.

    if (neighborListSize >= vecsize) {

      alignas(64) std::array<SoAFloatPrecision, vecsize> xtmp, ytmp, ztmp, xArr, yArr, zArr, fxArr, fyArr, fzArr;

      alignas(64) std::array<autopas::OwnershipState, vecsize> ownedStateArr{};

      // broadcast of the position of particle i

      for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {

        xtmp[tmpj] = xptr[indexFirst];

        ytmp[tmpj] = yptr[indexFirst];

        ztmp[tmpj] = zptr[indexFirst];

      }

      // loop over the verlet list from 0 to x*vecsize

      for (; joff < neighborListSize - vecsize + 1; joff += vecsize) {

        // in each iteration we calculate the interactions of particle i with

        // vecsize particles in the neighborlist of particle i starting at

        // particle joff


        [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> sigmaSquareds;

        [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> epsilon24s;

        [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> shift6s;

        if constexpr (useMixing) {

          for (size_t j = 0; j < vecsize; j++) {

            sigmaSquareds[j] =

                _PPLibrary->getMixingSigmaSquared(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);

            epsilon24s[j] = _PPLibrary->getMixing24Epsilon(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);

            if constexpr (applyShift) {

              shift6s[j] = _PPLibrary->getMixingShift6(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);

            }

          }

        }


        // gather position of particle j

#pragma omp simd safelen(vecsize)

        for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {

          xArr[tmpj] = xptr[neighborListPtr[joff + tmpj]];

          yArr[tmpj] = yptr[neighborListPtr[joff + tmpj]];

          zArr[tmpj] = zptr[neighborListPtr[joff + tmpj]];

          ownedStateArr[tmpj] = ownedStatePtr[neighborListPtr[joff + tmpj]];

        }

        // do omp simd with reduction of the interaction

#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) safelen(vecsize)

        for (size_t j = 0; j < vecsize; j++) {

          if constexpr (useMixing) {

            sigmaSquared = sigmaSquareds[j];

            epsilon24 = epsilon24s[j];

            if constexpr (applyShift) {

              shift6 = shift6s[j];

            }

          }

          // const size_t j = currentList[jNeighIndex];


          const auto ownedStateJ = ownedStateArr[j];


          const SoAFloatPrecision drx = xtmp[j] - xArr[j];

          const SoAFloatPrecision dry = ytmp[j] - yArr[j];

          const SoAFloatPrecision drz = ztmp[j] - zArr[j];


          const SoAFloatPrecision drx2 = drx * drx;

          const SoAFloatPrecision dry2 = dry * dry;

          const SoAFloatPrecision drz2 = drz * drz;


          const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;


          // Mask away if distance is too large or any particle is a dummy.

          // Particle ownedStateI was already checked previously.

          const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;


          const SoAFloatPrecision invdr2 = 1. / dr2;

          const SoAFloatPrecision lj2 = sigmaSquared * invdr2;

          const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;

          const SoAFloatPrecision lj12 = lj6 * lj6;

          const SoAFloatPrecision lj12m6 = lj12 - lj6;

          const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;


          const SoAFloatPrecision fx = drx * fac;

          const SoAFloatPrecision fy = dry * fac;

          const SoAFloatPrecision fz = drz * fac;


          fxacc += fx;

          fyacc += fy;

          fzacc += fz;

          if (newton3) {

            fxArr[j] = fx;

            fyArr[j] = fy;

            fzArr[j] = fz;

          }


          if constexpr (countFLOPs) {

            numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;

            if constexpr (newton3) {

              numKernelCallsN3Sum += mask;

            } else {

              numKernelCallsNoN3Sum += mask;

            }

          }


          if (calculateGlobals) {

            SoAFloatPrecision virialx = drx * fx;

            SoAFloatPrecision virialy = dry * fy;

            SoAFloatPrecision virialz = drz * fz;

            SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);


            // We add 6 times the potential energy for each owned particle. The total sum is corrected in

            // endTraversal().

            const SoAFloatPrecision energyFactor =

                (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +

                (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);

            potentialEnergySum += potentialEnergy6 * energyFactor;

            virialSumX += virialx * energyFactor;

            virialSumY += virialy * energyFactor;

            virialSumZ += virialz * energyFactor;


            if constexpr (countFLOPs) {

              if constexpr (newton3) {

                numGlobalCalcsN3Sum += mask;

              } else {

                numGlobalCalcsNoN3Sum += mask;

              }

            }

          }

        }

        // scatter the forces to where they belong, this is only needed for newton3

        if (newton3) {

#pragma omp simd safelen(vecsize)

          for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {

            const size_t j = neighborListPtr[joff + tmpj];

            fxptr[j] -= fxArr[tmpj];

            fyptr[j] -= fyArr[tmpj];

            fzptr[j] -= fzArr[tmpj];

          }

        }

      }

    }

    // this loop goes over the remainder and uses no optimizations

    for (size_t jNeighIndex = joff; jNeighIndex < neighborListSize; ++jNeighIndex) {

      size_t j = neighborList[jNeighIndex];

      if (indexFirst == j) continue;

      if constexpr (useMixing) {

        sigmaSquared = _PPLibrary->getMixingSigmaSquared(typeptr1[indexFirst], typeptr2[j]);

        epsilon24 = _PPLibrary->getMixing24Epsilon(typeptr1[indexFirst], typeptr2[j]);

        if constexpr (applyShift) {

          shift6 = _PPLibrary->getMixingShift6(typeptr1[indexFirst], typeptr2[j]);

        }

      }


      const auto ownedStateJ = ownedStatePtr[j];

      if (ownedStateJ == autopas::OwnershipState::dummy) {

        continue;

      }


      const SoAFloatPrecision drx = xptr[indexFirst] - xptr[j];

      const SoAFloatPrecision dry = yptr[indexFirst] - yptr[j];

      const SoAFloatPrecision drz = zptr[indexFirst] - zptr[j];


      const SoAFloatPrecision drx2 = drx * drx;

      const SoAFloatPrecision dry2 = dry * dry;

      const SoAFloatPrecision drz2 = drz * drz;


      const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;


      if constexpr (countFLOPs) {

        numDistanceCalculationSum += 1;

      }


      if (dr2 > cutoffSquared) {

        continue;

      }


      const SoAFloatPrecision invdr2 = 1. / dr2;

      const SoAFloatPrecision lj2 = sigmaSquared * invdr2;

      const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;

      const SoAFloatPrecision lj12 = lj6 * lj6;

      const SoAFloatPrecision lj12m6 = lj12 - lj6;

      const SoAFloatPrecision fac = epsilon24 * (lj12 + lj12m6) * invdr2;


      const SoAFloatPrecision fx = drx * fac;

      const SoAFloatPrecision fy = dry * fac;

      const SoAFloatPrecision fz = drz * fac;


      fxacc += fx;

      fyacc += fy;

      fzacc += fz;

      if (newton3) {

        fxptr[j] -= fx;

        fyptr[j] -= fy;

        fzptr[j] -= fz;

      }


      if constexpr (countFLOPs) {

        if constexpr (newton3) {

          numKernelCallsN3Sum += 1;

        } else {

          numKernelCallsNoN3Sum += 1;

        }

      }


      if (calculateGlobals) {

        SoAFloatPrecision virialx = drx * fx;

        SoAFloatPrecision virialy = dry * fy;

        SoAFloatPrecision virialz = drz * fz;

        SoAFloatPrecision potentialEnergy6 = (epsilon24 * lj12m6 + shift6);


        // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().

        const SoAFloatPrecision energyFactor =

            (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +

            (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);

        potentialEnergySum += potentialEnergy6 * energyFactor;

        virialSumX += virialx * energyFactor;

        virialSumY += virialy * energyFactor;

        virialSumZ += virialz * energyFactor;


        if constexpr (countFLOPs) {

          if constexpr (newton3) {

            ++numGlobalCalcsN3Sum;

          } else {

            ++numGlobalCalcsNoN3Sum;

          }

        }

      }

    }


    if (fxacc != 0 or fyacc != 0 or fzacc != 0) {

      fxptr[indexFirst] += fxacc;

      fyptr[indexFirst] += fyacc;

      fzptr[indexFirst] += fzacc;

    }


    if constexpr (countFLOPs) {

      _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;

      _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;

      _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;

      _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;

      _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;

    }


    if (calculateGlobals) {

      _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;

      _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;

      _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;

      _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;

    }

  }


  /**
   * Per-thread accumulator for the global values (virial and potential energy).
   *
   * The object is padded with unused doubles up to 64 bytes; a static_assert after the class definitions
   * checks that the size is a multiple of 64.
   * NOTE(review): presumably this keeps the entries of different threads on separate cache lines - confirm.
   */
  class AoSThreadDataGlobals {
   public:
    /** Creates an accumulator with all sums set to zero. */
    AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, _remainingTo64{} {}

    /** Resets the virial and the potential energy sums to zero. */
    void setZero() {
      virialSum = {0., 0., 0.};
      potentialEnergySum = 0.;
    }

    /** Accumulated virial, one entry per dimension (x, y, z). */
    std::array<double, 3> virialSum;

    /** Sum of the potential energy contributions added by the functor. */
    double potentialEnergySum;

   private:
    // Padding only, never read: fills the object up to 64 bytes.
    // Deliberately named without a double underscore, since such identifiers are reserved for the
    // implementation.
    double _remainingTo64[(64 - 4 * sizeof(double)) / sizeof(double)];
  };


  /**
   * Per-thread counters that are filled when countFLOPs is enabled.
   *
   * The object is padded with unused entries up to 64 bytes; a static_assert after the class definitions
   * checks that the size is a multiple of 64.
   * NOTE(review): presumably this keeps the entries of different threads on separate cache lines - confirm.
   */
  class AoSThreadDataFLOPs {
   public:
    /** Creates a counter set with all counters at zero. */
    AoSThreadDataFLOPs() : _remainingTo64{} {}

    /** Resets all counters to zero. */
    void setZero() {
      numKernelCallsNoN3 = 0;
      numKernelCallsN3 = 0;
      numDistCalls = 0;
      numGlobalCalcsNoN3 = 0;
      numGlobalCalcsN3 = 0;
    }

    /** Number of kernel calls counted for interactions without newton3. */
    size_t numKernelCallsNoN3 = 0;

    /** Number of kernel calls counted for interactions with newton3. */
    size_t numKernelCallsN3 = 0;

    /** Number of distance calculations. */
    size_t numDistCalls = 0;

    /** Number of globals calculations counted for interactions with newton3. */
    size_t numGlobalCalcsN3 = 0;

    /** Number of globals calculations counted for interactions without newton3. */
    size_t numGlobalCalcsNoN3 = 0;

   private:
    // Padding only, never read: fills the object up to 64 bytes.
    // The element type has to be size_t, because the element count is computed from sizeof(size_t);
    // with any other element type the total size is only right if both types happen to be equally large.
    size_t _remainingTo64[(64 - 5 * sizeof(size_t)) / sizeof(size_t)];
  };


  // Compile-time check that both per-thread buffer types have a size that is a multiple of 64 bytes.
  static_assert(sizeof(AoSThreadDataGlobals) % 64 == 0, "AoSThreadDataGlobals has wrong size");
  static_assert(sizeof(AoSThreadDataFLOPs) % 64 == 0, "AoSThreadDataFLOPs has wrong size");

  // squared cutoff radius, fixed at construction
  const double _cutoffSquared;

  // not const because they might be reset through PPL
  // All three are zero-initialized so that none of them holds an indeterminate value before it is set.
  double _epsilon24 = 0;
  double _sigmaSquared = 0;
  double _shift6 = 0;

  // library that provides the mixed parameters when useMixing is true; nullptr by default
  ParticlePropertiesLibrary<SoAFloatPrecision, size_t> *_PPLibrary = nullptr;

  // sum of the potential energy, only calculated if calculateGlobals is true
  double _potentialEnergySum;

  // sum of the virial, only calculated if calculateGlobals is true
  std::array<double, 3> _virialSum;

  // per-thread buffers: one entry per thread for the globals and for the FLOP counters
  std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals{};
  std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};

  // defines whether or not the global values have already been post-processed
  bool _postProcessed;

};

}  // namespace mdLib

AlignedAllocator.h

ArrayMath.h

ExceptionHandler.h

AutoPasLog
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24

OwnershipState.h

PairwiseFunctor.h

ParticlePropertiesLibrary.h

SoA.h

StaticBoolSelector.h

WrapOpenMP.h

ParticlePropertiesLibrary
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28

ParticlePropertiesLibrary::getMixingShift6
floatType getMixingShift6(intType i, intType j) const
Returns precomputed mixed shift * 6 for one pair of site types.
Definition: ParticlePropertiesLibrary.h:262

ParticlePropertiesLibrary::getMixingSigmaSquared
floatType getMixingSigmaSquared(intType i, intType j) const
Returns precomputed mixed squared sigma for one pair of site types.
Definition: ParticlePropertiesLibrary.h:252

ParticlePropertiesLibrary::getMixing24Epsilon
floatType getMixing24Epsilon(intType i, intType j) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:224

ParticlePropertiesLibrary::calcShift6
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:575

ParticlePropertiesLibrary::getLJMixingData
auto getLJMixingData(intType i, intType j) const
Get complete mixing data for one pair of LJ site types.
Definition: ParticlePropertiesLibrary.h:234

autopas::AlignedAllocator
AlignedAllocator class.
Definition: AlignedAllocator.h:29

autopas::PairwiseFunctor
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31

autopas::PairwiseFunctor::PairwiseFunctor
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42

autopas::SoAView
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23

autopas::SoAView::size
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83

autopas::utils::ExceptionHandler::AutoPasException
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115

mdLib::LJFunctor
A functor to handle lennard-jones interactions between two particles (molecules).
Definition: LJFunctor.h:41

mdLib::LJFunctor::getMixing
static constexpr bool getMixing()
Definition: LJFunctor.h:632

mdLib::LJFunctor::getHitRate
double getHitRate() const override
Get the hit rate.
Definition: LJFunctor.h:788

mdLib::LJFunctor::AoSFunctor
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctor.h:120

mdLib::LJFunctor::allowsNewton3
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctor.h:112

mdLib::LJFunctor::allowsNonNewton3
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctor.h:116

mdLib::LJFunctor::initTraversal
void initTraversal() final
Reset the global values.
Definition: LJFunctor.h:638

mdLib::LJFunctor::LJFunctor
LJFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: LJFunctor.h:88

mdLib::LJFunctor::getNeededAttr
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctor.h:598

mdLib::LJFunctor::endTraversal
void endTraversal(bool newton3) final
Accumulates global values, e.g. potential energy and virial.
Definition: LJFunctor.h:658

mdLib::LJFunctor::isRelevantForTuning
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctor.h:110

mdLib::LJFunctor::LJFunctor
LJFunctor()=delete
Deleted default constructor.

mdLib::LJFunctor::getNeededAttr
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctor.h:613

mdLib::LJFunctor::SoAFunctorSingle
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:201

mdLib::LJFunctor::getName
std::string getName() final
Returns name of functor.
Definition: LJFunctor.h:108

mdLib::LJFunctor::LJFunctor
LJFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: LJFunctor.h:100

mdLib::LJFunctor::SoAFunctorPair
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:366

mdLib::LJFunctor::setParticleProperties
void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared)
Sets the particle properties constants for this functor.
Definition: LJFunctor.h:585

mdLib::LJFunctor::SoAFunctorVerlet
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctor.h:566

mdLib::LJFunctor::getNumFLOPs
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: LJFunctor.h:755

mdLib::LJFunctor::getComputedAttr
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctor.h:623

mdLib::LJFunctor::getPotentialEnergy
double getPotentialEnergy()
Get the potential Energy.
Definition: LJFunctor.h:688

mdLib::LJFunctor::getVirial
double getVirial()
Get the virial.
Definition: LJFunctor.h:706

inBox.h

autopas::utils::ArrayMath::dot
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233

autopas
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32

autopas::autopas_get_max_threads
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144

autopas::OwnershipState::dummy
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!

autopas::OwnershipState::owned
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...

autopas::FunctorN3Modes
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22

autopas::autopas_get_thread_num
int autopas_get_thread_num()
Dummy for omp_get_thread_num() when no OpenMP is available.
Definition: WrapOpenMP.h:132

autopas::DEFAULT_CACHE_LINE_SIZE
constexpr unsigned int DEFAULT_CACHE_LINE_SIZE
Default size for a cache line.
Definition: AlignedAllocator.h:21