36template <
class Particle_T, 
bool applyShift = 
false, 
bool useMixing = 
false,
 
   38          bool countFLOPs = 
false, 
bool relevantForTuning = 
true>
 
   41                                                            calculateGlobals, countFLOPs, relevantForTuning>> {
 
   45  using SoAArraysType = 
typename Particle_T::SoAArraysType;
 
   50  using SoAFloatPrecision = 
typename Particle_T::ParticleSoAFloatPrecision;
 
   64  explicit LJFunctor(
double cutoff, 
void * )
 
   66                                                       countFLOPs, relevantForTuning>>(cutoff),
 
   67        _cutoffSquared{cutoff * cutoff},
 
   68        _potentialEnergySum{0.},
 
   69        _virialSum{0., 0., 0.},
 
   70        _postProcessed{false} {
 
   71    if constexpr (calculateGlobals) {
 
   74    if constexpr (countFLOPs) {
 
   89    static_assert(not useMixing,
 
   90                  "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set " 
  102    static_assert(useMixing,
 
  103                  "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor " 
  104                  "or set mixing to true.");
 
  105    _PPLibrary = &particlePropertiesLibrary;
 
  108  std::string 
getName() final { 
return "LJFunctorAutoVec"; }
 
  113    return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
 
  117    return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
 
  120  void AoSFunctor(Particle_T &i, Particle_T &j, 
bool newton3) 
final {
 
  121    using namespace autopas::utils::ArrayMath::literals;
 
  123    if (i.isDummy() or j.isDummy()) {
 
  129    if constexpr (countFLOPs) {
 
  130      ++_aosThreadDataFLOPs[threadnum].numDistCalls;
 
  133    auto sigmaSquared = _sigmaSquared;
 
  134    auto epsilon24 = _epsilon24;
 
  135    auto shift6 = _shift6;
 
  136    if constexpr (useMixing) {
 
  139      if constexpr (applyShift) {
 
  143    auto dr = i.getR() - j.getR();
 
  146    if (dr2 > _cutoffSquared) {
 
  150    double invdr2 = 1. / dr2;
 
  151    double lj6 = sigmaSquared * invdr2;
 
  152    lj6 = lj6 * lj6 * lj6;
 
  153    double lj12 = lj6 * lj6;
 
  154    double lj12m6 = lj12 - lj6;
 
  155    double fac = epsilon24 * (lj12 + lj12m6) * invdr2;
 
  163    if constexpr (countFLOPs) {
 
  165        ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
 
  167        ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
 
  171    if constexpr (calculateGlobals) {
 
  175      auto virial = dr * f;
 
  176      double potentialEnergy6 = epsilon24 * lj12m6 + shift6;
 
  179        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
 
  180        _aosThreadDataGlobals[threadnum].virialSum += virial;
 
  183      if (newton3 and j.isOwned()) {
 
  184        _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
 
  185        _aosThreadDataGlobals[threadnum].virialSum += virial;
 
  187      if constexpr (countFLOPs) {
 
  189          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
 
  191          ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
 
  202    if (soa.size() == 0) 
return;
 
  206    const auto *
const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
 
  207    const auto *
const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
 
  208    const auto *
const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
 
  209    const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
 
  211    SoAFloatPrecision *
const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
 
  212    SoAFloatPrecision *
const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
 
  213    SoAFloatPrecision *
const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
 
  215    [[maybe_unused]] 
auto *
const __restrict typeptr = soa.template begin<Particle_T::AttributeNames::typeId>();
 
  217    const SoAFloatPrecision cutoffSquared = _cutoffSquared;
 
  219    SoAFloatPrecision potentialEnergySum = 0.;  
 
  220    SoAFloatPrecision virialSumX = 0.;
 
  221    SoAFloatPrecision virialSumY = 0.;
 
  222    SoAFloatPrecision virialSumZ = 0.;
 
  224    size_t numDistanceCalculationSum = 0;
 
  225    size_t numKernelCallsN3Sum = 0;
 
  226    size_t numKernelCallsNoN3Sum = 0;
 
  227    size_t numGlobalCalcsSum = 0;
 
  229    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
 
  230    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
 
  231    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
 
  232    if constexpr (useMixing) {
 
  235      sigmaSquareds.resize(soa.size());
 
  236      epsilon24s.resize(soa.size());
 
  238      if constexpr (applyShift) {
 
  239        shift6s.resize(soa.size());
 
  243    const SoAFloatPrecision const_shift6 = _shift6;
 
  244    const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
 
  245    const SoAFloatPrecision const_epsilon24 = _epsilon24;
 
  247    for (
unsigned int i = 0; i < soa.size(); ++i) {
 
  248      const auto ownedStateI = ownedStatePtr[i];
 
  253      SoAFloatPrecision fxacc = 0.;
 
  254      SoAFloatPrecision fyacc = 0.;
 
  255      SoAFloatPrecision fzacc = 0.;
 
  257      if constexpr (useMixing) {
 
  258        for (
unsigned int j = 0; j < soa.size(); ++j) {
 
  260          sigmaSquareds[j] = mixingData.sigmaSquared;
 
  261          epsilon24s[j] = mixingData.epsilon24;
 
  262          if constexpr (applyShift) {
 
  263            shift6s[j] = mixingData.shift6;
 
  270#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsSum) 
  271      for (
unsigned int j = i + 1; j < soa.size(); ++j) {
 
  272        SoAFloatPrecision shift6 = const_shift6;
 
  273        SoAFloatPrecision sigmaSquared = const_sigmaSquared;
 
  274        SoAFloatPrecision epsilon24 = const_epsilon24;
 
  275        if constexpr (useMixing) {
 
  276          sigmaSquared = sigmaSquareds[j];
 
  277          epsilon24 = epsilon24s[j];
 
  278          if constexpr (applyShift) {
 
  283        const auto ownedStateJ = ownedStatePtr[j];
 
  285        const SoAFloatPrecision drx = xptr[i] - xptr[j];
 
  286        const SoAFloatPrecision dry = yptr[i] - yptr[j];
 
  287        const SoAFloatPrecision drz = zptr[i] - zptr[j];
 
  289        const SoAFloatPrecision drx2 = drx * drx;
 
  290        const SoAFloatPrecision dry2 = dry * dry;
 
  291        const SoAFloatPrecision drz2 = drz * drz;
 
  293        const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
 
  299        const SoAFloatPrecision invdr2 = 1. / dr2;
 
  300        const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
 
  301        const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
 
  302        const SoAFloatPrecision lj12 = lj6 * lj6;
 
  303        const SoAFloatPrecision lj12m6 = lj12 - lj6;
 
  304        const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
 
  306        const SoAFloatPrecision fx = drx * fac;
 
  307        const SoAFloatPrecision fy = dry * fac;
 
  308        const SoAFloatPrecision fz = drz * fac;
 
  319        if constexpr (countFLOPs) {
 
  321          numKernelCallsN3Sum += mask;
 
  324        if (calculateGlobals) {
 
  325          const SoAFloatPrecision virialx = drx * fx;
 
  326          const SoAFloatPrecision virialy = dry * fy;
 
  327          const SoAFloatPrecision virialz = drz * fz;
 
  328          const SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
 
  333          potentialEnergySum += potentialEnergy6 * energyFactor;
 
  335          virialSumX += virialx * energyFactor;
 
  336          virialSumY += virialy * energyFactor;
 
  337          virialSumZ += virialz * energyFactor;
 
  339          if constexpr (countFLOPs) {
 
  340            numGlobalCalcsSum += mask;
 
  349    if constexpr (countFLOPs) {
 
  350      _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
 
  351      _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
 
  352      _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
 
  353      _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsSum;  
 
  355    if (calculateGlobals) {
 
  356      _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
 
  357      _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
 
  358      _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
 
  359      _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
 
  367                      const bool newton3) 
final {
 
  369      SoAFunctorPairImpl<true>(soa1, soa2);
 
  371      SoAFunctorPairImpl<false>(soa1, soa2);
 
  383  template <
bool newton3>
 
  385    if (soa1.
size() == 0 || soa2.
size() == 0) 
return;
 
  389    const auto *
const __restrict x1ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
 
  390    const auto *
const __restrict y1ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
 
  391    const auto *
const __restrict z1ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
 
  392    const auto *
const __restrict x2ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
 
  393    const auto *
const __restrict y2ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
 
  394    const auto *
const __restrict z2ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
 
  395    const auto *
const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
 
  396    const auto *
const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();
 
  398    auto *
const __restrict fx1ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
 
  399    auto *
const __restrict fy1ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
 
  400    auto *
const __restrict fz1ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
 
  401    auto *
const __restrict fx2ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
 
  402    auto *
const __restrict fy2ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
 
  403    auto *
const __restrict fz2ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
 
  404    [[maybe_unused]] 
auto *
const __restrict typeptr1 = soa1.template begin<Particle_T::AttributeNames::typeId>();
 
  405    [[maybe_unused]] 
auto *
const __restrict typeptr2 = soa2.template begin<Particle_T::AttributeNames::typeId>();
 
  408    SoAFloatPrecision potentialEnergySum = 0.;
 
  409    SoAFloatPrecision virialSumX = 0.;
 
  410    SoAFloatPrecision virialSumY = 0.;
 
  411    SoAFloatPrecision virialSumZ = 0.;
 
  413    size_t numDistanceCalculationSum = 0;
 
  414    size_t numKernelCallsN3Sum = 0;
 
  415    size_t numKernelCallsNoN3Sum = 0;
 
  416    size_t numGlobalCalcsN3Sum = 0;
 
  417    size_t numGlobalCalcsNoN3Sum = 0;
 
  419    const SoAFloatPrecision cutoffSquared = _cutoffSquared;
 
  420    SoAFloatPrecision shift6 = _shift6;
 
  421    SoAFloatPrecision sigmaSquared = _sigmaSquared;
 
  422    SoAFloatPrecision epsilon24 = _epsilon24;
 
  425    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
 
  426    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
 
  427    std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
 
  428    if constexpr (useMixing) {
 
  429      sigmaSquareds.resize(soa2.
size());
 
  430      epsilon24s.resize(soa2.
size());
 
  432      if constexpr (applyShift) {
 
  433        shift6s.resize(soa2.
size());
 
  437    for (
unsigned int i = 0; i < soa1.
size(); ++i) {
 
  438      SoAFloatPrecision fxacc = 0;
 
  439      SoAFloatPrecision fyacc = 0;
 
  440      SoAFloatPrecision fzacc = 0;
 
  442      const auto ownedStateI = ownedStatePtr1[i];
 
  448      if constexpr (useMixing) {
 
  449        for (
unsigned int j = 0; j < soa2.
size(); ++j) {
 
  452          if constexpr (applyShift) {
 
  460#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) 
  461      for (
unsigned int j = 0; j < soa2.
size(); ++j) {
 
  462        if constexpr (useMixing) {
 
  463          sigmaSquared = sigmaSquareds[j];
 
  464          epsilon24 = epsilon24s[j];
 
  465          if constexpr (applyShift) {
 
  470        const auto ownedStateJ = ownedStatePtr2[j];
 
  472        const SoAFloatPrecision drx = x1ptr[i] - x2ptr[j];
 
  473        const SoAFloatPrecision dry = y1ptr[i] - y2ptr[j];
 
  474        const SoAFloatPrecision drz = z1ptr[i] - z2ptr[j];
 
  476        const SoAFloatPrecision drx2 = drx * drx;
 
  477        const SoAFloatPrecision dry2 = dry * dry;
 
  478        const SoAFloatPrecision drz2 = drz * drz;
 
  480        const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
 
  486        const SoAFloatPrecision invdr2 = 1. / dr2;
 
  487        const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
 
  488        const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
 
  489        const SoAFloatPrecision lj12 = lj6 * lj6;
 
  490        const SoAFloatPrecision lj12m6 = lj12 - lj6;
 
  491        const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
 
  493        const SoAFloatPrecision fx = drx * fac;
 
  494        const SoAFloatPrecision fy = dry * fac;
 
  495        const SoAFloatPrecision fz = drz * fac;
 
  506        if constexpr (countFLOPs) {
 
  508          if constexpr (newton3) {
 
  509            numKernelCallsN3Sum += mask;
 
  511            numKernelCallsNoN3Sum += mask;
 
  515        if constexpr (calculateGlobals) {
 
  516          SoAFloatPrecision virialx = drx * fx;
 
  517          SoAFloatPrecision virialy = dry * fy;
 
  518          SoAFloatPrecision virialz = drz * fz;
 
  519          SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
 
  522          const SoAFloatPrecision energyFactor =
 
  525          potentialEnergySum += potentialEnergy6 * energyFactor;
 
  526          virialSumX += virialx * energyFactor;
 
  527          virialSumY += virialy * energyFactor;
 
  528          virialSumZ += virialz * energyFactor;
 
  530          if constexpr (countFLOPs) {
 
  531            if constexpr (newton3) {
 
  532              numGlobalCalcsN3Sum += mask;
 
  534              numGlobalCalcsNoN3Sum += mask;
 
  543    if constexpr (countFLOPs) {
 
  544      _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
 
  545      _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
 
  546      _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
 
  547      _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
 
  548      _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
 
  550    if (calculateGlobals) {
 
  551      _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
 
  552      _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
 
  553      _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
 
  554      _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
 
  568                        bool newton3) 
final {
 
  569    if (soa.size() == 0 or neighborList.empty()) 
return;
 
  571      SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
 
  573      SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
 
  586    _epsilon24 = epsilon24;
 
  587    _sigmaSquared = sigmaSquared;
 
  599    return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
 
  600                                                              Particle_T::AttributeNames::posX,
 
  601                                                              Particle_T::AttributeNames::posY,
 
  602                                                              Particle_T::AttributeNames::posZ,
 
  603                                                              Particle_T::AttributeNames::forceX,
 
  604                                                              Particle_T::AttributeNames::forceY,
 
  605                                                              Particle_T::AttributeNames::forceZ,
 
  606                                                              Particle_T::AttributeNames::typeId,
 
  607                                                              Particle_T::AttributeNames::ownershipState};
 
  614    return std::array<typename Particle_T::AttributeNames, 6>{
 
  615        Particle_T::AttributeNames::id,     Particle_T::AttributeNames::posX,
 
  616        Particle_T::AttributeNames::posY,   Particle_T::AttributeNames::posZ,
 
  617        Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
 
  624    return std::array<typename Particle_T::AttributeNames, 3>{
 
  625        Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
 
  639    _potentialEnergySum = 0.;
 
  640    _virialSum = {0., 0., 0.};
 
  641    _postProcessed = 
false;
 
  642    if constexpr (calculateGlobals) {
 
  643      for (
auto &data : _aosThreadDataGlobals) {
 
  647    if constexpr (countFLOPs) {
 
  648      for (
auto &data : _aosThreadDataFLOPs) {
 
  659    using namespace autopas::utils::ArrayMath::literals;
 
  661    if (_postProcessed) {
 
  663          "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
 
  665    if (calculateGlobals) {
 
  666      for (
const auto &data : _aosThreadDataGlobals) {
 
  667        _potentialEnergySum += data.potentialEnergySum;
 
  668        _virialSum += data.virialSum;
 
  672      _potentialEnergySum *= 0.5;
 
  676      _potentialEnergySum /= 6.;
 
  677      _postProcessed = 
true;
 
  679      AutoPasLog(DEBUG, 
"Final potential energy {}", _potentialEnergySum);
 
  680      AutoPasLog(DEBUG, 
"Final virial           {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
 
  689    if (not calculateGlobals) {
 
  691          "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate " 
  693          "values, please specify calculateGlobals to be true.");
 
  695    if (not _postProcessed) {
 
  697          "Cannot get potential energy, because endTraversal was not called.");
 
  699    return _potentialEnergySum;
 
  707    if (not calculateGlobals) {
 
  709          "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global " 
  710          "values, please specify calculateGlobals to be true.");
 
  712    if (not _postProcessed) {
 
  714          "Cannot get virial, because endTraversal was not called.");
 
  716    return _virialSum[0] + _virialSum[1] + _virialSum[2];
 
  756    if constexpr (countFLOPs) {
 
  757      const size_t numDistCallsAcc =
 
  758          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  759                          [](
size_t sum, 
const auto &data) { return sum + data.numDistCalls; });
 
  760      const size_t numKernelCallsN3Acc =
 
  761          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  762                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsN3; });
 
  763      const size_t numKernelCallsNoN3Acc =
 
  764          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  765                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsNoN3; });
 
  766      const size_t numGlobalCalcsN3Acc =
 
  767          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  768                          [](
size_t sum, 
const auto &data) { return sum + data.numGlobalCalcsN3; });
 
  769      const size_t numGlobalCalcsNoN3Acc =
 
  770          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  771                          [](
size_t sum, 
const auto &data) { return sum + data.numGlobalCalcsNoN3; });
 
  773      constexpr size_t numFLOPsPerDistanceCall = 8;
 
  774      constexpr size_t numFLOPsPerN3KernelCall = 18;
 
  775      constexpr size_t numFLOPsPerNoN3KernelCall = 15;
 
  776      constexpr size_t numFLOPsPerN3GlobalCalc = applyShift ? 13 : 12;
 
  777      constexpr size_t numFLOPsPerNoN3GlobalCalc = applyShift ? 9 : 8;
 
  779      return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
 
  780             numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
 
  781             numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
 
  784      return std::numeric_limits<size_t>::max();
 
  789    if constexpr (countFLOPs) {
 
  790      const size_t numDistCallsAcc =
 
  791          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  792                          [](
size_t sum, 
const auto &data) { return sum + data.numDistCalls; });
 
  793      const size_t numKernelCallsN3Acc =
 
  794          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  795                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsN3; });
 
  796      const size_t numKernelCallsNoN3Acc =
 
  797          std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
 
  798                          [](
size_t sum, 
const auto &data) { return sum + data.numKernelCallsNoN3; });
 
  800      return (
static_cast<double>(numKernelCallsNoN3Acc) + 
static_cast<double>(numKernelCallsN3Acc)) /
 
  801             (
static_cast<double>(numDistCallsAcc));
 
  804      return std::numeric_limits<double>::quiet_NaN();
 
  809  template <
bool newton3>
 
  812    const auto *
const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
 
  813    const auto *
const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
 
  814    const auto *
const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
 
  816    auto *
const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
 
  817    auto *
const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
 
  818    auto *
const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
 
  819    [[maybe_unused]] 
auto *
const __restrict typeptr1 = soa.template begin<Particle_T::AttributeNames::typeId>();
 
  820    [[maybe_unused]] 
auto *
const __restrict typeptr2 = soa.template begin<Particle_T::AttributeNames::typeId>();
 
  822    const auto *
const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
 
  824    const SoAFloatPrecision cutoffSquared = _cutoffSquared;
 
  825    SoAFloatPrecision shift6 = _shift6;
 
  826    SoAFloatPrecision sigmaSquared = _sigmaSquared;
 
  827    SoAFloatPrecision epsilon24 = _epsilon24;
 
  829    SoAFloatPrecision potentialEnergySum = 0.;
 
  830    SoAFloatPrecision virialSumX = 0.;
 
  831    SoAFloatPrecision virialSumY = 0.;
 
  832    SoAFloatPrecision virialSumZ = 0.;
 
  835    size_t numDistanceCalculationSum = 0;
 
  836    size_t numKernelCallsN3Sum = 0;
 
  837    size_t numKernelCallsNoN3Sum = 0;
 
  838    size_t numGlobalCalcsN3Sum = 0;
 
  839    size_t numGlobalCalcsNoN3Sum = 0;
 
  841    SoAFloatPrecision fxacc = 0;
 
  842    SoAFloatPrecision fyacc = 0;
 
  843    SoAFloatPrecision fzacc = 0;
 
  844    const size_t neighborListSize = neighborList.size();
 
  845    const size_t *
const __restrict neighborListPtr = neighborList.data();
 
  848    const auto ownedStateI = ownedStatePtr[indexFirst];
 
  864    constexpr size_t vecsize = 16;
 
  867    constexpr size_t vecsize = 12;
 
  873    if (neighborListSize >= vecsize) {
 
  874      alignas(64) std::array<SoAFloatPrecision, vecsize> xtmp, ytmp, ztmp, xArr, yArr, zArr, fxArr, fyArr, fzArr;
 
  875      alignas(64) std::array<autopas::OwnershipState, vecsize> ownedStateArr{};
 
  877      for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
 
  878        xtmp[tmpj] = xptr[indexFirst];
 
  879        ytmp[tmpj] = yptr[indexFirst];
 
  880        ztmp[tmpj] = zptr[indexFirst];
 
  883      for (; joff < neighborListSize - vecsize + 1; joff += vecsize) {
 
  891        if constexpr (useMixing) {
 
  892          for (
size_t j = 0; j < vecsize; j++) {
 
  895            epsilon24s[j] = _PPLibrary->
getMixing24Epsilon(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
 
  896            if constexpr (applyShift) {
 
  897              shift6s[j] = _PPLibrary->
getMixingShift6(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
 
  903#pragma omp simd safelen(vecsize) 
  904        for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
 
  905          xArr[tmpj] = xptr[neighborListPtr[joff + tmpj]];
 
  906          yArr[tmpj] = yptr[neighborListPtr[joff + tmpj]];
 
  907          zArr[tmpj] = zptr[neighborListPtr[joff + tmpj]];
 
  908          ownedStateArr[tmpj] = ownedStatePtr[neighborListPtr[joff + tmpj]];
 
  911#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) safelen(vecsize) 
  912        for (
size_t j = 0; j < vecsize; j++) {
 
  913          if constexpr (useMixing) {
 
  914            sigmaSquared = sigmaSquareds[j];
 
  915            epsilon24 = epsilon24s[j];
 
  916            if constexpr (applyShift) {
 
  922          const auto ownedStateJ = ownedStateArr[j];
 
  924          const SoAFloatPrecision drx = xtmp[j] - xArr[j];
 
  925          const SoAFloatPrecision dry = ytmp[j] - yArr[j];
 
  926          const SoAFloatPrecision drz = ztmp[j] - zArr[j];
 
  928          const SoAFloatPrecision drx2 = drx * drx;
 
  929          const SoAFloatPrecision dry2 = dry * dry;
 
  930          const SoAFloatPrecision drz2 = drz * drz;
 
  932          const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
 
  938          const SoAFloatPrecision invdr2 = 1. / dr2;
 
  939          const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
 
  940          const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
 
  941          const SoAFloatPrecision lj12 = lj6 * lj6;
 
  942          const SoAFloatPrecision lj12m6 = lj12 - lj6;
 
  943          const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
 
  945          const SoAFloatPrecision fx = drx * fac;
 
  946          const SoAFloatPrecision fy = dry * fac;
 
  947          const SoAFloatPrecision fz = drz * fac;
 
  958          if constexpr (countFLOPs) {
 
  960            if constexpr (newton3) {
 
  961              numKernelCallsN3Sum += mask;
 
  963              numKernelCallsNoN3Sum += mask;
 
  967          if (calculateGlobals) {
 
  968            SoAFloatPrecision virialx = drx * fx;
 
  969            SoAFloatPrecision virialy = dry * fy;
 
  970            SoAFloatPrecision virialz = drz * fz;
 
  971            SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
 
  975            const SoAFloatPrecision energyFactor =
 
  978            potentialEnergySum += potentialEnergy6 * energyFactor;
 
  979            virialSumX += virialx * energyFactor;
 
  980            virialSumY += virialy * energyFactor;
 
  981            virialSumZ += virialz * energyFactor;
 
  983            if constexpr (countFLOPs) {
 
  984              if constexpr (newton3) {
 
  985                numGlobalCalcsN3Sum += mask;
 
  987                numGlobalCalcsNoN3Sum += mask;
 
  994#pragma omp simd safelen(vecsize) 
  995          for (
size_t tmpj = 0; tmpj < vecsize; tmpj++) {
 
  996            const size_t j = neighborListPtr[joff + tmpj];
 
  997            fxptr[j] -= fxArr[tmpj];
 
  998            fyptr[j] -= fyArr[tmpj];
 
  999            fzptr[j] -= fzArr[tmpj];
 
 1005    for (
size_t jNeighIndex = joff; jNeighIndex < neighborListSize; ++jNeighIndex) {
 
 1006      size_t j = neighborList[jNeighIndex];
 
 1007      if (indexFirst == j) 
continue;
 
 1008      if constexpr (useMixing) {
 
 1011        if constexpr (applyShift) {
 
 1012          shift6 = _PPLibrary->
getMixingShift6(typeptr1[indexFirst], typeptr2[j]);
 
 1016      const auto ownedStateJ = ownedStatePtr[j];
 
 1021      const SoAFloatPrecision drx = xptr[indexFirst] - xptr[j];
 
 1022      const SoAFloatPrecision dry = yptr[indexFirst] - yptr[j];
 
 1023      const SoAFloatPrecision drz = zptr[indexFirst] - zptr[j];
 
 1025      const SoAFloatPrecision drx2 = drx * drx;
 
 1026      const SoAFloatPrecision dry2 = dry * dry;
 
 1027      const SoAFloatPrecision drz2 = drz * drz;
 
 1029      const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
 
 1031      if constexpr (countFLOPs) {
 
 1032        numDistanceCalculationSum += 1;
 
 1035      if (dr2 > cutoffSquared) {
 
 1039      const SoAFloatPrecision invdr2 = 1. / dr2;
 
 1040      const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
 
 1041      const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
 
 1042      const SoAFloatPrecision lj12 = lj6 * lj6;
 
 1043      const SoAFloatPrecision lj12m6 = lj12 - lj6;
 
 1044      const SoAFloatPrecision fac = epsilon24 * (lj12 + lj12m6) * invdr2;
 
 1046      const SoAFloatPrecision fx = drx * fac;
 
 1047      const SoAFloatPrecision fy = dry * fac;
 
 1048      const SoAFloatPrecision fz = drz * fac;
 
 1059      if constexpr (countFLOPs) {
 
 1060        if constexpr (newton3) {
 
 1061          numKernelCallsN3Sum += 1;
 
 1063          numKernelCallsNoN3Sum += 1;
 
 1067      if (calculateGlobals) {
 
 1068        SoAFloatPrecision virialx = drx * fx;
 
 1069        SoAFloatPrecision virialy = dry * fy;
 
 1070        SoAFloatPrecision virialz = drz * fz;
 
 1071        SoAFloatPrecision potentialEnergy6 = (epsilon24 * lj12m6 + shift6);
 
 1074        const SoAFloatPrecision energyFactor =
 
 1077        potentialEnergySum += potentialEnergy6 * energyFactor;
 
 1078        virialSumX += virialx * energyFactor;
 
 1079        virialSumY += virialy * energyFactor;
 
 1080        virialSumZ += virialz * energyFactor;
 
 1082        if constexpr (countFLOPs) {
 
 1083          if constexpr (newton3) {
 
 1084            ++numGlobalCalcsN3Sum;
 
 1086            ++numGlobalCalcsNoN3Sum;
 
 1092    if (fxacc != 0 or fyacc != 0 or fzacc != 0) {
 
 1093      fxptr[indexFirst] += fxacc;
 
 1094      fyptr[indexFirst] += fyacc;
 
 1095      fzptr[indexFirst] += fzacc;
 
 1098    if constexpr (countFLOPs) {
 
 1099      _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
 
 1100      _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
 
 1101      _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
 
 1102      _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
 
 1103      _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
 
 1106    if (calculateGlobals) {
 
 1107      _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
 
 1108      _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
 
 1109      _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
 
 1110      _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
 
 1118  class AoSThreadDataGlobals {
 
 1120    AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
 
 1122      virialSum = {0., 0., 0.};
 
 1123      potentialEnergySum = 0.;
 
 1127    std::array<double, 3> virialSum;
 
 1128    double potentialEnergySum;
 
 1132    double __remainingTo64[(64 - 4 * 
sizeof(double)) / 
sizeof(double)];
 
 1142  class AoSThreadDataFLOPs {
 
 1144    AoSThreadDataFLOPs() : __remainingTo64{} {}
 
 1150      numKernelCallsNoN3 = 0;
 
 1151      numKernelCallsN3 = 0;
 
 1153      numGlobalCalcsNoN3 = 0;
 
 1154      numGlobalCalcsN3 = 0;
 
 1161    size_t numKernelCallsNoN3 = 0;
 
 1167    size_t numKernelCallsN3 = 0;
 
 1173    size_t numDistCalls = 0;
 
 1179    size_t numGlobalCalcsN3 = 0;
 
 1185    size_t numGlobalCalcsNoN3 = 0;
 
 1191    double __remainingTo64[(64 - 5 * 
sizeof(size_t)) / 
sizeof(size_t)];
 
 1195  static_assert(
sizeof(AoSThreadDataGlobals) % 64 == 0, 
"AoSThreadDataGlobals has wrong size");
 
 1196  static_assert(
sizeof(AoSThreadDataFLOPs) % 64 == 0, 
"AoSThreadDataFLOPs has wrong size");
 
 1198  const double _cutoffSquared;
 
 1200  double _epsilon24, _sigmaSquared, _shift6 = 0;
 
 1205  double _potentialEnergySum;
 
 1208  std::array<double, 3> _virialSum;
 
 1211  std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals{};
 
 1212  std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
 
 1215  bool _postProcessed;
 
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingShift6(intType i, intType j) const
Returns precomputed mixed shift * 6 for one pair of site types.
Definition: ParticlePropertiesLibrary.h:262
floatType getMixingSigmaSquared(intType i, intType j) const
Returns precomputed mixed squared sigma for one pair of site types.
Definition: ParticlePropertiesLibrary.h:252
floatType getMixing24Epsilon(intType i, intType j) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:224
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:575
auto getLJMixingData(intType i, intType j) const
Get complete mixing data for one pair of LJ site types.
Definition: ParticlePropertiesLibrary.h:234
AlignedAllocator class.
Definition: AlignedAllocator.h:29
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:115
A functor to handle lennard-jones interactions between two particles (molecules).
Definition: LJFunctor.h:41
static constexpr bool getMixing()
Definition: LJFunctor.h:632
double getHitRate() const override
Get the hit rate.
Definition: LJFunctor.h:788
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctor.h:120
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctor.h:112
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctor.h:116
void initTraversal() final
Reset the global values.
Definition: LJFunctor.h:638
LJFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: LJFunctor.h:88
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctor.h:598
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: LJFunctor.h:658
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctor.h:110
LJFunctor()=delete
Deleted default constructor.
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctor.h:613
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:201
std::string getName() final
Returns name of functor.
Definition: LJFunctor.h:108
LJFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: LJFunctor.h:100
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:366
void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared)
Sets the particle properties constants for this functor.
Definition: LJFunctor.h:585
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctor.h:566
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: LJFunctor.h:755
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctor.h:623
double getPotentialEnergy()
Get the potential Energy.
Definition: LJFunctor.h:688
double getVirial()
Get the virial.
Definition: LJFunctor.h:706
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:32
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:22
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132
constexpr unsigned int DEFAULT_CACHE_LINE_SIZE
Default size for a cache line.
Definition: AlignedAllocator.h:21