AutoPas  3.0.0
Loading...
Searching...
No Matches
LJFunctor.h
Go to the documentation of this file.
1
8#pragma once
9
16#include "autopas/utils/SoA.h"
19#include "autopas/utils/inBox.h"
20
21namespace mdLib {
22
39template <class Particle_T, bool applyShift = false, bool useMixing = false,
40 autopas::FunctorN3Modes useNewton3 = autopas::FunctorN3Modes::Both, bool calculateGlobals = false,
41 bool countFLOPs = false, bool relevantForTuning = true>
43 : public autopas::PairwiseFunctor<Particle_T, LJFunctor<Particle_T, applyShift, useMixing, useNewton3,
44 calculateGlobals, countFLOPs, relevantForTuning>> {
48 using SoAArraysType = typename Particle_T::SoAArraysType;
49
53 using SoAFloatPrecision = typename Particle_T::ParticleSoAFloatPrecision;
54
55 public:
59 LJFunctor() = delete;
60
61 private:
67 explicit LJFunctor(double cutoff, void * /*dummy*/)
68 : autopas::PairwiseFunctor<Particle_T, LJFunctor<Particle_T, applyShift, useMixing, useNewton3, calculateGlobals,
69 countFLOPs, relevantForTuning>>(cutoff),
70 _cutoffSquared{cutoff * cutoff},
71 _potentialEnergySum{0.},
72 _virialSum{0., 0., 0.},
73 _postProcessed{false} {
74 if constexpr (calculateGlobals) {
75 _aosThreadDataGlobals.resize(autopas::autopas_get_max_threads());
76 }
77 if constexpr (countFLOPs) {
78 _aosThreadDataFLOPs.resize(autopas::autopas_get_max_threads());
79 }
80 }
81
82 public:
91 explicit LJFunctor(double cutoff) : LJFunctor(cutoff, nullptr) {
92 static_assert(not useMixing,
93 "Mixing without a ParticlePropertiesLibrary is not possible! Use a different constructor or set "
94 "mixing to false.");
95 }
96
103 explicit LJFunctor(double cutoff, ParticlePropertiesLibrary<double, size_t> &particlePropertiesLibrary)
104 : LJFunctor(cutoff, nullptr) {
105 static_assert(useMixing,
106 "Not using Mixing but using a ParticlePropertiesLibrary is not allowed! Use a different constructor "
107 "or set mixing to true.");
108 _PPLibrary = &particlePropertiesLibrary;
109 }
110
111 std::string getName() final { return "LJFunctorAutoVec"; }
112
113 bool isRelevantForTuning() final { return relevantForTuning; }
114
115 bool allowsNewton3() final {
116 return useNewton3 == autopas::FunctorN3Modes::Newton3Only or useNewton3 == autopas::FunctorN3Modes::Both;
117 }
118
119 bool allowsNonNewton3() final {
120 return useNewton3 == autopas::FunctorN3Modes::Newton3Off or useNewton3 == autopas::FunctorN3Modes::Both;
121 }
122
123 void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final {
124 using namespace autopas::utils::ArrayMath::literals;
125
126 if (i.isDummy() or j.isDummy()) {
127 return;
128 }
129
130 const auto threadnum = autopas::autopas_get_thread_num();
131
132 if constexpr (countFLOPs) {
133 ++_aosThreadDataFLOPs[threadnum].numDistCalls;
134 }
135
136 auto sigmaSquared = _sigmaSquared;
137 auto epsilon24 = _epsilon24;
138 auto shift6 = _shift6;
139 if constexpr (useMixing) {
140 sigmaSquared = _PPLibrary->getMixingSigmaSquared(i.getTypeId(), j.getTypeId());
141 epsilon24 = _PPLibrary->getMixing24Epsilon(i.getTypeId(), j.getTypeId());
142 if constexpr (applyShift) {
143 shift6 = _PPLibrary->getMixingShift6(i.getTypeId(), j.getTypeId());
144 }
145 }
146 auto dr = i.getR() - j.getR();
147 double dr2 = autopas::utils::ArrayMath::dot(dr, dr);
148
149 if (dr2 > _cutoffSquared) {
150 return;
151 }
152
153 double invdr2 = 1. / dr2;
154 double lj6 = sigmaSquared * invdr2;
155 lj6 = lj6 * lj6 * lj6;
156 double lj12 = lj6 * lj6;
157 double lj12m6 = lj12 - lj6;
158 double fac = epsilon24 * (lj12 + lj12m6) * invdr2;
159 auto f = dr * fac;
160 i.addF(f);
161 if (newton3) {
162 // only if we use newton 3 here, we want to
163 j.subF(f);
164 }
165
166 if constexpr (countFLOPs) {
167 if (newton3) {
168 ++_aosThreadDataFLOPs[threadnum].numKernelCallsN3;
169 } else {
170 ++_aosThreadDataFLOPs[threadnum].numKernelCallsNoN3;
171 }
172 }
173
174 if constexpr (calculateGlobals) {
175 // We always add the full contribution for each owned particle and divide the sums by 2 in endTraversal().
176 // Potential energy has an additional factor of 6, which is also handled in endTraversal().
177
178 auto virial = dr * f;
179 double potentialEnergy6 = epsilon24 * lj12m6 + shift6;
180
181 if (i.isOwned()) {
182 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
183 _aosThreadDataGlobals[threadnum].virialSum += virial;
184 }
185 // for non-newton3 the second particle will be considered in a separate calculation
186 if (newton3 and j.isOwned()) {
187 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergy6;
188 _aosThreadDataGlobals[threadnum].virialSum += virial;
189 }
190 if constexpr (countFLOPs) {
191 if (newton3) {
192 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsN3;
193 } else {
194 ++_aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3;
195 }
196 }
197 }
198 }
199
204 void SoAFunctorSingle(autopas::SoAView<SoAArraysType> soa, bool newton3) final {
205 if (soa.size() == 0) return;
206
207 const auto threadnum = autopas::autopas_get_thread_num();
208
209 const auto *const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
210 const auto *const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
211 const auto *const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
212 const auto *const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
213
214 SoAFloatPrecision *const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
215 SoAFloatPrecision *const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
216 SoAFloatPrecision *const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
217
218 [[maybe_unused]] auto *const __restrict typeptr = soa.template begin<Particle_T::AttributeNames::typeId>();
219 // the local redeclaration of the following values helps the SoAFloatPrecision-generation of various compilers.
220 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
221
222 SoAFloatPrecision potentialEnergySum = 0.; // Note: This is not the potential energy but some fixed multiple of it.
223 SoAFloatPrecision virialSumX = 0.;
224 SoAFloatPrecision virialSumY = 0.;
225 SoAFloatPrecision virialSumZ = 0.;
226
227 size_t numDistanceCalculationSum = 0;
228 size_t numKernelCallsN3Sum = 0;
229 size_t numKernelCallsNoN3Sum = 0;
230 size_t numGlobalCalcsSum = 0;
231
232 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
233 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
234 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
235 if constexpr (useMixing) {
236 // Preload all sigma and epsilons for next vectorized region.
237 // Not preloading and directly using the values, will produce worse results.
238 sigmaSquareds.resize(soa.size());
239 epsilon24s.resize(soa.size());
240 // if no mixing or mixing but no shift shift6 is constant therefore we do not need this vector.
241 if constexpr (applyShift) {
242 shift6s.resize(soa.size());
243 }
244 }
245
246 const SoAFloatPrecision const_shift6 = _shift6;
247 const SoAFloatPrecision const_sigmaSquared = _sigmaSquared;
248 const SoAFloatPrecision const_epsilon24 = _epsilon24;
249
250 for (unsigned int i = 0; i < soa.size(); ++i) {
251 const auto ownedStateI = ownedStatePtr[i];
252 if (ownedStateI == autopas::OwnershipState::dummy) {
253 continue;
254 }
255
256 SoAFloatPrecision fxacc = 0.;
257 SoAFloatPrecision fyacc = 0.;
258 SoAFloatPrecision fzacc = 0.;
259
260 if constexpr (useMixing) {
261 for (unsigned int j = 0; j < soa.size(); ++j) {
262 auto mixingData = _PPLibrary->getLJMixingData(typeptr[i], typeptr[j]);
263 sigmaSquareds[j] = mixingData.sigmaSquared;
264 epsilon24s[j] = mixingData.epsilon24;
265 if constexpr (applyShift) {
266 shift6s[j] = mixingData.shift6;
267 }
268 }
269 }
270
271// icpc vectorizes this.
272// g++ only with -ffast-math or -funsafe-math-optimizations
273#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsSum)
274 for (unsigned int j = i + 1; j < soa.size(); ++j) {
275 SoAFloatPrecision shift6 = const_shift6;
276 SoAFloatPrecision sigmaSquared = const_sigmaSquared;
277 SoAFloatPrecision epsilon24 = const_epsilon24;
278 if constexpr (useMixing) {
279 sigmaSquared = sigmaSquareds[j];
280 epsilon24 = epsilon24s[j];
281 if constexpr (applyShift) {
282 shift6 = shift6s[j];
283 }
284 }
285
286 const auto ownedStateJ = ownedStatePtr[j];
287
288 const SoAFloatPrecision drx = xptr[i] - xptr[j];
289 const SoAFloatPrecision dry = yptr[i] - yptr[j];
290 const SoAFloatPrecision drz = zptr[i] - zptr[j];
291
292 const SoAFloatPrecision drx2 = drx * drx;
293 const SoAFloatPrecision dry2 = dry * dry;
294 const SoAFloatPrecision drz2 = drz * drz;
295
296 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
297
298 // Mask away if distance is too large or any particle is a dummy.
299 // Particle ownedStateI was already checked previously.
300 const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;
301
302 const SoAFloatPrecision invdr2 = 1. / dr2;
303 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
304 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
305 const SoAFloatPrecision lj12 = lj6 * lj6;
306 const SoAFloatPrecision lj12m6 = lj12 - lj6;
307 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
308
309 const SoAFloatPrecision fx = drx * fac;
310 const SoAFloatPrecision fy = dry * fac;
311 const SoAFloatPrecision fz = drz * fac;
312
313 fxacc += fx;
314 fyacc += fy;
315 fzacc += fz;
316
317 // newton 3
318 fxptr[j] -= fx;
319 fyptr[j] -= fy;
320 fzptr[j] -= fz;
321
322 if constexpr (countFLOPs) {
323 numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
324 numKernelCallsN3Sum += mask;
325 }
326
327 if (calculateGlobals) {
328 const SoAFloatPrecision virialx = drx * fx;
329 const SoAFloatPrecision virialy = dry * fy;
330 const SoAFloatPrecision virialz = drz * fz;
331 const SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
332
333 // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
334 SoAFloatPrecision energyFactor = (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
335 (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.);
336 potentialEnergySum += potentialEnergy6 * energyFactor;
337
338 virialSumX += virialx * energyFactor;
339 virialSumY += virialy * energyFactor;
340 virialSumZ += virialz * energyFactor;
341
342 if constexpr (countFLOPs) {
343 numGlobalCalcsSum += mask;
344 }
345 }
346 }
347
348 fxptr[i] += fxacc;
349 fyptr[i] += fyacc;
350 fzptr[i] += fzacc;
351 }
352 if constexpr (countFLOPs) {
353 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
354 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
355 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
356 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsSum; // Always N3 in Single SoAFunctor
357 }
358 if (calculateGlobals) {
359 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
360 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
361 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
362 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
363 }
364 }
365
370 const bool newton3) final {
371 if (newton3) {
372 SoAFunctorPairImpl<true>(soa1, soa2);
373 } else {
374 SoAFunctorPairImpl<false>(soa1, soa2);
375 }
376 }
377
378 private:
386 template <bool newton3>
387 void SoAFunctorPairImpl(autopas::SoAView<SoAArraysType> soa1, autopas::SoAView<SoAArraysType> soa2) {
388 if (soa1.size() == 0 || soa2.size() == 0) return;
389
390 const auto threadnum = autopas::autopas_get_thread_num();
391
392 const auto *const __restrict x1ptr = soa1.template begin<Particle_T::AttributeNames::posX>();
393 const auto *const __restrict y1ptr = soa1.template begin<Particle_T::AttributeNames::posY>();
394 const auto *const __restrict z1ptr = soa1.template begin<Particle_T::AttributeNames::posZ>();
395 const auto *const __restrict x2ptr = soa2.template begin<Particle_T::AttributeNames::posX>();
396 const auto *const __restrict y2ptr = soa2.template begin<Particle_T::AttributeNames::posY>();
397 const auto *const __restrict z2ptr = soa2.template begin<Particle_T::AttributeNames::posZ>();
398 const auto *const __restrict ownedStatePtr1 = soa1.template begin<Particle_T::AttributeNames::ownershipState>();
399 const auto *const __restrict ownedStatePtr2 = soa2.template begin<Particle_T::AttributeNames::ownershipState>();
400
401 auto *const __restrict fx1ptr = soa1.template begin<Particle_T::AttributeNames::forceX>();
402 auto *const __restrict fy1ptr = soa1.template begin<Particle_T::AttributeNames::forceY>();
403 auto *const __restrict fz1ptr = soa1.template begin<Particle_T::AttributeNames::forceZ>();
404 auto *const __restrict fx2ptr = soa2.template begin<Particle_T::AttributeNames::forceX>();
405 auto *const __restrict fy2ptr = soa2.template begin<Particle_T::AttributeNames::forceY>();
406 auto *const __restrict fz2ptr = soa2.template begin<Particle_T::AttributeNames::forceZ>();
407 [[maybe_unused]] auto *const __restrict typeptr1 = soa1.template begin<Particle_T::AttributeNames::typeId>();
408 [[maybe_unused]] auto *const __restrict typeptr2 = soa2.template begin<Particle_T::AttributeNames::typeId>();
409
410 // Checks whether the cells are halo cells.
411 SoAFloatPrecision potentialEnergySum = 0.;
412 SoAFloatPrecision virialSumX = 0.;
413 SoAFloatPrecision virialSumY = 0.;
414 SoAFloatPrecision virialSumZ = 0.;
415
416 size_t numDistanceCalculationSum = 0;
417 size_t numKernelCallsN3Sum = 0;
418 size_t numKernelCallsNoN3Sum = 0;
419 size_t numGlobalCalcsN3Sum = 0;
420 size_t numGlobalCalcsNoN3Sum = 0;
421
422 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
423 SoAFloatPrecision shift6 = _shift6;
424 SoAFloatPrecision sigmaSquared = _sigmaSquared;
425 SoAFloatPrecision epsilon24 = _epsilon24;
426
427 // preload all sigma and epsilons for next vectorized region
428 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> sigmaSquareds;
429 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> epsilon24s;
430 std::vector<SoAFloatPrecision, autopas::AlignedAllocator<SoAFloatPrecision>> shift6s;
431 if constexpr (useMixing) {
432 sigmaSquareds.resize(soa2.size());
433 epsilon24s.resize(soa2.size());
434 // if no mixing or mixing but no shift shift6 is constant therefore we do not need this vector.
435 if constexpr (applyShift) {
436 shift6s.resize(soa2.size());
437 }
438 }
439
440 for (unsigned int i = 0; i < soa1.size(); ++i) {
441 SoAFloatPrecision fxacc = 0;
442 SoAFloatPrecision fyacc = 0;
443 SoAFloatPrecision fzacc = 0;
444
445 const auto ownedStateI = ownedStatePtr1[i];
446 if (ownedStateI == autopas::OwnershipState::dummy) {
447 continue;
448 }
449
450 // preload all sigma and epsilons for next vectorized region
451 if constexpr (useMixing) {
452 for (unsigned int j = 0; j < soa2.size(); ++j) {
453 sigmaSquareds[j] = _PPLibrary->getMixingSigmaSquared(typeptr1[i], typeptr2[j]);
454 epsilon24s[j] = _PPLibrary->getMixing24Epsilon(typeptr1[i], typeptr2[j]);
455 if constexpr (applyShift) {
456 shift6s[j] = _PPLibrary->getMixingShift6(typeptr1[i], typeptr2[j]);
457 }
458 }
459 }
460
461// icpc vectorizes this.
462// g++ only with -ffast-math or -funsafe-math-optimizations
463#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum)
464 for (unsigned int j = 0; j < soa2.size(); ++j) {
465 if constexpr (useMixing) {
466 sigmaSquared = sigmaSquareds[j];
467 epsilon24 = epsilon24s[j];
468 if constexpr (applyShift) {
469 shift6 = shift6s[j];
470 }
471 }
472
473 const auto ownedStateJ = ownedStatePtr2[j];
474
475 const SoAFloatPrecision drx = x1ptr[i] - x2ptr[j];
476 const SoAFloatPrecision dry = y1ptr[i] - y2ptr[j];
477 const SoAFloatPrecision drz = z1ptr[i] - z2ptr[j];
478
479 const SoAFloatPrecision drx2 = drx * drx;
480 const SoAFloatPrecision dry2 = dry * dry;
481 const SoAFloatPrecision drz2 = drz * drz;
482
483 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
484
485 // Mask away if distance is too large or any particle is a dummy.
486 // Particle ownedStateI was already checked previously.
487 const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;
488
489 const SoAFloatPrecision invdr2 = 1. / dr2;
490 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
491 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
492 const SoAFloatPrecision lj12 = lj6 * lj6;
493 const SoAFloatPrecision lj12m6 = lj12 - lj6;
494 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
495
496 const SoAFloatPrecision fx = drx * fac;
497 const SoAFloatPrecision fy = dry * fac;
498 const SoAFloatPrecision fz = drz * fac;
499
500 fxacc += fx;
501 fyacc += fy;
502 fzacc += fz;
503 if (newton3) {
504 fx2ptr[j] -= fx;
505 fy2ptr[j] -= fy;
506 fz2ptr[j] -= fz;
507 }
508
509 if constexpr (countFLOPs) {
510 numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
511 if constexpr (newton3) {
512 numKernelCallsN3Sum += mask;
513 } else {
514 numKernelCallsNoN3Sum += mask;
515 }
516 }
517
518 if constexpr (calculateGlobals) {
519 SoAFloatPrecision virialx = drx * fx;
520 SoAFloatPrecision virialy = dry * fy;
521 SoAFloatPrecision virialz = drz * fz;
522 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
523
524 // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
525 const SoAFloatPrecision energyFactor =
526 (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
527 (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
528 potentialEnergySum += potentialEnergy6 * energyFactor;
529 virialSumX += virialx * energyFactor;
530 virialSumY += virialy * energyFactor;
531 virialSumZ += virialz * energyFactor;
532
533 if constexpr (countFLOPs) {
534 if constexpr (newton3) {
535 numGlobalCalcsN3Sum += mask;
536 } else {
537 numGlobalCalcsNoN3Sum += mask;
538 }
539 }
540 }
541 }
542 fx1ptr[i] += fxacc;
543 fy1ptr[i] += fyacc;
544 fz1ptr[i] += fzacc;
545 }
546 if constexpr (countFLOPs) {
547 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
548 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
549 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
550 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
551 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
552 }
553 if (calculateGlobals) {
554 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
555 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
556 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
557 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
558 }
559 }
560
561 public:
562 // clang-format off
568 // clang-format on
569 void SoAFunctorVerlet(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,
570 const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList,
571 bool newton3) final {
572 if (soa.size() == 0 or neighborList.empty()) return;
573 if (newton3) {
574 SoAFunctorVerletImpl<true>(soa, indexFirst, neighborList);
575 } else {
576 SoAFunctorVerletImpl<false>(soa, indexFirst, neighborList);
577 }
578 }
579
588 void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared) {
589 _epsilon24 = epsilon24;
590 _sigmaSquared = sigmaSquared;
591 if (applyShift) {
592 _shift6 = ParticlePropertiesLibrary<double, size_t>::calcShift6(_epsilon24, _sigmaSquared, _cutoffSquared);
593 } else {
594 _shift6 = 0.;
595 }
596 }
597
601 constexpr static auto getNeededAttr() {
602 return std::array<typename Particle_T::AttributeNames, 9>{Particle_T::AttributeNames::id,
603 Particle_T::AttributeNames::posX,
604 Particle_T::AttributeNames::posY,
605 Particle_T::AttributeNames::posZ,
606 Particle_T::AttributeNames::forceX,
607 Particle_T::AttributeNames::forceY,
608 Particle_T::AttributeNames::forceZ,
609 Particle_T::AttributeNames::typeId,
610 Particle_T::AttributeNames::ownershipState};
611 }
612
616 constexpr static auto getNeededAttr(std::false_type) {
617 return std::array<typename Particle_T::AttributeNames, 6>{
618 Particle_T::AttributeNames::id, Particle_T::AttributeNames::posX,
619 Particle_T::AttributeNames::posY, Particle_T::AttributeNames::posZ,
620 Particle_T::AttributeNames::typeId, Particle_T::AttributeNames::ownershipState};
621 }
622
626 constexpr static auto getComputedAttr() {
627 return std::array<typename Particle_T::AttributeNames, 3>{
628 Particle_T::AttributeNames::forceX, Particle_T::AttributeNames::forceY, Particle_T::AttributeNames::forceZ};
629 }
630
635 constexpr static bool getMixing() { return useMixing; }
636
641 void initTraversal() final {
642 _potentialEnergySum = 0.;
643 _virialSum = {0., 0., 0.};
644 _postProcessed = false;
645 if constexpr (calculateGlobals) {
646 for (auto &data : _aosThreadDataGlobals) {
647 data.setZero();
648 }
649 }
650 if constexpr (countFLOPs) {
651 for (auto &data : _aosThreadDataFLOPs) {
652 data.setZero();
653 }
654 }
655 }
656
661 void endTraversal(bool newton3) final {
662 using namespace autopas::utils::ArrayMath::literals;
663
664 if (_postProcessed) {
666 "Already postprocessed, endTraversal(bool newton3) was called twice without calling initTraversal().");
667 }
668 if (calculateGlobals) {
669 for (const auto &data : _aosThreadDataGlobals) {
670 _potentialEnergySum += data.potentialEnergySum;
671 _virialSum += data.virialSum;
672 }
673 // For each interaction, we added the full contribution for both particles. Divide by 2 here, so that each
674 // contribution is only counted once per pair.
675 _potentialEnergySum *= 0.5;
676 _virialSum *= 0.5;
677
678 // We have always calculated 6*potentialEnergy, so we divide by 6 here!
679 _potentialEnergySum /= 6.;
680 _postProcessed = true;
681
682 AutoPasLog(DEBUG, "Final potential energy {}", _potentialEnergySum);
683 AutoPasLog(DEBUG, "Final virial {}", _virialSum[0] + _virialSum[1] + _virialSum[2]);
684 }
685 }
686
692 if (not calculateGlobals) {
694 "Trying to get potential energy even though calculateGlobals is false. If you want this functor to calculate "
695 "global "
696 "values, please specify calculateGlobals to be true.");
697 }
698 if (not _postProcessed) {
700 "Cannot get potential energy, because endTraversal was not called.");
701 }
702 return _potentialEnergySum;
703 }
704
709 double getVirial() {
710 if (not calculateGlobals) {
712 "Trying to get virial even though calculateGlobals is false. If you want this functor to calculate global "
713 "values, please specify calculateGlobals to be true.");
714 }
715 if (not _postProcessed) {
717 "Cannot get virial, because endTraversal was not called.");
718 }
719 return _virialSum[0] + _virialSum[1] + _virialSum[2];
720 }
721
758 [[nodiscard]] size_t getNumFLOPs() const override {
759 if constexpr (countFLOPs) {
760 const size_t numDistCallsAcc =
761 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
762 [](size_t sum, const auto &data) { return sum + data.numDistCalls; });
763 const size_t numKernelCallsN3Acc =
764 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
765 [](size_t sum, const auto &data) { return sum + data.numKernelCallsN3; });
766 const size_t numKernelCallsNoN3Acc =
767 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
768 [](size_t sum, const auto &data) { return sum + data.numKernelCallsNoN3; });
769 const size_t numGlobalCalcsN3Acc =
770 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
771 [](size_t sum, const auto &data) { return sum + data.numGlobalCalcsN3; });
772 const size_t numGlobalCalcsNoN3Acc =
773 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
774 [](size_t sum, const auto &data) { return sum + data.numGlobalCalcsNoN3; });
775
776 constexpr size_t numFLOPsPerDistanceCall = 8;
777 constexpr size_t numFLOPsPerN3KernelCall = 18;
778 constexpr size_t numFLOPsPerNoN3KernelCall = 15;
779 constexpr size_t numFLOPsPerN3GlobalCalc = applyShift ? 13 : 12;
780 constexpr size_t numFLOPsPerNoN3GlobalCalc = applyShift ? 9 : 8;
781
782 return numDistCallsAcc * numFLOPsPerDistanceCall + numKernelCallsN3Acc * numFLOPsPerN3KernelCall +
783 numKernelCallsNoN3Acc * numFLOPsPerNoN3KernelCall + numGlobalCalcsN3Acc * numFLOPsPerN3GlobalCalc +
784 numGlobalCalcsNoN3Acc * numFLOPsPerNoN3GlobalCalc;
785 } else {
786 // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
787 return std::numeric_limits<size_t>::max();
788 }
789 }
790
791 [[nodiscard]] double getHitRate() const override {
792 if constexpr (countFLOPs) {
793 const size_t numDistCallsAcc =
794 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
795 [](size_t sum, const auto &data) { return sum + data.numDistCalls; });
796 const size_t numKernelCallsN3Acc =
797 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
798 [](size_t sum, const auto &data) { return sum + data.numKernelCallsN3; });
799 const size_t numKernelCallsNoN3Acc =
800 std::accumulate(_aosThreadDataFLOPs.begin(), _aosThreadDataFLOPs.end(), 0ul,
801 [](size_t sum, const auto &data) { return sum + data.numKernelCallsNoN3; });
802
803 return (static_cast<double>(numKernelCallsNoN3Acc) + static_cast<double>(numKernelCallsN3Acc)) /
804 (static_cast<double>(numDistCallsAcc));
805 } else {
806 // This is needed because this function still gets called with FLOP logging disabled, just nothing is done with it
807 return std::numeric_limits<double>::quiet_NaN();
808 }
809 }
810
811 private:
812 template <bool newton3>
813 void SoAFunctorVerletImpl(autopas::SoAView<SoAArraysType> soa, const size_t indexFirst,
814 const std::vector<size_t, autopas::AlignedAllocator<size_t>> &neighborList) {
815 const auto *const __restrict xptr = soa.template begin<Particle_T::AttributeNames::posX>();
816 const auto *const __restrict yptr = soa.template begin<Particle_T::AttributeNames::posY>();
817 const auto *const __restrict zptr = soa.template begin<Particle_T::AttributeNames::posZ>();
818
819 auto *const __restrict fxptr = soa.template begin<Particle_T::AttributeNames::forceX>();
820 auto *const __restrict fyptr = soa.template begin<Particle_T::AttributeNames::forceY>();
821 auto *const __restrict fzptr = soa.template begin<Particle_T::AttributeNames::forceZ>();
822 [[maybe_unused]] auto *const __restrict typeptr1 = soa.template begin<Particle_T::AttributeNames::typeId>();
823 [[maybe_unused]] auto *const __restrict typeptr2 = soa.template begin<Particle_T::AttributeNames::typeId>();
824
825 const auto *const __restrict ownedStatePtr = soa.template begin<Particle_T::AttributeNames::ownershipState>();
826
827 const SoAFloatPrecision cutoffSquared = _cutoffSquared;
828 SoAFloatPrecision shift6 = _shift6;
829 SoAFloatPrecision sigmaSquared = _sigmaSquared;
830 SoAFloatPrecision epsilon24 = _epsilon24;
831
832 SoAFloatPrecision potentialEnergySum = 0.;
833 SoAFloatPrecision virialSumX = 0.;
834 SoAFloatPrecision virialSumY = 0.;
835 SoAFloatPrecision virialSumZ = 0.;
836
837 // Counters for when countFLOPs is activated
838 size_t numDistanceCalculationSum = 0;
839 size_t numKernelCallsN3Sum = 0;
840 size_t numKernelCallsNoN3Sum = 0;
841 size_t numGlobalCalcsN3Sum = 0;
842 size_t numGlobalCalcsNoN3Sum = 0;
843
844 SoAFloatPrecision fxacc = 0;
845 SoAFloatPrecision fyacc = 0;
846 SoAFloatPrecision fzacc = 0;
847 const size_t neighborListSize = neighborList.size();
848 const size_t *const __restrict neighborListPtr = neighborList.data();
849
850 // checks whether particle i is owned.
851 const auto ownedStateI = ownedStatePtr[indexFirst];
852 if (ownedStateI == autopas::OwnershipState::dummy) {
853 return;
854 }
855
856 const auto threadnum = autopas::autopas_get_thread_num();
857
858 // this is a magic number, that should correspond to at least
859 // vectorization width*N have testet multiple sizes:
860 // 4: does not give a speedup, slower than original AoSFunctor
861 // 8: small speedup compared to AoS
862 // 12: highest speedup compared to Aos
863 // 16: smaller speedup
864 // in theory this is a variable, we could auto-tune over...
865#ifdef __AVX512F__
866 // use a multiple of 8 for avx
867 constexpr size_t vecsize = 16;
868#else
869 // for everything else 12 is faster
870 constexpr size_t vecsize = 12;
871#endif
872 size_t joff = 0;
873
874 // if the size of the verlet list is larger than the given size vecsize,
875 // we will use a vectorized version.
876 if (neighborListSize >= vecsize) {
877 alignas(64) std::array<SoAFloatPrecision, vecsize> xtmp, ytmp, ztmp, xArr, yArr, zArr, fxArr, fyArr, fzArr;
878 alignas(64) std::array<autopas::OwnershipState, vecsize> ownedStateArr{};
879 // broadcast of the position of particle i
880 for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {
881 xtmp[tmpj] = xptr[indexFirst];
882 ytmp[tmpj] = yptr[indexFirst];
883 ztmp[tmpj] = zptr[indexFirst];
884 }
885 // loop over the verlet list from 0 to x*vecsize
886 for (; joff < neighborListSize - vecsize + 1; joff += vecsize) {
887 // in each iteration we calculate the interactions of particle i with
888 // vecsize particles in the neighborlist of particle i starting at
889 // particle joff
890
891 [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> sigmaSquareds;
892 [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> epsilon24s;
893 [[maybe_unused]] alignas(autopas::DEFAULT_CACHE_LINE_SIZE) std::array<SoAFloatPrecision, vecsize> shift6s;
894 if constexpr (useMixing) {
895 for (size_t j = 0; j < vecsize; j++) {
896 sigmaSquareds[j] =
897 _PPLibrary->getMixingSigmaSquared(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
898 epsilon24s[j] = _PPLibrary->getMixing24Epsilon(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
899 if constexpr (applyShift) {
900 shift6s[j] = _PPLibrary->getMixingShift6(typeptr1[indexFirst], typeptr2[neighborListPtr[joff + j]]);
901 }
902 }
903 }
904
905 // gather position of particle j
906#pragma omp simd safelen(vecsize)
907 for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {
908 xArr[tmpj] = xptr[neighborListPtr[joff + tmpj]];
909 yArr[tmpj] = yptr[neighborListPtr[joff + tmpj]];
910 zArr[tmpj] = zptr[neighborListPtr[joff + tmpj]];
911 ownedStateArr[tmpj] = ownedStatePtr[neighborListPtr[joff + tmpj]];
912 }
913 // do omp simd with reduction of the interaction
914#pragma omp simd reduction(+ : fxacc, fyacc, fzacc, potentialEnergySum, virialSumX, virialSumY, virialSumZ, numDistanceCalculationSum, numKernelCallsN3Sum, numKernelCallsNoN3Sum, numGlobalCalcsN3Sum, numGlobalCalcsNoN3Sum) safelen(vecsize)
915 for (size_t j = 0; j < vecsize; j++) {
916 if constexpr (useMixing) {
917 sigmaSquared = sigmaSquareds[j];
918 epsilon24 = epsilon24s[j];
919 if constexpr (applyShift) {
920 shift6 = shift6s[j];
921 }
922 }
923 // const size_t j = currentList[jNeighIndex];
924
925 const auto ownedStateJ = ownedStateArr[j];
926
927 const SoAFloatPrecision drx = xtmp[j] - xArr[j];
928 const SoAFloatPrecision dry = ytmp[j] - yArr[j];
929 const SoAFloatPrecision drz = ztmp[j] - zArr[j];
930
931 const SoAFloatPrecision drx2 = drx * drx;
932 const SoAFloatPrecision dry2 = dry * dry;
933 const SoAFloatPrecision drz2 = drz * drz;
934
935 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
936
937 // Mask away if distance is too large or any particle is a dummy.
938 // Particle ownedStateI was already checked previously.
939 const bool mask = dr2 <= cutoffSquared and ownedStateJ != autopas::OwnershipState::dummy;
940
941 const SoAFloatPrecision invdr2 = 1. / dr2;
942 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
943 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
944 const SoAFloatPrecision lj12 = lj6 * lj6;
945 const SoAFloatPrecision lj12m6 = lj12 - lj6;
946 const SoAFloatPrecision fac = mask * epsilon24 * (lj12 + lj12m6) * invdr2;
947
948 const SoAFloatPrecision fx = drx * fac;
949 const SoAFloatPrecision fy = dry * fac;
950 const SoAFloatPrecision fz = drz * fac;
951
952 fxacc += fx;
953 fyacc += fy;
954 fzacc += fz;
955 if (newton3) {
956 fxArr[j] = fx;
957 fyArr[j] = fy;
958 fzArr[j] = fz;
959 }
960
961 if constexpr (countFLOPs) {
962 numDistanceCalculationSum += ownedStateJ != autopas::OwnershipState::dummy ? 1 : 0;
963 if constexpr (newton3) {
964 numKernelCallsN3Sum += mask;
965 } else {
966 numKernelCallsNoN3Sum += mask;
967 }
968 }
969
970 if (calculateGlobals) {
971 SoAFloatPrecision virialx = drx * fx;
972 SoAFloatPrecision virialy = dry * fy;
973 SoAFloatPrecision virialz = drz * fz;
974 SoAFloatPrecision potentialEnergy6 = mask * (epsilon24 * lj12m6 + shift6);
975
976 // We add 6 times the potential energy for each owned particle. The total sum is corrected in
977 // endTraversal().
978 const SoAFloatPrecision energyFactor =
979 (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
980 (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
981 potentialEnergySum += potentialEnergy6 * energyFactor;
982 virialSumX += virialx * energyFactor;
983 virialSumY += virialy * energyFactor;
984 virialSumZ += virialz * energyFactor;
985
986 if constexpr (countFLOPs) {
987 if constexpr (newton3) {
988 numGlobalCalcsN3Sum += mask;
989 } else {
990 numGlobalCalcsNoN3Sum += mask;
991 }
992 }
993 }
994 }
995 // scatter the forces to where they belong, this is only needed for newton3
996 if (newton3) {
997#pragma omp simd safelen(vecsize)
998 for (size_t tmpj = 0; tmpj < vecsize; tmpj++) {
999 const size_t j = neighborListPtr[joff + tmpj];
1000 fxptr[j] -= fxArr[tmpj];
1001 fyptr[j] -= fyArr[tmpj];
1002 fzptr[j] -= fzArr[tmpj];
1003 }
1004 }
1005 }
1006 }
1007 // this loop goes over the remainder and uses no optimizations
1008 for (size_t jNeighIndex = joff; jNeighIndex < neighborListSize; ++jNeighIndex) {
1009 size_t j = neighborList[jNeighIndex];
1010 if (indexFirst == j) continue;
1011 if constexpr (useMixing) {
1012 sigmaSquared = _PPLibrary->getMixingSigmaSquared(typeptr1[indexFirst], typeptr2[j]);
1013 epsilon24 = _PPLibrary->getMixing24Epsilon(typeptr1[indexFirst], typeptr2[j]);
1014 if constexpr (applyShift) {
1015 shift6 = _PPLibrary->getMixingShift6(typeptr1[indexFirst], typeptr2[j]);
1016 }
1017 }
1018
1019 const auto ownedStateJ = ownedStatePtr[j];
1020 if (ownedStateJ == autopas::OwnershipState::dummy) {
1021 continue;
1022 }
1023
1024 const SoAFloatPrecision drx = xptr[indexFirst] - xptr[j];
1025 const SoAFloatPrecision dry = yptr[indexFirst] - yptr[j];
1026 const SoAFloatPrecision drz = zptr[indexFirst] - zptr[j];
1027
1028 const SoAFloatPrecision drx2 = drx * drx;
1029 const SoAFloatPrecision dry2 = dry * dry;
1030 const SoAFloatPrecision drz2 = drz * drz;
1031
1032 const SoAFloatPrecision dr2 = drx2 + dry2 + drz2;
1033
1034 if constexpr (countFLOPs) {
1035 numDistanceCalculationSum += 1;
1036 }
1037
1038 if (dr2 > cutoffSquared) {
1039 continue;
1040 }
1041
1042 const SoAFloatPrecision invdr2 = 1. / dr2;
1043 const SoAFloatPrecision lj2 = sigmaSquared * invdr2;
1044 const SoAFloatPrecision lj6 = lj2 * lj2 * lj2;
1045 const SoAFloatPrecision lj12 = lj6 * lj6;
1046 const SoAFloatPrecision lj12m6 = lj12 - lj6;
1047 const SoAFloatPrecision fac = epsilon24 * (lj12 + lj12m6) * invdr2;
1048
1049 const SoAFloatPrecision fx = drx * fac;
1050 const SoAFloatPrecision fy = dry * fac;
1051 const SoAFloatPrecision fz = drz * fac;
1052
1053 fxacc += fx;
1054 fyacc += fy;
1055 fzacc += fz;
1056 if (newton3) {
1057 fxptr[j] -= fx;
1058 fyptr[j] -= fy;
1059 fzptr[j] -= fz;
1060 }
1061
1062 if constexpr (countFLOPs) {
1063 if constexpr (newton3) {
1064 numKernelCallsN3Sum += 1;
1065 } else {
1066 numKernelCallsNoN3Sum += 1;
1067 }
1068 }
1069
1070 if (calculateGlobals) {
1071 SoAFloatPrecision virialx = drx * fx;
1072 SoAFloatPrecision virialy = dry * fy;
1073 SoAFloatPrecision virialz = drz * fz;
1074 SoAFloatPrecision potentialEnergy6 = (epsilon24 * lj12m6 + shift6);
1075
1076 // We add 6 times the potential energy for each owned particle. The total sum is corrected in endTraversal().
1077 const SoAFloatPrecision energyFactor =
1078 (ownedStateI == autopas::OwnershipState::owned ? 1. : 0.) +
1079 (newton3 ? (ownedStateJ == autopas::OwnershipState::owned ? 1. : 0.) : 0.);
1080 potentialEnergySum += potentialEnergy6 * energyFactor;
1081 virialSumX += virialx * energyFactor;
1082 virialSumY += virialy * energyFactor;
1083 virialSumZ += virialz * energyFactor;
1084
1085 if constexpr (countFLOPs) {
1086 if constexpr (newton3) {
1087 ++numGlobalCalcsN3Sum;
1088 } else {
1089 ++numGlobalCalcsNoN3Sum;
1090 }
1091 }
1092 }
1093 }
1094
1095 if (fxacc != 0 or fyacc != 0 or fzacc != 0) {
1096 fxptr[indexFirst] += fxacc;
1097 fyptr[indexFirst] += fyacc;
1098 fzptr[indexFirst] += fzacc;
1099 }
1100
1101 if constexpr (countFLOPs) {
1102 _aosThreadDataFLOPs[threadnum].numDistCalls += numDistanceCalculationSum;
1103 _aosThreadDataFLOPs[threadnum].numKernelCallsNoN3 += numKernelCallsNoN3Sum;
1104 _aosThreadDataFLOPs[threadnum].numKernelCallsN3 += numKernelCallsN3Sum;
1105 _aosThreadDataFLOPs[threadnum].numGlobalCalcsNoN3 += numGlobalCalcsNoN3Sum;
1106 _aosThreadDataFLOPs[threadnum].numGlobalCalcsN3 += numGlobalCalcsN3Sum;
1107 }
1108
1109 if (calculateGlobals) {
1110 _aosThreadDataGlobals[threadnum].potentialEnergySum += potentialEnergySum;
1111 _aosThreadDataGlobals[threadnum].virialSum[0] += virialSumX;
1112 _aosThreadDataGlobals[threadnum].virialSum[1] += virialSumY;
1113 _aosThreadDataGlobals[threadnum].virialSum[2] += virialSumZ;
1114 }
1115 }
1116
1121 class AoSThreadDataGlobals {
1122 public:
1123 AoSThreadDataGlobals() : virialSum{0., 0., 0.}, potentialEnergySum{0.}, __remainingTo64{} {}
1124 void setZero() {
1125 virialSum = {0., 0., 0.};
1126 potentialEnergySum = 0.;
1127 }
1128
1129 // variables
1130 std::array<double, 3> virialSum;
1131 double potentialEnergySum;
1132
1133 private:
1134 // dummy parameter to get the right size (64 bytes)
1135 double __remainingTo64[(64 - 4 * sizeof(double)) / sizeof(double)];
1136 };
1137
1145 class AoSThreadDataFLOPs {
1146 public:
1147 AoSThreadDataFLOPs() : __remainingTo64{} {}
1148
1152 void setZero() {
1153 numKernelCallsNoN3 = 0;
1154 numKernelCallsN3 = 0;
1155 numDistCalls = 0;
1156 numGlobalCalcsNoN3 = 0;
1157 numGlobalCalcsN3 = 0;
1158 }
1159
1164 size_t numKernelCallsNoN3 = 0;
1165
1170 size_t numKernelCallsN3 = 0;
1171
1176 size_t numDistCalls = 0;
1177
1182 size_t numGlobalCalcsN3 = 0;
1183
1188 size_t numGlobalCalcsNoN3 = 0;
1189
1190 private:
1194 double __remainingTo64[(64 - 5 * sizeof(size_t)) / sizeof(size_t)];
1195 };
1196
1197 // make sure of the size of AoSThreadDataGlobals and AoSThreadDataFLOPs
1198 static_assert(sizeof(AoSThreadDataGlobals) % 64 == 0, "AoSThreadDataGlobals has wrong size");
1199 static_assert(sizeof(AoSThreadDataFLOPs) % 64 == 0, "AoSThreadDataFLOPs has wrong size");
1200
1201 const double _cutoffSquared;
1202 // not const because they might be reset through PPL
1203 double _epsilon24, _sigmaSquared, _shift6 = 0;
1204
1206
1207 // sum of the potential energy, only calculated if calculateGlobals is true
1208 double _potentialEnergySum;
1209
1210 // sum of the virial, only calculated if calculateGlobals is true
1211 std::array<double, 3> _virialSum;
1212
1213 // thread buffer for aos
1214 std::vector<AoSThreadDataGlobals> _aosThreadDataGlobals{};
1215 std::vector<AoSThreadDataFLOPs> _aosThreadDataFLOPs{};
1216
1217 // defines whether or whether not the global values are already preprocessed
1218 bool _postProcessed;
1219};
1220} // namespace mdLib
#define AutoPasLog(lvl, fmt,...)
Macro for logging providing common meta information without filename.
Definition: Logger.h:24
This class stores the (physical) properties of molecule types, and, in the case of multi-site molecul...
Definition: ParticlePropertiesLibrary.h:28
floatType getMixingShift6(intType i, intType j) const
Returns precomputed mixed shift * 6 for one pair of site types.
Definition: ParticlePropertiesLibrary.h:262
floatType getMixingSigmaSquared(intType i, intType j) const
Returns precomputed mixed squared sigma for one pair of site types.
Definition: ParticlePropertiesLibrary.h:252
floatType getMixing24Epsilon(intType i, intType j) const
Returns the precomputed mixed epsilon * 24.
Definition: ParticlePropertiesLibrary.h:224
static double calcShift6(double epsilon24, double sigmaSquared, double cutoffSquared)
Calculate the shift multiplied 6 of the lennard jones potential from given cutoff,...
Definition: ParticlePropertiesLibrary.h:576
auto getLJMixingData(intType i, intType j) const
Get complete mixing data for one pair of LJ site types.
Definition: ParticlePropertiesLibrary.h:234
AlignedAllocator class.
Definition: AlignedAllocator.h:29
PairwiseFunctor class.
Definition: PairwiseFunctor.h:31
PairwiseFunctor(double cutoff)
Constructor.
Definition: PairwiseFunctor.h:42
View on a fixed part of a SoA between a start index and an end index.
Definition: SoAView.h:23
size_t size() const
Returns the number of particles in the view.
Definition: SoAView.h:83
Default exception class for autopas exceptions.
Definition: ExceptionHandler.h:116
A functor to handle lennard-jones interactions between two particles (molecules).
Definition: LJFunctor.h:44
static constexpr bool getMixing()
Definition: LJFunctor.h:635
double getHitRate() const override
Get the hit rate.
Definition: LJFunctor.h:791
void AoSFunctor(Particle_T &i, Particle_T &j, bool newton3) final
PairwiseFunctor for arrays of structures (AoS).
Definition: LJFunctor.h:123
bool allowsNewton3() final
Specifies whether the functor is capable of Newton3-like functors.
Definition: LJFunctor.h:115
bool allowsNonNewton3() final
Specifies whether the functor is capable of non-Newton3-like functors.
Definition: LJFunctor.h:119
void initTraversal() final
Reset the global values.
Definition: LJFunctor.h:641
LJFunctor(double cutoff)
Constructor for Functor with mixing disabled.
Definition: LJFunctor.h:91
static constexpr auto getNeededAttr()
Get attributes needed for computation.
Definition: LJFunctor.h:601
void endTraversal(bool newton3) final
Accumulates global values, e.g.
Definition: LJFunctor.h:661
bool isRelevantForTuning() final
Specifies whether the functor should be considered for the auto-tuning process.
Definition: LJFunctor.h:113
LJFunctor()=delete
Deleted default constructor.
static constexpr auto getNeededAttr(std::false_type)
Get attributes needed for computation without N3 optimization.
Definition: LJFunctor.h:616
void SoAFunctorSingle(autopas::SoAView< SoAArraysType > soa, bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:204
std::string getName() final
Returns name of functor.
Definition: LJFunctor.h:111
LJFunctor(double cutoff, ParticlePropertiesLibrary< double, size_t > &particlePropertiesLibrary)
Constructor for Functor with mixing active.
Definition: LJFunctor.h:103
void SoAFunctorPair(autopas::SoAView< SoAArraysType > soa1, autopas::SoAView< SoAArraysType > soa2, const bool newton3) final
PairwiseFunctor for structure of arrays (SoA)
Definition: LJFunctor.h:369
void setParticleProperties(SoAFloatPrecision epsilon24, SoAFloatPrecision sigmaSquared)
Sets the particle properties constants for this functor.
Definition: LJFunctor.h:588
void SoAFunctorVerlet(autopas::SoAView< SoAArraysType > soa, const size_t indexFirst, const std::vector< size_t, autopas::AlignedAllocator< size_t > > &neighborList, bool newton3) final
PairwiseFunctor for structure of arrays (SoA) for neighbor lists.
Definition: LJFunctor.h:569
size_t getNumFLOPs() const override
Gets the number of useful FLOPs.
Definition: LJFunctor.h:758
static constexpr auto getComputedAttr()
Get attributes computed by this functor.
Definition: LJFunctor.h:626
double getPotentialEnergy()
Get the potential Energy.
Definition: LJFunctor.h:691
double getVirial()
Get the virial.
Definition: LJFunctor.h:709
constexpr T dot(const std::array< T, SIZE > &a, const std::array< T, SIZE > &b)
Generates the dot product of two arrays.
Definition: ArrayMath.h:233
This is the main namespace of AutoPas.
Definition: AutoPasDecl.h:34
int autopas_get_max_threads()
Dummy for omp_get_max_threads() when no OpenMP is available.
Definition: WrapOpenMP.h:144
@ dummy
Dummy or deleted state, a particle with this state is not an actual particle!
@ owned
Owned state, a particle with this state is an actual particle and owned by the current AutoPas object...
FunctorN3Modes
Newton 3 modes for the Functor.
Definition: Functor.h:23
int autopas_get_thread_num()
Dummy for omp_set_lock() when no OpenMP is available.
Definition: WrapOpenMP.h:132
constexpr unsigned int DEFAULT_CACHE_LINE_SIZE
Default size for a cache line.
Definition: AlignedAllocator.h:21