AutoPas  3.0.0
Loading...
Searching...
No Matches
StringUtils.h
Go to the documentation of this file.
1
7#pragma once
8
#include <algorithm>
#include <array>
#include <cctype>
#include <cmath>
#include <cstddef>
#include <limits>
#include <memory>
#include <regex>
#include <set>
#include <sstream>
#include <string>
#include <vector>
14
18
20
21// anonymous namespace for namespace-private helper functions
namespace {
/**
 * Calculates a similarity score of s1 and s2 based on the Needleman-Wunsch string alignment algorithm.
 *
 * Every aligned pair of equal characters adds 1 to the score, every aligned pair of different characters
 * and every gap subtracts 1. Identical strings therefore score their length, and a higher score means the
 * strings are more similar.
 *
 * @param s1 First string.
 * @param s2 Second string.
 * @return Score of the best global alignment of s1 and s2.
 */
inline int needlemanWunschScore(const std::string &s1, const std::string &s2) {
  // contributions of the three possible alignment steps
  constexpr int scoreMatch = 1;
  constexpr int scoreMismatch = -1;
  constexpr int scoreGap = -1;

  const std::size_t numRows = s1.length() + 1;
  const std::size_t numCols = s2.length() + 1;

  // (|s1|+1) x (|s2|+1) matrix. Entry [i][j] is the best score for aligning the first i characters of s1
  // with the first j characters of s2.
  std::vector<std::vector<int>> scoreMatrix(numRows, std::vector<int>(numCols, 0));

  // initialize left and top border with cumulative gap penalties.
  // The index is converted to int first so the multiplication with the negative penalty is signed.
  for (std::size_t i = 0; i < numRows; ++i) {
    scoreMatrix[i][0] = static_cast<int>(i) * scoreGap;
  }
  for (std::size_t j = 0; j < numCols; ++j) {
    scoreMatrix[0][j] = static_cast<int>(j) * scoreGap;
  }

  // fill rest of matrix
  for (std::size_t i = 1; i < numRows; ++i) {
    for (std::size_t j = 1; j < numCols; ++j) {
      const int matchValue = s1[i - 1] == s2[j - 1] ? scoreMatch : scoreMismatch;
      const int scoreDiagonal = scoreMatrix[i - 1][j - 1] + matchValue;
      const int scoreUp = scoreMatrix[i - 1][j] + scoreGap;
      const int scoreLeft = scoreMatrix[i][j - 1] + scoreGap;

      scoreMatrix[i][j] = std::max({scoreDiagonal, scoreUp, scoreLeft});
    }
  }

  // omit backtracking since we are not interested in the alignment but only in the score.
  // The lower right corner contains the similarity score.
  return scoreMatrix[numRows - 1][numCols - 1];
}
}  // namespace
77inline std::string matchStrings(const std::vector<std::string> &haystack, std::string needle) {
78 std::transform(needle.begin(), needle.end(), needle.begin(), ::tolower);
79 auto bestDistance = std::numeric_limits<int>::min();
80 std::vector<std::string> matchedStrings;
81 for (auto &s : haystack) {
82 auto distance = needlemanWunschScore(needle, s);
83 // if we find a better match throw out current matches
84 if (distance > bestDistance) {
85 matchedStrings.clear();
86 bestDistance = distance;
87 }
88 // save every match that is at least as good as the current one
89 if (distance >= bestDistance) {
90 matchedStrings.push_back(s);
91 }
92 }
93 if (matchedStrings.size() > 1) {
94 utils::ExceptionHandler::exception("Given String ({}) is ambiguous! Which option do you mean: {}", needle,
95 [](auto arr) -> std::string {
96 std::ostringstream ss;
97 for (auto &a : arr) {
98 ss << a << ", ";
99 }
100 // deletes last comma
101 ss << "\b\b";
102 return ss.str();
103 }(matchedStrings));
104 }
105 return matchedStrings[0];
106}
107
/**
 * All accepted delimiters to split input strings.
 */
constexpr char delimiters[] = " ,;|/";
/**
 * Regex for all delimiters to split input strings.
 */
constexpr char delimitersRgx[] = "[\\s,;|/]";
/**
 * Regex for all but delimiters to split input strings as regex.
 */
constexpr char delimitersRgxInv[] = "[^\\s,;|/]";

/**
 * Regex for a double e.g. 1 | 1.2 | 1.2e-3 | 1.2E+3
 *
 * The pattern has no capture groups and does not include a leading sign.
 * The exponent marker may be upper or lower case and the exponent may carry an explicit sign.
 */
static const std::string regexDoubleStr{
    "[0-9]+"  // at least one digit
    "\\.?"    // maybe a dot
    "[0-9]*"  // maybe more digits after the dot
    "(?:"     // start of non-capturing group for exp
    "[eE]"    // exponent marker
    "[-+]?"   // optional sign
    "[0-9]+"  // at least one digit
    ")?"      // end of group, group is optional
};
134
/**
 * Splits a string by multiple delimiters.
 *
 * Every character contained in delimiters acts as a separator. Consecutive separators as well as leading
 * and trailing ones do not produce empty tokens.
 *
 * @param searchString String to split.
 * @param delimiters Set of single-character separators.
 * @return Vector of the non-empty substrings between separators, in order of appearance.
 */
inline std::vector<std::string> tokenize(const std::string &searchString, const std::string &delimiters) {
  std::vector<std::string> tokens;

  // jump from the start of one token to the start of the next one
  auto tokenBegin = searchString.find_first_not_of(delimiters);
  while (tokenBegin != std::string::npos) {
    const auto tokenEnd = searchString.find_first_of(delimiters, tokenBegin);
    const auto tokenLength = tokenEnd == std::string::npos ? std::string::npos : tokenEnd - tokenBegin;
    tokens.push_back(searchString.substr(tokenBegin, tokenLength));
    // when tokenEnd is npos this search also yields npos and the loop terminates
    tokenBegin = searchString.find_first_not_of(delimiters, tokenEnd);
  }

  return tokens;
}
153
163inline std::array<double, 3> parseArrayD3(const std::string &string) {
164 std::array<double, 3> parsedArray{};
165 auto strings = tokenize(string, delimiters);
166 if (strings.size() > 3) {
167 autopas::utils::ExceptionHandler::exception("parseArrayD3(): found {} instead of 3 array fields.", strings.size());
168 }
169 for (int i = 0; i < 3; i++) {
170 try {
171 parsedArray[i] = std::stod(strings[i]);
172 } catch (const std::exception &e) {
173 autopas::utils::ExceptionHandler::exception("parseArrayD3(): could not convert {} to a double: \n{}", strings[i],
174 e.what());
175 }
176 }
177 return parsedArray;
178}
179
187inline bool parseBoolOption(const std::string &booleanOption) {
188 if (booleanOption == "on" or booleanOption == "true" or booleanOption == "enabled") {
189 return true;
190 } else if (booleanOption == "off" or booleanOption == "false" or booleanOption == "disabled") {
191 return false;
192 } else {
193 autopas::utils::ExceptionHandler::exception("Unknown boolean Option: {}", booleanOption);
194 }
195 // should not be reached
196 return false;
197}
198
204inline std::set<double> parseDoubles(const std::string &doubleString) {
205 std::set<double> doubles;
206
207 std::regex regexDouble(regexDoubleStr);
208
209 // use regex iter to find all doubles in the string.
210 for (auto number = std::sregex_iterator(doubleString.begin(), doubleString.end(), regexDouble);
211 number != std::sregex_iterator(); ++number) {
212 try {
213 double value = stod(number->str());
214 doubles.insert(value);
215 } catch (const std::exception &) {
216 autopas::utils::ExceptionHandler::exception("Failed to parse a double from: {}", number->str());
217 }
218 }
219
220 return doubles;
221}
222
233inline std::unique_ptr<autopas::NumberSet<double>> parseNumberSet(const std::string &setString) {
234 // try to match an interval x-y
235 std::regex regexInterval("(" // start of 1. capture
236 + regexDoubleStr + // a double
237 ")" // end of 1. capture
238 "\\s*" // maybe whitespaces
239 "-" // a dash
240 "\\s*" // maybe more whitespaces
241 "(" // start of 2. capture
242 + regexDoubleStr + // a double
243 ")" // end of 2. capture
244 );
245 std::smatch matches;
246 if (std::regex_match(setString, matches, regexInterval)) {
247 try {
248 // matchers has whole string as str(0) so start at 1
249 double min = stod(matches.str(1));
250 double max = stod(matches.str(2));
251 return std::make_unique<autopas::NumberInterval<double>>(min, max);
252 } catch (const std::exception &) {
253 // try parseDoubles instead
254 }
255 }
256
257 std::set<double> values = autopas::utils::StringUtils::parseDoubles(setString);
258 return std::make_unique<autopas::NumberSetFinite<double>>(values);
259}
260
261} // namespace autopas::utils::StringUtils
static void exception(const Exception e)
Handle an exception derived from std::exception.
Definition: ExceptionHandler.h:63
int needlemanWunschScore(std::string s1, std::string s2)
Calculates a similarity score of s1 and s2 based on the Needleman-Wunsch string alignment algorithm.
Definition: StringUtils.h:30
Some functions to parse enums from (input-) strings.
Definition: namespaces.h:64
constexpr char delimitersRgxInv[]
Regex for all but delimiters to split input strings as regex.
Definition: StringUtils.h:119
bool parseBoolOption(const std::string &booleanOption)
Converts a string to bool.
Definition: StringUtils.h:187
std::string matchStrings(const std::vector< std::string > &haystack, std::string needle)
Finds best match of needle in haystack.
Definition: StringUtils.h:77
constexpr char delimitersRgx[]
Regex for all delimiters to split input strings.
Definition: StringUtils.h:115
std::unique_ptr< autopas::NumberSet< double > > parseNumberSet(const std::string &setString)
Converts a string to a NumberSet<double>.
Definition: StringUtils.h:233
constexpr char delimiters[]
All accepted delimiters to split input strings.
Definition: StringUtils.h:111
std::array< double, 3 > parseArrayD3(const std::string &string)
Converts a string to std::array<double,3>.
Definition: StringUtils.h:163
std::vector< std::string > tokenize(const std::string &searchString, const std::string &delimiters)
Splits a string by multiple delimiters.
Definition: StringUtils.h:141
static const std::string regexDoubleStr
Regex for a double, e.g. 1, 1.2 or 1.2e-3.
Definition: StringUtils.h:124
std::set< double > parseDoubles(const std::string &doubleString)
Converts a string to a set of doubles.
Definition: StringUtils.h:204