ampsci
High-precision calculations for one- and two-valence atomic systems
String.hpp
1#pragma once
2#include "fmt/color.hpp"
3#include <algorithm>
4#include <cctype>
5#include <cctype> //char from string
6#include <cstdarg>
7#include <functional>
8#include <sstream>
9#include <string>
10#include <string_view>
11#include <vector>
12
13namespace qip {
14
15//==============================================================================
16
/*!
 @brief Builds a std::string from a printf-style format and its arguments.

 @details The result is limited to 256 characters; longer output is silently
 cut off at that length. Use the overload that takes an explicit size when
 longer strings are required.

 @param format printf-style format string. Taken by value because the last
        named parameter handed to va_start must not be a reference.
 @return The formatted text, without a terminating null.
*/
inline std::string fstring(const std::string format, ...) {
  constexpr std::size_t max_length = 256;

  // One extra slot so that vsnprintf has room for its terminating null
  std::string buffer(max_length + 1, '\0');

  // C-style variadic argument list, forwarded to the C function vsnprintf
  // (the variadic form of snprintf)
  va_list arg_list;
  va_start(arg_list, format);
  vsnprintf(buffer.data(), buffer.size(), format.c_str(), arg_list);
  va_end(arg_list);

  // Drop the terminating null and the unused remainder of the buffer
  const auto end_pos = buffer.find('\0');
  if (end_pos != std::string::npos)
    buffer.resize(end_pos);

  return buffer;
}
41
/*!
 @brief Overload of fstring with an explicit maximum output length.

 @details Behaves like the single-format overload, except that the output is
 limited to @p size characters instead of a fixed limit. Longer output is
 silently cut off.

 @param size   Maximum number of characters in the returned string.
 @param format printf-style format string. Taken by value because the last
        named parameter handed to va_start must not be a reference.
 @return The formatted text, without a terminating null.
*/
inline std::string fstring(const std::size_t size, const std::string format,
                           ...) {
  // nb: a C-style variadic argument list cannot simply be forwarded to the
  // other overload, so the implementation is repeated here.

  // One extra slot so that vsnprintf has room for its terminating null
  std::string buffer(size + 1, '\0');

  // C-style variadic argument list, forwarded to the C function vsnprintf
  // (the variadic form of snprintf)
  va_list arg_list;
  va_start(arg_list, format);
  vsnprintf(buffer.data(), buffer.size(), format.c_str(), arg_list);
  va_end(arg_list);

  // Drop the terminating null and the unused remainder of the buffer
  const auto end_pos = buffer.find('\0');
  if (end_pos != std::string::npos)
    buffer.resize(end_pos);

  return buffer;
}
63
64//==============================================================================
65
/*!
 @brief Compares s1 against pattern s2, where s2 may contain one wildcard '*'
 that matches any substring (including the empty one).

 @details Only the first '*' in s2 acts as a wildcard; any further '*' is
 compared literally. Without a wildcard this is a plain equality test.
 The text before and after the wildcard must match non-overlapping parts of
 s1, so e.g. "ab" does not match "ab*b" (but "abb" does).

 @param s1 String to test.
 @param s2 Pattern, optionally containing one '*'.
 @return true if s1 matches the pattern.
*/
inline bool wildcard_compare(std::string_view s1, std::string_view s2) {
  // look for wildcard:
  const auto pos_wc = s2.find('*');
  if (pos_wc == std::string_view::npos)
    return s1 == s2;

  // literal text required before and after the '*'
  const auto prefix = s2.substr(0, pos_wc);
  const auto suffix = s2.substr(pos_wc + 1);

  // s1 must be long enough to hold prefix and suffix side by side; without
  // this check the two could share characters of s1 and give a false match
  if (s1.length() < prefix.length() + suffix.length())
    return false;

  const auto s1_front = s1.substr(0, prefix.length());
  const auto s1_back = s1.substr(s1.length() - suffix.length());

  return s1_front == prefix && s1_back == suffix;
}
90
91//==============================================================================
92
//! Converts a single character to lowercase (wrapper around std::tolower).
inline char tolower(char ch) {
  // std::tolower expects a value representable as unsigned char, see
  // https://en.cppreference.com/w/cpp/string/byte/tolower
  const auto as_unsigned = static_cast<unsigned char>(ch);
  const int lowered = std::tolower(as_unsigned);
  return static_cast<char>(lowered);
}
98
99//! Returns a lowercase copy of the string.
100inline std::string tolower(std::string t_string) {
101 for (auto &c : t_string) {
102 c = qip::tolower(c);
103 }
104 return t_string;
105}
106
107//==============================================================================
108
//! Returns true if sub_string occurs anywhere inside the_string
//! (case sensitive). An empty sub_string is always found.
inline bool contains(std::string_view the_string, std::string_view sub_string) {
  const auto position = the_string.find(sub_string);
  const bool found = position != std::string_view::npos;
  return found;
}
113
114//! Returns true if the_string contains sub_string (case insensitive).
115inline bool ci_contains(const std::string &the_string,
116 const std::string &sub_string) {
117 return tolower(the_string).find(tolower(sub_string)) != std::string::npos;
118}
119
120//! Returns true if the_string contains any of the sub_strings.
121inline bool contains(const std::string &the_string,
122 const std::vector<std::string> &sub_strings) {
123 for (const auto &substr : sub_strings) {
124 if (contains(the_string, substr))
125 return true;
126 }
127 return false;
128}
129
130//! Returns true if the_string contains any of the sub_strings (case insensitive).
131inline bool ci_contains(const std::string &the_string,
132 const std::vector<std::string> &sub_strings) {
133 for (const auto &substr : sub_strings) {
134 if (ci_contains(the_string, substr))
135 return true;
136 }
137 return false;
138}
139
140//==============================================================================
141
142//! Case-insensitive string comparison; equivalent to tolower(s1) == tolower(s2).
143inline bool ci_compare(std::string_view s1, std::string_view s2) {
144 return std::equal(
145 s1.cbegin(), s1.cend(), s2.cbegin(), s2.cend(),
146 [](char c1, char c2) { return qip::tolower(c1) == qip::tolower(c2); });
147}
148
149/*!
150 @brief Case-insensitive version of @ref wildcard_compare.
151
152 @details Compares s1 against pattern s2, where s2 may contain one wildcard
153 '*' that matches any substring.
154*/
155inline bool ci_wc_compare(std::string_view s1, std::string_view s2) {
156 // look for wildcard:
157 const auto wc = std::find(s2.cbegin(), s2.cend(), '*');
158 if (wc == s2.cend())
159 return ci_compare(s1, s2);
160
161 const auto pos_wc = std::size_t(std::distance(s2.cbegin(), wc));
162
163 const auto s1_front = s1.substr(0, pos_wc);
164 const auto s2_front = s2.substr(0, pos_wc);
165
166 // number of characters following the '*'
167 const auto len_back = std::size_t(std::distance(wc + 1, s2.cend()));
168
169 const auto pos_1_back = s1.length() > len_back ? s1.length() - len_back : 0;
170 const auto s1_back = s1.substr(pos_1_back, std::string::npos);
171 const auto s2_back = s2.substr(pos_wc + 1, std::string::npos);
172
173 return ci_compare(s1_front, s2_front) && ci_compare(s1_back, s2_back);
174}
175
176//==============================================================================
177
/*!
 @brief Returns the Levenshtein edit distance between strings a and b.

 @details The distance is the minimum number of single-character insertions,
 deletions and substitutions needed to turn a into b (case sensitive).
 Computed iteratively, keeping only two rows of the distance table, so no
 recursion is involved and memory use is proportional to the length of b.
 See https://en.wikipedia.org/wiki/Levenshtein_distance

 @return Edit distance, as std::size_t.
*/
inline auto Levenstein(std::string_view a, std::string_view b) {
  // prev_row[j]: distance between the first (i-1) characters of a and the
  // first j characters of b. For i-1 = 0 that is simply j insertions.
  std::vector<std::size_t> prev_row(b.size() + 1);
  std::vector<std::size_t> this_row(b.size() + 1);
  for (std::size_t j = 0; j <= b.size(); ++j)
    prev_row[j] = j;

  for (std::size_t i = 1; i <= a.size(); ++i) {
    // first i characters of a versus empty prefix of b: i deletions
    this_row[0] = i;
    for (std::size_t j = 1; j <= b.size(); ++j) {
      if (a[i - 1] == b[j - 1]) {
        // matching characters cost nothing
        this_row[j] = prev_row[j - 1];
      } else {
        // cheapest of: deletion, insertion, substitution
        this_row[j] =
            1 + std::min({prev_row[j], this_row[j - 1], prev_row[j - 1]});
      }
    }
    std::swap(prev_row, this_row);
  }
  // after the final swap prev_row holds the row for all of a
  return prev_row[b.size()];
}
206
207//! Case-insensitive version of @ref Levenstein.
208inline auto ci_Levenstein(std::string_view a, std::string_view b) {
209 std::vector<size_t> d_t((a.size() + 1) * (b.size() + 1), size_t(-1));
210 auto d = [&](size_t ia, size_t ib) -> size_t & {
211 return d_t[ia * (b.size() + 1) + ib];
212 };
213 std::function<size_t(size_t, size_t)> LevensteinInt =
214 [&](size_t ia, size_t ib) -> size_t {
215 if (d(ia, ib) != size_t(-1))
216 return d(ia, ib);
217 size_t dist = 0;
218 if (ib >= b.size())
219 dist = a.size() - ia;
220 else if (ia >= a.size())
221 dist = b.size() - ib;
222 else if (qip::tolower(a[ia]) == qip::tolower(b[ib]))
223 dist = LevensteinInt(ia + 1, ib + 1);
224 else
225 dist = 1 + std::min(std::min(LevensteinInt(ia, ib + 1),
226 LevensteinInt(ia + 1, ib)),
227 LevensteinInt(ia + 1, ib + 1));
228 d(ia, ib) = dist;
229 return dist;
230 };
231 return LevensteinInt(0, 0);
232}
233
234//! Returns an iterator to the closest match to test_string in list,
235//! using @ref Levenstein distance.
236inline auto closest_match(std::string_view test_string,
237 const std::vector<std::string> &list) {
238 auto compare = [&test_string](const auto &s1, const auto &s2) {
239 return qip::Levenstein(s1, test_string) < qip::Levenstein(s2, test_string);
240 };
241 return std::min_element(list.cbegin(), list.cend(), compare);
242}
243
244//! Returns the closest match (case insensitive) to test_string in list,
245//! using @ref ci_Levenstein distance.
246inline std::string ci_closest_match(const std::string_view test_string,
247 const std::vector<std::string> &list) {
248 auto compare = [&test_string](const auto &s1, const auto &s2) {
249 return qip::ci_Levenstein(s1, test_string) <
250 qip::ci_Levenstein(s2, test_string);
251 };
252 using namespace std::string_literals;
253 return list.empty() ? ""s :
254 *std::min_element(list.cbegin(), list.cend(), compare);
255}
256
257//==============================================================================
258
/*!
 @brief Returns true if the string represents an integer.

 @details
 Accepts an optional leading '+' or '-', which must be followed by at least
 one digit. e.g., "16" and "-12" return true; "12x", "12.5", "-" and the
 empty string return false. Whitespace is not skipped.
*/
inline bool string_is_integer(std::string_view s) {
  // std::isdigit requires a value representable as unsigned char; passing a
  // negative plain char (e.g. a non-ASCII byte) would be undefined behaviour
  const auto is_digit = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  };

  if (s.empty())
    return false;

  // strip one optional leading sign; what remains must be all digits
  const bool has_sign = s.front() == '+' || s.front() == '-';
  const auto digits = has_sign ? s.substr(1) : s;

  return !digits.empty() &&
         std::all_of(digits.cbegin(), digits.cend(), is_digit);
}
274
275//==============================================================================
276
/*!
 @brief Splits a string by delimiter into a vector of substrings.

 @details Consecutive delimiters produce empty entries, but a single trailing
 delimiter does not produce a trailing empty entry (e.g. "a,,b," split on ','
 gives {"a", "", "b"}). An empty input gives an empty vector.
*/
inline std::vector<std::string> split(const std::string &s, char delim = ' ') {
  std::vector<std::string> pieces;
  std::size_t start = 0;
  while (start < s.size()) {
    const auto next_delim = s.find(delim, start);
    if (next_delim == std::string::npos) {
      // no further delimiter: the remainder is the final piece
      pieces.push_back(s.substr(start));
      break;
    }
    pieces.push_back(s.substr(start, next_delim - start));
    start = next_delim + 1;
  }
  return pieces;
}
287
//! Joins the strings in v into a single string, placing delim between
//! consecutive entries (not before the first or after the last).
inline std::string concat(const std::vector<std::string> &v,
                          const std::string &delim = "") {
  std::string joined;
  auto entry = v.cbegin();
  if (entry == v.cend())
    return joined;

  // first entry has no delimiter in front of it
  joined += *entry;
  for (++entry; entry != v.cend(); ++entry) {
    joined += delim;
    joined += *entry;
  }
  return joined;
}
299
300//==============================================================================
301
/*!
 @brief Word-wraps input at column at, optionally prefixing each line.

 @details Lines are broken at the last space that fits; a word is only split
 when it is longer than the available width. Newlines already present in the
 input (other than at the very start of a line) also end the current line.
 The prefix counts towards the line width, so each line holds up to
 (at - prefix.size()) characters of input. If the prefix leaves no room
 (at <= prefix.size()), one input character per line is used instead.

 @param input  Text to wrap.
 @param at     Maximum line width, including the prefix.
 @param prefix Text placed at the start of every output line.
 @return The wrapped text, lines separated by '\n' (no trailing newline).
*/
inline std::string wrap(const std::string &input, std::size_t at = 80,
                        const std::string &prefix = "") {
  std::string output;
  // Width available for input text on each line. Clamped to at least one:
  // a width of zero would never advance through the input (endless loop),
  // and the unsigned subtraction would wrap around if prefix is longer.
  const std::size_t length = at > prefix.size() ? at - prefix.size() : 1;
  std::size_t ipos = 0;      // start of the current line within input
  std::size_t fpos = length; // furthest position the current line may reach
  while (ipos < input.length()) {
    if (!output.empty())
      output += '\n';

    // An existing newline inside the current window ends the line there
    auto temp_pos_nl = input.find('\n', ipos);
    if (temp_pos_nl > ipos && temp_pos_nl < fpos &&
        temp_pos_nl != std::string::npos) {
      output += prefix + input.substr(ipos, temp_pos_nl - ipos);
      ipos = temp_pos_nl + 1;
      fpos = ipos + length;
      continue;
    }

    // Remaining text fits on this line: emit it and finish
    if (fpos >= input.length()) {
      output += prefix + input.substr(ipos, fpos - ipos);
      break;
    }

    // Break at the last space at or before fpos, if there is one after ipos
    auto temp_pos = input.rfind(' ', fpos);
    if (temp_pos <= ipos || temp_pos == std::string::npos) {
      // no usable space: the word must be split at the line width
      output += prefix + input.substr(ipos, fpos - ipos);
      ipos = fpos;
      fpos = ipos + length;
    } else {
      // break at the space, which is dropped from the output
      output += prefix + input.substr(ipos, temp_pos - ipos);
      ipos = temp_pos + 1;
      fpos = ipos + length;
    }
  }
  return output;
}
344
345//==============================================================================
346
/*!
 @brief Converts an integer to a Roman numeral string.

 @details Values with |a| <= 3999 are written as Roman numerals; negative
 values get a leading '-', and zero gives an empty string. Values outside
 that range cannot be written with the symbols used here and are returned in
 ordinary decimal form instead.
*/
inline std::string int_to_roman(int a) {
  // Range check comes first, so that -a below can never overflow
  // (negating the most negative int is undefined behaviour)
  if (a < -3999 || a > 3999)
    return std::to_string(a);
  if (a < 0)
    return "-" + int_to_roman(-a);

  // Numerals for each decimal digit, indexed by that digit's value
  static constexpr const char *thousands[] = {"", "M", "MM", "MMM"};
  static constexpr const char *hundreds[] = {"",  "C",  "CC",  "CCC",  "CD",
                                             "D", "DC", "DCC", "DCCC", "CM"};
  static constexpr const char *tens[] = {"",  "X",  "XX",  "XXX",  "XL",
                                         "L", "LX", "LXX", "LXXX", "XC"};
  static constexpr const char *ones[] = {"",  "I",  "II",  "III",  "IV",
                                         "V", "VI", "VII", "VIII", "IX"};

  std::string roman = thousands[a / 1000];
  roman += hundreds[(a % 1000) / 100];
  roman += tens[(a % 100) / 10];
  roman += ones[a % 10];
  return roman;
}
362
363} // namespace qip
General-purpose utility library.
Definition Array.hpp:23
auto closest_match(std::string_view test_string, const std::vector< std::string > &list)
Returns an iterator to the closest match to test_string in list, using Levenstein distance.
Definition String.hpp:236
std::string concat(const std::vector< std::string > &v, const std::string &delim="")
Concatenates a vector of strings into one, with an optional delimiter.
Definition String.hpp:289
auto Levenstein(std::string_view a, std::string_view b)
Returns the Levenshtein edit distance between strings a and b.
Definition String.hpp:179
std::string ci_closest_match(const std::string_view test_string, const std::vector< std::string > &list)
Returns the closest match (case insensitive) to test_string in list, using ci_Levenstein distance.
Definition String.hpp:246
bool string_is_integer(std::string_view s)
Returns true if the string represents an integer.
Definition String.hpp:266
std::vector< std::string > split(const std::string &s, char delim=' ')
Splits a string by delimiter into a vector of substrings.
Definition String.hpp:278
bool ci_contains(const std::string &the_string, const std::string &sub_string)
Returns true if the_string contains sub_string (case insensitive).
Definition String.hpp:115
auto ci_Levenstein(std::string_view a, std::string_view b)
Case-insensitive version of Levenstein.
Definition String.hpp:208
bool ci_wc_compare(std::string_view s1, std::string_view s2)
Case-insensitive version of wildcard_compare.
Definition String.hpp:155
std::string int_to_roman(int a)
Converts an integer to a Roman numeral string. Assumes |a| <= 3999.
Definition String.hpp:348
std::string fstring(const std::string format,...)
Returns a formatted string using printf-style format specifiers.
Definition String.hpp:23
bool ci_compare(std::string_view s1, std::string_view s2)
Case-insensitive string comparison; equivalent to tolower(s1) == tolower(s2).
Definition String.hpp:143
bool wildcard_compare(std::string_view s1, std::string_view s2)
Compares s1 against pattern s2, where s2 may contain one wildcard '*' that matches any substring.
Definition String.hpp:70
bool contains(std::string_view the_string, std::string_view sub_string)
Returns true if the_string contains sub_string.
Definition String.hpp:110
auto compare(const std::vector< T > &first, const std::vector< T > &second)
Compares two arithmetic vectors element-wise; returns {max_delta, iterator}.
Definition Vector.hpp:41
std::string wrap(const std::string &input, std::size_t at=80, const std::string &prefix="")
Word-wraps input at column at, optionally prefixing each line.
Definition String.hpp:307
char tolower(char ch)
Conversion of a single character to lowercase.
Definition String.hpp:94