ampsci
High-precision calculations for one- and two-valence atomic systems
String.hpp
1#pragma once
#include "fmt/color.hpp"
#include <algorithm>
#include <cctype>
#include <cctype> //char from string
#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
12
13namespace qip {
14
15//==============================================================================
//! Returns a formatted std::string, using printf-like formatting commands.
//! Note: the result is truncated to at most 256 characters. If a longer string
//! is required, use the overload that takes an explicit size.
//! Returns an empty string if vsnprintf reports a formatting error.
inline std::string fstring(const std::string format, ...) {
  constexpr std::size_t size = 256;
  // one extra character of storage: vsnprintf always writes a terminating null
  std::string fmt_str(size + 1, '\0');

  // C-style variadic parameter list, forwarded to vsnprintf (variadic
  // snprintf)
  va_list args;
  // note: the parameter named in va_start must not be a reference type, which
  // is why format is taken by value (copied)
  va_start(args, format);
  const int n_required =
      std::vsnprintf(&fmt_str[0], fmt_str.size(), format.c_str(), args);
  va_end(args);

  // negative return value: formatting failed, buffer contents are unspecified
  if (n_required < 0)
    return {};

  // n_required is the length the complete output would have had; keep only
  // the part that actually fitted into the buffer
  fmt_str.resize(std::min(static_cast<std::size_t>(n_required), size));

  return fmt_str;
}
37
//! Overload: size is the maximum length of the returned string (longer output
//! is truncated). Returns an empty string if vsnprintf reports a formatting
//! error.
inline std::string fstring(const std::size_t size, const std::string format,
                           ...) {
  // nb: cannot simply forward to the other overload, since this is a C-style
  // variadic function - so this is a re-implementation
  // one extra character of storage: vsnprintf always writes a terminating null
  std::string fmt_str(size + 1, '\0');

  // C-style variadic parameter list, forwarded to vsnprintf (variadic
  // snprintf)
  va_list args;
  // note: the parameter named in va_start must not be a reference type, which
  // is why format is taken by value (copied)
  va_start(args, format);
  const int n_required =
      std::vsnprintf(&fmt_str[0], fmt_str.size(), format.c_str(), args);
  va_end(args);

  // negative return value: formatting failed, buffer contents are unspecified
  if (n_required < 0)
    return {};

  // n_required is the length the complete output would have had; keep only
  // the part that actually fitted into the buffer
  fmt_str.resize(std::min(static_cast<std::size_t>(n_required), size));

  return fmt_str;
}
59
60//==============================================================================
//! Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which
//! will match anything (including nothing).
/*!
Only the first '*' in s2 is treated as a wildcard. The text before it must
match the start of s1 and the text after it must match the end of s1; these
two parts are not allowed to overlap within s1 (so "aba" does not match
"ab*ba").
*/
inline bool wildcard_compare(std::string_view s1, std::string_view s2) {
  // look for wildcard:
  const auto pos_wc = s2.find('*');
  if (pos_wc == std::string_view::npos)
    return s1 == s2;

  // parts of the pattern before and after the '*'
  const auto s2_front = s2.substr(0, pos_wc);
  const auto s2_back = s2.substr(pos_wc + 1);

  // s1 must be long enough to hold both parts without them overlapping
  if (s1.size() < s2_front.size() + s2_back.size())
    return false;

  const auto s1_front = s1.substr(0, s2_front.size());
  const auto s1_back = s1.substr(s1.size() - s2_back.size());

  return s1_front == s2_front && s1_back == s2_back;
}
83
84//==============================================================================
//! Converts a single character to lower case. The character is passed to
//! std::tolower as an unsigned char, and the result converted back to char.
inline char tolower(char ch) {
  // https://en.cppreference.com/w/cpp/string/byte/tolower
  const auto as_unsigned = static_cast<unsigned char>(ch);
  const int lowered = std::tolower(as_unsigned);
  return static_cast<char>(lowered);
}
90
//! Returns a copy of t_string with every character converted to lower case
inline std::string tolower(std::string t_string) {
  // t_string is already a copy (taken by value), so transform it in place
  std::transform(t_string.begin(), t_string.end(), t_string.begin(),
                 [](char ch) {
                   return static_cast<char>(
                       std::tolower(static_cast<unsigned char>(ch)));
                 });
  return t_string;
}
97
98//==============================================================================
//! Checks if the_string (arg1) contains sub_string (arg2)
inline bool contains(std::string_view the_string, std::string_view sub_string) {
  const auto position = the_string.find(sub_string);
  return position != std::string_view::npos;
}
103
104//! Checks if the_string (arg1) constaints sub_string (arg2), case insensitive
105inline bool ci_contains(const std::string &the_string,
106 const std::string &sub_string) {
107 return tolower(the_string).find(tolower(sub_string)) != std::string::npos;
108}
109
//! Checks if the_string (arg1) contains any of the sub_strings (arg2).
//! Returns false if sub_strings is empty.
inline bool contains(const std::string &the_string,
                     const std::vector<std::string> &sub_strings) {
  return std::any_of(sub_strings.cbegin(), sub_strings.cend(),
                     [&the_string](const std::string &candidate) {
                       return the_string.find(candidate) != std::string::npos;
                     });
}
119
120//! Checks if the_string (arg1) constaints any of the sub_strings (arg2), case insensitive
121inline bool ci_contains(const std::string &the_string,
122 const std::vector<std::string> &sub_strings) {
123 for (const auto &substr : sub_strings) {
124 if (ci_contains(the_string, substr))
125 return true;
126 }
127 return false;
128}
129
130//==============================================================================
//! Case insensitive string compare. Essentially: LowerCase(s1)==LowerCase(s2)
inline bool ci_compare(std::string_view s1, std::string_view s2) {
  // strings of different length can never be equal
  if (s1.size() != s2.size())
    return false;

  const auto lower = [](char c) {
    return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  };

  for (std::size_t i = 0; i < s1.size(); ++i) {
    if (lower(s1[i]) != lower(s2[i]))
      return false;
  }
  return true;
}
137
138//! Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which
139//! will match anything. Case Insensitive version
140inline bool ci_wc_compare(std::string_view s1, std::string_view s2) {
141 // look for wildcard:
142 const auto wc = std::find(s2.cbegin(), s2.cend(), '*');
143 if (wc == s2.cend())
144 return ci_compare(s1, s2);
145
146 const auto pos_wc = std::size_t(std::distance(s2.cbegin(), wc));
147
148 const auto s1_front = s1.substr(0, pos_wc);
149 const auto s2_front = s2.substr(0, pos_wc);
150
151 // number of characters following the '*'
152 const auto len_back = std::size_t(std::distance(wc + 1, s2.cend()));
153
154 const auto pos_1_back = s1.length() > len_back ? s1.length() - len_back : 0;
155 const auto s1_back = s1.substr(pos_1_back, std::string::npos);
156 const auto s2_back = s2.substr(pos_wc + 1, std::string::npos);
157
158 return ci_compare(s1_front, s2_front) && ci_compare(s1_back, s2_back);
159}
160
161//==============================================================================
162
//! A simple implementation of the Levenshtein distance: the minimum number of
//! single-character insertions, deletions and substitutions needed to turn a
//! into b. Case sensitive.
inline auto Levenstein(std::string_view a, std::string_view b) {
  // https://en.wikipedia.org/wiki/Levenshtein_distance
  // Iterative dynamic programming, keeping only one row of the table:
  // after processing i characters of a, row[j] is the distance between the
  // first i characters of a and the first j characters of b.
  std::vector<std::size_t> row(b.size() + 1);
  for (std::size_t j = 0; j <= b.size(); ++j) {
    row[j] = j; // empty prefix of a -> j insertions
  }

  for (std::size_t i = 1; i <= a.size(); ++i) {
    // diagonal = table entry (i-1, j-1), saved before it is overwritten
    std::size_t diagonal = row[0];
    row[0] = i; // empty prefix of b -> i deletions
    for (std::size_t j = 1; j <= b.size(); ++j) {
      const std::size_t above = row[j]; // table entry (i-1, j)
      const std::size_t substitution = (a[i - 1] == b[j - 1]) ? 0 : 1;
      row[j] = std::min({above + 1, row[j - 1] + 1, diagonal + substitution});
      diagonal = above;
    }
  }
  return row[b.size()];
}
191
//! A simple implementation of the Levenshtein distance (case insensitive):
//! the minimum number of single-character insertions, deletions and
//! substitutions needed to turn a into b, where characters are compared after
//! conversion to lower case.
inline auto ci_Levenstein(std::string_view a, std::string_view b) {
  const auto lower = [](char c) {
    return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  };

  // Iterative dynamic programming, keeping only one row of the table:
  // after processing i characters of a, row[j] is the distance between the
  // first i characters of a and the first j characters of b.
  std::vector<std::size_t> row(b.size() + 1);
  for (std::size_t j = 0; j <= b.size(); ++j) {
    row[j] = j; // empty prefix of a -> j insertions
  }

  for (std::size_t i = 1; i <= a.size(); ++i) {
    // diagonal = table entry (i-1, j-1), saved before it is overwritten
    std::size_t diagonal = row[0];
    row[0] = i; // empty prefix of b -> i deletions
    const char a_lower = lower(a[i - 1]);
    for (std::size_t j = 1; j <= b.size(); ++j) {
      const std::size_t above = row[j]; // table entry (i-1, j)
      const std::size_t substitution = (a_lower == lower(b[j - 1])) ? 0 : 1;
      row[j] = std::min({above + 1, row[j - 1] + 1, diagonal + substitution});
      diagonal = above;
    }
  }
  return row[b.size()];
}
219
220//! Finds the closest match in list to test_string (return iterator)
221inline auto closest_match(std::string_view test_string,
222 const std::vector<std::string> &list) {
223 auto compare = [&test_string](const auto &s1, const auto &s2) {
224 return qip::Levenstein(s1, test_string) < qip::Levenstein(s2, test_string);
225 };
226 return std::min_element(list.cbegin(), list.cend(), compare);
227}
228
229//! Finds the closest match (case insensitive) in list to test_string (return
230//! iterator)
231inline std::string ci_closest_match(const std::string_view test_string,
232 const std::vector<std::string> &list) {
233 auto compare = [&test_string](const auto &s1, const auto &s2) {
234 return qip::ci_Levenstein(s1, test_string) <
235 qip::ci_Levenstein(s2, test_string);
236 };
237 using namespace std::string_literals;
238 return list.empty() ? ""s :
239 *std::min_element(list.cbegin(), list.cend(), compare);
240}
241
242//==============================================================================
//! Checks if a string-like s is integer-like (including a leading sign)
/*!
e.g., The input strings "16", "+16" and "-12" would all return 'true', while
"12x", "12.5", "-" or "" would not.
Does this by checking that all characters are decimal digits, except the first
character, which is also allowed to be '+' or '-' (provided at least one digit
follows it).
*/
inline bool string_is_integer(std::string_view s) {
  // std::isdigit must be given a value representable as unsigned char
  const auto is_digit = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  };

  if (s.empty())
    return false;

  // strip one optional leading sign; whatever remains must be all digits
  const bool has_sign = (s.front() == '+' || s.front() == '-');
  const auto digits = has_sign ? s.substr(1) : s;

  return !digits.empty() &&
         std::all_of(digits.cbegin(), digits.cend(), is_digit);
}
256
257//==============================================================================
//! Splits a string by delimiter into a vector.
//! Consecutive delimiters produce empty entries; a single trailing delimiter
//! does not produce a trailing empty entry; an empty input gives an empty
//! vector.
inline std::vector<std::string> split(const std::string &s, char delim = ' ') {
  std::vector<std::string> out;
  std::size_t start = 0;
  while (start < s.size()) {
    const auto end = s.find(delim, start);
    if (end == std::string::npos) {
      // no further delimiter: the remainder is the last field
      out.push_back(s.substr(start));
      break;
    }
    out.push_back(s.substr(start, end - start));
    start = end + 1;
  }
  return out;
}
268
//! Takes vector of strings, concats into single string, with optional
//! delimiter (placed between entries only; not after the final entry)
inline std::string concat(const std::vector<std::string> &v,
                          const std::string &delim = "") {
  std::string out;
  auto it = v.cbegin();
  if (it == v.cend())
    return out;

  // first entry has no delimiter in front of it; all others do
  out += *it;
  for (++it; it != v.cend(); ++it) {
    out += delim;
    out += *it;
  }
  return out;
}
280
281//==============================================================================
//! Wraps the string, 'input', at line 'at'. Optionally prepends a prefix
//! 'prefix' to each line. Does not split words (if can be avoided)
/*!
The prefix counts towards the line width, so each output line holds at most
(at - prefix.size()) characters of input. If the prefix leaves no room
(prefix.size() >= at), one input character per line is used, so that the
function always makes progress.
*/
inline std::string wrap(const std::string &input, std::size_t at = 80,
                        const std::string &prefix = "") {
  std::string output;
  // Number of input characters per line. Clamped to at least 1: a plain
  // 'at - prefix.size()' would be zero (loop never advances) or wrap around
  // (unsigned arithmetic) when the prefix is as long as / longer than 'at'.
  const std::size_t length = at > prefix.size() ? at - prefix.size() : 1;
  std::size_t ipos = 0;      // start of the current line within input
  std::size_t fpos = length; // furthest position the current line may reach
  while (ipos < input.length()) {
    if (!output.empty())
      output += '\n';

    // An existing line-break inside the current window ends the line there
    auto temp_pos_nl = input.find('\n', ipos);
    if (temp_pos_nl > ipos && temp_pos_nl < fpos &&
        temp_pos_nl != std::string::npos) {
      output += prefix + input.substr(ipos, temp_pos_nl - ipos);
      ipos = temp_pos_nl + 1;
      fpos = ipos + length;
      continue;
    }

    // Remainder of the input fits on this line: emit it and finish
    if (fpos >= input.length()) {
      output += prefix + input.substr(ipos, fpos - ipos);
      break;
    }

    // Otherwise break at the last space at or before fpos
    auto temp_pos = input.rfind(' ', fpos);
    if (temp_pos <= ipos || temp_pos == std::string::npos) {
      // no usable space on this line: have to split the word
      output += prefix + input.substr(ipos, fpos - ipos);
      ipos = fpos;
      fpos = ipos + length;
    } else {
      // break at the space (the space itself is dropped)
      output += prefix + input.substr(ipos, temp_pos - ipos);
      ipos = temp_pos + 1;
      fpos = ipos + length;
    }
  }
  return output;
}
321
322//==============================================================================
//! Converts integer, a, to Roman Numerals.
/*!
Valid for |a| <= 3999; negative values are written with a leading '-', and
zero gives an empty string. Values outside this range cannot be written with
the numerals used here, and are returned as ordinary decimal strings instead.
*/
inline std::string int_to_roman(int a) {
  // Out of range (either sign): fall back to decimal. Checked before negating,
  // so that -a is never evaluated for INT_MIN.
  if (a > 3999 || a < -3999)
    return std::to_string(a);
  if (a < 0)
    return "-" + int_to_roman(-a);

  // numerals for each decimal digit: thousands, hundreds, tens, units
  static constexpr const char *M[] = {"", "M", "MM", "MMM"};
  static constexpr const char *C[] = {"",  "C",  "CC",  "CCC",  "CD",
                                      "D", "DC", "DCC", "DCCC", "CM"};
  static constexpr const char *X[] = {"",  "X",  "XX",  "XXX",  "XL",
                                      "L", "LX", "LXX", "LXXX", "XC"};
  static constexpr const char *I[] = {"",  "I",  "II",  "III",  "IV",
                                      "V", "VI", "VII", "VIII", "IX"};

  std::string out;
  out += M[a / 1000];
  out += C[(a % 1000) / 100];
  out += X[(a % 100) / 10];
  out += I[a % 10];
  return out;
}
338
339} // namespace qip
qip library: A collection of useful functions
Definition Array.hpp:9
auto closest_match(std::string_view test_string, const std::vector< std::string > &list)
Finds the closest match in list to test_string (return iterator)
Definition String.hpp:221
std::string concat(const std::vector< std::string > &v, const std::string &delim="")
Takes vector of strings, concats into single string, with optional delimiter.
Definition String.hpp:270
auto Levenstein(std::string_view a, std::string_view b)
A simple non-optimised implementation of the Levenshtein distance.
Definition String.hpp:164
std::string ci_closest_match(const std::string_view test_string, const std::vector< std::string > &list)
Finds the closest match (case insensitive) in list to test_string (returns a copy of the matching string; empty string if list is empty)
Definition String.hpp:231
bool string_is_integer(std::string_view s)
Checks if a string-like s is integer-like (including -)
Definition String.hpp:248
std::vector< std::string > split(const std::string &s, char delim=' ')
Splits a string by delimiter into a vector.
Definition String.hpp:259
bool ci_contains(const std::string &the_string, const std::string &sub_string)
Checks if the_string (arg1) contains sub_string (arg2), case insensitive.
Definition String.hpp:105
auto ci_Levenstein(std::string_view a, std::string_view b)
A simple non-optimised implementation of the Levenshtein distance (case insensitive)
Definition String.hpp:194
bool ci_wc_compare(std::string_view s1, std::string_view s2)
Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which will match anything....
Definition String.hpp:140
std::string int_to_roman(int a)
Converts integer, a, to Roman Numerals. Assumed that |a|<=4000.
Definition String.hpp:324
std::string fstring(const std::string format,...)
Returns a formatted std::string, with formatting printf-like commands. Note: maximum string length is ...
Definition String.hpp:19
bool ci_compare(std::string_view s1, std::string_view s2)
Case insensitive string compare. Essentially: LowerCase(s1)==LowerCase(s2)
Definition String.hpp:132
bool wildcard_compare(std::string_view s1, std::string_view s2)
Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which will match anything.
Definition String.hpp:63
bool contains(std::string_view the_string, std::string_view sub_string)
Checks if the_string (arg1) contains sub_string (arg2)
Definition String.hpp:100
auto compare(const std::vector< T > &first, const std::vector< T > &second)
Directly compare two arithmetic vectors of the same type and length. Returns pair {delta,...
Definition Vector.hpp:33
std::string wrap(const std::string &input, std::size_t at=80, const std::string &prefix="")
Wraps the string, 'input', at line 'at'. Optionally appends a prefix 'prefix' to each line....
Definition String.hpp:284
char tolower(char ch)
return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
Definition String.hpp:86