ampsci
c++ program for high-precision atomic structure calculations of single-valence systems
Loading...
Searching...
No Matches
String.hpp
1#pragma once
2#include <algorithm>
3#include <cctype>
4#include <cctype> //char from string
5#include <cstdarg>
6#include <functional>
7#include <sstream>
8#include <string>
9#include <string_view>
10#include <vector>
11
12namespace qip {
13
14//==============================================================================
//! Returns a formatted std::string, using printf-like formatting commands.
//! The output is limited to 256 characters; anything longer is truncated.
//! @param format printf-style format string. Taken by value on purpose:
//!               the last named parameter before '...' must not be a
//!               reference type for va_start.
//! @return The formatted text, without any trailing null characters.
inline std::string fstring(const std::string format, ...) {
  constexpr std::size_t max_chars = 256;

  // Zero-filled working buffer; the extra element holds the terminating null.
  std::string buffer(max_chars + 1, '\0');

  // C-style variadic parameter list, forwarded to the C function vsnprintf
  // (the variadic form of snprintf).
  va_list arg_list;
  va_start(arg_list, format);
  vsnprintf(buffer.data(), buffer.size(), format.c_str(), arg_list);
  va_end(arg_list);

  // Drop everything from the first null character onwards (unused buffer).
  const auto terminator = buffer.find('\0');
  if (terminator != std::string::npos) {
    buffer.resize(terminator);
  }

  return buffer;
}
36
//! Returns a formatted std::string, using printf-like formatting commands,
//! with a caller-chosen maximum length.
//! @param size   Maximum number of characters in the returned string; longer
//!               output is truncated.
//! @param format printf-style format string. Taken by value on purpose:
//!               the last named parameter before '...' must not be a
//!               reference type for va_start.
//! @return The formatted text, without any trailing null characters.
inline std::string fstring(const std::size_t size, const std::string format,
                           ...) {
  // nb: the C-style variadic arguments cannot simply be forwarded to the other
  // overload, so the implementation is repeated here.

  // Zero-filled working buffer; the extra element holds the terminating null.
  std::string buffer(size + 1, '\0');

  // C-style variadic parameter list, forwarded to the C function vsnprintf
  // (the variadic form of snprintf).
  va_list arg_list;
  va_start(arg_list, format);
  vsnprintf(buffer.data(), buffer.size(), format.c_str(), arg_list);
  va_end(arg_list);

  // Drop everything from the first null character onwards (unused buffer).
  const auto terminator = buffer.find('\0');
  if (terminator != std::string::npos) {
    buffer.resize(terminator);
  }

  return buffer;
}
58
59//==============================================================================
//! Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which
//! will match anything (including nothing).
//! @param s1 The string being tested.
//! @param s2 The pattern. Only the first '*' is treated as a wildcard; any
//!           further '*' characters are compared literally.
//! @return true if s1 starts with the part of s2 before the wildcard and ends
//!         with the part after it, without those two parts overlapping in s1.
//!         If s2 has no wildcard, this is plain equality.
inline bool wildcard_compare(std::string_view s1, std::string_view s2) {
  // look for wildcard:
  const auto pos_wc = s2.find('*');
  if (pos_wc == std::string_view::npos)
    return s1 == s2;

  // literal text required before and after the '*'
  const auto s2_front = s2.substr(0, pos_wc);
  const auto s2_back = s2.substr(pos_wc + 1);

  // s1 must be long enough to hold the front and back parts side by side;
  // otherwise the two slices of s1 would overlap and, e.g., "ab" would
  // wrongly match "ab*b".
  if (s1.size() < s2_front.size() + s2_back.size())
    return false;

  const auto s1_front = s1.substr(0, s2_front.size());
  const auto s1_back = s1.substr(s1.size() - s2_back.size());

  return s1_front == s2_front && s1_back == s2_back;
}
82
83//==============================================================================
//! Lower-case version of a single character (wraps std::tolower).
//! @param ch Character to convert.
//! @return The result of std::tolower for ch, converted back to char.
inline char tolower(char ch) {
  // https://en.cppreference.com/w/cpp/string/byte/tolower
  // std::tolower must be given a value representable as unsigned char, so
  // convert before the call and convert the int result back afterwards.
  const auto as_unsigned = static_cast<unsigned char>(ch);
  const int lowered = std::tolower(as_unsigned);
  return static_cast<char>(lowered);
}
89
//! Returns a lower-case copy of a string.
//! @param t_string Input string (taken by value, converted in place).
//! @return The string with std::tolower applied to every character.
inline std::string tolower(std::string t_string) {
  // Same per-character conversion as the char overload: go through
  // unsigned char, as std::tolower requires.
  std::transform(t_string.begin(), t_string.end(), t_string.begin(),
                 [](char ch) {
                   return static_cast<char>(
                       std::tolower(static_cast<unsigned char>(ch)));
                 });
  return t_string;
}
96
97//==============================================================================
//! Checks if the_string (arg1) contains sub_string (arg2).
//! @return true if sub_string occurs anywhere in the_string (an empty
//!         sub_string is always found).
inline bool contains(std::string_view the_string, std::string_view sub_string) {
  const auto position = the_string.find(sub_string);
  const bool found = (position != std::string_view::npos);
  return found;
}
102
//! Checks if the_string (arg1) contains sub_string (arg2), case insensitive.
//! @return true if sub_string occurs anywhere in the_string when both are
//!         compared character-by-character after std::tolower (an empty
//!         sub_string is always found).
inline bool ci_contains(const std::string &the_string,
                        const std::string &sub_string) {
  // std::tolower must be given a value representable as unsigned char.
  const auto lower = [](char ch) {
    return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  };
  const auto same_letter = [&lower](char lhs, char rhs) {
    return lower(lhs) == lower(rhs);
  };

  // An empty needle matches everywhere, including in an empty haystack
  // (std::search would return cend() in that one case).
  if (sub_string.empty())
    return true;

  const auto hit =
      std::search(the_string.cbegin(), the_string.cend(), sub_string.cbegin(),
                  sub_string.cend(), same_letter);
  return hit != the_string.cend();
}
108
//! Checks if the_string contains any of the given sub_strings.
//! @return true if at least one element of sub_strings occurs in the_string;
//!         false if none does (including when sub_strings is empty).
inline bool contains(const std::string &the_string,
                     const std::vector<std::string> &sub_strings) {
  const auto occurs_in_string = [&the_string](const std::string &candidate) {
    return the_string.find(candidate) != std::string::npos;
  };
  return std::any_of(sub_strings.cbegin(), sub_strings.cend(),
                     occurs_in_string);
}
118
120inline bool ci_contains(const std::string &the_string,
121 const std::vector<std::string> &sub_strings) {
122 for (const auto &substr : sub_strings) {
123 if (ci_contains(the_string, substr))
124 return true;
125 }
126 return false;
127}
128
129//==============================================================================
//! Case insensitive string compare. Essentially: LowerCase(s1)==LowerCase(s2)
//! @return true if s1 and s2 have the same length and every pair of
//!         corresponding characters is equal after std::tolower.
inline bool ci_compare(std::string_view s1, std::string_view s2) {
  if (s1.size() != s2.size())
    return false;

  // std::tolower must be given a value representable as unsigned char.
  const auto lower = [](char ch) {
    return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  };

  for (std::size_t i = 0; i < s1.size(); ++i) {
    if (lower(s1[i]) != lower(s2[i]))
      return false;
  }
  return true;
}
136
139inline bool ci_wc_compare(std::string_view s1, std::string_view s2) {
140 // look for wildcard:
141 const auto wc = std::find(s2.cbegin(), s2.cend(), '*');
142 if (wc == s2.cend())
143 return ci_compare(s1, s2);
144
145 const auto pos_wc = std::size_t(std::distance(s2.cbegin(), wc));
146
147 const auto s1_front = s1.substr(0, pos_wc);
148 const auto s2_front = s2.substr(0, pos_wc);
149
150 // number of characters following the '*'
151 const auto len_back = std::size_t(std::distance(wc + 1, s2.cend()));
152
153 const auto pos_1_back = s1.length() > len_back ? s1.length() - len_back : 0;
154 const auto s1_back = s1.substr(pos_1_back, std::string::npos);
155 const auto s2_back = s2.substr(pos_wc + 1, std::string::npos);
156
157 return ci_compare(s1_front, s2_front) && ci_compare(s1_back, s2_back);
158}
159
160//==============================================================================
161
//! A simple implementation of the Levenshtein (edit) distance.
//! @param a First string.
//! @param b Second string.
//! @return The minimum number of single-character insertions, deletions and
//!         substitutions needed to turn one string into the other
//!         (as std::size_t).
inline auto Levenstein(std::string_view a, std::string_view b) {
  // https://en.wikipedia.org/wiki/Levenshtein_distance
  // Iterative dynamic programming, keeping only two rows of the distance
  // table. This avoids recursion (and its stack depth for long strings) and
  // needs storage proportional to b.size() only.

  // previous[j]: distance between the first (i-1) characters of a and the
  // first j characters of b. For i-1 == 0 that is simply j insertions.
  std::vector<std::size_t> previous(b.size() + 1);
  for (std::size_t j = 0; j <= b.size(); ++j) {
    previous[j] = j;
  }
  std::vector<std::size_t> current(b.size() + 1);

  for (std::size_t i = 1; i <= a.size(); ++i) {
    // first i characters of a vs. empty prefix of b: i deletions
    current[0] = i;
    for (std::size_t j = 1; j <= b.size(); ++j) {
      const std::size_t substitution_cost =
          (a[i - 1] == b[j - 1]) ? std::size_t{0} : std::size_t{1};
      current[j] = std::min({previous[j] + 1,     // delete a[i-1]
                             current[j - 1] + 1,  // insert b[j-1]
                             previous[j - 1] + substitution_cost});
    }
    previous.swap(current);
  }

  // after the final swap, 'previous' holds the row for all of a
  return previous[b.size()];
}
190
//! A simple implementation of the Levenshtein (edit) distance, case
//! insensitive.
//! @param a First string.
//! @param b Second string.
//! @return The minimum number of single-character insertions, deletions and
//!         substitutions needed to turn one string into the other, where
//!         characters are compared after std::tolower (as std::size_t).
inline auto ci_Levenstein(std::string_view a, std::string_view b) {
  // Iterative dynamic programming, keeping only two rows of the distance
  // table. This avoids recursion (and its stack depth for long strings) and
  // needs storage proportional to b.size() only.

  // std::tolower must be given a value representable as unsigned char.
  const auto lower = [](char ch) {
    return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  };

  // previous[j]: distance between the first (i-1) characters of a and the
  // first j characters of b. For i-1 == 0 that is simply j insertions.
  std::vector<std::size_t> previous(b.size() + 1);
  for (std::size_t j = 0; j <= b.size(); ++j) {
    previous[j] = j;
  }
  std::vector<std::size_t> current(b.size() + 1);

  for (std::size_t i = 1; i <= a.size(); ++i) {
    // first i characters of a vs. empty prefix of b: i deletions
    current[0] = i;
    const char a_lower = lower(a[i - 1]);
    for (std::size_t j = 1; j <= b.size(); ++j) {
      const std::size_t substitution_cost =
          (a_lower == lower(b[j - 1])) ? std::size_t{0} : std::size_t{1};
      current[j] = std::min({previous[j] + 1,     // delete a[i-1]
                             current[j - 1] + 1,  // insert b[j-1]
                             previous[j - 1] + substitution_cost});
    }
    previous.swap(current);
  }

  // after the final swap, 'previous' holds the row for all of a
  return previous[b.size()];
}
218
220inline auto closest_match(std::string_view test_string,
221 const std::vector<std::string> &list) {
222 auto compare = [&test_string](const auto &s1, const auto &s2) {
223 return qip::Levenstein(s1, test_string) < qip::Levenstein(s2, test_string);
224 };
225 return std::min_element(list.cbegin(), list.cend(), compare);
226}
227
230inline auto ci_closest_match(std::string_view test_string,
231 const std::vector<std::string> &list) {
232 auto compare = [&test_string](const auto &s1, const auto &s2) {
233 return qip::ci_Levenstein(s1, test_string) <
234 qip::ci_Levenstein(s2, test_string);
235 };
236 return std::min_element(list.cbegin(), list.cend(), compare);
237}
238
239//==============================================================================
241
//! Checks if a string-like s is integer-like (including a leading '-' or
//! '+').
//! @return true if s is a non-empty run of decimal digits, optionally
//!         preceded by a single '+' or '-'. A lone sign, an empty string, or
//!         any other character (spaces, '.', letters, ...) gives false.
inline bool string_is_integer(std::string_view s) {
  if (s.empty())
    return false;

  // std::isdigit must be given a value representable as unsigned char;
  // passing a plain (possibly negative) char is undefined behaviour.
  const auto is_digit = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  };

  // strip one optional leading sign; what remains must be all digits
  const bool has_sign = (s.front() == '-' || s.front() == '+');
  const auto digits = has_sign ? s.substr(1) : s;

  return !digits.empty() &&
         std::all_of(digits.cbegin(), digits.cend(), is_digit);
}
253
254//==============================================================================
//! Splits a string by delimiter into a vector.
//! @param s     String to split.
//! @param delim Delimiter character (default: space).
//! @return The pieces of s between delimiters, in order. Consecutive
//!         delimiters give empty pieces; a delimiter at the very end of s
//!         does NOT give a trailing empty piece; an empty s gives an empty
//!         vector.
inline std::vector<std::string> split(const std::string &s, char delim = ' ') {
  std::vector<std::string> pieces;

  std::size_t start = 0;
  while (start < s.size()) {
    const auto next_delim = s.find(delim, start);
    if (next_delim == std::string::npos) {
      // no more delimiters: the rest of the string is the final piece
      pieces.push_back(s.substr(start));
      break;
    }
    pieces.push_back(s.substr(start, next_delim - start));
    start = next_delim + 1; // continue just after the delimiter
  }

  return pieces;
}
265
//! Takes vector of strings, concats into single string, with optional
//! delimiter.
//! @param v     Strings to join, in order.
//! @param delim Text placed between consecutive elements (not after the last
//!              one). Default: empty.
//! @return The joined string; empty if v is empty.
inline std::string concat(const std::vector<std::string> &v,
                          const std::string &delim = "") {
  std::string joined;

  auto element = v.cbegin();
  if (element == v.cend())
    return joined;

  // first element has no delimiter in front of it; every later one does
  joined += *element;
  for (++element; element != v.cend(); ++element) {
    joined += delim;
    joined += *element;
  }
  return joined;
}
277
278//==============================================================================
//! Wraps the string, 'input', at line 'at'. Optionally adds a prefix
//! 'prefix' to the start of each line.
//! @param input  Text to wrap.
//! @param at     Target line width, including the prefix (default 80).
//! @param prefix Text placed at the start of every output line.
//! @return The wrapped text, lines separated by '\n'. Each line is broken at
//!         an existing newline if one falls inside the current line, otherwise
//!         at the last space at or before the width limit, otherwise (no
//!         usable space) straight after the width limit.
//! @note If 'at' does not leave room for at least one character after the
//!       prefix, one character of 'input' per line is used instead.
inline std::string wrap(const std::string &input, std::size_t at = 80,
                        const std::string &prefix = "") {
  std::string output;
  // Number of characters of 'input' allowed per line. Must be at least 1:
  // a width of zero would never advance through the input (infinite loop),
  // and 'at - prefix.size()' would wrap around if the prefix is longer.
  const std::size_t length = at > prefix.size() ? at - prefix.size() : 1;
  std::size_t ipos = 0;      // start of the current line within 'input'
  std::size_t fpos = length; // one-past the furthest allowed end of the line
  while (ipos < input.length()) {
    if (!output.empty())
      output += '\n';

    // An existing newline strictly inside the current line: break there,
    // and drop the newline character itself.
    auto temp_pos_nl = input.find('\n', ipos);
    if (temp_pos_nl > ipos && temp_pos_nl < fpos &&
        temp_pos_nl != std::string::npos) {
      output += prefix + input.substr(ipos, temp_pos_nl - ipos);
      ipos = temp_pos_nl + 1;
      fpos = ipos + length;
      continue;
    }

    // Remaining text fits on one line: emit it and finish.
    if (fpos >= input.length()) {
      output += prefix + input.substr(ipos, fpos - ipos);
      break;
    }

    // Otherwise look backwards from the limit for a space to break at.
    auto temp_pos = input.rfind(' ', fpos);
    if (temp_pos <= ipos || temp_pos == std::string::npos) {
      // no usable space in this line: hard break at the width limit
      output += prefix + input.substr(ipos, fpos - ipos);
      ipos = fpos;
      fpos = ipos + length;
    } else {
      // break at the space, and drop the space itself
      output += prefix + input.substr(ipos, temp_pos - ipos);
      ipos = temp_pos + 1;
      fpos = ipos + length;
    }
  }
  return output;
}
318
319//==============================================================================
//! Converts integer, a, to Roman Numerals.
//! @param a Value to convert.
//! @return Roman numeral for 1 <= |a| <= 3999, with a leading "-" for
//!         negative values. Zero gives an empty string. Values outside
//!         [-3999, 3999] are returned as ordinary decimal text
//!         (std::to_string).
inline std::string int_to_roman(int a) {
  // Out of range in either direction: fall back to decimal. Checking the
  // negative bound here (rather than negating first) avoids signed overflow
  // for the most negative int.
  if (a > 3999 || a < -3999)
    return std::to_string(a);
  if (a < 0)
    return "-" + int_to_roman(-a);

  // Lookup tables indexed by the thousands, hundreds, tens and units digit.
  static const std::string M[] = {"", "M", "MM", "MMM"};
  static const std::string C[] = {"",  "C",  "CC",  "CCC",  "CD",
                                  "D", "DC", "DCC", "DCCC", "CM"};
  static const std::string X[] = {"",  "X",  "XX",  "XXX",  "XL",
                                  "L", "LX", "LXX", "LXXX", "XC"};
  static const std::string I[] = {"",  "I",  "II",  "III",  "IV",
                                  "V", "VI", "VII", "VIII", "IX"};

  const int thousands = a / 1000;
  const int hundreds = (a % 1000) / 100;
  const int tens = (a % 100) / 10;
  const int units = a % 10;
  return M[thousands] + C[hundreds] + X[tens] + I[units];
}
335
336} // namespace qip
qip library: A collection of useful functions
Definition Array.hpp:9
auto closest_match(std::string_view test_string, const std::vector< std::string > &list)
Finds the closest match in list to test_string (return iterator)
Definition String.hpp:220
std::string concat(const std::vector< std::string > &v, const std::string &delim="")
Takes vector of strings, concats into single string, with optional delimiter.
Definition String.hpp:267
auto Levenstein(std::string_view a, std::string_view b)
A simple non-optimised implementation of the Levenshtein distance.
Definition String.hpp:163
bool string_is_integer(std::string_view s)
Checks if a string-like s is integer-like (including -)
Definition String.hpp:245
std::vector< std::string > split(const std::string &s, char delim=' ')
Splits a string by delimiter into a vector.
Definition String.hpp:256
bool ci_contains(const std::string &the_string, const std::string &sub_string)
Checks if the_string (arg1) contains sub_string (arg2), case insensitive.
Definition String.hpp:104
auto ci_Levenstein(std::string_view a, std::string_view b)
A simple non-optimised implementation of the Levenshtein distance (case insensitive)
Definition String.hpp:193
bool ci_wc_compare(std::string_view s1, std::string_view s2)
Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which will match anything....
Definition String.hpp:139
std::string int_to_roman(int a)
Converts integer, a, to Roman Numerals. Assumed that |a|<4000 (larger values are returned as decimal text).
Definition String.hpp:321
std::string fstring(const std::string format,...)
Returns a formatted std::string, with formatting printf-like commands. Note: maximum string length is ...
Definition String.hpp:18
bool ci_compare(std::string_view s1, std::string_view s2)
Case insensitive string compare. Essentially: LowerCase(s1)==LowerCase(s2)
Definition String.hpp:131
bool wildcard_compare(std::string_view s1, std::string_view s2)
Compares two strings, s1 and s2. s2 may contain ONE wildcard ('*') which will match anything.
Definition String.hpp:62
bool contains(std::string_view the_string, std::string_view sub_string)
Checks if the_string (arg1) contains sub_string (arg2)
Definition String.hpp:99
auto compare(const std::vector< T > &first, const std::vector< T > &second)
Directly compare two arithmetic vectors of the same type and length. Returns pair {delta,...
Definition Vector.hpp:33
auto ci_closest_match(std::string_view test_string, const std::vector< std::string > &list)
Finds the closest match (case insensitive) in list to test_string (return iterator)
Definition String.hpp:230
std::string wrap(const std::string &input, std::size_t at=80, const std::string &prefix="")
Wraps the string, 'input', at line 'at'. Optionally appends a prefix 'prefix' to each line....
Definition String.hpp:281
char tolower(char ch)
return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
Definition String.hpp:85