I need to write a constexpr parser in C++17 that converts a string literal into a std::array of std::uint8_t.
By parsing, I mean converting a string literal of hex octets into a std::array, like this:
// Desired usage: every pair of hex digits in the literal becomes one byte.
// NOTE(review): the LiteralArray shown below declares no operator[]; indexing
// presumably requires converting the result to std::array first - confirm.
constexpr auto arr1 = MakeLiteralArray("aafc");
static_assert(0xAA == arr1[0]);  // "aa" -> 0xAA
static_assert(0xFC == arr1[1]);  // "fc" -> 0xFC
I have a solution for simple cases already (full code):
// NOTE(review): a leading-underscore name at global scope is reserved for the
// implementation; the name is kept here only because callers refer to it.
namespace _internal {

/// Parses up to `size` characters of `str` as a hexadecimal number.
///
/// Parsing stops early at the first '\0'. Characters that are not hex digits
/// (0-9, a-f, A-F) are skipped without contributing to the result, so callers
/// that need strict validation must check the input themselves.
///
/// @param str  Pointer to the first character to parse.
/// @param size Maximum number of characters to read from `str`.
/// @return The accumulated value; unsigned arithmetic wraps on overflow.
static constexpr std::uint64_t HexToDec(const char* str, std::size_t size) {
  constexpr std::uint64_t base = 16;
  std::uint64_t result = 0;
  for (std::size_t i = 0; i < size; ++i) {
    const char c = str[i];
    if (c == '\0') {
      break;
    }
    // The three digit ranges are disjoint, so at most one branch applies.
    if ((c >= '0') && (c <= '9')) {
      result = result * base + static_cast<std::uint64_t>(c - '0');
    } else if ((c >= 'a') && (c <= 'f')) {
      result = result * base + static_cast<std::uint64_t>(c - 'a' + 10);
    } else if ((c >= 'A') && (c <= 'F')) {
      result = result * base + static_cast<std::uint64_t>(c - 'A' + 10);
    }
  }
  return result;
}

/// Extracts octet number `Index` from `str`, i.e. the value of the (up to)
/// two characters starting at position `Index * 2`.
///
/// @tparam StrSize Size of the character array, terminator included.
/// @tparam Index   Zero-based octet index.
/// @return The octet as std::uint8_t.
template <std::size_t StrSize, std::size_t Index>
static constexpr auto StringIterator(const char (&str)[StrSize]) {
  constexpr std::size_t str_index = Index * 2;
  static_assert(str_index < StrSize,
                "octet index is past the end of the character array");
  // Never read beyond the array, even if it is not null-terminated.
  constexpr std::size_t remaining = StrSize - str_index;
  constexpr std::size_t length = remaining < 2 ? remaining : 2;
  return static_cast<std::uint8_t>(HexToDec(&str[str_index], length));
}

/// Builds the byte array whose i-th element is octet `Is[i]` of `str`.
///
/// The array type is spelled out rather than deduced so that an empty index
/// sequence yields an empty array (class template argument deduction has
/// nothing to deduce from when the pack is empty).
///
/// @return std::array<std::uint8_t, sizeof...(Is)>.
template <std::size_t StrSize, std::size_t... Is>
static constexpr auto FillArray(const char (&str)[StrSize],
                                std::index_sequence<Is...> const&) {
  return std::array<std::uint8_t, sizeof...(Is)>{
      StringIterator<StrSize, Is>(str)...};
}

}  // namespace _internal
/// Fixed-size byte container wrapping a std::array<std::uint8_t, N>.
///
/// Converts implicitly either to that array type or to a
/// std::vector<std::uint8_t> holding the same bytes.
template <std::size_t N>
struct LiteralArray {
  /// The wrapped bytes; value-initialized unless set by the constructor.
  std::array<std::uint8_t, N> value_{};

  /// Stores a copy of `value`.
  constexpr explicit LiteralArray(std::array<std::uint8_t, N> value)
      : value_(value) {}

  /// Returns a copy of the wrapped array.
  constexpr operator std::array<std::uint8_t, N>() const { return value_; }

  /// Returns a vector containing the wrapped bytes in order.
  operator std::vector<std::uint8_t>() const {
    return std::vector<std::uint8_t>(value_.begin(), value_.end());
  }
};
/// Converts a string literal of hex digits into a LiteralArray of bytes.
///
/// Characters are consumed two at a time; when the literal has an odd number
/// of characters the last one forms an octet on its own.
///
/// @tparam Size Size of the character array, including the trailing '\0'.
/// @param str   The string literal to convert.
/// @return LiteralArray<N> with N = ceil((Size - 1) / 2).
template <std::size_t Size>
constexpr auto MakeLiteralArray(const char (&str)[Size]) {
  // Size counts the terminator, so the literal has Size - 1 characters and
  // needs ceil((Size - 1) / 2) octets. In integer arithmetic that is exactly
  // Size / 2, which avoids the float round-trip and any external math library.
  constexpr std::size_t N = Size / 2;
  return LiteralArray<N>{
      _internal::FillArray<Size>(str, std::make_index_sequence<N>{})};
}
I want to extend this to be able to parse UUID strings like "f81d4fae-7dec-11d0-a765-00a0c91e6bf6"
from RFC 4122.
I don't want a specialized parser for UUIDs only; I want a general solution that also works for strings like "af1c-dc09"
or "55-ffffff".
The problem I can't solve is how to skip the '-' characters
while staying in a constexpr context.
C++20's consteval may be a solution, but I'm stuck with C++17.
Here is another possibility, starting from Jarod42's suggestion to use a char_sequence<Cs...>
class that encodes the characters of a string literal as type information (see this).
A version of char_sequence<Cs...> might look like this:
/// Compile-time list of characters carried in the type's template arguments.
template <char... Cs>
struct char_sequence {
  /// The sequence type obtained by appending `c`. If `c` is '\0' or equals
  /// `skip`, nothing is appended and the current sequence type is produced.
  template <char c, char skip = '\0'>
  using push_back = std::conditional_t<(c != '\0' && c != skip),
                                       char_sequence<Cs..., c>,
                                       char_sequence<Cs...>>;

  /// The characters of the sequence, in order.
  static constexpr std::array<char, sizeof...(Cs)> value{Cs...};
};
where a skip character can be provided ('-' in the context of this post).
This is also a slight modification of the original version, in that it gets rid of '\0'
characters during a push_back.
Finally, a value array holds the characters, which makes it possible to define a to_hexa
function that transforms the char sequence into bytes, one per pair of hexadecimal digits:
/// Combines consecutive pairs of input characters into bytes.
///
/// Output element k is convert(in[2k]) * base + convert(in[2k + 1]).
/// `convert` is defined outside this snippet; presumably it maps a digit
/// character to its numeric value - confirm against its definition.
///
/// @tparam N   Number of input characters; must be even (checked at compile time).
/// @param in   The characters to pack.
/// @param base Multiplier applied to the first character of each pair.
/// @return std::array<std::uint8_t, N / 2> holding the packed values.
template <std::size_t N>
constexpr auto to_hexa(std::array<char, N> const& in, std::uint8_t base = 16) {
  static_assert(N % 2 == 0);
  std::array<std::uint8_t, N / 2> bytes{};
  for (std::size_t k = 0; k != N / 2; ++k) {
    bytes[k] = convert(in[2 * k]) * base + convert(in[2 * k + 1]);
  }
  return bytes;
}
The usage is as follows
// Builds a char_sequence type from the string literal S with every '-'
// skipped, then converts that type's `value` array to bytes with to_hexa.
// PUSH_BACK_128 is defined outside this snippet; presumably it expands to a
// chain of ::push_back<...> applications over the characters of S - confirm
// against its definition.
#define MakeLiteralArray(S) to_hexa(char_sequence<> PUSH_BACK_128(S,0,'-')::value)
constexpr auto foo = MakeLiteralArray("af1c-dc-09");
// With the two '-' removed, 8 hex digits remain, i.e. 4 bytes.
static_assert (foo.size()==4);
static_assert (0xAF == foo[0]);
static_assert (0x1C == foo[1]);
static_assert (0xDC == foo[2]);
static_assert (0x09 == foo[3]);
where PUSH_BACK_128
is almost the same as Jarod42's original version.