I'm trying to parse a URL and break it down into parts using boost::spirit::x3, as below:
#include <iostream>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/home/x3.hpp>
// The three strings a URL is split into by parseSpirit.
struct UrlParts {
    std::string prefix;  // filled from the optional scheme + userinfo part
    std::string host;    // filled from the (optionally bracketed) host part
    std::string suffix;  // filled from the optional port + path/query/fragment part
};
// Exposes UrlParts to Boost.Fusion as the sequence (prefix, host, suffix).
// NOTE(review): parseSpirit binds a std::tie tuple rather than a UrlParts object,
// so this adaptation looks unused by the active parse call - confirm before relying on it.
BOOST_FUSION_ADAPT_STRUCT(UrlParts, prefix, host, suffix)
// Splits `input` into prefix / host / suffix with a Boost.Spirit X3 grammar.
// The whole input must match (the grammar is followed by x3::eoi). On failure
// "Parsing failed" is written to std::cout and `parts` is still returned as-is.
UrlParts parseSpirit(std::string_view input) {
namespace x3 = boost::spirit::x3;
// scheme: one or more of [a-zA-Z0-9+.-] followed by "://"; raw[] captures the matched range.
static const auto scheme_ = (x3::raw[+x3::char_("a-zA-Z0-9+.-") >> "://"]);
// userinfo: one or more non-'@' characters followed by '@'; also captured via raw[].
static const auto userinfo_ = (x3::raw[+~x3::char_("@") >> "@"]);
// NOTE(review): prefix_ is a sequence of two optional raw[] parts but is bound to the single
// string parts.prefix below - presumably what trips the "Size of the passed attribute is less
// than expected" static assertion quoted after this listing; confirm against the Boost version in use.
static const auto prefix_ = (-scheme_ >> -userinfo_);
// port: ':' plus an optional run of 1 to 5 digits; the &(...) lookahead requires that
// '/', '?', '#' or end of input comes next, without consuming it.
static const auto port_ = (x3::raw[':' >> -x3::repeat(1, 5)[x3::digit] >> &(x3::char_("/?#") | x3::eoi)]);
// host: one or more of [a-fxXA-F0-9:.], stopping at the first position where port_ would match.
static const auto host_ = (+(x3::char_("a-fxXA-F0-9:.") - port_));
static const auto path_ = (x3::char_("/?#") >> *x3::char_); // to store path+query+fragment
static const auto suffix_ = (-port_ >> -path_);
//static const auto url = x3::rule<class url, UrlParts>() = -prefix_ >> ('[' >> host_ >> ']' | host_) >> -suffix_;
static const auto url = -prefix_ >> ('[' >> host_ >> ']' | host_) >> -suffix_; // prefix & suffix are optional but host is required
// BOOST_SPIRIT_DEBUG_NODES((scheme_)(userinfo_)(host_)(port_)(path_)(url));
// Parse the input
auto iter = input.begin();
auto end = input.end();
UrlParts parts;
// Tuple of references, so the parser writes straight into the members of `parts`.
auto attr = std::tie(parts.prefix, parts.host, parts.suffix);
//parse(input.begin(), input.end(), x3::eps >> url >> x3::eoi, parts);
bool ret = x3::parse(iter, end, url >> x3::eoi, attr);
if (!ret) {
std::cout << "Parsing failed" << std::endl;
}
return parts;
}
// Runs parseSpirit over a few sample URLs and prints the parts found for each one.
int main()
{
    for (const char* input : {"http://usr:pwd@192.168.1.1:8080/file.php?abc=1#23",
                              "http://[::ffff:192.168.1.1]:8080/file.php?abc=1#23",
                              "http://::ffff:192.168.1.1/file.php?abc=1#23",
                              "::ffff:192.168.1.1"}) {
        std::cout << "Input: " << input << std::endl;
        // Parse the URL of this iteration; previously a fixed literal was parsed every
        // time, so the printed output never matched the printed input.
        const auto parts = parseSpirit(input);
        std::cout << "Output: Prefix: " << parts.prefix << ", Host: " << parts.host << ", Suffix: " << parts.suffix << std::endl;
        std::cout << "================" << std::endl;
    }
    return 0;
}
But the above code fails to compile with the error:
/usr/include/boost/spirit/home/x3/operator/detail/sequence.hpp:140:25: error: static assertion failed: Size of the passed attribute is less than expected.
140 | actual_size >= expected_size
| ~~~~~~~~~~~~^~~~~~~~~~~~~~~~
/usr/include/boost/spirit/home/x3/operator/detail/sequence.hpp:140:25: note: ‘(((int)boost::spirit::x3::detail::partition_attribute >, boost::spirit::x3::literal_string > > >, boost::spirit::x3::optional > >, boost::spirit::x3::literal_char > > > > >, boost::spirit::x3::alternative, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > >, boost::spirit::x3::literal_char >, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > > >, boost::spirit::x3::optional, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > >, boost::spirit::x3::optional, boost::spirit::x3::kleene > > > > >, std::tuple, std::allocator >&, std::__cxx11::basic_string, std::allocator >&, std::__cxx11::basic_string, std::allocator >&>, boost::spirit::x3::unused_type, void>::actual_size) >= ((int)boost::spirit::x3::detail::partition_attribute >, boost::spirit::x3::literal_string > > >, boost::spirit::x3::optional > >, boost::spirit::x3::literal_char > > > > >, boost::spirit::x3::alternative, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > >, boost::spirit::x3::literal_char >, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > > >, boost::spirit::x3::optional, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, 
boost::spirit::x3::eoi_parser> > > > >, boost::spirit::x3::optional, boost::spirit::x3::kleene > > > > >, std::tuple, std::allocator >&, std::__cxx11::basic_string, std::allocator >&, std::__cxx11::basic_string, std::allocator >&>, boost::spirit::x3::unused_type, void>::expected_size))’ evaluates to false
Any suggestions on what's wrong? Similar code with boost::spirit::qi works fine.
Additionally, I'm interested in learning whether there is a more efficient way of doing this, e.g. using string_view instead of string, since all three parts are present in the input view.
Thanks in advance!
It means the attributes are not detected as compatible for the parser expression. See it broken in 1.76:
The first problem I see is trying to synthesize a single string (`prefix`) out of two `raw[]` directives. That's... not gonna work.
The good news is it works (again?) starting with 1.77, but in general consider being a bit more explicit with the attribute compatibility, like `as_type` from this answer: Understanding the List Operator (%) in Boost.Spirit, or the many others https://stackoverflow.com/search?tab=newest&q=user%3a85371%20x3%20as_type&searchOn=3 or even more if you look for the name `as`, which I usually prefer... https://stackoverflow.com/search?q=user%3A85371+x3+as+x3%3A%3Arule
That said, when I look at your code, you bind a manually tied tuple AND somehow still adapt the struct? That's redundant. I'd assume you want to just tie manually, so drop the adaptation.
Next up, by all means, don't use a parser combinator library as a tokenizer. I.e., don't randomly bunch together unrelated productions (suffix really should not contain the port specification).
Also, parse into a real port number using... an integer parser. Certainly if you're going to be pedantically strict about the number of digits allowed anyway! See here:
// Unsigned integer parser: radix 10, at least 1 and at most 5 digits, attribute type uint16_t.
auto portnum_ = x3::uint_parser<uint16_t, 10, 1, 5>{};
// ':' then the port number; the &(...) lookahead requires '/', '?', '#' or end of input next
// without consuming it.
auto portspec_ = ':' >> portnum_ >> &(x3::char_("/?#") | x3::eoi);
Be careful with double optionality. E.g. since `suffix_ = -port_ >> -path_` literally has only optional elements, the expression `-suffix_` at best has the same meaning as `suffix_`. However, there are lots of situations (optional repeating constructs) where you will get infinite loops of zero-length matches.
I suppose `/` or `#` should also end the user info production.
Not everything needs to be `raw`. E.g. I'd prefer
// One or more characters other than '@', '/' or '#', then the '@' itself; x3::char_("@")
// (rather than a bare "@" literal) keeps the '@' in the synthesized attribute.
auto userinfo_ = +~x3::char_("@/#") >> x3::char_("@");
for the userinfo. In fact, if you do
// Host, either wrapped in '[' ... ']' or bare, followed by an optional port spec.
// The bracket literals are matched but contribute nothing to the attribute.
auto authority_ = ('[' >> host_ >> ']' | host_) >> -portspec_;
You will correctly get the parsed host without the `[]` brackets, which have meaning ONLY in the URI grammar.
You had the end-of-input validation commented out, so let's re-enable that. Let's also return richer information (including a `valid` flag) and add a lot more test cases:
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
// The three strings a URL is split into by parseSpirit.
struct UrlParts {
    std::string prefix;  // the only member the parser below writes to
    std::string host;    // printed by main
    std::string suffix;  // printed by main
};
// Reduced version of the URL parser: the active grammar is only `-prefix_` followed by
// end of input, and only p.prefix is bound, so p.host and p.suffix are never written here.
// On failure "Parsing failed" is written to std::cout and `p` is still returned.
UrlParts parseSpirit(std::string_view input) {
namespace x3 = boost::spirit::x3;
// scheme: one or more of [a-zA-Z0-9+.-] followed by "://"; raw[] captures the matched range.
auto scheme_ = x3::raw[+x3::char_("a-zA-Z0-9+.-") >> "://"];
// userinfo: one or more non-'@' characters followed by '@'; also captured via raw[].
auto userinfo_ = x3::raw[+~x3::char_("@") >> "@"];
// Two raw[] parts (the second optional) that end up bound to the single string p.prefix.
auto prefix_ = scheme_ >> -userinfo_;
// port: ':' plus an optional run of 1 to 5 digits; the &(...) lookahead requires that
// '/', '?', '#' or end of input comes next, without consuming it.
auto port_ = x3::raw[':' >> -x3::repeat(1, 5)[x3::digit] >> &(x3::char_("/?#") | x3::eoi)];
// host_, path_ and suffix_ are defined but not referenced by the active `url` expression below.
auto host_ = +(x3::char_("a-fxXA-F0-9:.") - port_);
auto path_ = x3::char_("/?#") >> *x3::char_; // to store path+query+fragment
auto suffix_ = -port_ >> -path_;
// static const auto url = = -prefix_ >> ('[' >> host_ >> ']' | host_) >>
// -suffix_;
auto url //
//= x3::rule<class url, UrlParts>() //
= -prefix_ //
//>> ('[' >> host_ >> ']' | host_) >>
// -suffix_ // prefix & suffix are optional but host is required
;
// Parse the input
auto iter = input.begin();
auto end = input.end();
UrlParts p;
// One-element tuple of references: the parser writes straight into p.prefix.
auto attr = std::tie(p.prefix/*, p.host, p.suffix*/);
bool ret = x3::parse(iter, end, url >> x3::eoi, attr);
if (!ret) {
std::cout << "Parsing failed" << std::endl;
}
return p;
}
// Runs parseSpirit over a few sample URLs and prints the parts found for each one.
int main() {
    for (const char* input : {"http://usr:pwd@192.168.1.1:8080/file.php?abc=1#23",
                              "http://[::ffff:192.168.1.1]:8080/file.php?abc=1#23",
                              "http://::ffff:192.168.1.1/file.php?abc=1#23", "::ffff:192.168.1.1"}) {
        std::cout << "Input: " << input << std::endl;
        // Parse the URL of this iteration; previously a fixed literal was parsed every
        // time, so the printed output never matched the printed input.
        const auto parts = parseSpirit(input);
        std::cout << "Output: Prefix: " << parts.prefix << ", Host: " << parts.host << ", Suffix: " << parts.suffix << std::endl;
        std::cout << "================" << std::endl;
    }
}
Printing
192.168.1.1 {true, "", "", "192.168.1.1", "", unspecified}
192.168.1.1/ {true, "", "", "192.168.1.1", "/", unspecified}
192.168.1.1/file.php {true, "", "", "192.168.1.1", "/file.php", unspecified}
192.168.1.1/file.php?abc=1 {true, "", "", "192.168.1.1", "/file.php?abc=1", unspecified}
192.168.1.1:8888 {true, "", "", "192.168.1.1", "", 8888}
192.168.1.1:8888/ {true, "", "", "192.168.1.1", "/", 8888}
192.168.1.1:8888/file.php {true, "", "", "192.168.1.1", "/file.php", 8888}
192.168.1.1:8888/file.php?abc=1 {true, "", "", "192.168.1.1", "/file.php?abc=1", 8888}
::ffffff::192.168.1.1:9999/file.php?abc=1 {true, "", "", "::ffffff::192.168.1.1", "/file.php?abc=1", 9999}
http://192.168.1.1 {true, "http://", "", "192.168.1.1", "", unspecified}
http://192.168.1.1/ {true, "http://", "", "192.168.1.1", "/", unspecified}
http://192.168.1.1/file.php {true, "http://", "", "192.168.1.1", "/file.php", unspecified}
http://192.168.1.1/file.php?abc=1 {true, "http://", "", "192.168.1.1", "/file.php?abc=1", unspecified}
http://192.168.1.1:8888 {true, "http://", "", "192.168.1.1", "", 8888}
http://192.168.1.1:8888/ {true, "http://", "", "192.168.1.1", "/", 8888}
http://192.168.1.1:8888/file.php {true, "http://", "", "192.168.1.1", "/file.php", 8888}
http://192.168.1.1:8888/file.php?abc=1 {true, "http://", "", "192.168.1.1", "/file.php?abc=1", 8888}
http://::ffffff::192.168.1.1:9999/file.php?abc=1 {true, "http://", "", "::ffffff::192.168.1.1", "/file.php?abc=1", 9999}
http://sehe@192.168.1.1 {true, "http://", "sehe@", "192.168.1.1", "", unspecified}
http://sehe@192.168.1.1/ {true, "http://", "sehe@", "192.168.1.1", "/", unspecified}
http://sehe@192.168.1.1/file.php {true, "http://", "sehe@", "192.168.1.1", "/file.php", unspecified}
http://sehe@192.168.1.1:8888 {true, "http://", "sehe@", "192.168.1.1", "", 8888}
http://sehe@192.168.1.1:8888/ {true, "http://", "sehe@", "192.168.1.1", "/", 8888}
http://sehe@192.168.1.1:8888/file.php {true, "http://", "sehe@", "192.168.1.1", "/file.php", 8888}
http://usr:pwd@192.168.1.1/file.php?abc=1 {true, "http://", "usr:pwd@", "192.168.1.1", "/file.php?abc=1", unspecified}
sehe@192.168.1.1 {true, "", "sehe@", "192.168.1.1", "", unspecified}
sehe@192.168.1.1/ {true, "", "sehe@", "192.168.1.1", "/", unspecified}
sehe@192.168.1.1/file.php {true, "", "sehe@", "192.168.1.1", "/file.php", unspecified}
sehe@192.168.1.1:8888 {true, "", "sehe@", "192.168.1.1", "", 8888}
sehe@192.168.1.1:8888/ {true, "", "sehe@", "192.168.1.1", "/", 8888}
sehe@192.168.1.1:8888/file.php {true, "", "sehe@", "192.168.1.1", "/file.php", 8888}
usr:pwd@192.168.1.1/file.php?abc=1 {true, "", "usr:pwd@", "192.168.1.1", "/file.php?abc=1", unspecified}
If you must use old Boost versions, we see the problem is back... So let's fix that by reinstating the `raw[]` around the user-info just as a hint. I'd rather upgrade Boost, obviously:
// Same userinfo grammar, wrapped in raw[] so the attribute is the matched input range
// (including the trailing '@') instead of a sequence of separate parts.
auto userinfo_ = x3::raw[+~x3::char_("@/#") >> x3::char_("@")];
See it live on Boost 1.76 here: https://godbolt.org/z/6bc5Ycrcr
The output is identical, md5 checksums:
c69881691195579e3184ef6024136356 1.76
c69881691195579e3184ef6024136356 1.84