Parse Html with Boost Spirit X3
I know this error occurs because my parser html_element_ references tag_block_, and tag_block_ in turn references html_element_, but I don't know how to make it work.

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
using namespace boost::spirit::x3;
struct tag_name{};
struct html_tag;
struct html_comment;
struct attribute_data : boost::spirit::x3::position_tagged {
  std::string name;
  boost::optional<std::string> value;

struct tag_header :  boost::spirit::x3::position_tagged {
  std::string name;
  std::vector<attribute_data> attributes;

struct self_tag: boost::spirit::x3::position_tagged {
  tag_header header;

struct html_element : boost::spirit::x3::position_tagged, boost::spirit::x3::variant< std::string, self_tag, boost::recursive_wrapper<html_tag>>{
  using base_type::base_type;
  using base_type::operator=;

struct html_tag: boost::spirit::x3::position_tagged {
  tag_header header;
  std::vector<html_element> children;

BOOST_FUSION_ADAPT_STRUCT(attribute_data, name, value);
BOOST_FUSION_ADAPT_STRUCT(tag_header, name, attributes);
BOOST_FUSION_ADAPT_STRUCT(self_tag, header);

// These are the attributes parser, seems fine
struct attribute_parser_id;
// Attribute name: a lexeme of one or more characters other than ' ', '/', '=' or '>'.
auto attribute_identifier_= rule<attribute_parser_id, std::string>{"AttributeIdentifier"} = lexeme[+(char_ - char_(" /=>"))];
auto attribute_value_= rule<attribute_parser_id, std::string>{"AttributeValue"} =
                           lexeme[""" > +(char_ - char_(""")) > """]|lexeme[" " > +(char_ - char_(" ")) > " "]|
                           lexeme[+(char_ - char_(" />"))];
// One attribute: a name, optionally followed by '=' and a value.
// The '>' after "=" is an expectation point: once '=' has matched, a value must follow.
auto single_attribute_ = rule<attribute_parser_id, attribute_data>{"SingleAttribute"} = attribute_identifier_ > -("=">  attribute_value_);
// Zero or more attributes, collected into std::vector<attribute_data>.
auto attributes_ = rule<attribute_parser_id, std::vector<attribute_data>>{"Attributes"} = (*single_attribute_);

struct tag_parser_id;

auto tag_name_begin_func = [](auto &ctx){
  get<tag_name>(ctx) = _attr(ctx).name;
  //_val(ctx).header.name = _attr(ctx);
  std::cout << typeid(_val(ctx)).name() << std::endl;

auto tag_name_end_func = [](auto &ctx){
  _pass(ctx) = get<tag_name>(ctx) == _attr(ctx);

auto self_tag_name_action = [](auto &ctx){
  _val(ctx).header.name = _attr(ctx);
auto self_tag_attribute_action = [](auto &ctx){
  _val(ctx).header.attributes = _attr(ctx);

auto inner_text = lexeme[+(char_- < )];
// Tag name: a lexeme of zero or more characters other than ' ', '/' or '>'.
// NOTE(review): '*' allows an empty tag name to match -- confirm that is intended.
auto tag_name_ = rule<tag_parser_id, std::string>{"HtmlTagName"} = lexeme[*(char_ - char_(" />"))];
auto self_tag_ = rule<tag_parser_id, self_tag>{"HtmlSelfTag"} =  <  > tag_name_[self_tag_name_action] > attributes_[self_tag_attribute_action] > "/>";
auto tag_header_ = rule<tag_parser_id, tag_header>{"HtmlTagBlockHeader"} =  <  > tag_name_ > attributes_ >  > ;

// Declarations of the two mutually recursive rules; their bodies are tag_block__def and
// html_element__def, attached by BOOST_SPIRIT_DEFINE further down.
// NOTE(review): both rules reuse tag_parser_id as their ID with different attribute types;
// X3 rule IDs are presumably meant to be unique per rule -- confirm.
rule<tag_parser_id, html_tag> tag_block_;

rule<tag_parser_id, html_element> html_element_ = "HtmlElement";

auto tag_block__def = with<tag_name>(std::string())[tag_header_[tag_name_begin_func] > (*html_element_) > "</" > omit[tag_name_[tag_name_end_func]] >  > ];
// An element is inner text, a self-closing tag, or a tag block, tried in that order.
auto html_element__def = inner_text | self_tag_ | tag_block_ ;

BOOST_SPIRIT_DEFINE(tag_block_, html_element_);
int main()
  std::string source = "<div data-src="https://www.google.com" id= hello world ></div>";
  html_element result;
  auto const parser = html_element_;
  auto parse_result = phrase_parse(source.begin(), source.end(), parser, ascii::space, result);

On reading, the first thing I notice is that self_tag_ uses expectation points. That won't fly, because it is ordered before other things that can legally start with '<', like tag_block_:

auto html_element__def = inner_text | self_tag_ | tag_block_ ;


Many places use operator+ where operator* is required, like:

auto inner_text = lexeme[*(char_- < )];


auto inner_text = lexeme[*~char_( < )];
    = lexeme[*~char_(" />")];

Cleanup Exercise

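First, the cleanup. This side-steps the template instantiation depth problem by commenting out the `*html_element_` part inside tag_block_. But first, let's see what works: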

Live On Coliru

#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iomanip>
#include <iostream>

//// Unused mixin disabled for simplicity
// #include <boost/spirit/home/x3/support/ast/position_tagged.hpp>

namespace x3 = boost::spirit::x3;
using namespace std::string_literals;

namespace Ast {
    struct tag_name {};
    struct html_tag;
    struct html_comment;

    // using mixin = x3::position_tagged;
    struct mixin {};

    struct attribute_data : mixin {
        std::string                  name;
        boost::optional<std::string> value;
    using attribute_datas = std::vector<attribute_data>;

    struct tag_header : mixin {
        std::string     name;
        attribute_datas attributes;

    struct self_tag : mixin {
        tag_header header;

    using element_base =
        x3::variant<std::string, self_tag, boost::recursive_wrapper<html_tag>>;

    struct html_element : mixin , element_base {
        using element_base::element_base;
        using element_base::operator=;

    using html_elements = std::vector<html_element>;

    struct html_tag : mixin {
        tag_header    header;
        html_elements children;
} // namespace Ast

BOOST_FUSION_ADAPT_STRUCT(Ast::attribute_data, name, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::tag_header, name, attributes)
BOOST_FUSION_ADAPT_STRUCT(Ast::self_tag, header)
BOOST_FUSION_ADAPT_STRUCT(Ast::html_tag, header, children)

namespace Parser {
    // Attribute name: a lexeme of one or more characters other than ' ', '/', '=' or '>',
    // exposed as std::string.
    auto attribute_identifier_                                                         //
        = x3::rule<struct AttributeIdentifier_tag, std::string>{"AttributeIdentifier"} //
        = x3::lexeme[+~x3::char_(" /=>")];

    auto attribute_value_                                                    //
        = x3::rule<struct AttributeValue_tag, std::string>{"AttributeValue"} //
    = x3::lexeme                                                             //
        [( "  > *~x3::char_( " ) >  " )                                      //
         | (" " > *~x3::char_(" ") > " ")                                    //
         | *~x3::char_(" />")                                                //

    // One attribute: a name, optionally followed by '=' and a value. Uses plain sequences
    // ('>>'), so a failed match backtracks. Exposes Ast::attribute_data.
    auto single_attribute_ =
        x3::rule<struct attribute_identifier__tag, Ast::attribute_data>{"SingleAttribute"} //
        = attribute_identifier_ >> -("=" >> attribute_value_);

    // Zero or more attributes, collected into Ast::attribute_datas.
    auto attributes_                                                              //
        = x3::rule<struct attribute_data_tag, Ast::attribute_datas>{"Attributes"} //
        = *single_attribute_;

    [[maybe_unused]] static auto& header_of(x3::unused_type) {
        thread_local Ast::tag_header s_dummy;
        return s_dummy;
    [[maybe_unused]] static auto& header_of(Ast::html_tag& ht) {
        return ht.header;

    auto tag_name_begin_func = [](auto &ctx){
        get<Ast::tag_name>(ctx) = _attr(ctx).name;
        // header_of(_val(ctx)).name = _attr(ctx);
        // std::cout << typeid(_val(ctx)).name() << std::endl;

    // Semantic action for a closing tag name: the match passes only when the parsed name
    // equals the string stored in the context under the Ast::tag_name tag.
    auto tag_name_end_func         = [](auto& ctx){ _pass(ctx) = (get<Ast::tag_name>(ctx) == _attr(ctx)); };
    // Semantic actions that copy the parsed attribute into the name / attributes members of
    // whatever header header_of() returns for the enclosing rule's value.
    auto self_tag_name_action      = [](auto &ctx){ header_of(_val(ctx)).name = _attr(ctx); };
    auto self_tag_attribute_action = [](auto& ctx) { header_of(_val(ctx)).attributes = _attr(ctx); };

    // Tag name: a lexeme of zero or more characters other than ' ', '/' or '>'.
    // NOTE(review): '*' allows an empty tag name to match -- confirm that is intended.
    auto tag_name_                                                     //
        = x3::rule<struct HtmlTagName_tag, std::string>{"HtmlTagName"} //
        = x3::lexeme[*~x3::char_(" />")];

    auto self_tag_                                                       //
        = x3::rule<struct HtmlSelfTag_tag, Ast::self_tag>{"HtmlSelfTag"} //
        =  <  >> tag_name_[self_tag_name_action] >> attributes_[self_tag_attribute_action] >> "/>";

    auto tag_header_                                                                     //
        = x3::rule<struct HtmlTagBlockHeader_tag, Ast::tag_header>{"HtmlTagBlockHeader"} //
        =  <  >> tag_name_ >> attributes_ >>  > ;

    // Declarations of the two mutually recursive rules, each with its own tag type; their
    // bodies are tag_block__def and html_element__def, attached by BOOST_SPIRIT_DEFINE.
    x3::rule<struct tag_block__tag, Ast::html_tag>        tag_block_    = "TagBlock";
    x3::rule<struct html_element__tag, Ast::html_element> html_element_ = "HtmlElement";

    auto tag_block__def = x3::with<Ast::tag_name>(""s)                        //
        [                                                                     //
            tag_header_[tag_name_begin_func] >> /**html_element_ >>*/ "</" >> //
            x3::omit[tag_name_[tag_name_end_func]] >>  >                      //

    auto inner_text        = x3::lexeme[*~x3::char_( < )];
    // An element is inner text, a self-closing tag, or a tag block, tried in that order.
    auto html_element__def = inner_text | self_tag_ | tag_block_;

    BOOST_SPIRIT_DEFINE(tag_block_, html_element_)

namespace unit_tests {
    template <bool ShouldSucceed = true, typename P>
    void test(P const& rule, std::initializer_list<std::string_view> cases) {
        for (auto input : cases) {
            if constexpr (ShouldSucceed) {
                typename x3::traits::attribute_of<P, x3::unused_type>::type result;

                auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space, result);
                std::cout << quoted(input) << " -> " << (ok ? "Ok" : "FAILED") << std::endl;
            } else {
                auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space);
                if (!ok)
                    std::cout << "Fails as expected: " << quoted(input) << std::endl;
                    std::cout << "SHOULD HAVE FAILED: " << quoted(input) << std::endl;

int main() {
                         R"(<simple foo="" bar=   value-less qux=bareword/>)",
                         R"(<div />)",
                         R"(< div/>)",

                         R"(<simple foo="" bar=   value-less qux=bareword></simple>)",
                         R"(<div ></div>)",
                         R"(< div></div>)",
                         R"(< div ></div>)",
                         R"(<div data-src="https://www.google.com" id= hello world ></div>)",

                         R"(<div></ div>)",
                         R"(<div></ div >)",

                                R"(<div/ >)",
                                R"(<div>< /div>)",


"<simple foo="" bar=   value-less qux=bareword/>" -> Ok   
"<div />" -> Ok
"<div/>" -> Ok
"< div/>" -> Ok
"<simple foo="" bar=   value-less qux=bareword></simple>" -> Ok
"<div ></div>" -> Ok
"<div></div>" -> Ok
"< div></div>" -> Ok
"< div ></div>" -> Ok
"<div data-src="https://www.google.com" id= hello world ></div>" -> Ok
"<div></ div>" -> Ok
"<div></ div >" -> Ok
Fails as expected: "<div/ >"
Fails as expected: "<div>< /div>"
Fails as expected: "<div></dov>"

What Is The Trouble


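The real reason is that `x3::with<>` extends the context. This means that every level of recursion adds more data to the context type, causing new template instantiations.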


auto tag_block__def =                                             //
    tag_header_[tag_name_begin_func] >> *html_element_ >> "</" >> //
    x3::omit[tag_name_[tag_name_end_func]] >>  >                  //

auto inner_text        = x3::lexeme[*~x3::char_( < )];
auto html_element__def = inner_text | self_tag_ | tag_block_;
auto start             = x3::with<Ast::tag_name>(""s)[html_element_];

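However, this highlights the problem that elements can nest, and the context data becomes useless once an inner tag overwrites `tag_name`. So, instead of a single `string`, we can make it a `stack<string>`: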

auto start = x3::with<tag_stack>(std::stack<std::string>{})[html_element_];


auto tag_name_begin_func = [](auto& ctx) { get<tag_stack>(ctx).push(_attr(ctx).name); };

auto tag_name_end_func = [](auto& ctx) {
    auto& s    = get<tag_stack>(ctx);
    _pass(ctx) = (s.top() == _attr(ctx));

Live On Coliru

#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iomanip>
#include <iostream>
#include <stack>

//// Unused mixin disabled for simplicity
// #include <boost/spirit/home/x3/support/ast/position_tagged.hpp>

namespace x3 = boost::spirit::x3;
using namespace std::string_literals;

namespace Ast {
    struct html_tag;
    struct html_comment;

    // using mixin = x3::position_tagged;
    struct mixin {};

    struct attribute_data : mixin {
        std::string                  name;
        boost::optional<std::string> value;
    using attribute_datas = std::vector<attribute_data>;

    struct tag_header : mixin {
        std::string     name;
        attribute_datas attributes;

    struct self_tag : mixin {
        tag_header header;

    using element_base =
        x3::variant<std::string, self_tag, boost::recursive_wrapper<html_tag>>;

    struct html_element : mixin , element_base {
        using element_base::element_base;
        using element_base::operator=;

    using html_elements = std::vector<html_element>;

    struct html_tag : mixin {
        tag_header    header;
        html_elements children;
} // namespace Ast

BOOST_FUSION_ADAPT_STRUCT(Ast::attribute_data, name, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::tag_header, name, attributes)
BOOST_FUSION_ADAPT_STRUCT(Ast::self_tag, header)
BOOST_FUSION_ADAPT_STRUCT(Ast::html_tag, header, children)

namespace Parser {
    struct tag_stack final {};

    // Attribute name: a lexeme of one or more characters other than ' ', '/', '=' or '>',
    // exposed as std::string.
    auto attribute_identifier_                                                         //
        = x3::rule<struct AttributeIdentifier_tag, std::string>{"AttributeIdentifier"} //
        = x3::lexeme[+~x3::char_(" /=>")];

    auto attribute_value_                                                    //
        = x3::rule<struct AttributeValue_tag, std::string>{"AttributeValue"} //
    = x3::lexeme                                                             //
        [( "  > *~x3::char_( " ) >  " )                                      //
         | (" " > *~x3::char_(" ") > " ")                                    //
         | *~x3::char_(" />")                                                //

    // One attribute: a name, optionally followed by '=' and a value. Uses plain sequences
    // ('>>'), so a failed match backtracks. Exposes Ast::attribute_data.
    auto single_attribute_ =
        x3::rule<struct attribute_identifier__tag, Ast::attribute_data>{"SingleAttribute"} //
        = attribute_identifier_ >> -("=" >> attribute_value_);

    // Zero or more attributes, collected into Ast::attribute_datas.
    auto attributes_                                                              //
        = x3::rule<struct attribute_data_tag, Ast::attribute_datas>{"Attributes"} //
        = *single_attribute_;

    [[maybe_unused]] static auto& header_of(x3::unused_type) {
        thread_local Ast::tag_header s_dummy;
        return s_dummy;
    [[maybe_unused]] static auto& header_of(Ast::html_tag& ht) {
        return ht.header;

    // Semantic action for an opening tag header: pushes the parsed header's name onto the
    // stack stored in the context under the tag_stack tag.
    auto tag_name_begin_func = [](auto& ctx) { get<tag_stack>(ctx).push(_attr(ctx).name); };

    auto tag_name_end_func = [](auto& ctx) {
        auto& s    = get<tag_stack>(ctx);
        _pass(ctx) = (s.top() == _attr(ctx));
    // Semantic actions that copy the parsed attribute into the name / attributes members of
    // whatever header header_of() returns for the enclosing rule's value.
    auto assign_name  = [](auto& ctx) { header_of(_val(ctx)).name = _attr(ctx); };
    auto assign_attrs = [](auto& ctx) { header_of(_val(ctx)).attributes = _attr(ctx); };
    // Tag name: a lexeme of zero or more characters other than ' ', '/' or '>'.
    // NOTE(review): '*' allows an empty tag name to match -- confirm that is intended.
    auto tag_name_                                                     //
        = x3::rule<struct HtmlTagName_tag, std::string>{"HtmlTagName"} //
        = x3::lexeme[*~x3::char_(" />")];

    auto self_tag_                                                       //
        = x3::rule<struct HtmlSelfTag_tag, Ast::self_tag>{"HtmlSelfTag"} //
        =  <  >> tag_name_[assign_name] >> attributes_[assign_attrs] >> "/>";

    auto tag_header_                                                                     //
        = x3::rule<struct HtmlTagBlockHeader_tag, Ast::tag_header>{"HtmlTagBlockHeader"} //
        =  <  >> tag_name_ >> attributes_ >>  > ;

    // Declarations of the two mutually recursive rules, each with its own tag type; their
    // bodies are tag_block__def and html_element__def, attached by BOOST_SPIRIT_DEFINE.
    x3::rule<struct tag_block__tag, Ast::html_tag>        tag_block_    = "TagBlock";
    x3::rule<struct html_element__tag, Ast::html_element> html_element_ = "HtmlElement";

    auto tag_block__def =                                             //
        tag_header_[tag_name_begin_func] >> *html_element_ >> "</" >> //
        x3::omit[tag_name_[tag_name_end_func]] >>  >                  //

    auto inner_text        = x3::lexeme[*~x3::char_( < )];
    // An element is inner text, a self-closing tag, or a tag block, tried in that order.
    auto html_element__def = inner_text | self_tag_ | tag_block_;
    // Entry point: parses one html_element_ with a fresh, empty std::stack<std::string>
    // injected into the context under the tag_stack tag. The stack is injected once here
    // rather than inside the recursive tag_block_ rule.
    auto start             = x3::with<tag_stack>(std::stack<std::string>{})[html_element_];

    BOOST_SPIRIT_DEFINE(tag_block_, html_element_)

namespace unit_tests {
    template <bool ShouldSucceed = true, typename P>
    void test(P const& rule, std::initializer_list<std::string_view> cases) {
        for (auto input : cases) {
            if constexpr (ShouldSucceed) {
                typename x3::traits::attribute_of<P, x3::unused_type>::type result;

                auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space, result);
                std::cout << quoted(input) << " -> " << (ok ? "Ok" : "FAILED") << std::endl;
            } else {
                auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space);
                if (!ok)
                    std::cout << "Fails as expected: " << quoted(input) << std::endl;
                    std::cout << "SHOULD HAVE FAILED: " << quoted(input) << std::endl;

int main() {
                         R"(<simple foo="" bar=   value-less qux=bareword/>)",
                         R"(<div />)",
                         R"(< div/>)",

                         R"(<simple foo="" bar=   value-less qux=bareword></simple>)",
                         R"(<div ></div>)",
                         R"(< div></div>)",
                         R"(< div ></div>)",
                         R"(<div data-src="https://www.google.com" id= hello world ></div>)",

                         R"(<div></ div>)",
                         R"(<div></ div >)",

                         R"(<div><nest/><nest some="more">yay</nest></div>)",

                                R"(<div/ >)",
                                R"(<div>< /div>)",


"<simple foo="" bar=   value-less qux=bareword/>" -> Ok
"<div />" -> Ok
"<div/>" -> Ok
"< div/>" -> Ok
"<simple foo="" bar=   value-less qux=bareword></simple>" -> Ok
"<div ></div>" -> Ok
"<div></div>" -> Ok
"< div></div>" -> Ok
"< div ></div>" -> Ok
"<div data-src="https://www.google.com" id= hello world ></div>" -> Ok
"<div></ div>" -> Ok
"<div></ div >" -> Ok
"<div><nest/><nest some="more">yay</nest></div>" -> Ok
Fails as expected: "<div/ >"
Fails as expected: "<div>< /div>"
Fails as expected: "<div></dov>"


I answered on the assumption that you are doing this in order to learn X3. Otherwise, the only advice is: do not do this. Use a library.

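Not only is your grammar a very poor approximation, it will fail completely in the wild. Closing tags are not required in HTML ("quirks mode"). Script text, CDATA, entity references, Unicode and escapes will all break your parser.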

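Oh, and did you notice that you broke attribute propagation by adding some semantic actions? I could show you how to fix that, but I think I will leave it for now.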


This initial solution to the problem of, among other things, matching begin/end tags is greatly simplified here. The simplification focuses solely on the "matching begin/end tags" subpart of the problem. It makes no attempt at parsing strings; instead, it simply parses x3::uint_. This is sufficient to illustrate a solution to the subpart of the problem, because the essence of the subpart is matching begin tags with end tags. More specifically, the problem of inferring the attribute of this expression:

      (   <  
      >> tag_name_
      >>  > 


      (  "</"
      >> tag_name_ 
      >>  > 

is visually much simpler than inferring the attribute of this expression:

    auto tag_name_                                                     //
        = x3::rule<struct HtmlTagName_tag, std::string>{"HtmlTagName"} //
        = x3::lexeme[*~x3::char_(" />")];


        "</" >> //
        x3::omit[tag_name_[tag_name_end_func]] >>  >                  //  

The latter 2, visually complicated, expressions were copy&pasted from here.

Furthermore, tag_name_ and inner_text are also much simpler. The original:

   auto tag_name_                                                     //
       = x3::rule<struct HtmlTagName_tag, std::string>{"HtmlTagName"} //
       = x3::lexeme[*~x3::char_(" />")];
   auto inner_text        = x3::lexeme[*~x3::char_( < )];

is obviously and distractingly more complicated than the simplified solution:

    auto tag_name_
        = x3::uint_;
    auto inner_text        = x3::uint_;

Now, the reader may note that the original solution contained several statements which Seth called "immediately-defined rules". The "immediately-defined rule" pattern may be "abstracted" as:

    auto RuleDef
      = x3::rule<struct RuleTag, RuleAttribute>{"RuleName"}
      = RuleRhs;

In this abstraction, the camel-case identifiers are pattern parameters which are replaced to create an actual instance of an immediately-defined rule, somewhat like when a template's expressions are instantiated. In the above tag_name_ instance, the following replacements were made:

  RuleDef -> tag_name_
  RuleTag -> HtmlTagName_tag
  RuleAttribute -> std::string
  RuleName -> HtmlTagName
  RuleRhs -> x3::lexeme[*~x3::char_(" />")] 

But what's the purpose of an immediately-defined rule? Well, one reason is to convert the attribute of the RuleRhs to the RuleAttribute, as shown here. (The example may be a bit hard to understand because the immediately-defined rule is obscured by being within the expression forming the parser argument to the parse function.)

However, there's no need for such conversions in the simplification; hence, all the immediately-defined rules were removed as a further simplification.

Furthermore, the claim that inner_text requires * instead of + is wrong. Using * results in html_element_ always choosing inner_text without consuming any input if the input starts with '<'. Because *html_element_ then keeps succeeding without making progress, this results in an infinite loop.


namespace unit_tests 
      < bool ShouldSucceed = true
      , typename P
      ( P const& start
      , std::initializer_list<std::string> cases
          std::cout<<__func__ <<":ShouldSucceed="<<ShouldSucceed<<";
          using aof_parser=typename x3::traits::attribute_of<P, x3::unused_type>::type;
          for (auto input : cases) 
              auto first=input.begin();
              auto const last=input.end();
              auto ok = phrase_parse(first, last, start, x3::space, attr_actual);
              std::string input_end(first,last);
              auto at_end=input_end.empty();
              bool success=ok && at_end;
              if constexpr (ShouldSucceed) 
                  if (success)
                      std::cout << ":Yes Ok,succeeded and should have succeeded.";
                      std::cout << ":Not Ok,failed but should have succeeded!";
                  std::cout << std::endl;
                  if (!ok)
                      std::cout << ":Yes Ok,failed and should have failed.";
                      std::cout << ":Not Ok,succeeded but should have failed! ";
                  std::cout << std::endl;
    template <bool ShouldSucceed = true, typename P>
    void test_with_stack
    ( P const& rule
    , std::initializer_list<std::string> cases
        auto start=
      < bool ShouldSucceed=true
      , typename Parser
      ( Parser const& parser
      , std::initializer_list<std::string> cases
      ; test_with_stack
        < ShouldSucceed
        ( parser
        , cases





to see the problem. But be prepared to kill the program because, when using * in inner_text, you'll get an infinite loop.

#define BOOST_SPIRIT_X3_DEBUG

