On reading, the first thing I notice is that `self_tag_` uses expectation points. That won't fly because it is ordered before other things that can legally start with `'<'`, like `tag_block_`:
// The alternatives are listed in the order inner_text, self_tag_, tag_block_.
auto html_element__def = inner_text | self_tag_ | tag_block_ ;
由于使用了期望点(expectation point),解析器永远不会回溯。
Many places use operator+
where operator*
is required, like:
// Character-set difference form: any run of characters except '<'.
auto inner_text = lexeme[*(char_ - '<')];
所有这些差集(difference)都可以更地道地写成取反的字符集:
// Negated character set: any run of characters other than '<'.
auto inner_text = lexeme[*~char_('<')];
//
= lexeme[*~char_(" />")];
除了 XML 对元素名称等有明确的合法产生式之外,我假定你是有意避免编写完全符合规范的解析器。具体来说,你确实需要把名称/值等规则排除在跳过器(skipper)之外(例如使用 `lexeme[...]`)。
其中一个陷阱是重复使用解析器表达式。就我的理解而言,这对立即定义的规则应该没有问题,但对于*通过*标签类型(即 `BOOST_SPIRIT_DEFINE`)定义的规则则不然。
Cleanup Exercise
首先,进行清理。这里通过把 `*html_element_` 注释掉,绕开了模板实例化深度的问题。不过,先来看看哪些部分是可行的:
Live On Coliru
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iomanip>
#include <iostream>
//// Unused mixin disabled for simplicity
// #include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
namespace x3 = boost::spirit::x3;
using namespace std::string_literals;
// AST node types produced by the parsers in namespace Parser.
namespace Ast {
// Empty tag type; used as the key for x3::with<>/get<> context data in namespace Parser.
struct tag_name {};
// Forward declaration: html_element's variant refers to html_tag recursively.
struct html_tag;
// Forward-declared only; not defined or used in the code shown here.
struct html_comment;
// using mixin = x3::position_tagged;
// Empty base shared by all nodes; stands in for the disabled position_tagged mixin.
struct mixin {};
// One attribute: its name and an optional value.
struct attribute_data : mixin {
std::string name;
boost::optional<std::string> value;
};
using attribute_datas = std::vector<attribute_data>;
// Tag name plus its list of attributes.
struct tag_header : mixin {
std::string name;
attribute_datas attributes;
};
// A self-closing tag: it has a header only.
struct self_tag : mixin {
tag_header header;
};
// An element is text, a self-closing tag, or a (recursively wrapped) html_tag.
using element_base =
x3::variant<std::string, self_tag, boost::recursive_wrapper<html_tag>>;
struct html_element : mixin , element_base {
// Re-export the variant's constructors and assignment operators.
using element_base::element_base;
using element_base::operator=;
};
using html_elements = std::vector<html_element>;
// A tag with a header and a list of child elements.
struct html_tag : mixin {
tag_header header;
html_elements children;
};
} // namespace Ast
// Adapt the AST structs as Fusion sequences, listing their members in declaration order.
BOOST_FUSION_ADAPT_STRUCT(Ast::attribute_data, name, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::tag_header, name, attributes)
BOOST_FUSION_ADAPT_STRUCT(Ast::self_tag, header)
BOOST_FUSION_ADAPT_STRUCT(Ast::html_tag, header, children)
// X3 grammar for a simplified HTML-like element syntax.
//
// The character/string literals '<', '>', '"' and "'" had been stripped from this
// listing, leaving it uncompilable; they are restored here.
namespace Parser {
    // Attribute name: one or more characters other than space, '/', '=' and '>'.
    auto attribute_identifier_ //
        = x3::rule<struct AttributeIdentifier_tag, std::string>{"AttributeIdentifier"} //
        = x3::lexeme[+~x3::char_(" /=>")];

    // Attribute value: double-quoted, single-quoted, or a bare run of characters
    // other than space, '/' and '>'. Once an opening quote has matched, the
    // expectation operator (>) requires the closing quote.
    auto attribute_value_ //
        = x3::rule<struct AttributeValue_tag, std::string>{"AttributeValue"} //
        = x3::lexeme //
        [('"' > *~x3::char_('"') > '"') //
         | ("'" > *~x3::char_("'") > "'") //
         | *~x3::char_(" />") //
    ];

    // A single attribute: a name, optionally followed by '=' and a value.
    auto single_attribute_ =
        x3::rule<struct attribute_identifier__tag, Ast::attribute_data>{"SingleAttribute"} //
        = attribute_identifier_ >> -("=" >> attribute_value_);

    // Zero or more attributes.
    auto attributes_ //
        = x3::rule<struct attribute_data_tag, Ast::attribute_datas>{"Attributes"} //
        = *single_attribute_;

    // header_of: gives the semantic actions a tag_header to write into.
    // When the rule attribute is unused, a thread-local dummy header is returned.
    [[maybe_unused]] static auto& header_of(x3::unused_type) {
        thread_local Ast::tag_header s_dummy;
        return s_dummy;
    }
    // For an html_tag, returns its own header.
    [[maybe_unused]] static auto& header_of(Ast::html_tag& ht) {
        return ht.header;
    }

    // Stores the name of the just-parsed tag header in the context data keyed by Ast::tag_name.
    // x3::get is qualified explicitly: an unqualified `get<Tag>(ctx)` call with explicit
    // template arguments is not found through ADL before C++20.
    auto tag_name_begin_func = [](auto& ctx) {
        x3::get<Ast::tag_name>(ctx) = _attr(ctx).name;
        // header_of(_val(ctx)).name = _attr(ctx);
        // std::cout << typeid(_val(ctx)).name() << std::endl;
    };
    // Passes only when the closing tag name equals the stored opening tag name.
    auto tag_name_end_func = [](auto& ctx) {
        _pass(ctx) = (x3::get<Ast::tag_name>(ctx) == _attr(ctx));
    };
    // Copy the parsed name / attributes into the header selected by header_of.
    auto self_tag_name_action = [](auto& ctx) { header_of(_val(ctx)).name = _attr(ctx); };
    auto self_tag_attribute_action = [](auto& ctx) { header_of(_val(ctx)).attributes = _attr(ctx); };

    // Tag name: a (possibly empty) run of characters other than space, '/' and '>'.
    auto tag_name_ //
        = x3::rule<struct HtmlTagName_tag, std::string>{"HtmlTagName"} //
        = x3::lexeme[*~x3::char_(" />")];

    // Self-closing tag: '<' name attributes "/>".
    auto self_tag_ //
        = x3::rule<struct HtmlSelfTag_tag, Ast::self_tag>{"HtmlSelfTag"} //
        = '<' >> tag_name_[self_tag_name_action] >> attributes_[self_tag_attribute_action] >> "/>";

    // Opening tag: '<' name attributes '>'.
    auto tag_header_ //
        = x3::rule<struct HtmlTagBlockHeader_tag, Ast::tag_header>{"HtmlTagBlockHeader"} //
        = '<' >> tag_name_ >> attributes_ >> '>';

    x3::rule<struct tag_block__tag, Ast::html_tag> tag_block_ = "TagBlock";
    x3::rule<struct html_element__tag, Ast::html_element> html_element_ = "HtmlElement";

    // Opening tag followed by the matching closing tag. The recursion into
    // *html_element_ is deliberately left commented out in this listing.
    auto tag_block__def = x3::with<Ast::tag_name>(""s) //
        [ //
            tag_header_[tag_name_begin_func] >> /**html_element_ >>*/ "</" >> //
            x3::omit[tag_name_[tag_name_end_func]] >> '>' //
    ];

    // Text content: a run of characters other than '<'.
    auto inner_text = x3::lexeme[*~x3::char_('<')];
    // NOTE(review): `*` lets inner_text match the empty string, so the first alternative
    // below can succeed without consuming any input — confirm that this is intended.
    auto html_element__def = inner_text | self_tag_ | tag_block_;

    BOOST_SPIRIT_DEFINE(tag_block_, html_element_)
}
namespace unit_tests {
template <bool ShouldSucceed = true, typename P>
void test(P const& rule, std::initializer_list<std::string_view> cases) {
for (auto input : cases) {
if constexpr (ShouldSucceed) {
typename x3::traits::attribute_of<P, x3::unused_type>::type result;
auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space, result);
std::cout << quoted(input) << " -> " << (ok ? "Ok" : "FAILED") << std::endl;
} else {
auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space);
if (!ok)
std::cout << "Fails as expected: " << quoted(input) << std::endl;
else
std::cout << "SHOULD HAVE FAILED: " << quoted(input) << std::endl;
}
}
}
}
// Runs the positive cases for self_tag_ and html_element_, then the negative cases.
int main() {
    using Cases = std::initializer_list<std::string_view>;

    Cases const self_closing = {
        R"(<simple foo="" bar= value-less qux=bareword/>)",
        R"(<div />)",
        R"(<div/>)",
        R"(< div/>)",
    };
    Cases const elements = {
        R"(<simple foo="" bar= value-less qux=bareword></simple>)",
        R"(<div ></div>)",
        R"(<div></div>)",
        R"(< div></div>)",
        R"(< div ></div>)",
        R"(<div data-src="https://www.google.com" id= hello world ></div>)",
        R"(<div></ div>)",
        R"(<div></ div >)",
    };
    // NOTE(review): the negative cases are run against self_tag_, not the element
    // rule — confirm that this is intended.
    Cases const rejected = {
        R"(<div/ >)",
        R"(<div>< /div>)",
        R"(<div></dov>)",
    };

    unit_tests::test(Parser::self_tag_, self_closing);
    unit_tests::test(Parser::html_element_, elements);
    unit_tests::test<false>(Parser::self_tag_, rejected);
}
Outputs
"<simple foo="" bar= value-less qux=bareword/>" -> Ok
"<div />" -> Ok
"<div/>" -> Ok
"< div/>" -> Ok
"<simple foo="" bar= value-less qux=bareword></simple>" -> Ok
"<div ></div>" -> Ok
"<div></div>" -> Ok
"< div></div>" -> Ok
"< div ></div>" -> Ok
"<div data-src="https://www.google.com" id= hello world ></div>" -> Ok
"<div></ div>" -> Ok
"<div></ div >" -> Ok
Fails as expected: "<div/ >"
Fails as expected: "<div>< /div>"
Fails as expected: "<div></dov>"
What Is The Trouble
正如你可以从我把递归部分 `*html_element_` 注释掉的小把戏中看出的,正是这个递归造成了问题。
真正的原因是:`x3::with<>` 扩展了上下文(context)。这意味着每一层递归都会向上下文类型中添加更多数据,从而导致新的模板实例化。
最简单的技巧是把 `with<>` 移到递归之外:
// Same grammar with the recursion restored; the with<> directive now wraps the
// top-level `start` parser instead of sitting inside the recursive rule.
auto tag_block__def = //
    tag_header_[tag_name_begin_func] >> *html_element_ >> "</" >> //
    x3::omit[tag_name_[tag_name_end_func]] >> '>' //
    ;
auto inner_text = x3::lexeme[*~x3::char_('<')];
auto html_element__def = inner_text | self_tag_ | tag_block_;
auto start = x3::with<Ast::tag_name>(""s)[html_element_];
然而,这凸显了一个问题:元素可以嵌套,而当内层标签覆盖了 `tag_name` 的上下文数据时,该数据就失去了作用。因此,我们可以用 `stack<string>` 取代 `string`:
// Wrap html_element_ in a with<> directive whose context data, keyed by tag_stack,
// starts out as an empty stack of strings.
auto start = x3::with<tag_stack>(std::stack<std::string>{})[html_element_];
然后,相应地修改语义动作:
// Pushes the name of the just-parsed tag header onto the stack held in the context.
auto tag_name_begin_func = [](auto& ctx) {
    auto& open_tags = get<tag_stack>(ctx);
    open_tags.push(_attr(ctx).name);
};
// Compares the closing tag name with the name on top of the stack, pops that
// entry, and lets the parse pass only when the two names were equal.
auto tag_name_end_func = [](auto& ctx) {
    auto& open_tags = get<tag_stack>(ctx);
    bool const same_name = (open_tags.top() == _attr(ctx));
    open_tags.pop();
    _pass(ctx) = same_name;
};
见Live On Coliru。
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iomanip>
#include <iostream>
#include <stack>
//// Unused mixin disabled for simplicity
// #include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
namespace x3 = boost::spirit::x3;
using namespace std::string_literals;
// AST node types produced by the parsers in namespace Parser.
namespace Ast {
// Forward declaration: html_element's variant refers to html_tag recursively.
struct html_tag;
// Forward-declared only; not defined or used in the code shown here.
struct html_comment;
// using mixin = x3::position_tagged;
// Empty base shared by all nodes; stands in for the disabled position_tagged mixin.
struct mixin {};
// One attribute: its name and an optional value.
struct attribute_data : mixin {
std::string name;
boost::optional<std::string> value;
};
using attribute_datas = std::vector<attribute_data>;
// Tag name plus its list of attributes.
struct tag_header : mixin {
std::string name;
attribute_datas attributes;
};
// A self-closing tag: it has a header only.
struct self_tag : mixin {
tag_header header;
};
// An element is text, a self-closing tag, or a (recursively wrapped) html_tag.
using element_base =
x3::variant<std::string, self_tag, boost::recursive_wrapper<html_tag>>;
struct html_element : mixin , element_base {
// Re-export the variant's constructors and assignment operators.
using element_base::element_base;
using element_base::operator=;
};
using html_elements = std::vector<html_element>;
// A tag with a header and a list of child elements.
struct html_tag : mixin {
tag_header header;
html_elements children;
};
} // namespace Ast
// Adapt the AST structs as Fusion sequences, listing their members in declaration order.
BOOST_FUSION_ADAPT_STRUCT(Ast::attribute_data, name, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::tag_header, name, attributes)
BOOST_FUSION_ADAPT_STRUCT(Ast::self_tag, header)
BOOST_FUSION_ADAPT_STRUCT(Ast::html_tag, header, children)
// X3 grammar for a simplified HTML-like element syntax, with nesting support.
//
// The character/string literals '<', '>', '"' and "'" had been stripped from this
// listing, leaving it uncompilable; they are restored here.
namespace Parser {
    // Empty tag type keying the stack of tag names stored in the parse context.
    struct tag_stack final {};

    // Attribute name: one or more characters other than space, '/', '=' and '>'.
    auto attribute_identifier_ //
        = x3::rule<struct AttributeIdentifier_tag, std::string>{"AttributeIdentifier"} //
        = x3::lexeme[+~x3::char_(" /=>")];

    // Attribute value: double-quoted, single-quoted, or a bare run of characters
    // other than space, '/' and '>'. Once an opening quote has matched, the
    // expectation operator (>) requires the closing quote.
    auto attribute_value_ //
        = x3::rule<struct AttributeValue_tag, std::string>{"AttributeValue"} //
        = x3::lexeme //
        [('"' > *~x3::char_('"') > '"') //
         | ("'" > *~x3::char_("'") > "'") //
         | *~x3::char_(" />") //
    ];

    // A single attribute: a name, optionally followed by '=' and a value.
    auto single_attribute_ =
        x3::rule<struct attribute_identifier__tag, Ast::attribute_data>{"SingleAttribute"} //
        = attribute_identifier_ >> -("=" >> attribute_value_);

    // Zero or more attributes.
    auto attributes_ //
        = x3::rule<struct attribute_data_tag, Ast::attribute_datas>{"Attributes"} //
        = *single_attribute_;

    // header_of: gives the semantic actions a tag_header to write into.
    // When the rule attribute is unused, a thread-local dummy header is returned.
    [[maybe_unused]] static auto& header_of(x3::unused_type) {
        thread_local Ast::tag_header s_dummy;
        return s_dummy;
    }
    // For an html_tag, returns its own header.
    [[maybe_unused]] static auto& header_of(Ast::html_tag& ht) {
        return ht.header;
    }

    // Pushes the name of the just-parsed tag header onto the context's stack.
    // x3::get is qualified explicitly: an unqualified `get<Tag>(ctx)` call with explicit
    // template arguments is not found through ADL before C++20.
    auto tag_name_begin_func = [](auto& ctx) { x3::get<tag_stack>(ctx).push(_attr(ctx).name); };
    // Passes only when the closing tag name equals the name on top of the stack;
    // the top entry is popped whether or not the names match.
    auto tag_name_end_func = [](auto& ctx) {
        auto& s = x3::get<tag_stack>(ctx);
        _pass(ctx) = (s.top() == _attr(ctx));
        s.pop();
    };
    // Copy the parsed name / attributes into the header selected by header_of.
    auto assign_name = [](auto& ctx) { header_of(_val(ctx)).name = _attr(ctx); };
    auto assign_attrs = [](auto& ctx) { header_of(_val(ctx)).attributes = _attr(ctx); };

    // Tag name: a (possibly empty) run of characters other than space, '/' and '>'.
    auto tag_name_ //
        = x3::rule<struct HtmlTagName_tag, std::string>{"HtmlTagName"} //
        = x3::lexeme[*~x3::char_(" />")];

    // Self-closing tag: '<' name attributes "/>".
    auto self_tag_ //
        = x3::rule<struct HtmlSelfTag_tag, Ast::self_tag>{"HtmlSelfTag"} //
        = '<' >> tag_name_[assign_name] >> attributes_[assign_attrs] >> "/>";

    // Opening tag: '<' name attributes '>'.
    auto tag_header_ //
        = x3::rule<struct HtmlTagBlockHeader_tag, Ast::tag_header>{"HtmlTagBlockHeader"} //
        = '<' >> tag_name_ >> attributes_ >> '>';

    x3::rule<struct tag_block__tag, Ast::html_tag> tag_block_ = "TagBlock";
    x3::rule<struct html_element__tag, Ast::html_element> html_element_ = "HtmlElement";

    // Opening tag, any number of child elements, then the matching closing tag.
    auto tag_block__def = //
        tag_header_[tag_name_begin_func] >> *html_element_ >> "</" >> //
        x3::omit[tag_name_[tag_name_end_func]] >> '>' //
        ;

    // Text content: a run of characters other than '<'.
    auto inner_text = x3::lexeme[*~x3::char_('<')];
    // NOTE(review): `*` lets inner_text match the empty string, so the first alternative
    // below can succeed without consuming any input — confirm that this is intended.
    auto html_element__def = inner_text | self_tag_ | tag_block_;

    // Entry point: html_element_ wrapped in a with<> directive that supplies an
    // initially empty stack of tag names, outside the recursive rules.
    auto start = x3::with<tag_stack>(std::stack<std::string>{})[html_element_];

    BOOST_SPIRIT_DEFINE(tag_block_, html_element_)
}
namespace unit_tests {
template <bool ShouldSucceed = true, typename P>
void test(P const& rule, std::initializer_list<std::string_view> cases) {
for (auto input : cases) {
if constexpr (ShouldSucceed) {
typename x3::traits::attribute_of<P, x3::unused_type>::type result;
auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space, result);
std::cout << quoted(input) << " -> " << (ok ? "Ok" : "FAILED") << std::endl;
} else {
auto ok = phrase_parse(input.begin(), input.end(), rule, x3::space);
if (!ok)
std::cout << "Fails as expected: " << quoted(input) << std::endl;
else
std::cout << "SHOULD HAVE FAILED: " << quoted(input) << std::endl;
}
}
}
}
// Runs the positive cases for self_tag_ and start, then the negative cases.
int main() {
    using Cases = std::initializer_list<std::string_view>;

    Cases const self_closing = {
        R"(<simple foo="" bar= value-less qux=bareword/>)",
        R"(<div />)",
        R"(<div/>)",
        R"(< div/>)",
    };
    Cases const elements = {
        R"(<simple foo="" bar= value-less qux=bareword></simple>)",
        R"(<div ></div>)",
        R"(<div></div>)",
        R"(< div></div>)",
        R"(< div ></div>)",
        R"(<div data-src="https://www.google.com" id= hello world ></div>)",
        R"(<div></ div>)",
        R"(<div></ div >)",
        R"(<div><nest/><nest some="more">yay</nest></div>)",
    };
    // NOTE(review): the negative cases are run against self_tag_, not start —
    // confirm that this is intended.
    Cases const rejected = {
        R"(<div/ >)",
        R"(<div>< /div>)",
        R"(<div></dov>)",
    };

    unit_tests::test(Parser::self_tag_, self_closing);
    unit_tests::test(Parser::start, elements);
    unit_tests::test<false>(Parser::self_tag_, rejected);
}
输出
"<simple foo="" bar= value-less qux=bareword/>" -> Ok
"<div />" -> Ok
"<div/>" -> Ok
"< div/>" -> Ok
"<simple foo="" bar= value-less qux=bareword></simple>" -> Ok
"<div ></div>" -> Ok
"<div></div>" -> Ok
"< div></div>" -> Ok
"< div ></div>" -> Ok
"<div data-src="https://www.google.com" id= hello world ></div>" -> Ok
"<div></ div>" -> Ok
"<div></ div >" -> Ok
"<div><nest/><nest some="more">yay</nest></div>" -> Ok
Fails as expected: "<div/ >"
Fails as expected: "<div>< /div>"
Fails as expected: "<div></dov>"
CLOSING THOUGHTS
我是基于这样的假设来回答的:你这样做是为了学习 X3。否则,唯一的建议是:不要这样做(do not do this)。请使用现成的库。
你的语法不仅处理得非常差,在实际环境中还会完全失败。在 HTML 中,结束标签并不是必需的(“怪异模式”,quirks mode)。文本、CDATA、实体引用、Unicode、转义都会破坏你的解析器。
哦,对了,你是否注意到,添加了一些语义动作之后,你破坏了属性的自动传播?我可以向你展示如何修复,但我想现在就先不做了。
直接使用现成的库吧。