使用boost :: spirit解析Newick语法

问题描述:

我想使用boost :: spirit库解析Newick语法(定义为here)。使用boost :: spirit解析Newick语法

我已经做了我自己的解析器,它正确识别语法。这是它的代码:

#define BOOST_SPIRIT_DEBUG 

#include <boost/spirit/include/qi.hpp> 
#include <boost/variant/recursive_variant.hpp> 
#include <boost/fusion/include/adapt_struct.hpp> 
#include <vector> 

namespace parser 
{ 
    struct ptree; 

    typedef boost::variant<boost::recursive_wrapper<ptree>> ptree_recursive; 
    struct ptree 
    { 
     std::vector<ptree_recursive> children; 
     std::string name; 
     double length; 
    }; 

    /* Used to cast ptree_recursive into ptree. */ 
    class ptree_visitor : public boost::static_visitor<ptree> 
    { 
    public: 
     ptree operator() (ptree tree) const 
     { 
      return tree; 
     } 
    }; 
} 

BOOST_FUSION_ADAPT_STRUCT(
    parser::ptree, 
    (std::vector<parser::ptree_recursive>, children) 
    (std::string, name) 
    (double, length) 
) 

namespace parser 
{ 
    namespace qi = boost::spirit::qi; 
    namespace ascii = boost::spirit::ascii; 

    template<typename Iterator> 
    struct newick_grammar : qi::grammar<Iterator, ptree(), ascii::space_type> 
    { 
     public: 
      newick_grammar() : newick_grammar::base_type(tree) 
      { 
       using qi::lexeme; 
       using qi::double_; 
       using ascii::char_; 

       /* This is the only grammar that works fine: 
       * http://evolution.genetics.washington.edu/phylip/newick_doc.html */ 
       label = lexeme[+(char_ - ':' - ')' - ',')]; 
       branch_length = ':' >> double_; 

       subtree = 
         -descendant_list 
        >> -label 
        >> -branch_length; 

       descendant_list = 
         '(' 
        >> subtree 
        >> *(',' >> subtree) 
        >> ')'; 

       tree = subtree >> ';'; 

       BOOST_SPIRIT_DEBUG_NODE(label); 
       BOOST_SPIRIT_DEBUG_NODE(branch_length); 
       BOOST_SPIRIT_DEBUG_NODE(subtree); 
       BOOST_SPIRIT_DEBUG_NODE(descendant_list); 
       BOOST_SPIRIT_DEBUG_NODE(tree); 
      } 

     private: 

      /* grammar rules */ 
      qi::rule<Iterator, ptree(), ascii::space_type> tree, subtree; 
      qi::rule<Iterator, ptree_recursive(), ascii::space_type> descendant_list; 
      qi::rule<Iterator, double(), ascii::space_type> branch_length; 
      qi::rule<Iterator, std::string(), ascii::space_type> label; 
    }; 
} 

给解析器的ptree实例存储了newick树。 测试字符串,用于该代码,是下列之一:

(((One:0.1,Two:0.2)Sub1:0.3,(Three:0.4,Four:0.5)Sub2:0.6)Sub3:0.7,Five:0.8)Root:0.9; 

解析器正确识别语法,但它产生的局部树。特别是,prtree实例,包含“根”节点及其第一个“Sub3”子节点。 我试图使用push_at和at_c方法(解释here)aswel。我有同样的结果。

为什么语法似乎不会创建并添加所有节点,即使能够识别语法并将树作为aswel?

感谢您的建议。

SOLUTION

template<typename Iterator> 
    struct newick_grammar : qi::grammar<Iterator, base::ptree()> 
    { 
     public: 
      newick_grammar() : newick_grammar::base_type(tree) 
      { 
       /* This is the only grammar that works fine: 
       * http://evolution.genetics.washington.edu/phylip/newick_doc.html */ 
       label %= qi::lexeme[+(qi::char_ - ':' - ')' - ',')]; 
       branch_length %= ':' >> qi::double_; 

       subtree = 
         -descendant_list 
        >> -label 
        >> -branch_length; 

       descendant_list = 
         '(' 
        >> subtree 
        >> *(',' >> subtree) 
        >> ')'; 

       tree %= subtree >> ';'; 

       BOOST_SPIRIT_DEBUG_NODE(label); 
       BOOST_SPIRIT_DEBUG_NODE(branch_length); 
       BOOST_SPIRIT_DEBUG_NODE(subtree); 
       BOOST_SPIRIT_DEBUG_NODE(descendant_list); 
       BOOST_SPIRIT_DEBUG_NODE(tree); 
      } 

     private: 

      /* grammar rules */ 
      qi::rule<Iterator, base::ptree()> tree, subtree; 
      qi::rule<Iterator, base::children_ptree()> descendant_list; 
      qi::rule<Iterator, double()> branch_length; 
      qi::rule<Iterator, std::string()> label; 
    }; 

我认为这是在你的程序有很多货物*编码。例如,变体完全无用。所以我重新编写了一下,添加了一些评论以帮助你理解(如果不清楚,请不要犹豫,在评论中提问)。我把这个空间说明放在一边,因为我认为这对你的情况没用。

#include <boost/spirit/include/qi.hpp> 
#include <boost/fusion/include/adapt_struct.hpp> 
#include <boost/spirit/include/phoenix_core.hpp> 
#include <boost/spirit/include/phoenix_operator.hpp> 
#include <boost/spirit/include/phoenix_fusion.hpp> 
#include <boost/spirit/include/phoenix_stl.hpp> 
#include <vector> 
#include <string> 
#include <iostream> 

namespace parser 
{ 
    // Forward declaration for the vector 
    struct ptree; 

    // typedef to ease the writing 
    typedef std::vector<ptree> children_vector; 

    // The tree structure itseflf 
    struct ptree 
    { 
     children_vector children; 
     std::string name; 
     double length; 
    }; 

    // Streaming operator for printing the result 
    std::ostream& operator<<(std::ostream& stream, const ptree& tree) 
    { 
     bool first = true; 
     stream << "(" << tree.name << ": " << tree.length << " { "; 
     for (auto child: tree.children) 
     { 
      stream << (first ? "" : ",") << child; 
      first = false; 
     } 

     stream << " }"; 
     return stream; 
    } 
} 

// adapt the structure to fusion phoenix 
BOOST_FUSION_ADAPT_STRUCT(
    parser::ptree, 
    (parser::children_vector, children) 
    (std::string, name) 
    (double, length) 
) 

namespace parser 
{ 
    // namespace aliasing to shorten the names 
    namespace qi = boost::spirit::qi;  
    namespace phoenix = boost::phoenix; 

    // This grammar parse string to a ptree 
    struct newick_grammar : qi::grammar<std::string::const_iterator, ptree()> 
    { 
    public: 
     newick_grammar() 
      : newick_grammar::base_type(tree) // We try to parse the tree rule 
     {     
      using phoenix::at_c; // Access nth field of structure 
      using phoenix::push_back; // Push into vector 

      // For label use %= to assign the result of the parse to the string 
      label %= qi::lexeme[+(qi::char_ - ':' - ')' - ',')]; 

      // For branch length use %= to assign the result of the parse to the 
      // double 
      branch_length %= ':' >> qi::double_; 

      // When parsing the subtree just assign the elements that have been 
      // built in the subrules 
      subtree = 
       // Assign vector of children to the first element of the struct 
       -descendant_list [at_c<0>(qi::_val) = qi::_1 ] 
       // Assign the label to the second element 
       >> -label [ at_c<1>(qi::_val) = qi::_1 ] 
       // Assign the branch length to the third element 
       >> -branch_length [ at_c<2>(qi::_val) = qi::_1 ]; 

      // Descendant list is a vector of ptree, we just push back the 
      // created ptrees into the vector 
      descendant_list = 
       '(' >> subtree [ push_back(qi::_val, qi::_1) ] 
       >> *(',' >> subtree [ push_back(qi::_val, qi::_1) ]) 
       >> ')'; 

      // The tree receive the whole subtree using %= 
      tree %= subtree >> ';' ; 
     } 

    private: 

     // Here are the various grammar rules typed by the element they do 
     // generate 
     qi::rule<std::string::const_iterator, ptree()> tree, subtree; 
     qi::rule<std::string::const_iterator, children_vector()> descendant_list; 
     qi::rule<std::string::const_iterator, double()> branch_length; 
     qi::rule<std::string::const_iterator, std::string()> label; 
    }; 
} 

int main(int argc, char const *argv[]) 
{ 
    namespace qi = boost::spirit::qi; 
    std::string str; 

    while (getline(std::cin, str)) 
    { 
     // Instantiate grammar and tree 
     parser::newick_grammar grammar; 
     parser::ptree tree; 

     // Parse 
     bool result = qi::phrase_parse(str.cbegin(), str.cend(), grammar, qi::space, tree); 

     // Print the result 
     std::cout << "Parsing result: " << std::boolalpha << result << std::endl; 
     std::cout << tree << std::endl; 
    } 
    return 0; 
} 

这里是你的样品的输出:

$ ./a.exe 
(((One:0.1,Two:0.2)Sub1:0.3,(Three:0.4,Four:0.5)Sub2:0.6)Sub3:0.7,Five:0.8)Root:0.9; 
Parsing result: true 
(Root: 0.9 { (Sub3: 0.7 { (Sub1: 0.3 { (One: 0.1 { },(Two: 0.2 { } },(Sub2: 0.6 { (Three: 0.4 { },(Four: 0.5 { } } },(Five: 0.8 { } } 
+0

你好约翰。非常感谢您的回答。我使用你的建议解决了我的问题:特别是,主要问题是变体使用和缺少的%运算符。 无论如何,我删除了凤凰依赖,让解析器自动管理ptree矢量。 – sawk