// MICG_parser.cpp
/* * phc -- the open source PHP compiler * See doc/license/README.license for licensing information * * Parser for the Macro inline code generator. */
#include "MICG_parser.h"
// Uncomment to debug parsing. Useful for finding bugs in the parsed data as // well as in the parser. // To debug the AST structure generated by Spirit, enable BOOST_SPIRIT_DEBUG, // and look for the final concat_match(begin) in the file. //#define BOOST_SPIRIT_DEBUG #define BOOST_SPIRIT_DEBUG_PRINT_SOME 80 #define BOOST_SPIRIT_DEBUG_FLAGS BOOST_SPIRIT_DEBUG_FLAGS_NODES
#include <boost/lexical_cast.hpp> #include <boost/spirit.hpp> #include <boost/spirit/iterator/position_iterator.hpp> #include <boost/spirit/tree/ast.hpp> #include <boost/spirit/tree/parse_tree.hpp>
#include <iostream>
#include "lib/error.h" #include "lib/Map.h" #include "MICG_factory.h" #include "MICG.h" #include "process_ir/debug.h" #include "process_ir/XML_unparser.h"
using namespace std; using namespace boost::spirit; using namespace boost; using namespace MICG;
/* The parser keeps no per-instance state, so there is nothing to set up. */
MICG_parser::MICG_parser ()
{
}
/*
 * Skip grammar that never skips anything.
 *
 * A skip grammar looked like the natural way to drop comments and
 * whitespace, but carving out exceptions to whitespace skipping turned out
 * to be awful, and frequently almost impossible. Removing the skip parser
 * altogether is also very difficult, so this one simply hands back
 * nothing_p, which never matches anything.
 */
struct skip_grammar : public grammar<skip_grammar>
{
	template <typename ScannerT>
	struct definition
	{
		// The grammar instance carries no state that the definition needs.
		definition (skip_grammar const&)
		{
		}

		// Start rule: built directly from nothing_p, so it never matches.
		rule<ScannerT> const start() const
		{
			return nothing_p;
		}
	};
};
/*
 * Spirit grammar for the macro templates. Every rule that should produce a
 * node is tagged with its own ID (see the enum below); rules tagged 0 are
 * syntax only and are ignored when the tree is converted.
 *
 * Limitations:
 *		- It's hard to put {} around the body of a template. Use @@@ instead.
 *		- Quoted strings don't support escaping of '"'. However, a quoted string
 *		  is only used as a parameter to a macro, so that's not a great problem.
 */
struct MICG_grammar : public grammar<MICG_grammar>
{
	// TODO: the duplication here is tedious
	enum
	{
		// leave 0 to indicate no ID
		all_id = 1,
		attr_name_id,
		attr_name_list_id,
		body_id,
		body_part_id,
		body_part_list_id,
		callback_id,
		c_code_id,
		equals_id,
		expr_id,
		expr_list_id,
		formal_parameter_id,
		formal_parameter_list_id,
		interpolation_id,
		lookup_id,
		macro_call_id,
		macro_id,
		macro_list_id,
		macro_name_id,
		param_id,
		param_name_id,
		quoted_string_id,
		_rule_id,
		_rule_list_id,
		signature_id,
		type_name_id,

		comment_id,
		wsc_id,
		ws_id,
	};

	template <typename ScannerT>
	struct definition
	{
		// Declares rule NAME tagged with NAME_id from the enum above.
#define DECL_RULE(NAME) rule<ScannerT, parser_context<>, parser_tag<NAME##_id> > NAME;
		DECL_RULE(all);
		DECL_RULE(attr_name);
		DECL_RULE(attr_name_list);
		DECL_RULE(body);
		DECL_RULE(body_part);
		DECL_RULE(body_part_list);
		DECL_RULE(callback);
		DECL_RULE(c_code);
		DECL_RULE(equals);
		DECL_RULE(expr);
		DECL_RULE(expr_list);
		DECL_RULE(formal_parameter);
		DECL_RULE(formal_parameter_list);
		DECL_RULE(interpolation);
		DECL_RULE(lookup);
		DECL_RULE(macro);
		DECL_RULE(macro_call);
		DECL_RULE(macro_list);
		DECL_RULE(macro_name);
		DECL_RULE(param);
		DECL_RULE(param_name);
		DECL_RULE(quoted_string);
		DECL_RULE(_rule);
		DECL_RULE(_rule_list);
		DECL_RULE(signature);
		DECL_RULE(type_name);
		// Only needed for the declarations above; don't leak it to the rest
		// of the file.
#undef DECL_RULE

		// For ignored tokens, use 0
		rule<ScannerT, parser_context<>, parser_tag<0> > comment;
		rule<ScannerT, parser_context<>, parser_tag<0> > til_eol;
		rule<ScannerT, parser_context<>, parser_tag<0> > ws;
		rule<ScannerT, parser_context<>, parser_tag<0> > wsc;

		definition (MICG_grammar const& self)
		{
			// Give the rules readable names in the BOOST_SPIRIT_DEBUG trace.
			// These expand to nothing unless BOOST_SPIRIT_DEBUG is defined.
			BOOST_SPIRIT_DEBUG_RULE(all);
			BOOST_SPIRIT_DEBUG_RULE(attr_name);
			BOOST_SPIRIT_DEBUG_RULE(attr_name_list);
			BOOST_SPIRIT_DEBUG_RULE(body);
			BOOST_SPIRIT_DEBUG_RULE(body_part);
			BOOST_SPIRIT_DEBUG_RULE(body_part_list);
			BOOST_SPIRIT_DEBUG_RULE(callback);
			BOOST_SPIRIT_DEBUG_RULE(c_code);
			BOOST_SPIRIT_DEBUG_RULE(equals);
			BOOST_SPIRIT_DEBUG_RULE(expr);
			// expr_list was the one tagged rule missing from this list.
			BOOST_SPIRIT_DEBUG_RULE(expr_list);
			BOOST_SPIRIT_DEBUG_RULE(formal_parameter);
			BOOST_SPIRIT_DEBUG_RULE(interpolation);
			BOOST_SPIRIT_DEBUG_RULE(lookup);
			BOOST_SPIRIT_DEBUG_RULE(macro);
			BOOST_SPIRIT_DEBUG_RULE(macro_call);
			BOOST_SPIRIT_DEBUG_RULE(macro_list);
			BOOST_SPIRIT_DEBUG_RULE(macro_name);
			BOOST_SPIRIT_DEBUG_RULE(param);
			BOOST_SPIRIT_DEBUG_RULE(param_name);
			BOOST_SPIRIT_DEBUG_RULE(quoted_string);
			BOOST_SPIRIT_DEBUG_RULE(_rule);
			BOOST_SPIRIT_DEBUG_RULE(_rule_list);
			BOOST_SPIRIT_DEBUG_RULE(signature);
			BOOST_SPIRIT_DEBUG_RULE(type_name);

			BOOST_SPIRIT_DEBUG_RULE(comment);
			BOOST_SPIRIT_DEBUG_RULE(ws);
			BOOST_SPIRIT_DEBUG_RULE(wsc);
			BOOST_SPIRIT_DEBUG_RULE(comment_p);
			BOOST_SPIRIT_DEBUG_RULE(space_p);
			BOOST_SPIRIT_DEBUG_RULE(no_node_d);
			BOOST_SPIRIT_DEBUG_RULE(til_eol);

			// This prevents nodes from being created for tokens or whitespace
#define WSC no_node_d[wsc]
#define COMMA no_node_d[WSC >> NO(",") >> WSC]
#define NO(A) no_node_d[str_p(A)]

			comment = no_node_d[comment_p ("/*", "*/")] | no_node_d[comment_p ("//")];
			ws = *no_node_d[space_p];
			wsc = no_node_d[*(space_p | comment)];
			til_eol = *(blank_p | comment) >> eol_p;

			// leaf_node_d groups all the characters into a single node, instead
			// of a list of characters.
			attr_name = leaf_node_d[+(alpha_p | '_')];
			macro_name = leaf_node_d[+(alpha_p | '_')];
			param_name = leaf_node_d[+(upper_p | '_')];
			quoted_string = leaf_node_d[confix_p ('"', *anychar_p, '"')];
			type_name = leaf_node_d[+lower_p];

			// A signature line
			formal_parameter = type_name >> WSC >> param_name;
			formal_parameter_list = list_p(formal_parameter, COMMA);
			signature = macro_name >> WSC >> NO("(") >> WSC >> formal_parameter_list >> WSC >> NO(")") >> WSC;

			// A rule line
			lookup = param_name >> NO(".") >> attr_name;
			attr_name_list = *(NO("#") >> attr_name);
			param = param_name >> attr_name_list;
			expr = param | quoted_string | lookup | macro_call | callback ;
			equals = expr >> WSC >> NO("==") >> WSC >> expr;
			_rule = NO("where") >> WSC >> (equals | lookup) >> WSC;
			_rule_list = *_rule;

			// Bodies
			// We don't want c_code to take in macro_call, interpolation, or @@@.
			// However, we do want to allow '$' and a backslash in c_code. So we
			// allow them as the first character only. Since we match
			// interpolation and macro_call before c_code, if c_code stops on
			// '$' or a backslash, macro_call and interpolation are tried before
			// c_code incorporates that character.
			c_code = leaf_node_d[(anychar_p - '@') >> *(anychar_p - (ch_p('\\') | '$' | '@'))];

			expr_list = list_p (expr, COMMA);
			macro_call = NO("\\") >> macro_name >> WSC >> NO("(") >> WSC >> expr_list >> WSC >> NO(")") >> !NO(";");
			callback = NO("\\cb:") >> macro_name >> WSC >> NO("(") >> WSC >> expr_list >> WSC >> NO(")") >> !NO(";");

			interpolation =
					(NO("$") >> param_name)
				|	(NO("${") >> param_name >> NO("}"))
				|	(NO("${") >> lookup >> NO("}"));

			// A template
			body_part = macro_call | callback | interpolation | c_code;
			body_part_list = *body_part;
			body = NO("@@@") >> no_node_d[!til_eol] >> body_part_list >> NO("@@@");
			macro = signature >> _rule_list >> body;

			macro_list = WSC >> *(macro >> WSC);
			all = macro_list;

			// The token helpers are only meaningful inside this constructor.
#undef WSC
#undef COMMA
#undef NO
		}

		// The whole file is parsed through the 'all' rule.
		rule<ScannerT, parser_context<>, parser_tag<all_id> > const& start() const
		{
			return all;
		}
	};
};
// Iterator over the template text; used to recover the file name, line and
// column of each match.
typedef position_iterator<char const *> pos_iter_t;
// Tree types produced when parsing with pos_iter_t.
typedef tree_match<pos_iter_t, node_iter_data_factory<> > tree_match_t;
typedef tree_match_t::container_t container;
typedef tree_match_t::tree_iterator tree_iter_t;

/* Extract the structure from the Boost AST, and put it into the maketea MICG.
 *
 * There are 3 kinds of node:
 *		- maketea tokens: their value is available in the node
 *		- conjunctions: their constructor arguments are available as their
 *		  children
 *		- syntax tokens: these have an id of 0, and must be ignored
 */

// Declared ahead of create_micg_list, which calls it for every child tree.
Object* create_micg_node (tree_iter_t tree);
/*
 * Convert each tree in TREES with create_micg_node and collect the results,
 * in order, in a newly allocated Object_list. Syntax tokens, for which
 * create_micg_node returns NULL, are left out. TREES is taken by value.
 */
Object_list*
create_micg_list (container trees)
{
	Object_list* converted = new Object_list;

	tree_iter_t current = trees.begin ();
	while (current != trees.end ())
	{
		Object* node = create_micg_node (current);

		// NULL marks a syntax token: nothing to keep.
		if (node != NULL)
			converted->push_back (node);

		current++;
	}

	return converted;
}
/*
 * Spirit will not create empty lists, instead producing nothing. This checks
 * position INDEX of IN to see if it is a List<T*>. If not, an empty List<T*>
 * is added at that position.
 *
 * This checks at the type level, so it assumes that one T_list is not
 * followed by another.
 *
 * Returns IN itself when the list was already present, or when it only had to
 * be appended (IN is modified in that case). Otherwise returns a new
 * Object_list with the empty list inserted before the element that was at
 * INDEX. INDEX must not be greater than IN->size ().
 */
template <class T>
Object_list*
check_argument_list (Object_list* in, unsigned int index)
{
	// Too short. Add it in last position.
	if (index == in->size ())
	{
		in->push_back (new List<T*>);
		return in;
	}

	assert (index < in->size());

	// Check if it's already perfect.
	if (isa<List<T*> > (in->at(index)))
		return in;

	// Rebuild the list with an empty List<T*> at INDEX. The slot is found by
	// position rather than by pointer identity, so exactly one list is added
	// even if the same object were to appear more than once in IN.
	Object_list* result = new Object_list;
	unsigned int position = 0;
	foreach (Object* obj, *in)
	{
		if (position == index)
		{
			// Empty list: add a list.
			result->push_back (new List<T*>);
		}

		result->push_back (obj);
		position++;
	}
	return result;
}
// Maps a MICG_grammar rule ID to the type name handed to
// Node_factory::create. Filled in by MICG_parser::parse.
Map<long, string> names;
Object* create_micg_node (tree_iter_t iter) { Object *result;
long id = iter->value.id ().to_long (); assert (id == 0 || names[id] != ""); String* value = s(string (iter->value.begin(), iter->value.end())); DEBUG ("entering " << id << " (" << names[id] << ")");
switch (id) { /* * Conjunctions (in the order they appear in micg.tea) */ case MICG_grammar::all_id: case MICG_grammar::macro_id: case MICG_grammar::signature_id: case MICG_grammar::formal_parameter_id: case MICG_grammar::lookup_id: case MICG_grammar::equals_id: case MICG_grammar::param_id: case MICG_grammar::body_id: case MICG_grammar::macro_call_id: case MICG_grammar::callback_id: { Object_list* params = create_micg_list (iter->children);
// Hack for Spirit 'bug' (see comment at check_argument_list() // definition). We need one of these for each conjunction with a list // argument in micg.tea. if (id == MICG_grammar::macro_id) params = check_argument_list<Rule> (params, 1);
else if (id == MICG_grammar::signature_id) params = check_argument_list<Formal_parameter> (params, 1);
else if (id == MICG_grammar::param_id) params = check_argument_list<ATTR_NAME> (params, 1);
else if (id == MICG_grammar::body_id) params = check_argument_list<Body_part> (params, 0);
else if (id == MICG_grammar::macro_call_id) params = check_argument_list<Expr> (params, 1);
else if (id == MICG_grammar::callback_id) params = check_argument_list<Expr> (params, 1);
result = Node_factory::create (names[id].c_str(), params); assert (result);
if (Node* node = dynamic_cast<Node*> (result)) { // if (debugging_enabled) xml_unparse (node, cdebug, true, false); node->assert_valid (); file_position pos = iter->value.begin().get_position (); node->attrs->set ("phc.filename", new ::String (pos.file)); node->attrs->set ("phc.line_number", new ::Integer (pos.line)); node->attrs->set ("phc.column_number", new ::Integer (pos.column)); }
break; }
/* * Lists */ case MICG_grammar::macro_list_id: case MICG_grammar::_rule_list_id: case MICG_grammar::formal_parameter_list_id: case MICG_grammar::attr_name_list_id: case MICG_grammar::body_part_list_id: case MICG_grammar::expr_list_id: { Object_list* params = create_micg_list (iter->children); result = Node_factory::create (names[id].c_str(), params); assert (result); break; }
/* Disjunctions - if they are not disjunctions in the Spirit grammar, * they still occur in the Spirit AST. Extract their contents and remove * them. */ case MICG_grammar::_rule_id: case MICG_grammar::interpolation_id: case MICG_grammar::body_part_id: case MICG_grammar::expr_id: { Object_list* list = create_micg_list (iter->children); assert (list->size () == 1); result = list->front (); break; }
/* * Tokens - tokens are put in the Spirit AST twice, but we can ignore this. */
case MICG_grammar::quoted_string_id: // Strip off the ""s value = new String (value->substr (1, value->size() - 2)); // Fallthrough
case MICG_grammar::macro_name_id: case MICG_grammar::type_name_id: case MICG_grammar::param_name_id: case MICG_grammar::attr_name_id: case MICG_grammar::c_code_id: { assert (*value != ""); DEBUG (" - " << *value);
result = Node_factory::create ( names[id].c_str(), new Object_list (value));
assert (result); Node* node = dyc<Node> (result); node->assert_valid ();
// if (debugging_enabled) xml_unparse (node, cdebug, true, false); node->assert_valid (); file_position pos = iter->value.begin().get_position (); node->attrs->set ("phc.filename", new ::String (pos.file)); node->attrs->set ("phc.line_number", new ::Integer (pos.line)); node->attrs->set ("phc.column_number", new ::Integer (pos.column)); break; }
/* Ignore syntactic tokens */ case 0: result = NULL; break;
default: cerr << "havent handled node with ID: " << id << endl; phc_unreachable (); }
DEBUG ("leaving " << id << " (" << names[id] << ")"); return result; }
/*
 * Parse the macro templates in STR and return the resulting list of macros.
 * FILENAME is attached to the position iterator, so it appears in the
 * positions recorded on nodes and in the error raised when STR cannot be
 * parsed completely.
 */
Macro_list*
MICG_parser::parse (string str, string filename)
{
	// Type name handed to Node_factory for each tagged grammar rule.
	static const struct
	{
		long id;
		const char* label;
	}
	rule_labels[] =
	{
		{ MICG_grammar::all_id,						"All" },
		{ MICG_grammar::attr_name_id,				"ATTR_NAME" },
		{ MICG_grammar::attr_name_list_id,			"ATTR_NAME_list" },
		{ MICG_grammar::body_id,					"Body" },
		{ MICG_grammar::body_part_id,				"Body_part" },
		{ MICG_grammar::body_part_list_id,			"Body_part_list" },
		{ MICG_grammar::callback_id,				"Callback" },
		{ MICG_grammar::c_code_id,					"C_CODE" },
		{ MICG_grammar::equals_id,					"Equals" },
		{ MICG_grammar::expr_id,					"Expr" },
		{ MICG_grammar::expr_list_id,				"Expr_list" },
		{ MICG_grammar::formal_parameter_id,		"Formal_parameter" },
		{ MICG_grammar::formal_parameter_list_id,	"Formal_parameter_list" },
		{ MICG_grammar::interpolation_id,			"Interpolation" },
		{ MICG_grammar::lookup_id,					"Lookup" },
		{ MICG_grammar::macro_call_id,				"Macro_call" },
		{ MICG_grammar::macro_id,					"Macro" },
		{ MICG_grammar::macro_list_id,				"Macro_list" },
		{ MICG_grammar::macro_name_id,				"MACRO_NAME" },
		{ MICG_grammar::param_id,					"Param" },
		{ MICG_grammar::param_name_id,				"PARAM_NAME" },
		{ MICG_grammar::quoted_string_id,			"STRING" },
		{ MICG_grammar::_rule_id,					"Rule" },
		{ MICG_grammar::_rule_list_id,				"Rule_list" },
		{ MICG_grammar::signature_id,				"Signature" },
		{ MICG_grammar::type_name_id,				"TYPE_NAME" },
	};
	const unsigned int label_count = sizeof (rule_labels) / sizeof (rule_labels[0]);

	for (unsigned int i = 0; i < label_count; i++)
		names[rule_labels[i].id] = rule_labels[i].label;

	MICG_grammar g;
	skip_grammar skipg;

	BOOST_SPIRIT_DEBUG_GRAMMAR(g);
	BOOST_SPIRIT_DEBUG_TRACE_NODE(skipg, false);

	// A pos_iter_t keeps track of line numbers, column numbers and the file
	// name while it walks over the text.
	char const* text_start = str.c_str ();
	char const* text_stop = str.c_str() + str.size ();
	pos_iter_t first(text_start, text_stop, filename);
	pos_iter_t last;
	first.set_tabchars (1);

	tree_parse_info<pos_iter_t, node_iter_data_factory<> > parsed =
		pt_parse(first, last, g >> end_p, skipg, node_iter_data_factory<>());

	file_position stopped_at = parsed.stop.get_position ();

	DEBUG ("stop: " << stopped_at.file << ", " << stopped_at.line << ", " << stopped_at.column << "\n"
		<< "full: " << parsed.full << "\n"
		<< "length: " << parsed.length << "\n");

	// Anything short of a full match is reported at the position where
	// parsing stopped.
	if (!parsed.full)
		phc_internal_error ("Cannot parse template", s(stopped_at.file), stopped_at.line, stopped_at.column);

	assert (parsed.trees.size() == 1);

	All* result = dyc<All> (create_micg_node (parsed.trees.begin()));
	if (debugging_enabled)
	{
		// Far too verbose.
		// xml_unparse (result, cdebug, true, false);
	}
	return result->macros;
}
|