Changeset b354c6b519a44d37e66bbfe4571631c26e55ddeb
- Timestamp:
- 06/21/2008 02:54:18 PM (2 months ago)
- git-parent:
- Files:
-
- PARSER_DOC (modified) (2 diffs)
- ext/ohcount_native/ragel_parser_macros.h (modified) (12 diffs)
- ext/ohcount_native/ragel_parsers/common.rl (modified) (1 diff)
- ext/ohcount_native/ragel_parsers/html.rl (modified) (1 diff)
- ext/ohcount_native/ragel_parsers/perl.rl (modified) (1 diff)
- ext/ohcount_native/ragel_parsers/ruby.rl (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
PARSER_DOC
r78421f8 rb354c6b 221 221 defining line_start. 222 222 223 Also for multi-line matches, it may be necessary to use the 'queue' and 224 'commit' actions. If it is possible that a multi-line entity will not have 225 an ending delimiter (for example a string), use the 'queue' action as soon 226 as the start delimiter has been detected, and the 'commit' action as soon 227 as the end delimiter has been detected. This will eliminate the potential 228 for any counting errors. 229 223 230 Notes: 224 231 * You can be a bit sloppy with the line counting machine. For example the … … 298 305 } 299 306 307 Note: the 'ls', 'code', 'comment', 'queue' and 'commit' actions are 308 completely unnecessary. 309 300 310 Parsers for Embedded Languages: 301 311 TODO: ext/ohcount_native/ragel_parser_macros.h
ra3e0e88 rb354c6b 2 2 #define RAGEL_PARSER_MACROS 3 3 4 /* Sets the line_start variable to ts. 4 /** 5 * Callback struct for queues. 6 * @field lang The Language name. 7 * @field entity The entity name. 8 * @field s The start position of the entity in the buffer. 9 * @field e The end position of the entity in the buffer. 10 * @field next The next Callback in the queue. 11 */ 12 typedef struct callback_list_item { 13 const char *lang; // language name; should NOT be freed 14 const char *entity; // entity name; should NOT be freed 15 int s, e; // start and end positions of the entity in the buffer (use cint) 16 struct callback_list_item *next; // the next callback to call 17 } Callback; 18 19 Callback *callback_list_head = NULL; 20 Callback *callback_list_tail = NULL; 21 22 /** 23 * Enqueues a callback for calling upon commit. 24 * This is only necessary for line counting machines. 25 * Ragel will execute actions in real-time rather than after a complete match. 26 * This is a problem for entities that contain internal newlines, since there is 27 * a callback for each internal newline whether or not the end of the entity 28 * matches. This means that if, for example, the beginning of a string entity is 29 * matched, the text following is treated as code until the ending delimiter. If 30 * there is no ending delimiter (it was not actually a string entity), Ragel 31 * will jump back to the beginning of the string and reparse the text again. 32 * This means all the callbacks called were probably not accurate. 33 * To remedy this, any entity which needs an ending delimiter that may not 34 * appear will have its callbacks enqueued and then committed when the ending 35 * delimiter is reached. If that delimiter is not reached, the callbacks are 36 * never called. 37 * @param lang The language name. 38 * @param entity The entity (lcode, lcomment, lblank). 39 * @param s The start position of the entity in the buffer. 
40 * @param e The end position of the entity in the buffer. 41 */ 42 void enqueue(const char *lang, const char *entity, int s, int e) { 43 Callback *item = (Callback *) malloc(sizeof(Callback)); 44 //assert(item != NULL); // trap malloc errors 45 46 item->lang = lang; 47 item->entity = entity; 48 item->s = s; 49 item->e = e; 50 item->next = NULL; 51 52 if (!callback_list_head) { 53 callback_list_head = item; 54 callback_list_tail = item; 55 } else { 56 callback_list_tail->next = item; 57 callback_list_tail = item; 58 } 59 } 60 61 /** Frees the memory used by a queue. */ 62 void free_queue() { 63 Callback *item = callback_list_head; 64 while (item != NULL) { 65 Callback *next = item->next; 66 free(item); 67 item = next; 68 } 69 } 70 71 /** 72 * Restores settings for a failed enqueued entity. 73 * This is typically used in the ls, code, and comment macros. 74 */ 75 #define dequeue { \ 76 inqueue = 0; \ 77 line_start = last_line_start; \ 78 line_contains_code = last_line_contains_code; \ 79 whole_line_comment = last_whole_line_comment; \ 80 } 81 82 /** 83 * Sets the line_start variable to ts. 5 84 * This is typically used for the SPACE entity in the main action. 6 85 */ 7 #define ls { if (!line_start) line_start = ts; } 8 9 /* The C equivalent of the Ragel 'code' action. 86 #define ls { \ 87 if (inqueue) { dequeue; } \ 88 if (!line_start) line_start = ts; \ 89 } 90 91 /** 92 * The C equivalent of the Ragel 'code' action. 10 93 * This is tyically used in the main action for entities where Ragel actions 11 94 * cannot, for one reason or another, be used. 12 95 */ 13 96 #define code { \ 97 if (inqueue) { dequeue; } \ 14 98 if (!line_contains_code && !line_start) line_start = ts; \ 15 99 line_contains_code = 1; \ 16 100 } 17 101 18 /* The C equivalent of the Ragel 'comment' action. 102 /** 103 * The C equivalent of the Ragel 'comment' action. 19 104 * This is typically unused, but here for consistency. 
20 105 */ 21 106 #define comment { \ 107 if (inqueue) { dequeue; } \ 22 108 if (!line_contains_code) { \ 23 109 whole_line_comment = 1; \ … … 26 112 } 27 113 28 /* Sets up for having seen an embedded language. 114 /** 115 * Sets up for having seen an embedded language. 29 116 * This is typically used when entering an embedded language which usually does 30 117 * not span multiple lines (e.g. php for <?php echo 'blah' ?> on single lines) … … 38 125 } 39 126 40 /* Executes standard line counting actions for INTERNAL_NL entities. 127 /** 128 * Executes standard line counting actions for INTERNAL_NL entities. 41 129 * This is typically used in the main action for the INTERNAL_NL entity. 42 130 * @param lang The language name string. … … 44 132 #define std_internal_newline(lang) { \ 45 133 if (callback && p > line_start) { \ 46 if (line_contains_code) \ 47 callback(lang, "lcode", cint(line_start), cint(p)); \ 48 else if (whole_line_comment) \ 49 callback(lang, "lcomment", cint(line_start), cint(p)); \ 50 else \ 51 callback(lang, "lblank", cint(line_start), cint(p)); \ 134 if (line_contains_code) { \ 135 if (inqueue) \ 136 enqueue(lang, "lcode", cint(line_start), cint(p)); \ 137 else \ 138 callback(lang, "lcode", cint(line_start), cint(p)); \ 139 } else if (whole_line_comment) { \ 140 if (inqueue) \ 141 enqueue(lang, "lcomment", cint(line_start), cint(p)); \ 142 else \ 143 callback(lang, "lcomment", cint(line_start), cint(p)); \ 144 } else { \ 145 if (inqueue) \ 146 enqueue(lang, "lblank", cint(line_start), cint(p)); \ 147 else \ 148 callback(lang, "lblank", cint(line_start), cint(p)); \ 149 } \ 52 150 } \ 53 151 whole_line_comment = 0; \ … … 56 154 } 57 155 58 /* Executes emebedded language line counting actions for INTERNAL_NL entities 156 /** 157 * Executes emebedded language line counting actions for INTERNAL_NL entities 59 158 * based on whether or not the embedded language's code has been seen in a 60 159 * parent line. 
… … 70 169 } 71 170 72 /* Executes standard line counting actions for NEWLINE entities. 171 /** 172 * Executes standard line counting actions for NEWLINE entities. 73 173 * This is typically used in the main action for the NEWLINE entity. 74 174 * @param lang The language name string. … … 88 188 } 89 189 90 /* Executes embedded language line counting actions for NEWLINE entities based 190 /** 191 * Executes embedded language line counting actions for NEWLINE entities based 91 192 * on whether or not the embedded language's code has been seen in a parent 92 193 * line. … … 102 203 } 103 204 104 /* Processes the last line for buffers that don't have a newline at EOF. 205 /** 206 * Processes the last line for buffers that don't have a newline at EOF. 105 207 * This is typically used at the end of the parse_lang function after the Ragel 106 208 * parser has been executed. … … 116 218 } 117 219 118 /* Determines whether or not the rest of the line is blank. 220 /** 221 * Determines whether or not the rest of the line is blank. 119 222 * This is typically used when entering an embedded language. 120 223 * @param p The position of entry into the embedded language. … … 133 236 } 134 237 135 /* If there is a transition into an embedded language and there is only parent 238 /** 239 * If there is a transition into an embedded language and there is only parent 136 240 * language code on the line (the rest of the line is blank with no child code), 137 241 * count the line as a line of parent code. … … 180 284 // keeps track of an embedded language 181 285 const char *seen; 286 287 // whether or not to enqueue callbacks instead of calling them in real time 288 int inqueue; 289 290 // backups for 'inqueue'ing 291 char *last_line_start; 292 int last_line_contains_code, last_whole_line_comment; 182 293 183 294 #define init { \ … … 192 303 entity = 0; \ 193 304 seen = 0; \ 305 inqueue = 0; \ 194 306 } 195 307 ext/ohcount_native/ragel_parsers/common.rl
r0b4bd62 rb354c6b 27 27 28 28 # common actions 29 30 action queue { 31 inqueue = 1; 32 free_queue(); // free the current queue 33 callback_list_head = NULL; 34 callback_list_tail = NULL; 35 // set backup variables 36 last_line_start = line_start; 37 last_line_contains_code = line_contains_code; 38 last_whole_line_comment = whole_line_comment; 39 } 40 41 action commit { 42 if (inqueue) { 43 Callback *item; 44 for (item = callback_list_head; item != NULL; item = item->next) 45 callback(item->lang, item->entity, item->s, item->e); 46 free_queue(); 47 inqueue = 0; 48 } 49 } 29 50 30 51 action ls { if (!line_start) line_start = ts; } ext/ohcount_native/ragel_parsers/html.rl
r0b4bd62 rb354c6b 64 64 )* :>> '-->'; 65 65 66 html_sq_str = 67 '\'' @code ( 68 newline %{ entity = INTERNAL_NL; } %html_ccallback 69 | 70 ws 71 | 72 [^\r\n\f\t '\\] @code 73 | 74 '\\' nonnewline @code 75 )* '\''; 76 html_dq_str = 77 '"' @code ( 78 newline %{ entity = INTERNAL_NL; } %html_ccallback 79 | 80 ws 81 | 82 [^\r\n\f\t "\\] @code 83 | 84 '\\' nonnewline @code 85 )* '"'; 66 html_sq_str = '\'' ([^\r\n\f'\\] | '\\' nonnewline)* '\'' @code; 67 html_dq_str = '"' ([^\r\n\f"\\] | '\\' nonnewline)* '"' @code; 86 68 html_string = html_sq_str | html_dq_str; 87 69 ext/ohcount_native/ragel_parsers/perl.rl
r0b4bd62 rb354c6b 88 88 '\\' nonnewline @code 89 89 )* '`' @code; 90 perl_regex = '/' ([^\r\n\f \t/\\] | '\\' nonnewline)* '/' @code;90 perl_regex = '/' ([^\r\n\f/\\] | '\\' nonnewline)* '/' @code; 91 91 # TODO: heredoc detection 92 92 # This is impossible with current Ragel. We need to extract what the end ext/ohcount_native/ragel_parsers/ruby.rl
r0b4bd62 rb354c6b 49 49 # Can't do that now because using 'when starts_line' fails a Ragel assertion. 50 50 ruby_block_comment = 51 '=begin' @ comment (51 '=begin' @queue @comment ( 52 52 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 53 53 | … … 55 55 | 56 56 (nonnewline - ws) @comment 57 )* :>> '=end' ;57 )* :>> '=end' @commit; 58 58 ruby_comment = ruby_line_comment | ruby_block_comment; 59 59 60 60 ruby_sq_str = 61 '\'' @ code (61 '\'' @queue @code ( 62 62 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 63 63 | … … 67 67 | 68 68 '\\' nonnewline @code 69 )* '\'' @co de;69 )* '\'' @commit @code; 70 70 ruby_dq_str = 71 '"' @ code (71 '"' @queue @code ( 72 72 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 73 73 | … … 77 77 | 78 78 '\\' nonnewline @code 79 )* '"' @co de;79 )* '"' @commit @code; 80 80 # TODO: true literal string detection 81 81 # Turns out any non-alphanum char can be after the initial '%' for a literal … … 88 88 # closing char in the literal string below. 89 89 ruby_lit_str = 90 '%' [qQ]? [(\[{] @ code (90 '%' [qQ]? 
[(\[{] @queue @code ( 91 91 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 92 92 | … … 96 96 | 97 97 '\\' nonnewline @code 98 )* [)\]}] @co de;98 )* [)\]}] @commit @code; 99 99 ruby_cmd_str = 100 '`' @ code (100 '`' @queue @code ( 101 101 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 102 102 | … … 106 106 | 107 107 '\\' nonnewline @code 108 )* '`' @code; 109 ruby_regex = 110 '/' @code ( 111 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 112 | 113 ws 114 | 115 [^\r\n\f\t /\\] @code 116 | 117 '\\' nonnewline @code 118 )* '/' @code; 108 )* '`' @commit @code; 109 ruby_regex = '/' ([^\r\n\f\t /\\] | '\\' nonnewline)* '/' @code; 119 110 # TODO: true literal array and command detection 120 111 # See TODO above about literal string detection 121 112 ruby_lit_other = 122 '%' [wrx] [(\[{] @ code (113 '%' [wrx] [(\[{] @queue @code ( 123 114 newline %{ entity = INTERNAL_NL; } %ruby_ccallback 124 115 | … … 128 119 | 129 120 '\\' nonnewline @code 130 )* [)\]}] @co de;121 )* [)\]}] @commit @code; 131 122 # TODO: heredoc detection 132 123 # This is impossible with current Ragel. We need to extract what the end