Changeset adfc63c96dc82134b341c1fc225397eea16d4167
- Timestamp: 06/26/2008 03:37:23 PM (2 months ago)
- git-parent:
- Files:
- README (modified) (2 diffs)
- Rakefile (modified) (4 diffs)
- ext/ohcount_native/common.c (deleted)
- ext/ohcount_native/common.h (modified) (2 diffs)
- ext/ohcount_native/compiled_state.c (deleted)
- ext/ohcount_native/compiled_state.h (deleted)
- ext/ohcount_native/escape_helper.rb (deleted)
- ext/ohcount_native/extconf.rb (modified) (1 diff)
- ext/ohcount_native/generator.rb (deleted)
- ext/ohcount_native/glots/biglot.rb (deleted)
- ext/ohcount_native/glots/c_monoglot.rb (deleted)
- ext/ohcount_native/glots/clearsilver_template_polyglot.rb (deleted)
- ext/ohcount_native/glots/d_monoglot.rb (deleted)
- ext/ohcount_native/glots/dcl_monoglot.rb (deleted)
- ext/ohcount_native/glots/html_polyglot.rb (deleted)
- ext/ohcount_native/glots/html_with_php_polyglot.rb (deleted)
- ext/ohcount_native/glots/jsp_polyglot.rb (deleted)
- ext/ohcount_native/glots/line_comment_monoglot.rb (deleted)
- ext/ohcount_native/glots/monoglot.rb (deleted)
- ext/ohcount_native/glots/mxml_polyglot.rb (deleted)
- ext/ohcount_native/glots/polyglot.rb (deleted)
- ext/ohcount_native/glots/python_monoglot.rb (deleted)
- ext/ohcount_native/glots/rhtml_polyglot.rb (deleted)
- ext/ohcount_native/glots/xml_monoglot.rb (deleted)
- ext/ohcount_native/parser.c (deleted)
- ext/ohcount_native/parser.h (deleted)
- ext/ohcount_native/polyglot.c (deleted)
- ext/ohcount_native/polyglot.h (deleted)
- ext/ohcount_native/polyglots.h (deleted)
- ext/ohcount_native/ragel_parser.h (modified) (1 diff)
- ext/ohcount_native/ragel_parser_macros.h (modified) (1 diff)
- ext/ohcount_native/ruby_binding.c (modified) (2 diffs)
- ext/ohcount_native/state.c (deleted)
- ext/ohcount_native/state.h (deleted)
- ext/ohcount_native/state.rb (deleted)
- ext/ohcount_native/transition.h (deleted)
- ext/ohcount_native/transition.rb (deleted)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
README
r338ef7c radfc63c 48 48 also require a C compiler to build the native extensions. 49 49 50 Ohcount requires the pcre library (http:///www.pcre.org). 50 Ohcount requires Ragel (http://research.cs.queensu.ca/~thurston/ragel/) 51 Unfortunately, Ragel 6.2 is not recent enough so you will need the latest 52 version in SVN: svn://mambo.cs.queensu.ca/ragel/trunk/. 51 53 52 54 == Download … … 118 120 * Update Ohcount::Detector to identify files that use the new language. 119 121 * Update Ohcount::DetectorTest to confirm the Detector changes. 120 * In Ohcount::Generator, instantiate a new Ohcount::Monoglot or 121 Ohcount::Polyglot to parse the language. 122 * Follow the detailed instructions in PARSER_DOC. 122 123 * In Ohcount::SlocInfo, provide a "nice name" and category (procedural 123 124 code vs. markup) for the new language. Rakefile
r9638a53 radfc63c 17 17 CLEAN.include FileList["#{EXT_DIR}/*.{so,bundle,#{CONFIG['DLEXT']}}"], 18 18 FileList["#{EXT_DIR}/*.o"], 19 FileList["#{EXT_DIR}/polyglots.c"],20 19 FileList["#{EXT_DIR}/Makefile"], 21 20 (FileList["#{EXT_DIR}/*_parser.h"] - FileList["#{EXT_DIR}/ragel_parser.h"]) … … 25 24 PKG_FILES = %w(README COPYING Rakefile lib/ohcount.rb) + 26 25 Dir.glob("ext/ohcount_native/*.{h,c,rb}") + 27 Dir.glob("ext/ohcount_native/glots/*.rb") +28 26 Dir.glob("lib/ohcount/*.rb") + 29 27 Dir.glob("test/*") + … … 64 62 end 65 63 66 file EXT_DL => FileList["#{EXT_DIR}/ polyglots.c", "#{EXT_DIR}/Makefile", "#{EXT_DIR}/*.{c,h,rb}"] do64 file EXT_DL => FileList["#{EXT_DIR}/Makefile", "#{EXT_DIR}/*.{c,h,rb}"] do 67 65 cd EXT_DIR do 68 66 cd 'ragel_parsers' do … … 95 93 end 96 94 97 file "#{EXT_DIR}/polyglots.c" => FileList["#{EXT_DIR}/*.rb", "#{EXT_DIR}/glots/*.rb"] do98 cd EXT_DIR do99 ruby 'generator.rb'100 end101 end102 103 95 Rake::RDocTask.new do |rdoc| 104 96 rdoc.rdoc_dir = 'doc' ext/ohcount_native/common.h
r069dddc radfc63c 13 13 Limits 14 14 *******************************************/ 15 // The Parser's CompiledState Stack16 #define MAX_CS_STACK 2017 15 // Parser's Maximum number of LanguageBreakdowns it can return 18 16 #define MAX_LANGUAGE_BREAKDOWN_SIZE 8 19 // How large can a CompiledState's regex term be?20 #define MAX_REGEX 20021 // CompiledState's number of transitions22 #define MAX_TRANSITIONS 1023 17 // The longest a language name can be 24 18 #define MAX_LANGUAGE_NAME 20 … … 30 24 #include <stdio.h> 31 25 #include <string.h> 32 #include <pcre.h>33 #include "transition.h"34 #include "state.h"35 #include "compiled_state.h"36 #include "polyglot.h"37 #include "polyglots.h"38 26 #include "language_breakdown.h" 39 #include "parser.h"40 27 #include "ragel_parser.h" 41 42 /*******************************************43 Error Handling44 *******************************************/45 void die(char *err, int exit_code);46 47 enum EXIT_CODES {48 ERR_PCRE_OUT_OF_MEMORY = 15,49 ERR_PCRE_GENERIC,50 ERR_UNKNOWN_SEMANTIC51 };52 53 28 54 29 /******************************************* ext/ohcount_native/extconf.rb
rb4f9575 radfc63c 3 3 4 4 dir_config('ohcount_native') 5 have_library('pcre','pcre_compile')6 5 7 6 # FLAGS: enable logging (or not) ext/ohcount_native/ragel_parser.h
r069dddc radfc63c 1 // ragel_parser.h written by Mitchell Foral. mitchell<att>caladbolg<dott>net. 2 3 /** 4 * Each language (html, css, etc.) is represented in its own language_breakdown. 5 */ 6 typedef struct { 7 LanguageBreakdown language_breakdowns[MAX_LANGUAGE_BREAKDOWN_SIZE]; 8 int language_breakdown_count; 9 } ParseResult; 10 11 12 /** 13 * Fills out the ParseResult with the result of parsing the buffer with the specific Language. 14 */ 1 15 int ragel_parser_parse(ParseResult *pr, int count, char *buf, int buf_len, char *lang); ext/ohcount_native/ragel_parser_macros.h
r5b7fcf9 radfc63c 1 // ragel_parser_macros.h written by Mitchell Foral. mitchell<att>caladbolg<dott>net 2 1 3 #ifndef RAGEL_PARSER_MACROS 2 4 #define RAGEL_PARSER_MACROS ext/ohcount_native/ruby_binding.c
r069dddc radfc63c 122 122 */ 123 123 static VALUE _ohcount_parse(VALUE self, VALUE buffer, VALUE polyglot_name_value) { 124 125 // find the polyglot to parse with 124 ParseResult pr; 125 126 126 char *polyglot_name = RSTRING(polyglot_name_value)->ptr; 127 int i_polyglot; 128 for (i_polyglot = 0; POLYGLOTS[i_polyglot] != NULL; i_polyglot++) { 129 if (strcmp(POLYGLOTS[i_polyglot]->name, polyglot_name) == 0) { 130 Polyglot *polyglot = POLYGLOTS[i_polyglot]; 131 132 ParseResult pr; 133 parser_parse(&pr, RSTRING(buffer)->ptr, RSTRING(buffer)->len, polyglot); 134 135 // create array we'll return all the language_breakdowns in 136 VALUE ary = rb_ary_new2(pr.language_breakdown_count); 137 138 int i_pr; 139 for(i_pr = 0; i_pr < pr.language_breakdown_count; i_pr++) { 140 LanguageBreakdown *lb = (LanguageBreakdown *) malloc(sizeof(LanguageBreakdown)); 141 LanguageBreakdown *src_lb = &(pr.language_breakdowns[i_pr]); 142 strcpy(lb->name,src_lb->name); 143 lb->code = src_lb->code; 144 lb->comment = src_lb->comment; 145 lb->blank_count = src_lb->blank_count; 146 rb_ary_store(ary, i_pr, Data_Wrap_Struct(rb_class_language_breakdown, 0, _language_breakdown_free, lb)); 147 } 148 149 return ary; 127 if (ragel_parser_parse(&pr, 1, RSTRING(buffer)->ptr, RSTRING(buffer)->len, polyglot_name)) { 128 // create array we'll return all the language_breakdowns in 129 VALUE ary = rb_ary_new2(pr.language_breakdown_count); 130 131 int i_pr; 132 for(i_pr = 0; i_pr < pr.language_breakdown_count; i_pr++) { 133 LanguageBreakdown *lb = (LanguageBreakdown *) malloc(sizeof(LanguageBreakdown)); 134 LanguageBreakdown *src_lb = &(pr.language_breakdowns[i_pr]); 135 strcpy(lb->name,src_lb->name); 136 lb->code = src_lb->code; 137 lb->comment = src_lb->comment; 138 lb->blank_count = src_lb->blank_count; 139 rb_ary_store(ary, i_pr, Data_Wrap_Struct(rb_class_language_breakdown, 0, _language_breakdown_free, lb)); 150 140 } 141 142 return ary; 151 143 } 152 144 rb_raise(rb_eStandardError,"Polyglot name invalid: 
'%s'", polyglot_name); 153 145 return Qnil; 154 }155 156 157 static VALUE _ohcount_polyglots(VALUE self) {158 159 // how many are they?160 int poly_count = 0;161 Polyglot **p = POLYGLOTS;162 while ((*p++) != NULL) {163 poly_count++;164 }165 166 // create the array167 VALUE ary = rb_ary_new2(poly_count);168 169 // fill it in170 int i_poly;171 for (i_poly = 0; POLYGLOTS[i_poly] != NULL; i_poly++) {172 VALUE poly_name = rb_str_new2(POLYGLOTS[i_poly]->name);173 rb_ary_store(ary, i_poly, poly_name);174 }175 176 return ary;177 146 } 178 147 … … 219 188 rb_define_module_function(rb_module_ohcount, "parse", _ohcount_parse, 2); 220 189 rb_define_module_function(rb_module_ohcount, "parse_entities", _ohcount_parse_entities, 2); 221 rb_define_module_function(rb_module_ohcount, "polyglots", _ohcount_polyglots, 0);222 190 223 191 // define language_breakdown