Changeset 069dddc59c195676361a8943bc9c92c2cb26c09a

Show
Ignore:
Timestamp:
06/12/2008 10:36:43 AM (2 months ago)
Author:
mitchell <mitchell@frost.(none)>
git-committer:
mitchell <mitchell@frost.(none)> 1213292203 -0400
git-parent:

[01610779fcf24388e97f8de5cc385b8ffdbcd1d1]

git-author:
mitchell <mitchell@frost.(none)> 1213292203 -0400
Message:

Integrated entity parsing into Ohcount.
See the documentation for Ohcount::parse_entities.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • bin/ohcount

    r2baa202 r069dddc  
    7171  end 
    7272 
     73  # Entities 
     74  # TODO: do more fun stuff rather than print entities and positions 
     75  def entities 
     76    files.each do |file| 
     77      sfc = Ohcount::SimpleFileContext.new(file, files) 
     78      polyglot = Ohcount::Detector.detect(sfc) 
     79      if polyglot 
     80        Ohcount::parse_entities(sfc.contents, polyglot) do |language, entity, s, e| 
     81          puts "#{language}\t#{entity}\t#{s}\t#{e}" 
     82        end 
     83      end 
     84    end 
     85  end 
     86 
    7387  def help 
    7488    puts <<HELP 
     
    8195   -a, --annotate 
    8296   -d, --detect 
     97   -e, --entities 
    8398   -h, --help 
    8499   -s, --summary 
     
    96111   For each source code file found, the file name will be emitted to 
    97112   stdout prefixed with a tab-delimited language name. 
     113 
     114-e, --entities 
     115 
     116   For now, simply prints the positions of entities parsed in all 
     117   source files found within the given paths. 
    98118 
    99119-h, --help                      Display this message 
     
    245265    when '-i', '--individual' 
    246266      self.subcommand = :individual 
     267    when '-e', '--entities' 
     268      self.subcommand = :entities 
    247269    when '-?', '-h', '--help' 
    248270      self.subcommand = :help 
  • ext/ohcount_native/common.h

    rb4f9575 r069dddc  
    3838#include "language_breakdown.h" 
    3939#include "parser.h" 
     40#include "ragel_parser.h" 
    4041 
    4142/******************************************* 
  • ext/ohcount_native/parser.c

    r80275ce r069dddc  
    1111#include "ruby.h" 
    1212#include "common.h" 
    13 #include "ragel_parser.h" 
    1413 
    1514 
     
    466465#endif 
    467466 
    468   if (ragel_parser_parse(pr, buffer, buffer_len, polyglot->name)) 
     467  if (ragel_parser_parse(pr, 1, buffer, buffer_len, polyglot->name)) 
    469468    return; 
    470469 
  • ext/ohcount_native/ragel_parser.c

    r678401c r069dddc  
    137137}; 
    138138 
    139 /* Returns a language_breakdown for a given language name. */ 
     139/** Returns a language_breakdown for a given language name. */ 
    140140LanguageBreakdown *get_language_breakdown(char *name) { 
    141141  int i; 
     
    150150} 
    151151 
    152 /* Yields a line's language, semantic, and text to an optional Ruby block. */ 
     152/** Yields a line's language, semantic, and text to an optional Ruby block. */ 
    153153void ragel_parse_yield_line(const char *lang, const char *entity, int s, int e) { 
    154154  if (rb_block_given_p()) { 
     
    167167} 
    168168 
    169 /* Callback function called for every entity in the source file discovered. 
     169/** Yields an entity's language, id, start, and end position to a required Ruby block */ 
     170void ragel_parse_yield_entity(const char *lang, const char *entity, int s, int e) { 
     171  if (rb_block_given_p()) { 
     172    VALUE ary; 
     173    ary = rb_ary_new2(3); 
     174    rb_ary_store(ary, 0, ID2SYM(rb_intern(lang))); 
     175    rb_ary_store(ary, 1, ID2SYM(rb_intern(entity))); 
     176    rb_ary_store(ary, 2, rb_int_new(s)); 
     177    rb_ary_store(ary, 3, rb_int_new(e)); 
     178    rb_yield(ary); 
     179  } 
     180} 
     181 
     182/** 
     183 * Callback function called for every entity in the source file discovered. 
    170184 * 
    171185 * Entities are defined in the parser and are things like comments, strings, 
    172186 * keywords, etc. 
    173187 * This callback yields for a Ruby block if necessary: 
    174  *   |language, semantic, line| 
     188 *   |language, semantic, line| for line counting 
     189 *   |language, entity, s, e| for entity parsing 
    175190 * @param *lang The language associated with the entity. 
    176191 * @param *entity The entity discovered. There are 3 additional entities used 
     
    193208    lb->blank_count++; 
    194209    ragel_parse_yield_line(lang, entity, s, e); 
     210  } else { 
     211    ragel_parse_yield_entity(lang, entity, s, e); 
    195212  } 
    196213} 
    197214 
    198 /* Tries to use an existing Ragel parser for the given language. 
     215/** 
     216 * Tries to use an existing Ragel parser for the given language. 
    199217 * 
    200218 * @param *parse_result An allocated, empty ParseResult to hold parse results. 
     219 * @param count An integer flag indicating whether to count lines or parse 
     220 *   entities. 
    201221 * @param *buffer A pointer to the buffer or character in the buffer to start 
    202222 *   parsing at. 
     
    205225 * @return 1 if a Ragel parser is found, 0 otherwise. 
    206226 */ 
    207 int ragel_parser_parse(ParseResult *parse_result, 
     227int ragel_parser_parse(ParseResult *parse_result, int count, 
    208228                       char *buffer, int buffer_len, char *lang) { 
    209229  pr = parse_result; 
     
    214234  for (i = 0; strlen(languages[i].name) != 0; i++) 
    215235    if (strcmp(languages[i].name, lang) == 0) { 
    216       languages[i].parser(buffer, buffer_len, 1, ragel_parser_callback); 
     236      languages[i].parser(buffer, buffer_len, count, ragel_parser_callback); 
    217237      return 1; 
    218238    } 
  • ext/ohcount_native/ragel_parser.h

    r80275ce r069dddc  
    1 int ragel_parser_parse(ParseResult *pr, char *buf, int buf_len, char *lang); 
     1int ragel_parser_parse(ParseResult *pr, int count, char *buf, int buf_len, char *lang); 
  • ext/ohcount_native/ruby_binding.c

    r83e0b71 r069dddc  
    178178 
    179179 
     180/** 
     181 * Parses a source file's entities (if available). 
     182 * An entity is each comment, string, number, keyword, etc. that occurs in a 
     183 * source file. 
     184 * 
     185 * You must pass a Ruby block to this function of the form: 
     186 *   |language, entity, s, e| where: 
     187 *     language is the language's name (symbol) e.g. ruby. 
     188 *     entity is the entity's name (symbol) e.g. comment. 
     189 *     s is the entity's start position in the buffer (number). 
     190 *     e is the entity's end position in the buffer non-inclusive (number). 
     191 * If an entity parser is not available for the given language, the block will 
     192 * never be called. There is currently no way to dynamically test if a language 
     193 * has an entity parser. 
     194 * 
     195 * @param buffer The buffer to parse. 
     196 * @param language String language name to parse the buffer as. If you are 
     197 *   unsure which language name is correct, use Ohcount::Detector.detect(file). 
     198 * @return nil 
     199 * 
     200 * @usage 
     201 * 
     202 *   # Print each entity and its position in the buffer 
     203 *   buffer = File.read("helloworld.c") 
     204 *   Ohcount::parse_entities(buffer, 'c') do |lang, entity, s, e| 
     205 *     puts "#{lang}\t#{entity}\t#{s}\t#{e}" 
     206 *   end 
     207 */ 
     208static VALUE _ohcount_parse_entities(VALUE self, VALUE buffer, VALUE polyglot_name_value) { 
     209  char *polyglot_name = RSTRING(polyglot_name_value)->ptr; 
     210  ParseResult pr; 
     211  if (!ragel_parser_parse(&pr, 0, RSTRING(buffer)->ptr, RSTRING(buffer)->len, polyglot_name)) 
     212    rb_raise(rb_eStandardError,"Polyglot name invalid: '%s'", polyglot_name); 
     213  return Qnil; 
     214} 
     215 
     216 
    180217void Init_ohcount_native () { 
    181218  rb_module_ohcount = rb_define_module("Ohcount"); 
    182219  rb_define_module_function(rb_module_ohcount, "parse", _ohcount_parse, 2); 
     220  rb_define_module_function(rb_module_ohcount, "parse_entities", _ohcount_parse_entities, 2); 
    183221  rb_define_module_function(rb_module_ohcount, "polyglots", _ohcount_polyglots, 0); 
    184222