Changeset 629f9cab359caeb39a81c40233f3369d904a7629
- Timestamp: 03/14/2008 02:24:19 PM (10 months ago)
- Author: Robin Luckey <robin@Tangier.local>
- git-committer: Robin Luckey <robin@Tangier.local> 1205529859 -0700
- git-parent: [b5d153b1240eb6c91024d163c8a4cc368e7eaf6f]
- git-author: ciaranm <robin@Tangier.local> 1205331170 -0700
- Message: split cncpp into c and cpp
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| rac2c8db |
r629f9ca |
|
| 27 | 27 | boo = PythonMonoglot.new("boo") |
|---|
| 28 | 28 | clearsilver = CMonoglot.new("clearsilver", '#', nil, true, true) |
|---|
| 29 | | cncpp = CMonoglot.new("cncpp", '//', [e('/*'), e('*/')], true, false) |
|---|
| | 29 | c = CMonoglot.new("c", '//', [e('/*'), e('*/')], true, false) |
|---|
| | 30 | cpp = CMonoglot.new("cpp", '//', [e('/*'), e('*/')], true, false) |
|---|
| 30 | 31 | csharp = CMonoglot.new("csharp", '//', [e('/*'), e('*/')], true, false) |
|---|
| 31 | 32 | css = CMonoglot.new("css", nil, [e('/*'), e('*/')], false, false) |
|---|
| … | … | |
| 73 | 74 | boo , |
|---|
| 74 | 75 | clearsilver , |
|---|
| 75 | | cncpp , |
|---|
| | 76 | c , |
|---|
| | 77 | cpp , |
|---|
| 76 | 78 | csharp , |
|---|
| 77 | 79 | css , |
|---|
| r101d05d |
r629f9ca |
|
| 105 | 105 | * # Print each line to the console, labeled as code or comments |
|---|
| 106 | 106 | * buffer = File.read("helloworld.c") |
|---|
| 107 | | * results = Ohcount::parse(buffer, 'cncpp') do |language, semantic, line| |
|---|
| | 107 | * results = Ohcount::parse(buffer, 'c') do |language, semantic, line| |
|---|
| 108 | 108 | * puts "#{semantic.to_s} #{line}" |
|---|
| 109 | 109 | * end |
|---|
| … | … | |
| 113 | 113 | * # Print total lines of code |
|---|
| 114 | 114 | * buffer = File.read("helloworld.c") |
|---|
| 115 | | * results = Ohcount::parse(buffer, 'cncpp') |
|---|
| | 115 | * results = Ohcount::parse(buffer, 'c') |
|---|
| 116 | 116 | * results.each do |result| |
|---|
| 117 | 117 | * puts "Lines of #{result.name} code: #{ result.code.split("\n").size }" |
|---|
| rb5d153b |
r629f9ca |
|
| 36 | 36 | # Example: |
|---|
| 37 | 37 | # |
|---|
| 38 | | # # List all C/C++ files in the 'src' directory |
|---|
| | 38 | # # List all C files in the 'src' directory |
|---|
| 39 | 39 | # Dir.entries("src").each do |file| |
|---|
| 40 | 40 | # context = Ohcount::SimpleFileContext.new(file) |
|---|
| 41 | 41 | # polyglot = Ohcount::Detector.detect(context) |
|---|
| 42 | | # puts "#{file}" if polyglot == 'cncpp' |
|---|
| | 42 | # puts "#{file}" if polyglot == 'c' |
|---|
| 43 | 43 | # end |
|---|
| 44 | 44 | # |
|---|
| 45 | 45 | def self.detect(file_context) |
|---|
| 46 | 46 | # start with extension |
|---|
| 47 | | polyglot = EXTENSION_MAP[File.extname(file_context.filename).downcase] |
|---|
| | 47 | polyglot = EXTENSION_MAP[File.extname(file_context.filename)] |
|---|
| | 48 | polyglot = EXTENSION_MAP[File.extname(file_context.filename).downcase] unless polyglot |
|---|
| 48 | 49 | case polyglot |
|---|
| 49 | 50 | when String |
|---|
| … | … | |
| 104 | 105 | '.bat' => "bat", |
|---|
| 105 | 106 | '.boo' => "boo", |
|---|
| 106 | | '.c' => "cncpp", |
|---|
| 107 | | '.cc' => "cncpp", |
|---|
| 108 | | '.cpp' => "cncpp", |
|---|
| | 107 | '.c' => "c", |
|---|
| | 108 | '.C' => "cpp", |
|---|
| | 109 | '.cc' => "cpp", |
|---|
| | 110 | '.cpp' => "cpp", |
|---|
| 109 | 111 | '.css' => "css", |
|---|
| 110 | | '.c++' => "cncpp", |
|---|
| 111 | | '.cxx' => "cncpp", |
|---|
| | 112 | '.c++' => "cpp", |
|---|
| | 113 | '.cxx' => "cpp", |
|---|
| 112 | 114 | '.el' => "emacslisp", |
|---|
| 113 | 115 | # '.cbl' => "cobol", |
|---|
| … | … | |
| 130 | 132 | '.groovy'=> "groovy", |
|---|
| 131 | 133 | '.h' => :disambiguate_h_header, |
|---|
| 132 | | '.hpp' => "cncpp", |
|---|
| 133 | | '.h++' => "cncpp", |
|---|
| | 134 | '.H' => "cpp", |
|---|
| | 135 | '.hpp' => "cpp", |
|---|
| | 136 | '.h++' => "cpp", |
|---|
| 134 | 137 | '.hs' => "haskell", |
|---|
| 135 | | '.hxx' => "cncpp", |
|---|
| 136 | | '.hh' => "cncpp", |
|---|
| | 138 | '.hxx' => "cpp", |
|---|
| | 139 | '.hh' => "cpp", |
|---|
| 137 | 140 | '.hrl' => "erlang", |
|---|
| 138 | 141 | '.htm' => "html", |
|---|
| … | … | |
| 218 | 221 | end |
|---|
| 219 | 222 | |
|---|
| 220 | | # For *.h files, differentiates C/C++ from Objective-C. |
|---|
| | 223 | # For *.h files, differentiates C, C++ and Objective-C. |
|---|
| 221 | 224 | # |
|---|
| 222 | 225 | # This is done with a weighted heuristic that |
|---|
| 223 | 226 | # scans the *.h file contents for Objective-C keywords, |
|---|
| 224 | | # and also checks for the presence of matching *.m files. |
|---|
| | 227 | # C++ keywords and C++ headers, and also checks for the |
|---|
| | 228 | # presence of matching *.m files. |
|---|
| 225 | 229 | def self.disambiguate_h_header(file_context) |
|---|
| 226 | 230 | buffer = file_context.contents |
|---|
| … | … | |
| 234 | 238 | file_context.filenames.contains_m = file_context.filenames.select { |a| a =~ /\.m$/ }.any? |
|---|
| 235 | 239 | end |
|---|
| 236 | | return 'cncpp' unless file_context.filenames.contains_m |
|---|
| | 240 | return disambiguate_c_cpp(buffer) unless file_context.filenames.contains_m |
|---|
| 237 | 241 | |
|---|
| 238 | 242 | # if the dir contains a matching *.m file, likely objective_c |
|---|
| … | … | |
| 246 | 250 | objective_c += lines_matching(buffer, objective_c_signatures) |
|---|
| 247 | 251 | |
|---|
| 248 | | return objective_c > 1 ? 'objective_c' : 'cncpp' |
|---|
| | 252 | return objective_c > 1 ? 'objective_c' : disambiguate_c_cpp(buffer) |
|---|
| | 253 | end |
|---|
| | 254 | |
|---|
| | 255 | # A map of headers that indicate C++, but that do not have C++-specific file |
|---|
| | 256 | # extensions. This list is made from the Standard, plus Technical Report 1. |
|---|
| | 257 | CPP_HEADERS_MAP = %w[ |
|---|
| | 258 | algorithm |
|---|
| | 259 | array |
|---|
| | 260 | bitset |
|---|
| | 261 | cassert |
|---|
| | 262 | ccomplex |
|---|
| | 263 | cctype |
|---|
| | 264 | cerrno |
|---|
| | 265 | cfenv |
|---|
| | 266 | cfloat |
|---|
| | 267 | cinttypes |
|---|
| | 268 | ciso646 |
|---|
| | 269 | climits |
|---|
| | 270 | clocale |
|---|
| | 271 | cmath |
|---|
| | 272 | csetjmp |
|---|
| | 273 | csignal |
|---|
| | 274 | cstdarg |
|---|
| | 275 | cstdbool |
|---|
| | 276 | cstddef |
|---|
| | 277 | cstdint |
|---|
| | 278 | cstdio |
|---|
| | 279 | cstdlib |
|---|
| | 280 | cstring |
|---|
| | 281 | ctgmath |
|---|
| | 282 | ctime |
|---|
| | 283 | cwchar |
|---|
| | 284 | cwctype |
|---|
| | 285 | deque |
|---|
| | 286 | exception |
|---|
| | 287 | fstream |
|---|
| | 288 | functional |
|---|
| | 289 | iomanip |
|---|
| | 290 | ios |
|---|
| | 291 | iosfwd |
|---|
| | 292 | iostream |
|---|
| | 293 | istream |
|---|
| | 294 | iterator |
|---|
| | 295 | limits |
|---|
| | 296 | list |
|---|
| | 297 | locale |
|---|
| | 298 | map |
|---|
| | 299 | memory |
|---|
| | 300 | new |
|---|
| | 301 | numeric |
|---|
| | 302 | ostream |
|---|
| | 303 | queue |
|---|
| | 304 | random |
|---|
| | 305 | regex |
|---|
| | 306 | set |
|---|
| | 307 | sstream |
|---|
| | 308 | stack |
|---|
| | 309 | stdexcept |
|---|
| | 310 | streambuf |
|---|
| | 311 | string |
|---|
| | 312 | system_error |
|---|
| | 313 | tuple |
|---|
| | 314 | type_traits |
|---|
| | 315 | typeinfo |
|---|
| | 316 | unordered_map |
|---|
| | 317 | unordered_set |
|---|
| | 318 | utility |
|---|
| | 319 | valarray |
|---|
| | 320 | vector |
|---|
| | 321 | tr1/array |
|---|
| | 322 | tr1/ccomplex |
|---|
| | 323 | tr1/cctype |
|---|
| | 324 | tr1/cfenv |
|---|
| | 325 | tr1/cfloat |
|---|
| | 326 | tr1/cinttypes |
|---|
| | 327 | tr1/climits |
|---|
| | 328 | tr1/cmath |
|---|
| | 329 | tr1/complex |
|---|
| | 330 | tr1/cstdarg |
|---|
| | 331 | tr1/cstdbool |
|---|
| | 332 | tr1/cstdint |
|---|
| | 333 | tr1/cstdio |
|---|
| | 334 | tr1/cstdlib |
|---|
| | 335 | tr1/ctgmath |
|---|
| | 336 | tr1/ctime |
|---|
| | 337 | tr1/cwchar |
|---|
| | 338 | tr1/cwctype |
|---|
| | 339 | tr1/memory |
|---|
| | 340 | tr1/random |
|---|
| | 341 | tr1/regex |
|---|
| | 342 | tr1/tuple |
|---|
| | 343 | tr1/type_traits |
|---|
| | 344 | tr1/unordered_map |
|---|
| | 345 | tr1/unordered_set |
|---|
| | 346 | tr1/utility |
|---|
| | 347 | ].inject({}) { | h, k | h[k] = true ; h } |
|---|
| | 348 | |
|---|
| | 349 | # A map of keywords that indicate C++. |
|---|
| | 350 | CPP_KEYWORDS_MAP = %w[ |
|---|
| | 351 | template |
|---|
| | 352 | typename |
|---|
| | 353 | class |
|---|
| | 354 | namespace |
|---|
| | 355 | ].inject({}) { | h, k | h[k] = true ; h } |
|---|
| | 356 | |
|---|
| | 357 | # For *.h files that we know aren't Objective-C, differentiates C and C++. |
|---|
| | 358 | # |
|---|
| | 359 | # This is done with a weighted heuristic that |
|---|
| | 360 | # scans the *.h file contents for C++ keywords and C++ headers. |
|---|
| | 361 | def self.disambiguate_c_cpp(buffer) |
|---|
| | 362 | # Look for C++ headers |
|---|
| | 363 | return 'cpp' if extract_c_cpp_headers(buffer).detect do | header | |
|---|
| | 364 | EXTENSION_MAP[File.extname(header)] == 'cpp' or CPP_HEADERS_MAP.include? header |
|---|
| | 365 | end |
|---|
| | 366 | |
|---|
| | 367 | # Look for C++ keywords. This could check for comments, but doesn't. |
|---|
| | 368 | return 'cpp' if buffer.find do | line | |
|---|
| | 369 | line.split(/\W/).find do | word | |
|---|
| | 370 | CPP_KEYWORDS_MAP.include? word |
|---|
| | 371 | end |
|---|
| | 372 | end |
|---|
| | 373 | |
|---|
| | 374 | # Nothing to suggest C++ |
|---|
| | 375 | 'c' |
|---|
| | 376 | end |
|---|
| | 377 | |
|---|
| | 378 | # Return a list of files included in a C or C++ source file. |
|---|
| | 379 | def self.extract_c_cpp_headers(buffer) |
|---|
| | 380 | buffer.map do | line | |
|---|
| | 381 | m = line.match(/^#\s*include\s+[<"](.*)[>"]/) and m[1] |
|---|
| | 382 | end.find_all { | a | a } |
|---|
| 249 | 383 | end |
|---|
| 250 | 384 | |
|---|
| r733e1e6 |
r629f9ca |
|
| 45 | 45 | 'bat' => {:nice_name => 'DOS batch script' , :category => 0}, |
|---|
| 46 | 46 | 'boo' => {:nice_name => 'Boo' , :category => 0}, |
|---|
| 47 | | 'cncpp' => {:nice_name => 'C/C++' , :category => 0}, |
|---|
| | 47 | 'c' => {:nice_name => 'C' , :category => 0}, |
|---|
| | 48 | 'cpp' => {:nice_name => 'C++' , :category => 0}, |
|---|
| 48 | 49 | 'clearsilver' => {:nice_name => 'ClearSilver' , :category => 0}, |
|---|
| 49 | 50 | 'csharp' => {:nice_name => 'C#' , :category => 0}, |
|---|
| r101d05d |
r629f9ca |
|
| 4 | 4 | |
|---|
| 5 | 5 | def test_comments |
|---|
| 6 | | lb = [Ohcount::LanguageBreakdown.new("cncpp", "", "//comment", 0)] |
|---|
| 7 | | assert_equal lb, Ohcount::parse(" //comment", "cncpp") |
|---|
| | 6 | lb = [Ohcount::LanguageBreakdown.new("c", "", "//comment", 0)] |
|---|
| | 7 | assert_equal lb, Ohcount::parse(" //comment", "c") |
|---|
| 8 | 8 | end |
|---|
| 9 | 9 | |
|---|
| 10 | 10 | def test_empty_comments |
|---|
| 11 | | lb = [Ohcount::LanguageBreakdown.new("cncpp", "","//\n", 0)] |
|---|
| 12 | | assert_equal lb, Ohcount::parse(" //\n", "cncpp") |
|---|
| | 11 | lb = [Ohcount::LanguageBreakdown.new("c", "","//\n", 0)] |
|---|
| | 12 | assert_equal lb, Ohcount::parse(" //\n", "c") |
|---|
| 13 | 13 | end |
|---|
| 14 | 14 | |
|---|
| 15 | 15 | |
|---|
| 16 | 16 | def test_block_comment |
|---|
| 17 | | lb = [Ohcount::LanguageBreakdown.new("cncpp", "","/*c*/", 0)] |
|---|
| 18 | | assert_equal lb, Ohcount::parse("/*c*/", "cncpp") |
|---|
| | 17 | lb = [Ohcount::LanguageBreakdown.new("c", "","/*c*/", 0)] |
|---|
| | 18 | assert_equal lb, Ohcount::parse("/*c*/", "c") |
|---|
| 19 | 19 | end |
|---|
| 20 | 20 | |
|---|
| rac2c8db |
r629f9ca |
|
| 38 | 38 | |
|---|
| 39 | 39 | def test_detect_polyglot |
|---|
| 40 | | assert_equal "cncpp", do_detect("foo.c") |
|---|
| | 40 | assert_equal "c", do_detect("foo.c") |
|---|
| | 41 | assert_equal "c", do_detect("uses_no_cpp.h") |
|---|
| | 42 | assert_equal "cpp", do_detect("uses_cpp_headers.h") |
|---|
| | 43 | assert_equal "cpp", do_detect("uses_cpp_stdlib_headers.h") |
|---|
| | 44 | assert_equal "cpp", do_detect("uses_cpp_keywords.h") |
|---|
| 41 | 45 | assert_equal "ruby", do_detect("foo.rb") |
|---|
| 42 | 46 | assert_equal "matlab", do_detect("foo_matlab.m", ["foo_matlab.m", "bar.m", "README"]) |
|---|
| … | … | |
| 53 | 57 | |
|---|
| 54 | 58 | def test_upper_case_extensions |
|---|
| 55 | | assert_equal "cncpp", do_detect("foo_upper_case.C") |
|---|
| | 59 | assert_equal "cpp", do_detect("foo_upper_case.C") |
|---|
| 56 | 60 | assert_equal "ruby", do_detect("foo_upper_case.RB") |
|---|
| 57 | 61 | end |
|---|
| r101d05d |
r629f9ca |
|
| 25 | 25 | sloc_infos = Ohcount.diff_files(src_dir + 'diff2_old.c', src_dir + 'diff2_new.c') |
|---|
| 26 | 26 | |
|---|
| 27 | | c = Ohcount::SlocInfo.new('cncpp') |
|---|
| | 27 | c = Ohcount::SlocInfo.new('c') |
|---|
| 28 | 28 | c.code_added, c.code_removed = [1,1] |
|---|
| 29 | 29 | c.comments_added, c.comments_removed = [1,1] |
|---|