Average number of lines per method in the JDK

The JDK itself can help us count the lines of code: we just need to parse the source and get each method body as a String representation—then we can count the lines. A quick summary statistic from the new Java 8 Stream API will give us the numbers.

Code first:

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import javax.tools.JavaCompiler;
import javax.tools.JavaCompiler.CompilationTask;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;
import com.sun.source.tree.CompilationUnitTree;
import com.sun.source.tree.MethodTree;
import com.sun.source.util.JavacTask;
import com.sun.source.util.TreeScanner;

public class AverageNumberOfLinesInJDKFinder {
  public static void main( String[] args ) throws IOException {

    String[] files = findAllJavaSourceFiles( "C:/Program Files/Java/jdk1.8.0/src/" );
    JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();

    try ( StandardJavaFileManager fileManager = compiler.getStandardFileManager( null, null, null ) ) {
      CompilationTask task = compiler.getTask( null, fileManager, null, null, null,
                                               fileManager.getJavaFileObjects( files ) );

      JavacTask javacTask = (JavacTask) task;
      Iterable<? extends CompilationUnitTree> trees = javacTask.parse();

      LineCountingVisitor lineCountingVisitor = new LineCountingVisitor();
      for ( CompilationUnitTree compilationUnitTree : trees )
        compilationUnitTree.accept( lineCountingVisitor, null );

      DoubleSummaryStatistics stats = lineCountingVisitor.numberOfLines.stream().mapToDouble( d -> d ).summaryStatistics();
      System.out.println( stats );

  static String[] findAllJavaSourceFiles( String base ) throws IOException {
    final List<String> result = new ArrayList<String>();

    Files.walkFileTree( Paths.get( base ), new SimpleFileVisitor<Path>() {
      public FileVisitResult visitFile( Path path, BasicFileAttributes attribs ) {
        if ( path.toString().endsWith( ".java" ) )
          result.add( path.toString() );
        return FileVisitResult.CONTINUE;
    } );
    return result.toArray( new String[result.size()] );

/**
 * Tree scanner that records, for every method with a non-empty body, the number
 * of lines of that body.
 *
 * <p>The count is taken from the text returned by the body tree's
 * {@code toString()}, i.e. the compiler's rendering of the tree, not from the
 * original source file.
 */
class LineCountingVisitor extends TreeScanner<Void, Void> {

  /** Line counts of all non-empty method bodies visited so far, in visit order. */
  final List<Integer> numberOfLines = new ArrayList<>( 2048 );

  /**
   * Records the line count of the method's body and then continues scanning the
   * method's children, so methods of nested and anonymous classes are visited too.
   *
   * @param node the method being visited; methods without a body (abstract,
   *             native, interface declarations) are not counted
   * @param p    unused
   * @return the result of {@link TreeScanner#visitMethod}
   */
  @Override
  public Void visitMethod( MethodTree node, Void p ) {
    if ( node.getBody() != null ) {
      // StringTokenizer never returns empty tokens, so blank lines are not counted.
      // The first and the last token are the lines holding the braces of the block.
      int lines = new StringTokenizer( node.getBody().toString(), "\n" ).countTokens() - 1 /* { */ - 1 /* } */;
      if ( lines > 0 ) // ignore empty bodies
        numberOfLines.add( lines );
    }
    return super.visitMethod( node, p );
  }
}


DoubleSummaryStatistics{count=84695, sum=576427,000000, min=1,000000, average=6,805915, max=1716,000000}

Exercise for the readers:

  • Analyse different open source products and post the results here.
  • Generate an SVG with the lines of code.
  • Change the calculation so that we get not only the arithmetic mean but also the geometric and harmonic means.
  • Sort the result by package, are there differences?
  • Recognize the @since Javadoc tag and try to find out if the number of lines changes over time.
  • How about the line size?


