Coding Domain

Perl Programming: Samples


The program
Now at school, I'm learning Java. It's a nice structural/robust language, but I still prefer the ease of the Perl syntax. For our reports, we also need to include something called a class diagram. In other words, an overview of all the classes and methods (the headers only).

This program examines all *.java files in a directory, and creates a classdiagram.txt file there. That file holds a nicely formatted overview of all files and classes. Having multiple classes in one file is supported! Note that Perl was meant as Practical Extraction and Report Language in the old days. Now it's just Perl.

Just for the die-hard Java programmers: try re-writing this in Java!!

classdiagram.pl
#!/usr/bin/perl -w    ## UNIX: Put path to perl here.
#
#
# classdiagram.pl
#
# Generate a text-classdiagram for Java programs
# located in the directory specified.
#
#
#   Copyright (c) 2001 - Diederik van der Boor
#   All rights reserved.
#

# Using this program
#
# o Run it using perl:
#    perl classdiagram.pl
#
# o Extra tips:
#   - Windows:
#       x Convert the program into an .exe file
#         using perl2exe from http://www.indigostar.com
#       x Or run it with perl through a MS-DOS Batch file.
#         You might need to download perl from http://www.perl.com first.
#
#   - UNIX
#       x Remove the extension of this file (optional)
#       x execute this shell command: chmod +x <<the file>>
#       x move the file to a directory accessible from every path
#       x run the file as if it's a command:
#         classdiagram /home/vdboor/java/helloworld/
#




################################################
# Load modules

use strict;
use Fcntl qw(:flock);


################################################
# Get Input from the SHELL or STDIN

# The directory to scan is the first command-line argument.  When it is
# missing or empty, keep prompting on STDIN until something is entered.
my $dir = $ARGV[0];
until(defined $dir && $dir ne "")
{
  print "Geef directory: ";
  $dir = <STDIN>;

  # <STDIN> yields undef at end-of-file (closed or exhausted input).
  # Without this check chomp() would be called on undef and the loop
  # would keep prompting forever.
  die "No directory given.\n" unless defined $dir;

  chomp($dir);
}



################################################
# Read in the files

# List the directory and keep only the Java source files, sorted by name.
opendir(my $dh, $dir) or die "Can't open $dir: $!\n";

# The pattern is anchored at the end of the name: an unanchored /\.java/
# would also accept names such as "Foo.java.bak" or "notes.javascript".
my @files = sort grep { /\.java\z/ } readdir $dh;
closedir($dh);

# Nothing to report on: say so and stop before any output file is created.
if(@files == 0)
{
  print "No java files found in $dir!\n";
  exit;
}



# Drop one trailing "/" or "\" from the directory name, so that the paths
# built from it later do not contain a doubled separator.  At least one
# character must precede the separator, so a bare "/" is left untouched.
if($dir =~ m{(.+)[/\\]$})
{
  $dir = $1;
}



################################################
# Building up the regexps in a readable way.

# Regexp fragments.  For readability every group below is written with
# plain parentheses; the loop further down rewrites them into non-capturing
# (?:...) groups.  The only capturing groups in the final patterns are
# therefore the ones added explicitly with $begin_capture / $end_capture.
my $begin_line         = '^';
my $end_line           = '$';
my $begin_capture      = '(';
my $end_capture        = ')';
my $optional_space     = '\s*';
my $required_space     = '\s+';

# A type name, optionally package-qualified (java.util.Vector) and
# optionally followed by array brackets (String[], int[][]).
my $data_type          = '([a-zA-Z0-9_.]+(\s*\[\s*\])*)';
my $identifier         = '([a-zA-Z0-9_]+)';
my $instance_begin     = '(private|protected|public)';
my $instance_end       = '(=.+|;)\s*' . $end_line;
my $method_begin       = '(protected|public)';
my $method_extra       = '((final|static|native|synchronized)\s+)*';
my $method_params      = '\(.*?\)';
my $class_begin        = '(public\s+)?class';
my $class_extends      = 'extends\s+'.$identifier;
my $class_implements   = 'implements\s+'.$identifier.'(\s*,\s*'.$identifier.')*';
my $class_extra        = '(\s+('.$class_extends.'|'.$class_implements.'))*';
my $constructor_begin  = 'public';
my $constructor_params = '\(.*?\)';


# Optimize regexps: turn every unescaped "(" into "(?:".
#
# A lookbehind is used rather than matching (and consuming) the character
# in front of the parenthesis.  A consumed character cannot take part in
# the next match, so in "((final|...)\s+)*" the second "(" would be skipped
# and remain a capturing group.  A literal "\(" and groups that already
# start with "(?" are left alone.
foreach my $fragment
(
  $begin_line,    $end_line,         $optional_space, $required_space,
  $data_type,     $identifier,       $instance_begin, $instance_end,
  $method_begin,  $method_extra,     $method_params,  $class_begin,
  $class_extends, $class_implements, $class_extra,    $constructor_begin,
  $constructor_params
  # Don't include $begin_capture and $end_capture here!
)
{
  $fragment =~ s{(?<!\\)\((?!\?)}{(?:}g;
}


# Instance variable: "<visibility> <type> <name>" followed by ";" or an
# initializer.  $1 is the declaration without the terminator.
my $instance_regexp    = $begin_line          . $optional_space
                       . $begin_capture
                         . $instance_begin    . $required_space
                         . $data_type         . $required_space
                         . $identifier        . $optional_space
                       . $end_capture
                       . $instance_end;

# Method: "<visibility> [modifiers] <type> <name>(<params>)".
# $1 is the complete method header.
my $method_regexp      = $begin_line          . $optional_space
                       . $begin_capture
                         . $method_begin      . $required_space
                         . $method_extra
                         . $data_type         . $required_space
                         . $identifier        . $optional_space
                         . $method_params
                       . $end_capture;

# Class header: "[public] class <name> [extends ...] [implements ...]".
# $1 is the name plus the extends/implements part, $2 is the name alone.
my $class_regexp       = $begin_line          . $optional_space
                       . $class_begin         . $required_space
                       . $begin_capture
                         . $begin_capture
                           . $identifier
                         . $end_capture
                         . $class_extra
                       . $end_capture;

# Constructor: "public <name>(<params>)".  $1 is the complete header.
my $constructor_regexp = $begin_line          . $optional_space
                       . $begin_capture
                         . $constructor_begin . $required_space
                         . $identifier        . $optional_space
                         . $constructor_params
                       . $end_capture;

# Compile every pattern once; the results can still be used as m/$name/.
$_ = qr/$_/ foreach ($instance_regexp, $method_regexp,
                     $class_regexp,    $constructor_regexp);




################################################
# Open the result file

# Create (or truncate) the report file inside the scanned directory.
# The three-argument form of open keeps the mode separate from the path,
# so no character in $dir can be interpreted as part of the open mode and
# leading whitespace in the path is preserved.
open(RES, '>', "$dir/classdiagram.txt") or die "Can't create classdiagram.txt: $!\n";

# Request an exclusive lock while the report is being written.  The result
# is not checked: locking is treated as best-effort here.
flock(RES, LOCK_EX);





################################################
# Parse all the files

# For every Java source file: collect the classes it declares together with
# their constructors, instance variables and methods, then append a
# formatted section for that file to the report (RES).
foreach my $file (@files) {

  # Everything found in this one file, keyed by class name.
  # Here is a sample:
  #
  #  %classes = (
  #               'Apl' => {
  #                          'fullinfo'     => 'Apl',
  #                          'constructors' => [],
  #                          'instance'     => [],
  #                          'methods'      => [
  #                                              'public static void main(String[] args)'
  #                                            ]
  #                        },
  #             );
  #
  # We use this structure, since one .java file might hold several classes.
  my %classes;

  # Name of the class whose members are currently being collected.  It stays
  # undefined until the first class header of the file has been seen, so it
  # also serves as the "are we inside a class?" flag.
  my $class;

  # Three-argument open: the file name comes from the directory listing and
  # must never be interpreted as an open mode or a pipe.
  open(my $fh, '<', "$dir/$file") or die "Can't open $file: $!\n";
  flock($fh, LOCK_SH);

  line:while(my $line = <$fh>)
  {
    if(defined $class)
    {
      # Only if we're already in a class ... { } block.
      # The first pattern that matches decides what the line is.
      if($line =~ m/$instance_regexp/)
      {
        # Instance variable
        push @{$classes{$class}{'instance'}}, $1;
        next line;
      }
      elsif($line =~ m/$method_regexp/)
      {
        # Method
        push @{$classes{$class}{'methods'}}, $1;
        next line;
      }
      elsif($line =~ m/$constructor_regexp/)
      {
        # Object Constructor
        push @{$classes{$class}{'constructors'}}, $1;
        next line;
      }
    }

    if($line =~ m/$class_regexp/)
    {
      # This is the begin of a class ... { .. } block:
      # $1 holds the name plus extends/implements, $2 the bare name.
      $class = $2;
      $classes{$class} = {
                           'fullinfo'     => $1,
                           'instance'     => [],
                           'methods'      => [],
                           'constructors' => [],
                         };
    }
  }
  close($fh);


  # Now we have parsed the entire file,
  # and we print it into the result file
  # in a nice formatted way.

  print RES "$file\n";
  print RES "\n";

  # Sort the class names so the report is identical from run to run;
  # the order returned by keys() is not stable.
  foreach my $name (sort keys %classes)
  {
    my $info = $classes{$name};

    print RES "  class $info->{'fullinfo'}\n";

    # Each non-empty member list gets its own titled, sorted section.
    foreach my $section (['constructors', 'Constructor'],
                         ['instance',     'Instance Variables'],
                         ['methods',      'Methods'])
    {
      my ($key, $title) = @$section;
      my $entries       = $info->{$key};
      next unless @$entries > 0;

      print RES "  - $title:\n";
      print RES map { "\t$_\n" } sort @$entries;
      print RES "\n";
    }

    print RES "\n";
  }
  print RES "\n";
}




################################################
# Done

# Output to RES is buffered, so a write error (for example a full disk) may
# only surface when the handle is closed.  Check the result before
# announcing success.
close(RES) or die "Can't write classdiagram.txt: $!\n";
print "Class diagram saved as $dir/classdiagram.txt\n";

Written by Diederik van der Boor on 26 February 2002