René Nyffenegger's collection of things on the web
René Nyffenegger on Oracle - Most wanted - Feedback -
 

Creating an ERD out of SQL create table statements

The following perl script parses a file containing create table statements in SQL and creates an ERD using dot and Parse::RecDescent.
#   create_erd.pl
#
#   Copyright (C) 2004 René Nyffenegger
#
#   This source code is provided 'as-is', without any express or implied
#   warranty. In no event will the authors be held liable for any damages
#   arising from the use of this software.
#
#   Permission is granted to anyone to use this software for any purpose,
#   including commercial applications, and to alter it and redistribute it
#   freely, subject to the following restrictions:
#
#   1. The origin of this source code must not be misrepresented; you must not
#      claim that you wrote the original source code. If you use this source code
#      in a product, an acknowledgment in the product documentation would be
#      appreciated but is not required.
#
#   2. Altered source versions must be plainly marked as such, and must not be
#      misrepresented as being the original source code.
#
#   3. This notice may not be removed or altered from any source distribution.
#
#   René Nyffenegger rene.nyffenegger@adp-gmbh.ch
#
################################################################################

use strict;
use warnings;

use Parse::RecDescent;
use Data::Dumper;

my $out_file_type = 'png';

$::RD_ERRORS = 1; # Make sure the parser dies when it encounters an error
$::RD_WARN   = 1; # Enable warnings. This will warn on unused rules &c.
$::RD_HINT   = 1; # Give out hints to help fix problems.
#$::RD_TRACE  = 1; # Trace of parser

my $create_db_file = $ARGV[0];

open F, $create_db_file or die "could not open $create_db_file";

my @f = <F>;

my $create_table_grammar = q {

  create_table_stmts : create_table_stmt(s?)
   { @item[1..$#item] }

  create_table_stmt  : /create/i /table/i table_name '(' rel_props ')' /;?/
                       { {tab_nam => $item{table_name}, cols => $item{rel_props} } }

  table_name         : identifier
                       {$item[1]}

  rel_props          : columns                                        #relational properties (according Oracle documentation)
                       {$item[1]}

  columns            : column ',' columns
                       { if ($item[1]) { unshift @{$item[3]}, $item[1]} else {print "xxx\n"}; $item[3] }
                     | column
                       { [ $item[1] ] }

  column             : out_of_line_constr
                       { { col_nam=>'constraint' } } 
                     | identifier reference_clause
                       { $return = {col_nam=> $item{identifier}} ;  @{$return}{keys %{$item{reference_clause}}} =  values %{$item{reference_clause}}  } 
                     | identifier data_type primary_key(?) not_null(?) default(?)
                       { {col_nam=>$item{identifier}, type=>$item{data_type}} }

  out_of_line_constr : named_const constraint
                     | constraint

  named_const        : /constraint/i identifier

  constraint         : /check/i           paranthesis
                     | /unique/i          paranthesis
                     | /primary/i /key/i  paranthesis
                     | /foreighn/i /key/i paranthesis

  paranthesis        : '(' in_paranthesis ')'

  in_paranthesis     : ( /[^()]+/ | paranthesis)(s)
                     
  reference_clause   : /references/i identifier not_null(?)
                       { { refd_table=>$item[2] } }
                     | data_type /references/i identifier not_null(?)
                       { { type=>$item[1], refd_table=>$item[3] } }
                     
  default            : /default/i sql_string

  sql_string         : /'([^']|'')*'/

  primary_key        : /primary/i /key/i

  not_null           : /not/i /null/i

  data_type          : dt_ident precision(?)
                       {$item[1]. $item[2][0] || "" } 

  precision          : '(' number ')'
                        {$item[1].$item[2].$item[3]}
                     | '(' number ',' number ')'
                        {$item[1].$item[2].$item[3].$item[4].$item[5]}

  dt_ident           : /number/i         {$item[1]} 
                     | /int +identity/i  {$item[1]} 
                     | /int/i            {$item[1]} 
                     | /decimal/i        {$item[1]} 
                     | /smallint/i       {$item[1]} 
                     | /integer/i        {$item[1]} 
                     | /long raw/i       {$item[1]} 
                     | /long/i           {$item[1]} 
                     | /varchar2/i       {$item[1]} 
                     | /varchar/i        {$item[1]} 
                     | /char/i           {$item[1]} 
                     | /raw/i            {$item[1]} 
                     | /date/i           {$item[1]} 
                     | /smalldatetime/i  {$item[1]} 
                     | /blob/i           {$item[1]} 
                     | /clob/i           {$item[1]} 
                     | /nclob/i          {$item[1]} 
                     | /bit/i            {$item[1]} 


  number             : /\d+/
                      { $item[1] }
                      

  identifier         : m([A-Za-z_]\w*)
                      {$item[1]}
};

my $parser=Parse::RecDescent->new($create_table_grammar);

my $in_comment=0;

my $l_temp;
LINE:
foreach my $l (@f) {
  my $len = length $l;
  $l_temp="";

  my $first_quote =0;  # only set when already $in_comment
  my $asterik     =0;
  my $slash       =0;
  my $in_string   =0;
  my $hyphen      =0;

  for (my $i=0; $i<$len; $i++) {

    my $c = substr($l, $i, 1); 

    if ($in_comment) {
      if ($c eq "*") {
        $asterik     = 1;
      }
      elsif ($c eq "/") {
        if ($asterik) {
          $asterik     = 0;
          $slash       = 0;
          $in_comment  = 0;
        }
      }
      else {
        $asterik = 0;
      }
    }
    else {
      if ($in_string) {
        if ($c eq "'") {

          if ($first_quote) {
            $first_quote = 0;          
            $l_temp .= $c;
          }
          else {
            $first_quote = 1;
            $l_temp .= $c;
          }
        }
      }
      elsif ($c eq "/") {
        if ($slash) {
          $l_temp .= "/";
        }
        else {
          $slash = 1;
        }
      }
      elsif ($c eq "*") {
        if ($slash) {
          $in_comment = 1;
        }
        else {
          $l_temp .= $c;
        }
      }
      elsif ($c eq "-") {
        if ($hyphen) {
          $l_temp .= "\n";
          next LINE;
        }
        else {
          $hyphen = 1;
        }
      }
      elsif ($hyphen) {
        $hyphen = 0;
        $l_temp .= "-$c";
      }
      elsif ($first_quote) {
        $in_string   = 0;
        $first_quote = 0;
        $l_temp .= $c;
      }
      else {
        $l_temp .= $c;
      }
    }
  }
}
continue {
  $l = $l_temp;
}

# print join " ", @f;

my $result = $parser->create_table_stmts(join " ", @f) or die "could'n parse $create_db_file";

# print "result: \n";

#print Dumper($result);

#exit 0;


my %tables;
open DOT, "|dot -T$out_file_type > erd.$out_file_type";
print DOT "digraph erd {\n";

#print DOT 'page="11.69,8.27"' . "\n";
my $id_cnt=0;
my @refs;
foreach my $table (@$result) {

  my $table_name = uc $table->{tab_nam};
  
  print DOT uc $table_name;
  print DOT '[ shape="record" ';
  print DOT ' label = "{';
  print DOT uc $table_name . "|";

  foreach my $col (@{$table->{cols}}) {
    print DOT "|";
    if (exists $col->{refd_table} and uc $col->{refd_table} ne $table_name) {
      my $id_ref = "i" . ++$id_cnt;
      print DOT "<$id_ref> ";
      push @refs, {from_tbl=>$table_name, from_id=>$id_ref, to_tbl=>uc ($col->{refd_table})};
    }
    print DOT $col->{col_nam};
  }

 # print DOT join "|", map {$_->{col_nam}} @{$table->{cols}};
  print DOT '}"';
  print DOT "];\n";
}

foreach my $ref (@refs) {
  print DOT $ref->{from_tbl} . ":" . $ref->{from_id} . " -> " . $ref->{to_tbl} . ";\n";
}

print DOT "}";


It seems as though graphviz crashes with self referencing tables.

Links

Create table syntax for Oracle.