#!/usr/bin/perl
##########################################################################
#
# Minimal Java bytecode assembler
#
# Brad Fitzpatrick, bradfitz@bradfitz.com
# November 1st, 1999
#
# University of Washington, Fall 1999, CSE 378, Section AC
#
##########################################################################
#
# Usage:
#
#     jassembler.pl filename
#
# The resulting byte codes are dumped to STDOUT.  So perhaps a useful
# way to use this would be:
#
#     jassembler.pl filename > mipsjava.dat
#     (or whatever)
#
# Where filename is of the format:
#
#    - anything after a # or a ; on a line is a comment, including the
#      character itself
#
#    - blank lines are ignored
#
#    - labels are any non-whitespace followed immediately by a colon (:) 
#
# The following Java operations are permitted, and with the following
# argument types:
#
#       IADD
#       ISUB
#       IMUL
#       IDIV
#       ILOAD 20            ; an integer index
#       ILOAD 0x14          ; a hex index
#       ISTORE 20           ; an integer index
#       ISTORE 0x14         ; a hex index
#       IALOAD
#       IASTORE
#       BIPUSH 61           ; an integer
#       BIPUSH 0x3d         ; hex value
#       SIPUSH 65535
#       SIPUSH 0xFFFF
#       DUP
#       DUP2
#       POP
#       IFEQ 0xFFFD
#       IFEQ label
#       IFNE 0xFFFD
#       IFNE label
#       IFLT 0xFFFD
#       IFLT label
#       IFLE 0xFFFD
#       IFLE label
#       IFGT 0xFFFD
#       IFGT label
#       IFGE 0xFFFD
#       IFGE label
#       GOTO 0xFFFD
#       GOTO label
#       IRETURN
#       POPPRINT            ; not a JVM instruction; assembled as the single byte 0x01

# Path of the assembly source file: the first command-line argument.
$file = $ARGV[0];

# Script name with any leading directory part removed, used only in the
# usage message below.
my $base = $0;
$base =~ s/^.+\///;

# Test for "defined and non-empty" rather than plain truth, so that a file
# literally named "0" is not mistaken for a missing argument.
unless (defined $file && length $file)
{
    die "Usage: $base <javasm_file>\n";
}

# Fail early with a clear message when the named file does not exist.
unless (-e $file)
{
    die "Cannot open file: $file\n";
}

# Label name => byte address of the instruction that follows the label.
# Filled in while reading the file and consulted when encoding branches.
%label = ();

# Instructions that consist of the opcode byte alone.
# NOTE(review): the JVM specification lists idiv as 0x6c (0x6b is dmul).
# The value below is kept unchanged because whatever consumes this output
# may have been written against it -- confirm before changing.
my %simple_op = (
    "iadd"     => "0x60",
    "isub"     => "0x64",
    "imul"     => "0x68",
    "idiv"     => "0x6b",
    "iaload"   => "0x2e",
    "iastore"  => "0x4f",
    "dup"      => "0x59",
    "dup2"     => "0x5c",
    "pop"      => "0x57",
    "ireturn"  => "0xac",
    "popprint" => "0x01",
);

# Instructions followed by a single operand byte.  "decimal" is the
# accepted decimal syntax and "min" the smallest decimal value allowed;
# the largest is always 255.  A two-digit hex literal is also accepted.
my %byte_op = (
    "iload"  => { code => "0x15", decimal => qr/^\d+$/,   min => 0    },
    "istore" => { code => "0x36", decimal => qr/^\d+$/,   min => 0    },
    "bipush" => { code => "0x10", decimal => qr/^-?\d+$/, min => -128 },
);

# Branch instructions: opcode followed by a 16-bit operand given either
# as a four-digit hex literal or as a label name.
my %branch_op = (
    "ifeq" => "0x99", "ifne" => "0x9a", "iflt" => "0x9b",
    "ifle" => "0x9e", "ifgt" => "0x9d", "ifge" => "0x9c",
    "goto" => "0xa7",
);

# Split a value in the range 0..65535 into two "0xnn" strings, high byte
# first.
sub _word_to_bytes
{
    my ($value) = @_;
    my $hex = sprintf("%04x", $value);
    return ("0x" . substr($hex, 0, 2), "0x" . substr($hex, 2, 2));
}

# Two-pass system.  Pass 1 only serves to record the address of every
# label (its byte output is thrown away); pass 2 produces the final bytes
# in @bytes, by which time forward references can be resolved.
for my $pass (1 .. 2)
{
    my $bytenum = 0;    # address of the next byte to be emitted
    my $line    = 0;    # current source line number, for messages
    @bytes = ();

    open(my $asm, '<', $file) or die "Cannot open file: $file ($!)\n";
    while (my $text = <$asm>)
    {
        $line++;
        chomp $text;

        # Copy of the line as written (minus leading blanks) for messages.
        my $shown = $text;
        $shown =~ s/^\s+//;

        # Strip the comment, including a bare "#" or ";" with nothing
        # after it, then skip lines that are blank.
        $text =~ s/[#;].*//;
        next unless ($text =~ /\S/);

        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        # A label is non-whitespace followed immediately by a colon.  It
        # names the address of whatever instruction comes next.
        if ($text =~ s/(\S+)://)
        {
            my $name = $1;
            if ($pass == 1 && defined $label{$name})
            {
                warn "Redefinition of label \"$name\" on line $line.\n";
            }
            $label{$name} = $bytenum;
            $text =~ s/^\s+//;
        }
        next unless ($text =~ /\S/);

        my ($w, $arg) = split(/\s+/, $text);
        $w = lc($w);

        if (defined $simple_op{$w})
        {
            push @bytes, $simple_op{$w};
            $bytenum += 1;
        }
        elsif (defined $byte_op{$w})
        {
            my $spec = $byte_op{$w};
            push @bytes, $spec->{code};
            if (defined $arg && $arg =~ $spec->{decimal})
            {
                # The operand must fit in one byte.
                if ($arg < $spec->{min} || $arg > 255)
                {
                    die "Operand out of range on line $line: $shown\n";
                }
                # Negative values are stored in two's complement.
                push @bytes, sprintf("0x%02x", $arg >= 0 ? $arg : 256 + $arg);
            }
            elsif (defined $arg && $arg =~ /^0x[0-9a-f]{2,2}$/i)
            {
                push @bytes, lc($arg);
            }
            else
            {
                die "Syntax error on line $line: $shown\n";
            }
            $bytenum += 2;
        }
        elsif ($w eq "sipush")
        {
            # NOTE(review): the JVM specification lists sipush as 0x11
            # (0x17 is fload).  Kept unchanged for the same reason as
            # idiv above -- confirm before changing.
            push @bytes, "0x17";
            if (defined $arg && $arg =~ /^\-?\d+$/)
            {
                # The operand must fit in two bytes.
                if ($arg < -32768 || $arg > 65535)
                {
                    die "Operand out of range on line $line: $shown\n";
                }
                push @bytes, _word_to_bytes($arg >= 0 ? $arg : 65536 + $arg);
            }
            elsif (defined $arg && $arg =~ /^0x([0-9a-f]{4,4})$/i)
            {
                push @bytes, _word_to_bytes(hex($1));
            }
            else
            {
                die "Syntax error on line $line: $shown\n";
            }
            $bytenum += 3;
        }
        elsif (defined $branch_op{$w})
        {
            push @bytes, $branch_op{$w};
            if (defined $arg && $arg =~ /^0x([0-9a-f]{4,4})$/i)
            {
                push @bytes, _word_to_bytes(hex($1));
            }
            elsif (defined $arg && defined $label{$arg})
            {
                # The offset is relative to the address of the branch
                # opcode itself, which is the current value of $bytenum.
                my $offset = $label{$arg} - $bytenum;
                if ($offset < -32768 || $offset > 32767)
                {
                    die "Branch target out of range on line $line: $shown\n";
                }
                push @bytes, _word_to_bytes($offset >= 0 ? $offset : 65536 + $offset);
            }
            elsif ($pass == 2)
            {
                # In pass 1 an unknown label may simply be a forward
                # reference; by pass 2 every label has been seen.
                die "Syntax error or undefined label at line $line: $shown\n";
            }
            # The operand occupies two bytes even when pass 1 could not
            # encode it yet, so that later addresses stay correct.
            $bytenum += 3;
        }
        else
        {
            # Unknown mnemonic.  This includes unrecognised "if..." names,
            # which must not be skipped silently.
            die "Syntax error on line $line: $shown\n";
        }
    }
    close($asm);

} # end passes for loop

# Append a terminating zero byte and fold every entry to lower case.
push @bytes, "0x00";
$_ = lc($_) foreach @bytes;

# Print the bytes as ".byte" directives, at most ten values per line.
# @bytes is consumed in the process.
while (1)
{
    my @row = grep { $_ } splice(@bytes, 0, 10);
    last unless @row;
    print ".byte ", join(", ", @row), "\n";
}

