#!/usr/bin/perl
use strict;
use File::Copy;
use Getopt::Std;
use List::Util qw(max);

# Print the command-line help text to standard output and terminate the
# script with the default (zero) exit status. Never returns to the caller.
sub usage {
  my @help_lines = (
    "Usage: $0 [OPTION]... -i <input>\n",
    "Extract the device kernels from an hcc executable.\n\n",
    "-h          \t\t\t\tshow this help message\n",
    "-i <input>  \t\t\t\tinput file\n",
  );
  print(join('', @help_lines));
  exit;
}

# Set to 1 to trace section/bundle parsing and the external commands run.
my $debug = 0;

# use clang offload bundler (instead of "dd")
# to extract device object from the bundle
my $use_clang_offload_bundler = 1;

# Command line: -h prints help, -i <input> names the executable to process.
my %options = ();
getopts('hi:', \%options);

if (!%options || defined $options{h}) {
  usage();
}

# Tool locations default to the ROCm install; HCC_HOME overrides them.
my $rocm_path = "/opt/rocm";
my $hcc_bin_dir = defined $ENV{'HCC_HOME'} ? "$ENV{'HCC_HOME'}/bin"
                                           : "$rocm_path/hcc/bin";
my $llvm_objdump = "$hcc_bin_dir/llvm-objdump";
my $clang_offload_bundler = "$hcc_bin_dir/clang-offload-bundler";

defined $options{i} or die("input not specified");
my $input_file = $options{i};
(-f $input_file) or die("can't find $input_file");

# Locate the .kernel section. objdump is run once through a list-form pipe so
# the input path is never interpreted by a shell, and the section name is
# compared exactly so that other lines mentioning "kernel" (for instance the
# "file format" banner when the input file name contains that word) cannot be
# mistaken for the section row.
my $kernel_section_size = 0;
my $kernel_section_offset = 0;
my $kernel_section_found = 0;
open(my $section_fh, '-|', 'objdump', '-h', $input_file)
  or die("Fail to run objdump: $!\n");
while (my $line = <$section_fh>) {
  next if $kernel_section_found;   # keep draining the pipe, use first match
  # Section table row layout: Idx Name Size VMA LMA File-off Algn
  my @fields = split(' ', $line);
  next unless (@fields >= 6 && $fields[1] eq '.kernel');
  next unless ($fields[2] =~ /\A[0-9a-fA-F]+\z/
               && $fields[5] =~ /\A[0-9a-fA-F]+\z/);
  $kernel_section_size = hex($fields[2]);
  $kernel_section_offset = hex($fields[5]);
  $kernel_section_found = 1;
}
# The exit status of objdump is deliberately not checked here: a failed run
# yields no section row and is reported by the check below.
close($section_fh);

$kernel_section_size or die("No .kernel section found\n");
my $kernel_section_end = $kernel_section_offset + $kernel_section_size;
if ($debug) {
  print "kernel section size: $kernel_section_size\n";
  print "kernel section offset: $kernel_section_offset\n";
  print "kernel section end: $kernel_section_end\n";
}

# Parse the kernel bundle header(s).
#
# The .kernel section is walked as a sequence of clang offload bundles
# ("blobs"). For each blob this code writes:
#   <input>-NNN.bundle          the raw bundle
#   <input>-NNN-<target>.hsaco  one code object per GPU target
#   <input>-NNN-<target>.isa    llvm-objdump disassembly of that code object
# External tools are always invoked in list form so that neither the input
# path nor target names read from the binary are interpreted by a shell.

# Read one 8-byte little-endian unsigned integer from $fh.
# $what names the field for the error message; dies on a short read.
sub read_u64 {
  my ($fh, $what) = @_;
  my $raw;
  my $got = read($fh, $raw, 8);
  (defined $got && $got == 8) or die("Fail to parse $what\n");
  # The bundle header stores its integers little-endian; "Q<" decodes them
  # correctly regardless of the host byte order.
  return unpack("Q<", $raw);
}

open(my $input_fh, '<', $input_file)
  or die("can't open $input_file: $!\n");
binmode($input_fh);

my $current_blob_offset = $kernel_section_offset;
my $num_blobs = 0;
while (1) {

  if ($debug) {
    print "Current blob offset: $current_blob_offset\n";
  }

  if ($current_blob_offset >= $kernel_section_end) {
    if ($debug) {
      print "reached end of kernel section\n";
    }
    last;
  }

  seek($input_fh, $current_blob_offset, 0)
    or die("Fail to seek to offset $current_blob_offset in $input_file: $!\n");

  # skip OFFLOAD_BUNDLER_MAGIC_STR
  my $magic_str;
  my $read_bytes = read($input_fh, $magic_str, 24);
  if (!defined $read_bytes || $read_bytes != 24
      || $magic_str ne "__CLANG_OFFLOAD_BUNDLE__") {
    # didn't detect the bundle magic string
    if ($debug) {
      print "Offload bundle magic string not detected\n";
    }
    last;
  }

  # read number of bundles
  my $num_bundles = read_u64($input_fh, "number of bundles");
  if ($debug) {
    print "Blob $num_blobs, number of bundles: $num_bundles\n";
  }

  # detected GPU targets
  my @asic_target_array;

  # The blob ends where the bundle with the highest offset ends.
  my $last_bundle_offset = 0;
  my $last_bundle_size = 0;

  # strings for creating new files
  my $file_blob_number = sprintf("%03d", $num_blobs);
  my $filename_prefix = "${input_file}-${file_blob_number}";

  # The host entry is always listed first and discarded to /dev/null.
  my $clang_offloadbundler_outputs = "-outputs=/dev/null";
  my $clang_offloadbundler_targets = "-targets=host-x86_64-unknown-linux";

  for (my $iter = 0; $iter < $num_bundles; $iter++) {
    # Each table entry is: offset, size, triple length, triple text.
    my $offset = read_u64($input_fh, "bundle offset");
    $last_bundle_offset = max($last_bundle_offset, $offset);

    my $size = read_u64($input_fh, "bundle size");
    if ($last_bundle_offset == $offset) {
      $last_bundle_size = $size;
    }

    my $triple_size = read_u64($input_fh, "triple size");

    my $triple;
    $read_bytes = read($input_fh, $triple, $triple_size);
    (defined $read_bytes && $read_bytes == $triple_size)
      or die("Fail to parse triple\n");

    if ($debug) {
      print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n");
    }

    # Only process GPU targets, skip host targets
    next unless ($triple =~ /\Ahcc-amdgcn-amd-amdhsa--(.*)\z/s);
    my $asic_target = $1;

    # augment arguments for clang-offload-bundler
    my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
    $clang_offloadbundler_outputs .= ",${hsaco_file_name}";
    $clang_offloadbundler_targets .= ",${triple}";

    push(@asic_target_array, $asic_target);

    if (!$use_clang_offload_bundler) {
      # Bundle offsets are relative to the start of the blob.
      my $offset_for_hsaco = $current_blob_offset + $offset;
      my @dd_command = ('dd', "if=${input_file}", "of=${hsaco_file_name}",
                        "skip=$offset_for_hsaco", "count=$size", 'bs=1',
                        'status=none');
      if ($debug) {
        print("extract code bundle with dd: @dd_command\n");
      }
      system(@dd_command) == 0
        or die("Fail to extract code bundle with dd\n");
    }
  }

  # extract the code blob
  my $blob_filename = "${filename_prefix}.bundle";
  my $write_bytes = $last_bundle_offset + $last_bundle_size;
  # A zero-length blob would leave $current_blob_offset unchanged and the
  # loop would re-parse the same header forever.
  $write_bytes > 0
    or die("Malformed kernel bundle at offset $current_blob_offset: no bundle data\n");
  system('dd', "if=$input_file", "of=$blob_filename",
         "skip=$current_blob_offset", "count=$write_bytes", 'bs=1',
         'status=none') == 0
    or die("Extracting kernel bundle file failed: $?");

  if ($use_clang_offload_bundler) {
    (-f $clang_offload_bundler)
      or die("Can't find clang-offload-bundler\n");

    # use clang-offload-bundler to unbundle HSACO
    my @command = ($clang_offload_bundler, '-unbundle', '-type=o',
                   "-inputs=${blob_filename}",
                   $clang_offloadbundler_outputs,
                   $clang_offloadbundler_targets);
    if ($debug) {
      print("clang offload bundler command: @command\n");
    }
    system(@command) == 0
      or die("Fail to execute clang-offload-bundler");
  }

  (-f $llvm_objdump) or die("Can't find llvm-objdump\n");
  foreach my $asic_target (@asic_target_array) {
    my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
    my $isa_file_name = "${filename_prefix}-${asic_target}.isa";

    # use llvm-objdump to dump out GCN ISA; its standard output is copied
    # into the .isa file through a pipe instead of a shell redirection.
    open(my $isa_fh, '>', $isa_file_name)
      or die("Fail to create $isa_file_name: $!\n");
    open(my $disasm_fh, '-|', $llvm_objdump, '-disassemble',
         "-mcpu=$asic_target", $hsaco_file_name)
      or die("Fail to run llvm-objdump: $!\n");
    while (my $line = <$disasm_fh>) {
      print {$isa_fh} $line;
    }
    # close() on a pipe waits for the child and is false on non-zero exit.
    my $disasm_ok = close($disasm_fh);
    my $isa_ok = close($isa_fh);
    ($disasm_ok && $isa_ok)
      or die("Fail to disassemble AMDGPU ISA for target " . $asic_target);

    if ($debug) {
      print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n");
    }
  }

  $current_blob_offset += $write_bytes;
  $num_blobs++;
}

close($input_fh);

$num_blobs or die("No device code found.\n");
exit(0);

