#! /usr/bin/env perl

use strict;
use warnings;
use File::Basename;
use File::Path;
use Cwd 'getcwd';
use Encode;
use HTML::Entities;

# Convert a MIRIAM registry XML export into a tab-separated table on STDOUT.
#
# Usage: <script> <miriam.xml>
#
# One row is printed per (namespace, dataEntry) pair.  The parser is
# line-oriented: it relies on each element of interest sitting on a single
# line and does not use a real XML parser.

# Make the directory containing this script the working directory.
my $CWD=dirname(${0});
my $DB=basename(${0}, '.pl');
chdir $CWD or die "Cannot chdir to '$CWD': $!\n";
$CWD=getcwd;
my $PWD=dirname($CWD);
my $ESROOT=$PWD;

# NOTE(review): the input path is resolved *after* the chdir above, so a
# relative argument is interpreted relative to the script's directory, not
# the caller's original working directory -- confirm this is intended.
my $miriam_xml = $ARGV[0];
die "Usage: $0 <miriam.xml>\n"
    unless defined $miriam_xml && length $miriam_xml;
use Cwd 'abs_path';
my $resolved_xml = abs_path($miriam_xml);
die "Cannot resolve input path '$miriam_xml': $!\n"
    unless defined $resolved_xml;
$miriam_xml = $resolved_xml;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as a mode, and a missing/unreadable file is reported instead
# of silently producing a header-only table.
# NOTE(review): the file is read as raw bytes while decode_entities() can
# yield characters; non-ASCII content may need an explicit encoding layer.
open(my $fh, '<', $miriam_xml)
    or die "Cannot open '$miriam_xml' for reading: $!\n";

# Parser state.  $id, $identifiers and $rid always hold the most recently
# seen value; they are not reset between records.
my $flag=0;            # true while inside a <datatype id=...> element
my $id = "";           # id attribute of the current datatype
my $name = "";         # namespace currently being normalised
my $identifiers = "";  # last <uri type="URL"> value, trailing slash removed
my $rid = "";          # id attribute of the last resource element
my $entry = "";        # entity-decoded text of the current <dataEntry>
my @n = ();            # namespaces collected for the current datatype

# Header row.  The column labelled "type" carries the URL captured in
# $identifiers.
print "name\tmiriam.collection\ttype\tmiriam.resource\tentry\n";
#printf "name\tmiriam.collection\tmiriam.resource\tentry\n";
while(my $line = <$fh>){
   # Start of a datatype: remember its id and start a fresh namespace list.
   if($line =~ /datatype id/){
      ($id) = ( $line =~ /id="(\S+)"/ );
      $flag = 1;
      @n = ();
      next;
   }
   # End of a datatype: namespaces seen from here on are ignored.
   if($line =~ /<\/datatype>/){
       $flag=0;
       next;
   }
   # Namespace inside a datatype: normalise and collect it.
   if($line =~ /<namespace>/ && $flag ){
      ($name) = ( $line =~ />(.*?)</ );
      # Strip the literal "kegg." prefix.  The dot is escaped so that only
      # a real dot is removed, not an arbitrary character after "kegg".
      $name =~ s/kegg\.//;
      $name =~ s/ec-code/ec/;
      push(@n, $name);
      next;
   }
   # URL-type URI: keep it without the trailing slash.
   if($line =~ /<uri type="URL">/){
      ($identifiers) = ( $line =~ />(.*?)</ );
      $identifiers =~ s/\/$//;
      next;
   }
   # Resource element: remember its id for the rows that follow.
   if($line =~ /resource id/){
      ($rid) = ( $line =~ /id="(\S+)"/ );
      next;
   }
   # Data entry: emit one row per collected namespace.
   if($line =~ /<dataEntry>/){
      ($entry) = ( $line =~ />(.*?)</ );
      $entry = decode_entities($entry);
      for my $namespace ( @n ){
        printf "%s\t%s\t%s\t%s\t%s\n", lc($namespace), $id, $identifiers, $rid, $entry;
#        printf "%s\t%s\t%s\t%s\n", lc($namespace), $id, $rid, $entry;
      }
      next;
   }
}

close($fh);
