#! /usr/bin/env perl

use strict;
use warnings;
use File::Basename;
use File::Path;
use Cwd 'getcwd';

# Path of the cross-reference input file, taken from the first command-line
# argument.
my $dbxl = $ARGV[0];
use Cwd 'abs_path';
if( ! defined($dbxl) || $dbxl eq '' ){
  die "Usage: $0 <cross-reference file>\n";
}

# Resolve to an absolute path *before* the chdir below, otherwise a relative
# argument would no longer point at the same file.
$dbxl = abs_path($dbxl);
if( ! defined($dbxl) ){
  die "Cannot resolve input path '$ARGV[0]'\n";
}

# Work from the directory containing this script; its parent directory is
# used as the root ($ESROOT) under which the data tables are looked up.
my $CWD=dirname(${0});
chdir($CWD) or die "Cannot chdir to '$CWD': $!\n";
$CWD=getcwd;
my $PWD=dirname($CWD);
my $ESROOT=$PWD;

# %DBINFO maps the first tab-separated column of miriam.tab to the complete
# (chomped) line. When a key occurs more than once only the first accepted
# line is kept. Lines keyed 'uniprot' are accepted only if they contain
# the text "purl".
my %DBINFO=();
my $miriam = $ESROOT . "/data/table/miriam.tab";
open(my $miriam_fh, '<', $miriam) or die "Cannot open '$miriam': $!\n";
while(my $line = <$miriam_fh>){
  chomp($line);
  my $key = (split(/\t/, $line))[0];

  # An empty line yields no fields at all; there is nothing to register.
  next if( ! defined($key) );

  next if( $key eq 'uniprot' && $line !~ /purl/ );
  next if( exists($DBINFO{$key}) );

  $DBINFO{$key} = $line;
}
close($miriam_fh);

# Turtle namespace declarations for the prefixes used in the output
# (rdfs:, idoo:) plus rdf: and dcterms:. The heredoc is non-interpolating,
# so the text below is written to STDOUT exactly as shown.
print <<'END_PREFIXES';
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix idoo: <http://rdf.identifiers.org/ontology/> .
END_PREFIXES

# Group consecutive rows of the input by their first column and hand each
# group to print_ttl as ($key, $value1, $value2, ...). Rows are
# tab-separated; only the first two columns are read. Grouping relies on
# rows with the same first column being adjacent in the file.
my $target = "";   # first column of the group currently being collected

# File-scoped variables not used by the grouping logic; kept declared in
# case code outside this section refers to them.
my $db = "";
my $id = "";

my @entry=();      # current group: key first, then its second-column values
open(my $dbxl_fh, '<', $dbxl) or die "Cannot open '$dbxl': $!\n";
while(my $line = <$dbxl_fh>){
  chomp($line);
  my ($e1, $e2) = split(/\t/, $line);

  # Blank lines (or lines with an empty first column) carry no key.
  next if( ! defined($e1) || $e1 eq '' );

  # Start of a new group: flush the previous one first. Testing @entry
  # rather than the truth of $target keeps a key such as "0" working.
  if( ! @entry || $target ne $e1 ){
    # Called with '&' because print_ttl is defined further down the file
    # with an empty prototype; this form is unaffected by prototypes.
    &print_ttl(@entry) if( @entry );
    @entry = ($e1);
    $target = $e1;
  }

  # A row without a second column contributes only its key.
  push(@entry, $e2) if( defined($e2) );
}
close($dbxl_fh);

# The final group has no following row to trigger its flush, so emit it
# explicitly once the input is exhausted.
&print_ttl(@entry) if( @entry );
# _resource_url($db, $template, $id)
#
# Build the resource URL for identifier $id. $template is the URL column of
# the database's %DBINFO row, in which the first literal "$id" is replaced by
# the identifier. For the 'refseq' database the NCBI nucleotide URL is used
# instead of the table's template.
# The identifier is substituted directly, so '%' characters in a template
# are preserved verbatim rather than being treated as printf conversions.
sub _resource_url {
  my ($db, $template, $id) = @_;

  if( $db eq 'refseq' ){
    $template = 'https://www.ncbi.nlm.nih.gov/nucleotide/$id';
  }
  $template =~ s/\$id/$id/;
  return $template;
}

# print_ttl(@entry)
#
# Print Turtle triples to STDOUT for one group of cross-references.
#   $entry[0]     source identifier in "db:id" form
#   $entry[1..]   identifiers linked to the source, also "db:id"
#
# Database names are looked up in the file-level %DBINFO, whose values are
# tab-separated rows; field 2 is used as the IRI prefix of the identifier
# and as the object of idoo:database, field 4 as the URL template.
# If the source database is not in %DBINFO its name is written to STDERR
# and nothing is printed for the group. Linked identifiers whose database
# is unknown, or which cannot be parsed, are skipped silently.
#
# The former empty prototype "()" was dropped: the sub takes arguments, and
# callers using the &print_ttl(...) form are unaffected.
sub print_ttl {
  my ($source, @links) = @_;
  return if( ! defined($source) );

  my ($db1, $id1) = ($source =~ /(.*?):(.*)$/);
  if( ! defined($db1) ){
    # No "db:id" separator; report the raw value instead of an empty line.
    printf STDERR "%s\n", $source;
    return;
  }

  if( ! exists($DBINFO{$db1}) ){
    printf STDERR "%s\n", $db1;
    return;
  }

  my @src = split(/\t/, $DBINFO{$db1});
  my $type1 = $src[2];
  my $url1 = _resource_url($db1, $src[4], $id1);

  printf "<%s/%s> rdfs:seeAlso <%s> .\n", $type1, $id1, $url1;
  printf "<%s/%s> idoo:database <%s> .\n", $type1, $id1, $type1;

  foreach my $link (@links){
    next if( ! defined($link) );

    my ($db2, $id2) = ($link =~ /(.*?):(.*)$/);
    next if( ! defined($db2) );
    next if( ! exists($DBINFO{$db2}) );

    my @dst = split(/\t/, $DBINFO{$db2});
    my $type2 = $dst[2];
    my $url2 = _resource_url($db2, $dst[4], $id2);

    printf "<%s/%s> rdfs:seeAlso <%s/%s> .\n", $type1, $id1, $type2, $id2;
    printf "<%s/%s> rdfs:seeAlso <%s> .\n", $type2, $id2, $url2;
    printf "<%s/%s> idoo:database <%s> .\n", $type2, $id2, $type2;
  }
  return;
}
