#!/usr/bin/env perl
#
# cat prosite.dat | prosite.pl
#

use strict;
use warnings;

my $entry = "";
while (my $l = <STDIN>) {
  if ($l =~ /^AC +(PS\d+)/) {
    $entry = $1;
  }
  elsif ($l =~ /^DR +/) {
    dr(substr($l,5)) if length $l > 5;
  }
  elsif ($l =~ /^3D +/) {
    pdb(substr($l,5)) if length $l > 5;
  }
  elsif ($l =~ /^DO +/) {
    prosdoc(substr($l,5)) if length $l > 5;
  }
  if ($l =~ /^\/\//) {
    $entry = "";
  }
}

sub dr {
  my $s = shift;
  my @r = split /\;/,$s;
  foreach my $e (@r) {
    my @y = split /,/,$e;
    my $m = $y[0];
    $m =~ s/\s+//g;
    if (isuniprot($m)) {
      print "prosite:$entry\tuniprot:$m\n";
    }
  }
}

sub pdb {
  my $s = shift;
  my @r = split /\;/,$s;
  foreach my $e (@r) {
    $e =~ s/\s+//g;
    if (ispdb($e)) {
      print "prosite:$entry\tpdb:$e\n";
    }
  }
}

sub prosdoc {
  my $s = shift;
  my @r = split /\;/,$s;
  foreach my $e (@r) {
    $e =~ s/\s+//g;
    if ($e =~ /^PDOC\d{5}$/) {
      print "prosite:$entry\tprosdoc:$e\n";
    }
  }
}

sub isuniprot {
  my $s = shift;
  my $l = length $s;
  return 0 if $l < 5;
  return 0 if $l > 12;
  my @r = split //,$s;
  return 0 unless $r[0] =~ /[0-9A-Z]/;
  for (my $i = 1; $i < $l; $i++){
    return 0 unless $r[$i] =~ /[0-9A-Z_]/;
  }
  my $c = 0;
  for (my $i = 0; $i < $l; $i++){
    $c++ if $r[$i] =~ /[A-Z]/;
  }
  return 0 if $c == $l;
  return 1;
}

sub isgenbank {
  my $s = shift;
  my $l = length $s;
  return 0 if $l < 6;
  return 0 if $l > 8;
  my @r = split //,$s;
  return 0 unless $r[0] =~ /[A-Z]/;
  return 0 unless $r[1] =~ /[0-9A-Z]/;
  for (my $i = 2; $i < $l; $i++){
    return 0 unless $r[$i] =~ /\d/;
  }
  return 1;
}

sub isembl {
  my $s = shift;
  my $l = length $s;
  return 0 if $l < 6;
  return 0 if $l > 8;
  my @r = split //,$s;
  return 0 unless $r[0] =~ /[A-Z]/;
  return 0 unless $r[1] =~ /[0-9A-Z]/;
  for (my $i = 2; $i < $l; $i++){
    return 0 unless $r[$i] =~ /\d/;
  }
  return 1;
}

sub ispdb {
  my $s = shift;
  my $l = length $s;
  return 0 if $l < 4;
  return 0 if $l > 5;
  my @r = split //,$s;
  return 0 unless $r[0] =~ /\d/;
  for (my $i = 1; $i < $l; $i++ ){
    return 0 unless $r[$i] =~ /[0-9A-Z]/;
  }
  my $c = 0;
  my $d = 0;
  for (my $i = 0; $i < $l; $i++){
    $c++ if $r[$i] =~ /[A-Z]/;
    $d++ if $r[$i] =~ /\d/;
  }
  return 0 if $c == $l;
  return 0 if $d == $l;
  return 1;
}
