#!/usr/bin/env perl
#
# cat prosite.doc | prosdoc.pl
#

use strict;
use warnings;

# Read a PROSITE documentation file from STDIN and print its cross-references,
# one per output line, in the form "prosdoc:<entry>\t<db>:<id>".
#
# $entry holds the id captured from the most recent "{PDOCnnnnn}" line and is
# reset to "" when a "{END}" line is seen.  Links to prosite (from "{PS..."
# header lines) and to other prosdoc entries are suppressed while the current
# entry is PDOC00000.
my $entry = "";
while (my $line = <STDIN>) {
  # "{PDOCnnnnn}" starts a new entry; remember its id.
  if ($line =~ /^\{(PDOC\d{5})\}/) {
    $entry = $1;
    next;
  }
  # "{PSnnnnn..." header line: link the current entry to that PROSITE id.
  elsif ($line =~ /^\{(PS\d{5})/) {
    print "prosdoc:$entry\tprosite:$1\n" if $entry ne "PDOC00000";
    next;
  }

  # In-text references to other documentation entries.
  # The quantifier must be \d{5}; "\{5}" would only match the literal
  # text "{5}" and never a real accession.
  my @r = ($line =~ /(PDOC\d{5})/g);
  if ($entry ne "PDOC00000") {
    foreach my $e (@r) {
      print "prosdoc:$entry\tprosdoc:$e\n";
    }
  }

  # In-text references to PROSITE ids (same \d{5} quantifier as above).
  @r = ($line =~ /(PS\d{5})/g);
  foreach my $e (@r) {
    print "prosdoc:$entry\tprosite:$e\n";
  }

  # Four-field dotted numbers whose first field is 1-6 are reported as EC
  # numbers.
  @r = ($line =~ /([1-6]\.\d+\.\d+\.\d+)/g);
  foreach my $e (@r) {
    print "prosdoc:$entry\tec:$e\n";
  }

  # "PDB:<word>" references, kept only when ispdb() accepts the word.
  @r = ($line =~ /PDB:(\w+)/g);
  foreach my $e (@r) {
    if (ispdb($e)) {
      print "prosdoc:$entry\tpdb:$e\n";
    }
  }

  # Only the first "PubMed=<digits>" occurrence on a line is reported.
  if ($line =~ /PubMed=(\d+)/) {
    print "prosdoc:$entry\tpubmed:$1\n";
  }

  # "{END}" closes the current entry.
  if ($line =~ /^\{END\}/) {
    $entry = "";
  }
}

# Heuristic check for a UniProt-style identifier (going by the sub's name).
#
# Returns 1 when the argument is 5 to 12 characters long, starts with a
# digit or an upper-case letter, continues with digits, upper-case letters
# or underscores, and is not made up solely of upper-case letters.
# Returns 0 otherwise.
sub isuniprot {
  my ($id) = @_;

  # Overall shape: one leading [0-9A-Z], then 4 to 11 more of [0-9A-Z_].
  return 0 unless $id =~ /\A[0-9A-Z][0-9A-Z_]{4,11}\z/;

  # At least one character must be something other than an upper-case letter.
  return 0 unless $id =~ /[^A-Z]/;

  return 1;
}

# Heuristic check for a GenBank-style accession (going by the sub's name).
#
# Returns 1 when the argument is 6 to 8 characters long and consists of an
# upper-case letter, then a digit or upper-case letter, then digits only.
# Returns 0 otherwise.
sub isgenbank {
  my ($acc) = @_;

  # Two prefix characters plus 4 to 6 digits gives the 6..8 length window.
  return $acc =~ /\A[A-Z][0-9A-Z]\d{4,6}\z/ ? 1 : 0;
}

# Heuristic check for an EMBL-style accession (going by the sub's name).
#
# Returns 1 when the argument is 6 to 8 characters long, its first character
# is an upper-case letter, its second is a digit or upper-case letter, and
# every remaining character is a digit.  Returns 0 otherwise.
sub isembl {
  my ($acc) = @_;

  # Length window first, so the substr calls below are always in range.
  my $len = length $acc;
  return 0 if $len < 6 || $len > 8;

  # Two-character prefix: letter, then letter-or-digit.
  return 0 unless substr($acc, 0, 1) =~ /[A-Z]/;
  return 0 unless substr($acc, 1, 1) =~ /[0-9A-Z]/;

  # Everything after the prefix must be digits.
  return 0 unless substr($acc, 2) =~ /\A\d+\z/;

  return 1;
}

# Heuristic check for a PDB-style identifier (going by the sub's name).
#
# Returns 1 when the argument is 4 or 5 characters long, starts with a
# digit, continues with digits or upper-case letters, and is not made up
# solely of digits.  Returns 0 otherwise.
sub ispdb {
  my ($code) = @_;

  # Overall shape: a leading digit, then 3 or 4 characters of [0-9A-Z].
  return 0 unless $code =~ /\A\d[0-9A-Z]{3,4}\z/;

  # Given the shape above, "not all digits" means at least one upper-case
  # letter is present.  The string can never be all letters because its
  # first character is a digit.
  return 0 unless $code =~ /[A-Z]/;

  return 1;
}
