###
#### EBI update procedures
####
#
# Root of the local RDF store; the fetch/load tasks address dataset
# directories as "#{RDF_DIR}/ebi/<dataset>/current".
RDF_DIR = "/data/store/rdf"
# Base IRI for named graphs: each dataset is loaded into "#{GRAPH_NS}/<dataset>".
GRAPH_NS = "http://rdf.ebi.ac.uk/dataset"
#
# Download command (alternatives kept commented out).
# NOTE(review): not referenced in the visible part of this file -- the fetch
# tasks invoke wget directly.
#HTTP_GET = "curl -O"
#HTTP_GET = "wget"
HTTP_GET = "lftpget"
#

####
#### Triple store
####
#
# Command prefix for the isql client; the path of each generated script is
# appended to it and the result is run through sh. The trailing words look
# like port, user and password -- confirm against the server configuration.
ISQL = "/data/store/virtuoso7.1tmp/bin/isql 20891 dba dba"
#ISQL = "/data/store/virtuoso7.1ebi/bin/isql 20851 dba dba"
# SPARQL endpoint URL.
# NOTE(review): not referenced in the visible part of this file.
ENDPOINT = "http://ep.dbcls.jp/sparql71ebi"

# Operator cheat sheet: the 71ebi.sh control commands followed by the
# fetch/load rake invocation for each dataset, with an example release label
# as the trailing argument.
# NOTE(review): this constant is not referenced in the visible part of this file.
USAGE = <<"USAGE"
71ebi.sh stop
71ebi.sh clear
71ebi.sh start


rake -f RakefileEBI biomodels:fetch r26
rake -f RakefileEBI biomodels:load r26

rake -f RakefileEBI reactome:fetch r49
rake -f RakefileEBI reactome:load r49

rake -f RakefileEBI biosamples:fetch v20151118
rake -f RakefileEBI biosamples:load v20151118

rake -f RakefileEBI chembl:fetch 20.1
rake -f RakefileEBI chembl:load 20.1

## TODO check the changing expressionatlas-rdf-XX-XX-XXXX.tar.gz
rake -f RakefileEBI atlas:fetch 13.07
rake -f RakefileEBI atlas:load 13.07

rake -f RakefileEBI ensembl:fetch 84
rake -f RakefileEBI ensembl:load 84

USAGE

# Work out the release label for the current rake invocation.
#
# When ARGV holds more than one entry, the last entry is the label. It is also
# declared as an empty rake task so that rake does not abort with
# "Don't know how to build task <label>" when it reaches that argument.
# Otherwise the label is today's date formatted as YYYYMMDD.
#
# NOTE(review): every ARGV entry is counted; confirm that option words such as
# "-f RakefileEBI" are not left in ARGV by the rake version in use, otherwise
# the date fallback is never reached.
#
# Returns the label as a String.
def set_name
  return Time.now.strftime("%Y%m%d") unless ARGV.size > 1

  label = ARGV.last
  task label.to_sym # empty task: only keeps rake from rejecting the extra argument
  label
end

# Create the directory "<path>/<name>" by shelling out to `mkdir -p`.
#
# path - parent directory
# name - sub-directory to create (the release label in this file's tasks)
#
# Returns the directory path "<path>/<name>" as a String.
def create_subdir(path, name)
  subdir = "#{path}/#{name}"
  sh "mkdir -p #{subdir}"
  subdir
end

# Point the symlink "<path>/current" at "<name>" (replacing any existing link).
#
# path - directory that holds the per-release sub-directories
# name - release sub-directory the "current" link should refer to
#
# The commands are chained with `&&` so that a failed `cd` aborts the whole
# command instead of creating a stray "current" link in the working directory.
def link_current(path, name)
  sh "cd #{path} && ln -snf #{name} current"
end

# Build the path of a new isql script: "isql/<YYYYMMDD-HHMMSS>-<graph>-<name>.isql".
#
# Sleeps one second before taking the timestamp -- presumably so that
# back-to-back calls with the same graph and name get distinct file names.
# Nothing is created on disk here; callers open the returned path for writing.
#
# Returns the script path as a String.
def isql_create(graph, name)
  sleep 1
  stamp = Time.now.strftime("%Y%m%d-%H%M%S")
  "isql/#{stamp}-#{graph}-#{name}.isql"
end

# Append one statement to an isql script, preceded by an ECHOLN of the same
# text so the statement is echoed when the script runs.
#
# file - IO-like object opened for writing
# line - statement text, written verbatim
def isql_write(file, line)
  echo = %(ECHOLN "#{line}";)
  file.puts(echo, line)
end


# Load a single Turtle file into the graph "#{GRAPH_NS}/<graph>".
#
# Writes an isql script (log_enable, DB.DBA.TTLP_MT on the file contents,
# checkpoint) and runs it with the ISQL command.
#
# path  - Turtle file to load, as seen by the database server
# graph - graph name appended to GRAPH_NS; may contain '/'
# name  - release label, used only in the script file name
def load_ttl(path, graph, name)
  # Strip every '/' from the graph name so the script file name never points
  # into a sub-directory of isql/ (sub('/', '') only removed the first one).
  isql = isql_create(graph.delete('/'), name)
  File.open(isql, "w") do |file|
    isql_write(file, "log_enable(3, 1);")
    isql_write(file, "DB.DBA.TTLP_MT(file_to_string_output('#{path}'), '', '#{GRAPH_NS}/#{graph}', 81);")
    isql_write(file, "checkpoint;")
  end
  sh "#{ISQL} #{isql}"
end

# Load a single RDF/XML file into the graph "#{GRAPH_NS}/<graph>".
#
# Writes an isql script (log_enable, DB.DBA.RDF_LOAD_RDFXML_MT on the file
# contents, checkpoint) and runs it with the ISQL command.
#
# path  - RDF/XML file to load, as seen by the database server
# graph - graph name appended to GRAPH_NS; may contain '/'
# name  - release label, used only in the script file name
def load_rdf(path, graph, name)
  # Flatten the graph name for the script file name, as load_ttl and load_dir
  # do, so a '/' in it cannot point into a sub-directory of isql/.
  isql = isql_create(graph.delete('/'), name)
  File.open(isql, "w") do |file|
    isql_write(file, "log_enable(3, 1);")
    isql_write(file, "DB.DBA.RDF_LOAD_RDFXML_MT(file_to_string_output('#{path}'), '', '#{GRAPH_NS}/#{graph}');")
    isql_write(file, "checkpoint;")
  end
  sh "#{ISQL} #{isql}"
end

# Bulk-load the files matching a pattern into the graph "#{GRAPH_NS}/<graph>".
#
# Writes an isql script (log_enable, ld_dir_all for path/pattern/graph,
# rdf_loader_run, checkpoint) and runs it with the ISQL command.
#
# path    - directory handed to ld_dir_all
# pattern - file name pattern handed to ld_dir_all, e.g. '*.ttl'
# graph   - graph name appended to GRAPH_NS; may contain '/'
# name    - release label, used only in the script file name
def load_dir(path, pattern, graph, name)
  # Strip every '/' from the graph name so the script file name never points
  # into a sub-directory of isql/ (sub('/', '') only removed the first one).
  isql = isql_create(graph.delete('/'), name)
  File.open(isql, "w") do |file|
    isql_write(file, "log_enable(3, 1);")
    isql_write(file, "ld_dir_all('#{path}', '#{pattern}', '#{GRAPH_NS}/#{graph}');")
    isql_write(file, "rdf_loader_run();")
    isql_write(file, "checkpoint;")
  end
  sh "#{ISQL} #{isql}"
end

# Record in the metadata graph "#{GRAPH_NS}/graph" which release of a dataset
# is loaded, replacing the record left by an earlier run.
#
# The generated SPARQL first deletes every triple whose subject is
# "#{GRAPH_NS}/graph/<graph>" from the metadata graph, then inserts
#   <#{GRAPH_NS}/graph/<graph>> dct:isVersionOf <#{GRAPH_NS}/<graph>/<name>> .
# The DELETE must target the same subject as the INSERT; otherwise the version
# triples of previous releases are never removed and accumulate.
#
# NOTE(review): the subject IRI (with the extra "/graph/" segment) differs from
# the IRI the data itself is loaded into ("#{GRAPH_NS}/<graph>"). The inserted
# triple is kept as it was; confirm which IRI consumers are expected to query.
#
# graph - dataset/graph name
# name  - release label
def update_graph(graph, name)
  subject = "#{GRAPH_NS}/graph/#{graph}"
  sparql = "sparql
    PREFIX dct: <http://purl.org/dc/terms/>
    DELETE FROM <#{GRAPH_NS}/graph> {
      <#{subject}> ?p ?o .
    }
    WHERE {
      GRAPH <#{GRAPH_NS}/graph> {
        <#{subject}> ?p ?o .
      }
    }
    INSERT DATA INTO <#{GRAPH_NS}/graph> {
      <#{subject}> dct:isVersionOf <#{GRAPH_NS}/#{graph}/#{name}> .
    }
  ;"
  # Flatten the graph name for the script file name so a '/' in it cannot
  # point into a sub-directory of isql/.
  isql = isql_create(graph.delete('/'), name)
  File.open(isql, "w") do |file|
    isql_write(file, sparql)
  end
  sh "#{ISQL} #{isql}"
end


# BioModels tasks. The release label is the extra command-line word read by
# set_name, e.g.
#   rake -f RakefileEBI biomodels:fetch r26
#   rake -f RakefileEBI biomodels:load r26
#
# NOTE(review): the download goes to a path relative to the working directory
# while extraction and loading use "#{RDF_DIR}/ebi/biomodels/current";
# presumably rake is run from "#{RDF_DIR}/ebi" -- confirm.
namespace :biomodels do
  desc "Retrieve BioModels"
  task :fetch do
    name = set_name
    path = create_subdir('biomodels', name)
    # `&&` instead of `;`: a failed cd must abort rather than let wget/tar run
    # in whatever directory the shell happens to be in.
    sh "cd #{path} && wget ftp://ftp.ebi.ac.uk/pub/databases/RDF/biomodels/#{name}/biomodels-rdf.tar.bz2"
    link_current('biomodels', name)
    sh "cd #{RDF_DIR}/ebi/biomodels/current && tar xjvf biomodels-rdf.tar.bz2"
  end

  desc "Load BioModels to EBI ep"
  task :load do
    name = set_name
    load_dir("#{RDF_DIR}/ebi/biomodels/current", '*.rdf', 'biomodels', name)
    update_graph('biomodels', name)
  end
end

# Reactome tasks. The release label is the extra command-line word read by
# set_name, e.g.
#   rake -f RakefileEBI reactome:fetch r49
#   rake -f RakefileEBI reactome:load r49
#
# NOTE(review): the download goes to a path relative to the working directory
# while extraction and loading use "#{RDF_DIR}/ebi/reactome/current";
# presumably rake is run from "#{RDF_DIR}/ebi" -- confirm.
namespace :reactome do
  desc "Retrieve REACTOME"
  task :fetch do
    name = set_name
    path = create_subdir('reactome', name)
    # `&&` instead of `;`: a failed cd must abort rather than let wget/tar run
    # in whatever directory the shell happens to be in.
    sh "cd #{path} && wget ftp://ftp.ebi.ac.uk/pub/databases/RDF/reactome/#{name}/reactome-biopax.tar.bz2"
    link_current('reactome', name)
    sh "cd #{RDF_DIR}/ebi/reactome/current && tar xjvf reactome-biopax.tar.bz2"
  end

  desc "Load REACTOME to EBI ep"
  task :load do
    name = set_name
    # Each pattern is loaded by its own isql script, in this order.
    load_dir("#{RDF_DIR}/ebi/reactome/current", '*.owl', 'reactome', name)
    load_dir("#{RDF_DIR}/ebi/reactome/current", '*.ttl', 'reactome', name)
    update_graph('reactome', name)
  end
end

# BioSamples tasks. The release label is the extra command-line word read by
# set_name and is part of the archive file name, e.g.
#   rake -f RakefileEBI biosamples:fetch v20151118
#   rake -f RakefileEBI biosamples:load v20151118
#
# NOTE(review): the download goes to a path relative to the working directory
# while extraction and loading use "#{RDF_DIR}/ebi/biosamples/current";
# presumably rake is run from "#{RDF_DIR}/ebi" -- confirm.
namespace :biosamples do
  desc "Retrieve BioSamples"
  task :fetch do
    name = set_name
    path = create_subdir('biosamples', name)
    # `&&` instead of `;`: a failed cd must abort rather than let wget/tar run
    # in whatever directory the shell happens to be in.
    sh "cd #{path} && wget ftp://ftp.ebi.ac.uk/pub/databases/biosamples/biosd2rdf/biosd_rdf_#{name}.tar.bz2"
    link_current('biosamples', name)
    sh "cd #{RDF_DIR}/ebi/biosamples/current && tar xjvf biosd_rdf_#{name}.tar.bz2"
  end

  desc "Load BioSamples to EBI ep"
  task :load do
    name = set_name
    # Each pattern is loaded by its own isql script, in this order.
    load_dir("#{RDF_DIR}/ebi/biosamples/current", '*.owl', 'biosamples', name)
    load_dir("#{RDF_DIR}/ebi/biosamples/current", '*.ttl', 'biosamples', name)
    load_dir("#{RDF_DIR}/ebi/biosamples/current", '*.rdf', 'biosamples', name)
    update_graph('biosamples', name)
  end
end

# ChEMBL tasks. The release label is the extra command-line word read by
# set_name and names the remote release directory, e.g.
#   rake -f RakefileEBI chembl:fetch 20.1
#   rake -f RakefileEBI chembl:load 20.1
#
# NOTE(review): the download goes to a path relative to the working directory
# while loading uses "#{RDF_DIR}/ebi/chembl/current"; presumably rake is run
# from "#{RDF_DIR}/ebi" -- confirm.
namespace :chembl do
  desc "Retrieve ChEMBL"
  task :fetch do
    name = set_name
    path = create_subdir('chembl', name)
    # `&&` instead of `;`: a failed cd must abort rather than let the recursive
    # wget mirror into whatever directory the shell happens to be in.
    sh "cd #{path} && wget -r -nH --cut-dirs=5 ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBL-RDF/#{name}"
    link_current('chembl', name)
  end

  desc "Load ChEMBL to EBI ep"
  task :load do
    name = set_name
    load_dir("#{RDF_DIR}/ebi/chembl/current", '*.ttl.gz', 'chembl', name)
    update_graph('chembl', name)
  end
end

# Expression Atlas tasks. The release label is the extra command-line word
# read by set_name and is part of the atlas-rdf archive name, e.g.
#   rake -f RakefileEBI atlas:fetch 13.07
#   rake -f RakefileEBI atlas:load 13.07
#
# NOTE(review): the download goes to a path relative to the working directory
# while extraction and loading use "#{RDF_DIR}/ebi/atlas/current"; presumably
# rake is run from "#{RDF_DIR}/ebi" -- confirm.
namespace :atlas do
  desc "Retrieve Atlas"
  task :fetch do
    name = set_name
    path = create_subdir('atlas', name)
    base_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/rdf"
    atlas_archive = "atlas-rdf-#{name}.tar.gz"
    # TODO: the date in this file name changes between releases and is not
    # derived from the release label; update it by hand when fetching.
    expression_archive = "expressionatlas-rdf-19-11-2014.tar.gz"
    # `&&` instead of `;`: a failed cd must abort rather than let wget/tar run
    # in whatever directory the shell happens to be in.
    sh "cd #{path} && wget #{base_url}/#{atlas_archive}"
    sh "cd #{path} && wget #{base_url}/#{expression_archive}"
    link_current('atlas', name)
    sh "cd #{RDF_DIR}/ebi/atlas/current && tar xvf #{atlas_archive}"
    sh "cd #{RDF_DIR}/ebi/atlas/current && tar xvf #{expression_archive}"
  end

  desc "Load Atlas to EBI ep"
  task :load do
    name = set_name
    # Each pattern is loaded by its own isql script, in this order.
    load_dir("#{RDF_DIR}/ebi/atlas/current", '*.rdf', 'atlas', name)
    load_dir("#{RDF_DIR}/ebi/atlas/current", '*.ttl', 'atlas', name)
    load_dir("#{RDF_DIR}/ebi/atlas/current", '*.owl', 'atlas', name)
    update_graph('atlas', name)
  end
end

# Ensembl tasks. The release label is the extra command-line word read by
# set_name and selects the remote "release-<label>" directory, e.g.
#   rake -f RakefileEBI ensembl:fetch 84
#   rake -f RakefileEBI ensembl:load 84
#
# NOTE(review): the download goes to a path relative to the working directory
# while loading uses "#{RDF_DIR}/ebi/ensembl/current"; presumably rake is run
# from "#{RDF_DIR}/ebi" -- confirm.
namespace :ensembl do
  desc "Retrieve Ensembl"
  task :fetch do
    name = set_name
    path = create_subdir('ensembl', name)
    # `&&` instead of `;`: a failed cd must abort rather than let the recursive
    # wget mirror into whatever directory the shell happens to be in.
    sh "cd #{path} && wget -r -nH --cut-dirs=5 ftp://ftp.ensembl.org/pub/release-#{name}/rdf"
    link_current('ensembl', name)
  end

  desc "Load Ensembl to EBI ep"
  task :load do
    name = set_name
    load_dir("#{RDF_DIR}/ebi/ensembl/current", '*.ttl.gz', 'ensembl', name)
    update_graph('ensembl', name)
  end
end
