###
#### TogoGenome update procedures
####
#
# Rake tasks (namespace :pubchem) that fetch PubChem / ChEBI RDF and load it
# into a triple store by generating and running isql scripts.
# NOTE(review): the heading above says "TogoGenome" although every task in this
# file targets PubChem/ChEBI -- presumably inherited from a template; confirm.

# Root of the RDF store; the load tasks read from "#{RDF_DIR}/ebi/...".
RDF_DIR = "/data/store/rdf"
# Base URI from which named-graph URIs are built ("#{GRAPH_NS}/<graph>").
GRAPH_NS = "http://rdf.ncbi.nlm.nih.gov/pubchem"

####
#### Triple store
####
#
# Command prefix used to run a generated script: "#{ISQL} <script path>".
# The trailing arguments are presumably port, user and password -- confirm.
ISQL = "/data/store/virtuoso7.1ebi/bin/isql 20851 dba dba"
# SPARQL endpoint URL. Not referenced by any code visible in this file.
ENDPOINT = "http://ep.dbcls.jp/sparql71ebi"

# Operator cheat sheet for a full reload. Not printed by any code visible in
# this file.
USAGE = <<"USAGE"
71ebi.sh stop
71ebi.sh clear
71ebi.sh start


rake -f RakefilePubChem pubchem:fetch 2015_11
rake -f RakefilePubChem pubchem:load 2015_11

USAGE

# Returns the version label for the current run.
#
# When more than one command-line argument is present, the last one is used
# as the label (e.g. "2015_11" in `rake ... pubchem:fetch 2015_11`) and is also
# registered as an empty task so rake does not fail with
# "Don't know how to build task". Otherwise the label is today's date
# formatted as YYYYMMDD.
def set_name
  return Time.now.strftime("%Y%m%d") unless ARGV.size > 1

  label = ARGV.last
  task label.to_sym  # no-op task; only there to keep rake from rejecting the label
  label
end

# Creates the directory "<path>/<name>" (including missing parents) through
# the shell and returns that path as a string.
def create_subdir(path, name)
  target = "#{path}/#{name}"
  sh "mkdir -p #{target}"
  target
end

# Points the symlink "<path>/current" at the sibling entry <name>,
# replacing any existing link (ln -snf).
#
# The two commands are chained with "&&": with ";" a failed cd would still
# run ln and create a stray "current" link in the caller's working directory.
def link_current(path, name)
  sh "cd #{path} && ln -snf #{name} current"
end

# Builds the path of a new isql script: "isql/<timestamp>-<graph>-<name>.isql".
#
# Only the path string is produced; neither the file nor the "isql" directory
# is created here. The one-second pause precedes taking the second-resolution
# timestamp, presumably so consecutive calls yield distinct file names.
def isql_create(graph, name)
  sleep 1
  stamp = Time.now.strftime("%Y%m%d-%H%M%S")
  "isql/#{stamp}-#{graph}-#{name}.isql"
end

# Appends one statement to an isql script, preceded by an ECHOLN of the same
# text so the statement is echoed when the script runs.
#
# file - writable IO-like object
# line - statement text, including its terminator
def isql_write(file, line)
  echo = %(ECHOLN "#{line}";)
  file.puts(echo, line)
end

# Generates and runs an isql script that bulk-loads files into a named graph.
#
# path    - directory handed to ld_dir_all
# pattern - file mask handed to ld_dir_all (e.g. '*.ttl.gz')
# graph   - graph name appended to GRAPH_NS; its first "/" is removed for the
#           script file name only
# name    - version label, used in the script file name
def load_dir(path, pattern, graph, name)
  script = isql_create(graph.sub('/',''), name)
  statements = [
    "log_enable(3, 1);",
    "ld_dir_all('#{path}', '#{pattern}', '#{GRAPH_NS}/#{graph}');",
    "rdf_loader_run();",
    "checkpoint;"
  ]
  File.open(script, "w") do |out|
    statements.each { |statement| isql_write(out, statement) }
  end
  sh "#{ISQL} #{script}"
end

# Records which version of a graph is loaded, as a dct:isVersionOf triple in
# the metadata graph <GRAPH_NS/graph>, by generating and running an isql
# script.
#
# graph - graph name; the metadata subject is <GRAPH_NS/graph/<graph>>
# name  - version label; the object is <GRAPH_NS/<graph>/<name>>
#
# The DELETE now targets the same subject the INSERT writes. It previously
# matched <GRAPH_NS/<graph>>, which this method never inserts, so triples
# from earlier runs were never removed and piled up.
def update_graph(graph, name)
  subject = "<#{GRAPH_NS}/graph/#{graph}>"
  sparql = "sparql
    PREFIX dct: <http://purl.org/dc/terms/>
    DELETE FROM <#{GRAPH_NS}/graph> {
      #{subject} ?p ?o .
    }
    WHERE {
      GRAPH <#{GRAPH_NS}/graph> {
        #{subject} ?p ?o .
      }
    }
    INSERT DATA INTO <#{GRAPH_NS}/graph> {
      #{subject} dct:isVersionOf <#{GRAPH_NS}/#{graph}/#{name}> .
    }
  ;"
  # Strip "/" from the script file name, as load_dir does, so a graph name
  # with a slash does not point into a non-existent subdirectory.
  isql = isql_create(graph.sub('/', ''), name)
  File.open(isql, "w") do |file|
    isql_write(file, sparql)
  end
  sh "#{ISQL} #{isql}"
end


# Tasks for mirroring PubChem / ChEBI RDF and loading it into the triple
# store. Each fetch/load task takes its version label from set_name.
namespace :pubchem do
  desc "Retrieve PubChem"
  task :fetch do
    name = set_name
    path = create_subdir('pubchem', name)
    # "&&" so the download script never runs outside the release directory
    # when cd fails (with ";" it would run in the current directory).
    sh "cd #{path} && sh /data/store/rdf/ebi/pubchem/download_script.sh"
    link_current('pubchem', name)
  end

  desc "Load PubChem to EBI ep"
  task :load do
    name = set_name
    # [subdirectory under .../pubchem/current, graph name], in load order.
    datasets = [
      ['compound/general',     'compound'],
      ['substance',            'substance'],
      ['descriptor/compound',  'descriptor/compound'],
      ['descriptor/substance', 'descriptor/substance'],
      ['synonym',              'synonym'],
      ['inchikey',             'inchikey'],
      ['measuregroup',         'measuregroup'],
      ['endpoint',             'endpoint'],
      ['bioassay',             'bioassay'],
      ['protein',              'protein'],
      ['biosystem',            'biosystem'],
      ['conserveddomain',      'conserveddomain'],
      ['gene',                 'gene'],
      ['reference',            'reference'],
      ['source',               'source'],
      ['concept',              'concept']
    ]
    datasets.each do |subdir, graph|
      load_dir("#{RDF_DIR}/ebi/pubchem/current/#{subdir}", '*.ttl.gz', graph, name)
      # The version record uses the graph name without "/"
      # (e.g. 'descriptorcompound'), exactly as before.
      update_graph(graph.delete('/'), name)
    end
  end

  desc "Retrieve CHEBI ontology"
  task :fetch_chebi do
    name = set_name
    path = create_subdir('chebi', name)
    file = "ftp://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.owl"
    # "&&" so wget does not drop the file into the current directory when
    # cd fails.
    sh "cd #{path} && wget #{file}"
    link_current('chebi', name)
  end

  desc "Load CHEBI ontology to EBI ep"
  task :load_chebi do
    name = set_name
    load_dir("#{RDF_DIR}/ebi/chebi/current", 'chebi.owl', 'ruleset', name)
    update_graph('ruleset', name)
  end

  desc "Set Prefix for PubChem SPARQL"
  task :set_prefix do
    # Runs a pre-written SQL script rather than a generated one.
    isql = '/data/store/rdf/ebi/pubchem/set_prefix.sql'
    sh "#{ISQL} #{isql}"
  end
end

