/**
 *  \file mmcif.cpp
 *  \brief Functions to read PDBs in mmCIF or BinaryCIF format
 *
 *  Copyright 2007-2023 IMP Inventors. All rights reserved.
 *
 */

#include <IMP/atom/mmcif.h>
#include <IMP/atom/Chain.h>
#include <IMP/atom/Residue.h>
#include <IMP/atom/Atom.h>
#include <IMP/atom/internal/pdb.h>
#include "ihm_format.h"

#include <boost/version.hpp>
#define BOOST_FILESYSTEM_VERSION 3
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>

extern "C" {
#include "cmp.c"
#include "ihm_format.c"
}


IMPATOM_BEGIN_NAMESPACE

namespace {

// Thin C++ wrapper that registers a category handler with an ihm_reader.
//
// The constructor passes this object's own address to ihm_category_new() as
// the callback `data` pointer, so callbacks receive a pointer to the Category
// (base) subobject. Because the reader keeps that address, a copy of this
// object would never be the one the reader calls back into; copying is
// therefore disabled.
class Category {
protected:
  // Handle returned by ihm_category_new(); derived classes use it to
  // register the keywords they want to read.
  struct ihm_category *c_;
public:
  // Register `callback` for the category called `name` on `reader`.
  // The remaining null arguments are hooks that this wrapper does not use
  // (see ihm_format.h for their meaning).
  Category(struct ihm_reader *reader, const char *name,
           ihm_category_callback callback) :
    c_(ihm_category_new(reader, name, callback, nullptr, nullptr, this,
                        nullptr)) {}

  // The reader holds a pointer to this exact object; see class comment.
  Category(const Category &) = delete;
  Category &operator=(const Category &) = delete;
};

class AtomSiteCategory : public Category {
  std::string name_, filename_;
  Model *model_;
  IMP::PointerMember<PDBSelector> selector_;
  bool read_all_models_, honor_model_num_;
  internal::StringCifKeyword atom_name_, residue_name_, chain_, auth_chain_,
                       element_, group_, ins_code_, auth_seq_id_, alt_loc_id_;
  internal::IntCifKeyword id_, seq_id_, model_num_;
  internal::FloatCifKeyword x_, y_, z_, occupancy_, temp_factor_;
  Particle *cp_, *rp_, *root_p_;
  Hierarchies *hiers_;
  std::string curr_chain_;
  int curr_seq_id_;
  std::string curr_auth_seq_id_str_;
  std::string curr_label_asym_id_;
  int curr_auth_seq_id_;
  int curr_model_num_;
  std::string curr_residue_icode_;
  std::string hetatm_;
  std::map<std::pair<Particle *, std::string>, Particle *> chain_map_;
  std::map<int, Particle *> root_map_;
  PDBRecord pdb_record_;

  static void callback(struct ihm_reader *, int, void *data,
                       struct ihm_error **) {
    ((AtomSiteCategory *)data)->handle();
  }

public:
  AtomSiteCategory(struct ihm_reader *reader, std::string name,
                   std::string filename, Model *model, Hierarchies *hiers,
                   PDBSelector *selector, bool read_all_models,
                   bool honor_model_num) :
        Category(reader, "_atom_site", callback),
        name_(name), filename_(filename), model_(model),
        selector_(selector),
        read_all_models_(read_all_models),
        honor_model_num_(honor_model_num),
        atom_name_(c_, "label_atom_id"),
        residue_name_(c_, "label_comp_id"),
        chain_(c_, "label_asym_id"),
        auth_chain_(c_, "auth_asym_id"),
        element_(c_, "type_symbol"),
        group_(c_, "group_pdb"),
        ins_code_(c_, "pdbx_pdb_ins_code"),
        auth_seq_id_(c_, "auth_seq_id"),
        alt_loc_id_(c_, "label_alt_id"),
        id_(c_, "id"),
        seq_id_(c_, "label_seq_id"),
        model_num_(c_, "pdbx_pdb_model_num"),
        x_(c_, "cartn_x"),
        y_(c_, "cartn_y"),
        z_(c_, "cartn_z"),
        occupancy_(c_, "occupancy"),
        temp_factor_(c_, "b_iso_or_equiv"),
        cp_(nullptr), rp_(nullptr), root_p_(nullptr),
        hiers_(hiers) {
    pdb_record_.set_keywords(group_, element_, atom_name_, alt_loc_id_,
                             residue_name_, auth_chain_, chain_, auth_seq_id_);
    curr_chain_ = "";
    curr_seq_id_ = 0;
    curr_auth_seq_id_ = 0;
    curr_auth_seq_id_str_ = "";
    curr_label_asym_id_ = "";
    curr_residue_icode_ = "";
    curr_model_num_ = 0;
    hetatm_ = "HETATM";
  }

  void set_root_particle_name(int model_num) {
    std::ostringstream oss;
    oss << model_num;
    std::string root_name = oss.str();
    root_p_->set_name(name_ + ": " + root_name);
  }

  // Get the particle for the top of this model's hierarchy.
  // Return false if this model should be skipped.
  bool get_root_particle(int model_num) {
    if (root_p_ == nullptr || model_num != curr_model_num_) {
      if (!read_all_models_ && root_p_ != nullptr) {
        return false;
      }
      curr_model_num_ = model_num;
      // Check if new model
      if (root_map_.find(model_num) == root_map_.end()) {
        root_p_ = new Particle(model_);
        set_root_particle_name(model_num);
        hiers_->push_back(Hierarchy::setup_particle(root_p_));
        root_map_[model_num] = root_p_;
      } else {
        root_p_ = root_map_[model_num];
      }
      cp_ = nullptr; // make sure we get a new chain
    }
    return true;
  }

  bool get_chain_particle(const std::string &chain,
                          const std::string &label_asym_id) {
    bool new_chain = false;
    if (cp_ == nullptr || chain != curr_chain_) {
      curr_chain_ = chain;
      std::pair<Particle *, std::string> root_chain(root_p_, chain);
      // Check if new chain (for this root)
      if (chain_map_.find(root_chain) == chain_map_.end()) {
        cp_ = internal::chain_particle(model_, chain, filename_);
        Chain(cp_).set_label_asym_id(label_asym_id);
        Hierarchy(root_p_).add_child(Chain(cp_));
        chain_map_[root_chain] = cp_;
        new_chain = true;
      } else {
        cp_ = chain_map_[root_chain];
      }
      rp_ = nullptr; // make sure we get a new residue
    }
    return new_chain;
  }

  // Replace at most maxlen chars in dest, starting at pos, with repl
  void replace(std::string &dest, size_t pos, size_t maxlen, const char *repl) {
    size_t len = std::min(maxlen, strlen(repl));
    if (len > 0) {
      dest.replace(pos, len, repl, len);
    }
  }

  // Return true iff the current atom passes the PDBSelector check
  bool get_is_selected() {
    return selector_->get_is_selected(pdb_record_);
  }

  void handle() {
    if (!get_is_selected()) {
      return;
    }
    if (!get_root_particle(honor_model_num_ ? model_num_.get() : 1)) {
      return;
    }

    Element e = get_element_table().get_element(element_.get());
    int seq_id = seq_id_.get(1);
    std::string residue_icode = ins_code_.get();

    // Use author-provided chain ID if available
    std::string label_asym_id = chain_.get();
    bool new_chain;
    if (strlen(auth_chain_.get()) > 0) {
      new_chain = get_chain_particle(auth_chain_.get(), label_asym_id);
    } else {
      new_chain = get_chain_particle(label_asym_id, label_asym_id);
    }
    std::string auth_seq_id_str = auth_seq_id_.get();
    // Check if new residue
    if (rp_ == nullptr || seq_id != curr_seq_id_
        || residue_icode != curr_residue_icode_
        || auth_seq_id_str != curr_auth_seq_id_str_
        || label_asym_id != curr_label_asym_id_) {
      curr_seq_id_ = seq_id;
      curr_residue_icode_ = residue_icode;
      curr_auth_seq_id_str_ = auth_seq_id_str;
      curr_label_asym_id_ = label_asym_id;
      // use author-provided seq_id and insertion code if available
      const char *start = auth_seq_id_str.c_str();
      char *endptr;
      int auth_seq_id = strtol(start, &endptr, 10);
      // if auth_seq_id is blank, use seq_id instead
      if (endptr == start) auth_seq_id = seq_id;
      char one_icode = 32; // default insertion code (space)

      // Set the chain's sequence offset based on the first residue numbering
      if (new_chain) {
        Chain(cp_).set_sequence_offset(auth_seq_id - seq_id);
      }

      // if auth_seq_id is not blank and contains something after the number,
      // use the first character of that as the insertion code
      if (endptr != start && *endptr) {
        one_icode = *endptr;
      }
      curr_auth_seq_id_ = auth_seq_id;
      rp_ = internal::residue_particle(model_, auth_seq_id, one_icode,
                                       residue_name_.get());
      Chain(cp_).add_child(Residue(rp_));
    }
    Particle *ap = internal::atom_particle(
                       model_, atom_name_.get(), e,
                       group_.get() == hetatm_, id_.get(),
                       curr_auth_seq_id_, x_.get(), y_.get(),
                       z_.get(), occupancy_.get(),
                       temp_factor_.get());
    Residue(rp_).add_child(Atom(ap));
  }
};


// Produce the name used to label hierarchies read from `name`, by passing
// it through boost::filesystem::path and returning the path's string form.
std::string cif_nicename(std::string name) {
  return boost::filesystem::path(name).string();
}

// Read callback given to ihm_file_new(): fill `buffer` with up to
// `buffer_len` bytes from the std::istream that `data` points to.
//
// Returns the number of bytes actually read (0 once the stream is
// exhausted), or -1 with *err set if the stream reports an unrecoverable
// I/O error.
ssize_t read_callback(char *buffer, size_t buffer_len,
                      void *data, struct ihm_error **err)
{
  std::istream *in = static_cast<std::istream *>(data);
  in->read(buffer, buffer_len);
  // A short read at end of file sets eofbit and failbit; that is the normal
  // way to hit EOF, not an error. Only badbit indicates a real I/O failure.
  if (in->bad()) {
    ihm_error_set(err, IHM_ERROR_IO, "IHM IO error");
    return -1;
  }
  return in->gcount();
}

// Shared implementation behind the public mmCIF/BinaryCIF readers.
//
// Parses the stream `in` with an ihm_reader (in binary mode if `binary` is
// set), collecting one Hierarchy per model via AtomSiteCategory. `name` and
// `filename`, `selector`, `read_all_models` and `honor_model_num` are
// forwarded to AtomSiteCategory. Unless `noradii` is set, radii are added
// with internal::add_pdb_radii() before returning.
//
// Throws IOException, carrying the reader's message, if parsing fails.
Hierarchies read_cif(std::istream& in, std::string name, std::string filename,
                     Model* model, PDBSelector *selector,
                     bool read_all_models, bool honor_model_num,
                     bool noradii, bool binary)
{
  // Hold a reference to the selector for the duration of the read
  IMP::PointerMember<PDBSelector> selector_ref(selector);

  struct ihm_file *file = ihm_file_new(read_callback, &in, nullptr);
  struct ihm_reader *reader = ihm_reader_new(file, binary);

  Hierarchies hierarchies;
  AtomSiteCategory atom_sites(reader, name, filename, model, &hierarchies,
                              selector, read_all_models, honor_model_num);

  struct ihm_error *err = nullptr;
  bool more_data;
  const bool ok = ihm_read_file(reader, &more_data, &err);

  // Copy the message out before the error object is released
  std::string errmsg;
  if (!ok) {
    errmsg = err->msg;
    ihm_error_free(err);
  }
  ihm_reader_free(reader);

  if (!ok) {
    IMP_THROW(errmsg, IOException);
  }

  if (noradii) {
    return hierarchies;
  }
  internal::add_pdb_radii(hierarchies);
  return hierarchies;
}

} // anonymous namespace

// Read every model from a text mmCIF input, returning one Hierarchy per
// model number. Throws ValueException if nothing was read.
Hierarchies read_multimodel_mmcif(TextInput in, Model *model,
                                  PDBSelector* selector, bool noradii)
{
  IMP::PointerMember<PDBSelector> selector_ref(selector);
  const std::string filename = in.get_name();
  // read_all_models = true, honor_model_num = true, binary = false
  Hierarchies models = read_cif(in, cif_nicename(filename), filename,
                                model, selector, true, true, noradii, false);
  if (!models.empty()) {
    return models;
  }
  IMP_THROW("No molecule read from file " << in.get_name(), ValueException);
}

// Read a single Hierarchy from a text mmCIF input.
//
// read_cif() is called with read_all_models = false and with
// `select_first_model` as honor_model_num: when it is true only atoms of the
// first model number encountered are kept; when it is false model numbers
// are ignored and all atoms go into one hierarchy.
// Throws ValueException if nothing was read.
Hierarchy read_mmcif(TextInput in, Model *model, PDBSelector* selector,
                     bool select_first_model, bool noradii)
{
  IMP::PointerMember<PDBSelector> selector_ref(selector);
  const std::string filename = in.get_name();
  Hierarchies models = read_cif(in, cif_nicename(filename), filename,
                                model, selector, false, select_first_model,
                                noradii, false);
  if (!models.empty()) {
    return models[0];
  }
  IMP_THROW("No molecule read from file " << in.get_name(), ValueException);
}

// Read every model from a BinaryCIF input, returning one Hierarchy per
// model number. The input is switched to binary open mode before reading.
// Throws ValueException if nothing was read.
Hierarchies read_multimodel_bcif(TextInput in, Model *model,
                                 PDBSelector* selector, bool noradii)
{
  in.set_binary_open_mode(true);
  IMP::PointerMember<PDBSelector> selector_ref(selector);
  const std::string filename = in.get_name();
  // read_all_models = true, honor_model_num = true, binary = true
  Hierarchies models = read_cif(in, cif_nicename(filename), filename,
                                model, selector, true, true, noradii, true);
  if (!models.empty()) {
    return models;
  }
  IMP_THROW("No molecule read from file " << in.get_name(), ValueException);
}

// Read a single Hierarchy from a BinaryCIF input. The input is switched to
// binary open mode before reading.
//
// read_cif() is called with read_all_models = false and with
// `select_first_model` as honor_model_num: when it is true only atoms of the
// first model number encountered are kept; when it is false model numbers
// are ignored and all atoms go into one hierarchy.
// Throws ValueException if nothing was read.
Hierarchy read_bcif(TextInput in, Model *model, PDBSelector* selector,
                    bool select_first_model, bool noradii)
{
  in.set_binary_open_mode(true);
  IMP::PointerMember<PDBSelector> selector_ref(selector);
  const std::string filename = in.get_name();
  Hierarchies models = read_cif(in, cif_nicename(filename), filename,
                                model, selector, false, select_first_model,
                                noradii, true);
  if (!models.empty()) {
    return models[0];
  }
  IMP_THROW("No molecule read from file " << in.get_name(), ValueException);
}

IMPATOM_END_NAMESPACE
