IMP logo
IMP Reference Guide  develop.08ecd6469d,2026/04/02
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 import IMP
6 import IMP.pmi.tools
7 import IMP.pmi.samplers
8 import IMP.pmi.output
9 import IMP.pmi.analysis
10 import IMP.pmi.io
11 import IMP.pmi.alphabets
12 import IMP.rmf
13 import IMP.isd
14 import IMP.pmi.dof
15 import os
16 from pathlib import Path
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import itertools
22 import warnings
23 import math
24 
25 import pickle
26 
27 
28 class _MockMPIValues:
29  """Replace samplers.MPI_values when in test mode"""
30  def get_percentile(self, name):
31  return 0.
32 
33 
34 class _RMFRestraints:
35  """All restraints that are written out to the RMF file"""
36  def __init__(self, model, user_restraints):
37  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
38  self._user_restraints = user_restraints if user_restraints else []
39 
40  def __len__(self):
41  return (len(self._user_restraints)
42  + self._rmf_rs.get_number_of_restraints())
43 
44  def __bool__(self):
45  return len(self) > 0
46 
47  def __getitem__(self, i):
48  class FakePMIWrapper:
49  def __init__(self, r):
50  self.r = IMP.RestraintSet.get_from(r)
51 
52  def get_restraint(self):
53  return self.r
54 
55  lenuser = len(self._user_restraints)
56  if 0 <= i < lenuser:
57  return self._user_restraints[i]
58  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59  r = self._rmf_rs.get_restraint(i - lenuser)
60  return FakePMIWrapper(r)
61  else:
62  raise IndexError("Out of range")
63 
64 
66  """A macro to help setup and run replica exchange.
67  Supports Monte Carlo and molecular dynamics.
68  Produces trajectory RMF files, best PDB structures,
69  and output stat files.
70  """
71  def __init__(self, model, root_hier,
72  monte_carlo_sample_objects=None,
73  molecular_dynamics_sample_objects=None,
74  output_objects=[],
75  rmf_output_objects=None,
76  monte_carlo_temperature=1.0,
77  simulated_annealing=False,
78  simulated_annealing_minimum_temperature=1.0,
79  simulated_annealing_maximum_temperature=2.5,
80  simulated_annealing_minimum_temperature_nframes=100,
81  simulated_annealing_maximum_temperature_nframes=100,
82  replica_exchange_minimum_temperature=1.0,
83  replica_exchange_maximum_temperature=2.5,
84  replica_exchange_swap=True,
85  num_sample_rounds=1,
86  number_of_best_scoring_models=500,
87  monte_carlo_steps=10,
88  self_adaptive=False,
89  molecular_dynamics_steps=10,
90  molecular_dynamics_max_time_step=1.0,
91  number_of_frames=1000,
92  save_coordinates_mode="lowest_temperature",
93  nframes_write_coordinates=1,
94  write_initial_rmf=True,
95  initial_rmf_name_suffix="initial",
96  stat_file_name_suffix="stat",
97  best_pdb_name_suffix="model",
98  mmcif=False,
99  do_clean_first=True,
100  do_create_directories=True,
101  global_output_directory="./",
102  rmf_dir="rmfs/",
103  best_pdb_dir="pdbs/",
104  replica_stat_file_suffix="stat_replica",
105  em_object_for_rmf=None,
106  atomistic=False,
107  replica_exchange_object=None,
108  test_mode=False,
109  score_moved=False,
110  use_nestor=False,
111  nestor_restraints=None,
112  nestor_rmf_fname_prefix="nested",
113  use_jax=False):
114  """Constructor.
115  @param model The IMP model
116  @param root_hier Top-level (System)hierarchy
117  @param monte_carlo_sample_objects Objects for MC sampling, which
118  should generally be a simple list of Mover objects, e.g.
119  from DegreesOfFreedom.get_movers().
120  @param molecular_dynamics_sample_objects Objects for MD sampling,
121  which should generally be a simple list of particles.
122  @param output_objects A list of structural objects and restraints
123  that will be included in output (ie, statistics "stat"
124  files). Any object that provides a get_output() method
125  can be used here. If None is passed
126  the macro will not write stat files.
127  @param rmf_output_objects A list of structural objects and
128  restraints that will be included in rmf. Any object
129  that provides a get_output() method can be used here.
130  @param monte_carlo_temperature MC temp (may need to be optimized
131  based on post-sampling analysis)
132  @param simulated_annealing If True, perform simulated annealing
133  @param simulated_annealing_minimum_temperature Should generally be
134  the same as monte_carlo_temperature.
135  @param simulated_annealing_minimum_temperature_nframes Number of
136  frames to compute at minimum temperature.
137  @param simulated_annealing_maximum_temperature_nframes Number of
138  frames to compute at
139  temps > simulated_annealing_maximum_temperature.
140  @param replica_exchange_minimum_temperature Low temp for REX; should
141  generally be the same as monte_carlo_temperature.
142  @param replica_exchange_maximum_temperature High temp for REX
143  @param replica_exchange_swap Boolean, enable disable temperature
144  swap (Default=True)
145  @param num_sample_rounds Number of rounds of MC/MD per cycle
146  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
147  models to keep around for analysis.
148  @param mmcif If True, write best scoring models in mmCIF format;
149  if False (the default), write in legacy PDB format.
150  @param best_pdb_dir The directory under `global_output_directory`
151  where best-scoring PDB/mmCIF files are written.
152  @param best_pdb_name_suffix Part of the file name for best-scoring
153  PDB/mmCIF files.
154  @param monte_carlo_steps Number of MC steps per round
155  @param self_adaptive self adaptive scheme for Monte Carlo movers
156  @param molecular_dynamics_steps Number of MD steps per round
157  @param molecular_dynamics_max_time_step Max time step for MD
158  @param number_of_frames Number of REX frames to run
159  @param save_coordinates_mode string: how to save coordinates.
160  "lowest_temperature" (default) only the lowest temperatures
161  is saved
162  "25th_score" all replicas whose score is below the 25th
163  percentile
164  "50th_score" all replicas whose score is below the 50th
165  percentile
166  "75th_score" all replicas whose score is below the 75th
167  percentile
168  @param nframes_write_coordinates How often to write the coordinates
169  of a frame
170  @param write_initial_rmf Write the initial configuration
171  @param global_output_directory Folder that will be created to house
172  output.
173  @param test_mode Set to True to avoid writing any files, just test
174  one frame.
175  @param score_moved If True, attempt to speed up Monte Carlo
176  sampling by caching scoring function terms on particles
177  that didn't move.
178  @param use_nestor If True, follows the Nested Sampling workflow
179  of the NestOR module and skips writing stat files and
180  replica stat files.
181  @param nestor_restraints A list of restraints for which
182  likelihoods are to be computed for use by NestOR module.
183  @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
184  files generated by NestOR .
185  @param use_jax If set to True, sample the scoring function using
186  JAX instead of IMP's internal C++ implementation (requires
187  that all PMI restraints used have a JAX implementation).
188  """
189  self.model = model
190  self.vars = {}
191 
192  # add check hierarchy is multistate
193  if output_objects == []:
194  # The "[]" in the default parameters is a global object, so make
195  # our own copy here
196  self.output_objects = []
197  else:
198  self.output_objects = output_objects
199  self.rmf_output_objects = rmf_output_objects
200  if (isinstance(root_hier, IMP.atom.Hierarchy)
201  and not root_hier.get_parent()):
202  if self.output_objects is not None:
203  self.output_objects.append(
204  IMP.pmi.io.TotalScoreOutput(self.model))
205  if self.rmf_output_objects is not None:
206  self.rmf_output_objects.append(
207  IMP.pmi.io.TotalScoreOutput(self.model))
208  self.root_hier = root_hier
209  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
210  self.vars["number_of_states"] = len(states)
211  if len(states) > 1:
212  self.root_hiers = states
213  self.is_multi_state = True
214  else:
215  self.root_hier = root_hier
216  self.is_multi_state = False
217  else:
218  raise TypeError("Must provide System hierarchy (root_hier)")
219 
220  self._rmf_restraints = _RMFRestraints(model, None)
221  self.em_object_for_rmf = em_object_for_rmf
222  self.monte_carlo_sample_objects = monte_carlo_sample_objects
223  self.vars["self_adaptive"] = self_adaptive
224  self.molecular_dynamics_sample_objects = \
225  molecular_dynamics_sample_objects
226  self.replica_exchange_object = replica_exchange_object
227  self.molecular_dynamics_max_time_step = \
228  molecular_dynamics_max_time_step
229  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
230  self.vars["replica_exchange_minimum_temperature"] = \
231  replica_exchange_minimum_temperature
232  self.vars["replica_exchange_maximum_temperature"] = \
233  replica_exchange_maximum_temperature
234  self.vars["replica_exchange_swap"] = replica_exchange_swap
235  self.vars["simulated_annealing"] = simulated_annealing
236  self.vars["simulated_annealing_minimum_temperature"] = \
237  simulated_annealing_minimum_temperature
238  self.vars["simulated_annealing_maximum_temperature"] = \
239  simulated_annealing_maximum_temperature
240  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
241  simulated_annealing_minimum_temperature_nframes
242  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
243  simulated_annealing_maximum_temperature_nframes
244 
245  self.vars["num_sample_rounds"] = num_sample_rounds
246  self.vars[
247  "number_of_best_scoring_models"] = number_of_best_scoring_models
248  self.vars["monte_carlo_steps"] = monte_carlo_steps
249  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
250  self.vars["number_of_frames"] = number_of_frames
251  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
252  "50th_score", "75th_score"):
253  raise Exception("save_coordinates_mode has unrecognized value")
254  else:
255  self.vars["save_coordinates_mode"] = save_coordinates_mode
256  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
257  self.vars["write_initial_rmf"] = write_initial_rmf
258  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
259  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
260  self.vars["mmcif"] = mmcif
261  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
262  self.vars["do_clean_first"] = do_clean_first
263  self.vars["do_create_directories"] = do_create_directories
264  self.vars["global_output_directory"] = global_output_directory
265  self.vars["rmf_dir"] = rmf_dir
266  self.vars["best_pdb_dir"] = best_pdb_dir
267  self.vars["atomistic"] = atomistic
268  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
269  self.vars["geometries"] = None
270  self.test_mode = test_mode
271  self.score_moved = score_moved
272  self.use_jax = use_jax
273  self.nest = use_nestor
274  self.nestor_restraints = nestor_restraints
275  self.nestor_rmf_fname = nestor_rmf_fname_prefix
276 
277  def add_geometries(self, geometries):
278  if self.vars["geometries"] is None:
279  self.vars["geometries"] = list(geometries)
280  else:
281  self.vars["geometries"].extend(geometries)
282 
283  def show_info(self):
284  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
285  "rmfs/*.rmf3 for each replica ")
286  print("--- it stores the best scoring pdb models in pdbs/")
287  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
288  "lowest temperature")
289  print("--- variables:")
290  keys = list(self.vars.keys())
291  keys.sort()
292  for v in keys:
293  print("------", v.ljust(30), self.vars[v])
294  print("Use nestor: ", self.nest)
295 
296  def get_replica_exchange_object(self):
297  return self.replica_exchange_object
298 
299  def _add_provenance(self, sampler_md, sampler_mc):
300  """Record details about the sampling in the IMP Hierarchies"""
301  iterations = 0
302  if sampler_md:
303  method = "Molecular Dynamics"
304  iterations += self.vars["molecular_dynamics_steps"]
305  if sampler_mc:
306  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
307  iterations += self.vars["monte_carlo_steps"]
308  # If no sampling is actually done, no provenance to write
309  if iterations == 0 or self.vars["number_of_frames"] == 0:
310  return
311  iterations *= self.vars["num_sample_rounds"]
312 
313  pi = self.model.add_particle("sampling")
315  self.model, pi, method, self.vars["number_of_frames"],
316  iterations)
317  p.set_number_of_replicas(
318  self.replica_exchange_object.get_number_of_replicas())
319  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
320  IMP.core.add_provenance(self.model, self.root_hier, p)
321 
322  def execute_macro(self):
323  temp_index_factor = 100000.0
324  samplers = []
325  sampler_mc = None
326  sampler_md = None
327  if self.monte_carlo_sample_objects is not None:
328  print("Setting up MonteCarlo")
329  sampler_mc = IMP.pmi.samplers.MonteCarlo(
330  self.model, self.monte_carlo_sample_objects,
331  self.vars["monte_carlo_temperature"],
332  score_moved=self.score_moved,
333  use_jax=self.use_jax)
334  if self.vars["simulated_annealing"]:
335  tmin = self.vars["simulated_annealing_minimum_temperature"]
336  tmax = self.vars["simulated_annealing_maximum_temperature"]
337  nfmin = self.vars[
338  "simulated_annealing_minimum_temperature_nframes"]
339  nfmax = self.vars[
340  "simulated_annealing_maximum_temperature_nframes"]
341  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
342  if self.vars["self_adaptive"]:
343  sampler_mc.set_self_adaptive(
344  isselfadaptive=self.vars["self_adaptive"])
345  if self.output_objects is not None:
346  self.output_objects.append(sampler_mc)
347  if self.rmf_output_objects is not None:
348  self.rmf_output_objects.append(sampler_mc)
349  samplers.append(sampler_mc)
350 
351  if self.molecular_dynamics_sample_objects is not None:
352  print("Setting up MolecularDynamics")
354  self.model, self.molecular_dynamics_sample_objects,
355  self.vars["monte_carlo_temperature"],
356  maximum_time_step=self.molecular_dynamics_max_time_step,
357  use_jax=self.use_jax)
358  if self.vars["simulated_annealing"]:
359  tmin = self.vars["simulated_annealing_minimum_temperature"]
360  tmax = self.vars["simulated_annealing_maximum_temperature"]
361  nfmin = self.vars[
362  "simulated_annealing_minimum_temperature_nframes"]
363  nfmax = self.vars[
364  "simulated_annealing_maximum_temperature_nframes"]
365  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
366  if self.output_objects is not None:
367  self.output_objects.append(sampler_md)
368  if self.rmf_output_objects is not None:
369  self.rmf_output_objects.append(sampler_md)
370  samplers.append(sampler_md)
371 # -------------------------------------------------------------------------
372 
373  print("Setting up ReplicaExchange")
375  self.model, self.vars["replica_exchange_minimum_temperature"],
376  self.vars["replica_exchange_maximum_temperature"], samplers,
377  replica_exchange_object=self.replica_exchange_object)
378  self.replica_exchange_object = rex.rem
379 
380  myindex = rex.get_my_index()
381  if self.output_objects is not None:
382  self.output_objects.append(rex)
383  if self.rmf_output_objects is not None:
384  self.rmf_output_objects.append(rex)
385  # must reset the minimum temperature due to the
386  # different binary length of rem.get_my_parameter double and python
387  # float
388  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
389 
390 # -------------------------------------------------------------------------
391 
392  globaldir = self.vars["global_output_directory"] + "/"
393  rmf_dir = globaldir + self.vars["rmf_dir"]
394  pdb_dir = globaldir + self.vars["best_pdb_dir"]
395 
396  if not self.test_mode and not self.nest:
397  if self.vars["do_clean_first"]:
398  pass
399 
400  if self.vars["do_create_directories"]:
401 
402  try:
403  os.makedirs(globaldir)
404  except: # noqa: E722
405  pass
406  try:
407  os.makedirs(rmf_dir)
408  except: # noqa: E722
409  pass
410 
411  if not self.is_multi_state:
412  try:
413  os.makedirs(pdb_dir)
414  except: # noqa: E722
415  pass
416  else:
417  for n in range(self.vars["number_of_states"]):
418  try:
419  os.makedirs(pdb_dir + "/" + str(n))
420  except: # noqa: E722
421  pass
422 
423 # -------------------------------------------------------------------------
424 
426  if self.output_objects is not None:
427  self.output_objects.append(sw)
428  if self.rmf_output_objects is not None:
429  self.rmf_output_objects.append(sw)
430 
431  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
432 
433  if not self.nest:
434  print("Setting up stat file")
435  low_temp_stat_file = globaldir + \
436  self.vars["stat_file_name_suffix"] + "." + \
437  str(myindex) + ".out"
438 
439  # Ensure model is updated before saving init files
440  if not self.test_mode:
441  self.model.update()
442 
443  if not self.test_mode and not self.nest:
444  if self.output_objects is not None:
445  output.init_stat2(low_temp_stat_file,
446  self.output_objects,
447  extralabels=["rmf_file", "rmf_frame_index"])
448  else:
449  print("Stat file writing is disabled")
450 
451  if self.rmf_output_objects is not None and not self.nest:
452  print("Stat info being written in the rmf file")
453 
454  if not self.test_mode and not self.nest:
455  print("Setting up replica stat file")
456  replica_stat_file = globaldir + \
457  self.vars["replica_stat_file_suffix"] + "." + \
458  str(myindex) + ".out"
459  if not self.test_mode:
460  output.init_stat2(replica_stat_file, [rex],
461  extralabels=["score"])
462 
463  print("Setting up best pdb files")
464  if not self.is_multi_state:
465  if self.vars["number_of_best_scoring_models"] > 0:
466  output.init_pdb_best_scoring(
467  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
468  self.root_hier,
469  self.vars["number_of_best_scoring_models"],
470  replica_exchange=True,
471  mmcif=self.vars['mmcif'],
472  best_score_file=globaldir + "best.scores.rex.py")
473  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
474  output.write_psf(
475  pdb_dir + "/" + "model.psf",
476  pdb_dir + "/" +
477  self.vars["best_pdb_name_suffix"] + pdbext)
478  else:
479  if self.vars["number_of_best_scoring_models"] > 0:
480  for n in range(self.vars["number_of_states"]):
481  output.init_pdb_best_scoring(
482  pdb_dir + "/" + str(n) + "/" +
483  self.vars["best_pdb_name_suffix"],
484  self.root_hiers[n],
485  self.vars["number_of_best_scoring_models"],
486  replica_exchange=True,
487  mmcif=self.vars['mmcif'],
488  best_score_file=globaldir + "best.scores.rex.py")
489  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
490  output.write_psf(
491  pdb_dir + "/" + str(n) + "/" + "model.psf",
492  pdb_dir + "/" + str(n) + "/" +
493  self.vars["best_pdb_name_suffix"] + pdbext)
494 # ---------------------------------------------
495 
496  if self.em_object_for_rmf is not None:
497  output_hierarchies = [
498  self.root_hier,
499  self.em_object_for_rmf.get_density_as_hierarchy(
500  )]
501  else:
502  output_hierarchies = [self.root_hier]
503 
504  if not self.test_mode and not self.nest:
505  print("Setting up and writing initial rmf coordinate file")
506  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
507  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
508  output_hierarchies,
509  listofobjects=self.rmf_output_objects)
510  if self._rmf_restraints:
511  output.add_restraints_to_rmf(
512  init_suffix + "." + str(myindex) + ".rmf3",
513  self._rmf_restraints)
514  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
515  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
516 
517  if not self.test_mode:
518  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
519  else:
520  mpivs = _MockMPIValues()
521 
522  self._add_provenance(sampler_md, sampler_mc)
523 
524  if not self.test_mode and not self.nest:
525  print("Setting up production rmf files")
526  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
527  output.init_rmf(rmfname, output_hierarchies,
528  geometries=self.vars["geometries"],
529  listofobjects=self.rmf_output_objects)
530 
531  if self._rmf_restraints:
532  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
533 
534  if not self.test_mode and self.nest:
535  print("Setting up NestOR rmf files")
536  nestor_rmf_fname = str(self.nestor_rmf_fname) + '_' + \
537  str(self.replica_exchange_object.get_my_index()) + '.rmf3'
538 
539  output.init_rmf(nestor_rmf_fname, output_hierarchies,
540  geometries=self.vars["geometries"],
541  listofobjects=self.rmf_output_objects)
542 
543  ntimes_at_low_temp = 0
544 
545  if myindex == 0 and not self.nest:
546  self.show_info()
547  self.replica_exchange_object.set_was_used(True)
548  nframes = self.vars["number_of_frames"]
549  if self.test_mode:
550  nframes = 1
551 
552  sampled_likelihoods = []
553  for i in range(nframes):
554  if self.test_mode:
555  score = 0.
556  else:
557  score = None
558  for nr in range(self.vars["num_sample_rounds"]):
559  if sampler_md is not None:
560  score = sampler_md.optimize(
561  self.vars["molecular_dynamics_steps"])
562  if sampler_mc is not None:
563  score = sampler_mc.optimize(
564  self.vars["monte_carlo_steps"])
565  if score is None:
567  self.model).evaluate(False)
568  elif IMP.get_check_level() >= IMP.USAGE_AND_INTERNAL:
569  # Final score from samplers should match the current
570  # score of the Model
571  check_score = IMP.pmi.tools.get_restraint_set(
572  self.model).evaluate(False)
573  assert abs(score - check_score) < 1e-4
574  mpivs.set_value("score", score)
575  if not self.nest:
576  output.set_output_entry("score", score)
577 
578  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
579 
580  if self.vars["save_coordinates_mode"] == "lowest_temperature":
581  save_frame = (min_temp_index == my_temp_index)
582  elif self.vars["save_coordinates_mode"] == "25th_score":
583  score_perc = mpivs.get_percentile("score")
584  save_frame = (score_perc*100.0 <= 25.0)
585  elif self.vars["save_coordinates_mode"] == "50th_score":
586  score_perc = mpivs.get_percentile("score")
587  save_frame = (score_perc*100.0 <= 50.0)
588  elif self.vars["save_coordinates_mode"] == "75th_score":
589  score_perc = mpivs.get_percentile("score")
590  save_frame = (score_perc*100.0 <= 75.0)
591 
592  # Ensure model is updated before saving output files
593  if save_frame and not self.test_mode:
594  self.model.update()
595 
596  if save_frame:
597  print("--- frame %s score %s " % (str(i), str(score)))
598 
599  if self.nest:
600  if math.isnan(score):
601  sampled_likelihoods.append(math.nan)
602  else:
603  likelihood_for_sample = 1
604  for rstrnt in self.nestor_restraints:
605  likelihood_for_sample *= rstrnt.get_likelihood()
606  sampled_likelihoods.append(likelihood_for_sample)
607  output.write_rmf(nestor_rmf_fname)
608 
609  if not self.test_mode and not self.nest:
610  if i % self.vars["nframes_write_coordinates"] == 0:
611  print('--- writing coordinates')
612  if self.vars["number_of_best_scoring_models"] > 0:
613  output.write_pdb_best_scoring(score)
614  output.write_rmf(rmfname)
615  output.set_output_entry("rmf_file", rmfname)
616  output.set_output_entry("rmf_frame_index",
617  ntimes_at_low_temp)
618  else:
619  output.set_output_entry("rmf_file", rmfname)
620  output.set_output_entry("rmf_frame_index", '-1')
621  if self.output_objects is not None:
622  output.write_stat2(low_temp_stat_file)
623  ntimes_at_low_temp += 1
624 
625  if not self.test_mode and not self.nest:
626  output.write_stat2(replica_stat_file)
627  if self.vars["replica_exchange_swap"]:
628  rex.swap_temp(i, score)
629 
630  if self.nest and len(sampled_likelihoods) > 0:
631  with open("likelihoods_"
632  + str(self.replica_exchange_object.get_my_index()),
633  "wb") as lif:
634  pickle.dump(sampled_likelihoods, lif)
635 
636  output.close_rmf(nestor_rmf_fname)
637 
638  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
639  p.add_replica_exchange(state, self)
640 
641  if not self.test_mode and not self.nest:
642  print("closing production rmf files")
643  output.close_rmf(rmfname)
644 
645 
647  """A macro to build a IMP::pmi::topology::System based on a
648  TopologyReader object.
649 
650  Easily create multi-state systems by calling this macro
651  repeatedly with different TopologyReader objects!
652  A useful function is get_molecules() which returns the PMI Molecules
653  grouped by state as a dictionary with key = (molecule name),
654  value = IMP.pmi.topology.Molecule
655  Quick multi-state system:
656  @code{.python}
657  model = IMP.Model()
658  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
659  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
660  bs = IMP.pmi.macros.BuildSystem(model)
661  bs.add_state(reader1)
662  bs.add_state(reader2)
663  bs.execute_macro() # build everything including degrees of freedom
664  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
665  ### now you have a two state system, you add restraints etc
666  @endcode
667  @note The "domain name" entry of the topology reader is not used.
668  All molecules are set up by the component name, but split into rigid bodies
669  as requested.
670  """
671 
672  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
673  'RNA': IMP.pmi.alphabets.rna}
674 
675  def __init__(self, model, sequence_connectivity_scale=4.0,
676  force_create_gmm_files=False, resolutions=[1, 10],
677  name='System'):
678  """Constructor
679  @param model An IMP Model
680  @param sequence_connectivity_scale For scaling the connectivity
681  restraint
682  @param force_create_gmm_files If True, will sample and create GMMs
683  no matter what. If False, will only sample if the
684  files don't exist. If number of Gaussians is zero, won't
685  do anything.
686  @param resolutions The resolutions to build for structured regions
687  @param name The name of the top-level hierarchy node.
688  """
689  self.model = model
690  self.system = IMP.pmi.topology.System(self.model, name=name)
691  self._readers = [] # the TopologyReaders (one per state)
692  # TempResidues for each domain key=unique name,
693  # value=(atomic_res,non_atomic_res).
694  self._domain_res = []
695  self._domains = [] # key = domain unique name, value = Component
696  self.force_create_gmm_files = force_create_gmm_files
697  self.resolutions = resolutions
698 
699  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
700  chain_ids=None):
701  """Add a state using the topology info in a
702  IMP::pmi::topology::TopologyReader object.
703  When you are done adding states, call execute_macro()
704  @param reader The TopologyReader object
705  @param keep_chain_id If True, keep the chain IDs from the
706  original PDB files, if available
707  @param fasta_name_map dictionary for converting protein names
708  found in the fasta file
709  @param chain_ids A list or string of chain IDs for assigning to
710  newly-created molecules, e.g.
711  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
712  If not specified, chain IDs A through Z are assigned, then
713  AA through AZ, then BA through BZ, and so on, in the same
714  fashion as PDB.
715  """
716  state = self.system.create_state()
717  self._readers.append(reader)
718  # key is unique name, value is (atomic res, nonatomicres)
719  these_domain_res = {}
720  these_domains = {} # key is unique name, value is _Component
721  if chain_ids is None:
722  chain_ids = IMP.pmi.output._ChainIDs()
723  numchain = 0
724 
725  # setup representation
726  # loop over molecules, copies, then domains
727  for molname in reader.get_molecules():
728  copies = reader.get_molecules()[molname].domains
729  for nc, copyname in enumerate(copies):
730  print("BuildSystem.add_state: setting up molecule %s copy "
731  "number %s" % (molname, str(nc)))
732  copy = copies[copyname]
733  # option to not rename chains
734  if keep_chain_id:
735  all_chains = [c for c in copy if c.chain is not None]
736  if all_chains:
737  chain_id = all_chains[0].chain
738  else:
739  chain_id = chain_ids[numchain]
740  warnings.warn(
741  "No PDBs specified for %s, so keep_chain_id has "
742  "no effect; using default chain ID '%s'"
743  % (molname, chain_id), IMP.pmi.ParameterWarning)
744  else:
745  chain_id = chain_ids[numchain]
746  if nc == 0:
747  alphabet = IMP.pmi.alphabets.amino_acid
748  fasta_flag = copy[0].fasta_flag
749  if fasta_flag in self._alphabets:
750  alphabet = self._alphabets[fasta_flag]
752  copy[0].fasta_file, fasta_name_map)
753  seq = seqs[copy[0].fasta_id]
754  print("BuildSystem.add_state: molecule %s sequence has "
755  "%s residues" % (molname, len(seq)))
756  orig_mol = state.create_molecule(
757  molname, seq, chain_id, alphabet=alphabet,
758  uniprot=seqs.uniprot.get(copy[0].fasta_id))
759  mol = orig_mol
760  numchain += 1
761  else:
762  print("BuildSystem.add_state: creating a copy for "
763  "molecule %s" % molname)
764  mol = orig_mol.create_copy(chain_id)
765  numchain += 1
766 
767  for domainnumber, domain in enumerate(copy):
768  print("BuildSystem.add_state: ---- setting up domain %s "
769  "of molecule %s" % (domainnumber, molname))
770  # we build everything in the residue range, even if it
771  # extends beyond what's in the actual PDB file
772  these_domains[domain.get_unique_name()] = domain
773  if domain.residue_range == [] or \
774  domain.residue_range is None:
775  domain_res = mol.get_residues()
776  else:
777  start = domain.residue_range[0]+domain.pdb_offset
778  if domain.residue_range[1] == 'END':
779  end = len(mol.sequence)
780  else:
781  end = domain.residue_range[1]+domain.pdb_offset
782  domain_res = mol.residue_range(start-1, end-1)
783  print("BuildSystem.add_state: -------- domain %s of "
784  "molecule %s extends from residue %s to "
785  "residue %s "
786  % (domainnumber, molname, start, end))
787  if domain.pdb_file == "BEADS":
788  print("BuildSystem.add_state: -------- domain %s of "
789  "molecule %s represented by BEADS "
790  % (domainnumber, molname))
791  mol.add_representation(
792  domain_res,
793  resolutions=[domain.bead_size],
794  setup_particles_as_densities=(
795  domain.em_residues_per_gaussian != 0),
796  color=domain.color)
797  these_domain_res[domain.get_unique_name()] = \
798  (set(), domain_res)
799  elif domain.pdb_file == "IDEAL_HELIX":
800  print("BuildSystem.add_state: -------- domain %s of "
801  "molecule %s represented by IDEAL_HELIX "
802  % (domainnumber, molname))
803  emper = domain.em_residues_per_gaussian
804  mol.add_representation(
805  domain_res,
806  resolutions=self.resolutions,
807  ideal_helix=True,
808  density_residues_per_component=emper,
809  density_prefix=domain.density_prefix,
810  density_force_compute=self.force_create_gmm_files,
811  color=domain.color)
812  these_domain_res[domain.get_unique_name()] = \
813  (domain_res, set())
814  else:
815  print("BuildSystem.add_state: -------- domain %s of "
816  "molecule %s represented by pdb file %s "
817  % (domainnumber, molname, domain.pdb_file))
818  domain_atomic = mol.add_structure(domain.pdb_file,
819  domain.chain,
820  domain.residue_range,
821  domain.pdb_offset,
822  soft_check=True)
823  domain_non_atomic = domain_res - domain_atomic
824  if not domain.em_residues_per_gaussian:
825  mol.add_representation(
826  domain_atomic, resolutions=self.resolutions,
827  color=domain.color)
828  if len(domain_non_atomic) > 0:
829  mol.add_representation(
830  domain_non_atomic,
831  resolutions=[domain.bead_size],
832  color=domain.color)
833  else:
834  print("BuildSystem.add_state: -------- domain %s "
835  "of molecule %s represented by gaussians "
836  % (domainnumber, molname))
837  emper = domain.em_residues_per_gaussian
838  creategmm = self.force_create_gmm_files
839  mol.add_representation(
840  domain_atomic,
841  resolutions=self.resolutions,
842  density_residues_per_component=emper,
843  density_prefix=domain.density_prefix,
844  density_force_compute=creategmm,
845  color=domain.color)
846  if len(domain_non_atomic) > 0:
847  mol.add_representation(
848  domain_non_atomic,
849  resolutions=[domain.bead_size],
850  setup_particles_as_densities=True,
851  color=domain.color)
852  these_domain_res[domain.get_unique_name()] = (
853  domain_atomic, domain_non_atomic)
854  self._domain_res.append(these_domain_res)
855  self._domains.append(these_domains)
856  print('BuildSystem.add_state: State', len(self.system.states), 'added')
857  return state
858 
859  def get_molecules(self):
860  """Return list of all molecules grouped by state.
861  For each state, it's a dictionary of Molecules where key is the
862  molecule name
863  """
864  return [s.get_molecules() for s in self.system.get_states()]
865 
866  def get_molecule(self, molname, copy_index=0, state_index=0):
867  return self.system.get_states()[state_index].get_molecules()[
868  molname][copy_index]
869 
870  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
871  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
872  """Builds representations and sets up degrees of freedom"""
873  print("BuildSystem.execute_macro: building representations")
874  self.root_hier = self.system.build()
875 
876  print("BuildSystem.execute_macro: setting up degrees of freedom")
877  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
878  for nstate, reader in enumerate(self._readers):
879  rbs = reader.get_rigid_bodies()
880  srbs = reader.get_super_rigid_bodies()
881  csrbs = reader.get_chains_of_super_rigid_bodies()
882 
883  # add rigid bodies
884  domains_in_rbs = set()
885  for rblist in rbs:
886  print("BuildSystem.execute_macro: -------- building rigid "
887  "body %s" % (str(rblist)))
888  all_res = IMP.pmi.tools.OrderedSet()
889  bead_res = IMP.pmi.tools.OrderedSet()
890  for dname in rblist:
891  domain = self._domains[nstate][dname]
892  print("BuildSystem.execute_macro: -------- adding %s"
893  % (str(dname)))
894  all_res |= self._domain_res[nstate][dname][0]
895  bead_res |= self._domain_res[nstate][dname][1]
896  domains_in_rbs.add(dname)
897  all_res |= bead_res
898  print("BuildSystem.execute_macro: -------- creating rigid "
899  "body with max_trans %s max_rot %s "
900  "non_rigid_max_trans %s"
901  % (str(max_rb_trans), str(max_rb_rot),
902  str(max_bead_trans)))
903  self.dof.create_rigid_body(all_res,
904  nonrigid_parts=bead_res,
905  max_trans=max_rb_trans,
906  max_rot=max_rb_rot,
907  nonrigid_max_trans=max_bead_trans,
908  name="RigidBody %s" % dname)
909 
910  # if you have any domains not in an RB, set them as flexible beads
911  for dname, domain in self._domains[nstate].items():
912  if dname not in domains_in_rbs:
913  if domain.pdb_file != "BEADS":
914  warnings.warn(
915  "No rigid bodies set for %s. Residues read from "
916  "the PDB file will not be sampled - only regions "
917  "missing from the PDB will be treated flexibly. "
918  "To sample the entire sequence, use BEADS instead "
919  "of a PDB file name" % dname,
921  self.dof.create_flexible_beads(
922  self._domain_res[nstate][dname][1],
923  max_trans=max_bead_trans)
924 
925  # add super rigid bodies
926  for srblist in srbs:
927  print("BuildSystem.execute_macro: -------- building "
928  "super rigid body %s" % (str(srblist)))
929  all_res = IMP.pmi.tools.OrderedSet()
930  for dname in srblist:
931  print("BuildSystem.execute_macro: -------- adding %s"
932  % (str(dname)))
933  all_res |= self._domain_res[nstate][dname][0]
934  all_res |= self._domain_res[nstate][dname][1]
935 
936  print("BuildSystem.execute_macro: -------- creating super "
937  "rigid body with max_trans %s max_rot %s "
938  % (str(max_srb_trans), str(max_srb_rot)))
939  self.dof.create_super_rigid_body(
940  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
941 
942  # add chains of super rigid bodies
943  for csrblist in csrbs:
944  all_res = IMP.pmi.tools.OrderedSet()
945  for dname in csrblist:
946  all_res |= self._domain_res[nstate][dname][0]
947  all_res |= self._domain_res[nstate][dname][1]
948  all_res = list(all_res)
949  all_res.sort(key=lambda r: r.get_index())
950  self.dof.create_main_chain_mover(all_res)
951  return self.root_hier, self.dof
952 
953 
954 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
956  """A macro for running all the basic operations of analysis.
957  Includes clustering, precision analysis, and making ensemble density maps.
958  A number of plots are also supported.
959  """
960  def __init__(self, model,
961  merge_directories=["./"],
962  stat_file_name_suffix="stat",
963  best_pdb_name_suffix="model",
964  do_clean_first=True,
965  do_create_directories=True,
966  global_output_directory="output/",
967  replica_stat_file_suffix="stat_replica",
968  global_analysis_result_directory="./analysis/",
969  test_mode=False):
970  """Constructor.
971  @param model The IMP model
972  @param stat_file_name_suffix
973  @param merge_directories The directories containing output files
974  @param best_pdb_name_suffix
975  @param do_clean_first
976  @param do_create_directories
977  @param global_output_directory Where everything is
978  @param replica_stat_file_suffix
979  @param global_analysis_result_directory
980  @param test_mode If True, nothing is changed on disk
981  """
982 
983  try:
984  from mpi4py import MPI
985  self.comm = MPI.COMM_WORLD
986  self.rank = self.comm.Get_rank()
987  self.number_of_processes = self.comm.size
988  except ImportError:
989  self.rank = 0
990  self.number_of_processes = 1
991 
992  self.test_mode = test_mode
993  self._protocol_output = []
994  self.cluster_obj = None
995  self.model = model
996  stat_dir = global_output_directory
997  self.stat_files = []
998  # it contains the position of the root directories
999  for rd in merge_directories:
1000  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
1001  if len(stat_files) == 0:
1002  warnings.warn("no stat files found in %s"
1003  % os.path.join(rd, stat_dir),
1005  self.stat_files += stat_files
1006 
1007  def add_protocol_output(self, p):
1008  """Capture details of the modeling protocol.
1009  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1010  """
1011  # Assume last state is the one we're interested in
1012  self._protocol_output.append((p, p._last_state))
1013 
1014  def get_modeling_trajectory(self,
1015  score_key="Total_Score",
1016  rmf_file_key="rmf_file",
1017  rmf_file_frame_key="rmf_frame_index",
1018  outputdir="./",
1019  get_every=1,
1020  nframes_trajectory=10000):
1021  """ Get a trajectory of the modeling run, for generating
1022  demonstrative movies
1023 
1024  @param score_key The score for ranking models
1025  @param rmf_file_key Key pointing to RMF filename
1026  @param rmf_file_frame_key Key pointing to RMF frame number
1027  @param outputdir The local output directory used in the run
1028  @param get_every Extract every nth frame
1029  @param nframes_trajectory Total number of frames of the trajectory
1030  """
1031  import math
1032 
1033  trajectory_models = IMP.pmi.io.get_trajectory_models(
1034  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1035  get_every)
1036  score_list = list(map(float, trajectory_models[2]))
1037 
1038  max_score = max(score_list)
1039  min_score = min(score_list)
1040 
1041  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1042  for i in range(nframes_trajectory)]
1043  binned_scores = [None]*nframes_trajectory
1044  binned_model_indexes = [-1]*nframes_trajectory
1045 
1046  for model_index, s in enumerate(score_list):
1047  bins_score_diffs = [abs(s-b) for b in bins]
1048  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1049  if binned_scores[bin_index] is None:
1050  binned_scores[bin_index] = s
1051  binned_model_indexes[bin_index] = model_index
1052  else:
1053  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1054  new_diff = abs(s-bins[bin_index])
1055  if new_diff < old_diff:
1056  binned_scores[bin_index] = s
1057  binned_model_indexes[bin_index] = model_index
1058 
1059  print(binned_scores)
1060  print(binned_model_indexes)
1061 
1062  def _expand_ambiguity(self, prot, d):
1063  """If using PMI2, expand the dictionary to include copies as
1064  ambiguous options
1065 
1066  This also keeps the states separate.
1067  """
1068  newdict = {}
1069  for key in d:
1070  val = d[key]
1071  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1072  newdict[key] = val
1073  continue
1074  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1075  if isinstance(val, tuple):
1076  start = val[0]
1077  stop = val[1]
1078  name = val[2]
1079  else:
1080  start = 1
1081  stop = -1
1082  name = val
1083  for nst in range(len(states)):
1084  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1085  copies = sel.get_selected_particles(with_representation=False)
1086  if len(copies) > 1:
1087  for nc in range(len(copies)):
1088  if len(states) > 1:
1089  newdict['%s.%i..%i' % (name, nst, nc)] = \
1090  (start, stop, name, nc, nst)
1091  else:
1092  newdict['%s..%i' % (name, nc)] = \
1093  (start, stop, name, nc, nst)
1094  else:
1095  newdict[key] = val
1096  return newdict
1097 
1098  def clustering(self,
1099  score_key="Total_Score",
1100  rmf_file_key="rmf_file",
1101  rmf_file_frame_key="rmf_frame_index",
1102  state_number=0,
1103  prefiltervalue=None,
1104  feature_keys=[],
1105  outputdir="./",
1106  alignment_components=None,
1107  number_of_best_scoring_models=10,
1108  rmsd_calculation_components=None,
1109  distance_matrix_file='distances.mat',
1110  load_distance_matrix_file=False,
1111  skip_clustering=False,
1112  number_of_clusters=1,
1113  display_plot=False,
1114  exit_after_display=True,
1115  get_every=1,
1116  first_and_last_frames=None,
1117  density_custom_ranges=None,
1118  write_pdb_with_centered_coordinates=False,
1119  voxel_size=5.0):
1120  """Get the best scoring models, compute a distance matrix,
1121  cluster them, and create density maps.
1122 
1123  Tuple format: "molname" just the molecule,
1124  or (start,stop,molname,copy_num(optional),state_num(optional)
1125  Can pass None for copy or state to ignore that field.
1126  If you don't pass a specific copy number
1127 
1128  @param score_key The score for ranking models.
1129  @param rmf_file_key Key pointing to RMF filename
1130  @param rmf_file_frame_key Key pointing to RMF frame number
1131  @param state_number State number to analyze
1132  @param prefiltervalue Only include frames where the
1133  score key is below this value
1134  @param feature_keys Keywords for which you want to
1135  calculate average, medians, etc.
1136  If you pass "Keyname" it'll include everything that matches
1137  "*Keyname*"
1138  @param outputdir The local output directory used in
1139  the run
1140  @param alignment_components Dictionary with keys=groupname,
1141  values are tuples for aligning the structures
1142  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1143  @param number_of_best_scoring_models Num models to keep per run
1144  @param rmsd_calculation_components For calculating RMSD
1145  (same format as alignment_components)
1146  @param distance_matrix_file Where to store/read the
1147  distance matrix
1148  @param load_distance_matrix_file Try to load the distance
1149  matrix file
1150  @param skip_clustering Just extract the best scoring
1151  models and save the pdbs
1152  @param number_of_clusters Number of k-means clusters
1153  @param display_plot Display the distance matrix
1154  @param exit_after_display Exit after displaying distance
1155  matrix
1156  @param get_every Extract every nth frame
1157  @param first_and_last_frames A tuple with the first and last
1158  frames to be analyzed. Values are percentages!
1159  Default: get all frames
1160  @param density_custom_ranges For density calculation
1161  (same format as alignment_components)
1162  @param write_pdb_with_centered_coordinates
1163  @param voxel_size Used for the density output
1164  """
1165  # Track provenance information to be added to each output model
1166  prov = []
1167  self._outputdir = Path(outputdir).absolute()
1168  self._number_of_clusters = number_of_clusters
1169  for p, state in self._protocol_output:
1170  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1171 
1172  if self.test_mode:
1173  return
1174 
1175  if self.rank == 0:
1176  try:
1177  os.mkdir(outputdir)
1178  except: # noqa: E722
1179  pass
1180 
1181  if not load_distance_matrix_file:
1182  if len(self.stat_files) == 0:
1183  print("ERROR: no stat file found in the given path")
1184  return
1185  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1186  self.stat_files, self.number_of_processes)[self.rank]
1187 
1188  # read ahead to check if you need the PMI2 score key instead
1189  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1190  if k in feature_keys:
1191  warnings.warn(
1192  "no need to pass " + k + " to feature_keys.",
1194  feature_keys.remove(k)
1195 
1196  best_models = IMP.pmi.io.get_best_models(
1197  my_stat_files, score_key, feature_keys, rmf_file_key,
1198  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1199  rmf_file_list = best_models[0]
1200  rmf_file_frame_list = best_models[1]
1201  score_list = best_models[2]
1202  feature_keyword_list_dict = best_models[3]
1203 
1204 # ------------------------------------------------------------------------
1205 # collect all the files and scores
1206 # ------------------------------------------------------------------------
1207 
1208  if self.number_of_processes > 1:
1209  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1210  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1211  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1212  rmf_file_frame_list)
1213  for k in feature_keyword_list_dict:
1214  feature_keyword_list_dict[k] = \
1216  feature_keyword_list_dict[k])
1217 
1218  # sort by score and get the best scoring ones
1219  score_rmf_tuples = list(zip(score_list,
1220  rmf_file_list,
1221  rmf_file_frame_list,
1222  list(range(len(score_list)))))
1223 
1224  if density_custom_ranges:
1225  for k in density_custom_ranges:
1226  if not isinstance(density_custom_ranges[k], list):
1227  raise Exception("Density custom ranges: values must "
1228  "be lists of tuples")
1229 
1230  # keep subset of frames if requested
1231  if first_and_last_frames is not None:
1232  nframes = len(score_rmf_tuples)
1233  first_frame = int(first_and_last_frames[0] * nframes)
1234  last_frame = int(first_and_last_frames[1] * nframes)
1235  if last_frame > len(score_rmf_tuples):
1236  last_frame = -1
1237  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1238 
1239  # sort RMFs by the score_key in ascending order, and store the rank
1240  best_score_rmf_tuples = sorted(
1241  score_rmf_tuples,
1242  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1243  best_score_rmf_tuples = [t+(n,) for n, t in
1244  enumerate(best_score_rmf_tuples)]
1245  # Note in the provenance info that we only kept best-scoring models
1246  prov.append(IMP.pmi.io.FilterProvenance(
1247  "Best scoring", 0, number_of_best_scoring_models))
1248  # sort the feature scores in the same way
1249  best_score_feature_keyword_list_dict = defaultdict(list)
1250  for tpl in best_score_rmf_tuples:
1251  index = tpl[3]
1252  for f in feature_keyword_list_dict:
1253  best_score_feature_keyword_list_dict[f].append(
1254  feature_keyword_list_dict[f][index])
1255  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1256  best_score_rmf_tuples,
1257  self.number_of_processes)[self.rank]
1258 
1259  # expand the dictionaries to include ambiguous copies
1260  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1261  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1262  if rmsd_calculation_components is not None:
1263  tmp = self._expand_ambiguity(
1264  prot_ahead, rmsd_calculation_components)
1265  if tmp != rmsd_calculation_components:
1266  print('Detected ambiguity, expand rmsd components to',
1267  tmp)
1268  rmsd_calculation_components = tmp
1269  if alignment_components is not None:
1270  tmp = self._expand_ambiguity(prot_ahead,
1271  alignment_components)
1272  if tmp != alignment_components:
1273  print('Detected ambiguity, expand alignment '
1274  'components to', tmp)
1275  alignment_components = tmp
1276 
1277 # -------------------------------------------------------------
1278 # read the coordinates
1279 # ------------------------------------------------------------
1280  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1281  self.model, my_best_score_rmf_tuples[0],
1282  rmsd_calculation_components, state_number=state_number)
1284  self.model, my_best_score_rmf_tuples, alignment_components,
1285  rmsd_calculation_components, state_number=state_number)
1286 
1287  # note! the coordinates are simply float tuples, NOT decorators,
1288  # NOT Vector3D, NOR particles, because these object cannot be
1289  # serialized. We need serialization
1290  # for the parallel computation based on mpi.
1291 
1292  # dict:key=component name,val=coords per hit
1293  all_coordinates = got_coords[0]
1294 
1295  # same as above, limited to alignment bits
1296  alignment_coordinates = got_coords[1]
1297 
1298  # same as above, limited to RMSD bits
1299  rmsd_coordinates = got_coords[2]
1300 
1301  # dictionary with key=RMF, value=score rank
1302  rmf_file_name_index_dict = got_coords[3]
1303 
1304  # RMF file per hit
1305  all_rmf_file_names = got_coords[4]
1306 
1307 # ------------------------------------------------------------------------
1308 # optionally don't compute distance matrix or cluster, just write top files
1309 # ------------------------------------------------------------------------
1310  if skip_clustering:
1311  if density_custom_ranges:
1312  DensModule = IMP.pmi.analysis.GetModelDensity(
1313  density_custom_ranges, voxel=voxel_size)
1314 
1315  dircluster = os.path.join(outputdir,
1316  "all_models."+str(self.rank))
1317  try:
1318  os.mkdir(outputdir)
1319  except: # noqa: E722
1320  pass
1321  try:
1322  os.mkdir(dircluster)
1323  except: # noqa: E722
1324  pass
1325  clusstat = open(os.path.join(
1326  dircluster, "stat."+str(self.rank)+".out"), "w")
1327  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1328  rmf_name = tpl[1]
1329  rmf_frame_number = tpl[2]
1330  tmp_dict = {}
1331  index = tpl[4]
1332  for key in best_score_feature_keyword_list_dict:
1333  tmp_dict[key] = \
1334  best_score_feature_keyword_list_dict[key][index]
1335 
1336  if cnt == 0:
1337  prots, rs = \
1338  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1339  self.model, rmf_frame_number, rmf_name)
1340  else:
1341  linking_successful = \
1342  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1343  self.model, prots, rs, rmf_frame_number,
1344  rmf_name)
1345  if not linking_successful:
1346  continue
1347 
1348  if not prots:
1349  continue
1350 
1351  states = IMP.atom.get_by_type(
1352  prots[0], IMP.atom.STATE_TYPE)
1353  prot = states[state_number]
1354 
1355  # get transformation aligning coordinates of
1356  # requested tuples to the first RMF file
1357  if cnt == 0:
1358  coords_f1 = alignment_coordinates[cnt]
1359  if cnt > 0:
1360  coords_f2 = alignment_coordinates[cnt]
1361  if coords_f2:
1363  coords_f1, coords_f2)
1364  transformation = Ali.align()[1]
1365  else:
1366  transformation = \
1368 
1369  rbs = set()
1370  for p in IMP.atom.get_leaves(prot):
1371  if not IMP.core.XYZR.get_is_setup(p):
1373  IMP.core.XYZR(p).set_radius(0.0001)
1374  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1375 
1377  rbm = IMP.core.RigidBodyMember(p)
1378  rb = rbm.get_rigid_body()
1379  rbs.add(rb)
1380  else:
1382  transformation)
1383  for rb in rbs:
1384  IMP.core.transform(rb, transformation)
1385 
1386  o = IMP.pmi.output.Output()
1387  self.model.update()
1388  out_pdb_fn = os.path.join(
1389  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1390  out_rmf_fn = os.path.join(
1391  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1392  o.init_pdb(out_pdb_fn, prot)
1393  tc = write_pdb_with_centered_coordinates
1394  o.write_pdb(out_pdb_fn,
1395  translate_to_geometric_center=tc)
1396 
1397  tmp_dict["local_pdb_file_name"] = \
1398  os.path.basename(out_pdb_fn)
1399  tmp_dict["rmf_file_full_path"] = rmf_name
1400  tmp_dict["local_rmf_file_name"] = \
1401  os.path.basename(out_rmf_fn)
1402  tmp_dict["local_rmf_frame_number"] = 0
1403 
1404  clusstat.write(str(tmp_dict)+"\n")
1405 
1406  # create a single-state System and write that
1408  IMP.Particle(self.model))
1409  h.set_name("System")
1410  h.add_child(prot)
1411  o.init_rmf(out_rmf_fn, [h], rs)
1412 
1413  o.write_rmf(out_rmf_fn)
1414  o.close_rmf(out_rmf_fn)
1415  # add the density
1416  if density_custom_ranges:
1417  DensModule.add_subunits_density(prot)
1418 
1419  if density_custom_ranges:
1420  DensModule.write_mrc(path=dircluster)
1421  del DensModule
1422  return
1423 
1424  # broadcast the coordinates
1425  if self.number_of_processes > 1:
1426  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1427  all_coordinates)
1428  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1429  all_rmf_file_names)
1430  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1431  rmf_file_name_index_dict)
1432  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1433  alignment_coordinates)
1434  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1435  rmsd_coordinates)
1436 
1437  if self.rank == 0:
1438  # save needed information in external files
1439  self.save_objects(
1440  [best_score_feature_keyword_list_dict,
1441  rmf_file_name_index_dict],
1442  ".macro.pkl")
1443 
1444 # ------------------------------------------------------------------------
1445 # Calculate distance matrix and cluster
1446 # ------------------------------------------------------------------------
1447  print("setup clustering class")
1448  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1449 
1450  for n, model_coordinate_dict in enumerate(all_coordinates):
1451  # let's try to align
1452  if (alignment_components is not None
1453  and len(self.cluster_obj.all_coords) == 0):
1454  # set the first model as template coordinates
1455  self.cluster_obj.set_template(alignment_coordinates[n])
1456  self.cluster_obj.fill(all_rmf_file_names[n],
1457  rmsd_coordinates[n])
1458  print("Global calculating the distance matrix")
1459 
1460  # calculate distance matrix, all against all
1461  self.cluster_obj.dist_matrix()
1462 
1463  # perform clustering and optionally display
1464  if self.rank == 0:
1465  self.cluster_obj.do_cluster(number_of_clusters)
1466  if display_plot:
1467  if self.rank == 0:
1468  self.cluster_obj.plot_matrix(
1469  figurename=os.path.join(outputdir,
1470  'dist_matrix.pdf'))
1471  if exit_after_display:
1472  exit()
1473  self.cluster_obj.save_distance_matrix_file(
1474  file_name=distance_matrix_file)
1475 
1476 # ------------------------------------------------------------------------
1477 # Alternatively, load the distance matrix from file and cluster that
1478 # ------------------------------------------------------------------------
1479  else:
1480  if self.rank == 0:
1481  print("setup clustering class")
1482  self.cluster_obj = IMP.pmi.analysis.Clustering()
1483  self.cluster_obj.load_distance_matrix_file(
1484  file_name=distance_matrix_file)
1485  print("clustering with %s clusters" % str(number_of_clusters))
1486  self.cluster_obj.do_cluster(number_of_clusters)
1487  [best_score_feature_keyword_list_dict,
1488  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1489  if display_plot:
1490  if self.rank == 0:
1491  self.cluster_obj.plot_matrix(figurename=os.path.join(
1492  outputdir, 'dist_matrix.pdf'))
1493  if exit_after_display:
1494  exit()
1495  if self.number_of_processes > 1:
1496  self.comm.Barrier()
1497 
1498 # ------------------------------------------------------------------------
1499 # now save all information about the clusters
1500 # ------------------------------------------------------------------------
1501 
1502  if self.rank == 0:
1503  print(self.cluster_obj.get_cluster_labels())
1504  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1505  print("rank %s " % str(self.rank))
1506  print("cluster %s " % str(n))
1507  print("cluster label %s " % str(cl))
1508  print(self.cluster_obj.get_cluster_label_names(cl))
1509  cluster_size = \
1510  len(self.cluster_obj.get_cluster_label_names(cl))
1511  cluster_prov = \
1512  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1513 
1514  # first initialize the Density class if requested
1515  if density_custom_ranges:
1516  DensModule = IMP.pmi.analysis.GetModelDensity(
1517  density_custom_ranges,
1518  voxel=voxel_size)
1519 
1520  dircluster = outputdir + "/cluster." + str(n) + "/"
1521  try:
1522  os.mkdir(dircluster)
1523  except: # noqa: E722
1524  pass
1525 
1526  rmsd_dict = {
1527  "AVERAGE_RMSD":
1528  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1529  clusstat = open(dircluster + "stat.out", "w")
1530  for k, structure_name in enumerate(
1531  self.cluster_obj.get_cluster_label_names(cl)):
1532  # extract the features
1533  tmp_dict = {}
1534  tmp_dict.update(rmsd_dict)
1535  index = rmf_file_name_index_dict[structure_name]
1536  for key in best_score_feature_keyword_list_dict:
1537  tmp_dict[
1538  key] = best_score_feature_keyword_list_dict[
1539  key][
1540  index]
1541 
1542  # get the rmf name and the frame number from the list of
1543  # frame names
1544  rmf_name = structure_name.split("|")[0]
1545  rmf_frame_number = int(structure_name.split("|")[1])
1546  clusstat.write(str(tmp_dict) + "\n")
1547 
1548  # extract frame (open or link to existing)
1549  if k == 0:
1550  prots, rs = \
1551  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1552  self.model, rmf_frame_number, rmf_name)
1553  else:
1554  linking_successful = \
1555  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1556  self.model, prots, rs, rmf_frame_number,
1557  rmf_name)
1558  if not linking_successful:
1559  continue
1560  if not prots:
1561  continue
1562 
1563  states = IMP.atom.get_by_type(
1564  prots[0], IMP.atom.STATE_TYPE)
1565  prot = states[state_number]
1566  if k == 0:
1567  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1568 
1569  # transform clusters onto first
1570  if k > 0:
1571  co = self.cluster_obj
1572  model_index = co.get_model_index_from_name(
1573  structure_name)
1574  transformation = co.get_transformation_to_first_member(
1575  cl, model_index)
1576  rbs = set()
1577  for p in IMP.atom.get_leaves(prot):
1578  if not IMP.core.XYZR.get_is_setup(p):
1580  IMP.core.XYZR(p).set_radius(0.0001)
1581  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1582 
1584  rbm = IMP.core.RigidBodyMember(p)
1585  rb = rbm.get_rigid_body()
1586  rbs.add(rb)
1587  else:
1589  transformation)
1590  for rb in rbs:
1591  IMP.core.transform(rb, transformation)
1592 
1593  # add the density
1594  if density_custom_ranges:
1595  DensModule.add_subunits_density(prot)
1596 
1597  # pdb writing should be optimized!
1598  o = IMP.pmi.output.Output()
1599  self.model.update()
1600  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1601  o.write_pdb(dircluster + str(k) + ".pdb")
1602 
1603  # create a single-state System and write that
1605  IMP.Particle(self.model))
1606  h.set_name("System")
1607  h.add_child(prot)
1608  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1609  o.write_rmf(dircluster + str(k) + ".rmf3")
1610  o.close_rmf(dircluster + str(k) + ".rmf3")
1611 
1612  del o
1613  # IMP.atom.destroy(prot)
1614 
1615  if density_custom_ranges:
1616  DensModule.write_mrc(path=dircluster)
1617  del DensModule
1618 
1619  if self.number_of_processes > 1:
1620  self.comm.Barrier()
1621 
1622  def get_cluster_rmsd(self, cluster_num):
1623  if self.cluster_obj is None:
1624  raise Exception("Run clustering first")
1625  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1626 
1627  def save_objects(self, objects, file_name):
1628  import pickle
1629  outf = open(file_name, 'wb')
1630  pickle.dump(objects, outf)
1631  outf.close()
1632 
1633  def load_objects(self, file_name):
1634  import pickle
1635  inputf = open(file_name, 'rb')
1636  objects = pickle.load(inputf)
1637  inputf.close()
1638  return objects
1639 
1640 
1642 
1643  """
1644  This class contains analysis utilities to investigate ReplicaExchange
1645  results.
1646  """
1647 
1648  ########################
1649  # Construction and Setup
1650  ########################
1651 
1652  def __init__(self, model, stat_files, best_models=None, score_key=None,
1653  alignment=True):
1654  """
1655  Construction of the Class.
1656  @param model IMP.Model()
1657  @param stat_files list of string. Can be ascii stat files,
1658  rmf files names
1659  @param best_models Integer. Number of best scoring models,
1660  if None: all models will be read
1661  @param score_key Use the provided stat key keyword as the score
1662  (by default, the total score is used)
1663  @param alignment boolean (Default=True). Align before computing
1664  the rmsd.
1665  """
1666 
1667  self.model = model
1668  self.best_models = best_models
1670  model, stat_files, self.best_models, score_key, cache=True)
1672  StatHierarchyHandler=self.stath0)
1673 
1674  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1676  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1678  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1679  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1680  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1681  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1682  self.clusters = []
1683  # fill the cluster list with a single cluster containing all models
1684  c = IMP.pmi.output.Cluster(0)
1685  self.clusters.append(c)
1686  for n0 in range(len(self.stath0)):
1687  c.add_member(n0)
1688  self.pairwise_rmsd = {}
1689  self.pairwise_molecular_assignment = {}
1690  self.alignment = alignment
1691  self.symmetric_molecules = {}
1692  self.issymmetricsel = {}
1693  self.update_seldicts()
1694  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1695  IMP.atom.get_leaves(self.stath0))
1696  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1697  IMP.atom.get_leaves(self.stath1))
1698 
1699  def set_rmsd_selection(self, **kwargs):
1700  """
1701  Setup the selection onto which the rmsd is computed
1702  @param kwargs use IMP.atom.Selection keywords
1703  """
1704  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1705  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1706  self.update_seldicts()
1707 
1708  def set_symmetric(self, molecule_name):
1709  """
1710  Store names of symmetric molecules
1711  """
1712  self.symmetric_molecules[molecule_name] = 0
1713  self.update_seldicts()
1714 
1715  def set_alignment_selection(self, **kwargs):
1716  """
1717  Setup the selection onto which the alignment is computed
1718  @param kwargs use IMP.atom.Selection keywords
1719  """
1720  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1721  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1722 
1723  ######################
1724  # Clustering functions
1725  ######################
1726  def clean_clusters(self):
1727  for c in self.clusters:
1728  del c
1729  self.clusters = []
1730 
1731  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1732  """
1733  Cluster the models based on RMSD.
1734  @param rmsd_cutoff Float the distance cutoff in Angstrom
1735  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1736  be used to compute rmsds
1737  """
1738  self.clean_clusters()
1739  not_clustered = set(range(len(self.stath1)))
1740  while len(not_clustered) > 0:
1741  self.aggregate(not_clustered, rmsd_cutoff, metric)
1742  self.update_clusters()
1743 
1744  def refine(self, rmsd_cutoff=10):
1745  """
1746  Refine the clusters by merging the ones whose centers are close
1747  @param rmsd_cutoff cutoff distance in Angstorms
1748  """
1749  clusters_copy = self.clusters
1750  for c0, c1 in itertools.combinations(self.clusters, 2):
1751  if c0.center_index is None:
1752  self.compute_cluster_center(c0)
1753  if c1.center_index is None:
1754  self.compute_cluster_center(c1)
1755  _ = self.stath0[c0.center_index]
1756  _ = self.stath1[c1.center_index]
1757  rmsd, molecular_assignment = self.rmsd()
1758  if rmsd <= rmsd_cutoff:
1759  if c1 in self.clusters:
1760  clusters_copy.remove(c1)
1761  c0 += c1
1762  self.clusters = clusters_copy
1763  self.update_clusters()
1764 
1765  ####################
1766  # Input Output
1767  ####################
1768 
1769  def set_cluster_assignments(self, cluster_ids):
1770  if len(cluster_ids) != len(self.stath0):
1771  raise ValueError('cluster ids has to be same length as '
1772  'number of frames')
1773 
1774  self.clusters = []
1775  for i in sorted(list(set(cluster_ids))):
1776  self.clusters.append(IMP.pmi.output.Cluster(i))
1777  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1778  self.clusters[idx].add_member(i, d)
1779 
1780  def get_cluster_data(self, cluster):
1781  """
1782  Return the model data from a cluster
1783  @param cluster IMP.pmi.output.Cluster object
1784  """
1785  data = []
1786  for m in cluster:
1787  data.append(m)
1788  return data
1789 
1790  def save_data(self, filename='data.pkl'):
1791  """
1792  Save the data for the whole models into a pickle file
1793  @param filename string
1794  """
1795  self.stath0.save_data(filename)
1796 
1797  def set_data(self, data):
1798  """
1799  Set the data from an external IMP.pmi.output.Data
1800  @param data IMP.pmi.output.Data
1801  """
1802  self.stath0.data = data
1803  self.stath1.data = data
1804 
1805  def load_data(self, filename='data.pkl'):
1806  """
1807  Load the data from an external pickled file
1808  @param filename string
1809  """
1810  self.stath0.load_data(filename)
1811  self.stath1.load_data(filename)
1812  self.best_models = len(self.stath0)
1813 
1814  def add_cluster(self, rmf_name_list):
1815  c = IMP.pmi.output.Cluster(len(self.clusters))
1816  print("creating cluster index "+str(len(self.clusters)))
1817  self.clusters.append(c)
1818  current_len = len(self.stath0)
1819 
1820  for rmf in rmf_name_list:
1821  print("adding rmf "+rmf)
1822  self.stath0.add_stat_file(rmf)
1823  self.stath1.add_stat_file(rmf)
1824 
1825  for n0 in range(current_len, len(self.stath0)):
1826  d0 = self.stath0[n0]
1827  c.add_member(n0, d0)
1828  self.update_clusters()
1829 
1830  def save_clusters(self, filename='clusters.pkl'):
1831  """
1832  Save the clusters into a pickle file
1833  @param filename string
1834  """
1835  import pickle
1836  fl = open(filename, 'wb')
1837  pickle.dump(self.clusters, fl)
1838 
1839  def load_clusters(self, filename='clusters.pkl', append=False):
1840  """
1841  Load the clusters from a pickle file
1842  @param filename string
1843  @param append bool (Default=False), if True. append the clusters
1844  to the ones currently present
1845  """
1846  import pickle
1847  fl = open(filename, 'rb')
1848  self.clean_clusters()
1849  if append:
1850  self.clusters += pickle.load(fl)
1851  else:
1852  self.clusters = pickle.load(fl)
1853  self.update_clusters()
1854 
1855  ####################
1856  # Analysis Functions
1857  ####################
1858 
1859  def compute_cluster_center(self, cluster):
1860  """
1861  Compute the cluster center for a given cluster
1862  """
1863  member_distance = defaultdict(float)
1864 
1865  for n0, n1 in itertools.combinations(cluster.members, 2):
1866  _ = self.stath0[n0]
1867  _ = self.stath1[n1]
1868  rmsd, _ = self.rmsd()
1869  member_distance[n0] += rmsd
1870 
1871  if len(member_distance) > 0:
1872  cluster.center_index = min(member_distance,
1873  key=member_distance.get)
1874  else:
1875  cluster.center_index = cluster.members[0]
1876 
1877  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1878  prefix="./"):
1879  """
1880  Save the coordinates of the current cluster a single rmf file
1881  """
1882  print("saving coordinates", cluster)
1883  if self.alignment:
1884  self.set_reference(reference, cluster)
1885  o = IMP.pmi.output.Output()
1886  if rmf_name is None:
1887  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1888 
1889  _ = self.stath1[cluster.members[0]]
1890  self.model.update()
1891  o.init_rmf(rmf_name, [self.stath1])
1892  for n1 in cluster.members:
1893  _ = self.stath1[n1]
1894  self.model.update()
1896  if self.alignment:
1897  self.align()
1898  o.write_rmf(rmf_name)
1900  o.close_rmf(rmf_name)
1901 
1902  def prune_redundant_structures(self, rmsd_cutoff=10):
1903  """
1904  remove structures that are similar
1905  append it to a new cluster
1906  """
1907  print("pruning models")
1908  selected = 0
1909  filtered = [selected]
1910  remaining = range(1, len(self.stath1), 10)
1911 
1912  while len(remaining) > 0:
1913  d0 = self.stath0[selected]
1914  rm = []
1915  for n1 in remaining:
1916  _ = self.stath1[n1]
1917  if self.alignment:
1918  self.align()
1919  d, _ = self.rmsd()
1920  if d <= rmsd_cutoff:
1921  rm.append(n1)
1922  print("pruning model %s, similar to model %s, rmsd %s"
1923  % (str(n1), str(selected), str(d)))
1924  remaining = [x for x in remaining if x not in rm]
1925  if len(remaining) == 0:
1926  break
1927  selected = remaining[0]
1928  filtered.append(selected)
1929  remaining.pop(0)
1930  c = IMP.pmi.output.Cluster(len(self.clusters))
1931  self.clusters.append(c)
1932  for n0 in filtered:
1933  d0 = self.stath0[n0]
1934  c.add_member(n0, d0)
1935  self.update_clusters()
1936 
1937  def precision(self, cluster):
1938  """
1939  Compute the precision of a cluster
1940  """
1941  npairs = 0
1942  rmsd = 0.0
1943  precision = None
1944 
1945  if cluster.center_index is not None:
1946  members1 = [cluster.center_index]
1947  else:
1948  members1 = cluster.members
1949 
1950  for n0 in members1:
1951  _ = self.stath0[n0]
1952  for n1 in cluster.members:
1953  if n0 != n1:
1954  npairs += 1
1955  _ = self.stath1[n1]
1957  tmp_rmsd, _ = self.rmsd()
1958  rmsd += tmp_rmsd
1960 
1961  if npairs > 0:
1962  precision = rmsd/npairs
1963  cluster.precision = precision
1964  return precision
1965 
1966  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1967  """
1968  Compute the bipartite precision (ie the cross-precision)
1969  between two clusters
1970  """
1971  npairs = 0
1972  rmsd = 0.0
1973  for cn0, n0 in enumerate(cluster1.members):
1974  _ = self.stath0[n0]
1975  for cn1, n1 in enumerate(cluster2.members):
1976  _ = self.stath1[n1]
1977  tmp_rmsd, _ = self.rmsd()
1978  if verbose:
1979  print("--- rmsd between structure %s and structure "
1980  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1981  rmsd += tmp_rmsd
1982  npairs += 1
1983  precision = rmsd/npairs
1984  return precision
1985 
1986  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1987  cluster_ref=None, step=1):
1988  """
1989  Compute the Root mean square fluctuations
1990  of a molecule in a cluster
1991  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1992  residue indexes and the value is the rmsf
1993  """
1994  rmsf = IMP.pmi.tools.OrderedDict()
1995 
1996  # assumes that residue indexes are identical for stath0 and stath1
1997  if cluster_ref is not None:
1998  if cluster_ref.center_index is not None:
1999  members0 = [cluster_ref.center_index]
2000  else:
2001  members0 = cluster_ref.members
2002  else:
2003  if cluster.center_index is not None:
2004  members0 = [cluster.center_index]
2005  else:
2006  members0 = cluster.members
2007 
2008  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
2009  copy_index=copy_index, state_index=state_index)
2010  ps0 = s0.get_selected_particles()
2011  # get the residue indexes
2012  residue_indexes = list(IMP.pmi.tools.OrderedSet(
2013  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
2014 
2015  # get the corresponding particles
2016  npairs = 0
2017  for n0 in members0:
2018  d0 = self.stath0[n0]
2019  for n1 in cluster.members[::step]:
2020  if n0 != n1:
2021  print("--- rmsf %s %s" % (str(n0), str(n1)))
2023 
2024  s1 = IMP.atom.Selection(
2025  self.stath1, molecule=molecule,
2026  residue_indexes=residue_indexes, resolution=1,
2027  copy_index=copy_index, state_index=state_index)
2028  ps1 = s1.get_selected_particles()
2029 
2030  d1 = self.stath1[n1]
2031  if self.alignment:
2032  self.align()
2033  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
2034  r = residue_indexes[n]
2035  d0 = IMP.core.XYZ(p0)
2036  d1 = IMP.core.XYZ(p1)
2037  if r in rmsf:
2038  rmsf[r] += IMP.core.get_distance(d0, d1)
2039  else:
2040  rmsf[r] = IMP.core.get_distance(d0, d1)
2041  npairs += 1
2043  for r in rmsf:
2044  rmsf[r] /= npairs
2045 
2046  for stath in [self.stath0, self.stath1]:
2047  if molecule not in self.symmetric_molecules:
2048  s = IMP.atom.Selection(
2049  stath, molecule=molecule, residue_index=r,
2050  resolution=1, copy_index=copy_index,
2051  state_index=state_index)
2052  else:
2053  s = IMP.atom.Selection(
2054  stath, molecule=molecule, residue_index=r,
2055  resolution=1, state_index=state_index)
2056 
2057  ps = s.get_selected_particles()
2058  for p in ps:
2060  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2061  else:
2063 
2064  return rmsf
2065 
2066  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2067  reference="Absolute", prefix="./", step=1):
2068  if self.alignment:
2069  self.set_reference(reference, cluster)
2070  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2071  voxel=voxel_size)
2072 
2073  for n1 in cluster.members[::step]:
2074  print("density "+str(n1))
2075  _ = self.stath1[n1]
2077  if self.alignment:
2078  self.align()
2079  dens.add_subunits_density(self.stath1)
2081  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2082  del dens
2083 
2084  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2085  consolidate=False, molecules=None, prefix='./',
2086  reference="Absolute"):
2087  if self.alignment:
2088  self.set_reference(reference, cluster)
2089  import numpy as np
2090  import matplotlib.pyplot as plt
2091  import matplotlib.cm as cm
2092  from scipy.spatial.distance import cdist
2093  import IMP.pmi.topology
2094  if molecules is None:
2096  for mol in IMP.pmi.tools.get_molecules(
2097  IMP.atom.get_leaves(self.stath1))]
2098  else:
2100  for mol in IMP.pmi.tools.get_molecules(
2102  self.stath1,
2103  molecules=molecules).get_selected_particles())]
2104  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2105  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2106  total_len_unique = sum(max(mol.get_residue_indexes())
2107  for mol in unique_copies)
2108 
2109  index_dict = {}
2110  prev_stop = 0
2111 
2112  if not consolidate:
2113  for mol in mols:
2114  seqlen = max(mol.get_residue_indexes())
2115  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2116  prev_stop += seqlen
2117 
2118  else:
2119  for mol in unique_copies:
2120  seqlen = max(mol.get_residue_indexes())
2121  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2122  prev_stop += seqlen
2123 
2124  for ncl, n1 in enumerate(cluster.members):
2125  print(ncl)
2126  _ = self.stath1[n1]
2127  coord_dict = IMP.pmi.tools.OrderedDict()
2128  for mol in mols:
2129  rindexes = mol.get_residue_indexes()
2130  coords = np.ones((max(rindexes), 3))
2131  for rnum in rindexes:
2132  sel = IMP.atom.Selection(mol, residue_index=rnum,
2133  resolution=1)
2134  selpart = sel.get_selected_particles()
2135  if len(selpart) == 0:
2136  continue
2137  selpart = selpart[0]
2138  coords[rnum - 1, :] = \
2139  IMP.core.XYZ(selpart).get_coordinates()
2140  coord_dict[mol] = coords
2141 
2142  if not consolidate:
2143  coords = np.concatenate(list(coord_dict.values()))
2144  dists = cdist(coords, coords)
2145  binary_dists = np.where((dists <= contact_threshold)
2146  & (dists >= 1.0), 1.0, 0.0)
2147  else:
2148  binary_dists_dict = {}
2149  for mol1 in mols:
2150  len1 = max(mol1.get_residue_indexes())
2151  for mol2 in mols:
2152  name1 = mol1.get_name()
2153  name2 = mol2.get_name()
2154  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2155  if (name1, name2) not in binary_dists_dict:
2156  binary_dists_dict[(name1, name2)] = \
2157  np.zeros((len1, len1))
2158  binary_dists_dict[(name1, name2)] += \
2159  np.where((dists <= contact_threshold)
2160  & (dists >= 1.0), 1.0, 0.0)
2161  binary_dists = np.zeros((total_len_unique, total_len_unique))
2162 
2163  for name1, name2 in binary_dists_dict:
2164  r1 = index_dict[mol_names_unique[name1]]
2165  r2 = index_dict[mol_names_unique[name2]]
2166  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2167  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2168  1.0, 0.0)
2169 
2170  if ncl == 0:
2171  dist_maps = [dists]
2172  av_dist_map = dists
2173  contact_freqs = binary_dists
2174  else:
2175  dist_maps.append(dists)
2176  av_dist_map += dists
2177  contact_freqs += binary_dists
2178 
2179  if log_scale:
2180  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2181  else:
2182  contact_freqs = 1.0/len(cluster)*contact_freqs
2183  av_dist_map = 1.0/len(cluster)*contact_freqs
2184 
2185  fig = plt.figure(figsize=(100, 100))
2186  ax = fig.add_subplot(111)
2187  ax.set_xticks([])
2188  ax.set_yticks([])
2189  gap_between_components = 50
2190  colormap = cm.Blues
2191  colornorm = None
2192 
2193  if not consolidate:
2194  sorted_tuple = sorted(
2196  mol).get_extended_name(), mol) for mol in mols)
2197  prot_list = list(zip(*sorted_tuple))[1]
2198  else:
2199  sorted_tuple = sorted(
2200  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2201  for mol in unique_copies)
2202  prot_list = list(zip(*sorted_tuple))[1]
2203 
2204  prot_listx = prot_list
2205  nresx = gap_between_components + \
2206  sum([max(mol.get_residue_indexes())
2207  + gap_between_components for mol in prot_listx])
2208 
2209  # set the list of proteins on the y axis
2210  prot_listy = prot_list
2211  nresy = gap_between_components + \
2212  sum([max(mol.get_residue_indexes())
2213  + gap_between_components for mol in prot_listy])
2214 
2215  # this is the residue offset for each protein
2216  resoffsetx = {}
2217  resendx = {}
2218  res = gap_between_components
2219  for mol in prot_listx:
2220  resoffsetx[mol] = res
2221  res += max(mol.get_residue_indexes())
2222  resendx[mol] = res
2223  res += gap_between_components
2224 
2225  resoffsety = {}
2226  resendy = {}
2227  res = gap_between_components
2228  for mol in prot_listy:
2229  resoffsety[mol] = res
2230  res += max(mol.get_residue_indexes())
2231  resendy[mol] = res
2232  res += gap_between_components
2233 
2234  resoffsetdiagonal = {}
2235  res = gap_between_components
2236  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2237  resoffsetdiagonal[mol] = res
2238  res += max(mol.get_residue_indexes())
2239  res += gap_between_components
2240 
2241  # plot protein boundaries
2242  xticks = []
2243  xlabels = []
2244  for n, prot in enumerate(prot_listx):
2245  res = resoffsetx[prot]
2246  end = resendx[prot]
2247  for proty in prot_listy:
2248  resy = resoffsety[proty]
2249  endy = resendy[proty]
2250  ax.plot([res, res], [resy, endy], linestyle='-',
2251  color='gray', lw=0.4)
2252  ax.plot([end, end], [resy, endy], linestyle='-',
2253  color='gray', lw=0.4)
2254  xticks.append((float(res) + float(end)) / 2)
2256  prot).get_extended_name())
2257 
2258  yticks = []
2259  ylabels = []
2260  for n, prot in enumerate(prot_listy):
2261  res = resoffsety[prot]
2262  end = resendy[prot]
2263  for protx in prot_listx:
2264  resx = resoffsetx[protx]
2265  endx = resendx[protx]
2266  ax.plot([resx, endx], [res, res], linestyle='-',
2267  color='gray', lw=0.4)
2268  ax.plot([resx, endx], [end, end], linestyle='-',
2269  color='gray', lw=0.4)
2270  yticks.append((float(res) + float(end)) / 2)
2272  prot).get_extended_name())
2273 
2274  # plot the contact map
2275 
2276  tmp_array = np.zeros((nresx, nresy))
2277  ret = {}
2278  for px in prot_listx:
2279  for py in prot_listy:
2280  resx = resoffsetx[px]
2281  lengx = resendx[px] - 1
2282  resy = resoffsety[py]
2283  lengy = resendy[py] - 1
2284  indexes_x = index_dict[px]
2285  minx = min(indexes_x)
2286  maxx = max(indexes_x)
2287  indexes_y = index_dict[py]
2288  miny = min(indexes_y)
2289  maxy = max(indexes_y)
2290  tmp_array[resx:lengx, resy:lengy] = \
2291  contact_freqs[minx:maxx, miny:maxy]
2292  ret[(px, py)] = np.argwhere(
2293  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2294 
2295  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2296  origin='lower', alpha=0.6, interpolation='nearest')
2297 
2298  ax.set_xticks(xticks)
2299  ax.set_xticklabels(xlabels, rotation=90)
2300  ax.set_yticks(yticks)
2301  ax.set_yticklabels(ylabels)
2302  plt.setp(ax.get_xticklabels(), fontsize=6)
2303  plt.setp(ax.get_yticklabels(), fontsize=6)
2304 
2305  # display and write to file
2306  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2307  [i.set_linewidth(2.0) for i in ax.spines.values()]
2308 
2309  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2310  dpi=300, transparent="False")
2311  return ret
2312 
2313  def plot_rmsd_matrix(self, filename):
2314  self.compute_all_pairwise_rmsd()
2315  distance_matrix = np.zeros(
2316  (len(self.stath0), len(self.stath1)))
2317  for (n0, n1) in self.pairwise_rmsd:
2318  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2319 
2320  import matplotlib as mpl
2321  mpl.use('Agg')
2322  import matplotlib.pylab as pl
2323  from scipy.cluster import hierarchy as hrc
2324 
2325  fig = pl.figure(figsize=(10, 8))
2326  ax = fig.add_subplot(212)
2327  dendrogram = hrc.dendrogram(
2328  hrc.linkage(distance_matrix),
2329  color_threshold=7,
2330  no_labels=True)
2331  leaves_order = dendrogram['leaves']
2332  ax.set_xlabel('Model')
2333  ax.set_ylabel('RMSD [Angstroms]')
2334 
2335  ax2 = fig.add_subplot(221)
2336  cax = ax2.imshow(
2337  distance_matrix[leaves_order, :][:, leaves_order],
2338  interpolation='nearest')
2339  cb = fig.colorbar(cax)
2340  cb.set_label('RMSD [Angstroms]')
2341  ax2.set_xlabel('Model')
2342  ax2.set_ylabel('Model')
2343 
2344  pl.savefig(filename, dpi=300)
2345  pl.close(fig)
2346 
2347  ####################
2348  # Internal Functions
2349  ####################
2350 
2351  def update_clusters(self):
2352  """
2353  Update the cluster id numbers
2354  """
2355  for n, c in enumerate(self.clusters):
2356  c.cluster_id = n
2357 
2358  def get_molecule(self, hier, name, copy):
2359  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2360  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2361 
2362  def update_seldicts(self):
2363  """
2364  Update the seldicts
2365  """
2366  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2367  self.sel0_rmsd.get_selected_particles())
2368  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2369  self.sel1_rmsd.get_selected_particles())
2370  for mol in self.seldict0:
2371  for sel in self.seldict0[mol]:
2372  self.issymmetricsel[sel] = False
2373  for mol in self.symmetric_molecules:
2374  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2375  for sel in self.seldict0[mol]:
2376  self.issymmetricsel[sel] = True
2377 
2378  def align(self):
2380  self.sel1_alignment, self.sel0_alignment)
2381 
2382  for rb in self.rbs1:
2383  IMP.core.transform(rb, tr)
2384 
2385  for bead in self.beads1:
2386  try:
2387  IMP.core.transform(IMP.core.XYZ(bead), tr)
2388  except: # noqa: E722
2389  continue
2390 
2391  self.model.update()
2392 
2393  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2394  '''
2395  initial filling of the clusters.
2396  '''
2397  n0 = idxs.pop()
2398  print("clustering model "+str(n0))
2399  d0 = self.stath0[n0]
2400  c = IMP.pmi.output.Cluster(len(self.clusters))
2401  print("creating cluster index "+str(len(self.clusters)))
2402  self.clusters.append(c)
2403  c.add_member(n0, d0)
2404  clustered = set([n0])
2405  for n1 in idxs:
2406  print("--- trying to add model " + str(n1) + " to cluster "
2407  + str(len(self.clusters)))
2408  d1 = self.stath1[n1]
2409  if self.alignment:
2410  self.align()
2411  rmsd, _ = self.rmsd(metric=metric)
2412  if rmsd < rmsd_cutoff:
2413  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2414  c.add_member(n1, d1)
2415  clustered.add(n1)
2416  else:
2417  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2418  idxs -= clustered
2419 
2420  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2421  """
2422  merge the clusters that have close members
2423 
2424  @param rmsd_cutoff cutoff distance in Angstorms
2425  @param metric Function to calculate distance between two Selections
2426  (by default, IMP.atom.get_rmsd is used)
2427  """
2428  # before merging, clusters are spheres of radius rmsd_cutoff
2429  # centered on the 1st element
2430  # here we only try to merge clusters whose centers are closer
2431  # than 2*rmsd_cutoff
2432  to_merge = []
2433  print("merging...")
2434  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2435  itertools.combinations(self.clusters, 2)):
2436  n0, n1 = [c.members[0] for c in (c0, c1)]
2437  _ = self.stath0[n0]
2438  _ = self.stath1[n1]
2439  rmsd, _ = self.rmsd()
2440  if (rmsd < 2*rmsd_cutoff and
2441  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2442  to_merge.append((c0, c1))
2443 
2444  for c0, c in reversed(to_merge):
2445  self.merge(c0, c)
2446 
2447  # keep only full clusters
2448  self.clusters = [c for c in
2449  filter(lambda x: len(x.members) > 0, self.clusters)]
2450 
2451  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2452  '''
2453  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2454  '''
2455  print("check close members for clusters " + str(c0.cluster_id) +
2456  " and " + str(c1.cluster_id))
2457  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2458  _ = self.stath0[n0]
2459  _ = self.stath1[n1]
2460  rmsd, _ = self.rmsd(metric=metric)
2461  if rmsd < rmsd_cutoff:
2462  return True
2463 
2464  return False
2465 
2466  def merge(self, c0, c1):
2467  '''
2468  merge two clusters
2469  '''
2470  c0 += c1
2471  c1.members = []
2472  c1.data = {}
2473 
2474  def rmsd_helper(self, sels0, sels1, metric):
2475  '''
2476  a function that returns the permutation best_sel of sels0 that
2477  minimizes metric
2478  '''
2479  best_rmsd2 = float('inf')
2480  best_sel = None
2481  if self.issymmetricsel[sels0[0]]:
2482  # this cases happens when symmetries were defined
2483  N = len(sels0)
2484  for offset in range(N):
2485  sels = [sels0[(offset+i) % N] for i in range(N)]
2486  sel0 = sels[0]
2487  sel1 = sels1[0]
2488  r = metric(sel0, sel1)
2489  rmsd2 = r*r*N
2490  if rmsd2 < best_rmsd2:
2491  best_rmsd2 = rmsd2
2492  best_sel = sels
2493  else:
2494  for sels in itertools.permutations(sels0):
2495  rmsd2 = 0.0
2496  for sel0, sel1 in itertools.takewhile(
2497  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2498  r = metric(sel0, sel1)
2499  rmsd2 += r*r
2500  if rmsd2 < best_rmsd2:
2501  best_rmsd2 = rmsd2
2502  best_sel = sels
2503  return best_sel, best_rmsd2
2504 
2505  def compute_all_pairwise_rmsd(self):
2506  for d0 in self.stath0:
2507  for d1 in self.stath1:
2508  rmsd, _ = self.rmsd()
2509 
2510  def rmsd(self, metric=IMP.atom.get_rmsd):
2511  '''
2512  Computes the RMSD. Resolves ambiguous pairs assignments
2513  '''
2514  # here we memoize the rmsd and molecular assignment so that it's
2515  # not done multiple times
2516  n0 = self.stath0.current_index
2517  n1 = self.stath1.current_index
2518  if ((n0, n1) in self.pairwise_rmsd) \
2519  and ((n0, n1) in self.pairwise_molecular_assignment):
2520  return (self.pairwise_rmsd[(n0, n1)],
2521  self.pairwise_molecular_assignment[(n0, n1)])
2522 
2523  if self.alignment:
2524  self.align()
2525  # if it's not yet memoized
2526  total_rmsd = 0.0
2527  total_N = 0
2528  # this is a dictionary which keys are the molecule names, and values
2529  # are the list of IMP.atom.Selection for all molecules that share
2530  # the molecule name
2531  molecular_assignment = {}
2532  for molname, sels0 in self.seldict0.items():
2533  sels_best_order, best_rmsd2 = \
2534  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2535 
2536  Ncoords = len(sels_best_order[0].get_selected_particles())
2537  Ncopies = len(self.seldict1[molname])
2538  total_rmsd += Ncoords*best_rmsd2
2539  total_N += Ncoords*Ncopies
2540 
2541  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2542  p0 = sel0.get_selected_particles()[0]
2543  p1 = sel1.get_selected_particles()[0]
2544  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2545  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2546  c0 = IMP.atom.Copy(m0).get_copy_index()
2547  c1 = IMP.atom.Copy(m1).get_copy_index()
2548  molecular_assignment[(molname, c0)] = (molname, c1)
2549 
2550  total_rmsd = math.sqrt(total_rmsd/total_N)
2551 
2552  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2553  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2554  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2555  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2556  return total_rmsd, molecular_assignment
2557 
2558  def set_reference(self, reference, cluster):
2559  """
2560  Fix the reference structure for structural alignment, rmsd and
2561  chain assignment
2562 
2563  @param reference can be either "Absolute" (cluster center of the
2564  first cluster) or Relative (cluster center of the current
2565  cluster)
2566  #param cluster the reference IMP.pmi.output.Cluster object
2567  """
2568  if reference == "Absolute":
2569  _ = self.stath0[0]
2570  elif reference == "Relative":
2571  if cluster.center_index:
2572  n0 = cluster.center_index
2573  else:
2574  n0 = cluster.members[0]
2575  _ = self.stath0[n0]
2576 
2578  """
2579  compute the molecular assignments between multiple copies
2580  of the same sequence. It changes the Copy index of Molecules
2581  """
2582  _ = self.stath1[n1]
2583  _, molecular_assignment = self.rmsd()
2584  for (m0, c0), (m1, c1) in molecular_assignment.items():
2585  mol0 = self.molcopydict0[m0][c0]
2586  mol1 = self.molcopydict1[m1][c1]
2587  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2588  p1 = IMP.atom.Copy(mol1).get_particle()
2589  p1.set_value(cik0, c0)
2590 
2592  """
2593  Undo the Copy index assignment
2594  """
2595  _ = self.stath1[n1]
2596  _, molecular_assignment = self.rmsd()
2597  for (m0, c0), (m1, c1) in molecular_assignment.items():
2598  mol0 = self.molcopydict0[m0][c0]
2599  mol1 = self.molcopydict1[m1][c1]
2600  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2601  p1 = IMP.atom.Copy(mol1).get_particle()
2602  p1.set_value(cik0, c1)
2603 
2604  ####################
2605  # Container Functions
2606  ####################
2607 
2608  def __repr__(self):
2609  s = "AnalysisReplicaExchange\n"
2610  s += "---- number of clusters %s \n" % str(len(self.clusters))
2611  s += "---- number of models %s \n" % str(len(self.stath0))
2612  return s
2613 
2614  def __getitem__(self, int_slice_adaptor):
2615  if isinstance(int_slice_adaptor, int):
2616  return self.clusters[int_slice_adaptor]
2617  elif isinstance(int_slice_adaptor, slice):
2618  return self.__iter__(int_slice_adaptor)
2619  else:
2620  raise TypeError("Unknown Type")
2621 
2622  def __len__(self):
2623  return len(self.clusters)
2624 
2625  def __iter__(self, slice_key=None):
2626  if slice_key is None:
2627  for i in range(len(self)):
2628  yield self[i]
2629  else:
2630  for i in range(len(self))[slice_key]:
2631  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2510
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2558
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1839
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: pmi/tools.py:1062
def precision
Compute the precision of a cluster.
Definition: macros.py:1937
CheckLevel get_check_level()
Get the current audit mode.
Definition: exception.h:80
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:955
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: pmi/tools.py:104
A container for models organized into clusters.
Definition: output.py:1505
Sample using molecular dynamics.
Definition: samplers.py:346
def aggregate
initial filling of the clusters.
Definition: macros.py:2393
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:632
A macro to help setup and run replica exchange.
Definition: macros.py:65
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:633
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1902
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1986
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: pmi/tools.py:1159
A helper output for model evaluation.
Miscellaneous utilities.
Definition: pmi/tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1699
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1780
def __init__
Construction of the Class.
Definition: macros.py:1652
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:859
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1797
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2591
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1715
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2474
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1877
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1098
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2577
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1641
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:646
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2420
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:1007
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1859
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1018
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:140
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21
def update_seldicts
Update the seldicts.
Definition: macros.py:2362
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2351
def scatter_and_gather
Synchronize data over a parallel run.
Definition: pmi/tools.py:542
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1744
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
Collect timing information.
Definition: pmi/tools.py:119
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1708
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9886
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:70
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2466
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:699
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1253
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1790
Class to handle individual particles of a Model object.
Definition: Particle.h:45
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:870
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1966
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:675
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1731
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1830
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2451
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: pmi/tools.py:1135
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1805
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: pmi/tools.py:499
Sample using replica exchange.
Definition: samplers.py:437
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27