1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
16 from pathlib
import Path
18 from operator
import itemgetter
19 from collections
import defaultdict
29 """Replace samplers.MPI_values when in test mode"""
30 def get_percentile(self, name):
35 """All restraints that are written out to the RMF file"""
36 def __init__(self, model, user_restraints):
38 self._user_restraints = user_restraints
if user_restraints
else []
41 return (len(self._user_restraints)
42 + self._rmf_rs.get_number_of_restraints())
47 def __getitem__(self, i):
49 def __init__(self, r):
50 self.r = IMP.RestraintSet.get_from(r)
52 def get_restraint(self):
55 lenuser = len(self._user_restraints)
57 return self._user_restraints[i]
58 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59 r = self._rmf_rs.get_restraint(i - lenuser)
60 return FakePMIWrapper(r)
62 raise IndexError(
"Out of range")
66 """A macro to help setup and run replica exchange.
67 Supports Monte Carlo and molecular dynamics.
68 Produces trajectory RMF files, best PDB structures,
69 and output stat files.
72 monte_carlo_sample_objects=
None,
73 molecular_dynamics_sample_objects=
None,
75 rmf_output_objects=
None,
76 monte_carlo_temperature=1.0,
77 simulated_annealing=
False,
78 simulated_annealing_minimum_temperature=1.0,
79 simulated_annealing_maximum_temperature=2.5,
80 simulated_annealing_minimum_temperature_nframes=100,
81 simulated_annealing_maximum_temperature_nframes=100,
82 replica_exchange_minimum_temperature=1.0,
83 replica_exchange_maximum_temperature=2.5,
84 replica_exchange_swap=
True,
86 number_of_best_scoring_models=500,
89 molecular_dynamics_steps=10,
90 molecular_dynamics_max_time_step=1.0,
91 number_of_frames=1000,
92 save_coordinates_mode=
"lowest_temperature",
93 nframes_write_coordinates=1,
94 write_initial_rmf=
True,
95 initial_rmf_name_suffix=
"initial",
96 stat_file_name_suffix=
"stat",
97 best_pdb_name_suffix=
"model",
100 do_create_directories=
True,
101 global_output_directory=
"./",
103 best_pdb_dir=
"pdbs/",
104 replica_stat_file_suffix=
"stat_replica",
105 em_object_for_rmf=
None,
107 replica_exchange_object=
None,
111 nestor_restraints=
None,
112 nestor_rmf_fname_prefix=
"nested",
115 @param model The IMP model
116 @param root_hier Top-level (System)hierarchy
117 @param monte_carlo_sample_objects Objects for MC sampling, which
118 should generally be a simple list of Mover objects, e.g.
119 from DegreesOfFreedom.get_movers().
120 @param molecular_dynamics_sample_objects Objects for MD sampling,
121 which should generally be a simple list of particles.
122 @param output_objects A list of structural objects and restraints
123 that will be included in output (ie, statistics "stat"
124 files). Any object that provides a get_output() method
125 can be used here. If None is passed
126 the macro will not write stat files.
127 @param rmf_output_objects A list of structural objects and
128 restraints that will be included in rmf. Any object
129 that provides a get_output() method can be used here.
130 @param monte_carlo_temperature MC temp (may need to be optimized
131 based on post-sampling analysis)
132 @param simulated_annealing If True, perform simulated annealing
133 @param simulated_annealing_minimum_temperature Should generally be
134 the same as monte_carlo_temperature.
135 @param simulated_annealing_minimum_temperature_nframes Number of
136 frames to compute at minimum temperature.
137 @param simulated_annealing_maximum_temperature_nframes Number of
139 temps > simulated_annealing_maximum_temperature.
140 @param replica_exchange_minimum_temperature Low temp for REX; should
141 generally be the same as monte_carlo_temperature.
142 @param replica_exchange_maximum_temperature High temp for REX
143 @param replica_exchange_swap Boolean, enable disable temperature
145 @param num_sample_rounds Number of rounds of MC/MD per cycle
146 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
147 models to keep around for analysis.
148 @param mmcif If True, write best scoring models in mmCIF format;
149 if False (the default), write in legacy PDB format.
150 @param best_pdb_dir The directory under `global_output_directory`
151 where best-scoring PDB/mmCIF files are written.
152 @param best_pdb_name_suffix Part of the file name for best-scoring
154 @param monte_carlo_steps Number of MC steps per round
155 @param self_adaptive self adaptive scheme for Monte Carlo movers
156 @param molecular_dynamics_steps Number of MD steps per round
157 @param molecular_dynamics_max_time_step Max time step for MD
158 @param number_of_frames Number of REX frames to run
159 @param save_coordinates_mode string: how to save coordinates.
160 "lowest_temperature" (default) only the lowest temperatures
162 "25th_score" all replicas whose score is below the 25th
164 "50th_score" all replicas whose score is below the 50th
166 "75th_score" all replicas whose score is below the 75th
168 @param nframes_write_coordinates How often to write the coordinates
170 @param write_initial_rmf Write the initial configuration
171 @param global_output_directory Folder that will be created to house
173 @param test_mode Set to True to avoid writing any files, just test
175 @param score_moved If True, attempt to speed up Monte Carlo
176 sampling by caching scoring function terms on particles
178 @param use_nestor If True, follows the Nested Sampling workflow
179 of the NestOR module and skips writing stat files and
181 @param nestor_restraints A list of restraints for which
182 likelihoods are to be computed for use by NestOR module.
183 @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
184 files generated by NestOR .
185 @param use_jax If set to True, sample the scoring function using
186 JAX instead of IMP's internal C++ implementation (requires
187 that all PMI restraints used have a JAX implementation).
193 if output_objects == []:
196 self.output_objects = []
198 self.output_objects = output_objects
199 self.rmf_output_objects = rmf_output_objects
201 and not root_hier.get_parent()):
202 if self.output_objects
is not None:
203 self.output_objects.append(
205 if self.rmf_output_objects
is not None:
206 self.rmf_output_objects.append(
208 self.root_hier = root_hier
209 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
210 self.vars[
"number_of_states"] = len(states)
212 self.root_hiers = states
213 self.is_multi_state =
True
215 self.root_hier = root_hier
216 self.is_multi_state =
False
218 raise TypeError(
"Must provide System hierarchy (root_hier)")
220 self._rmf_restraints = _RMFRestraints(model,
None)
221 self.em_object_for_rmf = em_object_for_rmf
222 self.monte_carlo_sample_objects = monte_carlo_sample_objects
223 self.vars[
"self_adaptive"] = self_adaptive
224 self.molecular_dynamics_sample_objects = \
225 molecular_dynamics_sample_objects
226 self.replica_exchange_object = replica_exchange_object
227 self.molecular_dynamics_max_time_step = \
228 molecular_dynamics_max_time_step
229 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
230 self.vars[
"replica_exchange_minimum_temperature"] = \
231 replica_exchange_minimum_temperature
232 self.vars[
"replica_exchange_maximum_temperature"] = \
233 replica_exchange_maximum_temperature
234 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
235 self.vars[
"simulated_annealing"] = simulated_annealing
236 self.vars[
"simulated_annealing_minimum_temperature"] = \
237 simulated_annealing_minimum_temperature
238 self.vars[
"simulated_annealing_maximum_temperature"] = \
239 simulated_annealing_maximum_temperature
240 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
241 simulated_annealing_minimum_temperature_nframes
242 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
243 simulated_annealing_maximum_temperature_nframes
245 self.vars[
"num_sample_rounds"] = num_sample_rounds
247 "number_of_best_scoring_models"] = number_of_best_scoring_models
248 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
249 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
250 self.vars[
"number_of_frames"] = number_of_frames
251 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
252 "50th_score",
"75th_score"):
253 raise Exception(
"save_coordinates_mode has unrecognized value")
255 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
256 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
257 self.vars[
"write_initial_rmf"] = write_initial_rmf
258 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
259 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
260 self.vars[
"mmcif"] = mmcif
261 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
262 self.vars[
"do_clean_first"] = do_clean_first
263 self.vars[
"do_create_directories"] = do_create_directories
264 self.vars[
"global_output_directory"] = global_output_directory
265 self.vars[
"rmf_dir"] = rmf_dir
266 self.vars[
"best_pdb_dir"] = best_pdb_dir
267 self.vars[
"atomistic"] = atomistic
268 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
269 self.vars[
"geometries"] =
None
270 self.test_mode = test_mode
271 self.score_moved = score_moved
272 self.use_jax = use_jax
273 self.nest = use_nestor
274 self.nestor_restraints = nestor_restraints
275 self.nestor_rmf_fname = nestor_rmf_fname_prefix
278 if self.vars[
"geometries"]
is None:
279 self.vars[
"geometries"] = list(geometries)
281 self.vars[
"geometries"].extend(geometries)
284 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
285 "rmfs/*.rmf3 for each replica ")
286 print(
"--- it stores the best scoring pdb models in pdbs/")
287 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
288 "lowest temperature")
289 print(
"--- variables:")
290 keys = list(self.vars.keys())
293 print(
"------", v.ljust(30), self.vars[v])
294 print(
"Use nestor: ", self.nest)
296 def get_replica_exchange_object(self):
297 return self.replica_exchange_object
299 def _add_provenance(self, sampler_md, sampler_mc):
300 """Record details about the sampling in the IMP Hierarchies"""
303 method =
"Molecular Dynamics"
304 iterations += self.vars[
"molecular_dynamics_steps"]
306 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
307 iterations += self.vars[
"monte_carlo_steps"]
309 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
311 iterations *= self.vars[
"num_sample_rounds"]
313 pi = self.model.add_particle(
"sampling")
315 self.model, pi, method, self.vars[
"number_of_frames"],
317 p.set_number_of_replicas(
318 self.replica_exchange_object.get_number_of_replicas())
319 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
322 def execute_macro(self):
323 temp_index_factor = 100000.0
327 if self.monte_carlo_sample_objects
is not None:
328 print(
"Setting up MonteCarlo")
330 self.model, self.monte_carlo_sample_objects,
331 self.vars[
"monte_carlo_temperature"],
332 score_moved=self.score_moved,
333 use_jax=self.use_jax)
334 if self.vars[
"simulated_annealing"]:
335 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
336 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
338 "simulated_annealing_minimum_temperature_nframes"]
340 "simulated_annealing_maximum_temperature_nframes"]
341 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
342 if self.vars[
"self_adaptive"]:
343 sampler_mc.set_self_adaptive(
344 isselfadaptive=self.vars[
"self_adaptive"])
345 if self.output_objects
is not None:
346 self.output_objects.append(sampler_mc)
347 if self.rmf_output_objects
is not None:
348 self.rmf_output_objects.append(sampler_mc)
349 samplers.append(sampler_mc)
351 if self.molecular_dynamics_sample_objects
is not None:
352 print(
"Setting up MolecularDynamics")
354 self.model, self.molecular_dynamics_sample_objects,
355 self.vars[
"monte_carlo_temperature"],
356 maximum_time_step=self.molecular_dynamics_max_time_step,
357 use_jax=self.use_jax)
358 if self.vars[
"simulated_annealing"]:
359 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
360 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
362 "simulated_annealing_minimum_temperature_nframes"]
364 "simulated_annealing_maximum_temperature_nframes"]
365 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
366 if self.output_objects
is not None:
367 self.output_objects.append(sampler_md)
368 if self.rmf_output_objects
is not None:
369 self.rmf_output_objects.append(sampler_md)
370 samplers.append(sampler_md)
373 print(
"Setting up ReplicaExchange")
375 self.model, self.vars[
"replica_exchange_minimum_temperature"],
376 self.vars[
"replica_exchange_maximum_temperature"], samplers,
377 replica_exchange_object=self.replica_exchange_object)
378 self.replica_exchange_object = rex.rem
380 myindex = rex.get_my_index()
381 if self.output_objects
is not None:
382 self.output_objects.append(rex)
383 if self.rmf_output_objects
is not None:
384 self.rmf_output_objects.append(rex)
388 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
392 globaldir = self.vars[
"global_output_directory"] +
"/"
393 rmf_dir = globaldir + self.vars[
"rmf_dir"]
394 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
396 if not self.test_mode
and not self.nest:
397 if self.vars[
"do_clean_first"]:
400 if self.vars[
"do_create_directories"]:
403 os.makedirs(globaldir)
411 if not self.is_multi_state:
417 for n
in range(self.vars[
"number_of_states"]):
419 os.makedirs(pdb_dir +
"/" + str(n))
426 if self.output_objects
is not None:
427 self.output_objects.append(sw)
428 if self.rmf_output_objects
is not None:
429 self.rmf_output_objects.append(sw)
434 print(
"Setting up stat file")
435 low_temp_stat_file = globaldir + \
436 self.vars[
"stat_file_name_suffix"] +
"." + \
437 str(myindex) +
".out"
440 if not self.test_mode:
443 if not self.test_mode
and not self.nest:
444 if self.output_objects
is not None:
445 output.init_stat2(low_temp_stat_file,
447 extralabels=[
"rmf_file",
"rmf_frame_index"])
449 print(
"Stat file writing is disabled")
451 if self.rmf_output_objects
is not None and not self.nest:
452 print(
"Stat info being written in the rmf file")
454 if not self.test_mode
and not self.nest:
455 print(
"Setting up replica stat file")
456 replica_stat_file = globaldir + \
457 self.vars[
"replica_stat_file_suffix"] +
"." + \
458 str(myindex) +
".out"
459 if not self.test_mode:
460 output.init_stat2(replica_stat_file, [rex],
461 extralabels=[
"score"])
463 print(
"Setting up best pdb files")
464 if not self.is_multi_state:
465 if self.vars[
"number_of_best_scoring_models"] > 0:
466 output.init_pdb_best_scoring(
467 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
469 self.vars[
"number_of_best_scoring_models"],
470 replica_exchange=
True,
471 mmcif=self.vars[
'mmcif'],
472 best_score_file=globaldir +
"best.scores.rex.py")
473 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
475 pdb_dir +
"/" +
"model.psf",
477 self.vars[
"best_pdb_name_suffix"] + pdbext)
479 if self.vars[
"number_of_best_scoring_models"] > 0:
480 for n
in range(self.vars[
"number_of_states"]):
481 output.init_pdb_best_scoring(
482 pdb_dir +
"/" + str(n) +
"/" +
483 self.vars[
"best_pdb_name_suffix"],
485 self.vars[
"number_of_best_scoring_models"],
486 replica_exchange=
True,
487 mmcif=self.vars[
'mmcif'],
488 best_score_file=globaldir +
"best.scores.rex.py")
489 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
491 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
492 pdb_dir +
"/" + str(n) +
"/" +
493 self.vars[
"best_pdb_name_suffix"] + pdbext)
496 if self.em_object_for_rmf
is not None:
497 output_hierarchies = [
499 self.em_object_for_rmf.get_density_as_hierarchy(
502 output_hierarchies = [self.root_hier]
504 if not self.test_mode
and not self.nest:
505 print(
"Setting up and writing initial rmf coordinate file")
506 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
507 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
509 listofobjects=self.rmf_output_objects)
510 if self._rmf_restraints:
511 output.add_restraints_to_rmf(
512 init_suffix +
"." + str(myindex) +
".rmf3",
513 self._rmf_restraints)
514 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
515 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
517 if not self.test_mode:
518 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
520 mpivs = _MockMPIValues()
522 self._add_provenance(sampler_md, sampler_mc)
524 if not self.test_mode
and not self.nest:
525 print(
"Setting up production rmf files")
526 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
527 output.init_rmf(rmfname, output_hierarchies,
528 geometries=self.vars[
"geometries"],
529 listofobjects=self.rmf_output_objects)
531 if self._rmf_restraints:
532 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
534 if not self.test_mode
and self.nest:
535 print(
"Setting up NestOR rmf files")
536 nestor_rmf_fname = str(self.nestor_rmf_fname) +
'_' + \
537 str(self.replica_exchange_object.get_my_index()) +
'.rmf3'
539 output.init_rmf(nestor_rmf_fname, output_hierarchies,
540 geometries=self.vars[
"geometries"],
541 listofobjects=self.rmf_output_objects)
543 ntimes_at_low_temp = 0
545 if myindex == 0
and not self.nest:
547 self.replica_exchange_object.set_was_used(
True)
548 nframes = self.vars[
"number_of_frames"]
552 sampled_likelihoods = []
553 for i
in range(nframes):
558 for nr
in range(self.vars[
"num_sample_rounds"]):
559 if sampler_md
is not None:
560 score = sampler_md.optimize(
561 self.vars[
"molecular_dynamics_steps"])
562 if sampler_mc
is not None:
563 score = sampler_mc.optimize(
564 self.vars[
"monte_carlo_steps"])
567 self.model).evaluate(
False)
572 self.model).evaluate(
False)
573 assert abs(score - check_score) < 1e-4
574 mpivs.set_value(
"score", score)
576 output.set_output_entry(
"score", score)
578 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
580 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
581 save_frame = (min_temp_index == my_temp_index)
582 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
583 score_perc = mpivs.get_percentile(
"score")
584 save_frame = (score_perc*100.0 <= 25.0)
585 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
586 score_perc = mpivs.get_percentile(
"score")
587 save_frame = (score_perc*100.0 <= 50.0)
588 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
589 score_perc = mpivs.get_percentile(
"score")
590 save_frame = (score_perc*100.0 <= 75.0)
593 if save_frame
and not self.test_mode:
597 print(
"--- frame %s score %s " % (str(i), str(score)))
600 if math.isnan(score):
601 sampled_likelihoods.append(math.nan)
603 likelihood_for_sample = 1
604 for rstrnt
in self.nestor_restraints:
605 likelihood_for_sample *= rstrnt.get_likelihood()
606 sampled_likelihoods.append(likelihood_for_sample)
607 output.write_rmf(nestor_rmf_fname)
609 if not self.test_mode
and not self.nest:
610 if i % self.vars[
"nframes_write_coordinates"] == 0:
611 print(
'--- writing coordinates')
612 if self.vars[
"number_of_best_scoring_models"] > 0:
613 output.write_pdb_best_scoring(score)
614 output.write_rmf(rmfname)
615 output.set_output_entry(
"rmf_file", rmfname)
616 output.set_output_entry(
"rmf_frame_index",
619 output.set_output_entry(
"rmf_file", rmfname)
620 output.set_output_entry(
"rmf_frame_index",
'-1')
621 if self.output_objects
is not None:
622 output.write_stat2(low_temp_stat_file)
623 ntimes_at_low_temp += 1
625 if not self.test_mode
and not self.nest:
626 output.write_stat2(replica_stat_file)
627 if self.vars[
"replica_exchange_swap"]:
628 rex.swap_temp(i, score)
630 if self.nest
and len(sampled_likelihoods) > 0:
631 with open(
"likelihoods_"
632 + str(self.replica_exchange_object.get_my_index()),
634 pickle.dump(sampled_likelihoods, lif)
636 output.close_rmf(nestor_rmf_fname)
638 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
639 p.add_replica_exchange(state, self)
641 if not self.test_mode
and not self.nest:
642 print(
"closing production rmf files")
643 output.close_rmf(rmfname)
647 """A macro to build a IMP::pmi::topology::System based on a
648 TopologyReader object.
650 Easily create multi-state systems by calling this macro
651 repeatedly with different TopologyReader objects!
652 A useful function is get_molecules() which returns the PMI Molecules
653 grouped by state as a dictionary with key = (molecule name),
654 value = IMP.pmi.topology.Molecule
655 Quick multi-state system:
658 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
659 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
660 bs = IMP.pmi.macros.BuildSystem(model)
661 bs.add_state(reader1)
662 bs.add_state(reader2)
663 bs.execute_macro() # build everything including degrees of freedom
664 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
665 ### now you have a two state system, you add restraints etc
667 @note The "domain name" entry of the topology reader is not used.
668 All molecules are set up by the component name, but split into rigid bodies
672 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
673 'RNA': IMP.pmi.alphabets.rna}
675 def __init__(self, model, sequence_connectivity_scale=4.0,
676 force_create_gmm_files=
False, resolutions=[1, 10],
679 @param model An IMP Model
680 @param sequence_connectivity_scale For scaling the connectivity
682 @param force_create_gmm_files If True, will sample and create GMMs
683 no matter what. If False, will only sample if the
684 files don't exist. If number of Gaussians is zero, won't
686 @param resolutions The resolutions to build for structured regions
687 @param name The name of the top-level hierarchy node.
694 self._domain_res = []
696 self.force_create_gmm_files = force_create_gmm_files
697 self.resolutions = resolutions
699 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
701 """Add a state using the topology info in a
702 IMP::pmi::topology::TopologyReader object.
703 When you are done adding states, call execute_macro()
704 @param reader The TopologyReader object
705 @param keep_chain_id If True, keep the chain IDs from the
706 original PDB files, if available
707 @param fasta_name_map dictionary for converting protein names
708 found in the fasta file
709 @param chain_ids A list or string of chain IDs for assigning to
710 newly-created molecules, e.g.
711 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
712 If not specified, chain IDs A through Z are assigned, then
713 AA through AZ, then BA through BZ, and so on, in the same
716 state = self.system.create_state()
717 self._readers.append(reader)
719 these_domain_res = {}
721 if chain_ids
is None:
722 chain_ids = IMP.pmi.output._ChainIDs()
727 for molname
in reader.get_molecules():
728 copies = reader.get_molecules()[molname].domains
729 for nc, copyname
in enumerate(copies):
730 print(
"BuildSystem.add_state: setting up molecule %s copy "
731 "number %s" % (molname, str(nc)))
732 copy = copies[copyname]
735 all_chains = [c
for c
in copy
if c.chain
is not None]
737 chain_id = all_chains[0].chain
739 chain_id = chain_ids[numchain]
741 "No PDBs specified for %s, so keep_chain_id has "
742 "no effect; using default chain ID '%s'"
745 chain_id = chain_ids[numchain]
747 alphabet = IMP.pmi.alphabets.amino_acid
748 fasta_flag = copy[0].fasta_flag
749 if fasta_flag
in self._alphabets:
750 alphabet = self._alphabets[fasta_flag]
752 copy[0].fasta_file, fasta_name_map)
753 seq = seqs[copy[0].fasta_id]
754 print(
"BuildSystem.add_state: molecule %s sequence has "
755 "%s residues" % (molname, len(seq)))
756 orig_mol = state.create_molecule(
757 molname, seq, chain_id, alphabet=alphabet,
758 uniprot=seqs.uniprot.get(copy[0].fasta_id))
762 print(
"BuildSystem.add_state: creating a copy for "
763 "molecule %s" % molname)
764 mol = orig_mol.create_copy(chain_id)
767 for domainnumber, domain
in enumerate(copy):
768 print(
"BuildSystem.add_state: ---- setting up domain %s "
769 "of molecule %s" % (domainnumber, molname))
772 these_domains[domain.get_unique_name()] = domain
773 if domain.residue_range == []
or \
774 domain.residue_range
is None:
775 domain_res = mol.get_residues()
777 start = domain.residue_range[0]+domain.pdb_offset
778 if domain.residue_range[1] ==
'END':
779 end = len(mol.sequence)
781 end = domain.residue_range[1]+domain.pdb_offset
782 domain_res = mol.residue_range(start-1, end-1)
783 print(
"BuildSystem.add_state: -------- domain %s of "
784 "molecule %s extends from residue %s to "
786 % (domainnumber, molname, start, end))
787 if domain.pdb_file ==
"BEADS":
788 print(
"BuildSystem.add_state: -------- domain %s of "
789 "molecule %s represented by BEADS "
790 % (domainnumber, molname))
791 mol.add_representation(
793 resolutions=[domain.bead_size],
794 setup_particles_as_densities=(
795 domain.em_residues_per_gaussian != 0),
797 these_domain_res[domain.get_unique_name()] = \
799 elif domain.pdb_file ==
"IDEAL_HELIX":
800 print(
"BuildSystem.add_state: -------- domain %s of "
801 "molecule %s represented by IDEAL_HELIX "
802 % (domainnumber, molname))
803 emper = domain.em_residues_per_gaussian
804 mol.add_representation(
806 resolutions=self.resolutions,
808 density_residues_per_component=emper,
809 density_prefix=domain.density_prefix,
810 density_force_compute=self.force_create_gmm_files,
812 these_domain_res[domain.get_unique_name()] = \
815 print(
"BuildSystem.add_state: -------- domain %s of "
816 "molecule %s represented by pdb file %s "
817 % (domainnumber, molname, domain.pdb_file))
818 domain_atomic = mol.add_structure(domain.pdb_file,
820 domain.residue_range,
823 domain_non_atomic = domain_res - domain_atomic
824 if not domain.em_residues_per_gaussian:
825 mol.add_representation(
826 domain_atomic, resolutions=self.resolutions,
828 if len(domain_non_atomic) > 0:
829 mol.add_representation(
831 resolutions=[domain.bead_size],
834 print(
"BuildSystem.add_state: -------- domain %s "
835 "of molecule %s represented by gaussians "
836 % (domainnumber, molname))
837 emper = domain.em_residues_per_gaussian
838 creategmm = self.force_create_gmm_files
839 mol.add_representation(
841 resolutions=self.resolutions,
842 density_residues_per_component=emper,
843 density_prefix=domain.density_prefix,
844 density_force_compute=creategmm,
846 if len(domain_non_atomic) > 0:
847 mol.add_representation(
849 resolutions=[domain.bead_size],
850 setup_particles_as_densities=
True,
852 these_domain_res[domain.get_unique_name()] = (
853 domain_atomic, domain_non_atomic)
854 self._domain_res.append(these_domain_res)
855 self._domains.append(these_domains)
856 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
860 """Return list of all molecules grouped by state.
861 For each state, it's a dictionary of Molecules where key is the
864 return [s.get_molecules()
for s
in self.system.get_states()]
866 def get_molecule(self, molname, copy_index=0, state_index=0):
867 return self.system.get_states()[state_index].
get_molecules()[
871 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
872 """Builds representations and sets up degrees of freedom"""
873 print(
"BuildSystem.execute_macro: building representations")
874 self.root_hier = self.system.build()
876 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
878 for nstate, reader
in enumerate(self._readers):
879 rbs = reader.get_rigid_bodies()
880 srbs = reader.get_super_rigid_bodies()
881 csrbs = reader.get_chains_of_super_rigid_bodies()
884 domains_in_rbs = set()
886 print(
"BuildSystem.execute_macro: -------- building rigid "
887 "body %s" % (str(rblist)))
888 all_res = IMP.pmi.tools.OrderedSet()
889 bead_res = IMP.pmi.tools.OrderedSet()
891 domain = self._domains[nstate][dname]
892 print(
"BuildSystem.execute_macro: -------- adding %s"
894 all_res |= self._domain_res[nstate][dname][0]
895 bead_res |= self._domain_res[nstate][dname][1]
896 domains_in_rbs.add(dname)
898 print(
"BuildSystem.execute_macro: -------- creating rigid "
899 "body with max_trans %s max_rot %s "
900 "non_rigid_max_trans %s"
901 % (str(max_rb_trans), str(max_rb_rot),
902 str(max_bead_trans)))
903 self.dof.create_rigid_body(all_res,
904 nonrigid_parts=bead_res,
905 max_trans=max_rb_trans,
907 nonrigid_max_trans=max_bead_trans,
908 name=
"RigidBody %s" % dname)
911 for dname, domain
in self._domains[nstate].items():
912 if dname
not in domains_in_rbs:
913 if domain.pdb_file !=
"BEADS":
915 "No rigid bodies set for %s. Residues read from "
916 "the PDB file will not be sampled - only regions "
917 "missing from the PDB will be treated flexibly. "
918 "To sample the entire sequence, use BEADS instead "
919 "of a PDB file name" % dname,
921 self.dof.create_flexible_beads(
922 self._domain_res[nstate][dname][1],
923 max_trans=max_bead_trans)
927 print(
"BuildSystem.execute_macro: -------- building "
928 "super rigid body %s" % (str(srblist)))
929 all_res = IMP.pmi.tools.OrderedSet()
930 for dname
in srblist:
931 print(
"BuildSystem.execute_macro: -------- adding %s"
933 all_res |= self._domain_res[nstate][dname][0]
934 all_res |= self._domain_res[nstate][dname][1]
936 print(
"BuildSystem.execute_macro: -------- creating super "
937 "rigid body with max_trans %s max_rot %s "
938 % (str(max_srb_trans), str(max_srb_rot)))
939 self.dof.create_super_rigid_body(
940 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
943 for csrblist
in csrbs:
944 all_res = IMP.pmi.tools.OrderedSet()
945 for dname
in csrblist:
946 all_res |= self._domain_res[nstate][dname][0]
947 all_res |= self._domain_res[nstate][dname][1]
948 all_res = list(all_res)
949 all_res.sort(key=
lambda r: r.get_index())
950 self.dof.create_main_chain_mover(all_res)
951 return self.root_hier, self.dof
956 """A macro for running all the basic operations of analysis.
957 Includes clustering, precision analysis, and making ensemble density maps.
958 A number of plots are also supported.
961 merge_directories=[
"./"],
962 stat_file_name_suffix=
"stat",
963 best_pdb_name_suffix=
"model",
965 do_create_directories=
True,
966 global_output_directory=
"output/",
967 replica_stat_file_suffix=
"stat_replica",
968 global_analysis_result_directory=
"./analysis/",
971 @param model The IMP model
972 @param stat_file_name_suffix
973 @param merge_directories The directories containing output files
974 @param best_pdb_name_suffix
975 @param do_clean_first
976 @param do_create_directories
977 @param global_output_directory Where everything is
978 @param replica_stat_file_suffix
979 @param global_analysis_result_directory
980 @param test_mode If True, nothing is changed on disk
984 from mpi4py
import MPI
985 self.comm = MPI.COMM_WORLD
986 self.rank = self.comm.Get_rank()
987 self.number_of_processes = self.comm.size
990 self.number_of_processes = 1
992 self.test_mode = test_mode
993 self._protocol_output = []
994 self.cluster_obj =
None
996 stat_dir = global_output_directory
999 for rd
in merge_directories:
1000 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
1001 if len(stat_files) == 0:
1002 warnings.warn(
"no stat files found in %s"
1003 % os.path.join(rd, stat_dir),
1005 self.stat_files += stat_files
1008 """Capture details of the modeling protocol.
1009 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1012 self._protocol_output.append((p, p._last_state))
1015 score_key=
"Total_Score",
1016 rmf_file_key=
"rmf_file",
1017 rmf_file_frame_key=
"rmf_frame_index",
1020 nframes_trajectory=10000):
1021 """ Get a trajectory of the modeling run, for generating
1022 demonstrative movies
1024 @param score_key The score for ranking models
1025 @param rmf_file_key Key pointing to RMF filename
1026 @param rmf_file_frame_key Key pointing to RMF frame number
1027 @param outputdir The local output directory used in the run
1028 @param get_every Extract every nth frame
1029 @param nframes_trajectory Total number of frames of the trajectory
1034 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1036 score_list = list(map(float, trajectory_models[2]))
1038 max_score = max(score_list)
1039 min_score = min(score_list)
1041 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1042 for i
in range(nframes_trajectory)]
1043 binned_scores = [
None]*nframes_trajectory
1044 binned_model_indexes = [-1]*nframes_trajectory
1046 for model_index, s
in enumerate(score_list):
1047 bins_score_diffs = [abs(s-b)
for b
in bins]
1048 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1049 if binned_scores[bin_index]
is None:
1050 binned_scores[bin_index] = s
1051 binned_model_indexes[bin_index] = model_index
1053 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1054 new_diff = abs(s-bins[bin_index])
1055 if new_diff < old_diff:
1056 binned_scores[bin_index] = s
1057 binned_model_indexes[bin_index] = model_index
1059 print(binned_scores)
1060 print(binned_model_indexes)
1062 def _expand_ambiguity(self, prot, d):
1063 """If using PMI2, expand the dictionary to include copies as
1066 This also keeps the states separate.
1071 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1074 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1075 if isinstance(val, tuple):
1083 for nst
in range(len(states)):
1085 copies = sel.get_selected_particles(with_representation=
False)
1087 for nc
in range(len(copies)):
1089 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1090 (start, stop, name, nc, nst)
1092 newdict[
'%s..%i' % (name, nc)] = \
1093 (start, stop, name, nc, nst)
1099 score_key=
"Total_Score",
1100 rmf_file_key=
"rmf_file",
1101 rmf_file_frame_key=
"rmf_frame_index",
1103 prefiltervalue=
None,
1106 alignment_components=
None,
1107 number_of_best_scoring_models=10,
1108 rmsd_calculation_components=
None,
1109 distance_matrix_file=
'distances.mat',
1110 load_distance_matrix_file=
False,
1111 skip_clustering=
False,
1112 number_of_clusters=1,
1114 exit_after_display=
True,
1116 first_and_last_frames=
None,
1117 density_custom_ranges=
None,
1118 write_pdb_with_centered_coordinates=
False,
1120 """Get the best scoring models, compute a distance matrix,
1121 cluster them, and create density maps.
1123 Tuple format: "molname" just the molecule,
1124 or (start,stop,molname,copy_num(optional),state_num(optional)
1125 Can pass None for copy or state to ignore that field.
1126 If you don't pass a specific copy number
1128 @param score_key The score for ranking models.
1129 @param rmf_file_key Key pointing to RMF filename
1130 @param rmf_file_frame_key Key pointing to RMF frame number
1131 @param state_number State number to analyze
1132 @param prefiltervalue Only include frames where the
1133 score key is below this value
1134 @param feature_keys Keywords for which you want to
1135 calculate average, medians, etc.
1136 If you pass "Keyname" it'll include everything that matches
1138 @param outputdir The local output directory used in
1140 @param alignment_components Dictionary with keys=groupname,
1141 values are tuples for aligning the structures
1142 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1143 @param number_of_best_scoring_models Num models to keep per run
1144 @param rmsd_calculation_components For calculating RMSD
1145 (same format as alignment_components)
1146 @param distance_matrix_file Where to store/read the
1148 @param load_distance_matrix_file Try to load the distance
1150 @param skip_clustering Just extract the best scoring
1151 models and save the pdbs
1152 @param number_of_clusters Number of k-means clusters
1153 @param display_plot Display the distance matrix
1154 @param exit_after_display Exit after displaying distance
1156 @param get_every Extract every nth frame
1157 @param first_and_last_frames A tuple with the first and last
1158 frames to be analyzed. Values are percentages!
1159 Default: get all frames
1160 @param density_custom_ranges For density calculation
1161 (same format as alignment_components)
1162 @param write_pdb_with_centered_coordinates
1163 @param voxel_size Used for the density output
1167 self._outputdir = Path(outputdir).absolute()
1168 self._number_of_clusters = number_of_clusters
1169 for p, state
in self._protocol_output:
1170 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1181 if not load_distance_matrix_file:
1182 if len(self.stat_files) == 0:
1183 print(
"ERROR: no stat file found in the given path")
1185 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1186 self.stat_files, self.number_of_processes)[self.rank]
1189 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1190 if k
in feature_keys:
1192 "no need to pass " + k +
" to feature_keys.",
1194 feature_keys.remove(k)
1197 my_stat_files, score_key, feature_keys, rmf_file_key,
1198 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1199 rmf_file_list = best_models[0]
1200 rmf_file_frame_list = best_models[1]
1201 score_list = best_models[2]
1202 feature_keyword_list_dict = best_models[3]
1208 if self.number_of_processes > 1:
1212 rmf_file_frame_list)
1213 for k
in feature_keyword_list_dict:
1214 feature_keyword_list_dict[k] = \
1216 feature_keyword_list_dict[k])
1219 score_rmf_tuples = list(zip(score_list,
1221 rmf_file_frame_list,
1222 list(range(len(score_list)))))
1224 if density_custom_ranges:
1225 for k
in density_custom_ranges:
1226 if not isinstance(density_custom_ranges[k], list):
1227 raise Exception(
"Density custom ranges: values must "
1228 "be lists of tuples")
1231 if first_and_last_frames
is not None:
1232 nframes = len(score_rmf_tuples)
1233 first_frame = int(first_and_last_frames[0] * nframes)
1234 last_frame = int(first_and_last_frames[1] * nframes)
1235 if last_frame > len(score_rmf_tuples):
1237 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1240 best_score_rmf_tuples = sorted(
1242 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1243 best_score_rmf_tuples = [t+(n,)
for n, t
in
1244 enumerate(best_score_rmf_tuples)]
1246 prov.append(IMP.pmi.io.FilterProvenance(
1247 "Best scoring", 0, number_of_best_scoring_models))
1249 best_score_feature_keyword_list_dict = defaultdict(list)
1250 for tpl
in best_score_rmf_tuples:
1252 for f
in feature_keyword_list_dict:
1253 best_score_feature_keyword_list_dict[f].append(
1254 feature_keyword_list_dict[f][index])
1255 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1256 best_score_rmf_tuples,
1257 self.number_of_processes)[self.rank]
1260 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1261 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1262 if rmsd_calculation_components
is not None:
1263 tmp = self._expand_ambiguity(
1264 prot_ahead, rmsd_calculation_components)
1265 if tmp != rmsd_calculation_components:
1266 print(
'Detected ambiguity, expand rmsd components to',
1268 rmsd_calculation_components = tmp
1269 if alignment_components
is not None:
1270 tmp = self._expand_ambiguity(prot_ahead,
1271 alignment_components)
1272 if tmp != alignment_components:
1273 print(
'Detected ambiguity, expand alignment '
1274 'components to', tmp)
1275 alignment_components = tmp
1281 self.model, my_best_score_rmf_tuples[0],
1282 rmsd_calculation_components, state_number=state_number)
1284 self.model, my_best_score_rmf_tuples, alignment_components,
1285 rmsd_calculation_components, state_number=state_number)
1293 all_coordinates = got_coords[0]
1296 alignment_coordinates = got_coords[1]
1299 rmsd_coordinates = got_coords[2]
1302 rmf_file_name_index_dict = got_coords[3]
1305 all_rmf_file_names = got_coords[4]
1311 if density_custom_ranges:
1313 density_custom_ranges, voxel=voxel_size)
1315 dircluster = os.path.join(outputdir,
1316 "all_models."+str(self.rank))
1322 os.mkdir(dircluster)
1325 clusstat = open(os.path.join(
1326 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1327 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1329 rmf_frame_number = tpl[2]
1332 for key
in best_score_feature_keyword_list_dict:
1334 best_score_feature_keyword_list_dict[key][index]
1338 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1339 self.model, rmf_frame_number, rmf_name)
1341 linking_successful = \
1342 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1343 self.model, prots, rs, rmf_frame_number,
1345 if not linking_successful:
1351 states = IMP.atom.get_by_type(
1352 prots[0], IMP.atom.STATE_TYPE)
1353 prot = states[state_number]
1358 coords_f1 = alignment_coordinates[cnt]
1360 coords_f2 = alignment_coordinates[cnt]
1363 coords_f1, coords_f2)
1364 transformation = Ali.align()[1]
1378 rb = rbm.get_rigid_body()
1388 out_pdb_fn = os.path.join(
1389 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1390 out_rmf_fn = os.path.join(
1391 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1392 o.init_pdb(out_pdb_fn, prot)
1393 tc = write_pdb_with_centered_coordinates
1394 o.write_pdb(out_pdb_fn,
1395 translate_to_geometric_center=tc)
1397 tmp_dict[
"local_pdb_file_name"] = \
1398 os.path.basename(out_pdb_fn)
1399 tmp_dict[
"rmf_file_full_path"] = rmf_name
1400 tmp_dict[
"local_rmf_file_name"] = \
1401 os.path.basename(out_rmf_fn)
1402 tmp_dict[
"local_rmf_frame_number"] = 0
1404 clusstat.write(str(tmp_dict)+
"\n")
1409 h.set_name(
"System")
1411 o.init_rmf(out_rmf_fn, [h], rs)
1413 o.write_rmf(out_rmf_fn)
1414 o.close_rmf(out_rmf_fn)
1416 if density_custom_ranges:
1417 DensModule.add_subunits_density(prot)
1419 if density_custom_ranges:
1420 DensModule.write_mrc(path=dircluster)
1425 if self.number_of_processes > 1:
1431 rmf_file_name_index_dict)
1433 alignment_coordinates)
1440 [best_score_feature_keyword_list_dict,
1441 rmf_file_name_index_dict],
1447 print(
"setup clustering class")
1450 for n, model_coordinate_dict
in enumerate(all_coordinates):
1452 if (alignment_components
is not None
1453 and len(self.cluster_obj.all_coords) == 0):
1455 self.cluster_obj.set_template(alignment_coordinates[n])
1456 self.cluster_obj.fill(all_rmf_file_names[n],
1457 rmsd_coordinates[n])
1458 print(
"Global calculating the distance matrix")
1461 self.cluster_obj.dist_matrix()
1465 self.cluster_obj.do_cluster(number_of_clusters)
1468 self.cluster_obj.plot_matrix(
1469 figurename=os.path.join(outputdir,
1471 if exit_after_display:
1473 self.cluster_obj.save_distance_matrix_file(
1474 file_name=distance_matrix_file)
1481 print(
"setup clustering class")
1483 self.cluster_obj.load_distance_matrix_file(
1484 file_name=distance_matrix_file)
1485 print(
"clustering with %s clusters" % str(number_of_clusters))
1486 self.cluster_obj.do_cluster(number_of_clusters)
1487 [best_score_feature_keyword_list_dict,
1488 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1491 self.cluster_obj.plot_matrix(figurename=os.path.join(
1492 outputdir,
'dist_matrix.pdf'))
1493 if exit_after_display:
1495 if self.number_of_processes > 1:
1503 print(self.cluster_obj.get_cluster_labels())
1504 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1505 print(
"rank %s " % str(self.rank))
1506 print(
"cluster %s " % str(n))
1507 print(
"cluster label %s " % str(cl))
1508 print(self.cluster_obj.get_cluster_label_names(cl))
1510 len(self.cluster_obj.get_cluster_label_names(cl))
1512 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1515 if density_custom_ranges:
1517 density_custom_ranges,
1520 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1522 os.mkdir(dircluster)
1528 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1529 clusstat = open(dircluster +
"stat.out",
"w")
1530 for k, structure_name
in enumerate(
1531 self.cluster_obj.get_cluster_label_names(cl)):
1534 tmp_dict.update(rmsd_dict)
1535 index = rmf_file_name_index_dict[structure_name]
1536 for key
in best_score_feature_keyword_list_dict:
1538 key] = best_score_feature_keyword_list_dict[
1544 rmf_name = structure_name.split(
"|")[0]
1545 rmf_frame_number = int(structure_name.split(
"|")[1])
1546 clusstat.write(str(tmp_dict) +
"\n")
1551 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1552 self.model, rmf_frame_number, rmf_name)
1554 linking_successful = \
1555 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1556 self.model, prots, rs, rmf_frame_number,
1558 if not linking_successful:
1563 states = IMP.atom.get_by_type(
1564 prots[0], IMP.atom.STATE_TYPE)
1565 prot = states[state_number]
1571 co = self.cluster_obj
1572 model_index = co.get_model_index_from_name(
1574 transformation = co.get_transformation_to_first_member(
1585 rb = rbm.get_rigid_body()
1594 if density_custom_ranges:
1595 DensModule.add_subunits_density(prot)
1600 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1601 o.write_pdb(dircluster + str(k) +
".pdb")
1606 h.set_name(
"System")
1608 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1609 o.write_rmf(dircluster + str(k) +
".rmf3")
1610 o.close_rmf(dircluster + str(k) +
".rmf3")
1615 if density_custom_ranges:
1616 DensModule.write_mrc(path=dircluster)
1619 if self.number_of_processes > 1:
1622 def get_cluster_rmsd(self, cluster_num):
1623 if self.cluster_obj
is None:
1625 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1627 def save_objects(self, objects, file_name):
1629 outf = open(file_name,
'wb')
1630 pickle.dump(objects, outf)
1633 def load_objects(self, file_name):
1635 inputf = open(file_name,
'rb')
1636 objects = pickle.load(inputf)
1644 This class contains analysis utilities to investigate ReplicaExchange
1652 def __init__(self, model, stat_files, best_models=None, score_key=None,
1655 Construction of the Class.
1656 @param model IMP.Model()
1657 @param stat_files list of string. Can be ascii stat files,
1659 @param best_models Integer. Number of best scoring models,
1660 if None: all models will be read
1661 @param score_key Use the provided stat key keyword as the score
1662 (by default, the total score is used)
1663 @param alignment boolean (Default=True). Align before computing
1668 self.best_models = best_models
1670 model, stat_files, self.best_models, score_key, cache=
True)
1672 StatHierarchyHandler=self.stath0)
1685 self.clusters.append(c)
1686 for n0
in range(len(self.stath0)):
1688 self.pairwise_rmsd = {}
1689 self.pairwise_molecular_assignment = {}
1690 self.alignment = alignment
1691 self.symmetric_molecules = {}
1692 self.issymmetricsel = {}
1694 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1696 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1701 Setup the selection onto which the rmsd is computed
1702 @param kwargs use IMP.atom.Selection keywords
1710 Store names of symmetric molecules
1712 self.symmetric_molecules[molecule_name] = 0
1717 Setup the selection onto which the alignment is computed
1718 @param kwargs use IMP.atom.Selection keywords
1726 def clean_clusters(self):
1727 for c
in self.clusters:
1731 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1733 Cluster the models based on RMSD.
1734 @param rmsd_cutoff Float the distance cutoff in Angstrom
1735 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1736 be used to compute rmsds
1738 self.clean_clusters()
1739 not_clustered = set(range(len(self.stath1)))
1740 while len(not_clustered) > 0:
1741 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1746 Refine the clusters by merging the ones whose centers are close
1747 @param rmsd_cutoff cutoff distance in Angstorms
1749 clusters_copy = self.clusters
1750 for c0, c1
in itertools.combinations(self.clusters, 2):
1751 if c0.center_index
is None:
1753 if c1.center_index
is None:
1755 _ = self.stath0[c0.center_index]
1756 _ = self.stath1[c1.center_index]
1757 rmsd, molecular_assignment = self.
rmsd()
1758 if rmsd <= rmsd_cutoff:
1759 if c1
in self.clusters:
1760 clusters_copy.remove(c1)
1762 self.clusters = clusters_copy
1769 def set_cluster_assignments(self, cluster_ids):
1770 if len(cluster_ids) != len(self.stath0):
1771 raise ValueError(
'cluster ids has to be same length as '
1775 for i
in sorted(list(set(cluster_ids))):
1777 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1778 self.clusters[idx].add_member(i, d)
1782 Return the model data from a cluster
1783 @param cluster IMP.pmi.output.Cluster object
1792 Save the data for the whole models into a pickle file
1793 @param filename string
1795 self.stath0.save_data(filename)
1799 Set the data from an external IMP.pmi.output.Data
1800 @param data IMP.pmi.output.Data
1802 self.stath0.data = data
1803 self.stath1.data = data
1807 Load the data from an external pickled file
1808 @param filename string
1810 self.stath0.load_data(filename)
1811 self.stath1.load_data(filename)
1812 self.best_models = len(self.stath0)
1814 def add_cluster(self, rmf_name_list):
1816 print(
"creating cluster index "+str(len(self.clusters)))
1817 self.clusters.append(c)
1818 current_len = len(self.stath0)
1820 for rmf
in rmf_name_list:
1821 print(
"adding rmf "+rmf)
1822 self.stath0.add_stat_file(rmf)
1823 self.stath1.add_stat_file(rmf)
1825 for n0
in range(current_len, len(self.stath0)):
1826 d0 = self.stath0[n0]
1827 c.add_member(n0, d0)
1832 Save the clusters into a pickle file
1833 @param filename string
1836 fl = open(filename,
'wb')
1837 pickle.dump(self.clusters, fl)
1841 Load the clusters from a pickle file
1842 @param filename string
1843 @param append bool (Default=False), if True. append the clusters
1844 to the ones currently present
1847 fl = open(filename,
'rb')
1848 self.clean_clusters()
1850 self.clusters += pickle.load(fl)
1852 self.clusters = pickle.load(fl)
1861 Compute the cluster center for a given cluster
1863 member_distance = defaultdict(float)
1865 for n0, n1
in itertools.combinations(cluster.members, 2):
1868 rmsd, _ = self.
rmsd()
1869 member_distance[n0] += rmsd
1871 if len(member_distance) > 0:
1872 cluster.center_index = min(member_distance,
1873 key=member_distance.get)
1875 cluster.center_index = cluster.members[0]
1880 Save the coordinates of the current cluster a single rmf file
1882 print(
"saving coordinates", cluster)
1886 if rmf_name
is None:
1887 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1889 _ = self.stath1[cluster.members[0]]
1891 o.init_rmf(rmf_name, [self.stath1])
1892 for n1
in cluster.members:
1898 o.write_rmf(rmf_name)
1900 o.close_rmf(rmf_name)
1904 remove structures that are similar
1905 append it to a new cluster
1907 print(
"pruning models")
1909 filtered = [selected]
1910 remaining = range(1, len(self.stath1), 10)
1912 while len(remaining) > 0:
1913 d0 = self.stath0[selected]
1915 for n1
in remaining:
1920 if d <= rmsd_cutoff:
1922 print(
"pruning model %s, similar to model %s, rmsd %s"
1923 % (str(n1), str(selected), str(d)))
1924 remaining = [x
for x
in remaining
if x
not in rm]
1925 if len(remaining) == 0:
1927 selected = remaining[0]
1928 filtered.append(selected)
1931 self.clusters.append(c)
1933 d0 = self.stath0[n0]
1934 c.add_member(n0, d0)
1939 Compute the precision of a cluster
1945 if cluster.center_index
is not None:
1946 members1 = [cluster.center_index]
1948 members1 = cluster.members
1952 for n1
in cluster.members:
1957 tmp_rmsd, _ = self.
rmsd()
1962 precision = rmsd/npairs
1963 cluster.precision = precision
1968 Compute the bipartite precision (ie the cross-precision)
1969 between two clusters
1973 for cn0, n0
in enumerate(cluster1.members):
1975 for cn1, n1
in enumerate(cluster2.members):
1977 tmp_rmsd, _ = self.
rmsd()
1979 print(
"--- rmsd between structure %s and structure "
1980 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1983 precision = rmsd/npairs
1986 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1987 cluster_ref=
None, step=1):
1989 Compute the Root mean square fluctuations
1990 of a molecule in a cluster
1991 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1992 residue indexes and the value is the rmsf
1994 rmsf = IMP.pmi.tools.OrderedDict()
1997 if cluster_ref
is not None:
1998 if cluster_ref.center_index
is not None:
1999 members0 = [cluster_ref.center_index]
2001 members0 = cluster_ref.members
2003 if cluster.center_index
is not None:
2004 members0 = [cluster.center_index]
2006 members0 = cluster.members
2009 copy_index=copy_index, state_index=state_index)
2010 ps0 = s0.get_selected_particles()
2012 residue_indexes = list(IMP.pmi.tools.OrderedSet(
2018 d0 = self.stath0[n0]
2019 for n1
in cluster.members[::step]:
2021 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
2025 self.stath1, molecule=molecule,
2026 residue_indexes=residue_indexes, resolution=1,
2027 copy_index=copy_index, state_index=state_index)
2028 ps1 = s1.get_selected_particles()
2030 d1 = self.stath1[n1]
2033 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
2034 r = residue_indexes[n]
2046 for stath
in [self.stath0, self.stath1]:
2047 if molecule
not in self.symmetric_molecules:
2049 stath, molecule=molecule, residue_index=r,
2050 resolution=1, copy_index=copy_index,
2051 state_index=state_index)
2054 stath, molecule=molecule, residue_index=r,
2055 resolution=1, state_index=state_index)
2057 ps = s.get_selected_particles()
2066 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2067 reference=
"Absolute", prefix=
"./", step=1):
2073 for n1
in cluster.members[::step]:
2074 print(
"density "+str(n1))
2079 dens.add_subunits_density(self.stath1)
2081 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2084 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2085 consolidate=
False, molecules=
None, prefix=
'./',
2086 reference=
"Absolute"):
2090 import matplotlib.pyplot
as plt
2091 import matplotlib.cm
as cm
2092 from scipy.spatial.distance
import cdist
2094 if molecules
is None:
2103 molecules=molecules).get_selected_particles())]
2104 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2105 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2106 total_len_unique = sum(max(mol.get_residue_indexes())
2107 for mol
in unique_copies)
2114 seqlen = max(mol.get_residue_indexes())
2115 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2119 for mol
in unique_copies:
2120 seqlen = max(mol.get_residue_indexes())
2121 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2124 for ncl, n1
in enumerate(cluster.members):
2127 coord_dict = IMP.pmi.tools.OrderedDict()
2129 rindexes = mol.get_residue_indexes()
2130 coords = np.ones((max(rindexes), 3))
2131 for rnum
in rindexes:
2134 selpart = sel.get_selected_particles()
2135 if len(selpart) == 0:
2137 selpart = selpart[0]
2138 coords[rnum - 1, :] = \
2140 coord_dict[mol] = coords
2143 coords = np.concatenate(list(coord_dict.values()))
2144 dists = cdist(coords, coords)
2145 binary_dists = np.where((dists <= contact_threshold)
2146 & (dists >= 1.0), 1.0, 0.0)
2148 binary_dists_dict = {}
2150 len1 = max(mol1.get_residue_indexes())
2152 name1 = mol1.get_name()
2153 name2 = mol2.get_name()
2154 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2155 if (name1, name2)
not in binary_dists_dict:
2156 binary_dists_dict[(name1, name2)] = \
2157 np.zeros((len1, len1))
2158 binary_dists_dict[(name1, name2)] += \
2159 np.where((dists <= contact_threshold)
2160 & (dists >= 1.0), 1.0, 0.0)
2161 binary_dists = np.zeros((total_len_unique, total_len_unique))
2163 for name1, name2
in binary_dists_dict:
2164 r1 = index_dict[mol_names_unique[name1]]
2165 r2 = index_dict[mol_names_unique[name2]]
2166 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2167 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2173 contact_freqs = binary_dists
2175 dist_maps.append(dists)
2176 av_dist_map += dists
2177 contact_freqs += binary_dists
2180 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2182 contact_freqs = 1.0/len(cluster)*contact_freqs
2183 av_dist_map = 1.0/len(cluster)*contact_freqs
2185 fig = plt.figure(figsize=(100, 100))
2186 ax = fig.add_subplot(111)
2189 gap_between_components = 50
2194 sorted_tuple = sorted(
2196 mol).get_extended_name(), mol)
for mol
in mols)
2197 prot_list = list(zip(*sorted_tuple))[1]
2199 sorted_tuple = sorted(
2201 for mol
in unique_copies)
2202 prot_list = list(zip(*sorted_tuple))[1]
2204 prot_listx = prot_list
2205 nresx = gap_between_components + \
2206 sum([max(mol.get_residue_indexes())
2207 + gap_between_components
for mol
in prot_listx])
2210 prot_listy = prot_list
2211 nresy = gap_between_components + \
2212 sum([max(mol.get_residue_indexes())
2213 + gap_between_components
for mol
in prot_listy])
2218 res = gap_between_components
2219 for mol
in prot_listx:
2220 resoffsetx[mol] = res
2221 res += max(mol.get_residue_indexes())
2223 res += gap_between_components
2227 res = gap_between_components
2228 for mol
in prot_listy:
2229 resoffsety[mol] = res
2230 res += max(mol.get_residue_indexes())
2232 res += gap_between_components
2234 resoffsetdiagonal = {}
2235 res = gap_between_components
2236 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2237 resoffsetdiagonal[mol] = res
2238 res += max(mol.get_residue_indexes())
2239 res += gap_between_components
2244 for n, prot
in enumerate(prot_listx):
2245 res = resoffsetx[prot]
2247 for proty
in prot_listy:
2248 resy = resoffsety[proty]
2249 endy = resendy[proty]
2250 ax.plot([res, res], [resy, endy], linestyle=
'-',
2251 color=
'gray', lw=0.4)
2252 ax.plot([end, end], [resy, endy], linestyle=
'-',
2253 color=
'gray', lw=0.4)
2254 xticks.append((float(res) + float(end)) / 2)
2256 prot).get_extended_name())
2260 for n, prot
in enumerate(prot_listy):
2261 res = resoffsety[prot]
2263 for protx
in prot_listx:
2264 resx = resoffsetx[protx]
2265 endx = resendx[protx]
2266 ax.plot([resx, endx], [res, res], linestyle=
'-',
2267 color=
'gray', lw=0.4)
2268 ax.plot([resx, endx], [end, end], linestyle=
'-',
2269 color=
'gray', lw=0.4)
2270 yticks.append((float(res) + float(end)) / 2)
2272 prot).get_extended_name())
2276 tmp_array = np.zeros((nresx, nresy))
2278 for px
in prot_listx:
2279 for py
in prot_listy:
2280 resx = resoffsetx[px]
2281 lengx = resendx[px] - 1
2282 resy = resoffsety[py]
2283 lengy = resendy[py] - 1
2284 indexes_x = index_dict[px]
2285 minx = min(indexes_x)
2286 maxx = max(indexes_x)
2287 indexes_y = index_dict[py]
2288 miny = min(indexes_y)
2289 maxy = max(indexes_y)
2290 tmp_array[resx:lengx, resy:lengy] = \
2291 contact_freqs[minx:maxx, miny:maxy]
2292 ret[(px, py)] = np.argwhere(
2293 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2295 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2296 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2298 ax.set_xticks(xticks)
2299 ax.set_xticklabels(xlabels, rotation=90)
2300 ax.set_yticks(yticks)
2301 ax.set_yticklabels(ylabels)
2302 plt.setp(ax.get_xticklabels(), fontsize=6)
2303 plt.setp(ax.get_yticklabels(), fontsize=6)
2306 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2307 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2309 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2310 dpi=300, transparent=
"False")
2313 def plot_rmsd_matrix(self, filename):
2314 self.compute_all_pairwise_rmsd()
2315 distance_matrix = np.zeros(
2316 (len(self.stath0), len(self.stath1)))
2317 for (n0, n1)
in self.pairwise_rmsd:
2318 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2320 import matplotlib
as mpl
2322 import matplotlib.pylab
as pl
2323 from scipy.cluster
import hierarchy
as hrc
2325 fig = pl.figure(figsize=(10, 8))
2326 ax = fig.add_subplot(212)
2327 dendrogram = hrc.dendrogram(
2328 hrc.linkage(distance_matrix),
2331 leaves_order = dendrogram[
'leaves']
2332 ax.set_xlabel(
'Model')
2333 ax.set_ylabel(
'RMSD [Angstroms]')
2335 ax2 = fig.add_subplot(221)
2337 distance_matrix[leaves_order, :][:, leaves_order],
2338 interpolation=
'nearest')
2339 cb = fig.colorbar(cax)
2340 cb.set_label(
'RMSD [Angstroms]')
2341 ax2.set_xlabel(
'Model')
2342 ax2.set_ylabel(
'Model')
2344 pl.savefig(filename, dpi=300)
2353 Update the cluster id numbers
2355 for n, c
in enumerate(self.clusters):
2358 def get_molecule(self, hier, name, copy):
2366 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2367 self.sel0_rmsd.get_selected_particles())
2368 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2369 self.sel1_rmsd.get_selected_particles())
2370 for mol
in self.seldict0:
2371 for sel
in self.seldict0[mol]:
2372 self.issymmetricsel[sel] =
False
2373 for mol
in self.symmetric_molecules:
2374 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2375 for sel
in self.seldict0[mol]:
2376 self.issymmetricsel[sel] =
True
2380 self.sel1_alignment, self.sel0_alignment)
2382 for rb
in self.rbs1:
2385 for bead
in self.beads1:
2393 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2395 initial filling of the clusters.
2398 print(
"clustering model "+str(n0))
2399 d0 = self.stath0[n0]
2401 print(
"creating cluster index "+str(len(self.clusters)))
2402 self.clusters.append(c)
2403 c.add_member(n0, d0)
2404 clustered = set([n0])
2406 print(
"--- trying to add model " + str(n1) +
" to cluster "
2407 + str(len(self.clusters)))
2408 d1 = self.stath1[n1]
2411 rmsd, _ = self.
rmsd(metric=metric)
2412 if rmsd < rmsd_cutoff:
2413 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2414 c.add_member(n1, d1)
2417 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2422 merge the clusters that have close members
2424 @param rmsd_cutoff cutoff distance in Angstorms
2425 @param metric Function to calculate distance between two Selections
2426 (by default, IMP.atom.get_rmsd is used)
2434 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2435 itertools.combinations(self.clusters, 2)):
2436 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2439 rmsd, _ = self.
rmsd()
2440 if (rmsd < 2*rmsd_cutoff
and
2442 to_merge.append((c0, c1))
2444 for c0, c
in reversed(to_merge):
2448 self.clusters = [c
for c
in
2449 filter(
lambda x: len(x.members) > 0, self.clusters)]
2453 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2455 print(
"check close members for clusters " + str(c0.cluster_id) +
2456 " and " + str(c1.cluster_id))
2457 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2460 rmsd, _ = self.
rmsd(metric=metric)
2461 if rmsd < rmsd_cutoff:
2476 a function that returns the permutation best_sel of sels0 that
2479 best_rmsd2 = float(
'inf')
2481 if self.issymmetricsel[sels0[0]]:
2484 for offset
in range(N):
2485 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2488 r = metric(sel0, sel1)
2490 if rmsd2 < best_rmsd2:
2494 for sels
in itertools.permutations(sels0):
2496 for sel0, sel1
in itertools.takewhile(
2497 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2498 r = metric(sel0, sel1)
2500 if rmsd2 < best_rmsd2:
2503 return best_sel, best_rmsd2
2505 def compute_all_pairwise_rmsd(self):
2506 for d0
in self.stath0:
2507 for d1
in self.stath1:
2508 rmsd, _ = self.
rmsd()
2510 def rmsd(self, metric=IMP.atom.get_rmsd):
2512 Computes the RMSD. Resolves ambiguous pairs assignments
2516 n0 = self.stath0.current_index
2517 n1 = self.stath1.current_index
2518 if ((n0, n1)
in self.pairwise_rmsd) \
2519 and ((n0, n1)
in self.pairwise_molecular_assignment):
2520 return (self.pairwise_rmsd[(n0, n1)],
2521 self.pairwise_molecular_assignment[(n0, n1)])
2531 molecular_assignment = {}
2532 for molname, sels0
in self.seldict0.items():
2533 sels_best_order, best_rmsd2 = \
2534 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2536 Ncoords = len(sels_best_order[0].get_selected_particles())
2537 Ncopies = len(self.seldict1[molname])
2538 total_rmsd += Ncoords*best_rmsd2
2539 total_N += Ncoords*Ncopies
2541 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2542 p0 = sel0.get_selected_particles()[0]
2543 p1 = sel1.get_selected_particles()[0]
2548 molecular_assignment[(molname, c0)] = (molname, c1)
2550 total_rmsd = math.sqrt(total_rmsd/total_N)
2552 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2553 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2554 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2555 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2556 return total_rmsd, molecular_assignment
2560 Fix the reference structure for structural alignment, rmsd and
2563 @param reference can be either "Absolute" (cluster center of the
2564 first cluster) or Relative (cluster center of the current
2566 #param cluster the reference IMP.pmi.output.Cluster object
2568 if reference ==
"Absolute":
2570 elif reference ==
"Relative":
2571 if cluster.center_index:
2572 n0 = cluster.center_index
2574 n0 = cluster.members[0]
2579 compute the molecular assignments between multiple copies
2580 of the same sequence. It changes the Copy index of Molecules
2583 _, molecular_assignment = self.
rmsd()
2584 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2585 mol0 = self.molcopydict0[m0][c0]
2586 mol1 = self.molcopydict1[m1][c1]
2589 p1.set_value(cik0, c0)
2593 Undo the Copy index assignment
2596 _, molecular_assignment = self.
rmsd()
2597 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2598 mol0 = self.molcopydict0[m0][c0]
2599 mol1 = self.molcopydict1[m1][c1]
2602 p1.set_value(cik0, c1)
2609 s =
"AnalysisReplicaExchange\n"
2610 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2611 s +=
"---- number of models %s \n" % str(len(self.stath0))
2614 def __getitem__(self, int_slice_adaptor):
2615 if isinstance(int_slice_adaptor, int):
2616 return self.clusters[int_slice_adaptor]
2617 elif isinstance(int_slice_adaptor, slice):
2618 return self.__iter__(int_slice_adaptor)
2620 raise TypeError(
"Unknown Type")
2623 return len(self.clusters)
2625 def __iter__(self, slice_key=None):
2626 if slice_key
is None:
2627 for i
in range(len(self)):
2630 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
CheckLevel get_check_level()
Get the current audit mode.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.