Source code for stdpopsim.engines

import logging
import warnings

import attr
import msprime
import stdpopsim
import numpy as np
import math

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Registry of simulation engines, keyed by each engine's ``id`` attribute.
_registered_engines = {}


def register_engine(engine):
    """
    Adds a simulation engine to the module-level registry.

    The engine is stored under its ``id`` attribute; registering a second
    engine with an ``id`` that is already present is an error.

    :param engine: The simulation engine object to register.
    :type engine: :class:`.Engine`
    :raises ValueError: If an engine with the same ``id`` is already
        registered.
    """
    already_present = engine.id in _registered_engines
    if already_present:
        raise ValueError(f"Simulation engine '{engine.id}' already registered.")

    debug_message = f"Registering simulation engine '{engine.id}'"
    logger.debug(debug_message)
    _registered_engines[engine.id] = engine
def get_engine(id):
    """
    Looks up a registered simulation engine by its ``id``.

    :param str id: The string identifier for the requested engine.
        The currently supported engines are "msprime" and "slim".
    :return: A simulation engine object with a ``simulate()`` method.
    :rtype: :class:`.Engine`
    :raises ValueError: If no engine has been registered under ``id``.
    """
    if id in _registered_engines:
        return _registered_engines[id]
    raise ValueError(f"Simulation engine '{id}' not registered")
def all_engines():
    """
    Yields every simulation engine currently held in the registry.
    """
    yield from _registered_engines.values()
@attr.s
class Engine:
    """
    Abstract base class for simulation engines.

    A new simulation engine is implemented by subclassing this class.
    At a minimum, a subclass must set the ``id``, ``description`` and
    ``citations`` attributes, and must implement the :func:`simulate()`
    and :func:`get_version()` methods. See msprime example in
    ``engines.py``.

    :ivar ~.id: The unique identifier for the simulation engine.
    :vartype ~.id: str
    :ivar ~.description: A short description of this engine.
    :vartype ~.description: str
    :ivar citations: A list of citations for the simulation engine.
    :vartype citations: list of :class:`.Citation`
    """

    def simulate(
        self, demographic_model, contig, samples, *, seed=None, dry_run=False
    ):
        """
        Simulates the model for the specified contig and samples.
        ``demographic_model``, ``contig``, and ``samples`` must be specified.

        This base implementation always raises; subclasses override it.

        :param demographic_model: The demographic model to simulate.
        :type demographic_model: :class:`.DemographicModel`
        :param contig: The contig, defining the length, mutation rate,
            and recombination rate(s).
        :type contig: :class:`.Contig`
        :param samples: The samples to be obtained from the simulation.
        :type samples: dict containing number of individuals per population
        :param seed: The seed for the random number generator.
        :type seed: int
        :param dry_run: If True, the simulation engine will return None
            without running the simulation.
        :type dry_run: bool
        :return: A succinct tree sequence.
        :rtype: :class:`tskit.trees.TreeSequence` or None
        :raises NotImplementedError: Always, in this abstract base class.
        """
        raise NotImplementedError()

    def get_version(self):
        """
        Returns the version of the engine.

        :rtype: str
        :raises NotImplementedError: Always, in this abstract base class.
        """
        raise NotImplementedError()

    def _warn_zigzag(self, demographic_model):
        # Only the Zigzag_1S14 model had its population sizes corrected, so
        # every other model returns without warning.
        if demographic_model.id != "Zigzag_1S14":
            return
        message = (
            "In stdpopsim <= 0.1.2, the Zigzag_1S14 model produced population "
            "sizes 5x lower than those in Schiffels & Durbin (2014). "
            "The population sizes have now been corrected. For details see: "
            "https://github.com/popsim-consortium/stdpopsim/issues/745"
        )
        warnings.warn(message)

    def _warn_mutation_rate_mismatch(self, contig, demographic_model):
        # A model with no mutation rate of its own cannot disagree with the
        # contig, so there is nothing to report.
        model_rate = demographic_model.mutation_rate
        if model_rate is None:
            return
        contig_rate = contig.mutation_rate
        # Compare with a tolerance rather than exact float equality.
        if math.isclose(model_rate, contig_rate):
            return
        message = (
            "The demographic model has mutation rate "
            f"{model_rate}, but this simulation used the "
            f"contig's mutation rate {contig_rate}. Diversity levels "
            "may be different than expected for this species. For details see "
            "documentation at "
            "https://popsim-consortium.github.io/stdpopsim-docs/stable/tutorial.html"
        )
        warnings.warn(message)
class _MsprimeEngine(Engine):
    """
    Simulation engine that runs demographic models with msprime.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    # NOTE: this list is a class attribute and is therefore shared; simulate()
    # adds model-specific citations to it in place.
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]
    # We default to the first model in the list.
    model_class_map = {
        "hudson": msprime.StandardCoalescent,
        "dtwf": msprime.DiscreteTimeWrightFisher,
        "smc": msprime.SmcApproxCoalescent,
        "smc_prime": msprime.SmcPrimeApproxCoalescent,
    }
    # Extra citations required when a particular ancestry model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    @property
    def supported_models(self):
        """
        The names of the msprime ancestry models this engine accepts.

        :rtype: list of str
        """
        return list(self.model_class_map.keys())

    def _convert_model_spec(self, model_str, model_changes):
        """
        Convert the specified model specification into a form suitable
        for sim_ancestry. The model param is a string or None. The
        model_changes is either None or list of (time, model_str) tuples.
        Also return the appropriate extra citations.

        :param model_str: Name of the initial model, or None for "hudson".
        :param model_changes: None, or a list of (time, model_str) tuples
            giving the time at which each subsequent model takes over.
        :return: A ``(model, citations)`` tuple. ``model`` is the model name
            when ``model_changes`` is None, and otherwise a list of model
            instances with durations. ``citations`` is a list of the extra
            citations for the models used.
        :raises ValueError: If any model name is not recognised.
        """
        citations = []
        if model_str is None:
            model_str = "hudson"
        else:
            if model_str not in self.model_class_map:
                raise ValueError(f"Unrecognised model '{model_str}'")
            if model_str in self.model_citations:
                citations.extend(self.model_citations[model_str])

        if model_changes is None:
            model = model_str
        else:
            model_list = []
            last_t = 0
            last_model = model_str
            for t, model in model_changes:
                if model not in self.model_class_map:
                    raise ValueError(f"Unrecognised model '{model}'")
                if model in self.model_citations:
                    citations.extend(self.model_citations[model])
                # Each model runs from the previous change time until this one.
                duration = t - last_t
                model_list.append(self.model_class_map[last_model](duration=duration))
                last_model = model
                last_t = t
            # The final model is given no duration limit.
            model_list.append(self.model_class_map[last_model](duration=None))
            model = model_list
        return model, citations

    def simulate(
        self,
        demographic_model,
        contig,
        samples,
        *,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
        **kwargs,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time
            specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to
            :meth:`msprime.sim_ancestry()` and :meth:`msprime.sim_mutations()`
            to initialise the simulation and then immediately return, and
            None is returned instead of a tree sequence.
        :type dry_run: bool
        :param \\**kwargs: Further arguments passed to
            :meth:`msprime.sim_ancestry()`
        :return: The simulated tree sequence, or None if ``dry_run`` is True.
        :raises ValueError: If a model name is not recognised, if both
            ``seed`` and ``random_seed`` are given, if the contig is not
            neutral, or if ``samples`` is neither a dict nor a collection of
            :class:`msprime.SampleSet` objects.
        """
        model, citations = self._convert_model_spec(msprime_model, msprime_change_model)
        # ``self.citations`` is the shared class-level list, so blindly
        # extending it would add another duplicate on every simulation that
        # uses a cited model. Only add citations that are not yet present.
        for citation in citations:
            if citation not in self.citations:
                self.citations.append(citation)

        # ``random_seed`` is accepted as an alias for ``seed``, but not both.
        if "random_seed" in kwargs:
            if seed is not None:
                raise ValueError("Cannot set both seed and random_seed")
            seed = kwargs.pop("random_seed")

        # test to make sure contig is fully neutral
        if not contig.is_neutral:
            raise ValueError(
                "Contig had non neutral mutation types "
                "but you are using the msprime engine"
            )

        # handle deprecated samples=[msprime.SampleSet] input
        if isinstance(samples, dict):
            sample_sets = demographic_model.get_sample_sets(
                samples, ploidy=contig.ploidy
            )
        elif all(isinstance(x, msprime.SampleSet) for x in samples):
            sample_sets = samples
        else:
            raise ValueError(
                "Samples must be a dict of the form {population_name:num_samples}."
            )

        # TODO: remove this after a release or two. See #745.
        self._warn_zigzag(demographic_model)
        self._warn_mutation_rate_mismatch(contig, demographic_model)

        # Derive two seeds from the user's seed: one for the ancestry
        # simulation and one for the mutation simulation.
        rng = np.random.default_rng(seed)
        seeds = rng.integers(1, 2**31 - 1, size=2)

        # if bacterial_recombination=True then "recombination_rate"
        # is implemented by msprime as gene conversion rate
        # (which must be constant)
        recombination_map = contig.recombination_map
        gc_rate = None
        gc_frac = contig.gene_conversion_fraction
        if contig.bacterial_recombination:
            gc_rate = contig.recombination_map.mean_rate
            recombination_map = msprime.RateMap(
                position=[0.0, contig.length], rate=[0.0]
            )
        elif (gc_frac is not None) and (gc_frac > 0.0):
            # the recombination map is a map of double-stranded breaks,
            # so we need to separate out GC from crossovers
            gc_rate = contig.recombination_map.mean_rate * gc_frac / (1 - gc_frac)
            recombination_map = msprime.RateMap(
                position=recombination_map.position,
                rate=recombination_map.rate * (1 - gc_frac),
            )

        ts = msprime.sim_ancestry(
            samples=sample_sets,
            recombination_rate=recombination_map,
            gene_conversion_rate=gc_rate,
            gene_conversion_tract_length=contig.gene_conversion_length,
            demography=demographic_model.model,
            ploidy=contig.ploidy,
            random_seed=seeds[0],
            model=model,
            end_time=0 if dry_run else None,
            **kwargs,
        )
        ts = msprime.sim_mutations(
            ts,
            end_time=0 if dry_run else None,
            random_seed=seeds[1],
            rate=contig.mutation_rate,
        )

        # Apply the contig's inclusion mask, then its exclusion mask, if set.
        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

        if dry_run:
            ts = None
        return ts

    def get_version(self):
        """
        Returns the version string of the imported msprime package.

        :rtype: str
        """
        return msprime.__version__
# Register one msprime engine instance when this module is imported, so that
# it can be retrieved by its id through get_engine().
register_engine(_MsprimeEngine())
def get_default_engine():
    """
    Returns the default simulation engine (msprime).

    This is a convenience wrapper that looks up the engine registered
    under the id "msprime".

    :rtype: :class:`.Engine`
    """
    default_engine_id = "msprime"
    return get_engine(default_engine_id)