diff --git a/docs/elsim.rst b/docs/elsim.rst index b2c4686..96d97e0 100644 --- a/docs/elsim.rst +++ b/docs/elsim.rst @@ -18,3 +18,4 @@ Subpackages elsim.elections elsim.strategies elsim.methods + elsim.studies diff --git a/docs/elsim.studies.rst b/docs/elsim.studies.rst new file mode 100644 index 0000000..50bccf2 --- /dev/null +++ b/docs/elsim.studies.rst @@ -0,0 +1,8 @@ +elsim.studies module +==================== + +.. automodule:: elsim.studies + :members: + :undoc-members: + :imported-members: + :show-inheritance: diff --git a/elsim/__init__.py b/elsim/__init__.py index bfc59ca..afd8c81 100644 --- a/elsim/__init__.py +++ b/elsim/__init__.py @@ -15,6 +15,6 @@ Functions that implement voting methods, which calculate a winner from a collection of ballots. """ -from . import elections, methods, strategies +from . import elections, methods, strategies, studies __version__ = "0.1.3" diff --git a/elsim/studies/__init__.py b/elsim/studies/__init__.py new file mode 100644 index 0000000..bc39e39 --- /dev/null +++ b/elsim/studies/__init__.py @@ -0,0 +1,38 @@ +""" +Tools for Monte Carlo election studies and paper-style reproduction scripts. + +This subpackage supports declarative spatial-model sweeps (see +:mod:`elsim.studies.spatial_normal`), parameter expansion for scenario grids, +serial batching via :func:`run_batched`, and small tallies shared by several +examples (Condorcet agreement, social-utility totals). + +The ``elections`` / ``strategies`` / ``methods`` modules remain the core model; +``studies`` orchestrates repeated draws and aggregation. 
+""" + +from .backends import SerialBackend +from .condorcet_metrics import approval_at_optimal, tally_condorcet_agreement +from .parameters import expand_product, expand_rows, expand_zip +from .runner import merge_counters, run_batched +from .social_utility import ( + random_society_utility_updates, + ranked_rated_utility_updates, + spatial_random_reference_utility_updates, +) +from .spatial_normal import accumulate_spatial_condorcet_by_ncands, accumulate_spatial_sue_by_ncands + +__all__ = [ + "SerialBackend", + "approval_at_optimal", + "expand_product", + "expand_rows", + "expand_zip", + "merge_counters", + "run_batched", + "tally_condorcet_agreement", + "accumulate_spatial_condorcet_by_ncands", + "accumulate_spatial_sue_by_ncands", + "spatial_random_reference_utility_updates", + "random_society_utility_updates", + "ranked_rated_utility_updates", +] diff --git a/elsim/studies/backends.py b/elsim/studies/backends.py new file mode 100644 index 0000000..8146bda --- /dev/null +++ b/elsim/studies/backends.py @@ -0,0 +1,24 @@ +""" +Execution helpers for repeating independent Monte Carlo batches. + +Serial execution is always available for :func:`elsim.studies.runner.run_batched`. 
# ---- elsim/studies/backends.py ----------------------------------------------
# Execution helpers for repeating independent Monte Carlo batches.
from collections import Counter
from typing import Callable, List, Mapping, Optional, Sequence, TypeVar

import numpy as np

T = TypeVar("T")


class SerialBackend:
    """Run zero-argument callables one after another in the current process."""

    def map_repeat(self, fn: Callable[[], T], n: int) -> List[T]:
        """
        Call ``fn()`` ``n`` times and return the results in call order.

        Raises
        ------
        ValueError
            If ``n`` is negative.
        """
        if n < 0:
            raise ValueError("n must be non-negative")
        return [fn() for _ in range(n)]

    def map_each(self, fns: Sequence[Callable[[], T]]) -> List[T]:
        """Invoke each zero-argument callable once, in order, and collect results."""
        return [fn() for fn in fns]


# ---- elsim/studies/condorcet_metrics.py --------------------------------------
# Condorcet-efficiency tallies used by several Merrill (1984) example scripts.
#
# These are thin wrappers around existing ``methods`` and ``strategies`` helpers
# so driver loops can ``counter.update(...)`` in one line.

RankedMethod = Callable[..., Optional[int]]
RatedMethod = Callable[..., Optional[int]]


def approval_at_optimal(utilities: np.ndarray, tiebreaker: str = "random") -> Optional[int]:  # noqa: UP045
    """
    Rated-method helper: build an optimal approval ballot, then apply :func:`elsim.methods.approval`.

    Intended for use in a ``rated_methods`` mapping passed to
    :func:`tally_condorcet_agreement` or the spatial sweep helpers, so scripts
    stay declarative without repeating lambdas.

    Parameters
    ----------
    utilities : array_like
        Voter utilities handed to :func:`elsim.strategies.approval_optimal`.
    tiebreaker : str, optional
        Passed positionally to :func:`elsim.methods.approval`.

    Returns
    -------
    int or None
        Whatever :func:`elsim.methods.approval` returns for the derived ballots.
    """
    # Project imports are function-local so this module can be imported (and
    # its pure-Python helpers exercised) without importing the rest of elsim.
    from elsim.methods import approval
    from elsim.strategies import approval_optimal

    return approval(approval_optimal(utilities), tiebreaker)


def tally_condorcet_agreement(
    rankings: np.ndarray,
    utilities: np.ndarray,
    ranked_methods: Mapping[str, RankedMethod],
    rated_methods: Mapping[str, RatedMethod],
    *,
    tiebreaker: str = "random",
) -> Counter:
    """
    Count whether each method agrees with the Condorcet winner for one election.

    If there is no Condorcet winner, returns an empty counter.

    Parameters
    ----------
    rankings : array_like
        Honest (or strategic) rankings, shape ``(n_voters, n_cands)``.
    utilities : array_like
        Utilities aligned with ``rankings``, shape ``(n_voters, n_cands)``.
    ranked_methods, rated_methods
        Name to callable maps; ranked methods take ``(rankings, tiebreaker=...)``;
        rated methods take ``(utilities, tiebreaker=...)``.
    tiebreaker : str, optional
        Passed through to each method callable.

    Returns
    -------
    collections.Counter
        Includes key ``"CW"`` when a Condorcet winner exists, plus one key per
        supplied method name when that method's winner matches the Condorcet
        winner.
    """
    from elsim.methods import condorcet

    cw = condorcet(rankings)
    if cw is None:
        # No Condorcet winner: nothing to agree with, so nothing is counted.
        return Counter()

    out: Counter = Counter()
    out["CW"] += 1

    for name, fn in ranked_methods.items():
        if fn(rankings, tiebreaker=tiebreaker) == cw:
            out[name] += 1

    for name, fn in rated_methods.items():
        if fn(utilities, tiebreaker=tiebreaker) == cw:
            out[name] += 1

    return out


# ---- elsim/studies/parameters.py (module notes, first part) ------------------
# Expand simulation parameters into explicit scenario dictionaries.
#
# Issue #10 called out three common shapes:
#
# * Cartesian product -- every combination of voter counts, candidate counts,
#   methods, etc.  Use ``expand_product``.
# * Zipped columns -- parallel lists of the same length (e.g. sweep ``n_voters``
#   and ``n_cands`` together).  Use ``expand_zip``.
# ---- elsim/studies/parameters.py (module notes, continued) -------------------
# * Explicit rows -- a small table of tuples such as Merrill 1984 Table 2
#   ``(disp, corr, D)`` that is *not* a full product.  Use ``expand_rows`` or
#   pass your own sequence of mappings to your driver loop.
#
# Strings are treated as scalars (not iterated character-wise).
from itertools import product
from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Union

Scalar = Any
ScalarOrIterable = Union[Scalar, Iterable[Scalar]]


def _as_tuple(x: ScalarOrIterable) -> Tuple[Scalar, ...]:
    """Normalize one ``expand_product`` argument to a tuple of candidate values."""
    if isinstance(x, (str, bytes)):
        # Strings are iterable, but a method name must stay a single value.
        return (x,)
    if isinstance(x, Mapping):
        raise TypeError("Mappings are not treated as iterables of scenarios; "
                        "pass keys to expand_product or use expand_rows.")
    if isinstance(x, Iterable):
        return tuple(x)
    return (x,)


def _as_column(name: str, values: Any) -> List[Any]:
    """Materialize one ``expand_zip`` column, refusing scalar-like inputs."""
    if isinstance(values, (str, bytes)):
        # Same rule as _as_tuple: never split a string into characters.
        raise TypeError(
            f"expand_zip: {name!r} is a string, which is treated as a scalar; "
            "pass a list of values instead"
        )
    if isinstance(values, Mapping):
        raise TypeError(
            f"expand_zip: {name!r} is a mapping; pass a list of values instead"
        )
    return list(values)


def expand_product(**params: ScalarOrIterable) -> List[Dict[str, Any]]:
    """
    Cartesian product of parameter values.

    Each keyword argument is either a single value or an iterable of values.
    The return value is a list of dicts, one per combination, in deterministic
    order (same as :func:`itertools.product`).

    Raises
    ------
    TypeError
        If a value is a mapping.

    Examples
    --------
    >>> expand_product(n_voters=[10, 20], n_cands=3)
    [{'n_voters': 10, 'n_cands': 3}, {'n_voters': 20, 'n_cands': 3}]
    """
    if not params:
        return [{}]
    keys = list(params)
    value_lists = [_as_tuple(params[k]) for k in keys]
    return [dict(zip(keys, combo)) for combo in product(*value_lists)]


def expand_zip(**params: Iterable) -> List[Dict[str, Any]]:
    """
    Zip parallel parameter columns into scenario dicts.

    All iterables must have the same length.

    Parameters
    ----------
    **params
        Each value must be an iterable of scenario values for that key.

    Raises
    ------
    TypeError
        If a value is a string, bytes or mapping (these are scalars elsewhere
        in this module and would otherwise be split into characters or keys),
        or is not iterable at all.
    ValueError
        If the columns do not all have the same length.

    Examples
    --------
    >>> expand_zip(n_voters=[100, 200], n_cands=[3, 5])
    [{'n_voters': 100, 'n_cands': 3}, {'n_voters': 200, 'n_cands': 5}]
    """
    if not params:
        return []
    keys = list(params)
    columns = [_as_column(k, params[k]) for k in keys]
    lengths = {len(c) for c in columns}
    if len(lengths) > 1:
        raise ValueError(
            "expand_zip: all parameter lists must have the same length; "
            f"got {dict(zip(keys, map(len, columns)))}"
        )
    return [dict(zip(keys, row)) for row in zip(*columns)]


def expand_rows(rows: Sequence[Sequence[Any]], keys: Sequence[str]) -> List[Dict[str, Any]]:
    """
    Turn fixed scenario rows into dicts.

    Use this for tables like Merrill (1984) Table 2 where each row is a
    deliberate ``(disp, corr, D)`` triple rather than a combination from a grid.

    Parameters
    ----------
    rows : sequence of row sequences
        Each inner sequence must have ``len(keys)`` entries.
    keys : sequence of str
        Names for each column; must be unique.

    Raises
    ------
    ValueError
        If ``keys`` contains duplicates, or a row's length differs from
        ``len(keys)``.

    Examples
    --------
    >>> expand_rows([(1.0, 0.5, 2), (0.5, 0.0, 4)], ('disp', 'corr', 'D'))
    [{'disp': 1.0, 'corr': 0.5, 'D': 2}, {'disp': 0.5, 'corr': 0.0, 'D': 4}]
    """
    keys_t = tuple(keys)
    if len(set(keys_t)) != len(keys_t):
        # dict(zip(...)) would silently keep only the last duplicate column.
        raise ValueError(f"expand_rows: keys must be unique; got {keys_t!r}")
    out: List[Dict[str, Any]] = []
    for i, row in enumerate(rows):
        row_t = tuple(row)
        if len(row_t) != len(keys_t):
            raise ValueError(
                f"expand_rows: row {i} has length {len(row_t)} but {len(keys_t)} keys were given"
            )
        out.append(dict(zip(keys_t, row_t)))
    return out


# ---- elsim/studies/runner.py --------------------------------------------------
# Batched Monte Carlo execution and simple result merging.
# ---- elsim/studies/runner.py --------------------------------------------------
import random
from collections import Counter
from typing import Callable, Dict, Iterable, List, Mapping, Optional, TypeVar

import numpy as np

T = TypeVar("T")


def run_batched(
    batch_fn: Callable[[int], T],
    n_trials: int,
    batch_size: int,
    *,
    backend=None,
) -> List[T]:
    """
    Run a trial batch worker an integer number of times.

    ``batch_fn(k)`` is invoked with ``k == batch_size`` for each full batch,
    and once more with ``k == n_trials % batch_size`` when the remainder is
    non-zero.

    Parameters
    ----------
    batch_fn : callable
        ``batch_fn(batch_size) -> partial result`` for one batch.
    n_trials : int
        Total number of trials across all batches.
    batch_size : int
        Preferred batch size (must be positive).
    backend : object with ``map_repeat(fn, n)``, optional
        Executes the full-sized batches. Defaults to
        :class:`elsim.studies.backends.SerialBackend`. The remainder batch is
        always run in-process by a direct call to ``batch_fn``.

    Returns
    -------
    list
        One return value per batch invocation (length ``ceil(n_trials / batch_size)``).

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or ``n_trials`` is negative.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    if n_trials < 0:
        raise ValueError("n_trials must be non-negative")
    if n_trials == 0:
        return []

    n_full, rem = divmod(n_trials, batch_size)
    if backend is None:
        from .backends import SerialBackend

        backend = SerialBackend()

    # Copy into a fresh list: a backend may return a tuple or a list it still
    # owns, and the remainder is appended below.
    parts: List[T] = list(backend.map_repeat(lambda: batch_fn(batch_size), n_full))
    if rem:
        parts.append(batch_fn(rem))
    return parts


def merge_counters(partials: Iterable[Counter]) -> Counter:
    """Sum a sequence of :class:`~collections.Counter` objects."""
    total: Counter = Counter()
    for c in partials:
        total.update(c)
    return total


# ---- elsim/studies/social_utility.py -----------------------------------------
# Scalar social-utility totals for Monte Carlo scripts (Merrill, Weber, etc.).
#
# These return per-election increments as plain floats so callers can accumulate
# into collections.Counter objects keyed by scenario (as in Merrill Table 4) or
# by ``(method, n_cands)`` via nested counters.

RankedMethod = Callable[..., Optional[int]]
RatedMethod = Callable[..., Optional[int]]


def _total_for_winner(column_totals: np.ndarray, winner: Optional[int], name: str) -> float:  # noqa: UP045
    """Return the summed utility of ``winner``; reject a missing winner clearly."""
    if winner is None:
        # Indexing with None would add an axis instead of selecting a candidate.
        raise ValueError(
            f"method {name!r} returned no winner (None); "
            "use a tiebreaker that always selects a candidate"
        )
    return float(column_totals[winner])


def ranked_rated_utility_updates(
    utilities: np.ndarray,
    rankings: np.ndarray,
    ranked_methods: Mapping[str, RankedMethod],
    rated_methods: Mapping[str, RatedMethod],
    *,
    tiebreaker: str = "random",
) -> Dict[str, float]:
    """
    Per-election utility totals for ranked and rated methods only (no UW/RW).

    Rated methods are called first with ``(utilities, tiebreaker=...)``, then
    ranked methods with ``(rankings, tiebreaker=...)``. Each winner's value is
    its column of ``utilities`` summed over voters.

    Raises
    ------
    ValueError
        If a method returns ``None`` instead of a winner index.
    """
    # Column sums are the same for every method, so compute them once.
    column_totals = utilities.sum(axis=0)
    out: Dict[str, float] = {}
    for name, fn in rated_methods.items():
        out[name] = _total_for_winner(column_totals, fn(utilities, tiebreaker=tiebreaker), name)
    for name, fn in ranked_methods.items():
        out[name] = _total_for_winner(column_totals, fn(rankings, tiebreaker=tiebreaker), name)
    return out


def spatial_random_reference_utility_updates(
    utilities: np.ndarray,
    rankings: np.ndarray,
    ranked_methods: Mapping[str, RankedMethod],
    rated_methods: Mapping[str, RatedMethod],
    *,
    tiebreaker: str = "random",
) -> Dict[str, float]:
    """
    Total utility (summed over voters) for each method winner plus random baseline.

    Matches the Merrill (1984) spatial social-utility-efficiency figures: pick
    ``RW`` with ``random.randint``, accumulate rated and ranked method winners,
    and use the same per-winner column sum as ``utilities.sum(axis=0)[w]``.

    Returns
    -------
    dict
        ``"RW"`` first, then one entry per rated method, then per ranked method.
    """
    n_cands = utilities.shape[1]
    # Draw the random winner before any method runs, so the order of draws from
    # the ``random`` module is the same as in the original scripts.
    rw = random.randint(0, n_cands - 1)
    out: Dict[str, float] = {"RW": float(utilities.sum(axis=0)[rw])}
    out.update(
        ranked_rated_utility_updates(
            utilities, rankings, ranked_methods, rated_methods, tiebreaker=tiebreaker
        )
    )
    return out


def random_society_utility_updates(
    utilities: np.ndarray,
    rankings: np.ndarray,
    ranked_methods: Mapping[str, RankedMethod],
    rated_methods: Mapping[str, RatedMethod],
    *,
    tiebreaker: str = "random",
    uw_key: str = "UW",
    utility_winner_tiebreaker: Optional[str] = "random",  # noqa: UP045
) -> Dict[str, float]:
    """
    Utility totals for Merrill-style random societies (Table 3 / Fig 3).

    Parameters
    ----------
    uw_key : str, optional
        Key under which the utility winner's total is stored.
    utility_winner_tiebreaker
        If ``None``, call ``utility_winner(utilities)`` with no tiebreaker
        (Weber-style scripts). Otherwise pass through to ``utility_winner``.

    Returns
    -------
    dict
        ``uw_key`` first, then one entry per rated method, then per ranked method.
    """
    from elsim.methods import utility_winner

    if utility_winner_tiebreaker is None:
        uw = utility_winner(utilities)
    else:
        uw = utility_winner(utilities, tiebreaker=utility_winner_tiebreaker)
    out: Dict[str, float] = {uw_key: float(utilities.sum(axis=0)[uw])}
    out.update(
        ranked_rated_utility_updates(
            utilities, rankings, ranked_methods, rated_methods, tiebreaker=tiebreaker
        )
    )
    return out


# ---- elsim/studies/spatial_normal.py (module notes) --------------------------
# Spatial normal-electorate Monte Carlo sweeps (Merrill-style figures).
#
# These functions implement the common ``for each election: for each n_cands:``
# pattern so example scripts only declare parameters and method maps.
# ---- elsim/studies/spatial_normal.py -----------------------------------------
from collections import Counter
from typing import Callable, Dict, Iterator, Mapping, Optional, Sequence, Tuple

# Same aliases as elsim.studies.condorcet_metrics; they are used only in
# annotations here.
RankedMethod = Callable[..., Optional[int]]
RatedMethod = Callable[..., Optional[int]]


def _spatial_elections(
    n_elections: int,
    n_voters: int,
    n_cands_list: Sequence[int],
    dims: int,
    corr: float,
    disp: float,
) -> Iterator[Tuple[int, object, object]]:
    """Yield ``(n_cands, utilities, rankings)`` for each election and each ``n_cands``."""
    from elsim.elections import normal_electorate, normed_dist_utilities
    from elsim.strategies import honest_rankings

    # Materialize once so a one-shot iterable is not exhausted after the first
    # election.
    sizes = tuple(n_cands_list)
    for _ in range(n_elections):
        for n_cands in sizes:
            v, c = normal_electorate(n_voters, n_cands, dims=dims, corr=corr, disp=disp)
            utilities = normed_dist_utilities(v, c)
            yield n_cands, utilities, honest_rankings(utilities)


def accumulate_spatial_condorcet_by_ncands(
    n_elections: int,
    *,
    n_voters: int,
    n_cands_list: Sequence[int],
    dims: int,
    corr: float,
    disp: float,
    ranked_methods: Mapping[str, RankedMethod],
    rated_methods: Mapping[str, RatedMethod],
    tiebreaker: str = "random",
) -> Dict[str, Counter]:
    """
    Run elections on a spatial normal model and tally Condorcet agreement by ``n_cands``.

    For each of ``n_elections`` iterations, draws one electorate per entry in
    ``n_cands_list`` (same pattern as Merrill figures 2.c / 2.d).

    Returns
    -------
    dict of collections.Counter
        One counter per method name plus ``"CW"``; each counter maps
        ``n_cands`` to the number of elections counted for that key by
        :func:`tally_condorcet_agreement`.
    """
    from .condorcet_metrics import tally_condorcet_agreement

    keys = ranked_methods.keys() | rated_methods.keys() | {"CW"}
    out: Dict[str, Counter] = {k: Counter() for k in keys}

    elections = _spatial_elections(n_elections, n_voters, n_cands_list, dims, corr, disp)
    for n_cands, utilities, rankings in elections:
        delta = tally_condorcet_agreement(
            rankings,
            utilities,
            ranked_methods,
            rated_methods,
            tiebreaker=tiebreaker,
        )
        for key, value in delta.items():
            out[key][n_cands] += value

    return out


def accumulate_spatial_sue_by_ncands(
    n_elections: int,
    *,
    n_voters: int,
    n_cands_list: Sequence[int],
    dims: int,
    corr: float,
    disp: float,
    ranked_methods: Mapping[str, RankedMethod],
    rated_methods: Mapping[str, RatedMethod],
    tiebreaker: str = "random",
) -> Dict[str, Counter]:
    """
    Accumulate summed social utilities (plus random reference) by ``n_cands``.

    Uses :func:`spatial_random_reference_utility_updates` each election, matching
    Merrill figures 4.a / 4.b.

    Returns
    -------
    dict of collections.Counter
        One counter per method name plus ``"SU max"`` and ``"RW"``; each
        counter maps ``n_cands`` to the accumulated utility total. ``"SU max"``
        stays empty unless a supplied method uses that name.
    """
    from .social_utility import spatial_random_reference_utility_updates

    keys = ranked_methods.keys() | rated_methods.keys() | {"SU max", "RW"}
    utility_sums: Dict[str, Counter] = {k: Counter() for k in keys}

    elections = _spatial_elections(n_elections, n_voters, n_cands_list, dims, corr, disp)
    for n_cands, utilities, rankings in elections:
        delta = spatial_random_reference_utility_updates(
            utilities,
            rankings,
            ranked_methods,
            rated_methods,
            tiebreaker=tiebreaker,
        )
        for name, value in delta.items():
            utility_sums[name][n_cands] += value

    return utility_sums


# NOTE: the rest of this span held diff hunks for example scripts, not
# definitions:
# * examples/distributions_by_dispersion.py and
#   examples/distributions_by_method.py: the unused loop variable ``iteration``
#   is renamed to ``_iteration``.
# * examples/distributions_by_method.py: ``[delayed(simulate_batch)()] * n_batches``
#   becomes ``[delayed(simulate_batch)() for _ in range(n_batches)]`` and the
#   task count is printed from ``n_batches``.
b/examples/distributions_by_method_2D.py index 0d3531c..0aa620a 100644 --- a/examples/distributions_by_method_2D.py +++ b/examples/distributions_by_method_2D.py @@ -24,20 +24,20 @@ import os import pickle from collections import defaultdict +from functools import partial import matplotlib.pyplot as plt import numpy as np + # from colorcet import fire from joblib import Parallel, delayed from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff, - star) -from elsim.strategies import (approval_optimal, honest_normed_scores, - honest_rankings, vote_for_k) +from elsim.methods import approval, black, borda, coombs, fptp, irv, runoff, star +from elsim.strategies import approval_optimal, honest_normed_scores, honest_rankings, vote_for_k try: - import ehtplot.color # Creates afmhot_u colormap + pass # Creates afmhot_u colormap except ValueError: # https://github.com/liamedeiros/ehtplot/pull/6 pass @@ -72,7 +72,7 @@ def human_format(num): def simulate_batch(n_cands): winners = defaultdict(list) - for iteration in range(batch_size): + for _iteration in range(batch_size): v, c = normal_electorate(n_voters, n_cands, dims=dims, disp=disp) # Contrived candidate at exact center @@ -155,21 +155,19 @@ def simulate_batch(n_cands): title += f'{human_format(n_voters)} voters, ' title += f'{human_format(n_cands)} candidates' if cand_dist == 'normal': - title += f', both Gaussian' + title += ', both Gaussian' title += f', {disp:.1f} relative dispersion' # Load from .pkl file if it exists pkl_filename = title + '.pkl' if os.path.exists(pkl_filename): - print('Loading pickled simulation results') with open(pkl_filename, "rb") as file: aggregated_histograms, standard_deviations = pickle.load(file) else: print('Running simulations') - jobs = [delayed(simulate_batch)(n_cands)] * n_batches - print(f'{len(jobs)} tasks total:') - results = Parallel(n_jobs=-3, verbose=5)(jobs) - del jobs + worker = 
partial(simulate_batch, n_cands) + print(f'{n_batches} tasks total:') + results = Parallel(n_jobs=-3, verbose=5)(delayed(worker)() for _ in range(n_batches)) # Get keys from the histograms of the first result keys = results[0][0].keys() @@ -203,9 +201,7 @@ def simulate_batch(n_cands): # %% Measure distributions for method, std in standard_deviations.items(): - print(f"{method}:") - print(f"Winner distribution std: {std[0]:.3f}") - print() + pass # %% Plotting diff --git a/examples/distributions_by_n_cands.py b/examples/distributions_by_n_cands.py index a220836..3e249c3 100644 --- a/examples/distributions_by_n_cands.py +++ b/examples/distributions_by_n_cands.py @@ -40,7 +40,7 @@ def human_format(num): def simulate_batch(): winners = defaultdict(list) for n_cands in n_cands_list: - for iteration in range(batch_size): + for _iteration in range(batch_size): v, c = normal_electorate(n_voters, n_cands, dims=1, disp=disp) if cand_dist == 'uniform': @@ -85,8 +85,8 @@ def simulate_batch(): return winners -jobs = [delayed(simulate_batch)()] * n_batches -print(f'{len(jobs)} tasks total:') +jobs = [delayed(simulate_batch)() for _ in range(n_batches)] +print(f'{n_batches} tasks total:') results = Parallel(n_jobs=-3, verbose=5)(jobs) winners = {k: [v for d in results for v in d[k]] for k in results[0]} diff --git a/examples/hypothesis_election_finder.py b/examples/hypothesis_election_finder.py index 97d80ca..1944583 100644 --- a/examples/hypothesis_election_finder.py +++ b/examples/hypothesis_election_finder.py @@ -1,6 +1,9 @@ """ Use Hypothesis to find simple elections that violate Condorcet compliance. +This is property-based search (Hypothesis), not a batched Monte Carlo study, so +it does not use ``elsim.studies`` parallel helpers. + This depends on Hypothesis' "shrinking" algorithm, which is not guaranteed to find the absolute simplest case (or any at all), but typically works well. 
https://hypothesis.readthedocs.io/en/latest/data.html#shrinking diff --git a/examples/merrill_1984_fig_2c_2d.py b/examples/merrill_1984_fig_2c_2d.py index 8d50b09..9c51ee8 100644 --- a/examples/merrill_1984_fig_2c_2d.py +++ b/examples/merrill_1984_fig_2c_2d.py @@ -42,16 +42,13 @@ high. """ import time -from collections import Counter import matplotlib.pyplot as plt import numpy as np from tabulate import tabulate -from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, condorcet, coombs, fptp, - irv, runoff, utility_winner) -from elsim.strategies import approval_optimal, honest_rankings +from elsim.methods import black, borda, coombs, fptp, irv, runoff, utility_winner +from elsim.studies import accumulate_spatial_condorcet_by_ncands, approval_at_optimal n_elections = 10_000 # Roughly 30 seconds each on a 2019 6-core i7-9750H n_voters = 201 @@ -59,12 +56,18 @@ corr = 0.5 D = 2 -ranked_methods = {'Plurality': fptp, 'Runoff': runoff, 'Hare': irv, - 'Borda': borda, 'Coombs': coombs, 'Black': black} - -rated_methods = {'SU max': utility_winner, - 'Approval': lambda utilities, tiebreaker: - approval(approval_optimal(utilities), tiebreaker)} +ranked_methods = { + "Plurality": fptp, + "Runoff": runoff, + "Hare": irv, + "Borda": borda, + "Coombs": coombs, + "Black": black, +} +rated_methods = { + "SU max": utility_winner, + "Approval": approval_at_optimal, +} # Plot Merrill's results as dotted lines for comparison (traced from plots) merrill_fig_2c = { @@ -90,31 +93,18 @@ for fig, disp, ymin, orig in (('2.c', 1.0, 50, merrill_fig_2c), ('2.d', 0.5, 0, merrill_fig_2d)): - condorcet_winner_count = {key: Counter() for key in ( - ranked_methods.keys() | rated_methods.keys() | {'CW'})} start_time = time.monotonic() - - for iteration in range(n_elections): - for n_cands in n_cands_list: - v, c = normal_electorate(n_voters, n_cands, dims=D, corr=corr, - disp=disp) - utilities = normed_dist_utilities(v, c) - rankings = 
honest_rankings(utilities) - - # If there is a Condorcet winner, analyze election, otherwise skip - # it - CW = condorcet(rankings) - if CW is not None: - condorcet_winner_count['CW'][n_cands] += 1 - - for name, method in ranked_methods.items(): - if method(rankings, tiebreaker='random') == CW: - condorcet_winner_count[name][n_cands] += 1 - - for name, method in rated_methods.items(): - if method(utilities, tiebreaker='random') == CW: - condorcet_winner_count[name][n_cands] += 1 - + condorcet_winner_count = accumulate_spatial_condorcet_by_ncands( + n_elections, + n_voters=n_voters, + n_cands_list=n_cands_list, + dims=D, + corr=corr, + disp=disp, + ranked_methods=ranked_methods, + rated_methods=rated_methods, + tiebreaker='random', + ) elapsed_time = time.monotonic() - start_time print('Elapsed:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') @@ -148,6 +138,7 @@ print(tabulate(table, ["Method", *x], tablefmt="pipe", floatfmt='.1f')) print() + plt.plot([], [], 'k:', lw=0.8, label='Merrill') # Dummy plot for label plt.legend() plt.grid(True, color='0.7', linestyle='-', which='major', axis='both') diff --git a/examples/merrill_1984_fig_2c_2d_updated.py b/examples/merrill_1984_fig_2c_2d_updated.py index a5aaa24..d7eef03 100644 --- a/examples/merrill_1984_fig_2c_2d_updated.py +++ b/examples/merrill_1984_fig_2c_2d_updated.py @@ -46,17 +46,14 @@ high. 
""" import time -from collections import Counter import matplotlib.pyplot as plt import numpy as np from tabulate import tabulate -from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, condorcet, coombs, fptp, - irv, runoff, score, star, utility_winner) -from elsim.strategies import (approval_optimal, honest_normed_scores, - honest_rankings) +from elsim.methods import approval, black, borda, coombs, fptp, irv, runoff, score, star, utility_winner +from elsim.strategies import approval_optimal, honest_normed_scores +from elsim.studies import accumulate_spatial_condorcet_by_ncands n_elections = 5_000 # Roughly 30 seconds each on a 2019 6-core i7-9750H n_voters = 201 @@ -79,31 +76,18 @@ for fig, disp, ymin in (('2.c', 1.0, 50), ('2.d', 0.5, 0)): - condorcet_winner_count = {key: Counter() for key in ( - ranked_methods.keys() | rated_methods.keys() | {'CW'})} start_time = time.monotonic() - - for iteration in range(n_elections): - for n_cands in n_cands_list: - v, c = normal_electorate(n_voters, n_cands, dims=D, corr=corr, - disp=disp) - utilities = normed_dist_utilities(v, c) - rankings = honest_rankings(utilities) - - # If there is a Condorcet winner, analyze election, otherwise skip - # it - CW = condorcet(rankings) - if CW is not None: - condorcet_winner_count['CW'][n_cands] += 1 - - for name, method in ranked_methods.items(): - if method(rankings, tiebreaker='random') == CW: - condorcet_winner_count[name][n_cands] += 1 - - for name, method in rated_methods.items(): - if method(utilities, tiebreaker='random') == CW: - condorcet_winner_count[name][n_cands] += 1 - + condorcet_winner_count = accumulate_spatial_condorcet_by_ncands( + n_elections, + n_voters=n_voters, + n_cands_list=n_cands_list, + dims=D, + corr=corr, + disp=disp, + ranked_methods=ranked_methods, + rated_methods=rated_methods, + tiebreaker='random', + ) elapsed_time = time.monotonic() - start_time print('Elapsed:', 
time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') diff --git a/examples/merrill_1984_fig_4a_4b.py b/examples/merrill_1984_fig_4a_4b.py index d229c28..f72525d 100644 --- a/examples/merrill_1984_fig_4a_4b.py +++ b/examples/merrill_1984_fig_4a_4b.py @@ -41,17 +41,13 @@ simulations. """ import time -from collections import Counter -from random import randint import matplotlib.pyplot as plt import numpy as np from tabulate import tabulate -from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff, - utility_winner) -from elsim.strategies import approval_optimal, honest_rankings +from elsim.methods import black, borda, coombs, fptp, irv, runoff, utility_winner +from elsim.studies import accumulate_spatial_sue_by_ncands, approval_at_optimal n_elections = 10_000 # Roughly 30 seconds each on a 2019 6-core i7-9750H n_voters = 201 @@ -59,12 +55,18 @@ corr = 0.5 D = 2 -ranked_methods = {'Plurality': fptp, 'Runoff': runoff, 'Hare': irv, - 'Borda': borda, 'Coombs': coombs, 'Black': black} - -rated_methods = {'SU max': utility_winner, - 'Approval': lambda utilities, tiebreaker: - approval(approval_optimal(utilities), tiebreaker)} +ranked_methods = { + "Plurality": fptp, + "Runoff": runoff, + "Hare": irv, + "Borda": borda, + "Coombs": coombs, + "Black": black, +} +rated_methods = { + "SU max": utility_winner, + "Approval": approval_at_optimal, +} # Plot Merrill's results as dotted lines for comparison (traced from plots) merrill_fig_4a = { @@ -90,30 +92,18 @@ for fig, disp, ymin, orig in (('4.a', 1.0, 55, merrill_fig_4a), ('4.b', 0.5, 0, merrill_fig_4b)): - utility_sums = {key: Counter() for key in (ranked_methods.keys() | - rated_methods.keys() | - {'SU max', 'RW'})} start_time = time.monotonic() - - for iteration in range(n_elections): - for n_cands in n_cands_list: - v, c = normal_electorate(n_voters, n_cands, dims=D, corr=corr, - disp=disp) - utilities = normed_dist_utilities(v, 
c) - rankings = honest_rankings(utilities) - - # Pick a random winner and accumulate utilities - RW = randint(0, n_cands - 1) - utility_sums['RW'][n_cands] += utilities.sum(axis=0)[RW] - - for name, method in rated_methods.items(): - winner = method(utilities, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] - - for name, method in ranked_methods.items(): - winner = method(rankings, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] - + utility_sums = accumulate_spatial_sue_by_ncands( + n_elections, + n_voters=n_voters, + n_cands_list=n_cands_list, + dims=D, + corr=corr, + disp=disp, + ranked_methods=ranked_methods, + rated_methods=rated_methods, + tiebreaker='random', + ) elapsed_time = time.monotonic() - start_time print('Elapsed:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') diff --git a/examples/merrill_1984_fig_4a_4b_updated.py b/examples/merrill_1984_fig_4a_4b_updated.py index f6b286b..ce041f7 100644 --- a/examples/merrill_1984_fig_4a_4b_updated.py +++ b/examples/merrill_1984_fig_4a_4b_updated.py @@ -45,18 +45,14 @@ simulations. 
""" import time -from collections import Counter -from random import randint import matplotlib.pyplot as plt import numpy as np from tabulate import tabulate -from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff, - score, star, utility_winner) -from elsim.strategies import (approval_optimal, honest_normed_scores, - honest_rankings) +from elsim.methods import approval, black, borda, coombs, fptp, irv, runoff, score, star, utility_winner +from elsim.strategies import approval_optimal, honest_normed_scores +from elsim.studies import accumulate_spatial_sue_by_ncands n_elections = 5_000 # Roughly 30 seconds each on a 2019 6-core i7-9750H n_voters = 201 @@ -78,33 +74,21 @@ tiebreaker), } -for fig, disp, ymin in (('4.a', 1.0, 55), - ('4.b', 0.5, 0)): +for fig, disp in (('4.a', 1.0), + ('4.b', 0.5)): - utility_sums = {key: Counter() for key in (ranked_methods.keys() | - rated_methods.keys() | - {'SU max', 'RW'})} start_time = time.monotonic() - - for iteration in range(n_elections): - for n_cands in n_cands_list: - v, c = normal_electorate(n_voters, n_cands, dims=D, corr=corr, - disp=disp) - utilities = normed_dist_utilities(v, c) - rankings = honest_rankings(utilities) - - # Pick a random winner and accumulate utilities - RW = randint(0, n_cands - 1) - utility_sums['RW'][n_cands] += utilities.sum(axis=0)[RW] - - for name, method in rated_methods.items(): - winner = method(utilities, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] - - for name, method in ranked_methods.items(): - winner = method(rankings, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] - + utility_sums = accumulate_spatial_sue_by_ncands( + n_elections, + n_voters=n_voters, + n_cands_list=n_cands_list, + dims=D, + corr=corr, + disp=disp, + ranked_methods=ranked_methods, + rated_methods=rated_methods, + tiebreaker='random', + ) elapsed_time = 
time.monotonic() - start_time print('Elapsed:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') @@ -132,6 +116,6 @@ plt.legend() plt.grid(True, color='0.7', linestyle='-', which='major', axis='both') plt.grid(True, color='0.9', linestyle='-', which='minor', axis='both') - plt.ylim(85, 100.5) # or ymin + plt.ylim(85, 100.5) plt.xlim(1.8, 7.2) plt.show() diff --git a/examples/merrill_1984_table_1_fig_1.py b/examples/merrill_1984_table_1_fig_1.py index 4a817c2..c8cee1d 100644 --- a/examples/merrill_1984_table_1_fig_1.py +++ b/examples/merrill_1984_table_1_fig_1.py @@ -32,20 +32,26 @@ from tabulate import tabulate from elsim.elections import random_utilities -from elsim.methods import (approval, black, borda, condorcet, coombs, fptp, - irv, runoff, utility_winner) -from elsim.strategies import approval_optimal, honest_rankings +from elsim.strategies import honest_rankings +from elsim.methods import black, borda, coombs, fptp, irv, runoff, utility_winner +from elsim.studies import approval_at_optimal, tally_condorcet_agreement n_elections = 10_000 # Roughly 15 seconds on a 2019 6-core i7-9750H n_voters = 25 n_cands_list = (2, 3, 4, 5, 7, 10) -ranked_methods = {'Plurality': fptp, 'Runoff': runoff, 'Hare': irv, - 'Borda': borda, 'Coombs': coombs, 'Black': black} - -rated_methods = {'SU max': utility_winner, - 'Approval': lambda utilities, tiebreaker: - approval(approval_optimal(utilities), tiebreaker)} +ranked_methods = { + "Plurality": fptp, + "Runoff": runoff, + "Hare": irv, + "Borda": borda, + "Coombs": coombs, + "Black": black, +} +rated_methods = { + "SU max": utility_winner, + "Approval": approval_at_optimal, +} condorcet_winner_count = {key: Counter() for key in ( ranked_methods.keys() | rated_methods.keys() | {'CW'})} @@ -68,18 +74,11 @@ rankings = honest_rankings(utilities) - # If there is a Condorcet winner, analyze election, otherwise skip it - CW = condorcet(rankings) - if CW is not None: - condorcet_winner_count['CW'][n_cands] += 1 - - for name, 
method in ranked_methods.items(): - if method(rankings, tiebreaker='random') == CW: - condorcet_winner_count[name][n_cands] += 1 - - for name, method in rated_methods.items(): - if method(utilities, tiebreaker='random') == CW: - condorcet_winner_count[name][n_cands] += 1 + delta = tally_condorcet_agreement( + rankings, utilities, ranked_methods, rated_methods, tiebreaker='random', + ) + for key, value in delta.items(): + condorcet_winner_count[key][n_cands] += value elapsed_time = time.monotonic() - start_time print('Elapsed:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') diff --git a/examples/merrill_1984_table_2.py b/examples/merrill_1984_table_2.py index bda0e82..c861348 100644 --- a/examples/merrill_1984_table_2.py +++ b/examples/merrill_1984_table_2.py @@ -38,42 +38,50 @@ from tabulate import tabulate from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, condorcet, coombs, fptp, - irv, runoff, utility_winner) -from elsim.strategies import approval_optimal, honest_rankings +from elsim.strategies import honest_rankings +from elsim.methods import black, borda, coombs, fptp, irv, runoff, utility_winner +from elsim.studies import approval_at_optimal, expand_rows, tally_condorcet_agreement n_elections = 10_000 # Roughly 60 seconds on a 2019 6-core i7-9750H n_voters = 201 n_cands = 5 -ranked_methods = {'Plurality': fptp, 'Runoff': runoff, 'Hare': irv, - 'Borda': borda, 'Coombs': coombs, 'Black': black} +ranked_methods = { + "Plurality": fptp, + "Runoff": runoff, + "Hare": irv, + "Borda": borda, + "Coombs": coombs, + "Black": black, +} +rated_methods = { + "SU max": utility_winner, + "Approval": approval_at_optimal, +} -rated_methods = {'SU max': utility_winner, - 'Approval': lambda utilities, tiebreaker: - approval(approval_optimal(utilities), tiebreaker)} +# disp, corr, D +condition_rows = ((1.0, 0.5, 2), + (1.0, 0.5, 4), + (1.0, 0.0, 2), + (1.0, 0.0, 4), + (0.5, 0.5, 2), + (0.5, 0.5, 
4), + (0.5, 0.0, 2), + (0.5, 0.0, 4), + ) +conditions = expand_rows(condition_rows, ('disp', 'corr', 'D')) start_time = time.monotonic() -# disp, corr, D -conditions = ((1.0, 0.5, 2), - (1.0, 0.5, 4), - (1.0, 0.0, 2), - (1.0, 0.0, 4), - (0.5, 0.5, 2), - (0.5, 0.5, 4), - (0.5, 0.0, 2), - (0.5, 0.0, 4), - ) - results = [] -for disp, corr, D in conditions: +for scenario in conditions: + disp, corr, D = scenario['disp'], scenario['corr'], scenario['D'] print(disp, corr, D) condorcet_winner_count = Counter() - for iteration in range(n_elections): + for _ in range(n_elections): v, c = normal_electorate(n_voters, n_cands, dims=D, corr=corr, disp=disp) @@ -90,18 +98,11 @@ utilities = normed_dist_utilities(v, c) rankings = honest_rankings(utilities) - # If there is a Condorcet winner, analyze election, otherwise skip it - CW = condorcet(rankings) - if CW is not None: - condorcet_winner_count['CW'] += 1 - - for name, method in ranked_methods.items(): - if method(rankings, tiebreaker='random') == CW: - condorcet_winner_count[name] += 1 - - for name, method in rated_methods.items(): - if method(utilities, tiebreaker='random') == CW: - condorcet_winner_count[name] += 1 + condorcet_winner_count.update( + tally_condorcet_agreement( + rankings, utilities, ranked_methods, rated_methods, tiebreaker='random', + ), + ) results.append(condorcet_winner_count) @@ -110,7 +111,7 @@ # Neither Tabulate nor Markdown support column span or multiple headers, but # at least this prints to plain text in a readable way. 
-header = ['Disp\nCorr\nDims'] + [f'{x}\n{y}\n{z}' for x, y, z in conditions] +header = ['Disp\nCorr\nDims'] + [f'{x}\n{y}\n{z}' for x, y, z in condition_rows] # Of those elections with CW, likelihood that method chooses CW table = [] diff --git a/examples/merrill_1984_table_3_fig_3.py b/examples/merrill_1984_table_3_fig_3.py index 7fc1676..7d51d12 100644 --- a/examples/merrill_1984_table_3_fig_3.py +++ b/examples/merrill_1984_table_3_fig_3.py @@ -30,9 +30,9 @@ from tabulate import tabulate from elsim.elections import random_utilities -from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff, - utility_winner) +from elsim.methods import approval, black, borda, coombs, fptp, irv, runoff from elsim.strategies import approval_optimal, honest_rankings +from elsim.studies import random_society_utility_updates n_elections = 10_000 # Roughly 30 seconds on a 2019 6-core i7-9750H n_voters = 25 @@ -49,7 +49,7 @@ start_time = time.monotonic() -for iteration in range(n_elections): +for _ in range(n_elections): for n_cands in n_cands_list: utilities = random_utilities(n_voters, n_cands) @@ -62,18 +62,16 @@ utilities -= utilities.min(1)[:, np.newaxis] utilities /= utilities.max(1)[:, np.newaxis] - # Find the social utility winner and accumulate utilities - UW = utility_winner(utilities) - utility_sums['UW'][n_cands] += utilities.sum(axis=0)[UW] - - for name, method in rated_methods.items(): - winner = method(utilities, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] - rankings = honest_rankings(utilities) - for name, method in ranked_methods.items(): - winner = method(rankings, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] + + delta = random_society_utility_updates( + utilities, rankings, ranked_methods, rated_methods, + tiebreaker='random', + uw_key='UW', + utility_winner_tiebreaker=None, + ) + for name, value in delta.items(): + utility_sums[name][n_cands] += value elapsed_time = 
time.monotonic() - start_time diff --git a/examples/merrill_1984_table_4.py b/examples/merrill_1984_table_4.py index 80e706f..12ba1eb 100644 --- a/examples/merrill_1984_table_4.py +++ b/examples/merrill_1984_table_4.py @@ -31,48 +31,55 @@ """ import time from collections import Counter -from random import randint import numpy as np from tabulate import tabulate from elsim.elections import normal_electorate, normed_dist_utilities -from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff, - utility_winner) -from elsim.strategies import approval_optimal, honest_rankings +from elsim.methods import black, borda, coombs, fptp, irv, runoff, utility_winner +from elsim.strategies import honest_rankings +from elsim.studies import approval_at_optimal, expand_rows, spatial_random_reference_utility_updates n_elections = 10_000 # Roughly 60 seconds on a 2019 6-core i7-9750H n_voters = 201 n_cands = 5 -ranked_methods = {'Plurality': fptp, 'Runoff': runoff, 'Hare': irv, - 'Borda': borda, 'Coombs': coombs, 'Black': black} +ranked_methods = { + "Plurality": fptp, + "Runoff": runoff, + "Hare": irv, + "Borda": borda, + "Coombs": coombs, + "Black": black, +} +rated_methods = { + "SU max": utility_winner, + "Approval": approval_at_optimal, +} -rated_methods = {'SU max': utility_winner, - 'Approval': lambda utilities, tiebreaker: - approval(approval_optimal(utilities), tiebreaker)} +# disp, corr, D +condition_rows = ((1.0, 0.5, 2), + (1.0, 0.5, 4), + (1.0, 0.0, 2), + (1.0, 0.0, 4), + (0.5, 0.5, 2), + (0.5, 0.5, 4), + (0.5, 0.0, 2), + (0.5, 0.0, 4), + ) +conditions = expand_rows(condition_rows, ('disp', 'corr', 'D')) start_time = time.monotonic() -# disp, corr, D -conditions = ((1.0, 0.5, 2), - (1.0, 0.5, 4), - (1.0, 0.0, 2), - (1.0, 0.0, 4), - (0.5, 0.5, 2), - (0.5, 0.5, 4), - (0.5, 0.0, 2), - (0.5, 0.0, 4), - ) - results = [] -for disp, corr, D in conditions: +for scenario in conditions: + disp, corr, D = scenario['disp'], scenario['corr'], scenario['D'] print(disp, 
corr, D) utility_sums = Counter() - for iteration in range(n_elections): + for _ in range(n_elections): v, c = normal_electorate(n_voters, n_cands, dims=D, corr=corr, disp=disp) @@ -89,17 +96,12 @@ utilities = normed_dist_utilities(v, c) rankings = honest_rankings(utilities) - # Pick a random winner and accumulate utilities - RW = randint(0, n_cands - 1) - utility_sums['RW'] += utilities.sum(axis=0)[RW] - - for name, method in rated_methods.items(): - winner = method(utilities, tiebreaker='random') - utility_sums[name] += utilities.sum(axis=0)[winner] - - for name, method in ranked_methods.items(): - winner = method(rankings, tiebreaker='random') - utility_sums[name] += utilities.sum(axis=0)[winner] + delta = spatial_random_reference_utility_updates( + utilities, rankings, ranked_methods, rated_methods, + tiebreaker='random', + ) + for name, value in delta.items(): + utility_sums[name] += value results.append(utility_sums) @@ -108,7 +110,7 @@ # Neither Tabulate nor Markdown support column span or multiple headers, but # at least this prints to plain text in a readable way. 
-header = ['Disp\nCorr\nDims'] + [f'{x}\n{y}\n{z}' for x, y, z in conditions] +header = ['Disp\nCorr\nDims'] + [f'{x}\n{y}\n{z}' for x, y, z in condition_rows] # Calculate Social Utility Efficiency from summed utilities y_uw = np.array([c['SU max'] for c in results]) diff --git a/examples/niemi_1968_table_1.py b/examples/niemi_1968_table_1.py index ede3e34..cce58f2 100644 --- a/examples/niemi_1968_table_1.py +++ b/examples/niemi_1968_table_1.py @@ -36,6 +36,7 @@ """ from collections import Counter +from functools import partial import matplotlib.pyplot as plt import numpy as np @@ -44,6 +45,7 @@ from elsim.elections import impartial_culture from elsim.methods import condorcet +from elsim.studies import merge_counters # Probability That There Is No Majority Winner niemi_table = [.0000, .0000, .0877, .1755, .2513, .3152, .3692, .4151, .4545, @@ -77,14 +79,14 @@ def simulate_batch(n_voters, n_cands, batch_size): return condorcet_paradox_count -jobs = [] -for n_cands in n_cands_list: - jobs.extend(n_batches * - [delayed(simulate_batch)(n_voters, n_cands, batch_size)]) - -print(f'{len(jobs)} tasks total:') -results = Parallel(n_jobs=-3, verbose=5)(jobs) -condorcet_paradox_counts = sum(results, Counter()) +fns = [ + partial(simulate_batch, n_voters, n_cands, batch_size) + for n_cands in n_cands_list + for _ in range(n_batches) +] +print(f'{len(fns)} tasks total:') +results = Parallel(n_jobs=-3, verbose=5)(delayed(fn)() for fn in fns) +condorcet_paradox_counts = merge_counters(results) x, y = zip(*niemi_table.items()) plt.plot(x, y, label='Niemi') diff --git a/examples/niemi_1968_table_2.py b/examples/niemi_1968_table_2.py index f74ba9f..b67f88e 100644 --- a/examples/niemi_1968_table_2.py +++ b/examples/niemi_1968_table_2.py @@ -22,6 +22,7 @@ """ from collections import Counter +from functools import partial import numpy as np from joblib import Parallel, delayed @@ -29,6 +30,7 @@ from elsim.elections import impartial_culture from elsim.methods import condorcet +from 
elsim.studies import merge_counters # It needs many simulations to get similar accuracy as the analytical results n_elections = 100_000 # Roughly 30 seconds on a 2019 6-core i7-9750H @@ -45,26 +47,23 @@ def simulate_batch(n_voters, n_cands, batch_size): condorcet_paradox_count = Counter() # Reuse the same chunk of memory to save time election = np.empty((n_voters, n_cands), dtype=np.uint8) - for iteration in range(batch_size): + for _iteration in range(batch_size): election[:] = impartial_culture(n_voters, n_cands) - CW = condorcet(election) - if CW is None: + cw = condorcet(election) + if cw is None: condorcet_paradox_count[n_cands, n_voters] += 1 return condorcet_paradox_count -jobs = [] -for n_voters in n_voters_list: - for n_cands in n_cands_list: - jobs.extend(n_batches * - [delayed(simulate_batch)(n_voters, n_cands, batch_size)]) - -print(f'{len(jobs)} tasks total:') -results = Parallel(n_jobs=-3, verbose=5)(jobs) -condorcet_paradox_counts = sum(results, Counter()) - -nm, P = zip(*sorted(condorcet_paradox_counts.items())) -P = np.asarray(P) / n_elections # Percent likelihood of paradox +fns = [ + partial(simulate_batch, n_voters, n_cands, batch_size) + for n_voters in n_voters_list + for n_cands in n_cands_list + for _ in range(n_batches) +] +print(f'{len(fns)} tasks total:') +results = Parallel(n_jobs=-3, verbose=5)(delayed(fn)() for fn in fns) +condorcet_paradox_counts = merge_counters(results) table = [] for n in n_cands_list: @@ -73,4 +72,4 @@ def simulate_batch(n_voters, n_cands, batch_size): table.append(row) print(tabulate(table, n_voters_list, tablefmt="pipe", showindex=n_cands_list, - floatfmt='.4f')) + floatfmt='.4f')) \ No newline at end of file diff --git a/examples/tomlinson_2023_figure_3.py b/examples/tomlinson_2023_figure_3.py index 9c7f24d..4d80455 100644 --- a/examples/tomlinson_2023_figure_3.py +++ b/examples/tomlinson_2023_figure_3.py @@ -12,6 +12,7 @@ """ import pickle from collections import defaultdict +from functools import partial 
import matplotlib.pyplot as plt import numpy as np @@ -46,7 +47,7 @@ def human_format(num): def simulate_batch(n_cands): winners = defaultdict(list) - for iteration in range(batch_size): + for _iteration in range(batch_size): # "voters and candidates come from the uniform distribution on [0, 1]" v = np.random.uniform(0, 1, n_voters) @@ -80,9 +81,9 @@ def simulate_batch(n_cands): fig.suptitle(title) for n_cands in n_cands_list: - jobs = [delayed(simulate_batch)(n_cands)] * n_batches - print(f'{len(jobs)} tasks total:') - results = Parallel(n_jobs=-3, verbose=5)(jobs) + worker = partial(simulate_batch, n_cands) + print(f'{n_batches} tasks total:') + results = Parallel(n_jobs=-3, verbose=5)(delayed(worker)() for _ in range(n_batches)) winners = {k: [v for d in results for v in d[k]] for k in results[0]} diff --git a/examples/tomlinson_2023_figure_3_updated.py b/examples/tomlinson_2023_figure_3_updated.py index 1f433a4..a24cb12 100644 --- a/examples/tomlinson_2023_figure_3_updated.py +++ b/examples/tomlinson_2023_figure_3_updated.py @@ -15,6 +15,7 @@ """ import pickle from collections import defaultdict +from functools import partial import matplotlib.pyplot as plt import numpy as np @@ -22,10 +23,8 @@ from seaborn import histplot from elsim.elections import normed_dist_utilities -from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff, - star) -from elsim.strategies import (approval_optimal, honest_normed_scores, - honest_rankings, vote_for_k) +from elsim.methods import approval, black, borda, coombs, fptp, irv, runoff, star +from elsim.strategies import approval_optimal, honest_normed_scores, honest_rankings, vote_for_k n_elections = 100_000 # Roughly 1 minute on a 2019 6-core i7-9750H n_voters = 1_000 @@ -54,7 +53,7 @@ def human_format(num): def simulate_batch(n_cands): winners = defaultdict(list) - for iteration in range(batch_size): + for _iteration in range(batch_size): # v, c = normal_electorate(n_voters, n_cands, dims=1, disp=disp) if 
cand_dist == 'uniform': @@ -115,9 +114,9 @@ def simulate_batch(n_cands): return winners -jobs = [delayed(simulate_batch)(n_cands)] * n_batches -print(f'{len(jobs)} tasks total:') -results = Parallel(n_jobs=-3, verbose=5)(jobs) +worker = partial(simulate_batch, n_cands) +print(f'{n_batches} tasks total:') +results = Parallel(n_jobs=-3, verbose=5)(delayed(worker)() for _ in range(n_batches)) winners = {k: [v for d in results for v in d[k]] for k in results[0]} title = f'{human_format(n_elections)} 1D elections, ' diff --git a/examples/weber_1977_effectiveness_table.py b/examples/weber_1977_effectiveness_table.py index 3b8b3f9..1f19ff6 100644 --- a/examples/weber_1977_effectiveness_table.py +++ b/examples/weber_1977_effectiveness_table.py @@ -27,11 +27,12 @@ import matplotlib.pyplot as plt import numpy as np from tabulate import tabulate +from weber_1977_expressions import eff_borda, eff_standard, eff_vote_for_half from elsim.elections import random_utilities -from elsim.methods import approval, borda, fptp, utility_winner +from elsim.methods import approval, borda, fptp from elsim.strategies import honest_rankings, vote_for_k -from weber_1977_expressions import eff_borda, eff_standard, eff_vote_for_half +from elsim.studies import random_society_utility_updates n_elections = 2_000 # Roughly 60 seconds on a 2019 6-core i7-9750H n_voters = 1_000 @@ -47,22 +48,20 @@ start_time = time.monotonic() -for iteration in range(n_elections): +for _ in range(n_elections): for n_cands in n_cands_list: utilities = random_utilities(n_voters, n_cands) - # Find the social utility winner and accumulate utilities - UW = utility_winner(utilities) - utility_sums['UW'][n_cands] += utilities.sum(axis=0)[UW] - - for name, method in rated_methods.items(): - winner = method(utilities, tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] - rankings = honest_rankings(utilities) - for name, method in ranked_methods.items(): - winner = method(rankings, 
tiebreaker='random') - utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] + + delta = random_society_utility_updates( + utilities, rankings, ranked_methods, rated_methods, + tiebreaker='random', + uw_key='UW', + utility_winner_tiebreaker=None, + ) + for name, value in delta.items(): + utility_sums[name][n_cands] += value elapsed_time = time.monotonic() - start_time print('Elapsed:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') diff --git a/examples/weber_1977_table_4.py b/examples/weber_1977_table_4.py index 9331205..973afee 100644 --- a/examples/weber_1977_table_4.py +++ b/examples/weber_1977_table_4.py @@ -34,6 +34,7 @@ from elsim.elections import random_utilities from elsim.methods import approval, borda, fptp from elsim.strategies import approval_optimal, honest_rankings +from elsim.studies import ranked_rated_utility_updates n_elections = 30_000 # Roughly 30 seconds on a 2019 6-core i7-9750H n_voters_list = (2, 3, 4, 5, 10, 15, 20, 25, 30) @@ -48,18 +49,18 @@ start_time = time.monotonic() -for iteration in range(n_elections): +for _ in range(n_elections): for n_voters in n_voters_list: utilities = random_utilities(n_voters, n_cands) - for name, method in rated_methods.items(): - winner = method(utilities, tiebreaker='random') - utility_sums[name][n_voters] += utilities.sum(axis=0)[winner] - rankings = honest_rankings(utilities) - for name, method in ranked_methods.items(): - winner = method(rankings, tiebreaker='random') - utility_sums[name][n_voters] += utilities.sum(axis=0)[winner] + + delta = ranked_rated_utility_updates( + utilities, rankings, ranked_methods, rated_methods, + tiebreaker='random', + ) + for name, value in delta.items(): + utility_sums[name][n_voters] += value elapsed_time = time.monotonic() - start_time print('Elapsed:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)), '\n') @@ -72,4 +73,4 @@ table.update({method: np.array(y) / n_elections}) print(tabulate(table, 'keys', showindex=n_voters_list, - 
tablefmt="pipe", floatfmt='.4f')) + tablefmt="pipe", floatfmt='.4f')) \ No newline at end of file diff --git a/examples/weber_1977_verify_vote_for_k.py b/examples/weber_1977_verify_vote_for_k.py index f754551..8ab1809 100644 --- a/examples/weber_1977_verify_vote_for_k.py +++ b/examples/weber_1977_verify_vote_for_k.py @@ -46,12 +46,11 @@ import numpy as np from joblib import Parallel, delayed from tabulate import tabulate +from weber_1977_expressions import eff_standard, eff_vote_for_half, eff_vote_for_k from elsim.elections import random_utilities from elsim.methods import approval, fptp, utility_winner from elsim.strategies import honest_rankings, vote_for_k -from weber_1977_expressions import (eff_standard, eff_vote_for_half, - eff_vote_for_k) n_elections = 10_000 # Roughly 60 seconds on a 2019 6-core i7-9750H n_voters = 1_000 @@ -102,10 +101,9 @@ def simulate_election(): return utility_sums -print(f'Doing {n_elections:,} elections (tasks), {n_voters:,} voters, ' - f'{n_cands_list} candidates') -results = Parallel(n_jobs=-3, verbose=5)(delayed(simulate_election)() - for i in range(n_elections)) +results = Parallel(n_jobs=-3, verbose=5)( + delayed(simulate_election)() for _ in range(n_elections) +) for result in results: for method, d in result.items(): diff --git a/examples/wikipedia_condorcet_paradox_likelihood.py b/examples/wikipedia_condorcet_paradox_likelihood.py index f6954d1..56a01dd 100644 --- a/examples/wikipedia_condorcet_paradox_likelihood.py +++ b/examples/wikipedia_condorcet_paradox_likelihood.py @@ -21,6 +21,7 @@ """ from collections import Counter +from functools import partial import matplotlib.pyplot as plt import numpy as np @@ -29,6 +30,7 @@ from elsim.elections import impartial_culture from elsim.methods import condorcet +from elsim.studies import merge_counters # Number of voters vs percent of elections with Condorcet paradox. 
WP_table = {3: 5.556, @@ -53,22 +55,22 @@ def simulate_batch(n_voters, n_cands, batch_size): condorcet_paradox_count = Counter() # Reuse the same chunk of memory to save time election = np.empty((n_voters, n_cands), dtype=np.uint8) - for iteration in range(batch_size): + for _iteration in range(batch_size): election[:] = impartial_culture(n_voters, n_cands) - CW = condorcet(election) - if CW is None: + cw = condorcet(election) + if cw is None: condorcet_paradox_count[n_voters] += 1 return condorcet_paradox_count -jobs = [] -for n_voters in WP_table: - jobs.extend(n_batches * - [delayed(simulate_batch)(n_voters, n_cands, batch_size)]) - -print(f'{len(jobs)} tasks total:') -results = Parallel(n_jobs=-3, verbose=5)(jobs) -condorcet_paradox_counts = sum(results, Counter()) +fns = [ + partial(simulate_batch, n_voters, n_cands, batch_size) + for n_voters in WP_table + for _ in range(n_batches) +] +print(f'{len(fns)} tasks total:') +results = Parallel(n_jobs=-3, verbose=5)(delayed(fn)() for fn in fns) +condorcet_paradox_counts = merge_counters(results) x, y = zip(*WP_table.items()) plt.plot(x, y, label='WP') diff --git a/pyproject.toml b/pyproject.toml index 235e83d..5d95414 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ test = [ "pytest-cov", "hypothesis", "ruff", + "joblib", ] examples = [ "tabulate", diff --git a/tests/test_studies.py b/tests/test_studies.py new file mode 100644 index 0000000..e917885 --- /dev/null +++ b/tests/test_studies.py @@ -0,0 +1,339 @@ +"""Tests for elsim.studies (Monte Carlo helpers, parameter expansion).""" + +from collections import Counter +from functools import partial + +import numpy as np +import pytest + +from elsim.methods import approval, black, borda, condorcet, coombs, fptp, irv, runoff, utility_winner +from elsim.strategies import approval_optimal +from elsim.studies import ( + SerialBackend, + accumulate_spatial_condorcet_by_ncands, + accumulate_spatial_sue_by_ncands, + approval_at_optimal, + expand_product, + 
expand_rows, + expand_zip, + merge_counters, + random_society_utility_updates, + ranked_rated_utility_updates, + run_batched, + spatial_random_reference_utility_updates, + tally_condorcet_agreement, +) + + +def test_expand_product_scalar_and_list(): + got = expand_product(n_voters=[10, 20], n_cands=3) + assert got == [{"n_voters": 10, "n_cands": 3}, {"n_voters": 20, "n_cands": 3}] + + +def test_expand_product_bytes_scalar(): + assert expand_product(blob=b"ab") == [{"blob": b"ab"}] + + +def test_expand_rows_empty(): + assert expand_rows((), ("a",)) == [] + + +def test_run_batched_uses_implicit_serial_backend(): + out = run_batched(lambda k: k, n_trials=5, batch_size=2) + assert out == [2, 2, 1] + + +def test_expand_zip_basic(): + assert expand_zip(a=[1, 2], b=[3, 4]) == [{"a": 1, "b": 3}, {"a": 2, "b": 4}] + + +def test_expand_zip_length_mismatch(): + with pytest.raises(ValueError, match="same length"): + expand_zip(a=[1, 2], b=[3]) + + +def test_expand_rows_merrill_style(): + rows = ((1.0, 0.5, 2), (0.5, 0.0, 4)) + keys = ("disp", "corr", "D") + assert expand_rows(rows, keys) == [ + {"disp": 1.0, "corr": 0.5, "D": 2}, + {"disp": 0.5, "corr": 0.0, "D": 4}, + ] + + +def test_expand_rows_width_mismatch(): + with pytest.raises(ValueError, match="row 0"): + expand_rows([(1, 2, 3)], ("a", "b")) + + +def test_merge_counters(): + assert merge_counters([Counter({"a": 1}), Counter({"a": 2, "b": 1})]) == Counter({"a": 3, "b": 1}) + + +def test_run_batched_serial(): + sizes = [] + + def batch_fn(k): + sizes.append(k) + return k + + out = run_batched(batch_fn, n_trials=25, batch_size=10, backend=SerialBackend()) + assert out == [10, 10, 5] + assert sizes == [10, 10, 5] + + +def test_run_batched_exact_batches(): + out = run_batched(lambda k: k, n_trials=30, batch_size=10, backend=SerialBackend()) + assert out == [10, 10, 10] + + +def test_tally_condorcet_agreement_no_cw(): + rankings = np.array( + [ + [0, 1, 2], + [1, 2, 0], + [2, 0, 1], + ], + dtype=np.uint8, + ) + utilities = 
np.zeros_like(rankings, dtype=float) + ranked = {"Plurality": fptp} + rated: dict = {} + assert tally_condorcet_agreement(rankings, utilities, ranked, rated) == Counter() + + +def test_tally_condorcet_agreement_with_cw(): + # Candidate 0 beats everyone pairwise + rankings = np.array( + [ + [0, 1, 2], + [0, 2, 1], + [0, 1, 2], + ], + dtype=np.uint8, + ) + utilities = np.array( + [ + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + ] + ) + assert condorcet(rankings) == 0 + assert fptp(rankings, tiebreaker="random") == 0 + + ranked = {"Plurality": fptp} + rated: dict = {} + c = tally_condorcet_agreement(rankings, utilities, ranked, rated, tiebreaker="random") + assert c["CW"] == 1 + assert c["Plurality"] == 1 + + +def test_serial_backend_map_each(): + out = SerialBackend().map_each([lambda: 1, lambda: 2]) + assert out == [1, 2] + + +def test_parallel_delayed_map_each(): + pytest.importorskip("joblib") + from joblib import Parallel, delayed + + def f(x): + return x + + out = list(Parallel(n_jobs=2, verbose=0)(delayed(fn)() for fn in [partial(f, 1), partial(f, 2)])) + assert out == [1, 2] + + +def test_ranked_rated_utility_updates(): + utilities = np.array([[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]) + rankings = np.array([[0, 1], [0, 1], [0, 1]], dtype=np.uint8) + + delta = ranked_rated_utility_updates( + utilities, rankings, {'Plurality': fptp}, {}, tiebreaker='random', + ) + assert set(delta) == {'Plurality'} + assert delta['Plurality'] == float(utilities.sum(axis=0)[0]) + + +def test_spatial_random_reference_includes_rw(): + np.random.seed(0) + utilities = np.array([[1.0, 0.0, 0.5], [0.0, 1.0, 0.5]]) + rankings = np.array([[0, 1, 2], [1, 0, 2]], dtype=np.uint8) + + delta = spatial_random_reference_utility_updates( + utilities, rankings, {'Plurality': fptp}, {}, tiebreaker='random', + ) + assert 'RW' in delta + assert 'Plurality' in delta + + +def test_expand_zip_empty(): + assert expand_zip() == [] + + +def test_expand_product_mapping_value_raises(): + with 
pytest.raises(TypeError, match="Mappings"): + expand_product(x={"a": 1}) + + +def test_run_batched_zero_trials(): + assert run_batched(lambda k: k, n_trials=0, batch_size=10) == [] + + +def test_run_batched_negative_trials_raises(): + with pytest.raises(ValueError, match="non-negative"): + run_batched(lambda k: k, n_trials=-1, batch_size=10) + + +def test_run_batched_invalid_batch_size_raises(): + with pytest.raises(ValueError, match="positive"): + run_batched(lambda k: k, n_trials=10, batch_size=0) + + +def test_serial_backend_map_repeat_negative_raises(): + with pytest.raises(ValueError, match="non-negative"): + SerialBackend().map_repeat(lambda: 1, n=-1) + + +def test_merge_counters_empty(): + assert merge_counters([]) == Counter() + + +def test_random_society_utility_updates_tiebreaker_none(): + utilities = np.array([[0.9, 0.1], [0.8, 0.2]]) + rankings = np.array([[0, 1], [0, 1]], dtype=np.uint8) + delta = random_society_utility_updates( + utilities, + rankings, + {"Plurality": fptp}, + {}, + tiebreaker="random", + uw_key="UW", + utility_winner_tiebreaker=None, + ) + assert "UW" in delta + assert "Plurality" in delta + + +def test_random_society_utility_updates_custom_uw_key_and_rated(): + utilities = np.array([[1.0, 0.0], [1.0, 0.0]]) + rankings = np.array([[0, 1], [0, 1]], dtype=np.uint8) + + rated = { + "Approval": lambda u, tiebreaker: approval( + approval_optimal(u), tiebreaker, + ), + } + delta = random_society_utility_updates( + utilities, + rankings, + {"Plurality": fptp}, + rated, + tiebreaker="random", + uw_key="XX", + utility_winner_tiebreaker="random", + ) + assert "XX" in delta + assert "Approval" in delta + assert "Plurality" in delta + + +def test_spatial_random_reference_with_rated(): + utilities = np.array([[1.0, 0.0], [1.0, 0.0]]) + rankings = np.array([[0, 1], [0, 1]], dtype=np.uint8) + + rated = { + "Approval": lambda u, tiebreaker: approval( + approval_optimal(u), tiebreaker, + ), + } + delta = spatial_random_reference_utility_updates( + 
utilities, rankings, {"Plurality": fptp}, rated, tiebreaker="random", + ) + assert set(delta) >= {"RW", "Plurality", "Approval"} + + +def test_ranked_rated_with_both_method_kinds(): + utilities = np.array([[1.0, 0.0], [1.0, 0.0]]) + rankings = np.array([[0, 1], [0, 1]], dtype=np.uint8) + + rated = { + "Approval": lambda u, tiebreaker: approval( + approval_optimal(u), tiebreaker, + ), + } + delta = ranked_rated_utility_updates( + utilities, rankings, {"Plurality": fptp}, rated, tiebreaker="random", + ) + assert "Plurality" in delta + assert "Approval" in delta + + +def test_approval_at_optimal_matches_explicit(): + utilities = np.array([[0.2, 0.8], [0.1, 0.9]]) + tb = "random" + assert approval_at_optimal(utilities, tb) == approval(approval_optimal(utilities), tb) + + +def test_tally_condorcet_agreement_rated_branch(): + rankings = np.array([[0, 1], [0, 1], [0, 1]], dtype=np.uint8) + utilities = np.array( + [ + [1.0, 0.0], + [1.0, 0.0], + [1.0, 0.0], + ], + ) + assert condorcet(rankings) == 0 + ranked = { + "Plurality": fptp, + "Runoff": runoff, + "Hare": irv, + "Borda": borda, + "Coombs": coombs, + "Black": black, + } + rated = {"SU max": utility_winner, "Approval": approval_at_optimal} + c = tally_condorcet_agreement(rankings, utilities, ranked, rated, tiebreaker="random") + assert c["CW"] == 1 + assert c["SU max"] == 1 + assert c["Plurality"] == 1 + + +def test_accumulate_spatial_condorcet_by_ncands_keys(): + np.random.seed(0) + ranked = {"Plurality": fptp, "Hare": irv} + rated = {"SU max": utility_winner, "Approval": approval_at_optimal} + out = accumulate_spatial_condorcet_by_ncands( + 4, + n_voters=15, + n_cands_list=(3, 4), + dims=2, + corr=0.5, + disp=1.0, + ranked_methods=ranked, + rated_methods=rated, + tiebreaker="random", + ) + assert set(out.keys()) == {"CW", "Plurality", "Hare", "SU max", "Approval"} + assert sum(out["CW"].values()) <= 4 * 2 + + +def test_accumulate_spatial_sue_by_ncands_keys(): + np.random.seed(1) + ranked = {"Plurality": fptp} + 
rated = {"SU max": utility_winner, "Approval": approval_at_optimal} + out = accumulate_spatial_sue_by_ncands( + 3, + n_voters=12, + n_cands_list=(3,), + dims=2, + corr=0.5, + disp=1.0, + ranked_methods=ranked, + rated_methods=rated, + tiebreaker="random", + ) + assert set(out.keys()) == {"SU max", "RW", "Plurality", "Approval"} + assert out["RW"][3] > 0