diff --git a/elsim/strategies/__init__.py b/elsim/strategies/__init__.py index fa1d3692..df5fb75b 100644 --- a/elsim/strategies/__init__.py +++ b/elsim/strategies/__init__.py @@ -5,4 +5,4 @@ ballots that voters cast for a voting method. """ from .strategies import (approval_optimal, honest_normed_scores, - honest_rankings, vote_for_k) + honest_rankings, vote_for_k, vote_for_or_against_k) diff --git a/elsim/strategies/strategies.py b/elsim/strategies/strategies.py index e69312d6..8a7892c5 100644 --- a/elsim/strategies/strategies.py +++ b/elsim/strategies/strategies.py @@ -267,3 +267,72 @@ def vote_for_k(utilities, k): # TODO: Not sure if this is the most efficient way approvals[np.arange(len(approvals))[:, np.newaxis], top_k] = 1 return approvals + + +def vote_for_or_against_k(utilities, k, rng=None): + """ + Convert utilities to combined-approval ballots (vote-for-or-against-k). + + Weber (*Comparison of Public Choice Systems*, Cowles Discussion Paper 498) + defines ``2 * (m choose k)`` strategic types: for each cardinality-``k`` set + ``S``, types **vote for** ``S`` (``+1`` on ``S``) and **vote against** ``S`` + (``-1`` on ``S``), each with probability ``1 / (2 * (m choose k))``. The + effectiveness formulas follow from the resulting reproducing scores + ``u_t(c)`` over regions of the preference cube. [1]_ + + For **simulation**, each voter independently flips a fair coin. **Vote for** + puts ``+1`` on that voter's ``k`` **highest**-utility candidates (ties broken + with noise). **Vote against** puts ``-1`` on their ``k`` **lowest**-utility + candidates---not on their favorites. The unused candidates stay at ``0``. + When ``k <= n_cands // 2`` the top and bottom blocks are disjoint, so each + ballot lies in ``{-1, 0, +1}`` with exactly ``k`` nonzero entries. + + Monte Carlo Social Utility Efficiency under impartial culture may or may not + track ``eff_vote_for_or_against_k`` from the paper's infinite-voter analysis; + see ``examples/weber_1977_effectiveness_table.py``. + + Parameters + ---------- + utilities : array_like + A 2D collection of utilities. Rows are voters; columns are candidates. + k : int + Must satisfy ``0 < k <= n_cands // 2`` (Weber allows ``k = m/2`` when + ``m`` is even). + rng : numpy.random.Generator, optional + Random number generator. If omitted, ``numpy.random.default_rng()`` + is used. + + Returns + ------- + election : ndarray + A 2D collection of combined approval ballots (``int8``). + + References + ---------- + .. [1] Weber, Robert J. (1978). "Comparison of Public Choice Systems". + Cowles Foundation Discussion Papers. Cowles Foundation for Research in + Economics. No. 498. https://cowles.yale.edu/publications/cfdp/cfdp-498 + + """ + utilities = np.asarray(utilities) + n_voters, n_cands = utilities.shape + if not 0 < k <= n_cands // 2: + raise ValueError( + f'k of {k} not possible for vote-for-or-against-k with ' + f'{n_cands} candidates (require 0 < k <= n_cands // 2)' + ) + + rng = np.random.default_rng(rng) + ballots = np.zeros((n_voters, n_cands), dtype=np.int8) + rows = np.arange(n_voters)[:, np.newaxis] + + u = utilities.astype(np.float64, copy=False) + u_j = u + rng.random(u.shape) * (np.finfo(np.float64).eps * 64) + top_k = np.argpartition(u_j, -k, axis=1)[:, -k:] + bot_k = np.argpartition(u_j, k - 1, axis=1)[:, :k] + choice = rng.integers(2, size=n_voters, dtype=np.int8) + vote_for = (choice == 0)[:, np.newaxis] + target = np.where(vote_for, top_k, bot_k) + vals = np.where(vote_for, np.int8(1), np.int8(-1)) + ballots[rows, target] = vals + return ballots diff --git a/examples/weber_1977_effectiveness_table.py b/examples/weber_1977_effectiveness_table.py index 3b8b3f92..70556f4e 100644 --- a/examples/weber_1977_effectiveness_table.py +++ b/examples/weber_1977_effectiveness_table.py @@ -7,20 +7,27 @@ Cowles Foundation Discussion Papers. Cowles Foundation for Research in Economics. No. 498. https://cowles.yale.edu/publications/cfdp/cfdp-498 -Typical result with n_elections = 100_000: - -| | Standard | Vote-for-half | Borda | -|----:|-----------:|----------------:|--------:| -| 2 | 81.37 | 81.71 | 81.41 | -| 3 | 75.10 | 75.00 | 86.53 | -| 4 | 69.90 | 79.92 | 89.47 | -| 5 | 65.02 | 79.09 | 91.34 | -| 6 | 61.08 | 81.20 | 92.61 | -| 10 | 50.78 | 82.94 | 95.35 | -| 255 | 12.78 | 86.37 | 99.80 | +The dashed lines are Weber's infinite-voter closed forms. The solid curves +from this script are **not** asserted to coincide with them for +vote-for-or-against-k; compare visually or numerically as you like. + +Typical Monte Carlo Social Utility Efficiency (``n_elections`` = 100_000) +with ``combined_approval``. Best Vote-for-or-against-k uses +``best_vote_for_or_against_k(m)`` and ``vote_for_or_against_k``: each voter +flips a fair coin, then either ``+1`` on their ``k`` best candidates by utility +or ``-1`` on their ``k`` worst (never disapproving favorites alone). + +| | Standard | Vote-for-half | Best Vote-for-or-against-k | Borda | +|----:|-----------:|----------------:|-----------------------------:|--------:| +| 2 | 81.37 | 81.71 | (see simulation) | 81.41 | +| 3 | 75.10 | 75.00 | (see simulation) | 86.53 | +| 4 | 69.90 | 79.92 | (see simulation) | 89.47 | +| 5 | 65.02 | 79.09 | (see simulation) | 91.34 | +| 6 | 61.08 | 81.20 | (see simulation) | 92.61 | +| 10 | 50.78 | 82.94 | (see simulation) | 95.35 | +| 255 | 12.78 | 86.37 | (see simulation) | 99.80 | """ # TODO: Standard is consistently ~1% high, while Borda is very accurate -# TODO: Best Vote-for-or-against-k is not implemented yet import time from collections import Counter @@ -29,9 +36,12 @@ from tabulate import tabulate from elsim.elections import random_utilities -from elsim.methods import approval, borda, fptp, utility_winner -from elsim.strategies import honest_rankings, vote_for_k -from weber_1977_expressions import eff_borda, eff_standard, eff_vote_for_half +from elsim.methods import approval, borda, combined_approval, fptp, utility_winner +from elsim.strategies import (honest_rankings, vote_for_k, + vote_for_or_against_k) +from weber_1977_expressions import (eff_best_vote_for_or_against_k, eff_borda, + eff_standard, eff_vote_for_half, + best_vote_for_or_against_k) n_elections = 2_000 # Roughly 60 seconds on a 2019 6-core i7-9750H n_voters = 1_000 @@ -43,7 +53,8 @@ approval(vote_for_k(utilities, 'half'), tiebreaker)} utility_sums = {key: Counter() for key in (ranked_methods.keys() | - rated_methods.keys() | {'UW'})} + rated_methods.keys() | + {'Best Vote-for-or-against-k', 'UW'})} start_time = time.monotonic() @@ -59,6 +70,12 @@ winner = method(utilities, tiebreaker='random') utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] + k_voa = best_vote_for_or_against_k(n_cands) + winner = combined_approval( + vote_for_or_against_k(utilities, k_voa), tiebreaker='random') + utility_sums['Best Vote-for-or-against-k'][n_cands] += ( + utilities.sum(axis=0)[winner]) + rankings = honest_rankings(utilities) for name, method in ranked_methods.items(): winner = method(rankings, tiebreaker='random') @@ -71,6 +88,8 @@ plt.title('The Effectiveness of Several Voting Systems') for name, method in (('Standard', eff_standard), ('Vote-for-half', eff_vote_for_half), + ('Best Vote-for-or-against-k', + eff_best_vote_for_or_against_k), ('Borda', eff_borda)): plt.plot(n_cands_list, method(np.array(n_cands_list))*100, ':', lw=0.8) @@ -82,7 +101,8 @@ # Calculate Social Utility Efficiency from summed utilities x_uw, y_uw = zip(*sorted(utility_sums['UW'].items())) average_utility = n_voters * n_elections / 2 -for method in ('Standard', 'Vote-for-half', 'Borda'): +for method in ('Standard', 'Vote-for-half', 'Best Vote-for-or-against-k', + 'Borda'): x, y = zip(*sorted(utility_sums[method].items())) SUE = (np.array(y) - average_utility)/(np.array(y_uw) - average_utility) plt.plot(x, SUE*100, '-', label=method) diff --git a/examples/weber_1977_expressions.py b/examples/weber_1977_expressions.py index b8b79fbc..8f5ecee8 100644 --- a/examples/weber_1977_expressions.py +++ b/examples/weber_1977_expressions.py @@ -21,7 +21,7 @@ | 10 | 49.79% | 82.99% | 88.09% | 95.35% | | ∞ | 0.00% | 86.60% | 92.25% | 100.00% | """ -from numpy import round, sqrt +from numpy import sqrt from numpy.testing import assert_, assert_almost_equal @@ -86,15 +86,19 @@ def eff_vote_for_or_against_k(m, k): """ Calculate effectiveness of the "vote-for-or-against-k" voting system. - This is a variant of combined approval voting (CAV) in which every voter - approves or disapproves of `k` candidates. + Weber's closed-form value for impartial culture in the infinite-voter + limit (Cowles DP 498). It is not asserted here to coincide with finite + Monte Carlo Social Utility Efficiency for any particular ballot generator; + compare against :func:`elsim.strategies.vote_for_or_against_k` in + ``examples/weber_1977_effectiveness_table.py`` if desired. Parameters ---------- m : int Total number of candidates. k : int - Number of candidates that each voter approves or disapproves of. + Number of candidates approved (``+1``) or disapproved (``-1``) on each + ballot in the vote-for-or-against-``k`` system analyzed in the paper. Returns ------- @@ -120,11 +124,18 @@ def best_vote_for_or_against_k(m): Returns ------- - k : float - Number of candidates for every voter to approve or disapprove. + k : int + Number of candidates in each voter's for- or against-set (Weber allows + ``k = m/2`` when ``m`` is even; otherwise ``1 <= k <= m // 2``). """ - alpha = (9 - sqrt(21))/12 - return round(alpha * m) + best_k = 1 + best_eff = eff_vote_for_or_against_k(m, 1) + for k in range(2, m // 2 + 1): + e = eff_vote_for_or_against_k(m, k) + if e > best_eff: + best_eff = e + best_k = k + return best_k def eff_best_vote_for_or_against_k(m): @@ -203,22 +214,35 @@ def test_cases(): assert_almost_equal(eff_best_vote_for_or_against_k(4), 80.83/100, 4) assert_almost_equal(eff_borda(6), 92.58/100, decimal=4) + # Discrete optimum can differ from round(alpha * m); e.g. m == 91. + assert best_vote_for_or_against_k(91) == 34 + if __name__ == '__main__': test_cases() - from numpy import array + from numpy import array, concatenate, sqrt from tabulate import tabulate + m_finite = (2, 3, 4, 5, 6, 10) + m_arr = array(m_finite, dtype=float) table = {} - m_cands_list = (2, 3, 4, 5, 6, 10, 1e30) - for m in m_cands_list: - for name, method in (('Standard', eff_standard), - ('Vote-for-half', eff_vote_for_half), - ('Best Vote-for-or-against-k', - eff_best_vote_for_or_against_k), - ('Borda', eff_borda)): - table.update({name: method(array(m_cands_list))}) - - print(tabulate(table, 'keys', showindex=m_cands_list[:-1] + ('∞',), + for name, method in (('Standard', eff_standard), + ('Vote-for-half', eff_vote_for_half), + ('Borda', eff_borda)): + table[name] = method(m_arr) + table['Best Vote-for-or-against-k'] = array( + [eff_best_vote_for_or_against_k(m) for m in m_finite]) + + lim_best = float((42 * sqrt(21) - 138)**0.5 / 8) + inf_values = { + 'Standard': 0.0, + 'Vote-for-half': float(sqrt(3) / 2), + 'Borda': 1.0, + 'Best Vote-for-or-against-k': lim_best, + } + for name in table: + table[name] = concatenate([table[name], [inf_values[name]]]) + + print(tabulate(table, 'keys', showindex=m_finite + ('∞',), tablefmt="pipe", floatfmt='.2%')) diff --git a/examples/weber_voa_k_ballot_variants_benchmark.py b/examples/weber_voa_k_ballot_variants_benchmark.py new file mode 100644 index 00000000..0f936fca --- /dev/null +++ b/examples/weber_voa_k_ballot_variants_benchmark.py @@ -0,0 +1,187 @@ +""" +Benchmark: Monte Carlo SUE vs Weber's vote-for-or-against-k closed form. + +Weber (1978), Cowles DP 498, gives ``eff_vote_for_or_against_k(m, k)`` under +impartial culture in the infinite-voter limit. This script compares that +formula to several **finite-voter** combined-approval ballot rules that have +been suggested as readings of "vote for S" / "vote against S". + +Run from the repository root:: + + PYTHONPATH=. python examples/weber_voa_k_ballot_variants_benchmark.py + +Or from ``examples/``:: + + PYTHONPATH=.. python weber_voa_k_ballot_variants_benchmark.py + +Adjust ``--n-voters`` and ``--n-elections`` for smoother estimates (slower). +""" +from __future__ import annotations + +import argparse +import sys +from pathlib import Path + +import numpy as np + +_EXAMPLES = Path(__file__).resolve().parent +if str(_EXAMPLES) not in sys.path: + sys.path.insert(0, str(_EXAMPLES)) + +from elsim.elections import random_utilities +from elsim.methods import combined_approval, utility_winner +from elsim.strategies import vote_for_k, vote_for_or_against_k +from weber_1977_expressions import ( + best_vote_for_or_against_k, + eff_best_vote_for_or_against_k, + eff_vote_for_or_against_k, +) + + +def _jittered(utilities: np.ndarray, rng: np.random.Generator) -> np.ndarray: + u = np.asarray(utilities, dtype=np.float64) + return u + rng.random(u.shape) * (np.finfo(np.float64).eps * 64) + + +def ballot_simultaneous_top_plus_bottom_minus( + utilities: np.ndarray, k: int, rng: np.random.Generator | int | None, +) -> np.ndarray: + """+1 on utility-top-k and -1 on utility-bottom-k on the same ballot.""" + rng = np.random.default_rng(rng) + n_voters, _ = utilities.shape + uj = _jittered(utilities, rng) + top_k = np.argpartition(uj, -k, axis=1)[:, -k:] + bot_k = np.argpartition(uj, k - 1, axis=1)[:, :k] + ballots = np.zeros((n_voters, utilities.shape[1]), dtype=np.int8) + rows = np.arange(n_voters)[:, np.newaxis] + ballots[rows, top_k] = 1 + ballots[rows, bot_k] = -1 + return ballots + + +def ballot_coin_plus_top_or_minus_same_top( + utilities: np.ndarray, k: int, rng: np.random.Generator | int | None, +) -> np.ndarray: + """Fair coin: +1 on top-k or -1 on the same top-k (disapprove favorites).""" + rng = np.random.default_rng(rng) + n_voters, _ = utilities.shape + uj = _jittered(utilities, rng) + top_k = np.argpartition(uj, -k, axis=1)[:, -k:] + signs = (1 - 2 * rng.integers(2, size=n_voters, dtype=np.int8))[:, np.newaxis] + ballots = np.zeros((n_voters, utilities.shape[1]), dtype=np.int8) + rows = np.arange(n_voters)[:, np.newaxis] + ballots[rows, top_k] = signs + return ballots + + +def ballot_uniform_subset_coin( + utilities: np.ndarray, k: int, rng: np.random.Generator | int | None, +) -> np.ndarray: + """Uniform random k-subset; fair +1 or -1 on that subset (utilities unused).""" + rng = np.random.default_rng(rng) + n_voters, n_cands = utilities.shape + keys = rng.random((n_voters, n_cands)) + subset = np.argpartition(keys, -k, axis=1)[:, -k:] + signs = (1 - 2 * rng.integers(2, size=n_voters, dtype=np.int8))[:, np.newaxis] + ballots = np.zeros((n_voters, n_cands), dtype=np.int8) + rows = np.arange(n_voters)[:, np.newaxis] + ballots[rows, subset] = signs + return ballots + + +def ballot_vote_for_k_plus_only( + utilities: np.ndarray, k: int, rng: np.random.Generator | int | None, +) -> np.ndarray: + """Baseline: approval vote-for-k (+1 on top-k only), as int8 for combined_approval.""" + a = vote_for_k(utilities, k) + return a.astype(np.int8) + + +def mc_social_utility_efficiency( + ballot_fn, + n_cands: int, + k: int, + *, + n_voters: int, + n_elections: int, + seed: int, +) -> float: + rng = np.random.default_rng(seed) + uw_sum = 0.0 + meth_sum = 0.0 + for _ in range(n_elections): + u = random_utilities(n_voters, n_cands, random_state=rng) + b = ballot_fn(u, k, rng) + winner = combined_approval(b, tiebreaker='random') + row = u.sum(axis=0) + uw_sum += row[utility_winner(u)] + meth_sum += row[winner] + average = n_voters * n_elections / 2 + return (meth_sum - average) / (uw_sum - average) + + +def main() -> None: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument('--n-voters', type=int, default=5000) + p.add_argument('--n-elections', type=int, default=3500) + p.add_argument('--seed', type=int, default=11) + args = p.parse_args() + + strategies = ( + ('repo coin +top OR -bottom', vote_for_or_against_k), + ('simult +top & -bottom', ballot_simultaneous_top_plus_bottom_minus), + ('coin +top OR -same top', ballot_coin_plus_top_or_minus_same_top), + ('uniform S, ignore u', ballot_uniform_subset_coin), + ('vote-for-k (+1 top)', ballot_vote_for_k_plus_only), + ) + + cases = ((3, 1), (4, 1), (4, 2), (5, 2), (6, 3), (10, 4)) + + print('Monte Carlo SUE minus Weber eff_vote_for_or_against_k (percentage points).\n' + f'n_voters={args.n_voters}, n_elections={args.n_elections}\n') + + header = ['m', 'k', 'Weber %'] + [name for name, _ in strategies] + rows = [] + for m, k in cases: + theory = 100 * eff_vote_for_or_against_k(m, k) + row = [m, k, f'{theory:.2f}'] + for name, fn in strategies: + seed = args.seed + m * 31 + k + sum(map(ord, name[:6])) + mc = 100 * mc_social_utility_efficiency( + fn, m, k, + n_voters=args.n_voters, + n_elections=args.n_elections, + seed=seed, + ) + row.append(f'{mc - theory:+.2f}') + rows.append(row) + + widths = [max(len(str(row[i])) for row in [header] + rows) for i in range(len(header))] + fmt = ' '.join('{{:{}}}'.format(w) for w in widths) + + print(fmt.format(*[str(h) for h in header])) + print(' '.join('-' * w for w in widths)) + for row in rows: + print(fmt.format(*[str(x) for x in row])) + + print('\nSame comparison at Weber best k per m (theory vs MC, percent SUE):\n') + for m in (4, 6, 10): + kb = best_vote_for_or_against_k(m) + th = 100 * eff_best_vote_for_or_against_k(m) + sub = [] + for name, fn in strategies[:2]: + seed = args.seed + m * 97 + sum(map(ord, name[:6])) + mc = 100 * mc_social_utility_efficiency( + fn, m, kb, + n_voters=args.n_voters, + n_elections=args.n_elections, + seed=seed, + ) + sub.append(f'{name:26s} MC={mc:6.2f}% delta={mc - th:+6.2f} pp') + print(f'm={m} best k={kb} Weber best={th:.2f}%') + for line in sub: + print(' ', line) + + +if __name__ == '__main__': + main() diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 730c7d09..fb9df475 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -1,11 +1,12 @@ import numpy as np import pytest -from hypothesis import given +from hypothesis import assume, given, settings from hypothesis.extra.numpy import arrays from hypothesis.strategies import floats, integers, tuples from numpy.testing import assert_array_equal -from elsim.strategies import approval_optimal, honest_normed_scores, vote_for_k +from elsim.strategies import (approval_optimal, honest_normed_scores, + vote_for_k, vote_for_or_against_k) def test_approval_optimal(): @@ -72,6 +73,28 @@ def test_invalid_k(k): vote_for_k(election, k) +def test_vote_for_or_against_k_shape(): + rng = np.random.default_rng(0) + utilities = rng.random((50, 7)) + k = 3 + b = vote_for_or_against_k(utilities, k, rng=rng) + assert b.shape == utilities.shape + assert b.dtype == np.int8 + assert set(np.unique(b)) <= {-1, 0, 1} + assert_array_equal(np.abs(b).sum(axis=1), np.full(50, k)) + assert_array_equal((b == 0).sum(axis=1), np.full(50, 7 - k)) + pos = (b == 1).sum(axis=1) == k + neg = (b == -1).sum(axis=1) == k + assert_array_equal(pos | neg, np.ones(50, dtype=bool)) + + +@pytest.mark.parametrize("k", [0, 4]) +def test_vote_for_or_against_k_invalid_k(k): + utilities = np.random.default_rng(1).random((4, 7)) + with pytest.raises(ValueError): + vote_for_or_against_k(utilities, k) + + def utilities(min_cands=2, max_cands=25, min_voters=1, max_voters=100): """ Strategy to generate utilities arrays @@ -96,6 +119,49 @@ def test_vote_for_k_properties(utilities): assert set(election.flat) <= {0, 1} +@given(utilities=utilities(min_cands=4, max_cands=20)) +def test_vote_for_or_against_k_properties(utilities): + m = utilities.shape[1] + k = m // 2 + assume(k >= 1) + rng = np.random.default_rng(0) + b = vote_for_or_against_k(utilities, k, rng=rng) + assert b.shape == utilities.shape + assert set(b.flat) <= {-1, 0, 1} + n = utilities.shape[0] + assert_array_equal(np.abs(b).sum(axis=1), np.full(n, k)) + pos = (b == 1).sum(axis=1) == k + neg = (b == -1).sum(axis=1) == k + assert_array_equal(pos | neg, np.ones(n, dtype=bool)) + + +@settings(max_examples=20, deadline=None) +@given(utilities=utilities(min_cands=4, max_cands=16, min_voters=2000, + max_voters=2800)) +def test_vote_for_or_against_k_fair_coin_distribution(utilities): + m = utilities.shape[1] + k = m // 2 + assume(k >= 1) + rng = np.random.default_rng(0) + b = vote_for_or_against_k(utilities, k, rng=rng) + n = utilities.shape[0] + assert_array_equal(np.abs(b).sum(axis=1), np.full(n, k)) + pos = (b == 1).sum(axis=1) == k + neg = (b == -1).sum(axis=1) == k + assert_array_equal(pos | neg, np.ones(n, dtype=bool)) + n_for = int(np.count_nonzero(pos)) + assert abs(n_for / n - 0.5) < 0.06 + + +def test_vote_for_or_against_k_fair_coin_large_sample(): + rng = np.random.default_rng(42) + n_voters, m, k = 25_000, 9, 4 + utilities = rng.random((n_voters, m)) + b = vote_for_or_against_k(utilities, k, rng=rng) + n_for = int(np.count_nonzero((b == 1).sum(axis=1) == k)) + assert 12_000 < n_for < 13_000 + + @given(utilities=utilities(), max_score=integers(1, 100)) def test_honest_normed_scores_properties(utilities, max_score): election = honest_normed_scores(utilities, max_score)