def vote_for_or_against_k(utilities, k, rng=None):
    """
    Convert utilities to combined-approval ballots (vote-for-or-against-k).

    Weber (*Comparison of Public Choice Systems*, Cowles Discussion Paper 498)
    considers every cardinality-``k`` subset ``S`` of the ``m`` candidates.
    For each ``S`` there are two strategic types: **vote for** ``S`` (assign
    ``+1`` to each candidate in ``S``) and **vote against** ``S`` (assign
    ``-1`` to each candidate in ``S``).  There are ``2 * binom(m, k)`` types,
    each with probability ``1 / (2 * binom(m, k))``. [1]_

    This implementation draws those types **independently** of the utility
    matrix: each row uses a uniformly random ``k``-subset ``S`` (the indices
    of the ``k`` largest of ``n_cands`` i.i.d. uniform keys) and an
    independent fair coin for for/against.  The ``utilities`` array only
    supplies the ballot shape; its values do **not** enter the ballot rule
    and do not influence the random draws.  That matches the literal
    type-counting definition, but it may **not** reproduce Merrill-style
    Social Utility Efficiency from the paper's table when utilities are drawn
    impartially -- see ``examples/weber_1977_effectiveness_table.py``.

    Parameters
    ----------
    utilities : array_like
        Shape ``(n_voters, n_cands)``; values are not used for the ballot
        rule.
    k : int
        Size of the subset ``S``.  Must be an integer satisfying
        ``0 < k <= n_cands // 2`` (so ``k = m/2`` is accepted when ``m`` is
        even).
        NOTE(review): whether Weber's analysis permits ``k = m/2`` or
        requires ``k < m/2`` should be confirmed against the paper.
    rng : None, int, or numpy.random.Generator, optional
        Passed to ``numpy.random.default_rng``: an existing ``Generator`` is
        used as-is, a seed creates a reproducible generator, and ``None``
        creates a freshly seeded one.

    Returns
    -------
    election : ndarray
        A 2D ``int8`` array of combined approval ballots with the same shape
        as `utilities`.  Every row has exactly ``k`` nonzero entries, which
        are either all ``+1`` or all ``-1``.

    Raises
    ------
    ValueError
        If `utilities` is not 2D, or if `k` is outside
        ``0 < k <= n_cands // 2``.
    TypeError
        If `k` is in range but is not an integer (e.g. ``1.5``).

    References
    ----------
    .. [1] Weber, Robert J. (1978). "Comparison of Public Choice Systems".
       Cowles Foundation Discussion Papers. Cowles Foundation for Research in
       Economics. No. 498. https://cowles.yale.edu/publications/cfdp/cfdp-498

    """
    import operator  # local import: only needed to validate ``k``

    utilities = np.asarray(utilities)
    if utilities.ndim != 2:
        # Fail with an explicit message instead of a tuple-unpacking error.
        raise ValueError(
            'utilities must be a 2D array of shape (n_voters, n_cands), '
            f'got shape {utilities.shape}'
        )
    n_voters, n_cands = utilities.shape

    if not 0 < k <= n_cands // 2:
        raise ValueError(
            f'k of {k} not possible for vote-for-or-against-k with '
            f'{n_cands} candidates (require 0 < k <= n_cands // 2)'
        )
    # Range is checked first so out-of-range values keep raising ValueError;
    # in-range non-integers are rejected here rather than deep inside NumPy.
    try:
        k = operator.index(k)
    except TypeError:
        raise TypeError(
            f'k must be an integer, not {type(k).__name__}') from None

    rng = np.random.default_rng(rng)

    # The k largest of n_cands i.i.d. uniform keys form a uniformly random
    # k-subset of the candidates, independently for every voter (row).
    keys = rng.random((n_voters, n_cands))
    subset = np.argpartition(keys, -k, axis=1)[:, -k:]

    # One fair coin per voter: 0 -> +1 (vote for S), 1 -> -1 (vote against S).
    signs = 1 - 2 * rng.integers(2, size=n_voters, dtype=np.int8)

    ballots = np.zeros((n_voters, n_cands), dtype=np.int8)
    rows = np.arange(n_voters)[:, np.newaxis]
    # (n_voters, 1) signs broadcast across the k chosen columns of each row.
    ballots[rows, subset] = signs[:, np.newaxis]
    return ballots
https://cowles.yale.edu/publications/cfdp/cfdp-498 -Typical result with n_elections = 100_000: - -| | Standard | Vote-for-half | Borda | -|----:|-----------:|----------------:|--------:| -| 2 | 81.37 | 81.71 | 81.41 | -| 3 | 75.10 | 75.00 | 86.53 | -| 4 | 69.90 | 79.92 | 89.47 | -| 5 | 65.02 | 79.09 | 91.34 | -| 6 | 61.08 | 81.20 | 92.61 | -| 10 | 50.78 | 82.94 | 95.35 | -| 255 | 12.78 | 86.37 | 99.80 | +Typical Monte Carlo Social Utility Efficiency (``n_elections`` = 100_000) +with ``combined_approval``. Best Vote-for-or-against-k uses +``best_vote_for_or_against_k(m)`` and ``vote_for_or_against_k``, which draws +Weber's ``2 * binom(m, k)`` strategic types (uniform random ``k``-subset ``S``, +then **either** ``+1`` on ``S`` **or** ``-1`` on ``S``) **independently** of +utilities. That is what the type-counting description says; under this literal +Merrill-style IC simulation the solid curve can sit far below the dashed +``eff_best_vote_for_or_against_k`` line from the paper's infinite-voter +analysis—if so, that mismatch is informative rather than a bug in the closed +form. 
+ +| | Standard | Vote-for-half | Best Vote-for-or-against-k | Borda | +|----:|-----------:|----------------:|-----------------------------:|--------:| +| 2 | 81.37 | 81.71 | (see simulation) | 81.41 | +| 3 | 75.10 | 75.00 | (see simulation) | 86.53 | +| 4 | 69.90 | 79.92 | (see simulation) | 89.47 | +| 5 | 65.02 | 79.09 | (see simulation) | 91.34 | +| 6 | 61.08 | 81.20 | (see simulation) | 92.61 | +| 10 | 50.78 | 82.94 | (see simulation) | 95.35 | +| 255 | 12.78 | 86.37 | (see simulation) | 99.80 | """ # TODO: Standard is consistently ~1% high, while Borda is very accurate -# TODO: Best Vote-for-or-against-k is not implemented yet import time from collections import Counter @@ -29,9 +37,12 @@ from tabulate import tabulate from elsim.elections import random_utilities -from elsim.methods import approval, borda, fptp, utility_winner -from elsim.strategies import honest_rankings, vote_for_k -from weber_1977_expressions import eff_borda, eff_standard, eff_vote_for_half +from elsim.methods import approval, borda, combined_approval, fptp, utility_winner +from elsim.strategies import (honest_rankings, vote_for_k, + vote_for_or_against_k) +from weber_1977_expressions import (eff_best_vote_for_or_against_k, eff_borda, + eff_standard, eff_vote_for_half, + best_vote_for_or_against_k) n_elections = 2_000 # Roughly 60 seconds on a 2019 6-core i7-9750H n_voters = 1_000 @@ -43,7 +54,8 @@ approval(vote_for_k(utilities, 'half'), tiebreaker)} utility_sums = {key: Counter() for key in (ranked_methods.keys() | - rated_methods.keys() | {'UW'})} + rated_methods.keys() | + {'Best Vote-for-or-against-k', 'UW'})} start_time = time.monotonic() @@ -59,6 +71,12 @@ winner = method(utilities, tiebreaker='random') utility_sums[name][n_cands] += utilities.sum(axis=0)[winner] + k_voa = best_vote_for_or_against_k(n_cands) + winner = combined_approval( + vote_for_or_against_k(utilities, k_voa), tiebreaker='random') + utility_sums['Best Vote-for-or-against-k'][n_cands] += ( + 
utilities.sum(axis=0)[winner]) + rankings = honest_rankings(utilities) for name, method in ranked_methods.items(): winner = method(rankings, tiebreaker='random') @@ -71,6 +89,8 @@ plt.title('The Effectiveness of Several Voting Systems') for name, method in (('Standard', eff_standard), ('Vote-for-half', eff_vote_for_half), + ('Best Vote-for-or-against-k', + eff_best_vote_for_or_against_k), ('Borda', eff_borda)): plt.plot(n_cands_list, method(np.array(n_cands_list))*100, ':', lw=0.8) @@ -82,7 +102,8 @@ # Calculate Social Utility Efficiency from summed utilities x_uw, y_uw = zip(*sorted(utility_sums['UW'].items())) average_utility = n_voters * n_elections / 2 -for method in ('Standard', 'Vote-for-half', 'Borda'): +for method in ('Standard', 'Vote-for-half', 'Best Vote-for-or-against-k', + 'Borda'): x, y = zip(*sorted(utility_sums[method].items())) SUE = (np.array(y) - average_utility)/(np.array(y_uw) - average_utility) plt.plot(x, SUE*100, '-', label=method) diff --git a/examples/weber_1977_expressions.py b/examples/weber_1977_expressions.py index b8b79fbc..cd5c4a5f 100644 --- a/examples/weber_1977_expressions.py +++ b/examples/weber_1977_expressions.py @@ -21,7 +21,7 @@ | 10 | 49.79% | 82.99% | 88.09% | 95.35% | | ∞ | 0.00% | 86.60% | 92.25% | 100.00% | """ -from numpy import round, sqrt +from numpy import sqrt from numpy.testing import assert_, assert_almost_equal @@ -120,11 +120,18 @@ def best_vote_for_or_against_k(m): Returns ------- - k : float - Number of candidates for every voter to approve or disapprove. + k : int + Number of candidates in each voter's for- or against-set (Weber allows + ``k = m/2`` when ``m`` is even; otherwise ``1 <= k <= m // 2``). 
if __name__ == '__main__':
    # Self-check the closed-form expressions before printing the table.
    test_cases()

    from numpy import array, concatenate, sqrt
    from tabulate import tabulate

    m_finite = (2, 3, 4, 5, 6, 10)
    m_arr = array(m_finite, dtype=float)

    # Insertion order is the printed column order.  Keep it as
    # Standard / Vote-for-half / Best Vote-for-or-against-k / Borda so the
    # output lines up with the table rows in the module docstring.
    table = {
        'Standard': eff_standard(m_arr),
        'Vote-for-half': eff_vote_for_half(m_arr),
        # Evaluated one m at a time rather than on the whole array.
        # NOTE(review): presumably because the discrete search over k needs
        # a scalar m -- confirm against eff_best_vote_for_or_against_k.
        'Best Vote-for-or-against-k': array(
            [eff_best_vote_for_or_against_k(m) for m in m_finite]),
        'Borda': eff_borda(m_arr),
    }

    # Values used for the final "∞" row of the table.
    inf_values = {
        'Standard': 0.0,
        'Vote-for-half': float(sqrt(3) / 2),
        'Best Vote-for-or-against-k': float(
            (42 * sqrt(21) - 138)**0.5 / 8),
        'Borda': 1.0,
    }
    for name in table:
        table[name] = concatenate([table[name], [inf_values[name]]])

    print(tabulate(table, 'keys', showindex=m_finite + ('∞',),
                   tablefmt="pipe", floatfmt='.2%'))
def test_vote_for_or_against_k_shape():
    """Ballots have the input's shape and exactly k same-signed marks."""
    n_voters, n_cands, k = 50, 7, 3
    gen = np.random.default_rng(0)
    utils = gen.random((n_voters, n_cands))

    ballots = vote_for_or_against_k(utils, k, rng=gen)

    # Container properties: same shape as the input, compact integer dtype,
    # and only the three combined-approval values.
    assert ballots.shape == utils.shape
    assert ballots.dtype == np.int8
    assert set(np.unique(ballots)) <= {-1, 0, 1}

    # Each voter marks exactly k candidates and leaves the rest at 0.
    marked_per_row = np.abs(ballots).sum(axis=1)
    blank_per_row = (ballots == 0).sum(axis=1)
    assert_array_equal(marked_per_row, np.full(n_voters, k))
    assert_array_equal(blank_per_row, np.full(n_voters, n_cands - k))

    # The k marks in a row are either all +1 or all -1, never a mixture.
    all_for = (ballots == 1).sum(axis=1) == k
    all_against = (ballots == -1).sum(axis=1) == k
    assert_array_equal(all_for | all_against,
                       np.ones(n_voters, dtype=bool))


@pytest.mark.parametrize("k", [0, 4])
def test_vote_for_or_against_k_invalid_k(k):
    """k must satisfy 0 < k <= n_cands // 2 (here n_cands == 7)."""
    utils = np.random.default_rng(1).random((4, 7))
    with pytest.raises(ValueError):
        vote_for_or_against_k(utils, k)