nucypher/tests/acceptance/blockchain/agents/test_sampling_distribution.py



import random
from collections import Counter
from itertools import permutations

import pytest

from nucypher.blockchain.eth.actors import Operator
from nucypher.blockchain.eth.agents import WeightedSampler, ContractAgency, PREApplicationAgent
from nucypher.blockchain.eth.constants import NULL_ADDRESS
from nucypher.blockchain.eth.signers.software import Web3Signer
from nucypher.config.constants import TEMPORARY_DOMAIN
from nucypher.crypto.powers import TransactingPower


@pytest.mark.nightly
@pytest.mark.usefixtures("agency")
def test_sampling_distribution(testerchain, test_registry, threshold_staking, application_economics):
    """
    Check that staking providers holding equal stakes are sampled equally often.

    Every account in ``testerchain.stake_providers_accounts`` receives the same
    stake (``application_economics.min_authorization``), is bonded to itself as
    operator, and confirms its operator address. Staking provider reservoirs are
    then drawn from repeatedly, and each provider's share of all sampled
    addresses is compared with its share of the total stake.
    """

    # setup
    application_agent = ContractAgency.get_agent(PREApplicationAgent, registry=test_registry)
    stake_provider_accounts = testerchain.stake_providers_accounts
    amount = application_economics.min_authorization
    all_locked_tokens = len(stake_provider_accounts) * amount

    # providers and operators
    for provider_address in stake_provider_accounts:
        # In this test every staking provider is its own operator
        operator_address = provider_address

        # initialize threshold stake
        tx = threshold_staking.functions.setRoles(provider_address).transact()
        testerchain.wait_for_receipt(tx)
        tx = threshold_staking.functions.setStakes(provider_address, amount, 0, 0).transact()
        testerchain.wait_for_receipt(tx)

        power = TransactingPower(account=provider_address, signer=Web3Signer(testerchain.client))

        # We assume that the staking provider knows in advance the account of her operator
        application_agent.bond_operator(staking_provider=provider_address,
                                        operator=operator_address,
                                        transacting_power=power)

        operator = Operator(is_me=True,
                            operator_address=operator_address,
                            domain=TEMPORARY_DOMAIN,
                            registry=test_registry,
                            transacting_power=power)
        operator.confirm_address()

    #
    # Test sampling distribution
    #

    # Maximum absolute deviation allowed between the expected and the sampled ratio.
    # E.g. with ten providers the expected ratio is 10%, so every sampled ratio
    # should be between 5% and 15%.
    ERROR_TOLERANCE = 0.05
    SAMPLES = 1000
    # Upper bound on failed draws, so that a persistent sampling failure
    # fails the test instead of looping forever.
    MAX_FAILED_DRAWS = SAMPLES
    quantity = 3
    counter = Counter()

    sampled, failed = 0, 0
    while sampled < SAMPLES:
        try:
            reservoir = application_agent.get_staking_provider_reservoir()
            addresses = set(reservoir.draw(quantity))
        except application_agent.NotEnoughStakingProviders:
            failed += 1
            assert failed <= MAX_FAILED_DRAWS, (
                f"Sampling failed {failed} times with only {sampled} successful draws"
            )
            continue

        addresses.discard(NULL_ADDRESS)
        sampled += 1
        counter.update(addresses)

    total_times = sum(counter.values())
    assert total_times > 0, "No staking provider address was ever sampled"

    # All stakes are equal, so each provider is expected to get the same share
    expected = amount / all_locked_tokens
    for stake_provider in stake_provider_accounts:
        times = counter[stake_provider]
        sampled_ratio = times / total_times
        abs_error = abs(expected - sampled_ratio)
        assert abs_error < ERROR_TOLERANCE, (
            f"Staking provider {stake_provider} was sampled with ratio {sampled_ratio} "
            f"(expected {expected}, tolerance {ERROR_TOLERANCE})"
        )

    # TODO: Test something wrt to % of failed


def probability_reference_no_replacement(weights, idxs):
    """
    Reference probability of an ordered draw without replacement.

    Returns the probability of picking the elements at the (distinct) indices
    ``idxs``, in exactly that order, when each pick is made proportionally to
    ``weights`` among the elements not picked yet.
    """
    # Indices must not repeat: an element cannot be drawn twice
    assert len(idxs) == len(set(idxs))

    remaining_weight = sum(weights)
    probability = 1
    for picked_weight in (weights[i] for i in idxs):
        # Chance of this pick among what is left, then take it out of the pool
        probability = probability * (picked_weight / remaining_weight)
        remaining_weight = remaining_weight - picked_weight
    return probability


@pytest.mark.parametrize('sample_size', [1, 2, 3])
def test_weighted_sampler(sample_size):
    """
    Compare the observed frequencies of ``WeightedSampler.sample_no_replacement``
    with the analytical reference probability, for every ordered selection of
    ``sample_size`` distinct elements.
    """
    weights = [1, 9, 100, 2, 18, 70]
    elements = list(range(len(weights)))
    weighted_elements = dict(zip(elements, weights))

    # Seeded generator: keeps the test deterministic and avoids flakiness
    rng = random.Random(123)

    samples = 100000
    counter = Counter()
    for _ in range(samples):
        # A new sampler is built for every draw
        sampler = WeightedSampler(weighted_elements)
        drawn = tuple(sampler.sample_no_replacement(rng, sample_size))
        counter[drawn] += 1

    deviation_scale = samples**0.5
    for idxs in permutations(elements, sample_size):
        test_prob = counter[idxs] / samples
        ref_prob = probability_reference_no_replacement(weights, idxs)

        # A rough estimate to check probabilities: the deviation scaled by
        # sqrt(samples) must stay below 1.
        # A little too forgiving for samples with smaller probabilities,
        # but can go up to 0.5 on occasion.
        scaled_deviation = abs(test_prob - ref_prob) * deviation_scale
        assert scaled_deviation < 1
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00
adjust tests 2022-02-02 19:34:05 +00:00			`import random`
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00			`from collections import Counter`
Simplify staker sampling and add unit tests for proper sampling distribution 2020-05-28 23:24:41 +00:00			`from itertools import permutations`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00
Optimize imports for the entire codebase 2020-05-13 03:23:33 +00:00			`import pytest`

adjust tests 2022-02-02 19:34:05 +00:00			`from nucypher.blockchain.eth.actors import Operator`
			`from nucypher.blockchain.eth.agents import WeightedSampler, ContractAgency, PREApplicationAgent`
			`from nucypher.blockchain.eth.constants import NULL_ADDRESS`
Fix sampling distribution test 2021-03-15 13:49:52 +00:00			`from nucypher.blockchain.eth.signers.software import Web3Signer`
adjust tests 2022-02-02 19:34:05 +00:00			`from nucypher.config.constants import TEMPORARY_DOMAIN`
Fix sampling distribution test 2021-03-15 13:49:52 +00:00			`from nucypher.crypto.powers import TransactingPower`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00

skip (and unskip) tests 2022-02-02 22:09:51 +00:00			`@pytest.mark.nightly`
adjust tests 2022-02-02 19:34:05 +00:00			`@pytest.mark.usefixtures("agency")`
			`def test_sampling_distribution(testerchain, test_registry, threshold_staking, application_economics):`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00
adjust tests 2022-02-02 19:34:05 +00:00			`# setup`
			`application_agent = ContractAgency.get_agent(PREApplicationAgent, registry=test_registry)`
Respond to RFCs for PR #2857 2022-02-07 18:54:53 +00:00			`stake_provider_accounts = testerchain.stake_providers_accounts`
adjust tests 2022-02-02 19:34:05 +00:00			`amount = application_economics.min_authorization`
			`all_locked_tokens = len(stake_provider_accounts) * amount`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00
adjust tests 2022-02-02 19:34:05 +00:00			`# providers and operators`
			`for provider_address in stake_provider_accounts:`
			`operator_address = provider_address`
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00
adjust tests 2022-02-02 19:34:05 +00:00			`# initialize threshold stake`
			`tx = threshold_staking.functions.setRoles(provider_address).transact()`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00			`testerchain.wait_for_receipt(tx)`
adjust tests 2022-02-02 19:34:05 +00:00			`tx = threshold_staking.functions.setStakes(provider_address, amount, 0, 0).transact()`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00			`testerchain.wait_for_receipt(tx)`

adjust tests 2022-02-02 19:34:05 +00:00			`power = TransactingPower(account=provider_address, signer=Web3Signer(testerchain.client))`

			`# We assume that the staking provider knows in advance the account of her operator`
Respond to RFCs in PR #2862 2022-02-16 20:24:58 +00:00			`application_agent.bond_operator(staking_provider=provider_address,`
adjust tests 2022-02-02 19:34:05 +00:00			`operator=operator_address,`
			`transacting_power=power)`
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00
adjust tests 2022-02-02 19:34:05 +00:00			`operator = Operator(is_me=True,`
			`operator_address=operator_address,`
			`domain=TEMPORARY_DOMAIN,`
			`registry=test_registry,`
			`transacting_power=power)`
			`operator.confirm_address()`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00
			`#`
			`# Test sampling distribution`
			`#`

			`ERROR_TOLERANCE = 0.05 # With this tolerance, all sampling ratios should between 5% and 15% (expected is 10%)`
Statistical test: Raise number of samples to 1000. Expected runtime 5 min 2019-10-02 09:37:17 +00:00			`SAMPLES = 1000`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00			`quantity = 3`
			`counter = Counter()`
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00
			`sampled, failed = 0, 0`
			`while sampled < SAMPLES:`
			`try:`
adjust tests 2022-02-02 19:34:05 +00:00			`reservoir = application_agent.get_staking_provider_reservoir()`
Expose staker sampling as an iterator (ish) 2020-06-09 19:51:27 +00:00			`addresses = set(reservoir.draw(quantity))`
Move NULL_ADDRESS constant from BlockchainInterface to the eth.constants module 2020-04-05 17:59:11 +00:00			`addresses.discard(NULL_ADDRESS)`
adjust tests 2022-02-02 19:34:05 +00:00			`except application_agent.NotEnoughStakingProviders:`
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00			`failed += 1`
			`continue`
			`else:`
			`sampled += 1`
			`counter.update(addresses)`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00
			`total_times = sum(counter.values())`

			`expected = amount / all_locked_tokens`
adjust tests 2022-02-02 19:34:05 +00:00			`for stake_provider in stake_provider_accounts:`
			`times = counter[stake_provider]`
Test that sampling distribution is as expected. Closes #1019 2019-09-02 21:22:49 +00:00			`sampled_ratio = times / total_times`
			`abs_error = abs(expected - sampled_ratio)`
			`assert abs_error < ERROR_TOLERANCE`
Adapt test for sampling distribution and relocate to agent tests 2019-10-20 16:16:32 +00:00
			`# TODO: Test something wrt to % of failed`
Simplify staker sampling and add unit tests for proper sampling distribution 2020-05-28 23:24:41 +00:00

			`def probability_reference_no_replacement(weights, idxs):`
			`"""`
			The probability of drawing elements with (distinct) indices ``idxs`` (in given order),
			given ``weights``. No replacement.
			`"""`
			`assert len(set(idxs)) == len(idxs)`
			`all_weights = sum(weights)`
			`p = 1`
			`for idx in idxs:`
			`p *= weights[idx] / all_weights`
			`all_weights -= weights[idx]`
			`return p`


			`@pytest.mark.parametrize('sample_size', [1, 2, 3])`
			`def test_weighted_sampler(sample_size):`
			`weights = [1, 9, 100, 2, 18, 70]`
Implement RFCs, part 2 2020-08-08 01:37:58 +00:00			`elements = list(range(len(weights)))`
Fix RNG seed in the sampling test to avoid low-probability random fails 2020-08-18 00:22:37 +00:00
			`# Use a fixed seed to avoid flakyness of the test`
			`rng = random.Random(123)`

Simplify staker sampling and add unit tests for proper sampling distribution 2020-05-28 23:24:41 +00:00			`counter = Counter()`

Implement RFCs, part 2 2020-08-08 01:37:58 +00:00			`weighted_elements = {element: weight for element, weight in zip(elements, weights)}`
Simplify staker sampling and add unit tests for proper sampling distribution 2020-05-28 23:24:41 +00:00
			`samples = 100000`
			`for i in range(samples):`
Make WeightedSampler behave correctly in case of several consecutive draws 2020-12-18 05:16:03 +00:00			`sampler = WeightedSampler(weighted_elements)`
Simplify staker sampling and add unit tests for proper sampling distribution 2020-05-28 23:24:41 +00:00			`sample_set = sampler.sample_no_replacement(rng, sample_size)`
			`counter.update({tuple(sample_set): 1})`

			`for idxs in permutations(elements, sample_size):`
			`test_prob = counter[idxs] / samples`
			`ref_prob = probability_reference_no_replacement(weights, idxs)`

			`# A rough estimate to check probabilities.`
			`# A little too forgiving for samples with smaller probabilities,`
			`# but can go up to 0.5 on occasion.`
			`assert abs(test_prob - ref_prob) * samples**0.5 < 1`