Merge pull request #3524 from KPrasch/dkg-errors

Do not retry unrecoverable ferveo errors
v7.4.x
KPrasch 2024-07-30 21:54:41 +07:00 committed by GitHub
commit 0c33c154d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 30 additions and 8 deletions

View File

@ -0,0 +1 @@
Do not continuously retry ritual actions when an unrecoverable ferveo error occurs during a ritual ceremony.

View File

@ -633,11 +633,11 @@ class Operator(BaseActor):
ritual_id=ritual.id,
)
except Exception as e:
# TODO: Handle this better #3096
stack_trace = traceback.format_stack()
self.log.critical(
f"Failed to generate a transcript for ritual #{ritual.id}: {str(e)}"
f"Failed to generate a transcript for ritual #{ritual.id}: {str(e)}\n{stack_trace}"
)
raise e
return
# publish the transcript and store the receipt
self.dkg_storage.store_validators(ritual_id=ritual.id, validators=validators)
@ -724,10 +724,11 @@ class Operator(BaseActor):
transcripts=messages,
)
except Exception as e:
self.log.debug(
f"Failed to aggregate transcripts for ritual #{ritual.id}: {str(e)}"
stack_trace = traceback.format_stack()
self.log.critical(
f"Failed to aggregate transcripts for ritual #{ritual.id}: {str(e)}\n{stack_trace}"
)
raise e
return
# publish the transcript with network-wide jitter to avoid tx congestion
time.sleep(random.randint(0, self.AGGREGATION_SUBMISSION_MAX_DELAY))

View File

@ -1,3 +1,5 @@
from unittest.mock import patch
import pytest
from atxm.exceptions import Fault, InsufficientFunds
@ -139,8 +141,18 @@ def test_perform_round_1(
lambda *args, **kwargs: Coordinator.RitualStatus.DKG_AWAITING_TRANSCRIPTS
)
phase_id = PhaseId(ritual_id=0, phase=PHASE1)
# cryptographic issue does not raise exception
with patch(
"nucypher.crypto.ferveo.dkg.generate_transcript",
side_effect=Exception("transcript cryptography failed"),
):
async_tx = ursula.perform_round_1(
ritual_id=0, authority=random_address, participants=cohort, timestamp=0
)
# exception not raised, but None returned
assert async_tx is None
phase_id = PhaseId(ritual_id=0, phase=PHASE1)
assert (
ursula.dkg_storage.get_ritual_phase_async_tx(phase_id=phase_id) is None
), "no tx data as yet"
@ -244,8 +256,16 @@ def test_perform_round_2(
lambda *args, **kwargs: Coordinator.RitualStatus.DKG_AWAITING_AGGREGATIONS
)
phase_2_id = PhaseId(ritual_id=0, phase=PHASE2)
# cryptographic issue does not raise exception
with patch(
"nucypher.crypto.ferveo.dkg.verify_aggregate",
side_effect=Exception("aggregate cryptography failed"),
):
async_tx = ursula.perform_round_2(ritual_id=0, timestamp=0)
# exception not raised, but None returned
assert async_tx is None
phase_2_id = PhaseId(ritual_id=0, phase=PHASE2)
assert (
ursula.dkg_storage.get_ritual_phase_async_tx(phase_id=phase_2_id) is None
), "no tx data as yet"