Reporting scheduling failures

A user request to schedule an uplink may be deferred because of backoff, or, in the case of a CONFIRMED message, a retry may take place on a different datarate and a different channel. Until now we had no hook for such deferred scheduling that could tell the user whether the asynchronous rescheduling succeeded or not. This commit adds that capability, so we can now tell the application if a scheduling failure took place after the original schedule request was accepted.
2018-07-12 12:16:00 +03:00 · 2018-07-12 12:16:00 +03:00 · b07c3e791f
parent d65e614a14
commit b07c3e791f
4 changed files with 76 additions and 26 deletions
--- a/features/lorawan/LoRaWANStack.cpp
+++ b/features/lorawan/LoRaWANStack.cpp
@ -332,7 +332,7 @@ int16_t LoRaWANStack::handle_tx(const uint8_t port, const uint8_t *data,
        return status;
    }

-    // All the flags mutually exclusive. In addition to that MSG_MULTICAST_FLAG cannot be
+    // All the flags are mutually exclusive. In addition to that MSG_MULTICAST_FLAG cannot be
    // used for uplink.
    switch (flags & MSG_FLAG_MASK) {
        case MSG_UNCONFIRMED_FLAG:
@ -631,6 +631,13 @@ void LoRaWANStack::handle_ack_expiry_for_class_c(void)
    state_controller(DEVICE_STATE_STATUS_CHECK);
 }

+void LoRaWANStack::handle_scheduling_failure(void)
+{
+    tr_error("Failed to schedule transmission");
+    state_controller(DEVICE_STATE_STATUS_CHECK);
+    state_machine_run_to_completion();
+}
+
 void LoRaWANStack::process_reception(const uint8_t *const payload, uint16_t size,
                                     int16_t rssi, int8_t snr)
 {
@ -948,22 +955,26 @@ void LoRaWANStack::mlme_confirm_handler()

 void LoRaWANStack::mcps_confirm_handler()
 {
-    // success case
-    if (_loramac.get_mcps_confirmation()->status == LORAMAC_EVENT_INFO_STATUS_OK) {
-        _lw_session.uplink_counter = _loramac.get_mcps_confirmation()->ul_frame_counter;
-        send_event_to_application(TX_DONE);
-        return;
-    }
+    switch (_loramac.get_mcps_confirmation()->status) {

-    // failure case
-    if (_loramac.get_mcps_confirmation()->status == LORAMAC_EVENT_INFO_STATUS_TX_TIMEOUT) {
-        tr_error("Fatal Error, Radio failed to transmit");
-        send_event_to_application(TX_TIMEOUT);
-        return;
-    }
+        case LORAMAC_EVENT_INFO_STATUS_OK:
+            _lw_session.uplink_counter = _loramac.get_mcps_confirmation()->ul_frame_counter;
+            send_event_to_application(TX_DONE);
+            break;

-    // if no ack was received, send TX_ERROR
-    send_event_to_application(TX_ERROR);
+        case LORAMAC_EVENT_INFO_STATUS_TX_TIMEOUT:
+            tr_error("Fatal Error, Radio failed to transmit");
+            send_event_to_application(TX_TIMEOUT);
+            break;
+
+        case LORAMAC_EVENT_INFO_STATUS_TX_DR_PAYLOAD_SIZE_ERROR:
+            send_event_to_application(TX_SCHEDULING_ERROR);
+            break;
+
+        default:
+            // if no ack was received after enough retries, send TX_ERROR
+            send_event_to_application(TX_ERROR);
+    }
 }

 void LoRaWANStack::mcps_indication_handler()
@ -1089,11 +1100,13 @@ void LoRaWANStack::process_status_check_state()
 {
    if (_device_current_state == DEVICE_STATE_SENDING ||
            _device_current_state == DEVICE_STATE_AWAITING_ACK) {
-        // this happens after RX2 slot is exhausted
-        // we may or may not have a successful UNCONFIRMED transmission
+        // If there was a successful transmission, this block gets a kick after
+        // RX2 slot is exhausted. We may or may not have a successful UNCONFIRMED transmission
        // here. In CONFIRMED case this block is invoked only
        // when the MAX number of retries are exhausted, i.e., only error
        // case will fall here. Moreover, it will happen for Class A only.
+        // Another possibility is the case when the stack fails to schedule a
+        // deferred transmission and a scheduling failure handler is invoked.
        _ctrl_flags &= ~TX_DONE_FLAG;
        _ctrl_flags &= ~TX_ONGOING_FLAG;
        _loramac.set_tx_ongoing(false);
@ -1215,7 +1228,8 @@ void LoRaWANStack::process_idle_state(lorawan_status_t &op_status)

 void LoRaWANStack::process_uninitialized_state(lorawan_status_t &op_status)
 {
-    op_status = _loramac.initialize(_queue);
+    op_status = _loramac.initialize(_queue, mbed::callback(this,
+                                                           &LoRaWANStack::handle_scheduling_failure));

    if (op_status == LORAWAN_STATUS_OK) {
        _device_current_state = DEVICE_STATE_IDLE;
--- a/features/lorawan/LoRaWANStack.h
+++ b/features/lorawan/LoRaWANStack.h
@ -484,6 +484,7 @@ private:
    void make_rx_metadata_available(void);

    void handle_ack_expiry_for_class_c(void);
+    void handle_scheduling_failure(void);

 private:
    LoRaMac _loramac;
--- a/features/lorawan/lorastack/mac/LoRaMac.cpp
+++ b/features/lorawan/lorastack/mac/LoRaMac.cpp
@ -839,10 +839,12 @@ lorawan_status_t LoRaMac::handle_retransmission()
 void LoRaMac::on_backoff_timer_expiry(void)
 {
    Lock lock(*this);
+
    _lora_time.stop(_params.timers.backoff_timer);
-    lorawan_status_t status = schedule_tx();
-    MBED_ASSERT(status == LORAWAN_STATUS_OK);
-    (void) status;
+
+    if ((schedule_tx() != LORAWAN_STATUS_OK) && nwk_joined()) {
+        _scheduling_failure_handler.call();
+    }
 }

 void LoRaMac::open_rx1_window(void)
@ -927,8 +929,12 @@ void LoRaMac::on_ack_timeout_timer_event(void)

    _mcps_confirmation.nb_retries = _params.ack_timeout_retry_counter;

+
    // Schedule a retry
-    if (handle_retransmission() != LORAWAN_STATUS_OK) {
+    lorawan_status_t status = handle_retransmission();
+
+    if (status == LORAWAN_STATUS_NO_CHANNEL_FOUND ||
+            status == LORAWAN_STATUS_NO_FREE_CHANNEL_FOUND) {
        // In a case when enabled channels are not found, PHY layer
        // resorts to default channels. Next attempt should go forward as the
        // default channels are always available if there is a base station in the
@ -939,10 +945,24 @@ void LoRaMac::on_ack_timeout_timer_event(void)
        _mcps_confirmation.ack_received = false;
        _mcps_confirmation.nb_retries = _params.ack_timeout_retry_counter;

-        // now that is a critical failure
-        lorawan_status_t status = handle_retransmission();
+        // For the next attempt we need to make sure that we do not incur length error
+        // which would mean that the datarate changed during retransmissions and
+        // the original packet doesn't fit into allowed payload buffer anymore.
+        status = handle_retransmission();
+
+        if (status == LORAWAN_STATUS_LENGTH_ERROR) {
+            _scheduling_failure_handler.call();
+            return;
+        }
+
+        // if we did not incur a length error and still the status is not OK,
+        // it is a critical failure
+        status = handle_retransmission();
        MBED_ASSERT(status == LORAWAN_STATUS_OK);
        (void) status;
+    } else if (status != LORAWAN_STATUS_OK) {
+        _scheduling_failure_handler.call();
+        return;
    }

    _params.ack_timeout_retry_counter++;
@ -1064,6 +1084,7 @@ lorawan_status_t LoRaMac::schedule_tx()
    switch (status) {
        case LORAWAN_STATUS_NO_CHANNEL_FOUND:
        case LORAWAN_STATUS_NO_FREE_CHANNEL_FOUND:
+            _mcps_confirmation.status = LORAMAC_EVENT_INFO_STATUS_ERROR;
            return status;
        case LORAWAN_STATUS_DUTYCYCLE_RESTRICTED:
            if (backoff_time != 0) {
@ -1713,12 +1734,14 @@ void LoRaMac::set_tx_continuous_wave(uint8_t channel, int8_t datarate, int8_t tx
    _lora_phy->set_tx_cont_mode(&continuous_wave);
 }

-lorawan_status_t LoRaMac::initialize(EventQueue *queue)
+lorawan_status_t LoRaMac::initialize(EventQueue *queue,
+                                     mbed::Callback<void(void)>scheduling_failure_handler)
 {
    _lora_time.activate_timer_subsystem(queue);
    _lora_phy->initialize(&_lora_time);

    _ev_queue = queue;
+    _scheduling_failure_handler = scheduling_failure_handler;

    _channel_plan.activate_channelplan_subsystem(_lora_phy);

--- a/features/lorawan/lorastack/mac/LoRaMac.h
+++ b/features/lorawan/lorastack/mac/LoRaMac.h
@ -78,11 +78,15 @@ public:
     *
     * @param   queue [in]        A pointer to the application provided EventQueue.
     *
+     * @param   scheduling_failure_handler    A callback to inform upper layer if a deferred
+     *                                        transmission (after backoff or retry) fails to schedule.
+     *
     * @return  `lorawan_status_t` The status of the operation. The possible values are:
     *          \ref LORAWAN_STATUS_OK
     *          \ref LORAWAN_STATUS_PARAMETER_INVALID
     */
-    lorawan_status_t initialize(events::EventQueue *queue);
+    lorawan_status_t initialize(events::EventQueue *queue,
+                                mbed::Callback<void(void)>scheduling_failure_handler);

    /**
     * @brief   Disconnect LoRaMac layer
@ -666,6 +670,14 @@ private:
     */
    mbed::Callback<void(void)> _ack_expiry_handler_for_class_c;

+    /**
+     * Transmission is async, i.e., a call to schedule_tx() may be deferred to
+     * a time after a certain back off. We use this callback to inform the
+     * controller layer that a specific TX transaction failed to schedule after
+     * backoff or retry.
+     */
+    mbed::Callback<void(void)> _scheduling_failure_handler;
+
    /**
     * Structure to hold MCPS indication data.
     */