mbed-os/source/Common_Protocols/tcp.c

1743 lines
60 KiB
C

/*
* Copyright (c) 2013-2018, Arm Limited and affiliates.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nsconfig.h"
#include "string.h"
#include "ns_types.h"
#include "ns_trace.h"
#include "eventOS_event.h"
#include "Core/include/socket.h"
#include "nsdynmemLIB.h"
#include "ip_fsc.h"
#include "ns_sha256.h"
#include "NWK_INTERFACE/Include/protocol.h"
#include "NWK_INTERFACE/Include/protocol_timer.h"
#include "randLIB.h"
#include "Common_Protocols/ipv6_constants.h"
#include "Common_Protocols/ipv6_flow.h"
#include "Common_Protocols/tcp.h"
#include "NWK_INTERFACE/Include/protocol_stats.h"
#include "common_functions.h"
#include "net_interface.h"
#ifndef NO_TCP
#define TRACE_GROUP "tcp"
/* Default TCP header flags for outgoing segments, indexed by connection state.
* tcp_build() applies these when a buffer arrives with options.code == 0.
* A FIN listed here is only actually sent once the send queue holds no more
* data than the segment being built, because the FIN-bearing states can be
* entered while data is still queued.
* Any state without an explicit entry is zero-initialised (no flags).
*/
static const uint8_t state_flag[TCP_STATES] =
{
[TCP_STATE_CLOSED] = TCP_FLAG_RST | TCP_FLAG_ACK,
[TCP_STATE_SYN_SENT] = TCP_FLAG_SYN,
[TCP_STATE_SYN_RECEIVED] = TCP_FLAG_SYN | TCP_FLAG_ACK,
[TCP_STATE_ESTABLISHED] = TCP_FLAG_ACK,
[TCP_STATE_FIN_WAIT_1] = TCP_FLAG_FIN | TCP_FLAG_ACK,
[TCP_STATE_FIN_WAIT_2] = TCP_FLAG_ACK,
[TCP_STATE_CLOSE_WAIT] = TCP_FLAG_ACK,
[TCP_STATE_LAST_ACK] = TCP_FLAG_FIN | TCP_FLAG_ACK,
[TCP_STATE_CLOSING] = TCP_FLAG_FIN | TCP_FLAG_ACK,
[TCP_STATE_TIME_WAIT] = TCP_FLAG_ACK
};
#ifdef TCP_TEST
static uint8_t rx_drops[TCP_STATES];
static uint8_t tx_drops[TCP_STATES];
/*Test - functions*/
int8_t tcp_test_drop_tx(int state, uint8_t count) {
tx_drops[state] = count;
return 0;
}
int8_t tcp_test_drop_rx(int state, uint8_t count) {
rx_drops[state] = count;
return 0;
}
/* Test hook: clear every pending RX and TX drop counter. */
void tcp_test_drop_reset(void)
{
    memset(rx_drops, 0, sizeof rx_drops);
    memset(tx_drops, 0, sizeof tx_drops);
}
#endif
#ifdef FEA_TRACE_SUPPORT
/* Render the eight TCP flag bits as a fixed-width string in the order
 * C E U A P R S F, with '-' standing in for any flag that is clear.
 * The result lives in a static buffer and is overwritten by the next call.
 */
static const char *trace_tcp_flags(uint16_t flags)
{
    static const struct {
        uint16_t mask;
        char symbol;
    } flag_chars[] = {
        { TCP_FLAG_CWR, 'C' },
        { TCP_FLAG_ECE, 'E' },
        { TCP_FLAG_URG, 'U' },
        { TCP_FLAG_ACK, 'A' },
        { TCP_FLAG_PSH, 'P' },
        { TCP_FLAG_RST, 'R' },
        { TCP_FLAG_SYN, 'S' },
        { TCP_FLAG_FIN, 'F' },
    };
    /* 8 flag characters plus the terminator; static storage keeps buf[8] == '\0' */
    static char buf[9];

    for (unsigned i = 0; i < sizeof flag_chars / sizeof flag_chars[0]; i++) {
        buf[i] = (flags & flag_chars[i].mask) ? flag_chars[i].symbol : '-';
    }
    return buf;
}
#endif
/* Forward declarations of file-local helpers that are used before their
 * definitions further down in this file.
 */
static void tcp_timer_handle(uint16_t ticksUpdate);
static void tcp_segment_start(tcp_session_t *tcp_info, bool timeout);
static void tcp_uack_segment(buffer_t *buf, tcp_session_t *tcp_info, uint16_t header_length);
static buffer_t *tcp_ack_buffer(tcp_session_t *tcp_info, uint16_t new_ack);
static void tcp_ack_segment(uint32_t ack, tcp_session_t *tcp_info);
static buffer_t *tcp_build_reset_packet(const sockaddr_t *dst_addr, const sockaddr_t *src_addr, uint32_t seq, uint32_t ack, uint8_t flag);
static void tcp_session_established(protocol_interface_info_entry_t *cur, tcp_session_t *tcp_info);
static void tcp_build(buffer_t *buf, tcp_session_t *tcp_info);
/* Number of allocated sessions. The periodic TCP timer is started when this
 * goes from 0 to 1 (tcp_session_ptr_allocate) and stopped when it returns
 * to 0 (tcp_session_ptr_free).
 */
static uint16_t tcp_session_count;
/* Function-entry tracing. TCP_FUNC_ENTRY_TRACE is defined unconditionally
 * here, so FUNC_ENTRY_TRACE currently always maps to tr_debug; remove the
 * define to compile the entry traces out.
 */
#define TCP_FUNC_ENTRY_TRACE
#ifdef TCP_FUNC_ENTRY_TRACE
#define FUNC_ENTRY_TRACE tr_debug
#else
#define FUNC_ENTRY_TRACE(...)
#endif
/* Returns true if val lies in the closed interval [min, max], where the
 * interval is allowed to wrap through the top of the 32-bit sequence space
 * (i.e. min may be numerically greater than max).
 */
static bool tcp_seq_in_range(uint32_t min, uint32_t val, uint32_t max)
{
    const bool at_or_after_min = val >= min;
    const bool at_or_before_max = val <= max;

    /* A non-wrapping window needs both bounds to hold; a wrapping window is
     * the union of [min, 2^32-1] and [0, max], so either bound suffices.
     */
    return (min <= max) ? (at_or_after_min && at_or_before_max)
                        : (at_or_after_min || at_or_before_max);
}
/* Returns true if sequence number val is strictly greater than low.
 * The comparison is delegated to common_serial_number_greater_32()
 * (presumably wrap-aware serial number arithmetic - see that helper).
 */
static bool tcp_seq_gt(uint32_t val, uint32_t low)
{
    const bool val_is_later = common_serial_number_greater_32(val, low);

    return val_is_later;
}
/* Returns true if sequence number val is strictly less than high.
 * Implemented as "high is greater than val" using
 * common_serial_number_greater_32().
 */
static bool tcp_seq_lt(uint32_t val, uint32_t high)
{
    const bool high_is_later = common_serial_number_greater_32(high, val);

    return high_is_later;
}
/* Returns true if sequence number val is equal to or greater than min.
 * Equality is checked directly; the strict comparison is delegated to
 * common_serial_number_greater_32().
 */
static bool tcp_seq_ge(uint32_t val, uint32_t min)
{
    if (val == min) {
        return true;
    }
    return common_serial_number_greater_32(val, min);
}
#if 0
/* Returns true if val <= max.
 * Compiled out by the surrounding #if 0 - presumably kept only for symmetry
 * with the other sequence comparison helpers; re-enable if a caller appears.
 */
static bool tcp_seq_le(uint32_t val, uint32_t max)
{
return val == max || common_serial_number_greater_32(max, val);
}
#endif
/* Tear down a TCP session, reporting `error` to the owning socket.
 * The socket is first told the connection has been abandoned (with the
 * session's interface id and the error code), then the session is released.
 * Returns the result of tcp_session_ptr_free(), i.e. NULL.
 */
static tcp_session_t *tcp_session_delete_with_error(tcp_session_t *tcp_info, uint8_t error)
{
    socket_t *const so = tcp_info->inet_pcb->socket;

    FUNC_ENTRY_TRACE("tcp_session_delete_with_error(), s=%d", so->id);

    socket_connection_abandoned(so, tcp_info->interface->id, error);

    return tcp_session_ptr_free(tcp_info);
}
/* Tear down a TCP session without an error code (error value 0 is passed
 * to tcp_session_delete_with_error()). Returns NULL.
 */
static tcp_session_t *tcp_session_delete(tcp_session_t *tcp_info)
{
    const uint8_t no_error = 0;

    return tcp_session_delete_with_error(tcp_info, no_error);
}
/* Called when the application has released the socket owning this session.
 * Most clean-up, including tcp_session_close(), has already happened by
 * this point; the only remaining job is to arm the "orphaned session"
 * timer if the session is sitting in FIN-WAIT-2.
 */
void tcp_socket_released(tcp_session_t *cur)
{
    FUNC_ENTRY_TRACE("tcp_socket_released() s=%d", cur->inet_pcb->socket->id);

    if (cur->state != TCP_STATE_FIN_WAIT_2) {
        return;
    }
    cur->timer = TCP_FIN_WAIT_2_TIMEOUT;
}
/* Generate an Initial Sequence Number in the style of RFC 6528:
 *
 *     ISN = M + F(local ip, local port, remote ip, remote port, secret)
 *
 * F is the first 32 bits of a SHA-256 over the connection 4-tuple and a
 * per-boot random secret; M is a clock value derived from the monotonic
 * time plus a small, ever-increasing random "fudge".
 */
static uint32_t tcp_generate_isn(const uint8_t local_ip[static 16], uint16_t local_port,
                                 const uint8_t remote_ip[static 16], uint16_t remote_port)
{
    static struct {
        uint8_t key[16];
        bool initialised;
    } secret;
    static uint32_t fudge;
    ns_sha256_context hash_ctx;
    uint32_t hash;

    tr_debug("isn [%s]:%u -> [%s]:%u", trace_ipv6(local_ip), local_port, trace_ipv6(remote_ip), remote_port);

    if (!secret.initialised) {
        /* randLIB is good enough here - total cryptographic security is not
         * the goal, and it should have been seeded from true randomness.
         * Reusing the secret given to addr_opaque_iid_key_set would make the
         * ISN restart from the same value after reboot; a random secret
         * gives a random starting ISN after each reboot instead.
         */
        randLIB_get_n_bytes_random(secret.key, sizeof secret.key);
        secret.initialised = true;
        tr_debug("secret init");
    }

    /* hash = first 32 bits of SHA-256(4-tuple || secret) */
    ns_sha256_init(&hash_ctx);
    ns_sha256_starts(&hash_ctx);
    ns_sha256_update(&hash_ctx, local_ip, 16);
    ns_sha256_update(&hash_ctx, &local_port, sizeof local_port);
    ns_sha256_update(&hash_ctx, remote_ip, 16);
    ns_sha256_update(&hash_ctx, &remote_port, sizeof remote_port);
    ns_sha256_update(&hash_ctx, secret.key, sizeof secret.key);
    ns_sha256_finish_nbits(&hash_ctx, &hash, 32);
    ns_sha256_free(&hash_ctx);

    /* Successive connects must always move forwards, so add a positive
     * fraction of a tick on every call.
     */
    fudge += randLIB_get_8bit() + 1;

    /* Monotonic time is in 100ms ticks, so *25000 converts to 4us ticks */
    uint32_t clock = 25000 * protocol_core_monotonic_time + fudge;

    tr_debug("ISN=%"PRIx32" M=%"PRIx32" F=%"PRIx32" fudge=%"PRIx32, clock + hash, clock, hash, fudge);

    return clock + hash;
}
/* Copy per-connection transmit options from the inet PCB into an outgoing
 * buffer: traffic class, flow label, link-layer security bypass, the
 * don't-fragment flag and the hop limit.
 *
 * Everything read here should also be cloned from a listening PCB into the
 * PCBs created for incoming connections, so that options can be set for the
 * first exchanges before accept().
 */
static void tcp_set_metadata(const inet_pcb_t *inet_pcb, buffer_t *buf)
{
    /* Bypass link-layer security on TX only when the PCB has it switched off */
    const bool bypass_ll_security = (inet_pcb->link_layer_security == 0);

    buf->options.traffic_class = inet_pcb->tclass;
    buf->options.flow_label = inet_pcb->flow_label;
    buf->options.ll_security_bypass_tx = bypass_ll_security;

    /* Sanity check: if MTU handling is right, fragmentation should never be needed */
    buf->options.ipv6_dontfrag = true;

    socket_inet_pcb_set_buffer_hop_limit(inet_pcb, buf, NULL);
}
/* Recompute the effective send MSS from the MSS the peer announced.
 * The peer's value is clamped, in order, to:
 *   - TCP_LOWPAN_MSS_LIMIT, on 6LoWPAN interfaces;
 *   - the minimum IPv6 link MTU less 20 and 40 bytes of headers;
 *   - a floor of 64 bytes.
 */
static void tcp_rethink_mss(tcp_session_t *tcp_info)
{
    tcp_info->send_mss_eff = tcp_info->send_mss_peer;

    /* The 6LoWPAN limit is deliberately conservative, which also covers
     * worries about extension headers, RPL tunnelling...
     */
    if (tcp_info->interface->nwk_id == IF_6LoWPAN &&
            tcp_info->send_mss_eff > TCP_LOWPAN_MSS_LIMIT) {
        tcp_info->send_mss_eff = TCP_LOWPAN_MSS_LIMIT;
    }

    /* No proper PMTUD at the minute, so never exceed the minimum link MTU */
    if (tcp_info->send_mss_eff > IPV6_MIN_LINK_MTU - 20 - 40) {
        tcp_info->send_mss_eff = IPV6_MIN_LINK_MTU - 20 - 40;
    }

    /* ...but don't get totally silly at the low end either */
    if (tcp_info->send_mss_eff < 64) {
        tcp_info->send_mss_eff = 64;
    }
}
/* Allocate and initialise a TCP session for the given inet PCB.
 *
 * The first live session starts the periodic TCP timer. If from_time_wait
 * is supplied, the new session's initial sequence number is placed beyond
 * that old session's send_next and the old session is deleted; otherwise a
 * fresh ISN is generated from the PCB's address/port 4-tuple.
 *
 * On success the session is attached to the PCB and a reference is taken
 * on the PCB's socket. Returns NULL if the allocation fails.
 */
tcp_session_t *tcp_session_ptr_allocate(inet_pcb_t *inet_pcb, tcp_session_t *from_time_wait)
{
    FUNC_ENTRY_TRACE("tcp_session_ptr_allocate() s=%d", inet_pcb->socket->id);

    tcp_session_t *session = ns_dyn_mem_alloc(sizeof(tcp_session_t));
    if (!session) {
        return NULL;
    }

    tcp_session_count++;
    if (tcp_session_count == 1) {
        protocol_timer_start(PROTOCOL_TIMER_TCP_TIM, tcp_timer_handle, TCP_TIMER_PERIOD);
    }

    /* Flags and receive/send window state start out cleared */
    session->passive_open = false;
    session->interface = 0;
    session->busy = false;
    session->persist = false;
    session->sent_fin = false;
    session->receive_next = 0;
    session->receive_adv = 0;
    session->send_window = 0;

    /* Initial send sequence number */
    if (!from_time_wait) {
        session->send_next = tcp_generate_isn(inet_pcb->local_address, inet_pcb->local_port, inet_pcb->remote_address, inet_pcb->remote_port);
    } else {
        session->send_next = from_time_wait->send_next + UINT32_C(250000) + randLIB_get_16bit();
        tcp_session_delete(from_time_wait);
    }
    session->send_unacknowledged = session->send_next;

    session->state = TCP_STATE_CLOSED;
    session->timer = 0;
    session->retry = 0;
    session->inet_pcb = inet_pcb;

    /* INT16_MAX marks "no RTT sample yet" */
    session->srtt8 = INT16_MAX;
    session->srttvar4 = INT16_MAX;
    session->rto = TCP_INITIAL_RTO;

    session->receive_mss = IPV6_MIN_LINK_MTU - 20 - 40;
    session->send_mss_eff = 536;
    session->send_mss_peer = 536;

    inet_pcb->session = session;
    socket_reference(inet_pcb->socket);

    return session;
}
/* Release a TCP session: detach it from its inet PCB, mark the socket as
 * unable to receive or send any more, drop the session's socket reference
 * and free the session memory. Stops the periodic TCP timer when the last
 * session goes away. Always returns NULL.
 */
tcp_session_t *tcp_session_ptr_free(tcp_session_t *tcp_info)
{
    socket_t *const sock = tcp_info->inet_pcb->socket;

    FUNC_ENTRY_TRACE("tcp_session_ptr_free() s=%d", sock->id);

    tcp_info->inet_pcb->session = NULL;
    sock->flags |= SOCKET_FLAG_CANT_RECV_MORE | SOCKET_FLAG_SHUT_WR;

    /* Dropping the reference could free both the inet PCB and the socket -
     * neither may be touched after this call.
     */
    socket_dereference(sock);

    tcp_info->inet_pcb = NULL;
    ns_dyn_mem_free(tcp_info);

    tcp_session_count--;
    if (tcp_session_count == 0) {
        protocol_timer_stop(PROTOCOL_TIMER_TCP_TIM);
    }
    return NULL;
}
/* Queue application data for transmission on a session.
 *
 * Only permitted in ESTABLISHED or CLOSE-WAIT; any other state yields
 * TCP_ERROR_WRONG_STATE and the buffer is not queued. Otherwise the buffer
 * is appended to the socket's send queue and, if the session is currently
 * idle, transmission is kicked off straight away.
 */
tcp_error tcp_session_send(tcp_session_t *tcp_info, buffer_t *buf)
{
    FUNC_ENTRY_TRACE("tcp_session_send()");

    const bool can_send = tcp_info->state == TCP_STATE_ESTABLISHED ||
                          tcp_info->state == TCP_STATE_CLOSE_WAIT;
    if (!can_send) {
        tr_error("tcp error, bad state %d", tcp_info->state);
        return TCP_ERROR_WRONG_STATE;
    }

    /* Wipe out the socket ID - down buffers never carry one, which ensures
     * the core layers generate no callbacks for them.
     */
    buffer_socket_set(buf, NULL);
    buf->session_ptr = NULL;
    sockbuf_append_and_compress(&tcp_info->inet_pcb->socket->sndq, buf);

    if (tcp_info->busy) {
        /* Already transmitting; the queued data is picked up as ACKs arrive */
        return TCP_ERROR_NO_ERROR;
    }

    tcp_info->timer = 0;
    tcp_info->retry = 0;
    tcp_info->busy = true;
    tcp_segment_start(tcp_info, false);

    return TCP_ERROR_NO_ERROR;
}
/* Build a RST in reply to an incoming segment and return it.
 *
 * The incoming buffer is always freed. If a session is supplied, its
 * metadata is applied to the RST, and the session is then deleted with
 * `error` signalled to the user. Returns NULL if no buffer could be
 * allocated for the RST (the clean-up above still happens).
 */
static buffer_t *tcp_reset_response(tcp_session_t *tcp_info, uint8_t error, buffer_t *incoming_buf, uint32_t seq, uint32_t ack, uint8_t flags)
{
    FUNC_ENTRY_TRACE("tcp_reset_response()");

    /* The reply travels back the way the segment came, so the incoming
     * source becomes the destination and vice versa.
     */
    buffer_t *reply = tcp_build_reset_packet(&incoming_buf->src_sa, &incoming_buf->dst_sa, seq, ack, flags);

    if (reply) {
        reply->interface = incoming_buf->interface;
    }
    if (reply && tcp_info) {
        tcp_set_metadata(tcp_info->inet_pcb, reply);
    }

    buffer_free(incoming_buf);

    if (tcp_info) {
        tcp_session_delete_with_error(tcp_info, error);
    }
    return reply;
}
/* Build a stand-alone 20-byte TCP segment (no options, no payload) with the
 * given sequence number, acknowledgement number and flags, addressed from
 * src_addr to dst_addr, ready to go down to IPv6.
 * The window and urgent pointer are written as zero.
 * Returns NULL if no buffer is available.
 */
static buffer_t *tcp_build_reset_packet(const sockaddr_t *dst_addr, const sockaddr_t *src_addr, uint32_t seq, uint32_t ack, uint8_t flags)
{
    FUNC_ENTRY_TRACE("tcp_build_reset_packet()");

    buffer_t *rst = buffer_get(20);
    if (!rst) {
        return NULL;
    }

    rst->dst_sa = *dst_addr;
    rst->src_sa = *src_addr;
    rst->info = (buffer_info_t)(B_DIR_DOWN | B_FROM_TCP | B_TO_IPV6);
    rst->options.type = IPV6_NH_TCP;

    uint8_t *wr = buffer_data_pointer(rst);
    wr = common_write_16_bit(src_addr->port, wr);   /* source port */
    wr = common_write_16_bit(dst_addr->port, wr);   /* destination port */
    wr = common_write_32_bit(seq, wr);              /* sequence number */
    wr = common_write_32_bit(ack, wr);              /* acknowledgement number */
    *wr++ = 5 << 4;                                 /* data offset: 5 words */
    *wr++ = flags;
    wr = common_write_16_bit(0, wr);                /* window */
    wr = common_write_32_bit(0, wr);                /* checksum and URG pointer */
    buffer_data_end_set(rst, wr);

    /* Checksum lives at byte offset 16 of the TCP header */
    common_write_16_bit(buffer_ipv6_fcf(rst, IPV6_NH_TCP), buffer_data_pointer(rst) + 16);

    return rst;
}
/* Send a bare RST (sequence = send_next, ack = 0) to the session's peer in
 * order to abort the connection. Silently does nothing if no buffer can be
 * allocated. The session itself is left untouched.
 */
static void tcp_session_send_reset_to_abort_connection(tcp_session_t *tcp_info)
{
    const inet_pcb_t *pcb = tcp_info->inet_pcb;
    sockaddr_t dst, src;

    dst.addr_type = ADDR_IPV6;
    dst.port = pcb->remote_port;
    memcpy(&dst.address, pcb->remote_address, 16);

    src.addr_type = ADDR_IPV6;
    src.port = pcb->local_port;
    memcpy(&src.address, pcb->local_address, 16);

    buffer_t *rst = tcp_build_reset_packet(&dst, &src, tcp_info->send_next, 0, TCP_FLAG_RST);
    if (rst) {
        rst->interface = tcp_info->interface;
        tcp_set_metadata(tcp_info->inet_pcb, rst);
        /* Don't fill in socket ID, or app would get a TX DONE callback for this */
        protocol_push(rst);
    }
}
/**
 * \brief Function used for starting a TCP connection
 * to a remote server (active open).
 *
 * The destination address and port are taken from the session's inet PCB
 * (remote_address / remote_port). On success the session moves to SYN-SENT
 * and a SYN segment is handed to tcp_build().
 *
 * \param tcp_session pointer to allocated tcp session
 *
 * \return TCP_ERROR_NO_ERROR
 * \return TCP_ERROR_WRONG_STATE session is neither CLOSED nor LISTEN
 * \return TCP_ERROR_BUFFER_ALLOCATION_ERROR no buffer available, or no
 *         outgoing interface could be determined for the destination
 */
tcp_error tcp_session_open(tcp_session_t *tcp_session)
{
    buffer_t *new_buffer;
    protocol_interface_info_entry_t *cur_interface;

    FUNC_ENTRY_TRACE("tcp_session_open()");

    if (tcp_session->state != TCP_STATE_CLOSED && tcp_session->state != TCP_STATE_LISTEN) {
        return TCP_ERROR_WRONG_STATE;
    }

    new_buffer = buffer_get(127);
    if (!new_buffer) {
        return TCP_ERROR_BUFFER_ALLOCATION_ERROR;
    }

    /* Destination must be filled in before the interface can be chosen */
    new_buffer->dst_sa.port = tcp_session->inet_pcb->remote_port;
    new_buffer->dst_sa.addr_type = ADDR_IPV6;
    memcpy(new_buffer->dst_sa.address, tcp_session->inet_pcb->remote_address, 16);

    cur_interface = socket_interface_determine(tcp_session->inet_pcb->socket, new_buffer);
    if (!cur_interface) {
        buffer_free(new_buffer);
        return TCP_ERROR_BUFFER_ALLOCATION_ERROR;
    }

    /* Remember the chosen interface on both the buffer and the session.
     * (Both now point at cur_interface, so no separate copy of its id is
     * needed.)
     */
    new_buffer->interface = cur_interface;
    tcp_session->interface = cur_interface;

    /* Advertise a smaller receive MSS on 6LoWPAN links */
    if (tcp_session->interface->nwk_id == IF_6LoWPAN) {
        if (tcp_session->receive_mss > TCP_LOWPAN_MSS_LIMIT) {
            tcp_session->receive_mss = TCP_LOWPAN_MSS_LIMIT;
        }
    }

    tcp_session->state = TCP_STATE_SYN_SENT;
    new_buffer->options.code = TCP_FLAG_SYN;
    tcp_build(new_buffer, tcp_session);

    return TCP_ERROR_NO_ERROR;
}
/* Shut down the read side of a session.
 * If nothing is waiting in the socket's receive queue this is a no-op that
 * returns TCP_ERROR_NO_ERROR. If unread data is pending, the connection is
 * aborted with a reset instead and the result of tcp_session_abort() is
 * returned.
 */
tcp_error tcp_session_shutdown_read(tcp_session_t *tcp_session)
{
    FUNC_ENTRY_TRACE("tcp_session_shutdown_read");

    if (tcp_session->inet_pcb->socket->rcvq.data_bytes == 0) {
        return TCP_ERROR_NO_ERROR;
    }

    // Force a reset shutdown (RFC 1122 4.2.2.13, RFC 2525 2.17)
    tr_warn("Shutdown read with pending data");
    return tcp_session_abort(tcp_session);
}
/**
 * \brief Function used for closing a TCP connection.
 *
 * If it returns TCP_ERROR_NO_ERROR, TCP session is persisting.
 * Any other return means TCP session has been deleted.
 *
 * States that are already closed or closing are left alone. SYN-RECEIVED
 * and ESTABLISHED move to FIN-WAIT-1, CLOSE-WAIT moves to LAST-ACK, and in
 * both cases transmission is started if the session is idle. Every other
 * state (including LISTEN and SYN-SENT) deletes the session.
 *
 * \param tcp_session pointer to indicate tcp session
 *
 * \return TCP_ERROR_NO_ERROR
 * \return TCP_SESSION_REMOVED
 */
tcp_error tcp_session_close(tcp_session_t *tcp_session)
{
    FUNC_ENTRY_TRACE("tcp_session_close");

    // The state change is made immediately, even if data is still
    // pending - this deals with the API changes. The data output
    // routines need to set flags appropriately.
    switch (tcp_session->state) {
        case TCP_STATE_SYN_RECEIVED:
        case TCP_STATE_ESTABLISHED:
            tcp_session->state = TCP_STATE_FIN_WAIT_1;
            tr_debug("sFW");
            break;

        case TCP_STATE_CLOSE_WAIT:
            tcp_session->state = TCP_STATE_LAST_ACK; // RFC 1122
            tr_debug("sLA");
            break;

        case TCP_STATE_CLOSED:
        case TCP_STATE_LAST_ACK:
        case TCP_STATE_FIN_WAIT_1:
        case TCP_STATE_FIN_WAIT_2:
        case TCP_STATE_CLOSING:
        case TCP_STATE_TIME_WAIT:
            /* Nothing further to do in these states */
            return TCP_ERROR_NO_ERROR;

        case TCP_STATE_LISTEN:
        case TCP_STATE_SYN_SENT:
        default:
            tcp_session_delete(tcp_session);
            return TCP_SESSION_REMOVED;
    }

    /* A close was initiated: get the FIN (or remaining data) moving */
    if (!tcp_session->busy) {
        tcp_session->busy = true;
        tcp_segment_start(tcp_session, false);
    }
    return TCP_ERROR_NO_ERROR;
}
/* Abort a connection.
 *
 *  - CLOSED: nothing to abort, returns TCP_ERROR_SOCKET_NOT_FOUND.
 *  - SYN-RECEIVED, ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT: a RST is
 *    sent to the peer, then the session is deleted with
 *    SOCKET_CONNECTION_RESET.
 *  - LISTEN, SYN-SENT: the session is deleted with SOCKET_CONNECTION_RESET,
 *    no RST is sent.
 *  - any other state: the session is deleted without an error code.
 *
 * All cases other than CLOSED return TCP_ERROR_NO_ERROR.
 */
tcp_error tcp_session_abort(tcp_session_t *tcp_session)
{
    bool send_rst = false;

    FUNC_ENTRY_TRACE("tcp_session_abort");

    switch (tcp_session->state) {
        case TCP_STATE_CLOSED:
            return TCP_ERROR_SOCKET_NOT_FOUND;

        case TCP_STATE_SYN_RECEIVED:
        case TCP_STATE_ESTABLISHED:
        case TCP_STATE_FIN_WAIT_1:
        case TCP_STATE_FIN_WAIT_2:
        case TCP_STATE_CLOSE_WAIT:
            send_rst = true;
            break;

        case TCP_STATE_LISTEN:
        case TCP_STATE_SYN_SENT:
            break;

        default:
            tcp_session_delete(tcp_session);
            return TCP_ERROR_NO_ERROR;
    }

    if (send_rst) {
        tcp_session_send_reset_to_abort_connection(tcp_session);
    }
    tcp_session_delete_with_error(tcp_session, SOCKET_CONNECTION_RESET);
    return TCP_ERROR_NO_ERROR;
}
/**
* \brief Function used to send the next TCP segment for a session
*
* Sends either a data segment taken from the head of the socket's send
* queue, or a zero-length segment carrying SYN or FIN when there is no
* data queued. On a retransmission (timeout == true), sending restarts
* from the first unacknowledged byte.
*
* If the peer's window is closed, no segment is sent on a normal call;
* instead the session enters persist mode and the timer is armed so that
* a later timeout sends a 1-byte window probe.
*
* The function returns nothing. A buffer allocation failure is not
* reported to the caller: the retransmit timer is armed regardless, so the
* attempt is repeated on the next timeout.
*
* \param tcp_info pointer to indicate tcp session
* \param timeout true when called because the retransmit/persist timer fired
*/
static void tcp_segment_start(tcp_session_t *tcp_info, bool timeout)
{
socket_t *so = tcp_info->inet_pcb->socket;
/* Only the first buffer in the send queue is considered for this segment */
buffer_t *tx_data = ns_list_get_first(&so->sndq.bufs);
buffer_t *buf;
FUNC_ENTRY_TRACE("tcp_segment_start() s=%d", so->id);
if (timeout) {
// Retransmission: rewind to the first unacknowledged sequence number.
// This may resend part or all of what was sent, depending on what has
// been acknowledged. Any FIN already sent must be sent again too.
tcp_info->send_next = tcp_info->send_unacknowledged;
tcp_info->sent_fin = false;
}
uint16_t data_length;
if (!tx_data) {
// Should only be called with no data if needing to send a SYN or FIN
if ((state_flag[tcp_info->state] & TCP_FLAG_SYN) ||
((state_flag[tcp_info->state] & TCP_FLAG_FIN) && !tcp_info->sent_fin)) {
data_length = 0;
tcp_info->persist = false;
goto build;
} else {
tr_warn("unexpected tcp_segment_start");
return;
}
}
/* Segment size: limited by queued data, effective MSS and peer's window */
data_length = buffer_data_length(tx_data);
if (data_length > tcp_info->send_mss_eff) {
data_length = tcp_info->send_mss_eff;
}
/* Can transmit up to SND.UNA+SND.WND-1 */
/* Simplified by SND.UNA==SND.NXT */
if (data_length > tcp_info->send_window) {
data_length = tcp_info->send_window;
}
if (data_length == 0) {
if (!timeout) {
/* Window closed: send nothing now, start timer for probe */
tcp_info->persist = true;
tcp_info->timer = tcp_info->rto;
tcp_info->retry = 0;
return;
} else {
// Think - why actually 1? Given we don't resegmentise, and end
// up committed to sending a 1-byte segment in this situation, may
// as well just go for send_mss_eff or 64 for our probe. No rule
// against it.
data_length = 1;
}
} else {
tcp_info->persist = false;
}
build:
/* Create buffer for message */
buf = buffer_get(data_length);
if (buf == NULL) {
goto out;
}
buf->dst_sa.port = tcp_info->inet_pcb->remote_port;
buf->dst_sa.addr_type = ADDR_IPV6;
memcpy(buf->dst_sa.address, tcp_info->inet_pcb->remote_address, 16);
buf->src_sa.port = tcp_info->inet_pcb->local_port;
buf->src_sa.addr_type = ADDR_IPV6;
memcpy(buf->src_sa.address, tcp_info->inet_pcb->local_address, 16);
// Copy the data into the message buffer, but do not remove it from the
// socket buffer until it is acked
if (tx_data) {
buffer_data_add(buf, buffer_data_pointer(tx_data), data_length);
}
buf->interface = tcp_info->interface;
tcp_build(buf, tcp_info);
out:
/* Start retransmit timer (also covers buffer failure) */
tcp_info->timer = tcp_info->rto;
}
/**
 * \brief Function used for resending a segment after timeout.
 *
 * Gives up and deletes the session (signalling SOCKET_TX_FAIL) once the
 * retry count has reached its limit - TCP_SYN_RETRIES for states before
 * ESTABLISHED, TCP_MAX_RETRIES otherwise. When the retry count reaches
 * TCP_PROBLEM_RETRIES, a SOCKET_CONNECTION_PROBLEM event is pushed and a
 * neighbour reachability problem is reported.
 *
 * \param tcp_info identifies the connection
 *
 * \return tcp_info if the session still exists
 * \return NULL if the session was deleted
 */
static tcp_session_t *tcp_resend_segment(tcp_session_t *tcp_info)
{
    bool retries_exhausted;

    FUNC_ENTRY_TRACE("tcp_resend_segment()");

    if (tcp_info->state < TCP_STATE_ESTABLISHED) {
        retries_exhausted = tcp_info->retry >= TCP_SYN_RETRIES;
    } else {
        retries_exhausted = tcp_info->retry >= TCP_MAX_RETRIES;
    }
    if (retries_exhausted) {
        tr_debug("Too many retries");
        return tcp_session_delete_with_error(tcp_info, SOCKET_TX_FAIL);
    }

    tcp_info->retry++;
    if (tcp_info->retry == TCP_PROBLEM_RETRIES) {
        socket_event_push(SOCKET_CONNECTION_PROBLEM, tcp_info->inet_pcb->socket, tcp_info->interface->id, NULL, 0);
        ipv6_neighbour_reachability_problem(tcp_info->inet_pcb->remote_address, tcp_info->interface->id);
    }

    tcp_segment_start(tcp_info, true);
    tr_debug("RE-TX, timer %d", tcp_info->timer);

    return tcp_info;
}
/* Work out whether the advertised receive window's right edge
 * (receive_adv) can be moved forward, and move it if so.
 *
 * Careful window adjustment (RFC 1122 et al): the right edge is never moved
 * leftwards, and it is not moved up in small steps - an increase is only
 * applied if it is at least half the receive queue's byte limit or at
 * least one effective send MSS (Silly Window Syndrome avoidance).
 *
 * Returns the number of bytes by which receive_adv was advanced, or 0 if it
 * was left unchanged.
 */
static uint16_t tcp_compute_window_incr(tcp_session_t *tcp_info)
{
    socket_t *so = tcp_info->inet_pcb->socket;

    /* Free receive space, clamped to what a 16-bit window field can carry */
    int32_t window = sockbuf_space(&so->rcvq);
    if (window < 0) {
        window = 0;
    } else if (window > 0xffff) {
        window = 0xffff;
    }

    /* Less space than already advertised: don't move left */
    if (window < (int32_t) (tcp_info->receive_adv - tcp_info->receive_next)) {
        return 0;
    }

    uint16_t incr = tcp_info->receive_next + window - tcp_info->receive_adv;
    if (incr == 0) {
        return 0;
    }

    /* Don't make small increments (avoid Silly Window Syndrome) */
    if (incr < so->rcvq.data_byte_limit / 2 &&
            incr < tcp_info->send_mss_eff) {
        return 0;
    }

    tcp_info->receive_adv += incr;
    return incr;
}
/* Called after the application has read data from the socket.
 * If the read lets the advertised window move up, an ACK-only segment is
 * sent to tell the peer. (If no buffer is available for the ACK,
 * tcp_build() is handed NULL and simply returns.)
 *
 * Layering is a little wonky here, but this should roughly work.
 * Revisit when delayed ack infrastructure is in place.
 */
void tcp_session_data_received(tcp_session_t *tcp_info)
{
    if (!tcp_compute_window_incr(tcp_info)) {
        return;
    }

    buffer_t *ack = tcp_ack_buffer(tcp_info, 0);
    tcp_build(ack, tcp_info);
}
/**
 * \brief Function used for allocating buffer for ack segment.
 *
 * The returned zero-length buffer is addressed from the session's local
 * endpoint to its remote endpoint and carries options.code == 0xff, which
 * tcp_build() treats as "ACK-only segment". receive_next is advanced by
 * new_ack only when the allocation succeeds.
 *
 * \param tcp_info identifies the connection
 * \param new_ack new bytes to be acknowledged
 *
 * \return allocated buffer on success
 * \return NULL if no buffer could be allocated
 */
static buffer_t *tcp_ack_buffer(tcp_session_t *tcp_info, uint16_t new_ack)
{
    FUNC_ENTRY_TRACE("tcp_ack_buffer() s=%d, new_ack=%d", tcp_info->inet_pcb->socket->id, new_ack);

    buffer_t *ack_buf = buffer_get(0);
    if (!ack_buf) {
        return NULL;
    }

    /* Destination: the peer */
    ack_buf->dst_sa.addr_type = ADDR_IPV6;
    ack_buf->dst_sa.port = tcp_info->inet_pcb->remote_port;
    memcpy(ack_buf->dst_sa.address, tcp_info->inet_pcb->remote_address, 16);

    /* Source: ourselves */
    ack_buf->src_sa.addr_type = ADDR_IPV6;
    ack_buf->src_sa.port = tcp_info->inet_pcb->local_port;
    memcpy(ack_buf->src_sa.address, tcp_info->inet_pcb->local_address, 16);

    ack_buf->interface = tcp_info->interface;
    ack_buf->info = (buffer_info_t)(B_DIR_DOWN | B_FROM_TCP);
    ack_buf->options.code = 0xff;   /* marker for "ACK only" */

    tcp_info->receive_next += new_ack;

    return ack_buf;
}
/**
 * \brief Account for a segment that is about to be sent and will need
 * acknowledging.
 *
 * The sequence space consumed is the payload length plus one each for SYN
 * and FIN. If that is non-zero, send_next is advanced, the retransmit timer
 * is armed (unless already running) and the session is marked busy.
 * Sending a FIN is also recorded in sent_fin.
 *
 * \param buf buffer to be acknowledged (TCP header already prepended)
 * \param tcp_info identifies the connection
 * \param header_length length of the TCP header at the front of buf
 */
static void tcp_uack_segment(buffer_t *buf, tcp_session_t *tcp_info, uint16_t header_length)
{
    FUNC_ENTRY_TRACE("tcp_uack_segment() s=%d", tcp_info->inet_pcb->socket->id);

    uint16_t seq_consumed = buffer_data_length(buf) - header_length;

    if (buf->options.code & TCP_FLAG_SYN) {
        seq_consumed++;
    }
    if (buf->options.code & TCP_FLAG_FIN) {
        seq_consumed++;
        if (tcp_info->sent_fin) {
            tr_err("sent 2 FINs");
        }
        tcp_info->sent_fin = true;
    }

    if (seq_consumed != 0) {
        tcp_info->send_next += seq_consumed;
        if (tcp_info->timer == 0) {
            tcp_info->timer = tcp_info->rto;
        }
        tcp_info->busy = true;
    }
}
/**
* \brief Peer has acknowledged data.
*
* Drops the acknowledged bytes from the socket's send queue, advances
* send_unacknowledged, confirms neighbour reachability and (once the
* connection is established) pushes a SOCKET_TX_DONE event.
*
* If the acknowledgement covers everything sent so far, the RTT estimate
* and RTO are updated (only when the segment was not retransmitted and was
* not a persist probe), the retry count and timer are cleared, and either
* the next segment is started or the session is marked idle.
*
* \param ack sequence being acknowledged
* \param tcp_info identifies the connection
*
*/
static void tcp_ack_segment(uint32_t ack, tcp_session_t *tcp_info)
{
FUNC_ENTRY_TRACE("tcp_ack_segment() s=%d", (int)tcp_info->inet_pcb->socket->id);
socket_t *so = tcp_info->inet_pcb->socket;
uint32_t acked_bytes;
acked_bytes = ack - tcp_info->send_unacknowledged;
tr_debug("tcp_ack_segment() acked %"PRIu32, acked_bytes);
// Do not allow to remove more than sent data from buffer
// (the acknowledged range can also cover a SYN or FIN, which occupy
// sequence space but no queue bytes)
if (acked_bytes > so->sndq.data_bytes) {
acked_bytes = so->sndq.data_bytes;
}
sockbuf_drop(&so->sndq, acked_bytes);
tr_debug("tcp_ack_segment() socket remove from buffer %"PRIu32" data to be sent %"PRIu32, acked_bytes, so->sndq.data_bytes);
tcp_info->send_unacknowledged = ack;
ipv6_neighbour_reachability_confirmation(tcp_info->inet_pcb->remote_address, tcp_info->interface->id);
uint32_t remaining_bytes = so->sndq.data_bytes;
if (tcp_info->state >= TCP_STATE_ESTABLISHED) {
// Could consider not sending event if space below low water?
socket_event_push(SOCKET_TX_DONE, so, tcp_info->interface->id, tcp_info, remaining_bytes);
}
// Everything that has been sent is now acked, so send the next segment
if (ack == tcp_info->send_next) {
// Made when all data has been acked
// Could do this only periodically, based on a timer, for speed
// Or, as RFC 4861 suggests, once per RTT. Which RTO calculation is,
// so keep this with the RTO recalculation code.
if (tcp_info->retry == 0 && !tcp_info->persist) {
/* Update RTT - RFC 6298, using tricks in Van Jacobson's 1988 paper */
/* Most variables held as scaled 16-bit positive signed integers */
/* R = ticks elapsed since the timer was armed with rto (timer counts down) */
int16_t R = (int16_t) tcp_info->rto - (int16_t)tcp_info->timer;
if (R < 0) {
tr_err("R=%"PRId16, R);
R = 0;
}
if (tcp_info->srtt8 == INT16_MAX) {
/* First sample (INT16_MAX marks "no sample yet") */
tcp_info->srtt8 = R << 3; // srtt := R
tcp_info->srttvar4 = R << (2-1); // rttvar := R / 2
} else {
/* 1/8 gain and scaling on smoothed RTT measurement */
int16_t R_diff = R - (tcp_info->srtt8 >> 3);
tcp_info->srtt8 += R_diff;
if (R_diff < 0) {
R_diff = -R_diff;
}
/* 1/4 gain and scaling on smoothed RTTVAR measurement */
int16_t V_diff = R_diff - (tcp_info->srttvar4 >> 2);
tcp_info->srttvar4 += V_diff;
}
/* RTO = RTT + 4 * RTTVAR - rounds nicely as described by Van Jacobson */
tcp_info->rto = (tcp_info->srtt8 >> 3) + tcp_info->srttvar4;
if (tcp_info->rto < TCP_MINIMUM_RTO) {
tcp_info->rto = TCP_MINIMUM_RTO;
}
//tr_debug("R=%"PRId16" rto=%"PRIu16" srtt8=%"PRId16" rttvar4=%"PRId16, R, tcp_info->rto, tcp_info->srtt8, tcp_info->srttvar4);
//tr_debug("R=%.2f rto=%.2f srtt=%.2f rttvar=%.2f", R * .150F, tcp_info->rto * .150F, tcp_info->srtt8 * (.15F/8), tcp_info->srttvar4 * (.15F/4));
}
tcp_info->retry = 0;
tcp_info->timer = 0;
/* More to send (queued data, or a FIN still owed)? Otherwise go idle. */
if (remaining_bytes ||
((state_flag[tcp_info->state] & TCP_FLAG_FIN) && !tcp_info->sent_fin)) {
tcp_segment_start(tcp_info, false);
} else {
tcp_info->busy = false;
}
}
}
/**
 * \brief Function called every time timer ticks.
 *
 * Does no TCP work itself: it posts a low-priority ARM_SOCKET_TCP_TIMER_CB
 * event, carrying the tick count, to the socket event handler. A failure to
 * post the event is only logged.
 */
static void tcp_timer_handle(uint16_t ticksUpdate)
{
    arm_event_s timer_event = {
        .receiver = socket_event_handler_id_get(),
        .sender = 0,
        .event_type = ARM_SOCKET_TCP_TIMER_CB,
        .event_data = ticksUpdate,
        .data_ptr = NULL,
        .priority = ARM_LIB_LOW_PRIORITY_EVENT,
    };

    if (eventOS_event_send(&timer_event) == 0) {
        return;
    }
    tr_error("tcp_timer_handle(): event send failed");
}
/**
* \brief Function used for handling time events.
*
* Walks every IPv6 stream socket, counts its session's timer down by
* tickUpdate and acts on any timer that expires:
*  - TIME-WAIT: the session is deleted;
*  - busy session: RTO is doubled (capped at TCP_MAXIMUM_RTO) and the
*    segment is resent, which may itself delete the session;
*  - idle FIN-WAIT-2 on a closed socket: move to TIME-WAIT.
* The periodic timer is then restarted.
*
* \param tickUpdate number of timer ticks elapsed since the last call
*/
void tcp_handle_time_event(uint16_t tickUpdate)
{
if (tcp_session_count == 0) {
return;
}
/* "safe" iteration: handling a timeout can delete the session, which drops a socket reference */
ns_list_foreach_safe(socket_t, socket, &socket_list) {
if (!socket_is_ipv6(socket) || socket->type != SOCKET_TYPE_STREAM) {
continue;
}
inet_pcb_t *inet_pcb = socket->inet_pcb;
tcp_session_t *cur = inet_pcb->session;
/* timer == 0 means "not running" */
if (cur && cur->timer) {
if (cur->timer > tickUpdate) {
cur->timer -= tickUpdate;
} else {
/* Timer expired */
cur->timer = 0;
if (inet_pcb->socket->flags & SOCKET_LISTEN_STATE) {
//shouldn't happen
} else if (cur->state == TCP_STATE_CLOSED) {
//tcp_timers_active &= ~the_bit;
} else if (cur->state == TCP_STATE_TIME_WAIT) {
//tr_debug("Timewait --> Close");
tcp_session_delete(cur);
} else {
if (cur->busy) {
/* Retransmission timeout: exponential backoff, then resend */
cur->rto *= 2;
if (cur->rto > TCP_MAXIMUM_RTO) {
cur->rto = TCP_MAXIMUM_RTO;
}
/* cur becomes NULL if the retry limit was hit and the session deleted */
cur = tcp_resend_segment(cur);
} else if (cur->state == TCP_STATE_FIN_WAIT_2) {
if (inet_pcb->socket->flags & SOCKET_FLAG_CLOSED) {
tr_debug("sTW No Fin from host yet"); // Timeout shouldn't happen if socket is still open, so logically no event needed
cur->state = TCP_STATE_TIME_WAIT;
cur->timer = TCP_TIME_WAIT_TO_CLOSE;
} else {
tr_err("FW2 timeout with socket open");
}
}
}
}
}
}
// XXX is this right? Surely should check current session list.
// Guess we just get one more tick though, as we won't re-set next time.
protocol_timer_start(PROTOCOL_TIMER_TCP_TIM, tcp_timer_handle, TCP_TIMER_PERIOD);
}
/* Search the socket list for a listening IPv6 socket bound to `port` whose
 * local address is either the unspecified address or exactly `addr`.
 * Returns the first match in list order, or NULL if there is none.
 */
static socket_t *tcp_find_listen_socket(const uint8_t addr[static 16], uint16_t port)
{
    FUNC_ENTRY_TRACE("tcp_find_listen_socket() %d", port);

    ns_list_foreach(socket_t, so, &socket_list) {
        if (!(so->flags & SOCKET_LISTEN_STATE)) {
            continue;
        }
        if (!socket_is_ipv6(so)) {
            continue;
        }
        if (so->inet_pcb->local_port != port) {
            continue;
        }
        /* Wildcard bind matches any destination; otherwise need an exact match */
        if (addr_ipv6_equal(so->inet_pcb->local_address, ns_in6addr_any) ||
                addr_ipv6_equal(so->inet_pcb->local_address, addr)) {
            return so;
        }
    }
    return NULL;
}
/**
 * \brief Function used for sending data through TCP.
 *
 * Prepends a TCP header to the buffer, fills in addressing from the
 * session, computes the checksum, accounts for the sequence space used and
 * pushes the buffer down to IP. The buffer is always consumed.
 *
 * \param buf buffer to be sent (NULL is tolerated and ignored)
 * \param tcp_info points to TCP session (if NULL, buf is freed and an
 *                 error is traced)
 *
 * Input: data = TCP payload
 *        options.code = TCP flags (or 0 meaning "normal data from socket" or 0xFF meaning "ACK-only segment")
 *        src_sa and dst_sa need not be set
 * Output to IP down:
 *        data = IP payload
 *        src_sa and dst_sa addresses and ports filled in from session
 *        options.type = IPV6_NH_TCP
 *
 */
static void tcp_build(buffer_t *buf, tcp_session_t *tcp_info)
{
    uint16_t header_length = 20;
    uint8_t *ptr;

    if (!buf) {
        return;
    }
    /* Must be checked before anything dereferences tcp_info (including the
     * entry trace below).
     */
    if (!tcp_info) {
        tr_error("DW No Session: dst: %s, src port: %d, dst port: %d", tr_ipv6(buf->dst_sa.address), buf->src_sa.port,
                 buf->dst_sa.port);
        buffer_free(buf);
        return;
    }
    FUNC_ENTRY_TRACE("tcp_build() s=%d", tcp_info->inet_pcb->socket->id);

    /* No payload may follow a FIN we have already sent */
    if (buffer_data_length(buf) != 0 && tcp_info->sent_fin) {
        tr_error("TCP:DW send fail by state %02x", tcp_info->state);
        buffer_free(buf);
        return;
    }

    //Set Flags
    if (buf->options.code) {
        /* options.code contains TCP flags when used with TCP */
        if (buf->options.code == 0xff) {
            /* code == 0xff if ACK only */
            buf->options.code = TCP_FLAG_ACK;
        }
    } else {
        /* data send request from socket */
        buf->options.code = state_flag[tcp_info->state];
        uint16_t buf_len = buffer_data_length(buf);
        if (tcp_info->inet_pcb->socket->sndq.data_bytes <= buf_len) {
            /* push data if we have no more (RFC 1122 4.2.2.2) */
            if (buf_len != 0) {
                buf->options.code |= TCP_FLAG_PSH;
            }
        } else {
            /* Close advances state even while outstanding data (more like BSD
             * than RFC 793), so make sure FIN flag only actually sent when no
             * more data */
            buf->options.code &= ~TCP_FLAG_FIN;
        }
    }

    /* A SYN carries a 4-byte MSS option. (options.code can no longer be
     * 0xff here - that marker was translated to TCP_FLAG_ACK above.)
     */
    if (buf->options.code & TCP_FLAG_SYN) {
        header_length += 4;
    }

    buf = buffer_headroom(buf, header_length);
    if (!buf) {
        tr_error("TCP_DW:HeadROOM Fail");
        return;
    }

    ptr = buffer_data_reserve_header(buf, header_length);
    ptr = common_write_16_bit(tcp_info->inet_pcb->local_port, ptr);
    ptr = common_write_16_bit(tcp_info->inet_pcb->remote_port, ptr);
    ptr = common_write_32_bit(tcp_info->send_next, ptr);
    ptr = common_write_32_bit(tcp_info->receive_next, ptr);
    /* Data offset is the header length in 32-bit words, held in the top
     * nibble: (header_length / 4) << 4 == header_length << 2.
     */
    *ptr++ = header_length << 2;
    *ptr++ = buf->options.code;
    /* Take the opportunity to open the advertised window if possible */
    tcp_compute_window_incr(tcp_info);
    ptr = common_write_16_bit(tcp_info->receive_adv - tcp_info->receive_next, ptr);
    ptr = common_write_32_bit(0, ptr); /* checksum and URG pointer */

    /* Advertise a minimal MSS based on minimum IPv6 MTU; keeping it simple for
     * now, rather than actually looking at the real link MTUs. See RFC 6991,
     * RFC 1191, etc.
     */
    if (buf->options.code & TCP_FLAG_SYN) {
        *ptr++ = TCP_OPTION_MSS;
        *ptr++ = 4; // option length
        ptr = common_write_16_bit(tcp_info->receive_mss, ptr);
    }

    /* Addressing always comes from the session, whatever the caller set */
    memcpy(buf->dst_sa.address, tcp_info->inet_pcb->remote_address, 16);
    buf->dst_sa.port = tcp_info->inet_pcb->remote_port;
    buf->dst_sa.addr_type = ADDR_IPV6;
    memcpy(buf->src_sa.address, tcp_info->inet_pcb->local_address, 16);
    buf->src_sa.port = tcp_info->inet_pcb->local_port;
    buf->src_sa.addr_type = ADDR_IPV6;

    /* calculate checksum (field is at byte offset 16 of the TCP header) */
    common_write_16_bit(buffer_ipv6_fcf(buf, IPV6_NH_TCP), buffer_data_pointer(buf) + 16);

    /* Advance send_next / arm the retransmit timer for what we are sending */
    tcp_uack_segment(buf, tcp_info, header_length);

    buf->info = (buffer_info_t)(B_FROM_TCP | B_TO_IPV6 | B_DIR_DOWN);
    buf->options.type = IPV6_NH_TCP;
    buf->options.code = 0;
    tcp_set_metadata(tcp_info->inet_pcb, buf);

#ifdef TCP_TEST
    /* Test hook: deliberately lose this segment if a TX drop is pending */
    if (tx_drops[tcp_info->state]) {
        tx_drops[tcp_info->state]--;
        tr_info("TX drop %s", tcp_state_name(tcp_info));
        socket_tx_buffer_event_and_free(buf, SOCKET_TX_FAIL);
        return;
    }
#endif
    //tr_debug("DW buf_len=%d", buffer_data_length(buf));
    protocol_push(buf);
}
/**
 * \brief Move a session into ESTABLISHED once the handshake has completed.
 *
 * Reports connection completion to the session's socket, then updates the
 * TCP state and, if needed, the retransmission timeout.
 *
 * \param cur interface the completing segment arrived on (its id is passed to the socket layer)
 * \param tcp_info session that has just completed its handshake
 */
static void tcp_session_established(protocol_interface_info_entry_t *cur, tcp_session_t *tcp_info)
{
    tr_debug("UP:sES");

    /* Socket layer is told first, then the state is switched */
    socket_connection_complete(tcp_info->inet_pcb->socket, cur->id);
    tcp_info->state = TCP_STATE_ESTABLISHED;

    /* Nothing more to do unless a retransmission happened while connecting */
    if (!(tcp_info->retry > 0)) {
        return;
    }

    /* RFC 6298 - if timed out during connection, revert to conservative initial RTO for data */
    /* (What's the point - Karn's algorithm would cover this?) */
    if (tcp_info->rto < TCP_INITIAL_CONSERVATIVE_RTO) {
        tcp_info->rto = TCP_INITIAL_CONSERVATIVE_RTO;
    }
}
/**
 * \brief Function that handles data coming from lower level to TCP.
 *
 * Validates the segment (length, checksum, data offset, options), finds the
 * matching session or listening socket, and then runs the per-state
 * processing: SYN handling, window checks, RST/SYN/ACK processing, data
 * delivery to the socket receive queue and FIN handling. Any ACK, SYN-ACK
 * or RST generated in response is sent from inside this function.
 *
 * The buffer is always consumed: it is freed, appended to the socket's
 * receive queue, or handed to tcp_reset_response().
 *
 * \param buf buffer received
 *
 * \return the result of buffer_free() or tcp_reset_response() on the
 *         early-exit paths (presumably NULL - confirm against those helpers)
 * \return NULL otherwise; no further action required by the caller
 */
buffer_t *tcp_up(buffer_t *buf)
{
    uint8_t *ptr;
    protocol_interface_info_entry_t *cur;
    uint16_t data_offset;
    uint16_t mss_option = 536; /* value used when the peer sends no MSS option */
    uint16_t window_size;
    inet_pcb_t *inet_pcb;
    tcp_session_t *tcp_info;
    buffer_t *segment_ack = NULL;
    uint32_t ack_no;
    uint32_t seq_no;
    uint16_t seg_len;
    uint8_t flags;

    cur = buf->interface;

    /* Multicast source or link-layer destination already handled by IP */
    if (!cur || addr_is_ipv6_multicast(buf->dst_sa.address) || buf->options.ll_security_bypass_rx) {
        tr_error("TCP UP:Invalid");
        return buffer_free(buf);
    }

    /* Must hold at least the fixed 20-byte TCP header */
    if (buffer_data_length(buf) < 20) {
        tr_error("TCP UP:Too short");
        return buffer_free(buf);
    }

    /* A non-zero result from the checksum routine means the checksum is bad */
    if (buffer_ipv6_fcf(buf, IPV6_NH_TCP)) {
        tr_warn("TCP CKSUM ERROR!!!: src: %s, dst: %s", tr_ipv6(buf->src_sa.address), tr_ipv6(buf->dst_sa.address));
        protocol_stats_update(STATS_IP_CKSUM_ERROR, 1);
        return buffer_free(buf);
    }

    // save received port(s), seq_no and ack_no, data_offset, flags, window_size,
    ptr = buffer_data_pointer(buf);
    buf->src_sa.port = common_read_16_bit(ptr);
    ptr += 2;
    buf->dst_sa.port = common_read_16_bit(ptr);
    ptr += 2;
    seq_no = common_read_32_bit(ptr);
    ptr += 4;
    ack_no = common_read_32_bit(ptr);
    ptr += 4;

    /* Data offset is in 32-bit words in the top nibble; convert to bytes */
    data_offset = (*ptr++ >> 4) << 2;
    /* The header can be neither shorter than the fixed 20 bytes nor longer
     * than the segment. Without the lower bound, a bogus offset would leave
     * header bytes in the buffer to be treated as payload. */
    if (data_offset < 20 || data_offset > buffer_data_length(buf)) {
        tr_error("TCP UP:Offset length fail");
        return buffer_free(buf);
    }
    /* From here the buffer's data is the payload only; ptr still walks the header */
    buffer_data_strip_header(buf, data_offset);

    flags = *ptr++;
    window_size = common_read_16_bit(ptr);
    ptr += 2;

    // Skip over Checksum (already checked) and Urgent Pointer (ignored)
    ptr += 4;

    /* Segment length in sequence space: payload plus one each for SYN and FIN */
    seg_len = buffer_data_length(buf);
    if (flags & TCP_FLAG_SYN) {
        seg_len++;
    }
    if (flags & TCP_FLAG_FIN) {
        seg_len++;
    }

    tr_debug("TCP_UP: dst_p=%d, src_p=%d, flags=%s", buf->dst_sa.port, buf->src_sa.port, trace_tcp_flags(flags));

    /* clear flags that will be ignored */
    flags &= ~(TCP_FLAG_CWR | TCP_FLAG_ECE | TCP_FLAG_URG);

    // Parse options
    bool malformed_options = false;
    if (data_offset > 20) {
        uint16_t options_len = data_offset - 20;
        while (options_len > 0) {
            uint8_t type = ptr[0];
            uint8_t len;

            if (type == TCP_OPTION_END) {
                break;
            } else if (type == TCP_OPTION_NOP) {
                len = 1;
            } else {
                /* Every other option carries a length byte, which counts type+length */
                if (options_len < 2) {
                    goto bad_opts;
                }
                len = ptr[1];
                if (len < 2) {
                    goto bad_opts;
                }
            }
            if (len > options_len) {
bad_opts:
                malformed_options = true;
                tr_err("Malformed options");
                break;
            }
            if (type == TCP_OPTION_MSS && len == 4) {
                mss_option = common_read_16_bit(ptr + 2);
                tr_debug("MSS %"PRIu16, mss_option);
            } else if (type != TCP_OPTION_NOP){
                tr_info("Unsupported option %d", type);
            }

            ptr += len;
            options_len -= len;
        }
    }

    // find socket from existing connections based on local and remote addresses
    socket_t *so = socket_lookup_ipv6(IPV6_NH_TCP, &buf->dst_sa, &buf->src_sa, false);
    inet_pcb = so ? so->inet_pcb : NULL;
    tcp_info = inet_pcb ? inet_pcb->session : NULL;

    // if not found, and it's a SYN, look for a listening socket
    if (tcp_info == NULL && !malformed_options) find_listen: { // Can jump here from TIME-WAIT, with tcp_info set
        socket_t *listen_socket = tcp_find_listen_socket(buf->dst_sa.address, buf->dst_sa.port);
        if (listen_socket) {
            tr_debug("UP: Packet for LISTEN socket %d", listen_socket->id);
#ifdef TCP_TEST
            if (rx_drops[TCP_STATE_LISTEN]) {
                tr_info("RX drop LISTEN");
                rx_drops[TCP_STATE_LISTEN]--;
                return buffer_free(buf);
            }
#endif
            if (flags & TCP_FLAG_RST) {
                tr_warn("UP, RST to LISTEN");
                return buffer_free(buf);
            }
            if (flags & TCP_FLAG_ACK) {
                tr_warn("UP, ACK to LISTEN");
                return tcp_reset_response(NULL, 0, buf, ack_no, 0, TCP_FLAG_RST);
            }
            if (flags & TCP_FLAG_SYN) {
                socket_t *new_socket = socket_new_incoming_connection(listen_socket);
                if (!new_socket) {
                    tr_error("Couldn't allocate socket");
                    return buffer_free(buf);
                }
                inet_pcb = new_socket->inet_pcb;
                /* new socket has an inet PCB cloned from the listening one - lock down addresses+ports */
                memcpy(inet_pcb->remote_address, buf->src_sa.address, 16);
                memcpy(inet_pcb->local_address, buf->dst_sa.address, 16);
                inet_pcb->local_port = buf->dst_sa.port;
                inet_pcb->remote_port = buf->src_sa.port;
                tr_debug("remote=[%s]:%u", trace_ipv6(inet_pcb->remote_address), inet_pcb->remote_port);
                tr_debug("local=[%s]:%u", trace_ipv6(inet_pcb->local_address), inet_pcb->local_port);
                /* If someone sets a fixed (non-0) flow label on a listening socket, all incoming
                 * connections will end up using the same flow label, which is dumb, but user's fault. */
                /* Same logic as connect(). */
                if (inet_pcb->flow_label == IPV6_FLOW_AUTOGENERATE ||
                        (inet_pcb->flow_label == IPV6_FLOW_UNSPECIFIED && ipv6_flow_auto_label)) {
                    inet_pcb->flow_label = ipv6_flow_random();
                }
                tcp_info = tcp_session_ptr_allocate(inet_pcb, tcp_info);
                if (!tcp_info) {
                    tr_error("Couldn't allocate TCP session");
                    socket_release(new_socket);
                    return buffer_free(buf);
                }
                // save source port and address to tcp session data
                tcp_info->interface = cur;
                if (tcp_info->interface->nwk_id == IF_6LoWPAN) {
                    if (tcp_info->receive_mss > TCP_LOWPAN_MSS_LIMIT) {
                        tcp_info->receive_mss = TCP_LOWPAN_MSS_LIMIT;
                    }
                }
                tcp_info->receive_next = seq_no;
                tcp_info->receive_adv = seq_no;
                tcp_info->send_window = window_size;
                tcp_info->send_mss_peer = mss_option;
                tcp_rethink_mss(tcp_info);
                buffer_free(buf);

                // Acknowledge 1 byte (the SYN) - ignore anything further
                buf = tcp_ack_buffer(tcp_info, 1);
                if (!buf) {
                    tr_error("Couldn't create SYN-ACK");
                    tcp_session_delete(tcp_info);
                    socket_release(new_socket);
                    return NULL;
                }
                buf->options.code = TCP_FLAG_SYN | TCP_FLAG_ACK;
                tr_debug("UP:sSR");
                tcp_info->state = TCP_STATE_SYN_RECEIVED;
                tcp_info->passive_open = true;
                tcp_build(buf, tcp_info);
                return NULL;
            } // SYN

            // Not RST, ACK or SYN
            tr_warn("UP, no flags to LISTEN");
            return buffer_free(buf);
        } // Listening socket found

        // Wipe out tcp_info, for the case where we jumped into this block from TIME-WAIT, so we send
        // a reset (for this new connection attempt, with a higher sequence number, for which
        // we have no listening socket).
        tcp_info = NULL;
    } // No existing session

    /* No usable session (or unparseable options): answer as for a CLOSED connection */
    if (tcp_info == NULL || tcp_info->state == TCP_STATE_CLOSED || malformed_options) {
        tr_info("No tcp_info for port=%d from %s", buf->dst_sa.port, trace_ipv6(buf->src_sa.address));
        if (flags & TCP_FLAG_RST) {
            tr_debug("RST to CLOSED");
            return buffer_free(buf);
        }
        if (!(flags & TCP_FLAG_ACK)) {
            return tcp_reset_response(tcp_info, SOCKET_CONNECTION_RESET, buf, 0, seq_no + seg_len, TCP_FLAG_RST|TCP_FLAG_ACK);
        }
        return tcp_reset_response(tcp_info, SOCKET_CONNECTION_RESET, buf, ack_no, 0, TCP_FLAG_RST);
    }

#ifdef TCP_TEST
    if (rx_drops[tcp_info->state]) {
        tr_info("RX drop %s", tcp_state_name(tcp_info));
        rx_drops[tcp_info->state]--;
        return buffer_free(buf);
    }
#endif

    buffer_socket_set(buf, so);

    tr_debug("UP: sock=%d: state=%s", so->id, tcp_state_name(tcp_info));

    if (tcp_info->state == TCP_STATE_SYN_SENT) {
        if (flags & TCP_FLAG_ACK) {
            /* An ACK here must acknowledge exactly our SYN */
            if (ack_no != tcp_info->send_next) {
                if (flags & TCP_FLAG_RST) {
                    return buffer_free(buf);
                }
                return tcp_reset_response(NULL, 0, buf, ack_no, 0, TCP_FLAG_RST);
            }
        }
        if (flags & TCP_FLAG_RST) {
            /* Only a RST carrying an (acceptable) ACK aborts the connect */
            if (flags & TCP_FLAG_ACK) {
                tcp_session_delete_with_error(tcp_info, SOCKET_CONNECT_FAIL);
            }
            return buffer_free(buf);
        }
        if (flags & TCP_FLAG_SYN) {
            tcp_info->receive_next = seq_no;
            tcp_info->receive_adv = seq_no;
            tcp_info->send_mss_peer = mss_option;
            tcp_rethink_mss(tcp_info);
            if (flags & TCP_FLAG_ACK) {
                tcp_ack_segment(ack_no, tcp_info);
                tcp_info->send_wl1 = seq_no; // RFC 1122 4.2.2.20(c)
                tcp_info->send_wl2 = ack_no;
                tcp_info->send_window = window_size;
                tcp_session_established(cur, tcp_info);
                goto syn_sent_to_established;
            } else {
                /* SYN without ACK: simultaneous open */
                tr_debug("UP:sSR");
                tcp_info->state = TCP_STATE_SYN_RECEIVED;
                segment_ack = tcp_ack_buffer(tcp_info, 1);
                if (segment_ack) {
                    buffer_socket_set(segment_ack, buf->socket);
                    segment_ack->options.code = TCP_FLAG_SYN | TCP_FLAG_ACK;
                    tcp_build(segment_ack, tcp_info);
                }
                buffer_free(buf);
                return NULL;
            }
        }
    } // state == TCP_STATE_SYN_SENT

    // Early escape from TIME-WAIT: processing of SYN segments as per RFC 6191
    // (simple non-timestamp case)
    if ((flags & (TCP_FLAG_SYN|TCP_FLAG_FIN|TCP_FLAG_ACK|TCP_FLAG_RST)) == TCP_FLAG_SYN && tcp_info->state == TCP_STATE_TIME_WAIT) {
        if (tcp_seq_ge(seq_no, tcp_info->receive_next)) {
            goto find_listen;
        } else {
            return buffer_free(buf);
        }
    }

    // Window checks
    int32_t receive_window = sockbuf_space(&so->rcvq);
    if (receive_window < 0) {
        receive_window = 0;
    }
    /* Never treat the window as smaller than what has already been advertised */
    if (receive_window < (int32_t) (tcp_info->receive_adv - tcp_info->receive_next)) {
        receive_window = tcp_info->receive_adv - tcp_info->receive_next;
    }
    bool seq_ok = false;
    if (receive_window == 0) {
        /* Zero window: only an empty segment exactly at RCV.NXT is acceptable */
        if (seg_len == 0 && seq_no == tcp_info->receive_next) {
            seq_ok = true;
        }
    } else {
        /* Acceptable if either the first or the last sequence number is in the window */
        if (tcp_seq_in_range(tcp_info->receive_next, seq_no, tcp_info->receive_next + receive_window - 1)) {
            seq_ok = true;
        } else if (seg_len > 0 && tcp_seq_in_range(tcp_info->receive_next, seq_no + seg_len - 1, tcp_info->receive_next + receive_window - 1)) {
            seq_ok = true;
        }
    }

    if (!seq_ok) {
        tr_debug("Out-of-window seq_no=%"PRIu32", seg_len=%"PRIu16", window [%"PRIu32" - %"PRIu32")", seq_no, seg_len, tcp_info->receive_next, tcp_info->receive_next + receive_window);
        buffer_free(buf);
        buf = tcp_ack_buffer(tcp_info, 0);
        /* tcp_ack_buffer can fail (see the SYN-ACK path above); nothing to send then */
        if (buf) {
            tcp_build(buf, tcp_info);
        }
        return NULL;
    }

    /* Strip data preceding the window */
    if (tcp_seq_lt(seq_no, tcp_info->receive_next)) {
        uint16_t excess = tcp_info->receive_next - seq_no;
        tr_debug("Strip preceding excess %d", excess);
        /* Strip SYN "byte" */
        if (flags & TCP_FLAG_SYN) {
            flags &= ~TCP_FLAG_SYN;
            seg_len--;
            excess--;
            seq_no++;
        }
        if (excess) {
            buffer_data_strip_header(buf, excess);
            seg_len -= excess;
            seq_no += excess;
        }
    }

    /* Strip data following the window */
    if (tcp_seq_gt(seq_no + buffer_data_length(buf), tcp_info->receive_next + receive_window)) {
        uint16_t excess = (seq_no + buffer_data_length(buf)) - (tcp_info->receive_next + receive_window);
        tr_debug("Strip trailing excess %d", excess);
        /* Strip FIN "byte" */
        if (flags & TCP_FLAG_FIN) {
            flags &= ~TCP_FLAG_FIN;
            seg_len--;
        }
        buf->buf_end -= excess;
        seg_len -= excess;
    }

    if (flags & TCP_FLAG_RST) {
        switch (tcp_info->state) {
            case TCP_STATE_SYN_RECEIVED:
                if (tcp_info->passive_open) {
                    tcp_session_delete_with_error(tcp_info, 0);
                } else {
                    tcp_session_delete_with_error(tcp_info, SOCKET_CONNECT_FAIL);
                }
                break;
            case TCP_STATE_ESTABLISHED:
            case TCP_STATE_FIN_WAIT_1:
            case TCP_STATE_FIN_WAIT_2:
            case TCP_STATE_CLOSE_WAIT:
                tcp_session_delete_with_error(tcp_info, SOCKET_CONNECTION_RESET);
                break;
            case TCP_STATE_CLOSING:
            case TCP_STATE_LAST_ACK:
                tcp_session_delete(tcp_info);
                break;
            case TCP_STATE_TIME_WAIT: // RFC 1337
            default:
                break;
        }
        return buffer_free(buf);
    }

    if (flags & TCP_FLAG_SYN) {
        tr_debug("unexpected SYN");
        if (tcp_info->state == TCP_STATE_SYN_RECEIVED && tcp_info->passive_open) {
            // RFC 1122 4.2.2.20(e)
            tcp_session_delete_with_error(tcp_info, 0); // really no reset sent?
            return buffer_free(buf);
        }
        if (!(flags & TCP_FLAG_ACK)) {
            return tcp_reset_response(tcp_info, SOCKET_CONNECTION_RESET, buf, 0, seq_no + seg_len, TCP_FLAG_RST|TCP_FLAG_ACK);
        }
        return tcp_reset_response(tcp_info, SOCKET_CONNECTION_RESET, buf, ack_no, 0, TCP_FLAG_RST);
    }

    if (!(flags & TCP_FLAG_ACK)) {
        tr_debug("ACK not set");
        return buffer_free(buf);
    }

    if (tcp_info->state == TCP_STATE_SYN_RECEIVED) {
        if (ack_no == tcp_info->send_next) {
            tcp_info->send_wl1 = seq_no; // RFC 1122 4.2.2.20(f)
            tcp_info->send_wl2 = ack_no;
            tcp_info->send_window = window_size;
            tcp_session_established(cur, tcp_info);
        } else {
            return tcp_reset_response(NULL, 0, buf, ack_no, 0, TCP_FLAG_RST);
        }
    }

    if (tcp_seq_lt(ack_no, tcp_info->send_unacknowledged)) {
        tr_debug("Already acked ack_no=%"PRIu32", in-flight [%"PRIu32" - %"PRIu32")", ack_no, tcp_info->send_unacknowledged, tcp_info->send_next);
    } else if (tcp_seq_gt(ack_no, tcp_info->send_next)) {
        tr_debug("Future ack ack_no=%"PRIu32", in-flight [%"PRIu32" - %"PRIu32")", ack_no, tcp_info->send_unacknowledged, tcp_info->send_next);
        buffer_free(buf);
        // Generating this ack can lead to an ack storm if we're somehow out of sync...
        // Seems to be a TCP flaw...
        buf = tcp_ack_buffer(tcp_info, 0);
        /* Skip the ack if no buffer could be obtained */
        if (buf) {
            tcp_build(buf, tcp_info);
        }
        return NULL;
    } else /* SND.UNA <= SEG.ACK <= SND.NXT */ {
        /* Update window, if packet not older than last window information */
        if (tcp_seq_gt(seq_no, tcp_info->send_wl1) ||
                (seq_no == tcp_info->send_wl1 && tcp_seq_ge(ack_no, tcp_info->send_wl2))) {
            if (tcp_info->send_window != window_size) {
                tr_debug("Send window update %"PRIu16"->%"PRIu16, tcp_info->send_window, window_size);
            }
            tcp_info->send_wl1 = seq_no;
            tcp_info->send_wl2 = ack_no;
            tcp_info->send_window = window_size;
            // Watch out for shrinking right edge
            if (window_size == 0 && ack_no != tcp_info->send_next) {
                tcp_info->persist = true;
            }
        }

        if (ack_no != tcp_info->send_unacknowledged) {
            tr_debug("New ack_no=%"PRIu32", in-flight [%"PRIu32" - %"PRIu32")", ack_no, tcp_info->send_unacknowledged, tcp_info->send_next);
            tcp_ack_segment(ack_no, tcp_info);
        } else if (tcp_info->persist) {
            tcp_info->retry = 0;
            if (window_size != 0) {
                tcp_info->persist = false;
                // Got a pure window update. (Re)transmit now
                tcp_resend_segment(tcp_info);
            } else {
                // Let the retransmit timer keep on running, which will trigger
                // another persist probe at RTO after the last one (effectively
                // ignoring this ack for timing purposes), but reset the retry
                // count so we don't give up in resend_segment.
            }
        }

        /* Our FIN counts as acknowledged once nothing we sent remains outstanding */
        bool fin_acknowledged = tcp_info->sent_fin && tcp_info->send_unacknowledged == tcp_info->send_next;

        switch (tcp_info->state) {
            case TCP_STATE_FIN_WAIT_1:
                if (fin_acknowledged) {
                    tcp_info->state = TCP_STATE_FIN_WAIT_2;
                    // Only time out FIN_WAIT_2 if no longer have a socket
                    if (so->flags & SOCKET_FLAG_CLOSED) {
                        tcp_info->timer = TCP_FIN_WAIT_2_TIMEOUT;
                    } else {
                        tcp_info->timer = 0;
                    }
                    tr_debug("UP:sF2");
                }
                break;
            case TCP_STATE_CLOSING:
                if (fin_acknowledged) {
                    tcp_info->state = TCP_STATE_TIME_WAIT;
                    tr_debug("UP:sTW");
                    tcp_info->timer = TCP_TIME_WAIT_TO_CLOSE;
                }
                break;
            case TCP_STATE_LAST_ACK:
                if (fin_acknowledged) {
                    socket_event_push(SOCKET_CONNECT_CLOSED, so, cur->id, NULL, 0);
                    tr_debug("Last ACK remove Session");
                    tcp_session_delete(tcp_info);
                    return buffer_free(buf);
                }
                break;
            case TCP_STATE_TIME_WAIT:
                /* Restart the TIME-WAIT timer */
                tcp_info->timer = TCP_TIME_WAIT_TO_CLOSE;
                break;
            default:
                break;
        }
    }

syn_sent_to_established:
    /* No handling of urgent data: TCP_FLAG_URG was already masked out of
     * flags before option parsing, so there is nothing to test for here. */

    /* This is the point we're actually processing incoming text or FINs. All of the
     * above is done as long as in-window, but from here on we need the correct sequence.
     * seg_len is length including FIN marker.
     */
    if (seg_len > 0 && seq_no != tcp_info->receive_next) {
        tr_debug("Out-of-order data");
        buffer_t *ack_buf = tcp_ack_buffer(tcp_info, 0);
        /* Skip the ack if no buffer could be obtained */
        if (ack_buf) {
            tcp_build(ack_buf, tcp_info);
        }
        return buffer_free(buf);
    }

    /* Data only processed in some states - other states silently ignore */
    if (buffer_data_length(buf) &&
            (tcp_info->state == TCP_STATE_ESTABLISHED || tcp_info->state == TCP_STATE_FIN_WAIT_1 || tcp_info->state == TCP_STATE_FIN_WAIT_2)) {
        // This handles closed sockets (see RFC 1122 4.2.2.13 and RFC2525 2.16)
        // and also extends it to read shutdown (like Windows, apparently, but not Linux or BSD)
        if (so->flags & SOCKET_FLAG_CANT_RECV_MORE) {
            tr_warn("Excess data we can't receive - resetting");
            return tcp_reset_response(tcp_info, SOCKET_CONNECTION_RESET, buf, ack_no, 0, TCP_FLAG_RST);
        }

        buf->info = (buffer_info_t)(B_FROM_TCP | B_TO_NONE | B_DIR_UP);
        //tr_debug("data up");
        /* The receive queue takes over the buffer from here */
        sockbuf_append_and_compress(&so->rcvq, buf);
        buf = NULL;

        /* Notify the socket on FIN/PSH, or once half or less of the queue limit remains free */
        if ((flags & (TCP_FLAG_FIN|TCP_FLAG_PSH)) || sockbuf_space(&so->rcvq) <= (int32_t) (so->rcvq.data_byte_limit / 2)) {
            socket_data_queued_event_push(so);
        }
    }

    // Original buffer no longer required
    if (buf) {
        buf = buffer_free(buf);
    }

    if (flags & TCP_FLAG_FIN) {
        tr_debug("fin");
        switch (tcp_info->state) {
            case TCP_STATE_SYN_RECEIVED: // can't happen?
            case TCP_STATE_ESTABLISHED:
                tcp_info->state = TCP_STATE_CLOSE_WAIT;
                tcp_info->timer = 0;
                tr_debug("UP:sCW");
                socket_cant_recv_more(so, cur->id);
                break;
            case TCP_STATE_FIN_WAIT_1:
                /* If everything we sent is acknowledged go straight to TIME-WAIT, else CLOSING */
                if (tcp_info->send_unacknowledged == tcp_info->send_next) {
                    tcp_info->state = TCP_STATE_TIME_WAIT;
                    tcp_info->timer = TCP_TIME_WAIT_TO_CLOSE;
                    tr_debug("UP:sTW");
                } else {
                    tcp_info->state = TCP_STATE_CLOSING;
                    tr_debug("UP:sCl");
                }
                socket_cant_recv_more(so, cur->id);
                break;
            case TCP_STATE_FIN_WAIT_2:
                tcp_info->state = TCP_STATE_TIME_WAIT;
                tr_debug("UP:sTW");
                tcp_info->timer = TCP_TIME_WAIT_TO_CLOSE;
                socket_cant_recv_more(so, cur->id);
                socket_event_push(SOCKET_CONNECT_CLOSED, so, cur->id, NULL, 0);
                break;
            case TCP_STATE_TIME_WAIT:
                tcp_info->timer = TCP_TIME_WAIT_TO_CLOSE;
                break;
            default:
                tr_debug("UP:fin, other state");
                // No action
                break;
        }
    }

    // if there is new data to be acknowledged then send ack
    if (seg_len > 0) {
        segment_ack = tcp_ack_buffer(tcp_info, seg_len);
        /* Skip the ack if no buffer could be obtained */
        if (segment_ack) {
            tcp_build(segment_ack, tcp_info);
        }
    }

    /* buf is NULL here if it was queued to the socket; otherwise it holds
     * whatever buffer_free() returned */
    return buf;
}
const char *tcp_state_name(const tcp_session_t *tcp_info)
{
switch (tcp_info->state) {
case TCP_STATE_LISTEN:
return "LISTEN";
case TCP_STATE_SYN_SENT:
return "SYN-SENT";
case TCP_STATE_SYN_RECEIVED:
return "SYN-RECEIVED";
case TCP_STATE_ESTABLISHED:
return "ESTABLISHED";
case TCP_STATE_CLOSE_WAIT:
return "CLOSE-WAIT";
case TCP_STATE_LAST_ACK:
return "LAST-ACK";
case TCP_STATE_FIN_WAIT_1:
return "FIN-WAIT-1";
case TCP_STATE_FIN_WAIT_2:
return "FIN-WAIT-2";
case TCP_STATE_CLOSING:
return "CLOSING";
case TCP_STATE_TIME_WAIT:
return "TIME-WAIT";
default:
return "?";
}
}
#endif /*NO_TCP*/
/* end of file tcp.c */