
Am Mo., 25. Juni 2018 um 00:42 Uhr schrieb DH@synoia.com:
From: Duncan Hare DH@Synoia.com
Currently, file transfers are done using TFTP or NFS, both over UDP. This requires a request to be sent from the client (U-Boot) to the boot server.
The current standard is TCP with selective acknowledgment.
In our testing we have reduced kernel transmission time to around 0.4 seconds for a 4 MByte kernel, with a 100 Mbps downlink.
Series-Changes 11
- Add TCP with SACK
- Clean formatting
- Remove buffer search and print routines
Series-Changes 12
- Fix License statement
Signed-off-by: Duncan Hare DH@Synoia.com Signed-off-by: Duncan Hare DuncanCHare@yahoo.com
include/net.h | 11 +- include/net/tcp.h | 227 ++++++++++++++++++ net/Kconfig | 6 + net/Makefile | 1 + net/net.c | 33 +++ net/tcp.c | 700 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 977 insertions(+), 1 deletion(-) create mode 100644 include/net/tcp.h create mode 100644 net/tcp.c
diff --git a/include/net.h b/include/net.h index a54160fff6..ba96267eb8 100644 --- a/include/net.h +++ b/include/net.h @@ -26,6 +26,9 @@
*/
+#if defined(CONFIG_TCP) /* Protected UDP uses fewer buffers than TCP */ +#define CONFIG_SYS_RX_ETH_BUFFER 12 +#endif #ifdef CONFIG_SYS_RX_ETH_BUFFER # define PKTBUFSRX CONFIG_SYS_RX_ETH_BUFFER #else @@ -350,6 +353,7 @@ struct vlan_ethernet_hdr { #define PROT_PPP_SES 0x8864 /* PPPoE session messages */
#define IPPROTO_ICMP 1 /* Internet Control Message Protocol */ +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ #define IPPROTO_UDP 17 /* User Datagram Protocol */
/* @@ -659,7 +663,7 @@ static inline void net_send_packet(uchar *pkt, int len) }
/*
- Transmit "net_tx_packet" as UDP packet, performing ARP request if needed
- Transmit "net_tx_packet" as UDP or TCPpacket, send ARP request if needed
missing space - should be * Transmit "net_tx_packet" as UDP or TCP packet, send ARP request if needed
- (ether will be populated)
- @param ether Raw packet buffer
@@ -667,10 +671,15 @@ static inline void net_send_packet(uchar *pkt, int len)
- @param dport Destination UDP port
- @param sport Source UDP port
- @param payload_len Length of data after the UDP header
- @param action TCP action to be performed
- @param tcp_seq_num TCP sequence number of this transmission
*/
- @param tcp_ack_num TCP stream acknowledgement number
int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num); +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action,
u32 tcp_seq_num, u32 tcp_ack_num);
int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len);
diff --git a/include/net/tcp.h b/include/net/tcp.h new file mode 100644 index 0000000000..d0e90e07dd --- /dev/null +++ b/include/net/tcp.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*
- TCP Support with SACK for file transfer.
- Copyright 2017 Duncan Hare, All rights reserved.
- */
+#define TCP_ACTIVITY 127 /* Number of packets received */
/* before console progress mark */
+struct ip_tcp_hdr {
u8 ip_hl_v; /* header length and version */
u8 ip_tos; /* type of service */
u16 ip_len; /* total length */
u16 ip_id; /* identification */
u16 ip_off; /* fragment offset field */
u8 ip_ttl; /* time to live */
u8 ip_p; /* protocol */
u16 ip_sum; /* checksum */
struct in_addr ip_src; /* Source IP address */
struct in_addr ip_dst; /* Destination IP address */
u16 tcp_src; /* TCP source port */
u16 tcp_dst; /* TCP destination port */
u32 tcp_seq; /* TCP sequence number */
u32 tcp_ack; /* TCP Acknowledgment number */
u8 tcp_hlen; /* 4 bits TCP header Length/4 */
/* 4 bits Reserved */
/* 2 more bits reserved */
u8 tcp_flags; /* see defines */
u16 tcp_win; /* TCP windows size */
u16 tcp_xsum; /* Checksum */
u16 tcp_ugr; /* Pointer to urgent data */
+} __packed;
+#define IP_TCP_HDR_SIZE (sizeof(struct ip_tcp_hdr)) +#define TCP_HDR_SIZE (IP_TCP_HDR_SIZE - IP_HDR_SIZE)
+#define TCP_DATA 0x00 /* Data Packet - internal use only */ +#define TCP_FIN 0x01 /* Finish flag */ +#define TCP_SYN 0x02 /* Synch (start) flag */ +#define TCP_RST 0x04 /* reset flag */ +#define TCP_PUSH 0x08 /* Push - Notify app */ +#define TCP_ACK 0x10 /* Acknowledgment of data received */ +#define TCP_URG 0x20 /* Urgent */ +#define TCP_ECE 0x40 /* Congestion control */ +#define TCP_CWR 0x80 /* Congestion Control */
+/*
- TCP header options, Seq, MSS, and SACK
- */
+#define TCP_SACK 32 /* Number of packets analyzed */
/* on leading edge of stream */
+#define TCP_O_END 0x00 /* End of option list */ +#define TCP_1_NOP 0x01 /* Single padding NOP */ +#define TCP_O_NOP 0x01010101 /* NOPs pad to 32 bit boundary */ +#define TCP_O_MSS 0x02 /* MSS Size option */ +#define TCP_O_SCL 0x03 /* Window Scale option */ +#define TCP_P_SACK 0x04 /* SACK permitted */ +#define TCP_V_SACK 0x05 /* SACK values */ +#define TCP_O_TS 0x08 /* Timestamp option */ +#define TCP_OPT_LEN_2 0x02 +#define TCP_OPT_LEN_3 0x03 +#define TCP_OPT_LEN_4 0x04 +#define TCP_OPT_LEN_6 0x06 +#define TCP_OPT_LEN_8 0x08 +#define TCP_OPT_LEN_A 0x0a /* Timestamp Length */
+/*
- Please review the warning in net.c about these two parameters.
- They are part of a promise of RX buffer size to the sending TCP
- */
+#define TCP_MSS 1460 /* Max segment size */ +#define TCP_SCALE 0x01 /* Scale */
+struct tcp_mss { /* TCP Max Segment size */
u8 kind; /* Field ID */
u8 len; /* Field length */
u16 mss; /* Segment size value */
+} __packed;
+struct tcp_scale { /* TCP Windows Scale */
u8 kind; /* Field ID */
u8 len; /* Field length */
u8 scale; /* windows shift value used for */
/* networks with many hops */
/* Typically 4 or more hops */
extra empty line needed?
+} __packed;
+struct tcp_sack_p { /* SACK permitted */
u8 kind; /* Field Id */
u8 len; /* Field length */
+} __packed;
/* Terse definitions used */
/* long definitions make the */
/* indented code overflow line */
/* length limits */
wrong/funny indentation?
+struct sack_edges {
u32 l; /* Left edge of stream */
u32 r; /* right edge of stream */
+} __packed;
+#define TCP_SACK_SIZE (sizeof(struct sack_edges))
+/*
- A TCP stream has holes when packets are missing or disordered.
- A hill is the inverese of a hole, and is data received.
Should be A hill is the inverse of a ..
- TCP receiveds hills (a sequence of data), and inferrs Holes
Should be TCP received hills ..
- from the "hills" or packets received.
- */
+#define TCP_SACK_HILLS 4
+struct tcp_sack_v {
u8 kind; /* Field ID */
u8 len; /* Field Length */
struct sack_edges hill[TCP_SACK_HILLS]; /* L & R window edges */
+} __packed;
+struct tcp_t_opt { /* TCP time stamps option */
u8 kind; /* Field id */
u8 len; /* Field length */
u32 t_snd; /* Sender timestamp */
u32 t_rcv; /* Receiver timestamp */
+} __packed;
+#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt))
+/*
- ip tcp structure with options
- */
+struct ip_tcp_hdr_o {
struct ip_tcp_hdr hdr;
struct tcp_mss mss;
struct tcp_scale scale;
struct tcp_sack_p sack_p;
struct tcp_t_opt t_opt;
u8 end;
+} __packed;
+#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o))
+struct ip_tcp_hdr_s {
struct ip_tcp_hdr hdr;
struct tcp_t_opt t_opt;
struct tcp_sack_v sack_v;
u8 end;
+} __packed;
+#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s))
+/*
- TCP pseudo header definitions
- */
+#define PSEUDO_PAD_SIZE 8
+struct pseudo_hdr {
u8 padding[PSEUDO_PAD_SIZE]; /* pseudo hdr size = ip_tcp hdr size */
struct in_addr p_src;
struct in_addr p_dst;
u8 rsvd;
u8 p;
u16 len;
+} __packed;
/*
 * Size of the pseudo header proper: struct pseudo_hdr without its leading
 * padding. The whole expansion is parenthesized so the macro can be used
 * safely inside larger expressions (multiplication, subtraction, ...).
 */
#define PSEUDO_HDR_SIZE	(sizeof(struct pseudo_hdr) - PSEUDO_PAD_SIZE)
+/*
- union for building TCP/IP packet. Build Pseudo header in packed buffer
- first, calculate TCP checksum, then build IP header in packed buffer.
- */
+union tcp_build_pkt {
struct pseudo_hdr ph;
struct ip_tcp_hdr_o ip;
struct ip_tcp_hdr_s sack;
uchar raw[1600];
+} __packed;
+/*
- TCP State machine states for connection
- */
+enum TCP_STATE {
TCP_CLOSED, /* Need to send SYN to connect */
TCP_SYN_SENT, /* Trying to connect, waiting for SYN ACK */
TCP_ESTABLISHED, /* both server & client have a connection */
TCP_CLOSE_WAIT, /* Rec FIN, passed to app for FIN, ACK rsp*/
TCP_CLOSING, /* Rec FIN, sent FIN, ACK waiting for ACK */
TCP_FIN_WAIT_1, /* Sent FIN waiting for response */
TCP_FIN_WAIT_2 /* Rec ACK from FIN sent, waiting for FIN */
+};
+enum TCP_STATE tcp_get_tcp_state(void); +void tcp_set_tcp_state(enum TCP_STATE new_state); +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,
u8 action, u32 tcp_seq_num, u32 tcp_ack_num);
+/*
- An incoming packet handler.
- @param pkt pointer to the application packet
- @param dport destination UDP port
- @param sip source IP address
- @param sport source UDP port
- @param len packet length
- */
+typedef void rxhand_tcp(uchar *pkt, unsigned int dport,
struct in_addr sip, unsigned int sport,
unsigned int len);
+void tcp_set_tcp_handler(rxhand_tcp *f);
+void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len);
+/*
- An incoming TCP packet handler for the TCP protocol.
- There is also a dynamic function pointer for TCP based commands to
- receive incoming traffic after the TCP protocol code has done its work.
- */
+void rxhand_action(u8 tcp_action, int payload_len, u32 tcp_seq_num,
u32 tcp_ack_num, unsigned int pkt_len,
union tcp_build_pkt *b);
diff --git a/net/Kconfig b/net/Kconfig index f2363e5256..77ab683eb8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -22,4 +22,10 @@ config NETCONSOLE Support the 'nc' input/output device for networked console. See README.NetConsole for details.
+config TCP
bool "TCP stack"
help
TCP protocol support with SACK for wget. Selecting this will provide
the fastest file transfer possible.
endif # if NET diff --git a/net/Makefile b/net/Makefile index 07466879f5..237023407f 100644 --- a/net/Makefile +++ b/net/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_CMD_RARP) += rarp.o obj-$(CONFIG_CMD_SNTP) += sntp.o obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o +obj-$(CONFIG_TCP) += tcp.o
# Disable this warning as it is triggered by: # sprintf(buf, index ? "foo%d" : "foo", index) diff --git a/net/net.c b/net/net.c index f831c34599..8ac1ff050f 100644 --- a/net/net.c +++ b/net/net.c @@ -108,6 +108,7 @@ #if defined(CONFIG_CMD_SNTP) #include "sntp.h" #endif +#include <net/tcp.h>
/** BOOTP EXTENTIONS **/
@@ -380,6 +381,9 @@ void net_init(void)
/* Only need to setup buffer pointers once. */ first_call = 0;
+#if defined(CONFIG_TCP)
tcp_set_tcp_state(TCP_CLOSED);
+#endif }
net_init_loop();
@@ -790,6 +794,16 @@ return net_send_ip_packet(ether, dest, dport, sport, payload_len, IPPROTO_UDP, 0, 0, 0); }
#if defined(CONFIG_TCP)
/*
 * net_send_tcp_packet() - send a TCP segment to the configured server
 *
 * Convenience wrapper around net_send_ip_packet() that always targets
 * net_server_ethaddr / net_server_ip and selects IPPROTO_TCP.
 *
 * @payload_len: length of the data following the TCP header
 * @dport:       destination TCP port
 * @sport:       source TCP port
 * @action:      TCP flags/action handed on to the header builder
 * @tcp_seq_num: sequence number for this transmission
 * @tcp_ack_num: acknowledgment number for this transmission
 * Return: whatever net_send_ip_packet() returns
 */
int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action,
			u32 tcp_seq_num, u32 tcp_ack_num)
{
	int ret;

	ret = net_send_ip_packet(net_server_ethaddr, net_server_ip, dport,
				 sport, payload_len, IPPROTO_TCP, action,
				 tcp_seq_num, tcp_ack_num);

	return ret;
}
#endif
int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num) @@ -821,6 +835,15 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, dport, sport, payload_len); pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE; break; +#if defined(CONFIG_TCP)
case IPPROTO_TCP:
pkt_hdr_size = eth_hdr_size +
tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport,
payload_len, action, tcp_seq_num,
tcp_ack_num);
break;
+#endif
default: return -EINVAL; }
@@ -1229,6 +1252,16 @@ void net_process_received_packet(uchar *in_packet, int len) if (ip->ip_p == IPPROTO_ICMP) { receive_icmp(ip, len, src_ip, et); return; +#if defined(CONFIG_TCP)
} else if (ip->ip_p == IPPROTO_TCP) {
debug_cond(DEBUG_DEV_PKT,
"TCP PH (to=%pI4, from=%pI4, len=%d)\n",
&dst_ip, &src_ip, len);
rxhand_tcp_f((union tcp_build_pkt *)ip, len);
return;
+#endif
} else if (ip->ip_p != IPPROTO_UDP) { /* Only UDP packets */ return; }
diff --git a/net/tcp.c b/net/tcp.c new file mode 100644 index 0000000000..12fa0a72cd --- /dev/null +++ b/net/tcp.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: GPL-2.0 +/*
- Copyright 2017 Duncan Hare, all rights reserved.
- */
+/*
- General Description:
- TCP support for the wget command, for fast file downloading.
- HTTP/TCP Receiver:
Prequeisites: - own ethernet address
Should be * Prerequisites: - own ethernet address
- own IP address
- Server IP address
- Server with TCP
- TCP application (eg wget)
Next Step HTTPS?
- */
+#include <common.h> +#include <command.h> +#include <console.h> +#include <environment.h> +#include <errno.h> +#include <net.h> +#include <net/tcp.h>
+/*
- TCP sliding window control used by us to request re-TX
- */
+static struct tcp_sack_v tcp_lost;
+/* TCP option timestamp */ +static u32 loc_timestamp; +static u32 rmt_timestamp;
+u32 tcp_seq_init; +u32 tcp_ack_edge; +u32 tcp_seq_max;
+int tcp_activity_count;
+/*
- Search for TCP_SACK and review the comments before the code section
- TCP_SACK is the number of packets at the front of the stream
- */
+enum pkt_state {PKT, NOPKT}; +struct sack_r {
struct sack_edges se;
enum pkt_state st;
+};
+struct sack_r edge_a[TCP_SACK]; +unsigned int sack_idx; +unsigned int prev_len;
+/* TCP connection state */ +static enum TCP_STATE tcp_state;
+/*
- An incoming TCP packet handler for the TCP protocol.
- There is also a dynamic function pointer for TCP based commands to
- receive incoming traffic after the TCP protocol code has done its work.
- */
+/* Current TCP RX packet handler */ +static rxhand_tcp *tcp_packet_handler;
+enum TCP_STATE tcp_get_tcp_state(void) +{
return tcp_state;
+}
+void tcp_set_tcp_state(enum TCP_STATE new_state) +{
tcp_state = new_state;
+}
+static void dummy_handler(uchar *pkt, unsigned int dport,
struct in_addr sip, unsigned int sport,
unsigned int len)
+{ +}
+void tcp_set_tcp_handler(rxhand_tcp *f) +{
debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f);
if (!f)
tcp_packet_handler = dummy_handler;
else
tcp_packet_handler = f;
+}
+u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest,
int tcp_len, int pkt_len)
+{
union tcp_build_pkt *b = (union tcp_build_pkt *)pkt;
int checksum_len;
/*
* Pseudo header
*
* Zero the byte after the last byte so that the header checksum
* will always work.
*/
pkt[pkt_len] = 0x00;
net_copy_ip((void *)&b->ph.p_src, &src);
net_copy_ip((void *)&b->ph.p_dst, &dest);
b->ph.rsvd = 0x00;
b->ph.p = IPPROTO_TCP;
b->ph.len = htons(tcp_len);
checksum_len = tcp_len + PSEUDO_HDR_SIZE;
debug_cond(DEBUG_DEV_PKT,
"TCP Pesudo Header (to=%pI4, from=%pI4, Len=%d)\n",
&b->ph.p_dst, &b->ph.p_src, checksum_len);
return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len);
+}
+int net_set_ack_options(union tcp_build_pkt *b) +{
b->sack.hdr.tcp_hlen = (TCP_HDR_SIZE >> 2) << 4;
b->sack.t_opt.kind = TCP_O_TS;
b->sack.t_opt.len = TCP_OPT_LEN_A;
b->sack.t_opt.t_snd = htons(loc_timestamp);
b->sack.t_opt.t_rcv = rmt_timestamp;
b->sack.sack_v.kind = TCP_1_NOP;
b->sack.sack_v.len = 0x00;
if (tcp_lost.len > TCP_OPT_LEN_2) {
debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n",
tcp_lost.len);
b->sack.sack_v.len = tcp_lost.len;
b->sack.sack_v.kind = TCP_V_SACK;
b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l);
b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r);
/*
* These SACK structures are initialized with NOPs to
* provide TCP header alignment padding. There are 4
* SACK structures used for both header padding and
* internally.
*/
b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l);
b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r);
b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l);
b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r);
b->sack.sack_v.hill[3].l = TCP_O_NOP;
b->sack.sack_v.hill[3].r = TCP_O_NOP;
}
/*
* TCP lengths are stored as a rounded up number of 32 bit words
* Add 3 to length round up, rounded, then divided into the length
* in 32 bit words.
*/
b->sack.hdr.tcp_hlen = (((TCP_HDR_SIZE + TCP_TSOPT_SIZE
+ tcp_lost.len + 3) >> 2) << 4);
/*
* This returns the actual rounded up length of the
* TCP header to add to the total packet length
*/
return b->sack.hdr.tcp_hlen >> 2;
+}
+void net_set_syn_options(union tcp_build_pkt *b) +{
tcp_lost.len = 0;
b->ip.hdr.tcp_hlen = 0xa0;
b->ip.mss.kind = TCP_O_MSS;
b->ip.mss.len = TCP_OPT_LEN_4;
b->ip.mss.mss = htons(TCP_MSS);
b->ip.scale.kind = TCP_O_SCL;
b->ip.scale.scale = TCP_SCALE;
b->ip.scale.len = TCP_OPT_LEN_3;
b->ip.sack_p.kind = TCP_P_SACK;
b->ip.sack_p.len = TCP_OPT_LEN_2;
b->ip.t_opt.kind = TCP_O_TS;
b->ip.t_opt.len = TCP_OPT_LEN_A;
loc_timestamp = get_ticks();
rmt_timestamp = 0x00000000;
b->ip.t_opt.t_snd = 0;
b->ip.t_opt.t_rcv = 0x00000000;
b->ip.end = TCP_O_END;
+}
/*
 * tcp_set_tcp_header() - build the IP/TCP header of an outgoing segment
 *
 * Fills in the TCP header (plus options, depending on @action) at @pkt,
 * computes the TCP checksum through tcp_set_pseudo_header() and finally
 * writes the IP header with net_set_ip_header(). As a side effect the
 * connection state (tcp_state) is advanced for SYN and FIN actions.
 *
 * @pkt:         start of the IP header inside the transmit buffer
 * @dport:       destination TCP port
 * @sport:       source TCP port
 * @payload_len: length of the data following the TCP header
 * @action:      TCP flags requested by the caller
 * @tcp_seq_num: sequence number to place in the header (forced to 0 for SYN)
 * @tcp_ack_num: only used in debug output and zeroed for SYN; the ACK field
 *               on the wire is taken from the global tcp_ack_edge
 * Return: length of IP header + TCP header + TCP options in bytes
 */
+int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,
u8 action, u32 tcp_seq_num, u32 tcp_ack_num)
+{
union tcp_build_pkt *b = (union tcp_build_pkt *)pkt;
int pkt_hdr_len;
int pkt_len;
int tcp_len;
+/*
- Header: 5 32 bit words. 4 bits TCP header Length, 4 bits reserved options
- */
/* Defaults for an option-less header; individual cases override them */
b->ip.hdr.tcp_flags = action;
pkt_hdr_len = IP_TCP_HDR_SIZE;
b->ip.hdr.tcp_hlen = 0x50;
switch (action) {
case TCP_SYN:
debug_cond(DEBUG_DEV_PKT,
"TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n",
&net_server_ip, &net_ip,
tcp_seq_num, tcp_ack_num);
tcp_activity_count = 0;
net_set_syn_options(b);
tcp_seq_num = 0;
tcp_ack_num = 0;
pkt_hdr_len = IP_TCP_O_SIZE;
/*
 * NOTE(review): action is changed to TCP_FIN here, but tcp_flags was
 * already written from the original action above and is not updated,
 * so the flags in the header appear to remain SYN - confirm intent.
 */
if (tcp_state == TCP_SYN_SENT) { /* Too many SYNs */
action = TCP_FIN;
tcp_state = TCP_FIN_WAIT_1;
} else {
tcp_state = TCP_SYN_SENT;
}
break;
case TCP_ACK:
/* net_set_ack_options() returns the TCP header length in bytes */
pkt_hdr_len = IP_HDR_SIZE +
net_set_ack_options(b);
Do we need the extraces spaes before = and do we need to move net_set_ack_options(..) to next line?
b->ip.hdr.tcp_flags = action;
debug_cond(DEBUG_DEV_PKT,
"TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n",
&net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num,
action);
break;
case TCP_FIN:
debug_cond(DEBUG_DEV_PKT,
"TCP Hdr:FIN (%pI4, %pI4, s=%d, a=%d)\n",
&net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num);
/* A bare FIN carries no data and no options */
payload_len = 0;
pkt_hdr_len = IP_TCP_HDR_SIZE;
tcp_state = TCP_FIN_WAIT_1;
break;
/* Notify connection closing */
case (TCP_FIN | TCP_ACK):
case ((TCP_FIN | TCP_ACK) | TCP_PUSH):
if (tcp_state == TCP_CLOSE_WAIT)
tcp_state = TCP_CLOSING;
/* Advance the acknowledged edge by one (presumably for the peer's FIN) */
tcp_ack_edge++;
debug_cond(DEBUG_DEV_PKT,
"TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n",
&net_server_ip, &net_ip,
tcp_seq_num, tcp_ack_edge, action);
/* FALLTHRU */
default:
/* Every other action is sent with ACK options and PUSH | ACK forced on */
pkt_hdr_len = IP_HDR_SIZE +
net_set_ack_options(b);
Do we need the extraces spaes before = and do we need to move net_set_ack_options(..) to next line?
b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK;
debug_cond(DEBUG_DEV_PKT,
"TCP Hdr:dft (%pI4, %pI4, s=%d, a=%d, A=%x)\n",
&net_server_ip, &net_ip,
tcp_seq_num, tcp_ack_num, action);
}
pkt_len = pkt_hdr_len + payload_len;
tcp_len = pkt_len - IP_HDR_SIZE;
/* TCP Header */
/* The ACK field always carries tcp_ack_edge, not the tcp_ack_num argument */
b->ip.hdr.tcp_ack = htonl(tcp_ack_edge);
b->ip.hdr.tcp_src = htons(sport);
b->ip.hdr.tcp_dst = htons(dport);
b->ip.hdr.tcp_seq = htonl(tcp_seq_num);
/* NOTE(review): tcp_seq_num is a by-value parameter; this update is not read again */
tcp_seq_num = tcp_seq_num + payload_len;
/*
* TCP window size - TCP header variable tcp_win.
* Change tcp_win only if you have an understanding of network
* overrun, congestion, TCP segment sizes, TCP windows, TCP scale,
* queuing theory and packet buffering. If there are too few buffers,
* there will be data loss; recovery may work, or the sending TCP,
* the server, could abort the stream transmission.
* MSS is governed by maximum Ethernet frame length.
* The number of buffers is governed by the desire to have a queue of
* full buffers to be processed at the destination to maximize
* throughput. Temporary memory use for the boot phase on modern
* SOCs may not be considered a constraint to buffer space; if
* it is, then the u-boot tftp or nfs kernel netboot should be
* considered.
*/
b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE);
/* The checksum field must be zero while the checksum is computed */
b->ip.hdr.tcp_xsum = 0x0000;
b->ip.hdr.tcp_ugr = 0x0000;
b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip,
tcp_len, pkt_len);
/*
 * The IP header is written last; presumably it overwrites the area the
 * pseudo header occupied inside the union - TODO confirm.
 */
net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip,
pkt_len, IPPROTO_TCP);
return pkt_hdr_len;
+}
+/*
- Selective Acknowledgment (Essential for fast stream transfer)
- */
/*
 * tcp_hole() - record a received segment and rebuild the SACK list
 *
 * Stores the segment [tcp_seq_num, tcp_seq_num + len) in the edge_a[]
 * receive array, then scans the array from sack_idx onwards to
 *  - advance tcp_ack_edge across segments that start exactly at the edge,
 *  - collect the remaining received ranges ("hills") into tcp_lost so they
 *    can be reported in a SACK option.
 *
 * @tcp_seq_num: sequence number of the first byte of the segment
 * @len:         payload length of the segment
 * @tcp_seq_max: not referenced in the body (it also shadows the global of
 *               the same name)
 */
+void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max) +{
unsigned int idx_sack;
unsigned int sack_end = TCP_SACK - 1;
unsigned int sack_in;
unsigned int hill = 0;
enum pkt_state expect = PKT;
/* seq, hol_l and hol_r are only used for the debug output below */
u32 seq = tcp_seq_num - tcp_seq_init;
u32 hol_l = tcp_ack_edge - tcp_seq_init;
u32 hol_r = 0;
/* Place new seq number in correct place in receive array */
if (prev_len == 0)
prev_len = len;
/*
 * The slot is the segment's distance from the acknowledged edge, counted
 * in units of prev_len. NOTE(review): this assumes equally sized data
 * segments and prev_len != 0 - confirm callers never pass len == 0.
 */
idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len);
/* Segments that would land outside the array are silently not recorded */
if (idx_sack < TCP_SACK) {
edge_a[idx_sack].se.l = tcp_seq_num;
edge_a[idx_sack].se.r = tcp_seq_num + len;
edge_a[idx_sack].st = PKT;
+/*
- The fin (last) packet is not the same length as data packets, and if its
- length is recorded and used for array index calculation, calculation breaks.
- */
if (prev_len < len)
prev_len = len;
}
debug_cond(DEBUG_DEV_PKT,
"TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n",
seq, hol_l, len, sack_idx, sack_end);
/* Right edge of contiguous stream, is the left edge of first hill */
hol_l = tcp_seq_num - tcp_seq_init;
hol_r = hol_l + len;
/* Start from an empty SACK option: kind + length bytes only */
tcp_lost.len = TCP_OPT_LEN_2;
debug_cond(DEBUG_DEV_PKT,
"TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n",
idx_sack, seq, hol_l, hol_r, sack_idx, sack_end);
/*
 * Walk the receive array. "expect" tracks whether the previous slot held
 * a packet (PKT) or a gap (NOPKT). The bound is sack_end (TCP_SACK - 1),
 * exclusive, so the last array slot is not visited.
 */
for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS;
sack_in++) {
switch (expect) {
case NOPKT:
switch (edge_a[sack_in].st) {
case NOPKT:
debug_cond(DEBUG_INT_STATE, "N");
break;
case PKT:
/* First packet after a gap: it opens the current hill */
debug_cond(DEBUG_INT_STATE, "n");
tcp_lost.hill[hill].l =
edge_a[sack_in].se.l;
tcp_lost.hill[hill].r =
edge_a[sack_in].se.r;
expect = PKT;
break;
}
break;
case PKT:
switch (edge_a[sack_in].st) {
case NOPKT:
/* A gap after packets: close the current hill (8 bytes of edges) */
debug_cond(DEBUG_INT_STATE, "p");
if (sack_in > sack_idx &&
hill < TCP_SACK_HILLS) {
hill++;
tcp_lost.len += TCP_OPT_LEN_8;
}
expect = NOPKT;
break;
case PKT:
debug_cond(DEBUG_INT_STATE, "P");
/* In-order segment: consume it and move the acknowledged edge */
if (tcp_ack_edge == edge_a[sack_in].se.l) {
tcp_ack_edge = edge_a[sack_in].se.r;
edge_a[sack_in].st = NOPKT;
sack_idx++;
} else {
/* Out-of-order but adjacent: extend the current hill's right edge */
if (hill < TCP_SACK_HILLS)
tcp_lost.hill[hill].r =
edge_a[sack_in].se.r;
if (sack_in == sack_end - 1)
tcp_lost.hill[hill].r =
edge_a[sack_in].se.r;
}
break;
}
break;
}
}
debug_cond(DEBUG_INT_STATE, "\n");
/* No hills recorded: restart the scan window at slot 0 */
if (tcp_lost.len <= TCP_OPT_LEN_2)
sack_idx = 0;
+}
+void tcp_parse_options(uchar *o, int o_len) +{
struct tcp_t_opt *tsopt;
uchar *p = o;
+/*
NOPs are options with a zero length, and thus are special.
All other options have length fields.
- */
for (p = o; p < (o + o_len); p = p + p[1]) {
if (p[1] != 0) {
switch (p[0]) {
case TCP_O_END:
return; /* Finished processing options */
case TCP_O_MSS:
case TCP_O_SCL:
case TCP_P_SACK:
case TCP_V_SACK:
break; /* Continue to process options */
case TCP_O_TS:
tsopt = (struct tcp_t_opt *)p;
rmt_timestamp = tsopt->t_snd;
return;
break;
} /* End switch, process optional NOPs */
if (p[0] == TCP_O_NOP)
p++;
} else {
return; /* Finished processing options */
}
}
+}
+u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len) +{
u8 tcp_fin = tcp_flags & TCP_FIN;
u8 tcp_syn = tcp_flags & TCP_SYN;
u8 tcp_rst = tcp_flags & TCP_RST;
u8 tcp_push = tcp_flags & TCP_PUSH;
u8 tcp_ack = tcp_flags & TCP_ACK;
u8 action = TCP_DATA;
int i;
/*
* tcp_flags are examined to determine TX action in a given state
* tcp_push is interpreted to mean "inform the app"
* urg, ece, cer and nonce flags are not supported.
*
* exe and crw are use to signal and confirm knowledge of congestion.
* This TCP only sends a file request and acks. If it generates
* congestion, the network is broken.
*/
debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action);
if (tcp_rst) {
action = TCP_DATA;
tcp_state = TCP_CLOSED;
net_set_state(NETLOOP_FAIL);
debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags);
return TCP_RST;
}
switch (tcp_state) {
case TCP_CLOSED:
debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags);
if (tcp_fin)
action = TCP_DATA;
if (tcp_syn)
action = TCP_RST;
if (tcp_ack)
action = TCP_DATA;
break;
case TCP_SYN_SENT:
debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n",
tcp_flags, *tcp_seq_num);
if (tcp_fin) {
action = action | TCP_PUSH;
tcp_state = TCP_CLOSE_WAIT;
}
if (tcp_syn) {
action = action | TCP_ACK | TCP_PUSH;
if (tcp_ack) {
tcp_seq_init = *tcp_seq_num;
*tcp_seq_num = *tcp_seq_num + 1;
tcp_seq_max = *tcp_seq_num;
tcp_ack_edge = *tcp_seq_num;
sack_idx = 0;
edge_a[sack_idx].se.l = *tcp_seq_num;
edge_a[sack_idx].se.r = *tcp_seq_num;
prev_len = 0;
tcp_state = TCP_ESTABLISHED;
for (i = 0; i < TCP_SACK; i++)
edge_a[i].st = NOPKT;
}
} else {
if (tcp_ack)
action = TCP_DATA;
}
break;
case TCP_ESTABLISHED:
debug_cond(DEBUG_INT_STATE,
"TCP_ESTABLISHED %x\n", tcp_flags);
if (*tcp_seq_num > tcp_seq_max)
tcp_seq_max = *tcp_seq_num;
if (payload_len > 0) {
tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max);
tcp_fin = TCP_DATA; /* cause standalone FIN */
}
if ((tcp_fin) && tcp_lost.len <= TCP_OPT_LEN_2) {
action = action | TCP_FIN | TCP_PUSH | TCP_ACK;
tcp_state = TCP_CLOSE_WAIT;
} else {
if (tcp_ack)
action = TCP_DATA;
}
if (tcp_push)
action = action | TCP_PUSH;
if (tcp_syn)
action = TCP_ACK + TCP_RST;
break;
case TCP_CLOSE_WAIT:
debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags);
action = TCP_DATA; /* Wait for app */
break;
case TCP_FIN_WAIT_2:
debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags);
if (tcp_fin)
action = TCP_DATA;
if (tcp_syn)
action = TCP_DATA;
if (tcp_ack) {
action = TCP_PUSH | TCP_ACK;
tcp_state = TCP_CLOSED;
puts("\n");
}
break;
case TCP_FIN_WAIT_1:
debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags);
if (tcp_fin) {
action = TCP_ACK | TCP_FIN;
tcp_state = TCP_FIN_WAIT_2;
}
if (tcp_syn)
action = TCP_RST;
if (tcp_ack) {
tcp_state = TCP_CLOSED;
tcp_seq_num = tcp_seq_num + 1;
}
break;
case TCP_CLOSING:
debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags);
if (tcp_fin)
action = TCP_DATA;
if (tcp_syn)
action = TCP_RST;
if (tcp_ack) {
action = TCP_PUSH;
tcp_state = TCP_CLOSED;
puts("\n");
}
break;
}
return action;
+}
+void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) +{
int tcp_len = pkt_len - IP_HDR_SIZE;
u16 tcp_rx_xsum = b->ip.hdr.ip_sum;
u8 tcp_action = TCP_DATA;
u32 tcp_seq_num;
u32 tcp_ack_num;
struct in_addr action_and_state;
int tcp_hdr_len;
int payload_len;
/*
* Verify IP header
*/
debug_cond(DEBUG_DEV_PKT,
"TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n",
&b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len);
debug_cond(DEBUG_DEV_PKT,
"In__________________________________________\n");
b->ip.hdr.ip_src = net_server_ip;
b->ip.hdr.ip_dst = net_ip;
b->ip.hdr.ip_sum = 0x0000;
if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) {
debug_cond(DEBUG_DEV_PKT,
"TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n",
&net_ip, &net_server_ip, pkt_len);
return;
}
/*
* Build pseudo header and verify TCP header
*/
tcp_rx_xsum = b->ip.hdr.tcp_xsum;
b->ip.hdr.tcp_xsum = 0x0000;
if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src,
b->ip.hdr.ip_dst, tcp_len,
pkt_len)) {
debug_cond(DEBUG_DEV_PKT,
"TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n",
&net_ip, &net_server_ip, tcp_len);
return;
}
tcp_hdr_len = (b->ip.hdr.tcp_hlen >> 2);
payload_len = tcp_len - tcp_hdr_len;
if (tcp_hdr_len > TCP_HDR_SIZE)
tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE,
tcp_hdr_len - TCP_HDR_SIZE);
/*
* Incoming sequence and ack numbers are server's view of the numbers.
* The app must swap the numbers when responding.
*/
tcp_seq_num = ntohl(b->ip.hdr.tcp_seq);
tcp_ack_num = ntohl(b->ip.hdr.tcp_ack);
/* Packets are not ordered. Send to app as received. */
tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags,
&tcp_seq_num, payload_len);
/*
* State-altering command to be sent.
* The packet sequence and ack numbers are in the tcp_seq_num
* and tcp_ack_num variables. The current packet, its position
* in the data stream, is the in the range of those variables.
*
* In the "application push" invocation, the TCP header with all
* its information is pointed to by the packet pointer.
*
* In the typedef
* void rxhand_tcp(uchar *pkt, unsigned int dport,
* struct in_addr sip, unsigned int sport,
* unsigned int len);
* *pkt is the pointer to the payload
* dport is used for tcp_seg_num
* action_and_state.s_addr is used for TCP state
* sport is used for tcp_ack_num (which is unused by the app)
* pkt_ length is the payload length.
*
* TCP_PUSH from the state machine with a payload length of 0 is a
* connect or disconnect event
*/
tcp_activity_count++;
if (tcp_activity_count > TCP_ACTIVITY) {
puts("| ");
tcp_activity_count = 0;
}
if ((tcp_action & TCP_PUSH) || payload_len > 0) {
debug_cond(DEBUG_DEV_PKT,
"TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n",
tcp_action, tcp_seq_num, tcp_ack_num, payload_len);
action_and_state.s_addr = tcp_action;
(*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len,
tcp_seq_num, action_and_state,
tcp_ack_num, payload_len);
} else if (tcp_action != TCP_DATA) {
debug_cond(DEBUG_DEV_PKT,
"TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n",
tcp_action, tcp_seq_num, tcp_ack_num, payload_len);
/*
* Warning: Incoming Ack & Seq sequence numbers are transposed
* here to outgoing Seq & Ack sequence numbers
*/
net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src),
ntohs(b->ip.hdr.tcp_dst),
(tcp_action & (~TCP_PUSH)),
tcp_seq_num, tcp_ack_num);
}
+}
Overall this does not look too bad - hopefully it will land during this year.