Re: [U-Boot] [PATCH v12 2/3] Add TCP

14 Aug 2018

Am Mo., 25. Juni 2018 um 00:42 Uhr schrieb DH@synoia.com:
...
From: Duncan Hare DH@Synoia.com
Currently file transfers are done using tftp or NFS both
over udp. This requires a request to be sent from client
(u-boot) to the boot server.
The current standard is TCP with selective acknowledgment.
In our testing we have reduced kernel transmission time to
around 0.4 seconds for a 4Mbyte kernel, with a 100 Mbps
downlink.
Series-Changes 11

Add TCP with SACK
Clean formatting
Remove buffer search and print routines

Series-Changes 12

Fix License statement

Signed-off-by: Duncan Hare DH@Synoia.com
Signed-off-by: Duncan Hare DuncanCHare@yahoo.com

include/net.h     |  11 +-
 include/net/tcp.h | 227 ++++++++++++++++++
 net/Kconfig       |   6 +
 net/Makefile      |   1 +
 net/net.c         |  33 +++
 net/tcp.c         | 700 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 977 insertions(+), 1 deletion(-)
 create mode 100644 include/net/tcp.h
 create mode 100644 net/tcp.c

diff --git a/include/net.h b/include/net.h
index a54160fff6..ba96267eb8 100644
--- a/include/net.h
+++ b/include/net.h
@@ -26,6 +26,9 @@



*/
+#if defined(CONFIG_TCP)                /* Protected: UDP uses fewer buffers than TCP */
+#define CONFIG_SYS_RX_ETH_BUFFER 12
+#endif
 #ifdef CONFIG_SYS_RX_ETH_BUFFER
 # define PKTBUFSRX     CONFIG_SYS_RX_ETH_BUFFER
 #else
@@ -350,6 +353,7 @@ struct vlan_ethernet_hdr {
 #define PROT_PPP_SES   0x8864          /* PPPoE session messages       */
#define IPPROTO_ICMP    1      /* Internet Control Message Protocol    */
+#define IPPROTO_TCP     6      /* Transmission Control Protocol        */
 #define IPPROTO_UDP    17      /* User Datagram Protocol               */
/*
@@ -659,7 +663,7 @@ static inline void net_send_packet(uchar *pkt, int len)
 }
/*


Transmit "net_tx_packet" as UDP packet, performing ARP request if needed





Transmit "net_tx_packet" as UDP or TCPpacket, send ARP request if needed



missing space - should be
 * Transmit "net_tx_packet" as UDP or TCP packet, send ARP request if needed
...

(ether will be populated)

@param ether Raw packet buffer

@@ -667,10 +671,15 @@ static inline void net_send_packet(uchar *pkt, int len)

@param dport Destination UDP port
@param sport Source UDP port
@param payload_len Length of data after the UDP header



@param action TCP action to be performed



@param tcp_seq_num TCP sequence number of this transmission



@param tcp_ack_num TCP stream acknowledgement number

*/

int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport,
                       int payload_len, int proto, u8 action, u32 tcp_seq_num,
                       u32 tcp_ack_num);
+int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action,

                  u32 tcp_seq_num, u32 tcp_ack_num);



int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport,
                        int sport, int payload_len);
diff --git a/include/net/tcp.h b/include/net/tcp.h
new file mode 100644
index 0000000000..d0e90e07dd
--- /dev/null
+++ b/include/net/tcp.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*


TCP Support with SACK for file transfer.







Copyright 2017 Duncan Hare, All rights reserved.


*/


+#define TCP_ACTIVITY 127               /* Number of packets received   */

                                  /* before console progress mark */




+struct ip_tcp_hdr {

  u8              ip_hl_v;        /* header length and version    */


  u8              ip_tos;         /* type of service              */


  u16             ip_len;         /* total length                 */


  u16             ip_id;          /* identification               */


  u16             ip_off;         /* fragment offset field        */


  u8              ip_ttl;         /* time to live                 */


  u8              ip_p;           /* protocol                     */


  u16             ip_sum;         /* checksum                     */


  struct in_addr  ip_src;         /* Source IP address            */


  struct in_addr  ip_dst;         /* Destination IP address       */


  u16             tcp_src;        /* TCP source port              */


  u16             tcp_dst;        /* TCP destination port         */


  u32             tcp_seq;        /* TCP sequence number          */


  u32             tcp_ack;        /* TCP Acknowledgment number    */


  u8              tcp_hlen;       /* 4 bits TCP header Length/4   */


                                  /* 4 bits Reserved              */


                                  /* 2 more bits reserved         */


  u8              tcp_flags;      /* see defines                  */


  u16             tcp_win;        /* TCP windows size             */


  u16             tcp_xsum;       /* Checksum                     */


  u16             tcp_ugr;        /* Pointer to urgent data       */



+} __packed;



+#define IP_TCP_HDR_SIZE                (sizeof(struct ip_tcp_hdr))
+#define TCP_HDR_SIZE           (IP_TCP_HDR_SIZE  - IP_HDR_SIZE)



+#define TCP_DATA       0x00    /* Data Packet - internal use only      */
+#define TCP_FIN                0x01    /* Finish flag                          */
+#define TCP_SYN                0x02    /* Synch (start) flag                   */
+#define TCP_RST                0x04    /* reset flag                           */
+#define TCP_PUSH       0x08    /* Push - Notify app                    */
+#define TCP_ACK                0x10    /* Acknowledgment of data received      */
+#define TCP_URG                0x20    /* Urgent                               */
+#define TCP_ECE                0x40    /* Congestion control                   */
+#define TCP_CWR                0x80    /* Congestion Control                   */



+/*


TCP header options, Seq, MSS, and SACK


*/


+#define TCP_SACK 32                    /* Number of packets analyzed   */

                                  /* on leading edge of stream    */




+#define TCP_O_END      0x00            /* End of option list           */
+#define TCP_1_NOP      0x01            /* Single padding NOP           */
+#define TCP_O_NOP      0x01010101      /* NOPs pad to 32 bit boundary  */
+#define TCP_O_MSS      0x02            /* MSS Size option              */
+#define TCP_O_SCL      0x03            /* Window Scale option          */
+#define TCP_P_SACK     0x04            /* SACK permitted               */
+#define TCP_V_SACK     0x05            /* SACK values                  */
+#define TCP_O_TS       0x08            /* Timestamp option             */
+#define TCP_OPT_LEN_2  0x02
+#define TCP_OPT_LEN_3  0x03
+#define TCP_OPT_LEN_4  0x04
+#define TCP_OPT_LEN_6  0x06
+#define TCP_OPT_LEN_8  0x08
+#define TCP_OPT_LEN_A  0x0a            /* Timestamp Length             */



+/*


Please review the warning in net.c about these two parameters.



They are part of a promise of RX buffer size to the sending TCP


*/


+#define TCP_MSS                1460            /* Max segment size             */
+#define TCP_SCALE      0x01            /* Scale                        */



+struct tcp_mss {                       /* TCP Max Segment size         */

  u8      kind;                   /* Field ID                     */


  u8      len;                    /* Field length                 */


  u16     mss;                    /* Segment size value           */



+} __packed;



+struct tcp_scale {                     /* TCP Windows Scale            */

  u8      kind;                   /* Field ID                     */


  u8      len;                    /* Field length                 */


  u8      scale;                  /* windows shift value used for */


                                  /* networks with many hops      */


                                  /* Typically 4 or more hops     */



extra empty line needed?
...
+} __packed;



+struct tcp_sack_p {                    /* SACK permitted               */

  u8      kind;                   /* Field Id                     */


  u8      len;                    /* Field length                 */



+} __packed;


                                  /* Terse definitions used       */


                                  /* long definitions make the    */


                                  /* indented code overflow line  */


                                  /* length limits                */



wrong/funny indentation?
...
+struct sack_edges {

  u32     l;                      /* Left edge of stream          */


  u32     r;                      /* right edge of stream         */



+} __packed;



+#define TCP_SACK_SIZE (sizeof(struct sack_edges))



+/*


A TCP stream has holes when packets are missing or disordered.



A hill is the inverese of a hole, and is data received.



Should be
A hill is the inverse of a ..
...


TCP receiveds hills (a sequence of data), and inferrs Holes



Should be
TCP received hills ..
...


from the "hills" or packets received.


*/


+#define TCP_SACK_HILLS 4



+struct tcp_sack_v {

  u8      kind;                   /* Field ID                     */


  u8      len;                    /* Field Length                 */


  struct  sack_edges hill[TCP_SACK_HILLS]; /* L & R window edges  */



+} __packed;



+struct tcp_t_opt {                     /* TCP time stamps option       */

  u8      kind;                   /* Field id                     */


  u8      len;                    /* Field length                 */


  u32     t_snd;                  /* Sender timestamp             */


  u32     t_rcv;                  /* Receiver timestamp           */



+} __packed;



+#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt))



+/*


ip tcp  structure with options


*/


+struct ip_tcp_hdr_o {

  struct  ip_tcp_hdr hdr;


  struct  tcp_mss    mss;


  struct  tcp_scale  scale;


  struct  tcp_sack_p sack_p;


  struct  tcp_t_opt  t_opt;


  u8      end;



+} __packed;



+#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o))



+struct ip_tcp_hdr_s {

  struct  ip_tcp_hdr      hdr;


  struct  tcp_t_opt       t_opt;


  struct  tcp_sack_v      sack_v;


  u8      end;



+} __packed;



+#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s))



+/*


TCP pseudo header definitions


*/

+#define PSEUDO_PAD_SIZE        8



+struct pseudo_hdr {

  u8 padding[PSEUDO_PAD_SIZE];    /* pseudo hdr size = ip_tcp hdr size */


  struct in_addr p_src;


  struct in_addr p_dst;


  u8      rsvd;


  u8      p;


  u16     len;



+} __packed;



/*
 * Size of the pseudo header proper: the whole struct minus its leading
 * padding. The expansion is fully parenthesized so the macro can be used
 * safely inside larger expressions (e.g. multiplied or subtracted).
 */
#define PSEUDO_HDR_SIZE        (sizeof(struct pseudo_hdr) - PSEUDO_PAD_SIZE)



+/*


union for building TCP/IP packet. Build Pseudo header in packed buffer



first, calculate TCP checksum, then build IP header in packed buffer.


*/


+union tcp_build_pkt {

  struct pseudo_hdr ph;


  struct ip_tcp_hdr_o ip;


  struct ip_tcp_hdr_s sack;


  uchar  raw[1600];



+} __packed;



+/*


TCP State machine states for connection


*/


+enum TCP_STATE {

  TCP_CLOSED,             /* Need to send SYN to connect            */


  TCP_SYN_SENT,           /* Trying to connect, waiting for SYN ACK */


  TCP_ESTABLISHED,        /* both server & client have a connection */


  TCP_CLOSE_WAIT,         /* Rec FIN, passed to app for FIN, ACK rsp*/


  TCP_CLOSING,            /* Rec FIN, sent FIN, ACK waiting for ACK */


  TCP_FIN_WAIT_1,         /* Sent FIN waiting for response          */


  TCP_FIN_WAIT_2          /* Rec ACK from FIN sent, waiting for FIN */



+};



+enum TCP_STATE tcp_get_tcp_state(void);
+void tcp_set_tcp_state(enum TCP_STATE new_state);
+int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,

                 u8 action, u32 tcp_seq_num, u32 tcp_ack_num);




+/*


An incoming packet handler.



@param pkt    pointer to the application packet



@param dport  destination TCP port



@param sip    source IP address



@param sport  source TCP port



@param len    packet length


*/

+typedef void rxhand_tcp(uchar *pkt, unsigned int dport,

                  struct in_addr sip, unsigned int sport,


                  unsigned int len);



+void tcp_set_tcp_handler(rxhand_tcp *f);



+void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len);



+/*


An incoming TCP packet handler for the TCP protocol.



There is also a dynamic function pointer for TCP based commands to



receive incoming traffic after the TCP protocol code has done its work.


*/


+void rxhand_action(u8 tcp_action, int payload_len, u32 tcp_seq_num,

             u32 tcp_ack_num, unsigned int pkt_len,


             union tcp_build_pkt *b);



diff --git a/net/Kconfig b/net/Kconfig
index f2363e5256..77ab683eb8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -22,4 +22,10 @@ config NETCONSOLE
          Support the 'nc' input/output device for networked console.
          See README.NetConsole for details.
+config TCP

  bool "TCP stack"


  help


    TCP protocol support with SACK for wget. Selecting this will provide


    the fastest file transfer possible.




endif   # if NET
diff --git a/net/Makefile b/net/Makefile
index 07466879f5..237023407f 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_CMD_RARP) += rarp.o
 obj-$(CONFIG_CMD_SNTP) += sntp.o
 obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o
 obj-$(CONFIG_UDP_FUNCTION_FASTBOOT)  += fastboot.o
+obj-$(CONFIG_TCP)      += tcp.o
# Disable this warning as it is triggered by:
 # sprintf(buf, index ? "foo%d" : "foo", index)
diff --git a/net/net.c b/net/net.c
index f831c34599..8ac1ff050f 100644
--- a/net/net.c
+++ b/net/net.c
@@ -108,6 +108,7 @@
 #if defined(CONFIG_CMD_SNTP)
 #include "sntp.h"
 #endif
+#include <net/tcp.h>
/** BOOTP EXTENTIONS **/
@@ -380,6 +381,9 @@ void net_init(void)
            /* Only need to setup buffer pointers once. */
            first_call = 0;

+#if defined(CONFIG_TCP)

          tcp_set_tcp_state(TCP_CLOSED);



+#endif
        }
    net_init_loop();

@@ -790,6 +794,16 @@ return net_send_ip_packet(ether, dest, dport, sport, payload_len,
                          IPPROTO_UDP, 0, 0, 0);
 }
+#if defined(CONFIG_TCP)
+int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action,

                  u32 tcp_seq_num, u32 tcp_ack_num)



+{

  return net_send_ip_packet(net_server_ethaddr, net_server_ip, dport,


                            sport, payload_len, IPPROTO_TCP, action,


                            tcp_seq_num, tcp_ack_num);



+}
+#endif



int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport,
                       int payload_len, int proto, u8 action, u32 tcp_seq_num,
                       u32 tcp_ack_num)
@@ -821,6 +835,15 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport,
                                   dport, sport, payload_len);
                pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE;
                break;
+#if defined(CONFIG_TCP)

  case IPPROTO_TCP:


          pkt_hdr_size = eth_hdr_size +


          tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport,


                             payload_len, action, tcp_seq_num,


                             tcp_ack_num);


  break;



+#endif

  default: return -EINVAL;
  }




@@ -1229,6 +1252,16 @@ void net_process_received_packet(uchar *in_packet, int len)
                if (ip->ip_p == IPPROTO_ICMP) {
                        receive_icmp(ip, len, src_ip, et);
                        return;
+#if defined(CONFIG_TCP)

          } else if (ip->ip_p == IPPROTO_TCP) {


                  debug_cond(DEBUG_DEV_PKT,


                             "TCP PH (to=%pI4, from=%pI4, len=%d)\n",


                             &dst_ip, &src_ip, len);



                  rxhand_tcp_f((union tcp_build_pkt *)ip, len);


                  return;



+#endif

          } else if (ip->ip_p != IPPROTO_UDP) {   /* Only UDP packets */
                  return;
          }



diff --git a/net/tcp.c b/net/tcp.c
new file mode 100644
index 0000000000..12fa0a72cd
--- /dev/null
+++ b/net/tcp.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0
+/*


Copyright 2017 Duncan Hare, all rights reserved.


*/


+/*


General Description:







TCP support for the wget command, for fast file downloading.







HTTP/TCP Receiver:







 Prequeisites:   - own ethernet address





Should be
*      Prerequisites:   - own ethernet address
...


                 - own IP address





                 - Server IP address





                 - Server with TCP





                 - TCP application (eg wget)





 Next Step       HTTPS?




*/

+#include <common.h>
+#include <command.h>
+#include <console.h>
+#include <environment.h>
+#include <errno.h>
+#include <net.h>
+#include <net/tcp.h>



+/*


TCP sliding window  control used by us to request re-TX


*/


+static struct tcp_sack_v tcp_lost;



+/* TCP option timestamp */
+static u32 loc_timestamp;
+static u32 rmt_timestamp;



+u32 tcp_seq_init;
+u32 tcp_ack_edge;
+u32 tcp_seq_max;



+int tcp_activity_count;



+/*


Search for TCP_SACK and review the comments before the code section



TCP_SACK is the number of packets at the front of the stream


*/


+enum pkt_state {PKT, NOPKT};
+struct sack_r {

  struct sack_edges se;


  enum   pkt_state st;



+};



+struct sack_r edge_a[TCP_SACK];
+unsigned int sack_idx;
+unsigned int prev_len;



+/* TCP connection state */
+static enum TCP_STATE tcp_state;



+/*


An incoming TCP packet handler for the TCP protocol.



There is also a dynamic function pointer for TCP based commands to



receive incoming traffic after the TCP protocol code has done its work.


*/


+/* Current TCP RX packet handler */
+static rxhand_tcp *tcp_packet_handler;



+enum TCP_STATE tcp_get_tcp_state(void)
+{

  return tcp_state;



+}



+void tcp_set_tcp_state(enum TCP_STATE new_state)
+{

  tcp_state = new_state;



+}



+static void dummy_handler(uchar *pkt, unsigned int dport,

                    struct in_addr sip, unsigned int sport,


                    unsigned int len)



+{
+}



+void tcp_set_tcp_handler(rxhand_tcp *f)
+{

  debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f);


  if (!f)


          tcp_packet_handler = dummy_handler;


  else


          tcp_packet_handler = f;



+}



+u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest,

                    int tcp_len, int pkt_len)



+{

  union tcp_build_pkt *b = (union tcp_build_pkt *)pkt;


  int checksum_len;



  /*


   * Pseudo header


   *


   * Zero the byte after the last byte so that the header checksum


   * will always work.


   */



  pkt[pkt_len] = 0x00;



  net_copy_ip((void *)&b->ph.p_src, &src);


  net_copy_ip((void *)&b->ph.p_dst, &dest);


  b->ph.rsvd      = 0x00;


  b->ph.p         = IPPROTO_TCP;


  b->ph.len       = htons(tcp_len);


  checksum_len    = tcp_len + PSEUDO_HDR_SIZE;



  debug_cond(DEBUG_DEV_PKT,


             "TCP Pesudo  Header  (to=%pI4, from=%pI4, Len=%d)\n",


             &b->ph.p_dst, &b->ph.p_src, checksum_len);



  return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len);



+}



+int net_set_ack_options(union tcp_build_pkt *b)
+{

  b->sack.hdr.tcp_hlen  = (TCP_HDR_SIZE >> 2) << 4;



  b->sack.t_opt.kind  = TCP_O_TS;


  b->sack.t_opt.len   = TCP_OPT_LEN_A;


  b->sack.t_opt.t_snd = htons(loc_timestamp);


  b->sack.t_opt.t_rcv = rmt_timestamp;


  b->sack.sack_v.kind = TCP_1_NOP;


  b->sack.sack_v.len  = 0x00;



  if (tcp_lost.len > TCP_OPT_LEN_2) {


          debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n",


                     tcp_lost.len);


          b->sack.sack_v.len       = tcp_lost.len;


          b->sack.sack_v.kind      = TCP_V_SACK;


          b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l);


          b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r);



          /*


           * These SACK structures are initialized with NOPs to


           * provide TCP header alignment padding. There are 4


           * SACK structures used for both header padding and


           * internally.


           */



          b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l);


          b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r);


          b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l);


          b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r);


          b->sack.sack_v.hill[3].l = TCP_O_NOP;


          b->sack.sack_v.hill[3].r = TCP_O_NOP;


  }



  /*


   * TCP lengths are stored as a rounded up number of 32 bit words


   * Add 3 to length round up, rounded, then divided into the length


   * in 32 bit words.


   */



  b->sack.hdr.tcp_hlen = (((TCP_HDR_SIZE + TCP_TSOPT_SIZE


                          + tcp_lost.len + 3)  >> 2) << 4);



  /*


   * This returns the actual rounded up length of the


   * TCP header to add to the total packet length


   */



  return b->sack.hdr.tcp_hlen >> 2;



+}



+void net_set_syn_options(union tcp_build_pkt *b)
+{

  tcp_lost.len            = 0;


  b->ip.hdr.tcp_hlen      = 0xa0;



  b->ip.mss.kind          = TCP_O_MSS;


  b->ip.mss.len           = TCP_OPT_LEN_4;


  b->ip.mss.mss           = htons(TCP_MSS);


  b->ip.scale.kind        = TCP_O_SCL;


  b->ip.scale.scale       = TCP_SCALE;


  b->ip.scale.len         = TCP_OPT_LEN_3;


  b->ip.sack_p.kind       = TCP_P_SACK;


  b->ip.sack_p.len        = TCP_OPT_LEN_2;


  b->ip.t_opt.kind        = TCP_O_TS;


  b->ip.t_opt.len         = TCP_OPT_LEN_A;


  loc_timestamp           = get_ticks();


  rmt_timestamp           = 0x00000000;


  b->ip.t_opt.t_snd       = 0;


  b->ip.t_opt.t_rcv       = 0x00000000;


  b->ip.end               = TCP_O_END;



+}



+int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,

                 u8 action, u32 tcp_seq_num, u32 tcp_ack_num)



+{

  union tcp_build_pkt *b = (union tcp_build_pkt *)pkt;


  int     pkt_hdr_len;


  int     pkt_len;


  int     tcp_len;




+/*


Header: 5 32 bit words. 4 bits TCP header Length, 4 bits reserved options


*/
  b->ip.hdr.tcp_flags     = action;


  pkt_hdr_len             = IP_TCP_HDR_SIZE;


  b->ip.hdr.tcp_hlen      = 0x50;



  switch (action) {


  case TCP_SYN:


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n",


                     &net_server_ip, &net_ip,


                     tcp_seq_num, tcp_ack_num);


          tcp_activity_count = 0;


          net_set_syn_options(b);


          tcp_seq_num = 0;


          tcp_ack_num = 0;


          pkt_hdr_len = IP_TCP_O_SIZE;


          if (tcp_state == TCP_SYN_SENT) {  /* Too many SYNs */


                  action    = TCP_FIN;


                  tcp_state = TCP_FIN_WAIT_1;


          } else {


                  tcp_state = TCP_SYN_SENT;


          }


  break;


  case TCP_ACK:


          pkt_hdr_len         = IP_HDR_SIZE +


                                net_set_ack_options(b);



Do we need the extra spaces before = and do we need to move
net_set_ack_options(..) to next line?
...

          b->ip.hdr.tcp_flags = action;


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n",


                     &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num,


                     action);


  break;


  case TCP_FIN:


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Hdr:FIN  (%pI4, %pI4, s=%d, a=%d)\n",


                     &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num);


          payload_len = 0;


          pkt_hdr_len = IP_TCP_HDR_SIZE;


          tcp_state   = TCP_FIN_WAIT_1;



  break;



  /* Notify connection closing */



  case (TCP_FIN | TCP_ACK):


  case ((TCP_FIN | TCP_ACK) | TCP_PUSH):


          if (tcp_state == TCP_CLOSE_WAIT)


                  tcp_state = TCP_CLOSING;


          tcp_ack_edge++;


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n",


                     &net_server_ip, &net_ip,


                     tcp_seq_num, tcp_ack_edge, action);


                                  /* FALLTHRU */


  default:


          pkt_hdr_len         = IP_HDR_SIZE +


                                net_set_ack_options(b);



Do we need the extra spaces before = and do we need to move
net_set_ack_options(..) to next line?
...

          b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK;


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Hdr:dft  (%pI4, %pI4, s=%d, a=%d, A=%x)\n",


                     &net_server_ip, &net_ip,


                     tcp_seq_num, tcp_ack_num, action);


  }



  pkt_len = pkt_hdr_len + payload_len;


  tcp_len = pkt_len - IP_HDR_SIZE;



  /* TCP Header */


  b->ip.hdr.tcp_ack       = htonl(tcp_ack_edge);


  b->ip.hdr.tcp_src       = htons(sport);


  b->ip.hdr.tcp_dst       = htons(dport);


  b->ip.hdr.tcp_seq       = htonl(tcp_seq_num);


  tcp_seq_num             = tcp_seq_num + payload_len;



  /*


   * TCP window size - TCP header variable tcp_win.


   * Change tcp_win only if you have an understanding of network


   * overrun, congestion, TCP segment sizes, TCP windows, TCP scale,


   * queuing theory  and packet buffering. If there are too few buffers,


   * there will be data loss, recovery may work or the sending TCP,


   * the server, could abort the stream transmission.


   * MSS is governed by maximum Ethernet frame length.


   * The number of buffers is governed by the desire to have a queue of


   * full buffers to be processed at the destination to maximize


   * throughput. Temporary memory use for the boot phase on modern


   * SOCs may not be considered a constraint to buffer space, if


   * it is, then the u-boot tftp or nfs kernel netboot should be


   * considered.


   */



  b->ip.hdr.tcp_win       = htons(PKTBUFSRX * TCP_MSS >>  TCP_SCALE);



  b->ip.hdr.tcp_xsum      = 0x0000;


  b->ip.hdr.tcp_ugr       = 0x0000;



  b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip,


                                             tcp_len, pkt_len);



  net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip,


                    pkt_len, IPPROTO_TCP);



  return pkt_hdr_len;



+}



+/*


Selective Acknowledgment (Essential for fast stream transfer)


*/


+void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max)
+{

  unsigned int idx_sack;


  unsigned int sack_end = TCP_SACK - 1;


  unsigned int sack_in;


  unsigned int hill = 0;


  enum pkt_state expect = PKT;



  u32 seq   = tcp_seq_num - tcp_seq_init;


  u32 hol_l = tcp_ack_edge - tcp_seq_init;


  u32 hol_r = 0;



  /* Place new seq number in correct place in receive array */



  if (prev_len == 0)


          prev_len = len;


  idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len);


  if (idx_sack < TCP_SACK) {


          edge_a[idx_sack].se.l = tcp_seq_num;


          edge_a[idx_sack].se.r = tcp_seq_num + len;


          edge_a[idx_sack].st   = PKT;




+/*


The fin (last) packet is not the same length as data packets, and if it's



length is recorded and used for array index calculation, calculation breaks.


*/
          if (prev_len < len)


                  prev_len = len;


  }



  debug_cond(DEBUG_DEV_PKT,


             "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n",


              seq, hol_l, len, sack_idx, sack_end);



  /* Right edge of contiguous stream, is the left edge of first hill */



  hol_l = tcp_seq_num - tcp_seq_init;


  hol_r = hol_l + len;



  tcp_lost.len = TCP_OPT_LEN_2;



  debug_cond(DEBUG_DEV_PKT,


             "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n",


             idx_sack, seq, hol_l, hol_r, sack_idx, sack_end);



  for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS;


           sack_in++)  {


          switch (expect) {


          case NOPKT:


                  switch (edge_a[sack_in].st) {


                  case NOPKT:


                          debug_cond(DEBUG_INT_STATE, "N");


                  break;


                  case PKT:


                          debug_cond(DEBUG_INT_STATE, "n");


                                  tcp_lost.hill[hill].l =


                                          edge_a[sack_in].se.l;


                                  tcp_lost.hill[hill].r =


                                          edge_a[sack_in].se.r;


                          expect = PKT;


                  break;


                  }


          break;


          case PKT:


                  switch (edge_a[sack_in].st) {


                  case NOPKT:


                          debug_cond(DEBUG_INT_STATE, "p");


                          if (sack_in > sack_idx &&


                              hill < TCP_SACK_HILLS) {


                                  hill++;


                                  tcp_lost.len += TCP_OPT_LEN_8;


                          }


                          expect = NOPKT;


                  break;


                  case PKT:


                          debug_cond(DEBUG_INT_STATE, "P");



                          if (tcp_ack_edge == edge_a[sack_in].se.l) {


                                  tcp_ack_edge = edge_a[sack_in].se.r;


                                  edge_a[sack_in].st = NOPKT;


                                  sack_idx++;


                          } else {


                                  if (hill < TCP_SACK_HILLS)


                                          tcp_lost.hill[hill].r =


                                                  edge_a[sack_in].se.r;


                          if (sack_in == sack_end - 1)


                                  tcp_lost.hill[hill].r =


                                          edge_a[sack_in].se.r;


                          }


                  break;


                  }


          break;


          }


  }


  debug_cond(DEBUG_INT_STATE, "\n");


  if (tcp_lost.len <= TCP_OPT_LEN_2)


          sack_idx = 0;



+}



+void tcp_parse_options(uchar *o, int o_len)
+{

  struct tcp_t_opt  *tsopt;


  uchar *p = o;



+/*


NOPs are options with a zero length, and thus are special.





All other options have length fields.




*/

  for (p = o; p < (o + o_len); p = p + p[1]) {


          if (p[1] != 0) {


                  switch (p[0]) {


                  case TCP_O_END:


                          return; /* Finished processing options */


                  case TCP_O_MSS:


                  case TCP_O_SCL:


                  case TCP_P_SACK:


                  case TCP_V_SACK:


                          break; /* Continue to process options */


                  case TCP_O_TS:


                          tsopt = (struct tcp_t_opt *)p;


                          rmt_timestamp = tsopt->t_snd;


                          return;


                  break;


                  } /* End switch, process optional NOPs */



                  if (p[0] == TCP_O_NOP)


                          p++;


          } else {


                  return; /* Finished processing options */


          }


  }



+}



+u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len)
+{

  u8  tcp_fin  = tcp_flags & TCP_FIN;


  u8  tcp_syn  = tcp_flags & TCP_SYN;


  u8  tcp_rst  = tcp_flags & TCP_RST;


  u8  tcp_push = tcp_flags & TCP_PUSH;


  u8  tcp_ack  = tcp_flags & TCP_ACK;


  u8  action   = TCP_DATA;


  int i;



  /*


   * tcp_flags are examined to determine TX action in a given state


   * tcp_push is interpreted to mean "inform the app"


   * urg, ece, cwr and nonce flags are not supported.


   *


   * ece and cwr are used to signal and confirm knowledge of congestion.


   * This TCP only sends a file request and acks. If it generates


   * congestion, the network is broken.


   */



  debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action);


  if (tcp_rst) {


          action    = TCP_DATA;


          tcp_state = TCP_CLOSED;


          net_set_state(NETLOOP_FAIL);


          debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags);


          return TCP_RST;


  }



  switch  (tcp_state) {


  case TCP_CLOSED:


          debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags);


          if (tcp_fin)


                  action = TCP_DATA;


          if (tcp_syn)


                  action = TCP_RST;


          if (tcp_ack)


                  action = TCP_DATA;


          break;


  case TCP_SYN_SENT:


          debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n",


                     tcp_flags, *tcp_seq_num);


          if (tcp_fin) {


                  action = action | TCP_PUSH;


                  tcp_state = TCP_CLOSE_WAIT;


          }


          if (tcp_syn) {


                  action = action |  TCP_ACK | TCP_PUSH;


                  if (tcp_ack) {


                          tcp_seq_init          = *tcp_seq_num;


                          *tcp_seq_num          = *tcp_seq_num + 1;


                          tcp_seq_max           = *tcp_seq_num;


                          tcp_ack_edge          = *tcp_seq_num;


                          sack_idx              = 0;


                          edge_a[sack_idx].se.l = *tcp_seq_num;


                          edge_a[sack_idx].se.r = *tcp_seq_num;


                          prev_len              = 0;


                          tcp_state             = TCP_ESTABLISHED;


                          for (i = 0; i < TCP_SACK; i++)


                                  edge_a[i].st   = NOPKT;


                  }


          } else {


                  if (tcp_ack)


                          action = TCP_DATA;


          }


          break;


  case TCP_ESTABLISHED:


          debug_cond(DEBUG_INT_STATE,


                     "TCP_ESTABLISHED %x\n", tcp_flags);


          if (*tcp_seq_num > tcp_seq_max)


                  tcp_seq_max = *tcp_seq_num;


          if (payload_len > 0) {


                  tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max);


                  tcp_fin = TCP_DATA;  /* cause standalone FIN */


          }



          if ((tcp_fin) && tcp_lost.len <= TCP_OPT_LEN_2) {


                  action    = action | TCP_FIN | TCP_PUSH | TCP_ACK;


                  tcp_state =  TCP_CLOSE_WAIT;


          } else {


                  if (tcp_ack)


                          action = TCP_DATA;


          }


          if (tcp_push)


                  action = action | TCP_PUSH;


          if (tcp_syn)


                  action = TCP_ACK + TCP_RST;


          break;


  case TCP_CLOSE_WAIT:


          debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags);


          action = TCP_DATA;                      /* Wait for app */


          break;


  case TCP_FIN_WAIT_2:


          debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags);


          if (tcp_fin)


                  action =  TCP_DATA;


          if (tcp_syn)


                  action =  TCP_DATA;


          if (tcp_ack) {


                  action =  TCP_PUSH | TCP_ACK;


                  tcp_state = TCP_CLOSED;


                  puts("\n");


          }


          break;


  case TCP_FIN_WAIT_1:


          debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags);


          if (tcp_fin) {


                  action = TCP_ACK | TCP_FIN;


                   tcp_state = TCP_FIN_WAIT_2;


          }


          if (tcp_syn)


                  action =  TCP_RST;


          if (tcp_ack) {


                  tcp_state = TCP_CLOSED;


                  tcp_seq_num = tcp_seq_num + 1;


          }


          break;


  case TCP_CLOSING:


          debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags);


          if (tcp_fin)


                  action = TCP_DATA;


          if (tcp_syn)


                  action = TCP_RST;


          if (tcp_ack) {


                  action = TCP_PUSH;


                  tcp_state = TCP_CLOSED;


                  puts("\n");


          }


          break;


  }


  return action;



+}



+void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len)
+{

  int tcp_len = pkt_len - IP_HDR_SIZE;


  u16 tcp_rx_xsum = b->ip.hdr.ip_sum;


  u8  tcp_action = TCP_DATA;


  u32 tcp_seq_num;


  u32 tcp_ack_num;


  struct in_addr action_and_state;



  int tcp_hdr_len;


  int payload_len;



  /*


   * Verify IP header


   */


  debug_cond(DEBUG_DEV_PKT,


             "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n",


                     &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len);



  debug_cond(DEBUG_DEV_PKT,


             "In__________________________________________\n");



  b->ip.hdr.ip_src = net_server_ip;


  b->ip.hdr.ip_dst = net_ip;


  b->ip.hdr.ip_sum = 0x0000;


  if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) {


          debug_cond(DEBUG_DEV_PKT,


                     "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n",


                     &net_ip, &net_server_ip, pkt_len);


          return;


  }



  /*


   * Build pseudo header and verify TCP header


   */


  tcp_rx_xsum = b->ip.hdr.tcp_xsum;


  b->ip.hdr.tcp_xsum = 0x0000;


  if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src,


                                           b->ip.hdr.ip_dst, tcp_len,


                                           pkt_len)) {


          debug_cond(DEBUG_DEV_PKT,


                     "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n",


                     &net_ip, &net_server_ip, tcp_len);


          return;


  }



  tcp_hdr_len = (b->ip.hdr.tcp_hlen >> 2);


  payload_len = tcp_len - tcp_hdr_len;



  if (tcp_hdr_len > TCP_HDR_SIZE)


          tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE,


                            tcp_hdr_len - TCP_HDR_SIZE);


  /*


   * Incoming sequence and ack numbers are server's view of the numbers.


   * The app must swap the numbers when responding.


   */



  tcp_seq_num = ntohl(b->ip.hdr.tcp_seq);


  tcp_ack_num = ntohl(b->ip.hdr.tcp_ack);



  /* Packets are not ordered. Send to app as received. */



  tcp_action  = tcp_state_machine(b->ip.hdr.tcp_flags,


                                  &tcp_seq_num, payload_len);



  /*


   * State-altering command to be sent.


   * The packet sequence and ack numbers are in the tcp_seq_num


   * and tcp_ack_num variables. The current packet, its position


   * in the data stream, is the in the range of those variables.


   *


   * In the "application push" invocation, the TCP header with all


   * its information is pointed to by the packet pointer.


   *


   * In the typedef


   *      void rxhand_tcp(uchar *pkt, unsigned int dport,


   *                      struct in_addr sip, unsigned int sport,


   *                      unsigned int len);


   * *pkt is the pointer to the payload


   * dport is used for tcp_seg_num


   * action_and_state.s_addr is used for TCP state


   * sport is used for tcp_ack_num (which is unused by the app)


   * pkt_ length is the payload length.


   *


   * TCP_PUSH from the state machine with a payload length of 0 is a


   * connect or disconnect event


   */



  tcp_activity_count++;


  if (tcp_activity_count > TCP_ACTIVITY) {


          puts("| ");


          tcp_activity_count = 0;


  }



  if ((tcp_action & TCP_PUSH) || payload_len > 0) {


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n",


                     tcp_action, tcp_seq_num, tcp_ack_num, payload_len);



          action_and_state.s_addr = tcp_action;


          (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len,


                                 tcp_seq_num, action_and_state,


                                 tcp_ack_num, payload_len);



  } else if (tcp_action != TCP_DATA) {


          debug_cond(DEBUG_DEV_PKT,


                     "TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n",


                     tcp_action, tcp_seq_num, tcp_ack_num, payload_len);



          /*


           * Warning: Incoming Ack & Seq sequence numbers are transposed


           * here to outgoing Seq & Ack sequence numbers


           */


          net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src),


                              ntohs(b->ip.hdr.tcp_dst),


                              (tcp_action & (~TCP_PUSH)),


                              tcp_seq_num, tcp_ack_num);


  }



+}
Overall this does not look too bad - hopefully it will land during this year.
-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info