lwip-users
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[lwip-users] Stack corruption with high load tcp raw api


From: Erik Ekman
Subject: [lwip-users] Stack corruption with high load tcp raw api
Date: Wed, 14 Nov 2007 11:22:04 +0100

Hi there

I am having more problems with my TCP raw API program, which connects to a
port and sends some data, using lwIP 1.2.0. When I stress the system and
request a new connection every 100 ms, things crash after a while with
a strange stack corruption. I get the same errors both on target with
lots of interrupts running and when emulating on Linux with a few
pthreads. I use a very simple OS with cooperative multitasking and message
passing.

I now register the error callback only if the tcp_bind and tcp_connect
calls are successful, to avoid doing any double frees. My guess is
that I should not call tcp_close if bind or connect failed, but that
may be wrong.

If I send every two seconds instead, it works well, but then the struct
allocated in signal_send gets the same address each time, so the heap
isn't fully used.

My tcp sender code:

#include <nosys.h>
#include <debug.h>
#include "lwip/tcp.h"
#include <basic_signal.h>

// Per-connection state. One instance is allocated in signal_send and
// registered on the pcb with tcp_arg, so the TCP callbacks receive it as
// their `arg` parameter.
struct signal_state
{
  struct tcp_pcb *pcb;  // pcb obtained from tcp_new in signal_send
  char *data;           // payload handed to tcp_write in signal_connected
  int size;             // number of bytes of `data` to write
};

// Default payload, used by signal_send for NOSYS_MSG_TIMER messages.
// signal_send takes sizeof(tarta) as the length, so the terminating NUL
// byte is part of what gets written.
const char tarta[] = "tårta";

// TCP callbacks installed on the pcb by signal_send / signal_connected.
void signal_err(void *arg, err_t err);
err_t signal_connected(void *arg, struct tcp_pcb *pcb, err_t err);
err_t signal_sent(void *arg, struct tcp_pcb *pcb, u16_t len);

/*
 * Fetch the pending message and start a TCP connection that will carry its
 * payload.
 *
 * A signal_state is allocated, filled from the message (the built-in
 * `tarta` string for NOSYS_MSG_TIMER, the message's own buffer for
 * NOSYS_MSG_SIGNAL_STRING) and attached to a fresh pcb with tcp_arg. The
 * actual write happens later in signal_connected.
 *
 * Ownership: on success the signal_state belongs to the callbacks
 * (signal_connected / signal_sent / signal_err), exactly one of which frees
 * it. On every failure path in this function both the signal_state and the
 * pcb are released here, since no callback will ever run for them.
 */
void signal_send(void)
{
  struct nosys_msg *m;
  struct signal_state *ss;
  struct tcp_pcb *pcb;
  struct ip_addr target;
  err_t err;

  m = nosys_getmsg();
  ASSERT(m);

  ss = mem_malloc(sizeof *ss);
  if (ss == NULL) {
    // Heap exhausted: drop the message instead of dereferencing NULL below.
    DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss alloc failed");
    nosys_delmsg(m);
    return;
  }

  if (m->type == NOSYS_MSG_TIMER) {
    ss->data = (char *) tarta;
    ss->size = sizeof(tarta);
  } else if (m->type == NOSYS_MSG_SIGNAL_STRING) {
    // NOTE(review): ss->data aliases m->ptr, and m is deleted just below.
    // If nosys_delmsg also releases m->ptr, this pointer dangles by the time
    // signal_connected runs -- confirm the message ownership rules.
    ss->data = (char *) m->ptr;
    ss->size = m->data;
  } else { // Unknown message type
    nosys_delmsg(m);
    mem_free(ss);
    return;
  }

  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss alloced %p", ss);

  nosys_delmsg(m);

  pcb = tcp_new();
  if (pcb == NULL) {
    DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss free %p nopcb", ss);
    mem_free(ss);
    return;
  }

  ss->pcb = pcb;

  tcp_arg(pcb, ss);         // Register ss struct as user data for this pcb
  tcp_recv(pcb, NULL);
  tcp_poll(pcb, NULL, 0);
  err = tcp_bind(pcb, NULL, 0);   // Bind to any port on external IP
  if (err != ERR_OK) {
    goto error;
  }

  IP4_ADDR(&target, 172, 30, 51, 14);
  err = tcp_connect(pcb, &target, 8080, signal_connected);
  if (err != ERR_OK) {
    goto error;
  }

  // If all went well, register an error callback for other errors
  tcp_err(pcb, signal_err);
  return;

error:
  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO,
            "Aborting signal_send, got error %d =======================================",
            err);
  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss free %p", ss);

  // Detach ss before freeing it so the pcb never holds a dangling pointer.
  // No error callback is installed yet, so releasing the pcb below cannot
  // call back into signal_err and free ss a second time.
  tcp_arg(pcb, NULL);
  mem_free(ss);

  // The pcb came from tcp_new and nothing else will release it; without
  // this every failed bind/connect leaks one pcb from the pool.
  if (tcp_close(pcb) != ERR_OK) {
    tcp_abort(pcb);
  }
}

/*
 * Error callback installed by signal_send once tcp_connect has succeeded.
 *
 * arg: the signal_state registered with tcp_arg (may be NULL).
 * err: the error code reported by the stack.
 *
 * Only the signal_state is released here.
 * NOTE(review): the lwIP raw API documentation states that the pcb is
 * already deallocated when the error callback runs, which is why no
 * tcp_close is attempted -- confirm for the lwIP version in use.
 */
void signal_err(void *arg, err_t err)
{
  struct signal_state *ss = (struct signal_state *) arg;

  if (ss) {
    DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss free %p some err %d",
              ss, err);
    mem_free(ss);
  }
}

/*
 * Connected callback passed to tcp_connect by signal_send.
 *
 * arg: the signal_state registered with tcp_arg.
 * pcb: the connection that has just been established.
 * err: connect status supplied by the stack (not inspected here).
 *
 * Queues the payload with tcp_write and installs signal_sent so the state
 * is released once the data is acknowledged. Returns ERR_OK on success, or
 * the tcp_write error code on failure.
 *
 * On a write failure the signal_state is freed here. Before that, the pcb
 * is stripped of its callbacks and user argument: signal_send has already
 * installed signal_err, and leaving it in place would let a later error
 * callback free the same signal_state a second time.
 */
err_t signal_connected(void *arg, struct tcp_pcb *pcb, err_t err)
{
  struct signal_state *ss = (struct signal_state *) arg;
  err_t senderr;

  (void) err;

  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss connect %p", ss);

  tcp_sent(pcb, signal_sent);
  senderr = tcp_write(pcb, ss->data, ss->size, TRUE);
  if (senderr == ERR_OK) {
    return ERR_OK;
  }

  // Report the tcp_write result; the `err` parameter is the connect status.
  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO,
            "Aborting signal_connected, got error %d ++++++++++++++++++++++++++++++++++++++++",
            senderr);
  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss free %p write err", ss);

  // Make sure no callback can see ss again once it has been freed.
  tcp_sent(pcb, NULL);
  tcp_err(pcb, NULL);
  tcp_arg(pcb, NULL);
  mem_free(ss);

  // Nothing will be sent on this connection, so release the pcb as well.
  // NOTE(review): the tcp_close result is not checked, matching signal_sent;
  // if it can fail here the pcb stays allocated -- consider a poll-based retry.
  tcp_close(pcb);

  return senderr;
}

/*
 * Sent callback installed by signal_connected.
 *
 * arg: the signal_state registered with tcp_arg (may be NULL).
 * pcb: the connection the data was sent on.
 * len: number of bytes reported by the stack (not inspected here).
 *
 * Ends the transfer: removes every callback and the user argument from the
 * pcb, frees the signal_state and closes the connection. Always returns
 * ERR_OK.
 */
err_t signal_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
{
  struct signal_state *ss = (struct signal_state *) arg;

  (void) len;

  DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss sent %p", ss);

  tcp_sent(pcb, NULL);
  tcp_recv(pcb, NULL);
  tcp_err(pcb, NULL);
  tcp_poll(pcb, NULL, 0);
  // Drop the pcb's reference to ss before freeing it, so the pcb does not
  // keep a stale pointer while it lingers through the close handshake.
  tcp_arg(pcb, NULL);

  if (ss) {
    DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "ss free %p sent", ss);
    mem_free(ss);
  }

  // NOTE(review): the tcp_close result is ignored; if it fails the pcb stays
  // allocated with no callbacks left to retry the close.
  tcp_close(pcb);

  return ERR_OK;
}

My lwipopts are:
// lwIP build options in effect when the crash was reproduced.
// NOTE(review): the meanings noted below are taken from the option names --
// confirm against the opt.h of the lwIP version in use.
#define LWIP_TCP 1
#define LWIP_DHCP 1

// Presumably selects the build without an OS abstraction layer -- TODO confirm.
#define NO_SYS 1

#define IP_REASSEMBLY 0
#define ARP_QUEUEING 0

// Pool and table sizes. The author reports that changing MEMP_NUM_TCP_PCB
// does not affect the crash.
#define ARP_TABLE_SIZE 4
#define PBUF_POOL_SIZE 13
#define MEMP_NUM_PBUF 13
#define MEMP_NUM_TCP_SEG 8
#define MEMP_NUM_TCP_PCB_LISTEN 2
#define MEMP_NUM_TCP_PCB 15
#define MEMP_NUM_UDP_PCB 1
#define MEMP_NUM_RAW_PCB 2
// Presumably the size of the heap served by mem_malloc -- TODO confirm.
#define MEM_SIZE 2000

I get the same result regardless of what I set MEMP_NUM_TCP_PCB to.
An example of the crash in Linux:

Program received signal SIGSEGV, Segmentation fault.
0x0804ecda in pbuf_clen (p=0x80260) at ../../lwip/src/core/pbuf.c:637
637         p = p->next;
(gdb) bt
#0  0x0804ecda in pbuf_clen (p=0x80260) at ../../lwip/src/core/pbuf.c:637
#1  0x0805217e in tcp_receive (pcb=0x805ef44) at
../../lwip/src/core/tcp_in.c:769
#2  0x080519e5 in tcp_process (pcb=0x805ef44) at
../../lwip/src/core/tcp_in.c:558
#3  0x08050feb in tcp_input (p=0x805f848, inp=0x8062a40) at
../../lwip/src/core/tcp_in.c:272
#4  0x08055760 in ip_input (p=0x805f848, inp=0x8062a40) at
../../lwip/src/core/ipv4/ip.c:345
#5  0x0805b176 in tapif_handle (netif=0x8062a40, p=0x805f848) at
../../lwip/src/netif/tapif.c:412
#6  0x0805b07a in tapif_packet (netif=0x8062a40, p=0x805f848) at
../../lwip/src/netif/tapif.c:340
#7  0x080567b4 in lwip_tick () at ../../lwip/src/lwipthread.c:63
#8  0x0804b250 in nosys_run () at ../../generic/nosys/nosys.c:125
#9  0x08048fcf in app_start () at app_start.c:333
#10 0x0804900d in sys_boot () at ../../generic/boot.c:64
#11 0x0805a71d in main () at ../../arch/linux/cli/linux_cli.c:33
(gdb) list
632         if ((unsigned) p < 0x7fe00000 || (unsigned) p > 0x7fe03fff) {
633           DEBUGF_LN(SIGNAL_MSG_DBG, DBG_LEVEL_INFO, "We are dead
======================================= %p", p);
634           target_halt();
635         }
636     #endif
637         p = p->next;
638       }
639       return len;
640     }
641
(gdb) up
#1  0x0805217e in tcp_receive (pcb=0x805ef44) at
../../lwip/src/core/tcp_in.c:769
769             pcb->snd_queuelen -= pbuf_clen(next->p);
(gdb) list
764
765             next = pcb->unacked;
766             pcb->unacked = pcb->unacked->next;
767
768             LWIP_DEBUGF(TCP_QLEN_DEBUG, "tcp_receive: queuelen
%"U16_F" ... ", (u16_t)pcb->snd_queuelen);
769             pcb->snd_queuelen -= pbuf_clen(next->p);
770             tcp_seg_free(next);
771
772             LWIP_DEBUGF(TCP_QLEN_DEBUG, "%"U16_F" (after freeing
unacked)\n", (u16_t)pcb->snd_queuelen);
773             if (pcb->snd_queuelen != 0) {
(gdb) print next
$2 = (struct tcp_seg *) 0x805f5e8
(gdb) print *next
$3 = {next = 0x0, p = 0x805e48c, dataptr = 0x805e4d4, len = 6, tcphdr
= 0x805e4c0}
(gdb)

So the next->p pointer is 0x805e48c before the call to pbuf_clen, and
when it reaches the function it has changed to 0x80260. The address it
is changed to is not totally random; it is usually this value, both on
target and in Linux.

The lwIP stack sometimes crashes in other places as well, but this is
the most frequent. I have lots of checks on target that the stack is
within bounds, but since I get the same error when emulating, it must
be some other error.

Any ideas? I am thinking about trying the CVS HEAD, but I think it
is just an error in my raw API usage.

/Erik




reply via email to

[Prev in Thread] Current Thread [Next in Thread]