[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 3/3] pxenet: process transmit interrupts when out of resource
From: |
Andrei Borzenkov |
Subject: |
Re: [PATCH 3/3] pxenet: process transmit interrupts when out of resources |
Date: |
Sat, 12 Mar 2016 09:20:49 +0300 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.6.0 |
11.03.2016 19:28, Josef Bacik пишет:
> We were hitting a problem in production where our bios based machines would
> sometimes timeout while pulling down either the kernel/initrd. It turned out
> that sometimes we'd run out of transmit buffers and would then error out while
> sending packets. This is because we don't actually have an interrupt handler
> in
> PXE, we just poll the card when we want to receive. This works most of the
> time, but sometimes we end up with too many transmit interrupts pending and
> then
> we can't add new packets to the transmit buffers.
>
> So rework the whole ISR logic to be able to be called from both transmit and
> receive. If we get OUT_OF_RESOURCES while trying to transmit then we can go
> through and process the interrupts and hope that leaves us with space to retry
> the transmit. Unfortunately this puts us in a place where we can trip over a
> RECEIVE interrupt, so we have to process that interrupt and leave a netbuff
> behind for the next call into recv.
>
> With this patch we are now able to properly provision boxes suffering from
> this
> problem. Thanks,
>
> Signed-off-by: Josef Bacik <address@hidden>
> ---
> grub-core/net/drivers/i386/pc/pxe.c | 155
> +++++++++++++++++++++++++-----------
> 1 file changed, 108 insertions(+), 47 deletions(-)
>
> diff --git a/grub-core/net/drivers/i386/pc/pxe.c
> b/grub-core/net/drivers/i386/pc/pxe.c
> index 3f4152d..57445b7 100644
> --- a/grub-core/net/drivers/i386/pc/pxe.c
> +++ b/grub-core/net/drivers/i386/pc/pxe.c
> @@ -166,16 +166,11 @@ grub_pxe_scan (void)
> return bangpxe;
> }
>
> -static struct grub_net_buff *
> -grub_pxe_recv (struct grub_net_card *dev __attribute__ ((unused)))
> -{
> - struct grub_pxe_undi_isr *isr;
> - static int in_progress = 0;
> - grub_uint8_t *ptr, *end;
> - struct grub_net_buff *buf;
> -
> - isr = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> +static int in_progress = 0;
>
You need to reset it in ->open or ->close.
> +static void
> +grub_pxe_process_isr (struct grub_pxe_undi_isr *isr)
> +{
> if (!in_progress)
> {
> grub_memset (isr, 0, sizeof (*isr));
> @@ -186,13 +181,14 @@ grub_pxe_recv (struct grub_net_card *dev __attribute__
> ((unused)))
> breaks on intel cards.
> */
> if (isr->status)
> - {
> - in_progress = 0;
> - return NULL;
> + {
> + grub_dprintf("net", "problem pulling isr %d\n", (int)isr->status);
"pxe" would be better for dedicated debugging, "net" is too generic. We
can also set debug="pxe net" if needed. Also PXE spec lists status
values as hex, would be better to print it this way too.
> + return;
> }
> grub_memset (isr, 0, sizeof (*isr));
> isr->func_flag = GRUB_PXE_ISR_IN_PROCESS;
> grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> + in_progress = 1;
> }
> else
> {
> @@ -200,18 +196,13 @@ grub_pxe_recv (struct grub_net_card *dev __attribute__
> ((unused)))
> isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> }
> +}
>
> - while (isr->func_flag != GRUB_PXE_ISR_OUT_RECEIVE)
> - {
> - if (isr->status || isr->func_flag == GRUB_PXE_ISR_OUT_DONE)
> - {
> - in_progress = 0;
> - return NULL;
> - }
> - grub_memset (isr, 0, sizeof (*isr));
> - isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> - grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> - }
> +static struct grub_net_buff *
> +grub_pxe_make_net_buff (struct grub_pxe_undi_isr *isr)
> +{
> + struct grub_net_buff *buf;
> + grub_uint8_t *ptr, *end;
>
> buf = grub_netbuff_alloc (isr->frame_len + 2);
> if (!buf)
> @@ -230,48 +221,118 @@ grub_pxe_recv (struct grub_net_card *dev __attribute__
> ((unused)))
> ptr += isr->buffer_len;
> while (ptr < end)
> {
> - grub_memset (isr, 0, sizeof (*isr));
> - isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> - grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> + grub_pxe_process_isr (isr);
> if (isr->status || isr->func_flag != GRUB_PXE_ISR_OUT_RECEIVE)
> {
> - in_progress = 1;
> + grub_dprintf("net", "half processed packet\n");
> grub_netbuff_free (buf);
> return NULL;
> }
> -
> grub_memcpy (ptr, LINEAR (isr->buffer), isr->buffer_len);
> ptr += isr->buffer_len;
> }
> - in_progress = 1;
> -
> return buf;
> }
>
> +static struct grub_net_buff *
> +grub_pxe_recv (struct grub_net_card *dev)
> +{
> + struct grub_pxe_undi_isr *isr;
> +
> + if (dev->data)
> + {
> + struct grub_net_buff *buf = dev->data;
> + grub_dprintf("net", "Pulling existing receive packet\n");
> + dev->data = NULL;
> + return buf;
> + }
> +
> + isr = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> + grub_pxe_process_isr (isr);
> + while (isr->func_flag != GRUB_PXE_ISR_OUT_RECEIVE)
> + {
> + if (isr->status || isr->func_flag == GRUB_PXE_ISR_OUT_DONE)
> + {
> + if (isr->status)
> + grub_dprintf("net", "error pulling next %d\n", (int)isr->status);
> + in_progress = 0;
> + return 0;
> + }
> + grub_memset (isr, 0, sizeof (*isr));
> + isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> + grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> + }
> + return grub_pxe_make_net_buff (isr);
> +}
> +
> static grub_err_t
> -grub_pxe_send (struct grub_net_card *dev __attribute__ ((unused)),
> - struct grub_net_buff *pack)
> +grub_pxe_send (struct grub_net_card *dev, struct grub_net_buff *pack)
> {
> struct grub_pxe_undi_transmit *trans;
> struct grub_pxe_undi_tbd *tbd;
> char *buf;
>
> - trans = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> - grub_memset (trans, 0, sizeof (*trans));
> - tbd = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 128);
> - grub_memset (tbd, 0, sizeof (*tbd));
> - buf = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 256);
> - grub_memcpy (buf, pack->data, pack->tail - pack->data);
> -
> - trans->tbd = SEGOFS ((grub_addr_t) tbd);
> - trans->protocol = 0;
> - tbd->len = pack->tail - pack->data;
> - tbd->buf = SEGOFS ((grub_addr_t) buf);
> -
> - grub_pxe_call (GRUB_PXENV_UNDI_TRANSMIT, trans, pxe_rm_entry);
> - if (trans->status)
> - return grub_error (GRUB_ERR_IO, N_("couldn't send network packet"));
> + while (1)
> + {
> + int made_progress = 0;
> +
> + trans = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> + grub_memset (trans, 0, sizeof (*trans));
> + tbd = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 128);
> + grub_memset (tbd, 0, sizeof (*tbd));
> + buf = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 256);
> + grub_memcpy (buf, pack->data, pack->tail - pack->data);
> +
> + trans->tbd = SEGOFS ((grub_addr_t) tbd);
> + trans->protocol = 0;
> + tbd->len = pack->tail - pack->data;
> + tbd->buf = SEGOFS ((grub_addr_t) buf);
> +
> + grub_pxe_call (GRUB_PXENV_UNDI_TRANSMIT, trans, pxe_rm_entry);
> + if (!trans->status)
> + break;
This looks like the only terminating condition. What if hardware is
stuck and cannot make progress anymore and continues to return error here?
> + if (trans->status == GRUB_PXENV_STATUS_OUT_OF_RESOURCES)
> + {
> + struct grub_pxe_undi_isr *isr;
> +
> + grub_dprintf("net", "Out of transmit buffers, processing "
> + "interrupts\n");
> + /* Process any outstanding transmit interrupts. */
> + isr = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> + do
> + {
> + grub_pxe_process_isr (isr);
> + if (isr->status)
> + {
> + grub_dprintf("net", "process interrupts errored %d,"
> + "made_progress %d\n", (int)isr->status,
> made_progress);
> + if (made_progress)
> + break;
> + else
> + goto out_err;
> + }
> + if (isr->func_flag == GRUB_PXE_ISR_OUT_DONE)
> + in_progress = 0;
> + made_progress = 1;
> + } while (isr->func_flag == GRUB_PXE_ISR_OUT_TRANSMIT);
> +
> + /* If we had a receive interrupt in the queue we need to copy this
> + buffer out otherwise we'll lose it. */
> + if (isr->func_flag == GRUB_PXE_ISR_OUT_RECEIVE)
It probably should check isr->status, we come here also if we got an
error previously. Such packets are ignored everywhere else.
> + {
> + if (dev->data)
> + grub_dprintf("net", "dropping packet, already have a "
> + "pending packet.\n");
> + else
> + dev->data = grub_pxe_make_net_buff (isr);
> + }
> + }
> + else
> + goto out_err;
> + }
> return 0;
> +out_err:
> + return grub_error (GRUB_ERR_IO, N_("couldn't send network packet"));
> }
>
> static void
>
- [PATCH 1/3] push/pop errno in initrd read file path, Josef Bacik, 2016/03/11
- [PATCH 2/3] tcp: add a dprintf for opening tcp connections, Josef Bacik, 2016/03/11
- [PATCH 3/3] pxenet: process transmit interrupts when out of resources, Josef Bacik, 2016/03/11
- Re: [PATCH 3/3] pxenet: process transmit interrupts when out of resources,
Andrei Borzenkov <=
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Vladimir 'phcoder' Serbinenko, 2016/03/11
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Josef Bacik, 2016/03/11
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Vladimir 'phcoder' Serbinenko, 2016/03/11
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Josef Bacik, 2016/03/11
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Vladimir 'phcoder' Serbinenko, 2016/03/11
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Andrei Borzenkov, 2016/03/12
- Re: [PATCH 1/3] push/pop errno in initrd read file path, Vladimir 'phcoder' Serbinenko, 2016/03/15