[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
RE: [PATCH for 8.0] igb: Save more Tx states
From: |
Sriram Yagnaraman |
Subject: |
RE: [PATCH for 8.0] igb: Save more Tx states |
Date: |
Tue, 21 Mar 2023 11:05:32 +0000 |
> -----Original Message-----
> From: qemu-devel-bounces+sriram.yagnaraman=est.tech@nongnu.org
> <qemu-devel-bounces+sriram.yagnaraman=est.tech@nongnu.org> On Behalf
> Of Sriram Yagnaraman
> Sent: Friday, 17 March 2023 16:26
> To: Akihiko Odaki <akihiko.odaki@daynix.com>
> Cc: qemu-devel@nongnu.org; Jason Wang <jasowang@redhat.com>; Dmitry
> Fleytman <dmitry.fleytman@gmail.com>; quintela@redhat.com; Philippe
> Mathieu-Daudé <philmd@linaro.org>
> Subject: RE: [PATCH for 8.0] igb: Save more Tx states
>
>
> > -----Original Message-----
> > From: Akihiko Odaki <akihiko.odaki@daynix.com>
> > Sent: Friday, 17 March 2023 15:21
> > To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> > Cc: qemu-devel@nongnu.org; Jason Wang <jasowang@redhat.com>; Dmitry
> > Fleytman <dmitry.fleytman@gmail.com>; quintela@redhat.com; Philippe
> > Mathieu-Daudé <philmd@linaro.org>
> > Subject: Re: [PATCH for 8.0] igb: Save more Tx states
> >
> > On 2023/03/17 22:08, Sriram Yagnaraman wrote:
> > >
> > >
> > >> -----Original Message-----
> > >> From: Akihiko Odaki <akihiko.odaki@daynix.com>
> > >> Sent: Friday, 17 March 2023 13:25
> > >> Cc: qemu-devel@nongnu.org; Jason Wang <jasowang@redhat.com>;
> > Dmitry
> > >> Fleytman <dmitry.fleytman@gmail.com>; quintela@redhat.com; Philippe
> > >> Mathieu-Daudé <philmd@linaro.org>; Sriram Yagnaraman
> > >> <sriram.yagnaraman@est.tech>; Akihiko Odaki
> > >> <akihiko.odaki@daynix.com>
> > >> Subject: [PATCH for 8.0] igb: Save more Tx states
> > >>
> > >> The current implementation of igb uses only part of a advanced Tx
> > >> context descriptor and first data descriptor because it misses some
> > >> features and sniffs the trait of the packet instead of respecting
> > >> the packet type specified in the descriptor. However, we will
> > >> certainly need the entire Tx context descriptor when we update igb
> > >> to respect these ignored fields. Save the entire context descriptor
> > >> and first data descriptor except the buffer address to prepare for such a
> change.
> > >>
> > >> This also introduces the distinction of contexts with different
> > >> indexes, which was not present in e1000e but in igb.
> > >>
> > >> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> > >> ---
> > >> Supersedes: <20230316155707.27007-1-akihiko.odaki@daynix.com>
> > >>
> > >> hw/net/igb.c | 25 ++++++++++++++++++-------
> > >> hw/net/igb_core.c | 36 +++++++++++++++++++-----------------
> > >> hw/net/igb_core.h | 8 +++-----
> > >> 3 files changed, 40 insertions(+), 29 deletions(-)
> > >>
> > >> diff --git a/hw/net/igb.c b/hw/net/igb.c index
> > >> c6d753df87..7c05896325
> > >> 100644
> > >> --- a/hw/net/igb.c
> > >> +++ b/hw/net/igb.c
> > >> @@ -502,16 +502,27 @@ static int igb_post_load(void *opaque, int
> > >> version_id)
> > >> return igb_core_post_load(&s->core); }
> > >>
> > >> -static const VMStateDescription igb_vmstate_tx = {
> > >> - .name = "igb-tx",
> > >> +static const VMStateDescription igb_vmstate_tx_ctx = {
> > >> + .name = "igb-tx-ctx",
> > >> .version_id = 1,
> > >> .minimum_version_id = 1,
> > >> .fields = (VMStateField[]) {
> > >> - VMSTATE_UINT16(vlan, struct igb_tx),
> > >> - VMSTATE_UINT16(mss, struct igb_tx),
> > >> - VMSTATE_BOOL(tse, struct igb_tx),
> > >> - VMSTATE_BOOL(ixsm, struct igb_tx),
> > >> - VMSTATE_BOOL(txsm, struct igb_tx),
> > >> + VMSTATE_UINT32(vlan_macip_lens, struct
> > e1000_adv_tx_context_desc),
> > >> + VMSTATE_UINT32(seqnum_seed, struct
> e1000_adv_tx_context_desc),
> > >> + VMSTATE_UINT32(type_tucmd_mlhl, struct
> > >> e1000_adv_tx_context_desc),
> > >> + VMSTATE_UINT32(mss_l4len_idx, struct
> e1000_adv_tx_context_desc),
> > >> + }
> > >> +};
> > >> +
> > >> +static const VMStateDescription igb_vmstate_tx = {
> > >> + .name = "igb-tx",
> > >> + .version_id = 2,
> > >> + .minimum_version_id = 2,
> > >> + .fields = (VMStateField[]) {
> > >> + VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0,
> igb_vmstate_tx_ctx,
> > >> + struct e1000_adv_tx_context_desc),
> > >> + VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
> > >> + VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
> > >> VMSTATE_BOOL(first, struct igb_tx),
> > >> VMSTATE_BOOL(skip_cp, struct igb_tx),
> > >> VMSTATE_END_OF_LIST()
> > >> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> > >> a7c7bfdc75..36027c2b54 100644
> > >> --- a/hw/net/igb_core.c
> > >> +++ b/hw/net/igb_core.c
> > >> @@ -389,8 +389,10 @@ igb_rss_parse_packet(IGBCore *core, struct
> > >> NetRxPkt *pkt, bool tx, static bool igb_setup_tx_offloads(IGBCore
> > >> *core, struct igb_tx
> > >> *tx) {
> > >> - if (tx->tse) {
> > >> - if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss))
> > >> {
> > >> + if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
> > >> + uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
> > >
> > > [...] More below
> > >
> > >> + uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
> > >> + if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true,
> > >> + mss)) {
> > >> return false;
> > >> }
> > >>
> > >> @@ -399,13 +401,13 @@ igb_setup_tx_offloads(IGBCore *core, struct
> > >> igb_tx
> > >> *tx)
> > >> return true;
> > >> }
> > >>
> > >> - if (tx->txsm) {
> > >> + if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
> > >> if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
> > >> return false;
> > >> }
> > >> }
> > >>
> > >> - if (tx->ixsm) {
> > >> + if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
> > >> net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
> > >> }
> > >>
> > >> @@ -527,7 +529,7 @@ igb_process_tx_desc(IGBCore *core, {
> > >> struct e1000_adv_tx_context_desc *tx_ctx_desc;
> > >> uint32_t cmd_type_len;
> > >> - uint32_t olinfo_status;
> > >> + uint32_t idx;
> > >> uint64_t buffer_addr;
> > >> uint16_t length;
> > >>
> > >> @@ -538,20 +540,19 @@ igb_process_tx_desc(IGBCore *core,
> > >> E1000_ADVTXD_DTYP_DATA) {
> > >> /* advanced transmit data descriptor */
> > >> if (tx->first) {
> > >> - olinfo_status =
> > >> le32_to_cpu(tx_desc->read.olinfo_status);
> > >> -
> > >> - tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
> > >> - tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
> > >> - tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
> > >> -
> > >> + tx->first_cmd_type_len = cmd_type_len;
> > >> + tx->first_olinfo_status =
> > >> + le32_to_cpu(tx_desc->read.olinfo_status);
> > >> tx->first = false;
> > >> }
> > >> } else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
> > >> E1000_ADVTXD_DTYP_CTXT) {
> > >> /* advanced transmit context descriptor */
> > >> tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
> > >> - tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
> > >> - tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
> > >> + idx = (tx_ctx_desc->mss_l4len_idx >> 4) & 1;
> > >
> > > I do not know if there are any other drivers that use more than 2
> > > contexts,
> > but as I read 7.2.2.2.11 IDX (3), IDX is a 3 bit field.
> > > The above line will interpret 3, 5 and 7 as 1 for e.g.
> >
> > 7.2.1.4 "Transmit Contexts" says:
> > > The 82576 supports 32 context register sets on-chip (two per queue)
> >
> > DPDK also uses only two contexts while its design is extensible and
> > can use more if the hardware allows. Therefore, it can be concluded
> > that the device actually has only two contexts.
> >
> > I don't know why IDX is defined as 3-bit field, but I think it's safe
> > to ignore the other two bits as we do for the other reserved bits.
>
> 7.2.1.4 also clearly specifies that "The IDX field contains an index to one
> of the
> two queue contexts".
> Thanks for replying to my comments.
>
> >
> > >
> > >> + tx->ctx[idx].vlan_macip_lens =
> > >> + le32_to_cpu(tx_ctx_desc-
> > >>> vlan_macip_lens);
> > >> + tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc-
> > >seqnum_seed);
> > >> + tx->ctx[idx].type_tucmd_mlhl =
> > >> + le32_to_cpu(tx_ctx_desc-
> > >>> type_tucmd_mlhl);
> > >> + tx->ctx[idx].mss_l4len_idx =
> > >> + le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
> > >> return;
> > >> } else {
> > >> /* unknown descriptor type */ @@ -575,8 +576,10 @@
> > >> igb_process_tx_desc(IGBCore *core,
> > >> if (cmd_type_len & E1000_TXD_CMD_EOP) {
> > >> if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
> > >> if (cmd_type_len & E1000_TXD_CMD_VLE) {
> > >> - net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
> > >> - core->mac[VET] & 0xffff);
> > >> + idx = (tx->first_olinfo_status >> 4) & 1;
> > >> + uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
> > >> + uint16_t vet = core->mac[VET] & 0xffff;
> > >> + net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
> > >> + vet);
> > >> }
> > >> if (igb_tx_pkt_send(core, tx, queue_index)) {
> > >> igb_on_tx_done_update_stats(core, tx->tx_pkt); @@
> > >> -4024,8
> > >> +4027,7 @@ static void igb_reset(IGBCore *core, bool sw)
> > >> for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
> > >> tx = &core->tx[i];
> > >> net_tx_pkt_reset(tx->tx_pkt);
> > >> - tx->vlan = 0;
> > >> - tx->mss = 0;
> > >> + memset(&tx->ctx, 0, sizeof(tx->ctx));
> > >> tx->tse = false;
> > >> tx->ixsm = false;
> > >> tx->txsm = false;
BTW, just noticed you will have to remove the above 3 lines as well. Doesn't
compile otherwise.
> > >> diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h index
> > >> 814c1e264b..8914e0b801 100644
> > >> --- a/hw/net/igb_core.h
> > >> +++ b/hw/net/igb_core.h
> > >> @@ -72,11 +72,9 @@ struct IGBCore {
> > >> QEMUTimer *autoneg_timer;
> > >>
> > >> struct igb_tx {
> > >> - uint16_t vlan; /* VLAN Tag */
> > >> - uint16_t mss; /* Maximum Segment Size */
> > >> - bool tse; /* TCP/UDP Segmentation Enable */
> > >> - bool ixsm; /* Insert IP Checksum */
> > >> - bool txsm; /* Insert TCP/UDP Checksum */
> > >> + struct e1000_adv_tx_context_desc ctx[2];
> > >> + uint32_t first_cmd_type_len;
> > >> + uint32_t first_olinfo_status;
> > >>
> > >> bool first;
> > >> bool skip_cp;
> > >> --
> > >> 2.39.2
> > >
>
> LGTM
> Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>