qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3 2/2] igb: packet-split descriptors support


From: Tomasz Dzieciol
Subject: [PATCH v3 2/2] igb: packet-split descriptors support
Date: Thu, 27 Apr 2023 12:47:43 +0200

Packet-split descriptors are used by the Linux VF driver for MTU values of 2048 and above.
---
 hw/net/igb_core.c | 300 +++++++++++++++++++++++++++++++++++++++++-----
 hw/net/igb_regs.h |   6 +
 2 files changed, 276 insertions(+), 30 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 1cb64402aa..6abb152d51 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -282,6 +282,14 @@ igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r)
     return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
 }
 
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000E_RingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -1239,21 +1247,70 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
 }
 
 static inline void
-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-                      hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                 hwaddr *buff_addr)
 {
     *buff_addr = le64_to_cpu(desc->read.pkt_addr);
 }
 
 static inline void
-igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-                  hwaddr *buff_addr)
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                hwaddr *buff_addr)
+{
+    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
+typedef struct IGB_BaState_st {
+    uint16_t written[MAX_PS_BUFFERS];
+    uint8_t cur_idx;
+} IGB_BaState;
+
+typedef struct IGB_PacketRxDMAState_st {
+    size_t size;
+    size_t total_size;
+    size_t ps_hdr_len;
+    size_t desc_size;
+    size_t desc_offset;
+    uint32_t rx_desc_packet_buf_size;
+    uint32_t rx_desc_header_buf_size;
+    struct iovec *iov;
+    size_t iov_ofs;
+    bool do_ps;
+    bool is_first;
+    IGB_BaState bastate;
+    hwaddr ba[MAX_PS_BUFFERS];
+} IGB_PacketRxDMAState;
+
+static inline void
+igb_read_rx_descr(IGBCore *core,
+                  union e1000_rx_desc_union *desc,
+                  IGB_PacketRxDMAState *pdma_st,
+                  const E1000E_RingInfo *r)
 {
+    uint32_t desc_type;
+
     if (igb_rx_use_legacy_descriptor(core)) {
-        igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
-    } else {
-        igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
+        igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+        pdma_st->ba[0] = 0;
+        return;
+    }
+
+    /* advanced header split descriptor */
+    if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+        return;
     }
+
+    /* descriptor replication modes not supported */
+    desc_type = igb_rx_queue_desctyp_get(core, r);
+    if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+        trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
+    }
+
+    /* advanced single buffer descriptor */
+    igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+    pdma_st->ba[0] = 0;
 }
 
 static void
@@ -1397,6 +1454,13 @@ igb_write_lgcy_rx_descr(IGBCore *core,
     desc->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+static bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000E_RingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static uint16_t
 igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
 {
@@ -1483,6 +1547,49 @@ igb_write_adv_rx_descr(IGBCore *core,
     d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
 }
 
+typedef struct IGB_SplitDescriptorData_st {
+    bool sph;
+    bool hbo;
+    size_t hdr_len;
+} IGB_SplitDescriptorData;
+
+static inline void
+igb_write_adv_ps_split_rx_descr(IGBCore *core,
+                                union e1000_adv_rx_desc *d,
+                                struct NetRxPkt *pkt,
+                                const E1000E_RSSInfo *rss_info,
+                                const E1000E_RingInfo *r,
+                                uint16_t etqf,
+                                bool ts,
+                                IGB_SplitDescriptorData *ps_desc_data,
+                                uint16_t(*written)[MAX_PS_BUFFERS])
+{
+    size_t pkt_len;
+    size_t hdr_len = ps_desc_data->hdr_len;
+
+    bool split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (!split_always) {
+        if ((!ps_desc_data->sph && !ps_desc_data->hbo) ||
+            ( ps_desc_data->sph &&  ps_desc_data->hbo)) {
+            pkt_len = (*written)[0] + (*written)[1];
+        } else {
+            assert(!ps_desc_data->hbo);
+            pkt_len = (*written)[1];
+        }
+    } else {
+        pkt_len = (*written)[1];
+    }
+
+    igb_write_adv_rx_descr(core, d, pkt, rss_info, etqf, ts, pkt_len);
+
+    d->wb.lower.lo_dword.hdr_info = (hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
+                                    E1000_ADVRXD_ADV_HDR_LEN_MASK;
+    d->wb.lower.lo_dword.hdr_info |= ps_desc_data->sph ? E1000_ADVRXD_HDR_SPH
+                                                       : 0;
+    d->wb.upper.status_error |= ps_desc_data->hbo ?
+                                    E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0;
+}
+
 static inline void
 igb_write_rx_descr(IGBCore *core,
                    union e1000_rx_desc_union *desc,
@@ -1490,13 +1597,18 @@ igb_write_rx_descr(IGBCore *core,
                    const E1000E_RSSInfo *rss_info,
                    uint16_t etqf,
                    bool ts,
+                   IGB_SplitDescriptorData *ps_desc_data,
                    uint16_t(*written)[MAX_PS_BUFFERS],
                    const E1000E_RingInfo *r)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
         igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, (*written)[1]);
+    } else if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_write_adv_ps_split_rx_descr(core, &desc->adv, pkt, rss_info, r,
+                                        etqf, ts, ps_desc_data, written);
     } else {
-        igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, (*written)[1]);
+        igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
+                               etqf, ts, (*written)[1]);
     }
 }
 
@@ -1557,34 +1669,149 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
-typedef struct IGB_BaState_st {
-    uint16_t written[MAX_PS_BUFFERS];
-    uint8_t cur_idx;
-} IGB_BaState;
+static bool
+igb_do_ps(IGBCore *core,
+          const E1000E_RingInfo *r,
+          struct NetRxPkt *pkt,
+          size_t *hdr_len,
+          IGB_SplitDescriptorData *ps_desc_data)
+{
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    bool fragment;
+    bool split_always;
+    size_t bheader_size;
+    size_t total_pkt_len;
 
-typedef struct IGB_PacketRxDMAState_st {
-    size_t size;
-    size_t total_size;
-    size_t ps_hdr_len;
-    size_t desc_size;
-    size_t desc_offset;
-    uint32_t rx_desc_packet_buf_size;
-    uint32_t rx_desc_header_buf_size;
-    struct iovec *iov;
-    size_t iov_ofs;
-    bool is_first;
-    IGB_BaState bastate;
-    hwaddr ba[MAX_PS_BUFFERS];
-} IGB_PacketRxDMAState;
+    if (!igb_rx_use_ps_descriptor(core, r)) {
+        return false;
+    }
+
+    memset(ps_desc_data, 0, sizeof(IGB_SplitDescriptorData));
+
+    total_pkt_len = net_rx_pkt_get_total_len(pkt);
+    bheader_size = igb_get_queue_rx_header_buf_size(core, r);
+    split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (split_always && total_pkt_len <= bheader_size) {
+        *hdr_len = total_pkt_len;
+        ps_desc_data->hdr_len = total_pkt_len;
+        return true;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (hasip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        ps_desc_data->hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        ps_desc_data->hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    /* no header splitting for SCTP */
+    if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+                      l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+        *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    ps_desc_data->sph = true;
+    ps_desc_data->hdr_len = *hdr_len;
+
+    if (*hdr_len > bheader_size) {
+        ps_desc_data->hbo = true;
+        goto header_not_handled;
+    }
+
+    return true;
+
+header_not_handled:
+    if (split_always) {
+        *hdr_len = bheader_size;
+        return true;
+    }
+
+    return false;
+}
 
 static void
 igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t *size)
 {
-    if (*size > pdma_st->rx_desc_packet_buf_size) {
-        *size = pdma_st->rx_desc_packet_buf_size;
+    if (pdma_st->do_ps && pdma_st->is_first) {
+        if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+            *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+        }
+    } else {
+        if (*size > pdma_st->rx_desc_packet_buf_size) {
+            *size = pdma_st->rx_desc_packet_buf_size;
+        }
     }
 }
 
+static inline void
+igb_write_hdr_to_rx_buffers(IGBCore *core,
+                            PCIDevice *d,
+                            hwaddr (*ba)[MAX_PS_BUFFERS],
+                            IGB_BaState *bastate,
+                            uint32_t rx_desc_header_buf_size,
+                            const char *data,
+                            dma_addr_t data_len)
+{
+    assert(data_len <= rx_desc_header_buf_size - bastate->written[0]);
+    pci_dma_write(d, (*ba)[0] + bastate->written[0], data, data_len);
+    bastate->written[0] += data_len;
+    bastate->cur_idx = 1;
+}
+
+static void
+igb_write_packet_hdr_to_descr_addr(IGBCore *core,
+                                   struct NetRxPkt *pkt,
+                                   PCIDevice *d,
+                                   IGB_PacketRxDMAState *pdma_st,
+                                   size_t *copy_size)
+{
+    size_t iov_copy;
+    size_t ps_hdr_copied = 0;
+
+    if (!pdma_st->is_first) {
+        /* Leave buffer 0 of each descriptor except first */
+        /* empty                                          */
+        igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba, &pdma_st->bastate,
+                                    pdma_st->rx_desc_header_buf_size,
+                                    NULL, 0);
+        return;
+    }
+
+    do {
+        iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+                       pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+        igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba,
+                                    &pdma_st->bastate,
+                                    pdma_st->rx_desc_header_buf_size,
+                                    pdma_st->iov->iov_base,
+                                    iov_copy);
+
+        *copy_size -= iov_copy;
+        ps_hdr_copied += iov_copy;
+
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+    pdma_st->is_first = false;
+}
+
 static void
 igb_write_payload_frag_to_rx_buffers(IGBCore *core,
                                      PCIDevice *d,
@@ -1684,7 +1911,14 @@ igb_write_to_rx_buffers(IGBCore *core,
     igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
     copy_size = pdma_st->size - pdma_st->desc_offset;
     igb_truncate_to_descriptor_size(pdma_st, &copy_size);
-    pdma_st->bastate.cur_idx = 1;
+
+    /* For PS mode copy the packet header first */
+    if (pdma_st->do_ps) {
+        igb_write_packet_hdr_to_descr_addr(core, pkt, d, pdma_st, &copy_size);
+    } else {
+        pdma_st->bastate.cur_idx = 1;
+    }
+
     igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
 }
 
@@ -1699,6 +1933,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     union e1000_rx_desc_union desc;
     const E1000E_RingInfo *rxi;
     size_t rx_desc_len;
+    IGB_SplitDescriptorData ps_desc_data;
 
     IGB_PacketRxDMAState pdma_st = {0};
     pdma_st.is_first = true;
@@ -1717,6 +1952,10 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
         d = core->owner;
     }
 
+    pdma_st.do_ps = igb_do_ps(core, rxi, pkt,
+                              &pdma_st.ps_hdr_len,
+                              &ps_desc_data);
+
     do {
         memset(&pdma_st.bastate, 0, sizeof(IGB_BaState));
         bool is_last = false;
@@ -1729,7 +1968,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
         pci_dma_read(d, base, &desc, rx_desc_len);
         trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
 
-        igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi);
+        igb_read_rx_descr(core, &desc, &pdma_st, rxi);
 
         igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
         pdma_st.desc_offset += pdma_st.desc_size;
@@ -1741,6 +1980,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
                            is_last ? pkt : NULL,
                            rss_info,
                            etqf, ts,
+                           &ps_desc_data,
                            &pdma_st.bastate.written,
                            rxi);
         pci_dma_write(d, base, &desc, rx_desc_len);
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index c4ede22181..080f03fc43 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -700,6 +700,12 @@ union e1000_adv_rx_desc {
 #define E1000_ADVRXD_PKT_UDP  BIT(5)
 #define E1000_ADVRXD_PKT_SCTP BIT(6)
 
+#define E1000_ADVRXD_HDR_LEN_OFFSET    (21 - 16)
+#define E1000_ADVRXD_ADV_HDR_LEN_MASK  ((BIT(10) - 1) << \
+                                        E1000_ADVRXD_HDR_LEN_OFFSET)
+#define E1000_ADVRXD_HDR_SPH           BIT(15)
+#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20)
+
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
     return i < 8 ? i * 4 : (i - 8) * 4 + 2;
-- 
2.25.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]