qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [Qemu-stable][PATCH] rdma: fix multiple VMs parallel migration


From: Lei Li
Subject: Re: [Qemu-devel] [Qemu-stable][PATCH] rdma: fix multiple VMs parallel migration
Date: Tue, 03 Sep 2013 13:38:33 +0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/17.0 Thunderbird/17.0

On 09/03/2013 12:20 PM, Frank Yang wrote:
Yes, it depends on low-level implementation. During my earlier test,

What do you mean by the 'it depends on low-level implementation'?  Do you test
it with IB or Ethernet?

using one CQ to send and receive may cause packet loss under heavy load:
the destination thinks it sent the READY message successfully, but the source
still waits for it. This situation always happens when the destination polls
the receive CQE first.

So I think using only one CQ may cause a packet conflict or something like that,
and it is probably a driver bug. However, using two CQs fixes the problem.

If the receiver may fail to receive this READY message from the sender under heavy load
due to packet loss, why can two CQs avoid this?




2013/9/2 Isaku Yamahata <address@hidden <mailto:address@hidden>>

    Hi. Can you elaborate why two CQs fix it? Does it depend on
    HCA implementation?

    I'm not against two CQs for sending and receiving. In fact I'm for it
    because I use two CQs for postcopy RDMA support.

    thanks,

    On Fri, Aug 30, 2013 at 08:39:31PM +0800, Frank Yang wrote:
    > When several VMs migrate with RDMA at the same time, the
    increased pressure
    > cause packet loss probabilistically and make source and
    destination wait for
    > each other. There might be some of VMs blocked during the migration.
    >
    > Fix the bug by using two completion queues, for sending and
    receiving
    > respectively.
    >
    > From 0c4829495cdc89eea2e94b103ac42c3f6a4b32c2 Mon Sep 17
    00:00:00 2001
    > From: Frank Yang <address@hidden
    <mailto:address@hidden>>
    > Date: Fri, 30 Aug 2013 17:53:34 +0800
    > Subject: [PATCH] rdma: fix multiple VMs parallel migration
    >
    > Signed-off-by: Frank Yang <address@hidden
    <mailto:address@hidden>>
    > ---
    >  migration-rdma.c | 57
    ++++++++++++++++++++++++++++++++++++--------------------
    >  1 file changed, 37 insertions(+), 20 deletions(-)
    >
    > diff --git a/migration-rdma.c b/migration-rdma.c
    > index 3d1266f..d0eacbb 100644
    > --- a/migration-rdma.c
    > +++ b/migration-rdma.c
    > @@ -362,7 +362,8 @@ typedef struct RDMAContext {
    >      struct ibv_qp *qp;                      /* queue pair */
    >      struct ibv_comp_channel *comp_channel;  /* completion
    channel */
    >      struct ibv_pd *pd;                      /* protection domain */
    > -    struct ibv_cq *cq;                      /* completion queue */
    > +    struct ibv_cq *send_cq;                 /* send completion
    queue */
    > +    struct ibv_cq *recv_cq;                 /* receive
    completion queue */
    >
    >      /*
    >       * If a previous write failed (perhaps because of a failed
    > @@ -1006,9 +1007,12 @@ static int
    qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
    >       * Completion queue can be filled by both read and write
    work requests,
    >       * so must reflect the sum of both possible queue sizes.
    >       */
    > -    rdma->cq = ibv_create_cq(rdma->verbs,
    (RDMA_SIGNALED_SEND_MAX * 3),
    > +    rdma->send_cq = ibv_create_cq(rdma->verbs,
    (RDMA_SIGNALED_SEND_MAX * 2),
    >              NULL, rdma->comp_channel, 0);
    > -    if (!rdma->cq) {
    > +    rdma->recv_cq = ibv_create_cq(rdma->verbs,
    RDMA_SIGNALED_SEND_MAX, NULL,
    > +            rdma->comp_channel, 0);
    > +
    > +    if (!rdma->send_cq || !rdma->recv_cq) {
    >          fprintf(stderr, "failed to allocate completion queue\n");
    >          goto err_alloc_pd_cq;
    >      }
    > @@ -1040,8 +1044,8 @@ static int qemu_rdma_alloc_qp(RDMAContext
    *rdma)
    >      attr.cap.max_recv_wr = 3;
    >      attr.cap.max_send_sge = 1;
    >      attr.cap.max_recv_sge = 1;
    > -    attr.send_cq = rdma->cq;
    > -    attr.recv_cq = rdma->cq;
    > +    attr.send_cq = rdma->send_cq;
    > +    attr.recv_cq = rdma->recv_cq;
    >      attr.qp_type = IBV_QPT_RC;
    >
    >      ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr);
    > @@ -1361,13 +1365,18 @@ static void
    qemu_rdma_signal_unregister(RDMAContext
    > *rdma, uint64_t index,
    >   * Return the work request ID that completed.
    >   */
    >  static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t
    *wr_id_out,
    > -                               uint32_t *byte_len)
    > +                               uint32_t *byte_len, int
    wrid_requested)
    >  {
    >      int ret;
    >      struct ibv_wc wc;
    >      uint64_t wr_id;
    >
    > -    ret = ibv_poll_cq(rdma->cq, 1, &wc);
    > +    if (wrid_requested == RDMA_WRID_RDMA_WRITE ||
    > +        wrid_requested == RDMA_WRID_SEND_CONTROL) {
    > +        ret = ibv_poll_cq(rdma->send_cq, 1, &wc);
    > +    } else if (wrid_requested >= RDMA_WRID_RECV_CONTROL) {
    > +        ret = ibv_poll_cq(rdma->recv_cq, 1, &wc);
    > +    }
    >
    >      if (!ret) {
    >          *wr_id_out = RDMA_WRID_NONE;
    > @@ -1460,12 +1469,9 @@ static int
    qemu_rdma_block_for_wrid(RDMAContext *rdma,
    > int wrid_requested,
    >      void *cq_ctx;
    >      uint64_t wr_id = RDMA_WRID_NONE, wr_id_in;
    >
    > -    if (ibv_req_notify_cq(rdma->cq, 0)) {
    > -        return -1;
    > -    }
    >      /* poll cq first */
    >      while (wr_id != wrid_requested) {
    > -        ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
    > +        ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len,
    wrid_requested);
    >          if (ret < 0) {
    >              return ret;
    >          }
    > @@ -1487,6 +1493,17 @@ static int
    qemu_rdma_block_for_wrid(RDMAContext *rdma,
    > int wrid_requested,
    >      }
    >
    >      while (1) {
    > +        if (wrid_requested == RDMA_WRID_RDMA_WRITE ||
    > +            wrid_requested == RDMA_WRID_SEND_CONTROL) {
    > +            if (ibv_req_notify_cq(rdma->send_cq, 0)) {
    > +                return -1;
    > +            }
    > +        } else if (wrid_requested >= RDMA_WRID_RECV_CONTROL) {
    > +            if (ibv_req_notify_cq(rdma->recv_cq, 0)) {
    > +                return -1;
    > +            }
    > +        }
    > +
    >          /*
    >           * Coroutine doesn't start until
    process_incoming_migration()
    >           * so don't yield unless we know we're running inside
    of a coroutine.
    > @@ -1502,12 +1519,8 @@ static int
    qemu_rdma_block_for_wrid(RDMAContext *rdma,
    > int wrid_requested,
    >
    >          num_cq_events++;
    >
    > -        if (ibv_req_notify_cq(cq, 0)) {
    > -            goto err_block_for_wrid;
    > -        }
    > -
    >          while (wr_id != wrid_requested) {
    > -            ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
    > +            ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len,
    wrid_requested);
    >              if (ret < 0) {
    >                  goto err_block_for_wrid;
    >              }
    > @@ -2236,9 +2249,13 @@ static void qemu_rdma_cleanup(RDMAContext
    *rdma)
    >          ibv_destroy_qp(rdma->qp);
    >          rdma->qp = NULL;
    >      }
    > -    if (rdma->cq) {
    > -        ibv_destroy_cq(rdma->cq);
    > -        rdma->cq = NULL;
    > +    if (rdma->send_cq) {
    > +        ibv_destroy_cq(rdma->send_cq);
    > +        rdma->send_cq = NULL;
    > +    }
    > +    if (rdma->recv_cq) {
    > +        ibv_destroy_cq(rdma->recv_cq);
    > +        rdma->recv_cq = NULL;
    >      }
    >      if (rdma->comp_channel) {
    >  ibv_destroy_comp_channel(rdma->comp_channel);
    > @@ -2770,7 +2787,7 @@ static size_t qemu_rdma_save_page(QEMUFile
    *f, void
    > *opaque,
    >       */
    >      while (1) {
    >          uint64_t wr_id, wr_id_in;
    > -        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL);
    > +        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL,
    RDMA_WRID_RDMA_WRITE);
    >          if (ret < 0) {
    >              fprintf(stderr, "rdma migration: polling error!
    %d\n", ret);
    >              goto err;
    > --
    > 1.8.3.msysgit.0
    >
    >

    --
    yamahata




--
Lei




reply via email to

[Prev in Thread] Current Thread [Next in Thread]