qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter


From: Benoît Canet
Subject: Re: [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum.
Date: Tue, 11 Mar 2014 21:14:05 +0100
User-agent: Mutt/1.5.21 (2010-09-15)

The Tuesday 11 Mar 2014 à 20:58:06 (+0100), Max Reitz wrote :
> On 11.03.2014 17:36, Benoît Canet wrote:
> >On read operations when this parameter is set and some replicas are corrupted
> >while quorum can be reached quorum will proceed to rewrite the correct 
> >version
> >of the data to fix the corrupted replicas.
> >
> >This will shine with SSD where the FTL will remap the same block at another
> >place on rewrite.
> >
> >Signed-off-by: Benoit Canet <address@hidden>
> >---
> >  block/quorum.c             | 97 
> > +++++++++++++++++++++++++++++++++++++++++++---
> >  qapi-schema.json           |  5 ++-
> >  tests/qemu-iotests/081     | 14 +++++++
> >  tests/qemu-iotests/081.out | 11 +++++-
> >  4 files changed, 119 insertions(+), 8 deletions(-)
> >
> >diff --git a/block/quorum.c b/block/quorum.c
> >index bd997b7..af4fd3c 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -22,6 +22,7 @@
> >  #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> >  #define QUORUM_OPT_BLKVERIFY      "blkverify"
> >+#define QUORUM_OPT_REWRITE        "rewrite-corrupted"
> >  /* This union holds a vote hash value */
> >  typedef union QuorumVoteValue {
> >@@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
> >                              * It is useful to debug other block drivers by
> >                              * comparing them with a reference one.
> >                              */
> >+    bool rewrite_corrupted;/* true if the driver must rewrite-on-read 
> >corrupted
> >+                            * block if Quorum is reached.
> >+                            */
> >  } BDRVQuorumState;
> >  typedef struct QuorumAIOCB QuorumAIOCB;
> >@@ -104,13 +108,17 @@ struct QuorumAIOCB {
> >      int count;                  /* number of completed AIOCB */
> >      int success_count;          /* number of successfully completed AIOCB 
> > */
> >+    int rewrite_count;          /* number of replica to rewrite: count down 
> >to
> >+                                 * zero once writes are fired
> >+                                 */
> >+
> >      QuorumVotes votes;
> >      bool is_read;
> >      int vote_ret;
> >  };
> >-static void quorum_vote(QuorumAIOCB *acb);
> >+static bool quorum_vote(QuorumAIOCB *acb);
> >  static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
> >  {
> >@@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> >      acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
> >      acb->count = 0;
> >      acb->success_count = 0;
> >+    acb->rewrite_count = 0;
> >      acb->votes.compare = quorum_sha256_compare;
> >      QLIST_INIT(&acb->votes.vote_list);
> >      acb->is_read = false;
> >@@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB 
> >*acb)
> >      return false;
> >  }
> >+static void quorum_rewrite_aio_cb(void *opaque, int ret)
> >+{
> >+    QuorumAIOCB *acb = opaque;
> >+
> >+    /* one less rewrite to do */
> >+    acb->rewrite_count--;
> >+
> >+    /* wait until all rewrite callback have completed */
> 
> *callbacks
> 
> >+    if (acb->rewrite_count) {
> >+        return;
> >+    }
> >+
> >+    quorum_aio_finalize(acb);
> >+}
> >+
> >  static void quorum_aio_cb(void *opaque, int ret)
> >  {
> >      QuorumChildRequest *sacb = opaque;
> >      QuorumAIOCB *acb = sacb->parent;
> >      BDRVQuorumState *s = acb->common.bs->opaque;
> >+    bool rewrite = false;
> >      sacb->ret = ret;
> >      acb->count++;
> >@@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
> >      /* Do the vote on read */
> >      if (acb->is_read) {
> >-        quorum_vote(acb);
> >+        rewrite = quorum_vote(acb);
> >      } else {
> >          quorum_has_too_much_io_failed(acb);
> >      }
> >-    quorum_aio_finalize(acb);
> >+    /* if no rewrite is done the code will finish right away */
> >+    if (!rewrite) {
> >+        quorum_aio_finalize(acb);
> >+    }
> >  }
> >  static void quorum_report_bad_versions(BDRVQuorumState *s,
> >@@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState 
> >*s,
> >      }
> >  }
> >+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB 
> >*acb,
> >+                                        QuorumVoteValue *value)
> >+{
> >+    QuorumVoteVersion *version;
> >+    QuorumVoteItem *item;
> >+    int count = 0;
> >+
> >+    /* first count the number of bad versions: done first to avoid 
> >concurency
> 
> *concurrency
> 
> >+     * issues.
> >+     */
> >+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+        if (acb->votes.compare(&version->value, value)) {
> >+            continue;
> >+        }
> >+        QLIST_FOREACH(item, &version->items, next) {
> >+            count++;
> >+        }
> >+    }
> >+
> >+    /* quorum_rewrite_aio_cb will count down this to zero */
> >+    acb->rewrite_count = count;
> >+
> >+    /* now fire the correcting rewrites */
> >+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+        if (acb->votes.compare(&version->value, value)) {
> >+            continue;
> >+        }
> >+        QLIST_FOREACH(item, &version->items, next) {
> >+            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
> >+                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
> >+        }
> >+    }
> >+
> >+    /* return true if any rewrite is done else false */
> >+    return count;
> >+}
> >+
> >  static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> >  {
> >      int i;
> >@@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
> >      return ret;
> >  }
> >-static void quorum_vote(QuorumAIOCB *acb)
> >+static bool quorum_vote(QuorumAIOCB *acb)
> >  {
> >      bool quorum = true;
> >+    bool rewrite = false;
> >      int i, j, ret;
> >      QuorumVoteValue hash;
> >      BDRVQuorumState *s = acb->common.bs->opaque;
> >      QuorumVoteVersion *winner;
> >      if (quorum_has_too_much_io_failed(acb)) {
> >-        return;
> >+        return false;;
> 
> Two semicolons here.
> 
> >      }
> >      /* get the index of the first successful read */
> >@@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
> >      /* Every successful read agrees */
> >      if (quorum) {
> >          quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
> >-        return;
> >+        return false;;
> 
> And here.
> 
> >      }
> >      /* compute hashes for each successful read, also store indexes */
> >@@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
> >      /* some versions are bad print them */
> >      quorum_report_bad_versions(s, acb, &winner->value);
> >+    /* corruption correction is actived */
> 
> I'd vote for "enabled" rather than "actived". But I like "corruption
> correction". :-)
> 
> >+    if (s->rewrite_corrupted) {
> >+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
> >+    }
> >+
> >  free_exit:
> >      /* free lists */
> >      quorum_free_vote_list(&acb->votes);
> >+    return rewrite;
> >  }
> >  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >@@ -709,6 +781,11 @@ static QemuOptsList quorum_runtime_opts = {
> >              .type = QEMU_OPT_BOOL,
> >              .help = "Trigger block verify mode if set",
> >          },
> >+        {
> >+            .name = QUORUM_OPT_REWRITE,
> >+            .type = QEMU_OPT_BOOL,
> >+            .help = "Rewrite corrupted block on read quorum",
> >+        },
> >          { /* end of list */ }
> >      },
> >  };
> >@@ -770,6 +847,14 @@ static int quorum_open(BlockDriverState *bs, QDict 
> >*options, int flags,
> >                  "and using two files with vote_threshold=2\n");
> >      }
> >+    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, 
> >false);
> >+    if (s->rewrite_corrupted && s->is_blkverify) {
> >+        error_setg(&local_err,
> >+                   "rewrite-corrupted=on cannot be used with blkverify=on");
> >+        ret = -EINVAL;
> >+        goto exit;
> >+    }
> >+
> >      /* allocate the children BlockDriverState array */
> >      s->bs = g_new0(BlockDriverState *, s->num_children);
> >      opened = g_new0(bool, s->num_children);
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index 6476d4a..f5a8767 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -4542,12 +4542,15 @@
> >  #
> >  # @vote-threshold: the vote limit under which a read will fail
> >  #
> >+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is 
> >reached
> >+#                     (Since 2.1)
> >+#
> >  # Since: 2.0
> >  ##
> >  { 'type': 'BlockdevOptionsQuorum',
> >    'data': { '*blkverify': 'bool',
> >              'children': [ 'BlockdevRef' ],
> >-            'vote-threshold': 'int' } }
> >+            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
> >  ##
> >  # @BlockdevOptions
> >diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
> >index b512d00..bb211d6 100755
> >--- a/tests/qemu-iotests/081
> >+++ b/tests/qemu-iotests/081
> >@@ -95,6 +95,18 @@ echo "== checking quorum correction =="
> >  $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> >  echo
> >+echo "== using quorum rewrite corrupted mode =="
> >+
> >+quorum="$quorum,file.rewrite-corrupted=on"
> >+
> >+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> >+
> >+echo
> >+echo "== checking that quorum has corrected the corrupted file =="
> >+
> >+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> >+
> 
> I'd prefer it if you could put these new test cases below the
> "checking mixed reference/option specification" test, just because
> I'd like that test to output the quorum warning - especially there
> is no other point in the test where we can see that QMP message.
> 
> But that's up to you and if you at least fix the double semicolons:

Ok I will do these changes.

Thanks for the review

Best regards

Benoît

> 
> Reviewed-by: Max Reitz <address@hidden>
> 
> >+echo
> >  echo "== checking mixed reference/option specification =="
> >  run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" 
> > <<EOF
> >@@ -137,6 +149,8 @@ echo
> >  echo "== breaking quorum =="
> >  $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
> >+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> >+
> >  echo
> >  echo "== checking that quorum is broken =="
> >diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
> >index 84aeb0c..2d8b290 100644
> >--- a/tests/qemu-iotests/081.out
> >+++ b/tests/qemu-iotests/081.out
> >@@ -25,12 +25,19 @@ wrote 10485760/10485760 bytes at offset 0
> >  read 10485760/10485760 bytes at offset 0
> >  10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+== using quorum rewrite corrupted mode ==
> >+read 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+
> >+== checking that quorum has corrected the corrupted file ==
> >+read 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+
> >  == checking mixed reference/option specification ==
> >  Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
> >  QMP_VERSION
> >  {"return": {}}
> >  {"return": {}}
> >-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
> >"QUORUM_REPORT_BAD", "data": {"node-name": "", "sectors-count": 20480, 
> >"sector-num": 0}}
> >  read 10485760/10485760 bytes at offset 0
> >  10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >  {"return": ""}
> >@@ -43,6 +50,8 @@ read 10485760/10485760 bytes at offset 0
> >  == breaking quorum ==
> >  wrote 10485760/10485760 bytes at offset 0
> >  10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+wrote 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >  == checking that quorum is broken ==
> >  qemu-io: can't open device (null): Could not read image for determining 
> > its format: Input/output error
> 
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]