[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 2/3] nbd: Explicitly disconnect and fail infligh
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH 2/3] nbd: Explicitly disconnect and fail inflight I/O requests on error, then reconnect next I/O request. |
Date: |
Wed, 24 Oct 2012 16:31:39 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20121016 Thunderbird/16.0.1 |
Il 22/10/2012 13:09, address@hidden ha scritto:
> diff --git a/block/nbd.c b/block/nbd.c
> index 9536408..1caae89 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -71,6 +71,9 @@ typedef struct BDRVNBDState {
> char *host_spec;
> } BDRVNBDState;
>
> +static int nbd_establish_connection(BDRVNBDState *s);
> +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect);
> +
> static bool nbd_is_connected(BDRVNBDState *s)
> {
> return s->sock >= 0;
> @@ -152,6 +155,20 @@ static int nbd_have_request(void *opaque)
> return s->in_flight > 0;
> }
>
> +static void nbd_abort_inflight_requests(BDRVNBDState *s)
> +{
> + int i;
> + Coroutine *co;
> +
> + s->reply.handle = 0;
> + for (i = 0; i < MAX_NBD_REQUESTS; i++) {
> + co = s->recv_coroutine[i];
> + if (co && co != qemu_coroutine_self()) {
> + qemu_coroutine_enter(co, NULL);
> + }
> + }
> +}
I think this is quite risky. Does it work if you shutdown(s, 2) the
socket, then wait for the number of pending requests (not just those
in_flight---also those that are waiting) to become 0, and only then
close it?
(For the wait you can add another Coroutine * field to BDRVNBDState, and
reenter it in nbd_coroutine_end if the number of pending requests
becomes zero).
> static void nbd_reply_ready(void *opaque)
> {
> BDRVNBDState *s = opaque;
> @@ -168,8 +185,9 @@ static void nbd_reply_ready(void *opaque)
> return;
> }
> if (ret < 0) {
> - s->reply.handle = 0;
> - goto fail;
> + nbd_teardown_connection(s, false);
> + nbd_abort_inflight_requests(s);
This is also problematic because you first close the socket, which means
that something else can grab the file descriptor. But the original file
descriptor is in use (in qemu_co_recvv or qemu_co_sendv) until after
nbd_abort_inflight_requests returns.
So the correct order would be something like this:
assert(nbd_is_connected(s));
shutdown(s->sock, 2);
nbd_abort_inflight_requests(s);
nbd_teardown_connection(s, false);
where (if my theory is right) the shutdown should immediately cause the
socket to become readable and writable.
> + return;
> }
> }
>
> @@ -178,20 +196,15 @@ static void nbd_reply_ready(void *opaque)
> * one coroutine is called until the reply finishes. */
> i = HANDLE_TO_INDEX(s, s->reply.handle);
> if (i >= MAX_NBD_REQUESTS) {
> - goto fail;
> + nbd_teardown_connection(s, false);
> + nbd_abort_inflight_requests(s);
> + return;
> }
>
> if (s->recv_coroutine[i]) {
> qemu_coroutine_enter(s->recv_coroutine[i], NULL);
> return;
> }
> -
> -fail:
> - for (i = 0; i < MAX_NBD_REQUESTS; i++) {
> - if (s->recv_coroutine[i]) {
> - qemu_coroutine_enter(s->recv_coroutine[i], NULL);
> - }
> - }
> }
>
> static void nbd_restart_write(void *opaque)
> @@ -206,6 +219,13 @@ static int nbd_co_send_request(BDRVNBDState *s, struct
> nbd_request *request,
> int rc, ret;
>
> qemu_co_mutex_lock(&s->send_mutex);
> +
> + if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) {
> + nbd_abort_inflight_requests(s);
> + qemu_co_mutex_unlock(&s->send_mutex);
> + return -EIO;
Do you really need to abort all the requests, or only this one?
Paolo
> + }
> +
> s->send_coroutine = qemu_coroutine_self();
> qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
> nbd_have_request, s);
> @@ -214,6 +234,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct
> nbd_request *request,
> ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
> offset, request->len);
> if (ret != request->len) {
> + s->send_coroutine = NULL;
> + nbd_teardown_connection(s, false);
> + qemu_co_mutex_unlock(&s->send_mutex);
> return -EIO;
> }
> }
> @@ -259,9 +282,8 @@ static void nbd_coroutine_end(BDRVNBDState *s, struct
> nbd_request *request)
> }
> }
>
> -static int nbd_establish_connection(BlockDriverState *bs)
> +static int nbd_establish_connection(BDRVNBDState *s)
> {
> - BDRVNBDState *s = bs->opaque;
> int sock;
> int ret;
> off_t size;
> @@ -302,19 +324,23 @@ static int nbd_establish_connection(BlockDriverState
> *bs)
> return 0;
> }
>
> -static void nbd_teardown_connection(BlockDriverState *bs)
> +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect)
> {
> - BDRVNBDState *s = bs->opaque;
> +
> struct nbd_request request;
>
> - request.type = NBD_CMD_DISC;
> - request.from = 0;
> - request.len = 0;
> - nbd_send_request(s->sock, &request);
> + if (nbd_is_connected(s)) {
> + if (send_disconnect) {
> + request.type = NBD_CMD_DISC;
> + request.from = 0;
> + request.len = 0;
> + nbd_send_request(s->sock, &request);
> + }
>
> - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
> - closesocket(s->sock);
> - s->sock = -1;
> + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
> + closesocket(s->sock);
> + s->sock = -1; /* Makes nbd_is_connected() return true */
> + }
> }
>
> static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
> @@ -336,7 +362,7 @@ static int nbd_open(BlockDriverState *bs, const char*
> filename, int flags)
> /* establish TCP connection, return error if it fails
> * TODO: Configurable retry-until-timeout behaviour.
> */
> - result = nbd_establish_connection(bs);
> + result = nbd_establish_connection(s);
>
> return result;
> }
> @@ -494,7 +520,7 @@ static void nbd_close(BlockDriverState *bs)
> g_free(s->export_name);
> g_free(s->host_spec);
>
> - nbd_teardown_connection(bs);
> + nbd_teardown_connection(s, true);
> }
>
> static int64_t nbd_getlength(BlockDriverState *bs)
>
- [Qemu-devel] [PATCH 0/3] NBD reconnection behaviour, nick, 2012/10/22
- [Qemu-devel] [PATCH 3/3] nbd: Move reconnection attempts from each new I/O request to a 5-second timer, nick, 2012/10/22
- [Qemu-devel] [PATCH 2/3] nbd: Explicitly disconnect and fail inflight I/O requests on error, then reconnect next I/O request., nick, 2012/10/22
- [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, nick, 2012/10/22
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Kevin Wolf, 2012/10/23
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Nicholas Thomas, 2012/10/23
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Kevin Wolf, 2012/10/23
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Jamie Lokier, 2012/10/23
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Nicholas Thomas, 2012/10/24
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Kevin Wolf, 2012/10/24
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Jamie Lokier, 2012/10/24
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Paolo Bonzini, 2012/10/24
- Re: [Qemu-devel] [PATCH 1/3] nbd: Only try to send flush/discard commands if connected to the NBD server, Kevin Wolf, 2012/10/25