Make it much more understandable, add a missing
iov_cnt argument (number of iovs in the iov), and
add comments to it.
The new implementation has been extensively tested
by splitting a large buffer into many small
randomly-sized chunks, sending it over socket to
another, slow process and verifying the receiving
data is the same.
Signed-off-by: Michael Tokarev<address@hidden>
---
cutils.c | 83 ----------------------------------------
iov.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++
iov.h | 15 ++++---
qemu-coroutine-io.c | 2 +-
4 files changed, 115 insertions(+), 90 deletions(-)
diff --git a/cutils.c b/cutils.c
index cb6f638..e2bc1b8 100644
--- a/cutils.c
+++ b/cutils.c
@@ -375,86 +375,3 @@ int qemu_parse_fd(const char *param)
}
return fd;
}
-
-ssize_t iov_send_recv(int sockfd, struct iovec *iov,
- size_t offset, size_t bytes,
- bool do_sendv)
-{
- int iovlen;
- ssize_t ret;
- size_t diff;
- struct iovec *last_iov;
-
- /* last_iov is inclusive, so count from one. */
- iovlen = 1;
- last_iov = iov;
- bytes += offset;
-
- while (last_iov->iov_len< bytes) {
- bytes -= last_iov->iov_len;
-
- last_iov++;
- iovlen++;
- }
-
- diff = last_iov->iov_len - bytes;
- last_iov->iov_len -= diff;
-
- while (iov->iov_len<= offset) {
- offset -= iov->iov_len;
-
- iov++;
- iovlen--;
- }
-
- iov->iov_base = (char *) iov->iov_base + offset;
- iov->iov_len -= offset;
-
- {
-#if defined CONFIG_IOVEC&& defined CONFIG_POSIX
- struct msghdr msg;
- memset(&msg, 0, sizeof(msg));
- msg.msg_iov = iov;
- msg.msg_iovlen = iovlen;
-
- do {
- if (do_sendv) {
- ret = sendmsg(sockfd,&msg, 0);
- } else {
- ret = recvmsg(sockfd,&msg, 0);
- }
- } while (ret == -1&& errno == EINTR);
-#else
- struct iovec *p = iov;
- ret = 0;
- while (iovlen> 0) {
- int rc;
- if (do_sendv) {
- rc = send(sockfd, p->iov_base, p->iov_len, 0);
- } else {
- rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0);
- }
- if (rc == -1) {
- if (errno == EINTR) {
- continue;
- }
- if (ret == 0) {
- ret = -1;
- }
- break;
- }
- if (rc == 0) {
- break;
- }
- ret += rc;
- iovlen--, p++;
- }
-#endif
- }
-
- /* Undo the changes above */
- iov->iov_base = (char *) iov->iov_base - offset;
- iov->iov_len += offset;
- last_iov->iov_len += diff;
- return ret;
-}
diff --git a/iov.c b/iov.c
index fd518dd..6c79b68 100644
--- a/iov.c
+++ b/iov.c
@@ -18,6 +18,14 @@
#include "iov.h"
+#ifdef _WIN32
+# include<windows.h>
+# include<winsock2.h>
+#else
+# include<sys/types.h>
+# include<sys/socket.h>
+#endif
+
size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
size_t offset, const void *buf, size_t bytes)
{
@@ -87,6 +95,103 @@ size_t iov_size(const struct iovec *iov, const unsigned int
iov_cnt)
return len;
}
+/* helper function for iov_send_recv() */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#if defined CONFIG_IOVEC&& defined CONFIG_POSIX
+ ssize_t ret;
+ struct msghdr msg;
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_cnt;
+ do {
+ ret = do_send
+ ? sendmsg(sockfd,&msg, 0)
+ : recvmsg(sockfd,&msg, 0);
+ } while (ret< 0&& errno == EINTR);
+ return ret;
+#else
+ /* else send piece-by-piece */
+ /*XXX Note: windows has WSASend() and WSARecv() */
+ unsigned i;
+ size_t count = 0;
+ for(i = 0; i< iov_cnt; ++i) {
+ ssize_t r = do_send
+ ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
+ : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+ if (r> 0) {
+ ret += r;
+ } else if (!r) {
+ break;
+ } else if (errno == EINTR) {
+ continue;
+ } else {
+ /* else it is some "other" error,
+ * only return if there was no data processed. */
+ if (ret == 0) {
+ return -1;
+ }
+ break;
+ }
+ }
+ return count;
+#endif
+}
+
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes,
+ bool do_send)
+{
+ ssize_t ret;
+ size_t lastlen;
+ unsigned si, ei; /* start and end indexes */
+
+ /* Find the start position, skipping `offset' bytes:
+ * first, skip all full-sized vector elements, */
+ for(si = 0; si< iov_cnt&& offset>= iov[si].iov_len; ++si) {
+ offset -= iov[si].iov_len;
+ }
+ if (offset) {
+ assert(si< iov_cnt);
+ /* second, skip `offset' bytes from the (now) first element,
+ * undo it on exit */
+ iov[si].iov_base += offset;
+ iov[si].iov_len -= offset;
+ }
+ /* Find the end position skipping `bytes' bytes: */
+ /* first, skip all full-sized elements */
+ for(ei = si; ei< iov_cnt&& iov[ei].iov_len<= bytes; ++ei) {
+ bytes -= iov[ei].iov_len;
+ }
+ if (bytes) {
+ /* second, fixup the last element, and remember
+ * the length we've cut from the end of it in `bytes' */
+ assert(ei< iov_cnt);
+ assert(iov[ei].iov_len> bytes);
+ lastlen = iov[ei].iov_len;
+ iov[ei].iov_len = bytes;
+ ++ei;
+ } else {
+ lastlen = 0;
+ }
+
+ ret = do_send_recv(sockfd, iov + si, ei - si, do_send);
+
+ /* Undo the changes above */
+ if (offset) {
+ iov[si].iov_base -= offset;
+ iov[si].iov_len += offset;
+ }
+ if (lastlen) {
+ --ei;
+ iov[ei].iov_len = lastlen;
+ }
+
+ return ret;
+}
+
+
void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
FILE *fp, const char *prefix, size_t limit)
{
diff --git a/iov.h b/iov.h
index 9b6a883..381f37a 100644
--- a/iov.h
+++ b/iov.h
@@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int
iov_cnt,
* `offset' bytes in the beginning of iovec buffer are skipped and
* next `bytes' bytes are used, which must be within data of iovec.
*
- * r = iov_send_recv(sockfd, iov, offset, bytes, true);
+ * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true);
*
* is logically equivalent to
*
@@ -68,13 +68,16 @@ size_t iov_memset(const struct iovec *iov, const unsigned
int iov_cnt,
* iov_to_buf(iov, iovcnt, offset, buf, bytes);
* r = send(sockfd, buf, bytes, 0);
* free(buf);
+ *
+ * For iov_send_recv() _whole_ area being sent or received
+ * should be within the iovec, not only beginning of it.
*/
-ssize_t iov_send_recv(int sockfd, struct iovec *iov,
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
size_t offset, size_t bytes, bool do_send);
-#define iov_recv(sockfd, iov, offset, bytes) \
- iov_send_recv(sockfd, iov, offset, bytes, false)
-#define iov_send(sockfd, iov, offset, bytes) \
- iov_send_recv(sockfd, iov, offset, bytes, true)
+#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \
+ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \
+ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true)
/**
* Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements
diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c
index 6693c78..5734965 100644
--- a/qemu-coroutine-io.c
+++ b/qemu-coroutine-io.c
@@ -34,7 +34,7 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned
iov_cnt,
size_t done = 0;
ssize_t ret;
while (done< bytes) {
- ret = iov_send_recv(sockfd, iov,
+ ret = iov_send_recv(sockfd, iov, iov_cnt,
offset + done, bytes - done, do_send);
if (ret> 0) {
done += ret;