Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh

qemu-block
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh

From:	Max Reitz
Subject:	Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh
Date:	Tue, 13 Feb 2018 19:49:12 +0100
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.6.0
On 2018-01-18 17:44, Pino Toscano wrote:
> Rewrite the implementation of the ssh block driver to use libssh instead
> of libssh2.  The libssh library has various advantages over libssh2:
> - easier API for authentication (for example for using ssh-agent)
> - easier API for known_hosts handling
> - supports newer types of keys in known_hosts
> 
> Kerberos authentication can be enabled once the libssh bug for it [1] is
> fixed.
> 
> The development version of libssh (i.e. the future 0.8.x) supports
> fsync, so reuse the build time check for this.
> 
> [1] https://red.libssh.org/issues/242
> 
> Signed-off-by: Pino Toscano <address@hidden>
> ---
> 
> Changes from v3:
> - fix socket cleanup in connect_to_ssh()
> - add comments about the socket cleanup
> - improve the error reporting (closer to what was with libssh2)
> - improve EOF detection on sftp_read()
> 
> Changes from v2:
> - used again an own fd
> - fixed co_yield() implementation
> 
> Changes from v1:
> - fixed jumbo packets writing
> - fixed missing 'err' assignment
> - fixed commit message

One thing: The performance seems to have dropped hugely, from what I can
tell.

Before this patch, running the iotests 1-10 over ssh (raw/ssh) took
12.6 s.  With this patch, they take 59.3 s.  Perhaps the starkest
contrast can be seen in test 1, which took 4 s before and 27 s after --
this test simply reads and writes 128 MB of continuous data.

I like having elliptic curves, but I think this patch needs optimization
work before we can replace libssh2.

>  block/Makefile.objs |   6 +-
>  block/ssh.c         | 522 
> ++++++++++++++++++++++++----------------------------
>  configure           |  65 ++++---
>  3 files changed, 278 insertions(+), 315 deletions(-)

[...]

> diff --git a/block/ssh.c b/block/ssh.c
> index b049a16eb9..2975fc27d8 100644
> --- a/block/ssh.c
> +++ b/block/ssh.c

[...]

> @@ -87,27 +81,25 @@ static void ssh_state_init(BDRVSSHState *s)
>  {
>      memset(s, 0, sizeof *s);
>      s->sock = -1;
> -    s->offset = -1;
>      qemu_co_mutex_init(&s->lock);
>  }
>  
>  static void ssh_state_free(BDRVSSHState *s)
>  {
> +    if (s->attrs) {
> +        sftp_attributes_free(s->attrs);
> +    }
>      if (s->sftp_handle) {
> -        libssh2_sftp_close(s->sftp_handle);
> +        sftp_close(s->sftp_handle);
>      }
>      if (s->sftp) {
> -        libssh2_sftp_shutdown(s->sftp);
> +        sftp_free(s->sftp);
>      }
>      if (s->session) {
> -        libssh2_session_disconnect(s->session,
> -                                   "from qemu ssh client: "
> -                                   "user closed the connection");
> -        libssh2_session_free(s->session);
> -    }
> -    if (s->sock >= 0) {
> -        close(s->sock);
> +        ssh_disconnect(s->session);
> +        ssh_free(s->session);
>      }
> +    /* s->sock is owned by the ssh_session, which free's it. */

s/free's/frees/

>  }
>  
>  static void GCC_FMT_ATTR(3, 4)
> @@ -121,13 +113,13 @@ session_error_setg(Error **errp, BDRVSSHState *s, const 
> char *fs, ...)
>      va_end(args);
>  
>      if (s->session) {
> -        char *ssh_err;
> +        const char *ssh_err;
>          int ssh_err_code;
>  
> -        /* This is not an errno.  See <libssh2.h>. */
> -        ssh_err_code = libssh2_session_last_error(s->session,
> -                                                  &ssh_err, NULL, 0);
> -        error_setg(errp, "%s: %s (libssh2 error code: %d)",
> +        /* This is not an errno.  See <libssh/libssh.h>. */
> +        ssh_err = ssh_get_error(s->session);
> +        ssh_err_code = ssh_get_error_code(s->session);
> +        error_setg(errp, "%s: %s (libssh error code: %d)",
>                     msg, ssh_err, ssh_err_code);

Maybe we should not append the error info if there is no error.

(Example:

$ ./qemu-img info ssh://localhost/tmp/foo
qemu-img: Could not open 'ssh://localhost/tmp/foo': no host key was
found in known_hosts:  (libssh error code: 0)

)

>      } else {
>          error_setg(errp, "%s", msg);

[...]

> @@ -291,68 +283,41 @@ static void ssh_parse_filename(const char *filename, 
> QDict *options,
>  static int check_host_key_knownhosts(BDRVSSHState *s,
>                                       const char *host, int port, Error 
> **errp)
>  {
> -    const char *home;
> -    char *knh_file = NULL;
> -    LIBSSH2_KNOWNHOSTS *knh = NULL;
> -    struct libssh2_knownhost *found;
> -    int ret, r;
> -    const char *hostkey;
> -    size_t len;
> -    int type;
> +    int ret;
> +    int state;
>  
> -    hostkey = libssh2_session_hostkey(s->session, &len, &type);
> -    if (!hostkey) {
> -        ret = -EINVAL;
> -        session_error_setg(errp, s, "failed to read remote host key");
> -        goto out;
> -    }
> +    state = ssh_is_server_known(s->session);
>  
> -    knh = libssh2_knownhost_init(s->session);
> -    if (!knh) {
> -        ret = -EINVAL;
> -        session_error_setg(errp, s,
> -                           "failed to initialize known hosts support");
> -        goto out;
> -    }
> -
> -    home = getenv("HOME");
> -    if (home) {
> -        knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
> -    } else {
> -        knh_file = g_strdup_printf("/root/.ssh/known_hosts");
> -    }
> -
> -    /* Read all known hosts from OpenSSH-style known_hosts file. */
> -    libssh2_knownhost_readfile(knh, knh_file, 
> LIBSSH2_KNOWNHOST_FILE_OPENSSH);
> -
> -    r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
> -                                 LIBSSH2_KNOWNHOST_TYPE_PLAIN|
> -                                 LIBSSH2_KNOWNHOST_KEYENC_RAW,
> -                                 &found);
> -    switch (r) {
> -    case LIBSSH2_KNOWNHOST_CHECK_MATCH:
> +    switch (state) {
> +    case SSH_SERVER_KNOWN_OK:
>          /* OK */
> -        DPRINTF("host key OK: %s", found->key);
>          break;
> -    case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
> +    case SSH_SERVER_KNOWN_CHANGED:
>          ret = -EINVAL;
>          session_error_setg(errp, s,
> -                      "host key does not match the one in known_hosts"
> -                      " (found key %s)", found->key);
> +                      "host key does not match the one in known_hosts");

Technically, this is against the specification which requires us to warn
the user about a possible attack. :-P

>          goto out;
> -    case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
> +    case SSH_SERVER_FOUND_OTHER:
> +        ret = -EINVAL;
> +        session_error_setg(errp, s,
> +                      "host key for this server not found, another type "
> +                      "exists");
> +        goto out;
> +    case SSH_SERVER_FILE_NOT_FOUND:
> +        ret = -EINVAL;

Technically, this should be -ENOENT, but it doesn't actually matter (I
hope).

> +        session_error_setg(errp, s, "known_hosts file not found");
> +        goto out;
> +    case SSH_SERVER_NOT_KNOWN:
>          ret = -EINVAL;
>          session_error_setg(errp, s, "no host key was found in known_hosts");
>          goto out;
> -    case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
> +    case SSH_SERVER_ERROR:
>          ret = -EINVAL;
> -        session_error_setg(errp, s,
> -                      "failure matching the host key with known_hosts");
> +        session_error_setg(errp, s, "server error");
>          goto out;
>      default:
>          ret = -EINVAL;
> -        session_error_setg(errp, s, "unknown error matching the host key"
> -                      " with known_hosts (%d)", r);
> +        session_error_setg(errp, s, "error while checking for known server");
>          goto out;
>      }

Only SSH_SERVER_ERROR actually sets a session error, I think.  So I
guess all the rest should just use error_setg() instead of
session_error_setg().  Otherwise, as in my example above, we'll just
append an empty string and error code 0, or even worse, we append an
unrelated error description.

[...]

> @@ -407,18 +368,31 @@ static int compare_fingerprint(const unsigned char 
> *fingerprint, size_t len,
>  
>  static int
>  check_host_key_hash(BDRVSSHState *s, const char *hash,
> -                    int hash_type, size_t fingerprint_len, Error **errp)
> +                    enum ssh_publickey_hash_type type, size_t 
> fingerprint_len,
> +                    Error **errp)
>  {
> -    const char *fingerprint;
> +    int r;
> +    ssh_key pubkey;
> +    unsigned char *server_hash;
> +    size_t server_hash_len;
>  
> -    fingerprint = libssh2_hostkey_hash(s->session, hash_type);
> -    if (!fingerprint) {
> +    r = ssh_get_publickey(s->session, &pubkey);

The documentation says this is deprecated and one should use
ssh_get_server_publickey() instead (it just looks like a rename, though).

> +    if (r < 0) {

This works, but I think I'd prefer "r != SSH_OK" (because that's what
the documentation says).

(Also in all other places that check ssh_* function call return values.)

>          session_error_setg(errp, s, "failed to read remote host key");
>          return -EINVAL;
>      }
>  
> -    if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
> -                           hash) != 0) {
> +    r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
> +    ssh_key_free(pubkey);
> +    if (r < 0) {
> +        session_error_setg(errp, s,
> +                           "failed reading the hash of the server SSH key");
> +        return -EINVAL;
> +    }
> +
> +    r = compare_fingerprint(server_hash, server_hash_len, hash);

Now this is interesting, thanks for fixing an out-of-bounds access, I
guess (by using server_hash_len instead of fingerprint_len).  But now
fingerprint_len is completely unused in this function.

It's OK because compare_fingerprint() stops on anything but [:0-9a-fA-F]
(including \0), even though I really don't like this implicit safety.
It would be OK if it was noted in compare_fingerprint()'s description at
least (that you don't need to pass the length of host_key_check as long
as it's null-terminated).

I guess we should just add that comment and scrap the fingerprint_len
parameter altogether.  It doesn't make any sense now anymore.  We get
the actual hash's length from libssh; and it didn't have any correlation
to the actual length of the @hash string.  It is just set to 16/20,
depending on the caller, disregarding how long the actual string is.

> +    ssh_clean_pubkey_hash(&server_hash);
> +    if (r != 0) {
>          error_setg(errp, "remote host key does not match host_key_check 
> '%s'",
>                     hash);
>          return -EPERM;

[...]

> @@ -459,57 +433,32 @@ static int check_host_key(BDRVSSHState *s, const char 
> *host, int port,
>  static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
>  {
>      int r, ret;
> -    const char *userauthlist;
> -    LIBSSH2_AGENT *agent = NULL;
> -    struct libssh2_agent_publickey *identity;
> -    struct libssh2_agent_publickey *prev_identity = NULL;
> +    int method;
>  
> -    userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
> -    if (strstr(userauthlist, "publickey") == NULL) {
> +    r = ssh_userauth_none(s->session, NULL);
> +    if (r == SSH_AUTH_ERROR) {

If you are going to keep this none-authentication, we should also catch
the case where it returns SSH_AUTH_SUCCESS and we don't even need to
supply a public key.

>          ret = -EPERM;
> -        error_setg(errp,
> -                "remote server does not support \"publickey\" 
> authentication");
> +        session_error_setg(errp, s, "failed to call ssh_userauth_none");
>          goto out;
>      }
>  
> -    /* Connect to ssh-agent and try each identity in turn. */
> -    agent = libssh2_agent_init(s->session);
> -    if (!agent) {
> -        ret = -EINVAL;
> -        session_error_setg(errp, s, "failed to initialize ssh-agent 
> support");
> -        goto out;
> -    }
> -    if (libssh2_agent_connect(agent)) {
> -        ret = -ECONNREFUSED;
> -        session_error_setg(errp, s, "failed to connect to ssh-agent");
> -        goto out;
> -    }
> -    if (libssh2_agent_list_identities(agent)) {
> -        ret = -EINVAL;
> -        session_error_setg(errp, s,
> -                           "failed requesting identities from ssh-agent");
> -        goto out;
> -    }
> +    method = ssh_userauth_list(s->session, NULL);
>  
> -    for(;;) {
> -        r = libssh2_agent_get_identity(agent, &identity, prev_identity);
> -        if (r == 1) {           /* end of list */
> -            break;
> -        }
> -        if (r < 0) {
> +    /* Try to authenticate with publickey, using the ssh-agent
> +     * if available.
> +     */
> +    if (method & SSH_AUTH_METHOD_PUBLICKEY) {
> +        r = ssh_userauth_publickey_auto(s->session, NULL, NULL);

Why do we need the none authentication to find out which authentication
methods are allowed if we are going to use ssh_userauth_publickey()
anyway?  We could call that right from the start and if the server
doesn't support it, well, then it will fail anyway.

(Maybe it makes sense to keep the ssh_userauth_none() because maybe
someone has configured his server like that, but I don't see the point
of ssh_userauth_list().)

I suspect @user is NULL because that's set through ssh_options_set()?
If so, we can just drop the @user parameter from this function (because
it is unused now).

> +        if (r == SSH_AUTH_ERROR) {
>              ret = -EINVAL;
> -            session_error_setg(errp, s,
> -                               "failed to obtain identity from ssh-agent");
> +            error_setg(errp, "failed to authenticate using publickey "
> +                             "authentication");
>              goto out;
> -        }
> -        r = libssh2_agent_userauth(agent, user, identity);
> -        if (r == 0) {
> +        } else if (r == SSH_AUTH_SUCCESS) {
>              /* Authenticated! */
>              ret = 0;
>              goto out;
>          }
> -        /* Failed to authenticate with this identity, try the next one. */
> -        prev_identity = identity;
>      }
>  
>      ret = -EPERM;

[...]

> @@ -628,6 +570,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
>      Error *local_err = NULL;
>      const char *user, *path, *host_key_check;
>      long port = 0;
> +    unsigned long portU = 0;

I was about to say: How about making port an unsigned long and swapping
the qemu_strtol() for a qemu_strtoul()?

But I think you'd rather want an unsigned int instead (and that won't
work with qemu_strtoul()).

> +    int new_sock = -1;
>  
>      opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
>      qemu_opts_absorb_qdict(opts, options, &local_err);
> @@ -678,26 +622,74 @@ static int connect_to_ssh(BDRVSSHState *s, QDict 
> *options,
>      }
>  
>      /* Open the socket and connect. */
> -    s->sock = inet_connect_saddr(s->inet, errp);
> -    if (s->sock < 0) {
> +    new_sock = inet_connect_saddr(s->inet, errp);
> +    if (new_sock < 0) {
>          ret = -EIO;
>          goto err;
>      }
>  
>      /* Create SSH session. */
> -    s->session = libssh2_session_init();
> +    s->session = ssh_new();
>      if (!s->session) {
>          ret = -EINVAL;
> -        session_error_setg(errp, s, "failed to initialize libssh2 session");
> +        session_error_setg(errp, s, "failed to initialize libssh session");
>          goto err;
>      }
>  
> -#if TRACE_LIBSSH2 != 0
> -    libssh2_trace(s->session, TRACE_LIBSSH2);
> -#endif
> +    /* Make sure we are in blocking mode during the connection and
> +     * authentication phases.
> +     */
> +    ssh_set_blocking(s->session, 1);
>  
> -    r = libssh2_session_handshake(s->session, s->sock);
> -    if (r != 0) {
> +    r = ssh_options_set(s->session, SSH_OPTIONS_USER, user);
> +    if (r < 0) {
> +        ret = -EINVAL;
> +        session_error_setg(errp, s,
> +                           "failed to set the user in the libssh session");
> +        goto err;
> +    }
> +
> +    r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
> +    if (r < 0) {
> +        ret = -EINVAL;
> +        session_error_setg(errp, s,
> +                           "failed to set the host in the libssh session");
> +        goto err;
> +    }
> +
> +    if (port > 0) {
> +        portU = port;
> +        r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &portU);

My documentation says:
> SSH_OPTIONS_PORT: The port to connect to (unsigned int).

int is not long, so this happens to do the right thing only on LE hardware.

> +        if (r < 0) {
> +            ret = -EINVAL;
> +            session_error_setg(errp, s,
> +                               "failed to set the port in the libssh 
> session");
> +            goto err;
> +        }
> +    }
> +
> +    /* Read ~/.ssh/config. */
> +    r = ssh_options_parse_config(s->session, NULL);
> +    if (r < 0) {
> +        ret = -EINVAL;
> +        session_error_setg(errp, s, "failed to parse ~/.ssh/config");
> +        goto err;
> +    }
> +
> +    r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
> +    if (r < 0) {
> +        ret = -EINVAL;
> +        session_error_setg(errp, s,
> +                           "failed to set the socket in the libssh session");
> +        goto err;
> +    }
> +    /* libssh took ownership of the socket. */
> +    s->sock = new_sock;
> +    new_sock = -1;
> +
> +    /* Connect. */
> +    r = ssh_connect(s->session);
> +    if (r < 0) {
>          ret = -EINVAL;
>          session_error_setg(errp, s, "failed to establish SSH session");
>          goto err;
> @@ -717,8 +709,15 @@ static int connect_to_ssh(BDRVSSHState *s, QDict 
> *options,
>      }
>  
>      /* Start SFTP. */
> -    s->sftp = libssh2_sftp_init(s->session);
> +    s->sftp = sftp_new(s->session);
>      if (!s->sftp) {
> +        session_error_setg(errp, s, "failed to create sftp handle");
> +        ret = -EINVAL;
> +        goto err;
> +    }
> +
> +    r = sftp_init(s->sftp);
> +    if (r < 0) {
>          session_error_setg(errp, s, "failed to initialize sftp handle");

Should this be sftp_error_setg()?

>          ret = -EINVAL;
>          goto err;
> @@ -727,17 +726,20 @@ static int connect_to_ssh(BDRVSSHState *s, QDict 
> *options,
>      /* Open the remote file. */
>      DPRINTF("opening file %s flags=0x%x creat_mode=0%o",
>              path, ssh_flags, creat_mode);
> -    s->sftp_handle = libssh2_sftp_open(s->sftp, path, ssh_flags, creat_mode);
> +    s->sftp_handle = sftp_open(s->sftp, path, ssh_flags, creat_mode);
>      if (!s->sftp_handle) {
>          session_error_setg(errp, s, "failed to open remote file '%s'", path);

Same here.

>          ret = -EINVAL;
>          goto err;
>      }
>  
> +    /* Make sure the SFTP file is handled in blocking mode. */
> +    sftp_file_set_blocking(s->sftp_handle);
> +

Hm, but ssh_file_open() makes the SSH session non-blocking immediately
afterwards, and it doesn't seem like the old libssh2 cared about how
SFTP behaved.  Was that the same thing in libssh2?  Should
ssh_file_open() make the SFTP session non-blocking as well?

>      qemu_opts_del(opts);
>  
> -    r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
> -    if (r < 0) {
> +    s->attrs = sftp_fstat(s->sftp_handle);
> +    if (!s->attrs) {
>          sftp_error_setg(errp, s, "failed to read file attributes");
>          return -EINVAL;
>      }

[...]

> @@ -937,33 +935,6 @@ static coroutine_fn void co_yield(BDRVSSHState *s, 
> BlockDriverState *bs)
>      DPRINTF("s->sock=%d - back", s->sock);
>  }
>  
> -/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
> - * in the remote file.  Notice that it just updates a field in the
> - * sftp_handle structure, so there is no network traffic and it cannot
> - * fail.
> - *
> - * However, `libssh2_sftp_seek64' does have a catastrophic effect on
> - * performance since it causes the handle to throw away all in-flight
> - * reads and buffered readahead data.  Therefore this function tries
> - * to be intelligent about when to call the underlying libssh2 function.
> - */
> -#define SSH_SEEK_WRITE 0
> -#define SSH_SEEK_READ  1
> -#define SSH_SEEK_FORCE 2
> -
> -static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
> -{
> -    bool op_read = (flags & SSH_SEEK_READ) != 0;
> -    bool force = (flags & SSH_SEEK_FORCE) != 0;
> -
> -    if (force || op_read != s->offset_op_read || offset != s->offset) {
> -        DPRINTF("seeking to offset=%" PRIi64, offset);
> -        libssh2_sftp_seek64(s->sftp_handle, offset);
> -        s->offset = offset;
> -        s->offset_op_read = op_read;
> -    }
> -}
> -

So I guess this is not an issue in libssh? :-)

Max

>  static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
>                                   int64_t offset, size_t size,
>                                   QEMUIOVector *qiov)
signature.asc
Description: OpenPGP digital signature
[Prev in Thread]
Current Thread
[Next in Thread]
Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh, Max Reitz <=
- Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh, Eric Blake, 2018/02/13
Prev by Date: Re: [Qemu-block] block_status automatically added flags
Next by Date: Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh
Previous by thread: [Qemu-block] block_status automatically added flags
Next by thread: Re: [Qemu-block] [PATCH v4] ssh: switch from libssh2 to libssh
Index(es):
- Date
- Thread