From: suruifeng <suruifeng@huawei.com>
The qio_channel_socket_connect_sync maybe called twice if the
openvswitchd restart during we attaching a vhost-user nic.
-> call trace 1:
net_vhost_user_init
tcp_chr_wait_connected //loop call sync connect until socekt connected
tcp_chr_connect_client_sync //return, but socekt state still disconnected
qio_channel_socket_connect_sync //socket connect sucess
tcp_chr_new_client
tcp_chr_connect
qemu_chr_be_event
net_vhost_user_event //CHR_EVENT_OPENED
vhost_user_start
tcp_chr_write //Broken Pipe, as peer restart
tcp_chr_disconnect_locked //disconnect & reconnect timer create
-> call trace 2:
socket_reconnect_timeout //timeout, and the peer restart just finished
tcp_chr_connect_client_async //concurrent with tcp_chr_connect_client_sync
What do you mean by "concurrent with tcp_chr_connect_client_sync"? Are we talking about threads? If not, could you provide the full backtrace?
tcp_chr_connect_client_task
qio_channel_socket_connect_sync //try connect same socket
This patch disabled tcp reconnect timer when we try to connect in synchronous mode,
it seems to work.
Signed-off-by: suruifeng <suruifeng@huawei.com>
Signed-off-by: Wang Xin <wangxinxin.wang@huawei.com>
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 213a4c8dd0..da1befca9e 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -80,6 +80,7 @@ struct SocketChardev {
bool is_websock;
+ bool async_reconnect_disable;
GSource *reconnect_timer;
int64_t reconnect_time;
bool connect_err_reported;
@@ -506,7 +507,9 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
if (emit_close) {
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
- if (s->reconnect_time && !s->reconnect_timer) {
+ if (s->reconnect_time &&
+ !s->reconnect_timer &&
+ !s->async_reconnect_disable) {
qemu_chr_socket_restart_timer(chr);
}
}
@@ -954,15 +957,23 @@ static int tcp_chr_connect_client_sync(Chardev *chr, Error **errp)
{
SocketChardev *s = SOCKET_CHARDEV(chr);
QIOChannelSocket *sioc = qio_channel_socket_new();
+
+ s->async_reconnect_disable = true;
Instead of having a new field, we could probably zero s->reconnect_timer temporarily.
tcp_chr_change_state(s, TCP_CHARDEV_STATE_CONNECTING);
tcp_chr_set_client_ioc_name(chr, sioc);
if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) {
tcp_chr_change_state(s, TCP_CHARDEV_STATE_DISCONNECTED);
object_unref(OBJECT(sioc));
+ s->async_reconnect_disable = false;
return -1;
}
tcp_chr_new_client(chr, sioc);
object_unref(OBJECT(sioc));
+ s->async_reconnect_disable = false;
+
+ if (s->state != TCP_CHARDEV_STATE_CONNECTED) {
+ return -1;
+ }
How is this related?
return 0;
}
--
2.26.0.windows.1