Hi,
This RFC aims to implement some support for SO_PEERCRED with AF_VSOCK,
so vsock servers & clients can lookup the basic peer credentials.
(further support for SO_PEERSEC could also be useful)
This is pretty straightforward for loopback transport, where both ends
are on the same host.
For vhost transport, the host will set the peer credentials associated with
the process who called VHOST_SET_OWNER (ex QEMU).
For virtio transport, the credentials are cleared upon connect, as
providing foreign credentials wouldn't make much sense.
I haven't looked at other transports. What do you think of this approach?
Note: I think it would be a better to set the peer credentials when we
actually can provide them, rather than at creation time, but I haven't
found a way yet. Help welcome!
Marc-André Lureau (10):
sock: move sock_init_peercred() from af_unix
sock: move sock_copy_peercred() from af_unix
vsock: owner field is specific to VMCI
sock: add sock_swap_peercred
virtio/vsock: add copy_peercred() to virtio_transport
vsock: set socket peercred
vsock/loopback: implement copy_peercred()
vhost/vsock: save owner pid & creds
vhost/vsock: implement copy_peercred
vsock/virtio: clear peer creds on connect
drivers/vhost/vsock.c | 46 +++++++++++++++++
include/linux/virtio_vsock.h | 2 +
include/net/af_vsock.h | 2 +
include/net/sock.h | 9 ++++
net/core/sock.c | 66 +++++++++++++++++++++++++
net/unix/af_unix.c | 50 ++-----------------
net/vmw_vsock/af_vsock.c | 8 +++
net/vmw_vsock/virtio_transport.c | 22 ++++++++-
net/vmw_vsock/virtio_transport_common.c | 9 ++++
net/vmw_vsock/vsock_loopback.c | 7 +++
10 files changed, 175 insertions(+), 46 deletions(-)
base-commit: e0bfcf9c77d9b2c11d2767f0c747f7721ae0cc51
--
2.33.0.721.g106298f7f9
SO_PEERCRED can be made to work with other kind of sockets.
Signed-off-by: Marc-André Lureau <[email protected]>
---
include/net/sock.h | 3 +++
net/core/sock.c | 17 +++++++++++++++++
net/unix/af_unix.c | 24 ++++--------------------
3 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index ea6fbc88c8f9..8b12953752e6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1816,6 +1816,9 @@ void sk_common_release(struct sock *sk);
/* Initialise core socket variables */
void sock_init_data(struct socket *sock, struct sock *sk);
+/* Set socket peer PID and credentials with current process. */
+void sock_init_peercred(struct sock *sk);
+
/*
* Socket reference counting postulates.
*
diff --git a/net/core/sock.c b/net/core/sock.c
index c1601f75ec4b..997e8d256e2f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3197,6 +3197,23 @@ void sock_init_data(struct socket *sock, struct sock *sk)
}
EXPORT_SYMBOL(sock_init_data);
+void sock_init_peercred(struct sock *sk)
+{
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
+}
+EXPORT_SYMBOL(sock_init_peercred);
+
void lock_sock_nested(struct sock *sk, int subclass)
{
/* The sk_lock has mutex_lock() semantics here. */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 89f9e85ae970..e56f320dff20 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -606,22 +606,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_gc(); /* Garbage collect fds */
}
-static void init_peercred(struct sock *sk)
-{
- const struct cred *old_cred;
- struct pid *old_pid;
-
- spin_lock(&sk->sk_peer_lock);
- old_pid = sk->sk_peer_pid;
- old_cred = sk->sk_peer_cred;
- sk->sk_peer_pid = get_pid(task_tgid(current));
- sk->sk_peer_cred = get_current_cred();
- spin_unlock(&sk->sk_peer_lock);
-
- put_pid(old_pid);
- put_cred(old_cred);
-}
-
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
const struct cred *old_cred;
@@ -666,7 +650,7 @@ static int unix_listen(struct socket *sock, int backlog)
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
/* set credentials so connect can copy them */
- init_peercred(sk);
+ sock_init_peercred(sk);
err = 0;
out_unlock:
@@ -1446,7 +1430,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
- init_peercred(newsk);
+ sock_init_peercred(newsk);
newu = unix_sk(newsk);
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
@@ -1518,8 +1502,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
sock_hold(skb);
unix_peer(ska) = skb;
unix_peer(skb) = ska;
- init_peercred(ska);
- init_peercred(skb);
+ sock_init_peercred(ska);
+ sock_init_peercred(skb);
ska->sk_state = TCP_ESTABLISHED;
skb->sk_state = TCP_ESTABLISHED;
--
2.33.0.721.g106298f7f9
This field isn't used by other transports.
Signed-off-by: Marc-André Lureau <[email protected]>
---
include/net/af_vsock.h | 2 ++
net/vmw_vsock/af_vsock.c | 6 ++++++
2 files changed, 8 insertions(+)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8..e626d9484bc5 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -41,7 +41,9 @@ struct vsock_sock {
* cached peer?
*/
u32 cached_peer; /* Context ID of last dgram destination check. */
+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
const struct cred *owner;
+#endif
/* Rest are SOCK_STREAM only. */
long connect_timeout;
/* Listening socket that this came from. */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e2c0cfb334d2..1925682a942a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -761,7 +761,9 @@ static struct sock *__vsock_create(struct net *net,
psk = parent ? vsock_sk(parent) : NULL;
if (parent) {
vsk->trusted = psk->trusted;
+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
vsk->owner = get_cred(psk->owner);
+#endif
vsk->connect_timeout = psk->connect_timeout;
vsk->buffer_size = psk->buffer_size;
vsk->buffer_min_size = psk->buffer_min_size;
@@ -769,7 +771,9 @@ static struct sock *__vsock_create(struct net *net,
security_sk_clone(parent, sk);
} else {
vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
vsk->owner = get_current_cred();
+#endif
vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
@@ -833,7 +837,9 @@ static void vsock_sk_destruct(struct sock *sk)
vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
put_cred(vsk->owner);
+#endif
}
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
--
2.33.0.721.g106298f7f9
SO_PEERCRED can be made to work with other kind of sockets.
Signed-off-by: Marc-André Lureau <[email protected]>
---
include/net/sock.h | 3 +++
net/core/sock.c | 25 +++++++++++++++++++++++++
net/unix/af_unix.c | 26 +-------------------------
3 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 8b12953752e6..d6877df26200 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1819,6 +1819,9 @@ void sock_init_data(struct socket *sock, struct sock *sk);
/* Set socket peer PID and credentials with current process. */
void sock_init_peercred(struct sock *sk);
+/* Copy peer credentials from peersk */
+void sock_copy_peercred(struct sock *sk, struct sock *peersk);
+
/*
* Socket reference counting postulates.
*
diff --git a/net/core/sock.c b/net/core/sock.c
index 997e8d256e2f..f6b2662824df 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3214,6 +3214,31 @@ void sock_init_peercred(struct sock *sk)
}
EXPORT_SYMBOL(sock_init_peercred);
+void sock_copy_peercred(struct sock *sk, struct sock *peersk)
+{
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
+ sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+ sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
+}
+EXPORT_SYMBOL(sock_copy_peercred);
+
void lock_sock_nested(struct sock *sk, int subclass)
{
/* The sk_lock has mutex_lock() semantics here. */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e56f320dff20..9109df1efdb6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -606,30 +606,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_gc(); /* Garbage collect fds */
}
-static void copy_peercred(struct sock *sk, struct sock *peersk)
-{
- const struct cred *old_cred;
- struct pid *old_pid;
-
- if (sk < peersk) {
- spin_lock(&sk->sk_peer_lock);
- spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
- } else {
- spin_lock(&peersk->sk_peer_lock);
- spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
- }
- old_pid = sk->sk_peer_pid;
- old_cred = sk->sk_peer_cred;
- sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
- sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
-
- spin_unlock(&sk->sk_peer_lock);
- spin_unlock(&peersk->sk_peer_lock);
-
- put_pid(old_pid);
- put_cred(old_cred);
-}
-
static int unix_listen(struct socket *sock, int backlog)
{
int err;
@@ -1460,7 +1436,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
smp_store_release(&newu->addr, otheru->addr);
/* Set credentials */
- copy_peercred(sk, other);
+ sock_copy_peercred(sk, other);
sock->state = SS_CONNECTED;
sk->sk_state = TCP_ESTABLISHED;
--
2.33.0.721.g106298f7f9
Signed-off-by: Marc-André Lureau <[email protected]>
---
include/net/sock.h | 3 +++
net/core/sock.c | 24 ++++++++++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index d6877df26200..635d270c3a65 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1822,6 +1822,9 @@ void sock_init_peercred(struct sock *sk);
/* Copy peer credentials from peersk */
void sock_copy_peercred(struct sock *sk, struct sock *peersk);
+/* Swap socket credentials */
+void sock_swap_peercred(struct sock *sk, struct sock *peersk);
+
/*
* Socket reference counting postulates.
*
diff --git a/net/core/sock.c b/net/core/sock.c
index f6b2662824df..365b63afa915 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3239,6 +3239,30 @@ void sock_copy_peercred(struct sock *sk, struct sock *peersk)
}
EXPORT_SYMBOL(sock_copy_peercred);
+void sock_swap_peercred(struct sock *sk, struct sock *peersk)
+{
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
+ sk->sk_peer_pid = peersk->sk_peer_pid;
+ sk->sk_peer_cred = peersk->sk_peer_cred;
+ peersk->sk_peer_pid = old_pid;
+ peersk->sk_peer_cred = old_cred;
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+}
+EXPORT_SYMBOL(sock_swap_peercred);
+
void lock_sock_nested(struct sock *sk, int subclass)
{
/* The sk_lock has mutex_lock() semantics here. */
--
2.33.0.721.g106298f7f9
Signed-off-by: Marc-André Lureau <[email protected]>
---
include/linux/virtio_vsock.h | 2 ++
net/vmw_vsock/virtio_transport_common.c | 9 +++++++++
2 files changed, 11 insertions(+)
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 35d7eedb5e8e..2445bece9216 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -69,6 +69,8 @@ struct virtio_transport {
/* Takes ownership of the packet */
int (*send_pkt)(struct virtio_vsock_pkt *pkt);
+ /* Set peercreds on socket created after listen recv */
+ void (*copy_peercred)(struct sock *sk, struct virtio_vsock_pkt *pkt);
};
ssize_t
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 59ee1be5a6dd..611d25e80723 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1194,6 +1194,15 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
return -ENOMEM;
}
+ if (t->copy_peercred) {
+ t->copy_peercred(child, pkt);
+ } else {
+ put_pid(child->sk_peer_pid);
+ child->sk_peer_pid = NULL;
+ put_cred(child->sk_peer_cred);
+ child->sk_peer_cred = NULL;
+ }
+
sk_acceptq_added(sk);
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
--
2.33.0.721.g106298f7f9
When AF_VSOCK socket is created, the peercreds are set to the current
process values.
This is how AF_UNIX listen work too, but unconnected AF_UNIX sockets
return pid:0 & uid/gid:-1.
Signed-off-by: Marc-André Lureau <[email protected]>
---
net/vmw_vsock/af_vsock.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1925682a942a..9b211ff49b08 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -760,6 +760,7 @@ static struct sock *__vsock_create(struct net *net,
psk = parent ? vsock_sk(parent) : NULL;
if (parent) {
+ sock_copy_peercred(sk, parent);
vsk->trusted = psk->trusted;
#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
vsk->owner = get_cred(psk->owner);
@@ -770,6 +771,7 @@ static struct sock *__vsock_create(struct net *net,
vsk->buffer_max_size = psk->buffer_max_size;
security_sk_clone(parent, sk);
} else {
+ sock_init_peercred(sk);
vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
vsk->owner = get_current_cred();
--
2.33.0.721.g106298f7f9
Set the socket peercred to the vhost owner.
Signed-off-by: Marc-André Lureau <[email protected]>
---
drivers/vhost/vsock.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3067436cddfc..fb492c53631d 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -308,6 +308,27 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
return len;
}
+static void vhost_transport_copy_peercred(struct sock *sk, struct virtio_vsock_pkt *pkt)
+{
+ struct vhost_vsock *vsock;
+
+ put_pid(sk->sk_peer_pid);
+ sk->sk_peer_pid = NULL;
+ put_cred(sk->sk_peer_cred);
+ sk->sk_peer_cred = NULL;
+
+ rcu_read_lock();
+ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.src_cid));
+ if (!vsock)
+ goto out;
+
+ sk->sk_peer_pid = get_pid(vsock->owner_pid);
+ sk->sk_peer_cred = get_cred(vsock->owner_cred);
+
+out:
+ rcu_read_unlock();
+}
+
static int
vhost_transport_cancel_pkt(struct vsock_sock *vsk)
{
@@ -474,6 +495,7 @@ static struct virtio_transport vhost_transport = {
},
.send_pkt = vhost_transport_send_pkt,
+ .copy_peercred = vhost_transport_copy_peercred,
};
static bool vhost_transport_seqpacket_allow(u32 remote_cid)
--
2.33.0.721.g106298f7f9
Since providing foreign creds wouldn't make much sense over VIRTIO,
let's clear the socket peer credentials on connect.
Signed-off-by: Marc-André Lureau <[email protected]>
---
net/vmw_vsock/virtio_transport.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 4f7c99dfd16c..705789272a0f 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -449,6 +449,26 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
static bool virtio_transport_seqpacket_allow(u32 remote_cid);
+static int transport_connect(struct vsock_sock *vsk)
+{
+ struct sock *sk;
+ int ret;
+
+ ret = virtio_transport_connect(vsk);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* clear creds, as we can't provide foreign creds */
+ sk = sk_vsock(vsk);
+ put_pid(sk->sk_peer_pid);
+ sk->sk_peer_pid = NULL;
+ put_cred(sk->sk_peer_cred);
+ sk->sk_peer_cred = NULL;
+
+ return ret;
+}
+
static struct virtio_transport virtio_transport = {
.transport = {
.module = THIS_MODULE,
@@ -458,7 +478,7 @@ static struct virtio_transport virtio_transport = {
.init = virtio_transport_do_socket_init,
.destruct = virtio_transport_destruct,
.release = virtio_transport_release,
- .connect = virtio_transport_connect,
+ .connect = transport_connect,
.shutdown = virtio_transport_shutdown,
.cancel_pkt = virtio_transport_cancel_pkt,
--
2.33.0.721.g106298f7f9
After VHOST_SET_OWNER success, save the owner process credentials.
Signed-off-by: Marc-André Lureau <[email protected]>
---
drivers/vhost/vsock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 938aefbc75ec..3067436cddfc 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -58,6 +58,8 @@ struct vhost_vsock {
u32 guest_cid;
bool seqpacket_allow;
+ struct pid *owner_pid;
+ const struct cred *owner_cred;
};
static u32 vhost_transport_get_local_cid(void)
@@ -774,6 +776,10 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
vhost_dev_cleanup(&vsock->dev);
kfree(vsock->dev.vqs);
+
+ put_pid(vsock->owner_pid);
+ put_cred(vsock->owner_cred);
+
vhost_vsock_free(vsock);
return 0;
}
@@ -851,6 +857,22 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
return -EFAULT;
}
+static long vhost_vsock_set_owner(struct vhost_vsock *vsock)
+{
+ long r;
+
+ mutex_lock(&vsock->dev.mutex);
+ r = vhost_dev_set_owner(&vsock->dev);
+ if (r)
+ goto out;
+ vsock->owner_pid = get_pid(task_tgid(current));
+ vsock->owner_cred = get_current_cred();
+ vhost_vsock_flush(vsock);
+out:
+ mutex_unlock(&vsock->dev.mutex);
+ return r;
+}
+
static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
@@ -894,6 +916,8 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
return -EOPNOTSUPP;
vhost_set_backend_features(&vsock->dev, features);
return 0;
+ case VHOST_SET_OWNER:
+ return vhost_vsock_set_owner(vsock);
default:
mutex_lock(&vsock->dev.mutex);
r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
--
2.33.0.721.g106298f7f9
Signed-off-by: Marc-André Lureau <[email protected]>
---
net/vmw_vsock/vsock_loopback.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 169a8cf65b39..59317baa4e5c 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -41,6 +41,12 @@ static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
return len;
}
+static void vsock_loopback_copy_peercred(struct sock *sk, struct virtio_vsock_pkt *pkt)
+{
+ /* on vsock loopback, set both peers by swaping the creds */
+ sock_swap_peercred(sk, sk_vsock(pkt->vsk));
+}
+
static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
@@ -110,6 +116,7 @@ static struct virtio_transport loopback_transport = {
},
.send_pkt = vsock_loopback_send_pkt,
+ .copy_peercred = vsock_loopback_copy_peercred,
};
static bool vsock_loopback_seqpacket_allow(u32 remote_cid)
--
2.33.0.721.g106298f7f9
Hi,
On Thu, Oct 21, 2021 at 04:37:04PM +0400, Marc-Andr? Lureau wrote:
>Hi,
>
>This RFC aims to implement some support for SO_PEERCRED with AF_VSOCK,
>so vsock servers & clients can lookup the basic peer credentials.
>(further support for SO_PEERSEC could also be useful)
Thanks for this RFC! Just had a quick look, Monday I hope to give you
better feedback :-)
>
>This is pretty straightforward for loopback transport, where both ends
>are on the same host.
>
>For vhost transport, the host will set the peer credentials associated with
>the process who called VHOST_SET_OWNER (ex QEMU).
>
>For virtio transport, the credentials are cleared upon connect, as
>providing foreign credentials wouldn't make much sense.
>
>I haven't looked at other transports. What do you think of this
>approach?
So IIUC, SO_PEERCRED will make sense only in the host and will return
the credentials of the VMM (e.g. QEMU) that manages the VM of the peer
to which we are connected.
So the features should be supported by the following type of transports:
- VSOCK_TRANSPORT_F_LOCAL (vsock_loopback)
- VSOCK_TRANSPORT_F_H2G (vhost-vsock, vmci)
>
>Note: I think it would be a better to set the peer credentials when we
>actually can provide them, rather than at creation time, but I haven't
>found a way yet. Help welcome!
Yep, I agree, cleaning credentials after connecting in the guest seems a
bit strange.
As you also said, would be better to set them only after a successful
connect(), which should be similar to what AF_UNIX does.
Maybe we can add an helper in af_vsock.c that will be called from the
transports that support this feature at the end the connection setup.
I'll think better of it and get back to you.
Thanks,
Stefano
CCing Jorgen.
On Thu, Oct 21, 2021 at 04:37:07PM +0400, Marc-Andr? Lureau wrote:
>This field isn't used by other transports.
If the field is used only in the VMCI transport, maybe it's better to
move the field and the code in that transport.
Thanks,
Stefano
>
>Signed-off-by: Marc-Andr? Lureau <[email protected]>
>---
> include/net/af_vsock.h | 2 ++
> net/vmw_vsock/af_vsock.c | 6 ++++++
> 2 files changed, 8 insertions(+)
>
>diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>index ab207677e0a8..e626d9484bc5 100644
>--- a/include/net/af_vsock.h
>+++ b/include/net/af_vsock.h
>@@ -41,7 +41,9 @@ struct vsock_sock {
> * cached peer?
> */
> u32 cached_peer; /* Context ID of last dgram destination check. */
>+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> const struct cred *owner;
>+#endif
> /* Rest are SOCK_STREAM only. */
> long connect_timeout;
> /* Listening socket that this came from. */
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index e2c0cfb334d2..1925682a942a 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -761,7 +761,9 @@ static struct sock *__vsock_create(struct net *net,
> psk = parent ? vsock_sk(parent) : NULL;
> if (parent) {
> vsk->trusted = psk->trusted;
>+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> vsk->owner = get_cred(psk->owner);
>+#endif
> vsk->connect_timeout = psk->connect_timeout;
> vsk->buffer_size = psk->buffer_size;
> vsk->buffer_min_size = psk->buffer_min_size;
>@@ -769,7 +771,9 @@ static struct sock *__vsock_create(struct net *net,
> security_sk_clone(parent, sk);
> } else {
> vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
>+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> vsk->owner = get_current_cred();
>+#endif
> vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
> vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
> vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
>@@ -833,7 +837,9 @@ static void vsock_sk_destruct(struct sock *sk)
> vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
> vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
>
>+#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> put_cred(vsk->owner);
>+#endif
> }
>
> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>--
>2.33.0.721.g106298f7f9
>
On Thu, Oct 21, 2021 at 04:37:09PM +0400, Marc-Andr? Lureau wrote:
>Signed-off-by: Marc-Andr? Lureau <[email protected]>
>---
> include/linux/virtio_vsock.h | 2 ++
> net/vmw_vsock/virtio_transport_common.c | 9 +++++++++
> 2 files changed, 11 insertions(+)
>
>diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>index 35d7eedb5e8e..2445bece9216 100644
>--- a/include/linux/virtio_vsock.h
>+++ b/include/linux/virtio_vsock.h
>@@ -69,6 +69,8 @@ struct virtio_transport {
>
> /* Takes ownership of the packet */
> int (*send_pkt)(struct virtio_vsock_pkt *pkt);
>+ /* Set peercreds on socket created after listen recv */
>+ void (*copy_peercred)(struct sock *sk, struct virtio_vsock_pkt *pkt);
> };
>
> ssize_t
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index 59ee1be5a6dd..611d25e80723 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -1194,6 +1194,15 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
> return -ENOMEM;
> }
>
>+ if (t->copy_peercred) {
>+ t->copy_peercred(child, pkt);
>+ } else {
>+ put_pid(child->sk_peer_pid);
>+ child->sk_peer_pid = NULL;
>+ put_cred(child->sk_peer_cred);
>+ child->sk_peer_cred = NULL;
>+ }
>+
Should we do the same also on the other side?
I mean in virtio_transport_recv_connecting() when
VIRTIO_VSOCK_OP_RESPONSE is received.
I think we can add an helper and call it every time we call
vsock_insert_connected().
Even better if we can do it in the core, but maybe this can be a next
step.
Thanks,
Stefano
On Thu, Oct 21, 2021 at 04:37:10PM +0400, Marc-Andr? Lureau wrote:
>When AF_VSOCK socket is created, the peercreds are set to the current
>process values.
>
>This is how AF_UNIX listen work too, but unconnected AF_UNIX sockets
>return pid:0 & uid/gid:-1.
>
>Signed-off-by: Marc-Andr? Lureau <[email protected]>
>---
> net/vmw_vsock/af_vsock.c | 2 ++
> 1 file changed, 2 insertions(+)
>
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index 1925682a942a..9b211ff49b08 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -760,6 +760,7 @@ static struct sock *__vsock_create(struct net *net,
>
> psk = parent ? vsock_sk(parent) : NULL;
> if (parent) {
>+ sock_copy_peercred(sk, parent);
> vsk->trusted = psk->trusted;
> #if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> vsk->owner = get_cred(psk->owner);
>@@ -770,6 +771,7 @@ static struct sock *__vsock_create(struct net *net,
> vsk->buffer_max_size = psk->buffer_max_size;
> security_sk_clone(parent, sk);
> } else {
>+ sock_init_peercred(sk);
IIUC in AF_UNIX the sock_init_peercred() is called only when the
connection is established, so I think we should do the same.
In the single transports or in some way in the core when the transports
call vsock_insert_connected().
Thanks,
Stefano
On Thu, Oct 21, 2021 at 04:37:11PM +0400, Marc-Andr? Lureau wrote:
>Signed-off-by: Marc-Andr? Lureau <[email protected]>
>---
> net/vmw_vsock/vsock_loopback.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
>diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
>index 169a8cf65b39..59317baa4e5c 100644
>--- a/net/vmw_vsock/vsock_loopback.c
>+++ b/net/vmw_vsock/vsock_loopback.c
>@@ -41,6 +41,12 @@ static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
> return len;
> }
>
>+static void vsock_loopback_copy_peercred(struct sock *sk, struct virtio_vsock_pkt *pkt)
>+{
>+ /* on vsock loopback, set both peers by swaping the creds */
>+ sock_swap_peercred(sk, sk_vsock(pkt->vsk));
>+}
>+
It's a bit hacky set also the cred of `pkt->vsk`. I think here we should
only copy the cred of the remote peer.
Addind the call to t->copy_peercred() in the
virtio_transport_recv_connecting() will set the other side.
Thanks,
Stefano
> On 26 Oct 2021, at 13:16, Stefano Garzarella <[email protected]> wrote:
>
> CCing Jorgen.
>
> On Thu, Oct 21, 2021 at 04:37:07PM +0400, Marc-André Lureau wrote:
>> This field isn't used by other transports.
>
> If the field is used only in the VMCI transport, maybe it's better to
> move the field and the code in that transport.
If the transport needs initialize these fields, that should happen when we
call vsock_assign_transport. So we would need to validate that
get_current_cred() gets the right credentials and that the parent of a
socket has an Initialised owner field at that point in time.
sock_assign_transport may be called when processing an
incoming packet when a remote connects to a listening socket,
and in that case, the owner will be based on the parent socket.
If the parent socket hasn’t been assigned a transport (and as I
remember it, that isn’t the case for a listening socket), then it
isn’t possible to initialize the owner field at this point using
the value from the parent. So the initialisation of the fields
probably have to stay in af_vsock.c as part of the generic structure.
Is there a particular reason to do this change as part of this series
of patches?
Thanks,
Jorgen
> Thanks,
> Stefano
>
>>
>> Signed-off-by: Marc-André Lureau <[email protected]>
>> ---
>> include/net/af_vsock.h | 2 ++
>> net/vmw_vsock/af_vsock.c | 6 ++++++
>> 2 files changed, 8 insertions(+)
>>
>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>> index ab207677e0a8..e626d9484bc5 100644
>> --- a/include/net/af_vsock.h
>> +++ b/include/net/af_vsock.h
>> @@ -41,7 +41,9 @@ struct vsock_sock {
>> * cached peer?
>> */
>> u32 cached_peer; /* Context ID of last dgram destination check. */
>> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
>> const struct cred *owner;
>> +#endif
>> /* Rest are SOCK_STREAM only. */
>> long connect_timeout;
>> /* Listening socket that this came from. */
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index e2c0cfb334d2..1925682a942a 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -761,7 +761,9 @@ static struct sock *__vsock_create(struct net *net,
>> psk = parent ? vsock_sk(parent) : NULL;
>> if (parent) {
>> vsk->trusted = psk->trusted;
>> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
>> vsk->owner = get_cred(psk->owner);
>> +#endif
>> vsk->connect_timeout = psk->connect_timeout;
>> vsk->buffer_size = psk->buffer_size;
>> vsk->buffer_min_size = psk->buffer_min_size;
>> @@ -769,7 +771,9 @@ static struct sock *__vsock_create(struct net *net,
>> security_sk_clone(parent, sk);
>> } else {
>> vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
>> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
>> vsk->owner = get_current_cred();
>> +#endif
>> vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
>> vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
>> vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
>> @@ -833,7 +837,9 @@ static void vsock_sk_destruct(struct sock *sk)
>> vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
>> vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
>>
>> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
>> put_cred(vsk->owner);
>> +#endif
>> }
>>
>> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>> --
>> 2.33.0.721.g106298f7f9
>>
>
Hi
On Wed, Oct 27, 2021 at 12:13 PM Jorgen Hansen <[email protected]> wrote:
>
>
> > On 26 Oct 2021, at 13:16, Stefano Garzarella <[email protected]> wrote:
> >
> > CCing Jorgen.
> >
> > On Thu, Oct 21, 2021 at 04:37:07PM +0400, Marc-André Lureau wrote:
> >> This field isn't used by other transports.
> >
> > If the field is used only in the VMCI transport, maybe it's better to
> > move the field and the code in that transport.
>
> If the transport needs initialize these fields, that should happen when we
> call vsock_assign_transport. So we would need to validate that
> get_current_cred() gets the right credentials and that the parent of a
> socket has an Initialised owner field at that point in time.
>
> sock_assign_transport may be called when processing an
> incoming packet when a remote connects to a listening socket,
> and in that case, the owner will be based on the parent socket.
> If the parent socket hasn’t been assigned a transport (and as I
> remember it, that isn’t the case for a listening socket), then it
> isn’t possible to initialize the owner field at this point using
> the value from the parent. So the initialisation of the fields
> probably have to stay in af_vsock.c as part of the generic structure.
>
> Is there a particular reason to do this change as part of this series
> of patches?
No particular reason, it was just related code.
thanks
>
> Thanks,
> Jorgen
>
> > Thanks,
> > Stefano
> >
> >>
> >> Signed-off-by: Marc-André Lureau <[email protected]>
> >> ---
> >> include/net/af_vsock.h | 2 ++
> >> net/vmw_vsock/af_vsock.c | 6 ++++++
> >> 2 files changed, 8 insertions(+)
> >>
> >> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> >> index ab207677e0a8..e626d9484bc5 100644
> >> --- a/include/net/af_vsock.h
> >> +++ b/include/net/af_vsock.h
> >> @@ -41,7 +41,9 @@ struct vsock_sock {
> >> * cached peer?
> >> */
> >> u32 cached_peer; /* Context ID of last dgram destination check. */
> >> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> >> const struct cred *owner;
> >> +#endif
> >> /* Rest are SOCK_STREAM only. */
> >> long connect_timeout;
> >> /* Listening socket that this came from. */
> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >> index e2c0cfb334d2..1925682a942a 100644
> >> --- a/net/vmw_vsock/af_vsock.c
> >> +++ b/net/vmw_vsock/af_vsock.c
> >> @@ -761,7 +761,9 @@ static struct sock *__vsock_create(struct net *net,
> >> psk = parent ? vsock_sk(parent) : NULL;
> >> if (parent) {
> >> vsk->trusted = psk->trusted;
> >> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> >> vsk->owner = get_cred(psk->owner);
> >> +#endif
> >> vsk->connect_timeout = psk->connect_timeout;
> >> vsk->buffer_size = psk->buffer_size;
> >> vsk->buffer_min_size = psk->buffer_min_size;
> >> @@ -769,7 +771,9 @@ static struct sock *__vsock_create(struct net *net,
> >> security_sk_clone(parent, sk);
> >> } else {
> >> vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
> >> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> >> vsk->owner = get_current_cred();
> >> +#endif
> >> vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
> >> vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
> >> vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
> >> @@ -833,7 +837,9 @@ static void vsock_sk_destruct(struct sock *sk)
> >> vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
> >> vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
> >>
> >> +#if IS_ENABLED(CONFIG_VMWARE_VMCI_VSOCKETS)
> >> put_cred(vsk->owner);
> >> +#endif
> >> }
> >>
> >> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> >> --
> >> 2.33.0.721.g106298f7f9
> >>
> >
>