Date: Sat, 2 Dec 2017 22:08:03 +0000
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Daniel Borkmann <daniel@iogearbox.net>
Cc: Kees Cook <keescook@chromium.org>,
        Shmulik Ladkani <shmulik.ladkani@gmail.com>,
        Willem de Bruijn <willemb@google.com>,
        Pablo Neira Ayuso <pablo@netfilter.org>,
        Linus Torvalds <torvalds@linux-foundation.org>,
        David Miller <davem@davemloft.net>,
        LKML <linux-kernel@vger.kernel.org>,
        Network Development <netdev@vger.kernel.org>,
        Christoph Hellwig <hch@infradead.org>,
        Thomas Garnier <thgarnie@google.com>, Jann Horn <jannh@google.com>
Subject: Re: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of
 'xt_bpf_info_v1'
Message-ID: <20171202220802.GR21978@ZenIV.linux.org.uk>
References: <CAGXu5j+ZCJ8rRK=DAKaeuNmGzVY7xo1o9uJyWPEijmg9gw4fKA@mail.gmail.com>
 <20171201013304.GM21978@ZenIV.linux.org.uk>
 <20171201034859.GN21978@ZenIV.linux.org.uk>
 <20171201045439.GO21978@ZenIV.linux.org.uk>
 <20171201173941.GP21978@ZenIV.linux.org.uk>
 <7bbe72a8-dbbe-3343-765d-cc53eb40e0cd@iogearbox.net>
 <20171202184850.GQ21978@ZenIV.linux.org.uk>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <20171202184850.GQ21978@ZenIV.linux.org.uk>
User-Agent: Mutt/1.9.0 (2017-09-02)
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 4137
Lines: 126

On Sat, Dec 02, 2017 at 06:48:50PM +0000, Al Viro wrote:
> On Fri, Dec 01, 2017 at 09:47:00PM +0100, Daniel Borkmann wrote:
> 
> > > Might want to replace security_path_mknod() with something saner, while we are
> > > at it.
> > > 
> > > Objections?
> > 
> > No, thanks for looking into this, and sorry for this fugly hack! :( Not
> > that this doesn't make it any better, but I think back then I took it
> > over from mqueue implementation ... should have known better and looking
> > into making this generic instead, sigh. The above looks good to me, so
> > no objections from my side and thanks for working on it!
> > 
> > > PS: mqueue.c would also benefit from such primitive - do_create() there would
> > > simply pass attr as callback's argument into vfs_mkobj(), with callback being
> > > the guts of mqueue_create()...
> 
> OK...  See vfs.git#untested.mkobj; it really needs testing, though - mq_open(2)
> passes LTP tests, but that's not saying much, and BPF side is completely
> untested.

... and FWIW, completely untested patch for net/netfilter/xt_bpf.c follows:

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e4255a210..a7000e4775e7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -514,6 +514,9 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 	return bpf_prog_get_type_dev(ufd, type, false);
 }
 
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
+
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 2b75faccc771..9d1050dc2a7a 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -364,6 +364,45 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 }
 EXPORT_SYMBOL_GPL(bpf_obj_get_user);
 
+static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+{ /* Return the prog stored in @inode after access and type checks, else an ERR_PTR. */
+	struct bpf_prog *prog;
+	int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
+	if (ret) /* the read+write permission check on the inode did not pass */
+		return ERR_PTR(ret);
+	/* Only a bpf_prog_iops inode is accepted: bpf_map_iops gives -EINVAL, others -EACCES. */
+	if (inode->i_op == &bpf_map_iops)
+		return ERR_PTR(-EINVAL);
+	if (inode->i_op != &bpf_prog_iops)
+		return ERR_PTR(-EACCES);
+	/* i_op matched bpf_prog_iops, so i_private is taken to be the program. */
+	prog = inode->i_private;
+
+	ret = security_bpf_prog(prog);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	/* The program must pass bpf_prog_get_ok() for the requested @type. */
+	if (!bpf_prog_get_ok(prog, &type, false))
+		return ERR_PTR(-EINVAL);
+	/* NOTE(review): bpf_prog_inc() presumably takes the caller's reference - confirm. */
+	return bpf_prog_inc(prog);
+}
+
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
+{ /* Look up @name via kern_path() and return its program of @type, or an ERR_PTR. */
+	struct bpf_prog *prog;
+	struct path path;
+	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
+	if (ret)
+		return ERR_PTR(ret);
+	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
+	if (!IS_ERR(prog)) /* atime is updated only when a program was obtained */
+		touch_atime(&path);
+	path_put(&path); /* path is released on both the success and the error outcome */
+	return prog;
+}
+EXPORT_SYMBOL(bpf_prog_get_type_path);
+
 static void bpf_evict_inode(struct inode *inode)
 {
 	enum bpf_type type;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfeaa8d5e..5cb783fc8224 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1057,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
 
-static bool bpf_prog_get_ok(struct bpf_prog *prog,
+bool bpf_prog_get_ok(struct bpf_prog *prog,
 			    enum bpf_prog_type *attach_type, bool attach_drv)
 {
 	/* not an attachment, just a refcount inc, always allow */
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f..fa2ca0a13619 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -52,18 +52,8 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 
 static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
 {
-	mm_segment_t oldfs = get_fs();
-	int retval, fd;
-
-	set_fs(KERNEL_DS);
-	fd = bpf_obj_get_user(path, 0);
-	set_fs(oldfs);
-	if (fd < 0)
-		return fd;
-
-	retval = __bpf_mt_check_fd(fd, ret);
-	sys_close(fd);
-	return retval;
+	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+	return PTR_ERR_OR_ZERO(*ret); /* on failure *ret still holds the ERR_PTR value */
 }
 
 static int bpf_mt_check(const struct xt_mtchk_param *par)