2019-02-01 20:56:20

by Albert Vaca Cintora

[permalink] [raw]
Subject: [PATCH v2] kernel/ucounts: expose count of inotify watches in use

Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
The handler for this entry is a custom function that ends up calling
proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
and it gets mounted under /proc/sys/user/.

Inotify watches are a finite resource, in a similar way to available file
descriptors. The motivation for this patch is to be able to set up
monitoring and alerting before an application starts failing because
it runs out of inotify watches.

Signed-off-by: Albert Vaca Cintora <[email protected]>
Acked-by: Jan Kara <[email protected]>
Reviewed-by: Nicolas Saenz Julienne <[email protected]>
---
kernel/ucount.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index f48d1b6376a4..d8b11e53f098 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -57,6 +57,11 @@ static struct ctl_table_root set_root = {
.permissions = set_permissions,
};

+#ifdef CONFIG_INOTIFY_USER
+int proc_read_inotify_watches(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
static int zero = 0;
static int int_max = INT_MAX;
#define UCOUNT_ENTRY(name) \
@@ -79,6 +84,12 @@ static struct ctl_table user_table[] = {
#ifdef CONFIG_INOTIFY_USER
UCOUNT_ENTRY("max_inotify_instances"),
UCOUNT_ENTRY("max_inotify_watches"),
+ {
+ .procname = "current_inotify_watches",
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_read_inotify_watches,
+ },
#endif
{ }
};
@@ -226,6 +237,24 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
put_ucounts(ucounts);
}

+#ifdef CONFIG_INOTIFY_USER
+int proc_read_inotify_watches(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ucounts *ucounts;
+ struct ctl_table fake_table;
+ int count;
+
+ ucounts = get_ucounts(current_user_ns(), current_euid());
+ count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
+ put_ucounts(ucounts);
+
+ fake_table.data = &count;
+ fake_table.maxlen = sizeof(count);
+ return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+}
+#endif
+
static __init int user_namespace_sysctl_init(void)
{
#ifdef CONFIG_SYSCTL
--
2.20.1



2019-02-22 18:00:25

by Albert Vaca Cintora

[permalink] [raw]
Subject: Re: [PATCH v2] kernel/ucounts: expose count of inotify watches in use

On Fri, Feb 1, 2019 at 9:42 PM Albert Vaca Cintora <[email protected]> wrote:
>
> Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
> The handler for this entry is a custom function that ends up calling
> proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
> and it gets mounted under /proc/sys/user/.
>
> Inotify watches are a finite resource, in a similar way to available file
> descriptors. The motivation for this patch is to be able to set up
> monitoring and alerting before an application starts failing because
> it runs out of inotify watches.
>
> Signed-off-by: Albert Vaca Cintora <[email protected]>
> Acked-by: Jan Kara <[email protected]>
> Reviewed-by: Nicolas Saenz Julienne <[email protected]>

Friendly ping. Any comments on this?

> ---
> kernel/ucount.c | 29 +++++++++++++++++++++++++++++
> 1 file changed, 29 insertions(+)
>
> diff --git a/kernel/ucount.c b/kernel/ucount.c
> index f48d1b6376a4..d8b11e53f098 100644
> --- a/kernel/ucount.c
> +++ b/kernel/ucount.c
> @@ -57,6 +57,11 @@ static struct ctl_table_root set_root = {
> .permissions = set_permissions,
> };
>
> +#ifdef CONFIG_INOTIFY_USER
> +int proc_read_inotify_watches(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos);
> +#endif
> +
> static int zero = 0;
> static int int_max = INT_MAX;
> #define UCOUNT_ENTRY(name) \
> @@ -79,6 +84,12 @@ static struct ctl_table user_table[] = {
> #ifdef CONFIG_INOTIFY_USER
> UCOUNT_ENTRY("max_inotify_instances"),
> UCOUNT_ENTRY("max_inotify_watches"),
> + {
> + .procname = "current_inotify_watches",
> + .maxlen = sizeof(int),
> + .mode = 0444,
> + .proc_handler = proc_read_inotify_watches,
> + },
> #endif
> { }
> };
> @@ -226,6 +237,24 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
> put_ucounts(ucounts);
> }
>
> +#ifdef CONFIG_INOTIFY_USER
> +int proc_read_inotify_watches(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> + struct ucounts *ucounts;
> + struct ctl_table fake_table;
> + int count;
> +
> + ucounts = get_ucounts(current_user_ns(), current_euid());
> + count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
> + put_ucounts(ucounts);
> +
> + fake_table.data = &count;
> + fake_table.maxlen = sizeof(count);
> + return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
> +}
> +#endif
> +
> static __init int user_namespace_sysctl_init(void)
> {
> #ifdef CONFIG_SYSCTL
> --
> 2.20.1
>

2019-04-25 16:06:12

by Matthias Brugger

[permalink] [raw]
Subject: Re: [PATCH v2] kernel/ucounts: expose count of inotify watches in use



On 22/02/2019 18:58, Albert Vaca Cintora wrote:
> On Fri, Feb 1, 2019 at 9:42 PM Albert Vaca Cintora <[email protected]> wrote:
>>
>> Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
>> The handler for this entry is a custom function that ends up calling
>> proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
>> and it gets mounted under /proc/sys/user/.
>>
>> Inotify watches are a finite resource, in a similar way to available file
>> descriptors. The motivation for this patch is to be able to set up
>> monitoring and alerting before an application starts failing because
>> it runs out of inotify watches.
>>
>> Signed-off-by: Albert Vaca Cintora <[email protected]>
>> Acked-by: Jan Kara <[email protected]>
>> Reviewed-by: Nicolas Saenz Julienne <[email protected]>
>
> Friendly ping. Any comments on this?
>

Any comments on this? Just to make it clear, Albert found this problem while
working on monitoring software, so it fixes a real problem out there.

Regards,
Matthias

>> ---
>> kernel/ucount.c | 29 +++++++++++++++++++++++++++++
>> 1 file changed, 29 insertions(+)
>>
>> diff --git a/kernel/ucount.c b/kernel/ucount.c
>> index f48d1b6376a4..d8b11e53f098 100644
>> --- a/kernel/ucount.c
>> +++ b/kernel/ucount.c
>> @@ -57,6 +57,11 @@ static struct ctl_table_root set_root = {
>> .permissions = set_permissions,
>> };
>>
>> +#ifdef CONFIG_INOTIFY_USER
>> +int proc_read_inotify_watches(struct ctl_table *table, int write,
>> + void __user *buffer, size_t *lenp, loff_t *ppos);
>> +#endif
>> +
>> static int zero = 0;
>> static int int_max = INT_MAX;
>> #define UCOUNT_ENTRY(name) \
>> @@ -79,6 +84,12 @@ static struct ctl_table user_table[] = {
>> #ifdef CONFIG_INOTIFY_USER
>> UCOUNT_ENTRY("max_inotify_instances"),
>> UCOUNT_ENTRY("max_inotify_watches"),
>> + {
>> + .procname = "current_inotify_watches",
>> + .maxlen = sizeof(int),
>> + .mode = 0444,
>> + .proc_handler = proc_read_inotify_watches,
>> + },
>> #endif
>> { }
>> };
>> @@ -226,6 +237,24 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
>> put_ucounts(ucounts);
>> }
>>
>> +#ifdef CONFIG_INOTIFY_USER
>> +int proc_read_inotify_watches(struct ctl_table *table, int write,
>> + void __user *buffer, size_t *lenp, loff_t *ppos)
>> +{
>> + struct ucounts *ucounts;
>> + struct ctl_table fake_table;
>> + int count;
>> +
>> + ucounts = get_ucounts(current_user_ns(), current_euid());
>> + count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
>> + put_ucounts(ucounts);
>> +
>> + fake_table.data = &count;
>> + fake_table.maxlen = sizeof(count);
>> + return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
>> +}
>> +#endif
>> +
>> static __init int user_namespace_sysctl_init(void)
>> {
>> #ifdef CONFIG_SYSCTL
>> --
>> 2.20.1
>>
>

2019-04-25 22:54:31

by Albert Vaca Cintora

[permalink] [raw]
Subject: Re: [PATCH v2] kernel/ucounts: expose count of inotify watches in use

On Thu, Apr 25, 2019 at 10:07 PM Andrew Morton
<[email protected]> wrote:
>
> On Fri, 1 Feb 2019 21:39:59 +0100 Albert Vaca Cintora <[email protected]> wrote:
>
> > Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
> > The handler for this entry is a custom function that ends up calling
> > proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
> > and it gets mounted under /proc/sys/user/.
> >
> > Inotify watches are a finite resource, in a similar way to available file
> > descriptors. The motivation for this patch is to be able to set up
> > monitoring and alerting before an application starts failing because
> > it runs out of inotify watches.
>
> Matthias said "Albert found this problem while working on monitoring
> software, so it fixes a real problem out there", so please include full
> details of the problem which you encountered so that we are better able
> to understand the value of the patch.
>

This is an important metric to track as a sysadmin. Currently,
monitoring software has to work around the lack of a single metric by
iterating all the file descriptors for all processes and checking
which ones are inotify watches [1].

If this seems enough justification to you, please say so and I will
submit a patch v3 with the requested implementation changes.

[1] https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/inotify-instances

Albert

2019-04-26 00:32:20

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2] kernel/ucounts: expose count of inotify watches in use

On Fri, 1 Feb 2019 21:39:59 +0100 Albert Vaca Cintora <[email protected]> wrote:

> Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
> The handler for this entry is a custom function that ends up calling
> proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
> and it gets mounted under /proc/sys/user/.
>
> Inotify watches are a finite resource, in a similar way to available file
> descriptors. The motivation for this patch is to be able to set up
> monitoring and alerting before an application starts failing because
> it runs out of inotify watches.

Matthias said "Albert found this problem while working on monitoring
software, so it fixes a real problem out there", so please include full
details of the problem which you encountered so that we are better able
to understand the value of the patch.

>
> ...
>
> kernel/ucount.c | 29 +++++++++++++++++++++++++++++

Documentation, please. Documentation/filesystems/inotify.txt and/or
Documentation/filesystems/proc.txt.

Also, max_inotify_instances (at least) also appears to be undocumented,
so it would be good to address this as well while you're in there.

>
> diff --git a/kernel/ucount.c b/kernel/ucount.c
> index f48d1b6376a4..d8b11e53f098 100644
> --- a/kernel/ucount.c
> +++ b/kernel/ucount.c
> @@ -57,6 +57,11 @@ static struct ctl_table_root set_root = {
> .permissions = set_permissions,
> };
>
> +#ifdef CONFIG_INOTIFY_USER
> +int proc_read_inotify_watches(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos);
> +#endif

The ifdefs aren't really needed. And this should be in a header file
if it is indeed to be non-static.

But it should be static, in which case the ifdef will be needed to
prevent a warning. It's kinda irksome and perhaps it would be better
to move proc_read_inotify_watches() to be ahead of user_table[].


> static int zero = 0;
> static int int_max = INT_MAX;
> #define UCOUNT_ENTRY(name) \
> @@ -79,6 +84,12 @@ static struct ctl_table user_table[] = {
> #ifdef CONFIG_INOTIFY_USER
> UCOUNT_ENTRY("max_inotify_instances"),
> UCOUNT_ENTRY("max_inotify_watches"),
> + {
> + .procname = "current_inotify_watches",
> + .maxlen = sizeof(int),
> + .mode = 0444,
> + .proc_handler = proc_read_inotify_watches,
> + },
> #endif
> { }
> };
> @@ -226,6 +237,24 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
> put_ucounts(ucounts);
> }
>
> +#ifdef CONFIG_INOTIFY_USER
> +int proc_read_inotify_watches(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> + struct ucounts *ucounts;
> + struct ctl_table fake_table;
> + int count;
> +
> + ucounts = get_ucounts(current_user_ns(), current_euid());

get_ucounts() can return NULL. The kernel will crash.

> + count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
> + put_ucounts(ucounts);
> +
> + fake_table.data = &count;
> + fake_table.maxlen = sizeof(count);
> + return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
> +}
> +#endif