2020-05-11 01:14:17

by Stephen Rothwell

[permalink] [raw]
Subject: linux-next: manual merge of the vfs tree with the parisc-hd tree

Hi all,

Today's linux-next merge of the vfs tree got a conflict in:

kernel/sysctl.c

between commit:

b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")

from the parisc-hd tree and commit:

f461d2dcd511 ("sysctl: avoid forward declarations")

from the vfs tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non-trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

--
Cheers,
Stephen Rothwell

diff --cc kernel/sysctl.c
index b9ff323e1d26,e961286d0e14..000000000000
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@@ -3372,71 -1576,1732 +1576,1734 @@@ int proc_do_large_bitmap(struct ctl_tab
return -ENOSYS;
}

- int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
- return -ENOSYS;
- }
+ #endif /* CONFIG_PROC_SYSCTL */
+
+ #if defined(CONFIG_SYSCTL)
+ int proc_do_static_key(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+ {
+ struct static_key *key = (struct static_key *)table->data;
+ static DEFINE_MUTEX(static_key_mutex);
+ int val, ret;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(val),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ mutex_lock(&static_key_mutex);
+ val = static_key_enabled(key);
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (val)
+ static_key_enable(key);
+ else
+ static_key_disable(key);
+ }
+ mutex_unlock(&static_key_mutex);
+ return ret;
+ }
+
+ static struct ctl_table kern_table[] = {
+ {
+ .procname = "sched_child_runs_first",
+ .data = &sysctl_sched_child_runs_first,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #ifdef CONFIG_SCHED_DEBUG
+ {
+ .procname = "sched_min_granularity_ns",
+ .data = &sysctl_sched_min_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
+ },
+ {
+ .procname = "sched_latency_ns",
+ .data = &sysctl_sched_latency,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
+ },
+ {
+ .procname = "sched_wakeup_granularity_ns",
+ .data = &sysctl_sched_wakeup_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_wakeup_granularity_ns,
+ .extra2 = &max_wakeup_granularity_ns,
+ },
+ #ifdef CONFIG_SMP
+ {
+ .procname = "sched_tunable_scaling",
+ .data = &sysctl_sched_tunable_scaling,
+ .maxlen = sizeof(enum sched_tunable_scaling),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_tunable_scaling,
+ .extra2 = &max_sched_tunable_scaling,
+ },
+ {
+ .procname = "sched_migration_cost_ns",
+ .data = &sysctl_sched_migration_cost,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_nr_migrate",
+ .data = &sysctl_sched_nr_migrate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #ifdef CONFIG_SCHEDSTATS
+ {
+ .procname = "sched_schedstats",
+ .data = NULL,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_schedstats,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif /* CONFIG_SCHEDSTATS */
+ #endif /* CONFIG_SMP */
+ #ifdef CONFIG_NUMA_BALANCING
+ {
+ .procname = "numa_balancing_scan_delay_ms",
+ .data = &sysctl_numa_balancing_scan_delay,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_period_min_ms",
+ .data = &sysctl_numa_balancing_scan_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_period_max_ms",
+ .data = &sysctl_numa_balancing_scan_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_size_mb",
+ .data = &sysctl_numa_balancing_scan_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "numa_balancing",
+ .data = NULL, /* filled in by handler */
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_numa_balancing,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif /* CONFIG_NUMA_BALANCING */
+ #endif /* CONFIG_SCHED_DEBUG */
+ {
+ .procname = "sched_rt_period_us",
+ .data = &sysctl_sched_rt_period,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_rt_handler,
+ },
+ {
+ .procname = "sched_rt_runtime_us",
+ .data = &sysctl_sched_rt_runtime,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sched_rt_handler,
+ },
+ {
+ .procname = "sched_rr_timeslice_ms",
+ .data = &sysctl_sched_rr_timeslice,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sched_rr_handler,
+ },
+ #ifdef CONFIG_UCLAMP_TASK
+ {
+ .procname = "sched_util_clamp_min",
+ .data = &sysctl_sched_uclamp_util_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+ {
+ .procname = "sched_util_clamp_max",
+ .data = &sysctl_sched_uclamp_util_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+ #endif
+ #ifdef CONFIG_SCHED_AUTOGROUP
+ {
+ .procname = "sched_autogroup_enabled",
+ .data = &sysctl_sched_autogroup_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_CFS_BANDWIDTH
+ {
+ .procname = "sched_cfs_bandwidth_slice_us",
+ .data = &sysctl_sched_cfs_bandwidth_slice,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ #endif
+ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+ {
+ .procname = "sched_energy_aware",
+ .data = &sysctl_sched_energy_aware,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_energy_aware_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_PROVE_LOCKING
+ {
+ .procname = "prove_locking",
+ .data = &prove_locking,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_LOCK_STAT
+ {
+ .procname = "lock_stat",
+ .data = &lock_stat,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ {
+ .procname = "panic",
+ .data = &panic_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #ifdef CONFIG_COREDUMP
+ {
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "core_pattern",
+ .data = core_pattern,
+ .maxlen = CORENAME_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring_coredump,
+ },
+ {
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_PROC_SYSCTL
+ {
+ .procname = "tainted",
+ .maxlen = sizeof(long),
+ .mode = 0644,
+ .proc_handler = proc_taint,
+ },
+ {
+ .procname = "sysctl_writes_strict",
+ .data = &sysctl_writes_strict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &neg_one,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_LATENCYTOP
+ {
+ .procname = "latencytop",
+ .data = &latencytop_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_latencytop,
+ },
+ #endif
+ #ifdef CONFIG_BLK_DEV_INITRD
+ {
+ .procname = "real-root-dev",
+ .data = &real_root_dev,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ {
+ .procname = "print-fatal-signals",
+ .data = &print_fatal_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #ifdef CONFIG_SPARC
+ {
+ .procname = "reboot-cmd",
+ .data = reboot_command,
+ .maxlen = 256,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "stop-a",
+ .data = &stop_a_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "scons-poweroff",
+ .data = &scons_pwroff,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_SPARC64
+ {
+ .procname = "tsb-ratio",
+ .data = &sysctl_tsb_ratio,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_PARISC
+ {
+ .procname = "soft-power",
+ .data = &pwrsw_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
+ {
+ .procname = "unaligned-trap",
+ .data = &unaligned_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ {
+ .procname = "ctrl-alt-del",
+ .data = &C_A_D,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #ifdef CONFIG_FUNCTION_TRACER
+ {
+ .procname = "ftrace_enabled",
+ .data = &ftrace_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = ftrace_enable_sysctl,
+ },
+ #endif
+ #ifdef CONFIG_STACK_TRACER
+ {
+ .procname = "stack_tracer_enabled",
+ .data = &stack_tracer_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = stack_trace_sysctl,
+ },
+ #endif
+ #ifdef CONFIG_TRACING
+ {
+ .procname = "ftrace_dump_on_oops",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "traceoff_on_warning",
+ .data = &__disable_trace_on_warning,
+ .maxlen = sizeof(__disable_trace_on_warning),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tracepoint_printk",
+ .data = &tracepoint_printk,
+ .maxlen = sizeof(tracepoint_printk),
+ .mode = 0644,
+ .proc_handler = tracepoint_printk_sysctl,
+ },
+ #endif
+ #ifdef CONFIG_KEXEC_CORE
+ {
+ .procname = "kexec_load_disabled",
+ .data = &kexec_load_disabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ /* only handle a transition from default "0" to "1" */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_MODULES
+ {
+ .procname = "modprobe",
+ .data = &modprobe_path,
+ .maxlen = KMOD_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "modules_disabled",
+ .data = &modules_disabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ /* only handle a transition from default "0" to "1" */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_UEVENT_HELPER
+ {
+ .procname = "hotplug",
+ .data = &uevent_helper,
+ .maxlen = UEVENT_HELPER_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ #endif
+ #ifdef CONFIG_CHR_DEV_SG
+ {
+ .procname = "sg-big-buff",
+ .data = &sg_big_buff,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_BSD_PROCESS_ACCT
+ {
+ .procname = "acct",
+ .data = &acct_parm,
+ .maxlen = 3*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_MAGIC_SYSRQ
+ {
+ .procname = "sysrq",
+ .data = NULL,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = sysrq_sysctl_handler,
+ },
+ #endif
+ #ifdef CONFIG_PROC_SYSCTL
+ {
+ .procname = "cad_pid",
+ .data = NULL,
+ .maxlen = sizeof (int),
+ .mode = 0600,
+ .proc_handler = proc_do_cad_pid,
+ },
+ #endif
+ {
+ .procname = "threads-max",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_max_threads,
+ },
+ {
+ .procname = "random",
+ .mode = 0555,
+ .child = random_table,
+ },
+ {
+ .procname = "usermodehelper",
+ .mode = 0555,
+ .child = usermodehelper_table,
+ },
+ #ifdef CONFIG_FW_LOADER_USER_HELPER
+ {
+ .procname = "firmware_config",
+ .mode = 0555,
+ .child = firmware_config_table,
+ },
+ #endif
+ {
+ .procname = "overflowuid",
+ .data = &overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+ #ifdef CONFIG_S390
+ {
+ .procname = "userprocess_debug",
+ .data = &show_unhandled_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ {
+ .procname = "pid_max",
+ .data = &pid_max,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &pid_max_min,
+ .extra2 = &pid_max_max,
+ },
+ {
+ .procname = "panic_on_oops",
+ .data = &panic_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "panic_print",
+ .data = &panic_print,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ #if defined CONFIG_PRINTK
+ {
+ .procname = "printk",
+ .data = &console_loglevel,
+ .maxlen = 4*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_ratelimit",
+ .data = &printk_ratelimit_state.interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "printk_ratelimit_burst",
+ .data = &printk_ratelimit_state.burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_delay",
+ .data = &printk_delay_msec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &ten_thousand,
+ },
+ {
+ .procname = "printk_devkmsg",
+ .data = devkmsg_log_str,
+ .maxlen = DEVKMSG_STR_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = devkmsg_sysctl_set_loglvl,
+ },
+ {
+ .procname = "dmesg_restrict",
+ .data = &dmesg_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "kptr_restrict",
+ .data = &kptr_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ #endif
+ {
+ .procname = "ngroups_max",
+ .data = &ngroups_max,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "cap_last_cap",
+ .data = (void *)&cap_last_cap,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ #if defined(CONFIG_LOCKUP_DETECTOR)
+ {
+ .procname = "watchdog",
+ .data = &watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "watchdog_thresh",
+ .data = &watchdog_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_watchdog_thresh,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &sixty,
+ },
+ {
+ .procname = "nmi_watchdog",
+ .data = &nmi_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = NMI_WATCHDOG_SYSCTL_PERM,
+ .proc_handler = proc_nmi_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+ #ifdef CONFIG_SOFTLOCKUP_DETECTOR
+ {
+ .procname = "soft_watchdog",
+ .data = &soft_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_soft_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #ifdef CONFIG_SMP
+ {
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif /* CONFIG_SMP */
+ #endif
+ #ifdef CONFIG_HARDLOCKUP_DETECTOR
+ {
+ .procname = "hardlockup_panic",
+ .data = &hardlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #ifdef CONFIG_SMP
+ {
+ .procname = "hardlockup_all_cpu_backtrace",
+ .data = &sysctl_hardlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif /* CONFIG_SMP */
+ #endif
+ #endif
+
+ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+ {
+ .procname = "unknown_nmi_panic",
+ .data = &unknown_nmi_panic,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
-#if defined(CONFIG_X86)
++
++#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
++ defined(CONFIG_DEBUG_STACKOVERFLOW)
+ {
- .procname = "panic_on_unrecovered_nmi",
- .data = &panic_on_unrecovered_nmi,
++ .procname = "panic_on_stackoverflow",
++ .data = &sysctl_panic_on_stackoverflow,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
++#endif
++#if defined(CONFIG_X86)
+ {
- .procname = "panic_on_io_nmi",
- .data = &panic_on_io_nmi,
++ .procname = "panic_on_unrecovered_nmi",
++ .data = &panic_on_unrecovered_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ {
- .procname = "panic_on_stackoverflow",
- .data = &sysctl_panic_on_stackoverflow,
++ .procname = "panic_on_io_nmi",
++ .data = &panic_on_io_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
-#endif
+ {
+ .procname = "bootloader_type",
+ .data = &bootloader_type,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "bootloader_version",
+ .data = &bootloader_version,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #if defined(CONFIG_MMU)
+ {
+ .procname = "randomize_va_space",
+ .data = &randomize_va_space,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #if defined(CONFIG_S390) && defined(CONFIG_SMP)
+ {
+ .procname = "spin_retry",
+ .data = &spin_retry,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
+ {
+ .procname = "acpi_video_flags",
+ .data = &acpi_realmode_flags,
+ .maxlen = sizeof (unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ #endif
+ #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
+ {
+ .procname = "ignore-unaligned-usertrap",
+ .data = &no_unaligned_warning,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_IA64
+ {
+ .procname = "unaligned-dump-stack",
+ .data = &unaligned_dump_stack,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_DETECT_HUNG_TASK
+ {
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "hung_task_check_count",
+ .data = &sysctl_hung_task_check_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "hung_task_timeout_secs",
+ .data = &sysctl_hung_task_timeout_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_warnings",
+ .data = &sysctl_hung_task_warnings,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &neg_one,
+ },
+ #endif
+ #ifdef CONFIG_RT_MUTEXES
+ {
+ .procname = "max_lock_depth",
+ .data = &max_lock_depth,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ {
+ .procname = "poweroff_cmd",
+ .data = &poweroff_cmd,
+ .maxlen = POWEROFF_CMD_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ #ifdef CONFIG_KEYS
+ {
+ .procname = "keys",
+ .mode = 0555,
+ .child = key_sysctls,
+ },
+ #endif
+ #ifdef CONFIG_PERF_EVENTS
+ /*
+ * User-space scripts rely on the existence of this file
+ * as a feature check for perf_events being enabled.
+ *
+ * So it's an ABI, do not remove!
+ */
+ {
+ .procname = "perf_event_paranoid",
+ .data = &sysctl_perf_event_paranoid,
+ .maxlen = sizeof(sysctl_perf_event_paranoid),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "perf_event_mlock_kb",
+ .data = &sysctl_perf_event_mlock,
+ .maxlen = sizeof(sysctl_perf_event_mlock),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "perf_event_max_sample_rate",
+ .data = &sysctl_perf_event_sample_rate,
+ .maxlen = sizeof(sysctl_perf_event_sample_rate),
+ .mode = 0644,
+ .proc_handler = perf_proc_update_handler,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "perf_cpu_time_max_percent",
+ .data = &sysctl_perf_cpu_time_max_percent,
+ .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
+ .mode = 0644,
+ .proc_handler = perf_cpu_time_max_percent_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "perf_event_max_stack",
+ .data = &sysctl_perf_event_max_stack,
+ .maxlen = sizeof(sysctl_perf_event_max_stack),
+ .mode = 0644,
+ .proc_handler = perf_event_max_stack_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &six_hundred_forty_kb,
+ },
+ {
+ .procname = "perf_event_max_contexts_per_stack",
+ .data = &sysctl_perf_event_max_contexts_per_stack,
+ .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
+ .mode = 0644,
+ .proc_handler = perf_event_max_stack_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_thousand,
+ },
+ #endif
+ {
+ .procname = "panic_on_warn",
+ .data = &panic_on_warn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+ {
+ .procname = "timer_migration",
+ .data = &sysctl_timer_migration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = timer_migration_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_BPF_SYSCALL
+ {
+ .procname = "unprivileged_bpf_disabled",
+ .data = &sysctl_unprivileged_bpf_disabled,
+ .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
+ .mode = 0644,
+ /* only handle a transition from default "0" to "1" */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "bpf_stats_enabled",
+ .data = &bpf_stats_enabled_key.key,
+ .maxlen = sizeof(bpf_stats_enabled_key),
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
+ #endif
+ #if defined(CONFIG_TREE_RCU)
+ {
+ .procname = "panic_on_rcu_stall",
+ .data = &sysctl_panic_on_rcu_stall,
+ .maxlen = sizeof(sysctl_panic_on_rcu_stall),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+ {
+ .procname = "stack_erasing",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = stack_erasing_sysctl,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ { }
+ };
+
+ static struct ctl_table vm_table[] = {
+ {
+ .procname = "overcommit_memory",
+ .data = &sysctl_overcommit_memory,
+ .maxlen = sizeof(sysctl_overcommit_memory),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ {
+ .procname = "panic_on_oom",
+ .data = &sysctl_panic_on_oom,
+ .maxlen = sizeof(sysctl_panic_on_oom),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ {
+ .procname = "oom_kill_allocating_task",
+ .data = &sysctl_oom_kill_allocating_task,
+ .maxlen = sizeof(sysctl_oom_kill_allocating_task),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "oom_dump_tasks",
+ .data = &sysctl_oom_dump_tasks,
+ .maxlen = sizeof(sysctl_oom_dump_tasks),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "overcommit_ratio",
+ .data = &sysctl_overcommit_ratio,
+ .maxlen = sizeof(sysctl_overcommit_ratio),
+ .mode = 0644,
+ .proc_handler = overcommit_ratio_handler,
+ },
+ {
+ .procname = "overcommit_kbytes",
+ .data = &sysctl_overcommit_kbytes,
+ .maxlen = sizeof(sysctl_overcommit_kbytes),
+ .mode = 0644,
+ .proc_handler = overcommit_kbytes_handler,
+ },
+ {
+ .procname = "page-cluster",
+ .data = &page_cluster,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "dirty_background_ratio",
+ .data = &dirty_background_ratio,
+ .maxlen = sizeof(dirty_background_ratio),
+ .mode = 0644,
+ .proc_handler = dirty_background_ratio_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "dirty_background_bytes",
+ .data = &dirty_background_bytes,
+ .maxlen = sizeof(dirty_background_bytes),
+ .mode = 0644,
+ .proc_handler = dirty_background_bytes_handler,
+ .extra1 = &one_ul,
+ },
+ {
+ .procname = "dirty_ratio",
+ .data = &vm_dirty_ratio,
+ .maxlen = sizeof(vm_dirty_ratio),
+ .mode = 0644,
+ .proc_handler = dirty_ratio_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "dirty_bytes",
+ .data = &vm_dirty_bytes,
+ .maxlen = sizeof(vm_dirty_bytes),
+ .mode = 0644,
+ .proc_handler = dirty_bytes_handler,
+ .extra1 = &dirty_bytes_min,
+ },
+ {
+ .procname = "dirty_writeback_centisecs",
+ .data = &dirty_writeback_interval,
+ .maxlen = sizeof(dirty_writeback_interval),
+ .mode = 0644,
+ .proc_handler = dirty_writeback_centisecs_handler,
+ },
+ {
+ .procname = "dirty_expire_centisecs",
+ .data = &dirty_expire_interval,
+ .maxlen = sizeof(dirty_expire_interval),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "dirtytime_expire_seconds",
+ .data = &dirtytime_expire_interval,
+ .maxlen = sizeof(dirtytime_expire_interval),
+ .mode = 0644,
+ .proc_handler = dirtytime_interval_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "swappiness",
+ .data = &vm_swappiness,
+ .maxlen = sizeof(vm_swappiness),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
+ #ifdef CONFIG_HUGETLB_PAGE
+ {
+ .procname = "nr_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_sysctl_handler,
+ },
+ #ifdef CONFIG_NUMA
+ {
+ .procname = "nr_hugepages_mempolicy",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ },
+ {
+ .procname = "numa_stat",
+ .data = &sysctl_vm_numa_stat,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_vm_numa_stat_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ {
+ .procname = "hugetlb_shm_group",
+ .data = &sysctl_hugetlb_shm_group,
+ .maxlen = sizeof(gid_t),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "nr_overcommit_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_overcommit_handler,
+ },
+ #endif
+ {
+ .procname = "lowmem_reserve_ratio",
+ .data = &sysctl_lowmem_reserve_ratio,
+ .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
+ .mode = 0644,
+ .proc_handler = lowmem_reserve_ratio_sysctl_handler,
+ },
+ {
+ .procname = "drop_caches",
+ .data = &sysctl_drop_caches,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = drop_caches_sysctl_handler,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &four,
+ },
+ #ifdef CONFIG_COMPACTION
+ {
+ .procname = "compact_memory",
+ .data = &sysctl_compact_memory,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = sysctl_compaction_handler,
+ },
+ {
+ .procname = "extfrag_threshold",
+ .data = &sysctl_extfrag_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_extfrag_threshold,
+ .extra2 = &max_extfrag_threshold,
+ },
+ {
+ .procname = "compact_unevictable_allowed",
+ .data = &sysctl_compact_unevictable_allowed,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_warn_RT_change,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },

- int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
- return -ENOSYS;
- }
+ #endif /* CONFIG_COMPACTION */
+ {
+ .procname = "min_free_kbytes",
+ .data = &min_free_kbytes,
+ .maxlen = sizeof(min_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "watermark_boost_factor",
+ .data = &watermark_boost_factor,
+ .maxlen = sizeof(watermark_boost_factor),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "watermark_scale_factor",
+ .data = &watermark_scale_factor,
+ .maxlen = sizeof(watermark_scale_factor),
+ .mode = 0644,
+ .proc_handler = watermark_scale_factor_sysctl_handler,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &one_thousand,
+ },
+ {
+ .procname = "percpu_pagelist_fraction",
+ .data = &percpu_pagelist_fraction,
+ .maxlen = sizeof(percpu_pagelist_fraction),
+ .mode = 0644,
+ .proc_handler = percpu_pagelist_fraction_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ #ifdef CONFIG_MMU
+ {
+ .procname = "max_map_count",
+ .data = &sysctl_max_map_count,
+ .maxlen = sizeof(sysctl_max_map_count),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ #else
+ {
+ .procname = "nr_trim_pages",
+ .data = &sysctl_nr_trim_pages,
+ .maxlen = sizeof(sysctl_nr_trim_pages),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ #endif
+ {
+ .procname = "laptop_mode",
+ .data = &laptop_mode,
+ .maxlen = sizeof(laptop_mode),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "block_dump",
+ .data = &block_dump,
+ .maxlen = sizeof(block_dump),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "vfs_cache_pressure",
+ .data = &sysctl_vfs_cache_pressure,
+ .maxlen = sizeof(sysctl_vfs_cache_pressure),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ },
+ #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
+ {
+ .procname = "legacy_va_layout",
+ .data = &sysctl_legacy_va_layout,
+ .maxlen = sizeof(sysctl_legacy_va_layout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ },
+ #endif
+ #ifdef CONFIG_NUMA
+ {
+ .procname = "zone_reclaim_mode",
+ .data = &node_reclaim_mode,
+ .maxlen = sizeof(node_reclaim_mode),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "min_unmapped_ratio",
+ .data = &sysctl_min_unmapped_ratio,
+ .maxlen = sizeof(sysctl_min_unmapped_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "min_slab_ratio",
+ .data = &sysctl_min_slab_ratio,
+ .maxlen = sizeof(sysctl_min_slab_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
+ #endif
+ #ifdef CONFIG_SMP
+ {
+ .procname = "stat_interval",
+ .data = &sysctl_stat_interval,
+ .maxlen = sizeof(sysctl_stat_interval),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "stat_refresh",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0600,
+ .proc_handler = vmstat_refresh,
+ },
+ #endif
+ #ifdef CONFIG_MMU
+ {
+ .procname = "mmap_min_addr",
+ .data = &dac_mmap_min_addr,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = mmap_min_addr_handler,
+ },
+ #endif
+ #ifdef CONFIG_NUMA
+ {
+ .procname = "numa_zonelist_order",
+ .data = &numa_zonelist_order,
+ .maxlen = NUMA_ZONELIST_ORDER_LEN,
+ .mode = 0644,
+ .proc_handler = numa_zonelist_order_handler,
+ },
+ #endif
+ #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
+ (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
+ {
+ .procname = "vdso_enabled",
+ #ifdef CONFIG_X86_32
+ .data = &vdso32_enabled,
+ .maxlen = sizeof(vdso32_enabled),
+ #else
+ .data = &vdso_enabled,
+ .maxlen = sizeof(vdso_enabled),
+ #endif
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ },
+ #endif
+ #ifdef CONFIG_HIGHMEM
+ {
+ .procname = "highmem_is_dirtyable",
+ .data = &vm_highmem_is_dirtyable,
+ .maxlen = sizeof(vm_highmem_is_dirtyable),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ #ifdef CONFIG_MEMORY_FAILURE
+ {
+ .procname = "memory_failure_early_kill",
+ .data = &sysctl_memory_failure_early_kill,
+ .maxlen = sizeof(sysctl_memory_failure_early_kill),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "memory_failure_recovery",
+ .data = &sysctl_memory_failure_recovery,
+ .maxlen = sizeof(sysctl_memory_failure_recovery),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ {
+ .procname = "user_reserve_kbytes",
+ .data = &sysctl_user_reserve_kbytes,
+ .maxlen = sizeof(sysctl_user_reserve_kbytes),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "admin_reserve_kbytes",
+ .data = &sysctl_admin_reserve_kbytes,
+ .maxlen = sizeof(sysctl_admin_reserve_kbytes),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+ {
+ .procname = "mmap_rnd_bits",
+ .data = &mmap_rnd_bits,
+ .maxlen = sizeof(mmap_rnd_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_bits_min,
+ .extra2 = (void *)&mmap_rnd_bits_max,
+ },
+ #endif
+ #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ {
+ .procname = "mmap_rnd_compat_bits",
+ .data = &mmap_rnd_compat_bits,
+ .maxlen = sizeof(mmap_rnd_compat_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_compat_bits_min,
+ .extra2 = (void *)&mmap_rnd_compat_bits_max,
+ },
+ #endif
+ #ifdef CONFIG_USERFAULTFD
+ {
+ .procname = "unprivileged_userfaultfd",
+ .data = &sysctl_unprivileged_userfaultfd,
+ .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ { }
+ };

- int proc_doulongvec_minmax(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
- return -ENOSYS;
- }
+ static struct ctl_table fs_table[] = {
+ {
+ .procname = "inode-nr",
+ .data = &inodes_stat,
+ .maxlen = 2*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "inode-state",
+ .data = &inodes_stat,
+ .maxlen = 7*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "file-nr",
+ .data = &files_stat,
+ .maxlen = sizeof(files_stat),
+ .mode = 0444,
+ .proc_handler = proc_nr_files,
+ },
+ {
+ .procname = "file-max",
+ .data = &files_stat.max_files,
+ .maxlen = sizeof(files_stat.max_files),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero_ul,
+ .extra2 = &long_max,
+ },
+ {
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ {
+ .procname = "dentry-state",
+ .data = &dentry_stat,
+ .maxlen = 6*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_dentry,
+ },
+ {
+ .procname = "overflowuid",
+ .data = &fs_overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &fs_overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+ #ifdef CONFIG_FILE_LOCKING
+ {
+ .procname = "leases-enable",
+ .data = &leases_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_DNOTIFY
+ {
+ .procname = "dir-notify-enable",
+ .data = &dir_notify_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_MMU
+ #ifdef CONFIG_FILE_LOCKING
+ {
+ .procname = "lease-break-time",
+ .data = &lease_break_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+ #ifdef CONFIG_AIO
+ {
+ .procname = "aio-nr",
+ .data = &aio_nr,
+ .maxlen = sizeof(aio_nr),
+ .mode = 0444,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "aio-max-nr",
+ .data = &aio_max_nr,
+ .maxlen = sizeof(aio_max_nr),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ #endif /* CONFIG_AIO */
+ #ifdef CONFIG_INOTIFY_USER
+ {
+ .procname = "inotify",
+ .mode = 0555,
+ .child = inotify_table,
+ },
+ #endif
+ #ifdef CONFIG_EPOLL
+ {
+ .procname = "epoll",
+ .mode = 0555,
+ .child = epoll_table,
+ },
+ #endif
+ #endif
+ {
+ .procname = "protected_symlinks",
+ .data = &sysctl_protected_symlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_hardlinks",
+ .data = &sysctl_protected_hardlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ {
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+ {
+ .procname = "binfmt_misc",
+ .mode = 0555,
+ .child = sysctl_mount_point,
+ },
+ #endif
+ {
+ .procname = "pipe-max-size",
+ .data = &pipe_max_size,
+ .maxlen = sizeof(pipe_max_size),
+ .mode = 0644,
+ .proc_handler = proc_dopipe_max_size,
+ },
+ {
+ .procname = "pipe-user-pages-hard",
+ .data = &pipe_user_pages_hard,
+ .maxlen = sizeof(pipe_user_pages_hard),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "pipe-user-pages-soft",
+ .data = &pipe_user_pages_soft,
+ .maxlen = sizeof(pipe_user_pages_soft),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "mount-max",
+ .data = &sysctl_mount_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ { }
+ };

- int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
- {
- return -ENOSYS;
- }
+ static struct ctl_table debug_table[] = {
+ #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
+ {
+ .procname = "exception-trace",
+ .data = &show_unhandled_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ #endif
+ #if defined(CONFIG_OPTPROBES)
+ {
+ .procname = "kprobes-optimization",
+ .data = &sysctl_kprobes_optimization,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_kprobes_optimization_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+ { }
+ };

- int proc_do_large_bitmap(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
- return -ENOSYS;
- }
+ static struct ctl_table dev_table[] = {
+ { }
+ };

- #endif /* CONFIG_PROC_SYSCTL */
+ static struct ctl_table sysctl_base_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kern_table,
+ },
+ {
+ .procname = "vm",
+ .mode = 0555,
+ .child = vm_table,
+ },
+ {
+ .procname = "fs",
+ .mode = 0555,
+ .child = fs_table,
+ },
+ {
+ .procname = "debug",
+ .mode = 0555,
+ .child = debug_table,
+ },
+ {
+ .procname = "dev",
+ .mode = 0555,
+ .child = dev_table,
+ },
+ { }
+ };

- #if defined(CONFIG_SYSCTL)
- int proc_do_static_key(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ int __init sysctl_init(void)
{
- struct static_key *key = (struct static_key *)table->data;
- static DEFINE_MUTEX(static_key_mutex);
- int val, ret;
- struct ctl_table tmp = {
- .data = &val,
- .maxlen = sizeof(val),
- .mode = table->mode,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- };
-
- if (write && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ struct ctl_table_header *hdr;

- mutex_lock(&static_key_mutex);
- val = static_key_enabled(key);
- ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
- if (write && !ret) {
- if (val)
- static_key_enable(key);
- else
- static_key_disable(key);
- }
- mutex_unlock(&static_key_mutex);
- return ret;
+ hdr = register_sysctl_table(sysctl_base_table);
+ kmemleak_not_leak(hdr);
+ return 0;
}
- #endif
+ #endif /* CONFIG_SYSCTL */
/*
* No sense putting this after each symbol definition, twice,
* exception granted :-)


Attachments:
(No filename) (499.00 B)
OpenPGP digital signature

2020-05-11 02:01:03

by Xiaoming Ni

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On 2020/5/11 9:11, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the vfs tree got a conflict in:
>
> kernel/sysctl.c
>
> between commit:
>
> b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")
>
> from the parisc-hd tree and commit:
>
> f461d2dcd511 ("sysctl: avoid forward declarations")
>
> from the vfs tree.
>
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging. You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
>


kernel/sysctl.c contains more than 190 interface files, along with a
large number of config macro conditionals. When the sysctl interface is
modified directly in kernel/sysctl.c, conflicts occur very easily.

At the same time, the register_sysctl_table() provided by the kernel
makes it easy to add a sysctl interface without causing any conflict
in kernel/sysctl.c.

Should we add guidance to the patch guides (coding-style.rst,
submitting-patches.rst):
Prefer register_sysctl_table() when adding a new sysctl interface,
keep each feature's code in one place, and avoid modifying
kernel/sysctl.c directly?

In addition, should the architecture-related sysctl interfaces be
moved to arch/xxx/kernel/sysctl.c?

Thanks
Xiaoming Ni

2020-05-12 00:35:49

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Mon, May 11, 2020 at 09:55:16AM +0800, Xiaoming Ni wrote:
> On 2020/5/11 9:11, Stephen Rothwell wrote:
> > Hi all,
> >
> > Today's linux-next merge of the vfs tree got a conflict in:
> >
> > kernel/sysctl.c
> >
> > between commit:
> >
> > b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")
> >
> > from the parisc-hd tree and commit:
> >
> > f461d2dcd511 ("sysctl: avoid forward declarations")
> >
> > from the vfs tree.
> >
> > I fixed it up (see below) and can carry the fix as necessary. This
> > is now fixed as far as linux-next is concerned, but any non trivial
> > conflicts should be mentioned to your upstream maintainer when your tree
> > is submitted for merging. You may also want to consider cooperating
> > with the maintainer of the conflicting tree to minimise any particularly
> > complex conflicts.
> >
>
>
> Kernel/sysctl.c contains more than 190 interface files, and there are a
> large number of config macro controls. When modifying the sysctl interface
> directly in kernel/sysctl.c , conflicts are very easy to occur.
>
> At the same time, the register_sysctl_table() provided by the system can
> easily add the sysctl interface, and there is no conflict of kernel/sysctl.c
> .
>
> Should we add instructions in the patch guide (coding-style.rst
> submitting-patches.rst):
> Preferentially use register_sysctl_table() to add a new sysctl interface,
> centralize feature codes, and avoid directly modifying kernel/sysctl.c ?

Yes, however I don't think folks know how to do this well. So I think we
just have to at least start doing it ourselves, and then reflect some of
this in the docs. The reason this may not be easy is that we need to
ensure that at an init level we haven't busted dependencies on setting
this. We also just don't have docs on how to do this well.

> In addition, is it necessary to transfer the architecture-related sysctl
> interface to arch/xxx/kernel/sysctl.c ?

Well here's an initial attempt to start with fs stuff in a very
conservative way. What do folks think?

fs/proc/Makefile | 1 +
fs/proc/fs_sysctl_table.c | 97 +++++++++++++++++++++++++++++++++++++++
kernel/sysctl.c | 48 -------------------
3 files changed, 98 insertions(+), 48 deletions(-)
create mode 100644 fs/proc/fs_sysctl_table.c

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bd08616ed8ba..8bf419b2ac7d 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -28,6 +28,7 @@ proc-y += namespaces.o
proc-y += self.o
proc-y += thread_self.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
+proc-$(CONFIG_SYSCTL) += fs_sysctl_table.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
diff --git a/fs/proc/fs_sysctl_table.c b/fs/proc/fs_sysctl_table.c
new file mode 100644
index 000000000000..f56a49989872
--- /dev/null
+++ b/fs/proc/fs_sysctl_table.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * /proc/sys/fs sysctl table
+ */
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/printk.h>
+#include <linux/security.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/bpf-cgroup.h>
+#include <linux/mount.h>
+#include <linux/dnotify.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/aio.h>
+#include <linux/inotify.h>
+#include <linux/kmemleak.h>
+#include <linux/binfmts.h>
+
+static unsigned long zero_ul;
+static unsigned long long_max = LONG_MAX;
+
+static struct ctl_table fs_table[] = {
+ {
+ .procname = "inode-nr",
+ .data = &inodes_stat,
+ .maxlen = 2*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "inode-state",
+ .data = &inodes_stat,
+ .maxlen = 7*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "file-nr",
+ .data = &files_stat,
+ .maxlen = sizeof(files_stat),
+ .mode = 0444,
+ .proc_handler = proc_nr_files,
+ },
+ {
+ .procname = "file-max",
+ .data = &files_stat.max_files,
+ .maxlen = sizeof(files_stat.max_files),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero_ul,
+ .extra2 = &long_max,
+ },
+ {
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ {
+ .procname = "dentry-state",
+ .data = &dentry_stat,
+ .maxlen = 6*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_dentry,
+ },
+ { }
+};
+
+static struct ctl_table fs_base_table[] = {
+ {
+ .procname = "fs",
+ .mode = 0555,
+ .child = fs_table,
+ },
+ { }
+};
+
+static int __init fs_procsys_init(void)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_table(fs_base_table);
+ kmemleak_not_leak(hdr);
+
+ return 0;
+}
+
+early_initcall(fs_procsys_init);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3b0cecf57e79..6669d6118974 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -114,9 +114,7 @@ static int sixty = 60;
static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
-static unsigned long zero_ul;
static unsigned long one_ul = 1;
-static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
@@ -3087,52 +3085,6 @@ static struct ctl_table vm_table[] = {
};

static struct ctl_table fs_table[] = {
- {
- .procname = "inode-nr",
- .data = &inodes_stat,
- .maxlen = 2*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_inodes,
- },
- {
- .procname = "inode-state",
- .data = &inodes_stat,
- .maxlen = 7*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_inodes,
- },
- {
- .procname = "file-nr",
- .data = &files_stat,
- .maxlen = sizeof(files_stat),
- .mode = 0444,
- .proc_handler = proc_nr_files,
- },
- {
- .procname = "file-max",
- .data = &files_stat.max_files,
- .maxlen = sizeof(files_stat.max_files),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero_ul,
- .extra2 = &long_max,
- },
- {
- .procname = "nr_open",
- .data = &sysctl_nr_open,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &sysctl_nr_open_min,
- .extra2 = &sysctl_nr_open_max,
- },
- {
- .procname = "dentry-state",
- .data = &dentry_stat,
- .maxlen = 6*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_dentry,
- },
{
.procname = "overflowuid",
.data = &fs_overflowuid,
--
2.26.2

2020-05-12 05:24:04

by Kees Cook

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Tue, May 12, 2020 at 12:33:05AM +0000, Luis Chamberlain wrote:
> On Mon, May 11, 2020 at 09:55:16AM +0800, Xiaoming Ni wrote:
> > On 2020/5/11 9:11, Stephen Rothwell wrote:
> > > Hi all,
> > >
> > > Today's linux-next merge of the vfs tree got a conflict in:
> > >
> > > kernel/sysctl.c
> > >
> > > between commit:
> > >
> > > b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")
> > >
> > > from the parisc-hd tree and commit:
> > >
> > > f461d2dcd511 ("sysctl: avoid forward declarations")
> > >
> > > from the vfs tree.
> > >
> > > I fixed it up (see below) and can carry the fix as necessary. This
> > > is now fixed as far as linux-next is concerned, but any non trivial
> > > conflicts should be mentioned to your upstream maintainer when your tree
> > > is submitted for merging. You may also want to consider cooperating
> > > with the maintainer of the conflicting tree to minimise any particularly
> > > complex conflicts.
> > >
> >
> >
> > Kernel/sysctl.c contains more than 190 interface files, and there are a
> > large number of config macro controls. When modifying the sysctl interface
> > directly in kernel/sysctl.c , conflicts are very easy to occur.
> >
> > At the same time, the register_sysctl_table() provided by the system can
> > easily add the sysctl interface, and there is no conflict of kernel/sysctl.c
> > .
> >
> > Should we add instructions in the patch guide (coding-style.rst
> > submitting-patches.rst):
> > Preferentially use register_sysctl_table() to add a new sysctl interface,
> > centralize feature codes, and avoid directly modifying kernel/sysctl.c ?
>
> Yes, however I don't think folks know how to do this well. So I think we
> just have to do at least start ourselves, and then reflect some of this
> in the docs. The reason that this can be not easy is that we need to
> ensure that at an init level we haven't busted dependencies on setting
> this. We also just don't have docs on how to do this well.
>
> > In addition, is it necessary to transfer the architecture-related sysctl
> > interface to arch/xxx/kernel/sysctl.c ?
>
> Well here's an initial attempt to start with fs stuff in a very
> conservative way. What do folks think?
>
> [...]
> +static unsigned long zero_ul;
> +static unsigned long long_max = LONG_MAX;

I think it'd be nice to keep these in one place for others to reuse,
though that means making them non-static. (And now that I look at them,
I thought they were supposed to be const?)

--
Kees Cook

2020-05-12 05:48:48

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Mon, May 11, 2020 at 10:22:04PM -0700, Kees Cook wrote:
> On Tue, May 12, 2020 at 12:33:05AM +0000, Luis Chamberlain wrote:
> > On Mon, May 11, 2020 at 09:55:16AM +0800, Xiaoming Ni wrote:
> > > On 2020/5/11 9:11, Stephen Rothwell wrote:
> > > > Hi all,
> > > >
> > > > Today's linux-next merge of the vfs tree got a conflict in:
> > > >
> > > > kernel/sysctl.c
> > > >
> > > > between commit:
> > > >
> > > > b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")
> > > >
> > > > from the parisc-hd tree and commit:
> > > >
> > > > f461d2dcd511 ("sysctl: avoid forward declarations")
> > > >
> > > > from the vfs tree.
> > > >
> > > > I fixed it up (see below) and can carry the fix as necessary. This
> > > > is now fixed as far as linux-next is concerned, but any non trivial
> > > > conflicts should be mentioned to your upstream maintainer when your tree
> > > > is submitted for merging. You may also want to consider cooperating
> > > > with the maintainer of the conflicting tree to minimise any particularly
> > > > complex conflicts.
> > > >
> > >
> > >
> > > Kernel/sysctl.c contains more than 190 interface files, and there are a
> > > large number of config macro controls. When modifying the sysctl interface
> > > directly in kernel/sysctl.c , conflicts are very easy to occur.
> > >
> > > At the same time, the register_sysctl_table() provided by the system can
> > > easily add the sysctl interface, and there is no conflict of kernel/sysctl.c
> > > .
> > >
> > > Should we add instructions in the patch guide (coding-style.rst
> > > submitting-patches.rst):
> > > Preferentially use register_sysctl_table() to add a new sysctl interface,
> > > centralize feature codes, and avoid directly modifying kernel/sysctl.c ?
> >
> > Yes, however I don't think folks know how to do this well. So I think we
> > just have to do at least start ourselves, and then reflect some of this
> > in the docs. The reason that this can be not easy is that we need to
> > ensure that at an init level we haven't busted dependencies on setting
> > this. We also just don't have docs on how to do this well.
> >
> > > In addition, is it necessary to transfer the architecture-related sysctl
> > > interface to arch/xxx/kernel/sysctl.c ?
> >
> > Well here's an initial attempt to start with fs stuff in a very
> > conservative way. What do folks think?
> >
> > [...]
> > +static unsigned long zero_ul;
> > +static unsigned long long_max = LONG_MAX;
>
> I think it'd be nice to keep these in one place for others to reuse,
> though that means making them non-static. (And now that I look at them,
> I thought they were supposed to be const?)

So much spring cleaning to do. I can add the const and share it.
It seems odd to stuff this into a sysctl.h, types.h doesn't seem
right... I can't think of something proper, so I'll just move them
to sysctl.h for now.

Any thought on the approach though? I mean, I realize that this will
require more of the subsystem specific folks to look at the code and
review, but if this seems fair, I'll get the ball rolling.

Luis

2020-05-12 11:58:03

by Eric W. Biederman

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

Luis Chamberlain <[email protected]> writes:

> On Mon, May 11, 2020 at 09:55:16AM +0800, Xiaoming Ni wrote:
>> On 2020/5/11 9:11, Stephen Rothwell wrote:
>> > Hi all,
>> >
>> > Today's linux-next merge of the vfs tree got a conflict in:
>> >
>> > kernel/sysctl.c
>> >
>> > between commit:
>> >
>> > b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")
>> >
>> > from the parisc-hd tree and commit:
>> >
>> > f461d2dcd511 ("sysctl: avoid forward declarations")
>> >
>> > from the vfs tree.
>> >
>> > I fixed it up (see below) and can carry the fix as necessary. This
>> > is now fixed as far as linux-next is concerned, but any non trivial
>> > conflicts should be mentioned to your upstream maintainer when your tree
>> > is submitted for merging. You may also want to consider cooperating
>> > with the maintainer of the conflicting tree to minimise any particularly
>> > complex conflicts.
>> >
>>
>>
>> Kernel/sysctl.c contains more than 190 interface files, and there are a
>> large number of config macro controls. When modifying the sysctl interface
>> directly in kernel/sysctl.c , conflicts are very easy to occur.
>>
>> At the same time, the register_sysctl_table() provided by the system can
>> easily add the sysctl interface, and there is no conflict of kernel/sysctl.c
>> .
>>
>> Should we add instructions in the patch guide (coding-style.rst
>> submitting-patches.rst):
>> Preferentially use register_sysctl_table() to add a new sysctl interface,
>> centralize feature codes, and avoid directly modifying kernel/sysctl.c ?
>
> Yes, however I don't think folks know how to do this well. So I think we
> just have to do at least start ourselves, and then reflect some of this
> in the docs. The reason that this can be not easy is that we need to
> ensure that at an init level we haven't busted dependencies on setting
> this. We also just don't have docs on how to do this well.
>
>> In addition, is it necessary to transfer the architecture-related sysctl
>> interface to arch/xxx/kernel/sysctl.c ?


>
> Well here's an initial attempt to start with fs stuff in a very
> conservative way. What do folks think?

I don't see how any of that deals with the current conflict in -next.

You are putting the fs sysctls in the wrong place. They should live
in fs/ not in fs/proc/. Otherwise you are pretty much repeating
the problem of poorly located code in another location.


> fs/proc/Makefile | 1 +
> fs/proc/fs_sysctl_table.c | 97 +++++++++++++++++++++++++++++++++++++++
> kernel/sysctl.c | 48 -------------------
> 3 files changed, 98 insertions(+), 48 deletions(-)
> create mode 100644 fs/proc/fs_sysctl_table.c
>
> diff --git a/fs/proc/Makefile b/fs/proc/Makefile
> index bd08616ed8ba..8bf419b2ac7d 100644
> --- a/fs/proc/Makefile
> +++ b/fs/proc/Makefile
> @@ -28,6 +28,7 @@ proc-y += namespaces.o
> proc-y += self.o
> proc-y += thread_self.o
> proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
> +proc-$(CONFIG_SYSCTL) += fs_sysctl_table.o
> proc-$(CONFIG_NET) += proc_net.o
> proc-$(CONFIG_PROC_KCORE) += kcore.o
> proc-$(CONFIG_PROC_VMCORE) += vmcore.o
> diff --git a/fs/proc/fs_sysctl_table.c b/fs/proc/fs_sysctl_table.c
> new file mode 100644
> index 000000000000..f56a49989872
> --- /dev/null
> +++ b/fs/proc/fs_sysctl_table.c
> @@ -0,0 +1,97 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * /proc/sys/fs sysctl table
> + */
> +#include <linux/init.h>
> +#include <linux/sysctl.h>
> +#include <linux/poll.h>
> +#include <linux/proc_fs.h>
> +#include <linux/printk.h>
> +#include <linux/security.h>
> +#include <linux/sched.h>
> +#include <linux/cred.h>
> +#include <linux/namei.h>
> +#include <linux/mm.h>
> +#include <linux/module.h>
> +#include <linux/bpf-cgroup.h>
> +#include <linux/mount.h>
> +#include <linux/dnotify.h>
> +#include <linux/pipe_fs_i.h>
> +#include <linux/aio.h>
> +#include <linux/inotify.h>
> +#include <linux/kmemleak.h>
> +#include <linux/binfmts.h>
> +
> +static unsigned long zero_ul;
> +static unsigned long long_max = LONG_MAX;
> +
> +static struct ctl_table fs_table[] = {
> + {
> + .procname = "inode-nr",
> + .data = &inodes_stat,
> + .maxlen = 2*sizeof(long),
> + .mode = 0444,
> + .proc_handler = proc_nr_inodes,
> + },
> + {
> + .procname = "inode-state",
> + .data = &inodes_stat,
> + .maxlen = 7*sizeof(long),
> + .mode = 0444,
> + .proc_handler = proc_nr_inodes,
> + },
> + {
> + .procname = "file-nr",
> + .data = &files_stat,
> + .maxlen = sizeof(files_stat),
> + .mode = 0444,
> + .proc_handler = proc_nr_files,
> + },
> + {
> + .procname = "file-max",
> + .data = &files_stat.max_files,
> + .maxlen = sizeof(files_stat.max_files),
> + .mode = 0644,
> + .proc_handler = proc_doulongvec_minmax,
> + .extra1 = &zero_ul,
> + .extra2 = &long_max,
> + },
> + {
> + .procname = "nr_open",
> + .data = &sysctl_nr_open,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = &sysctl_nr_open_min,
> + .extra2 = &sysctl_nr_open_max,
> + },
> + {
> + .procname = "dentry-state",
> + .data = &dentry_stat,
> + .maxlen = 6*sizeof(long),
> + .mode = 0444,
> + .proc_handler = proc_nr_dentry,
> + },
> + { }
> +};
> +
> +static struct ctl_table fs_base_table[] = {
> + {
> + .procname = "fs",
> + .mode = 0555,
> + .child = fs_table,
> + },
> + { }
> +};
^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.

> +static int __init fs_procsys_init(void)
> +{
> + struct ctl_table_header *hdr;
> +
> + hdr = register_sysctl_table(fs_base_table);
^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
AKA
hdr = register_sysctl("fs", fs_table);
> + kmemleak_not_leak(hdr);
> +
> + return 0;
> +}
> +
> +early_initcall(fs_procsys_init);

2020-05-12 17:28:01

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
> Luis Chamberlain <[email protected]> writes:
>
> > On Mon, May 11, 2020 at 09:55:16AM +0800, Xiaoming Ni wrote:
> >> On 2020/5/11 9:11, Stephen Rothwell wrote:
> >> > Hi all,
> >> >
> >> > Today's linux-next merge of the vfs tree got a conflict in:
> >> >
> >> > kernel/sysctl.c
> >> >
> >> > between commit:
> >> >
> >> > b6522fa409cf ("parisc: add sysctl file interface panic_on_stackoverflow")
> >> >
> >> > from the parisc-hd tree and commit:
> >> >
> >> > f461d2dcd511 ("sysctl: avoid forward declarations")
> >> >
> >> > from the vfs tree.
> >> >
> >> > I fixed it up (see below) and can carry the fix as necessary. This
> >> > is now fixed as far as linux-next is concerned, but any non trivial
> >> > conflicts should be mentioned to your upstream maintainer when your tree
> >> > is submitted for merging. You may also want to consider cooperating
> >> > with the maintainer of the conflicting tree to minimise any particularly
> >> > complex conflicts.
> >> >
> >>
> >>
> >> Kernel/sysctl.c contains more than 190 interface files, and there are a
> >> large number of config macro controls. When modifying the sysctl interface
> >> directly in kernel/sysctl.c , conflicts are very easy to occur.
> >>
> >> At the same time, the register_sysctl_table() provided by the system can
> >> easily add the sysctl interface, and there is no conflict of kernel/sysctl.c
> >> .
> >>
> >> Should we add instructions in the patch guide (coding-style.rst
> >> submitting-patches.rst):
> >> Preferentially use register_sysctl_table() to add a new sysctl interface,
> >> centralize feature codes, and avoid directly modifying kernel/sysctl.c ?
> >
> > Yes, however I don't think folks know how to do this well. So I think we
> > just have to do at least start ourselves, and then reflect some of this
> > in the docs. The reason that this can be not easy is that we need to
> > ensure that at an init level we haven't busted dependencies on setting
> > this. We also just don't have docs on how to do this well.
> >
> >> In addition, is it necessary to transfer the architecture-related sysctl
> >> interface to arch/xxx/kernel/sysctl.c ?
>
>
> >
> > Well here's an initial attempt to start with fs stuff in a very
> > conservative way. What do folks think?
>
> I don't see how any of that deals with the current conflict in -next.

The point is to clean up the kitchen sink full of knobs that everyone from
different subsystems has put in place for random things, so as to reduce
the number of edits to the file and thereby avoid the possibility
of merge conflicts.

> You are putting the fs sysctls in the wrong place. The should live
> in fs/ not in fs/proc/.

That's an easy fix, sure, I'll do that.

> Otherwise you are pretty much repeating
> the problem the problem of poorly located code in another location.

Sure, alright, well I'll chug on with trying to clean up the kitchen
sink. We can decide where we put items during review.

> > fs/proc/Makefile | 1 +
> > fs/proc/fs_sysctl_table.c | 97 +++++++++++++++++++++++++++++++++++++++
> > kernel/sysctl.c | 48 -------------------
> > 3 files changed, 98 insertions(+), 48 deletions(-)
> > create mode 100644 fs/proc/fs_sysctl_table.c
> >
> > diff --git a/fs/proc/Makefile b/fs/proc/Makefile
> > index bd08616ed8ba..8bf419b2ac7d 100644
> > --- a/fs/proc/Makefile
> > +++ b/fs/proc/Makefile
> > @@ -28,6 +28,7 @@ proc-y += namespaces.o
> > proc-y += self.o
> > proc-y += thread_self.o
> > proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
> > +proc-$(CONFIG_SYSCTL) += fs_sysctl_table.o
> > proc-$(CONFIG_NET) += proc_net.o
> > proc-$(CONFIG_PROC_KCORE) += kcore.o
> > proc-$(CONFIG_PROC_VMCORE) += vmcore.o
> > diff --git a/fs/proc/fs_sysctl_table.c b/fs/proc/fs_sysctl_table.c
> > new file mode 100644
> > index 000000000000..f56a49989872
> > --- /dev/null
> > +++ b/fs/proc/fs_sysctl_table.c
> > @@ -0,0 +1,97 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * /proc/sys/fs sysctl table
> > + */
> > +#include <linux/init.h>
> > +#include <linux/sysctl.h>
> > +#include <linux/poll.h>
> > +#include <linux/proc_fs.h>
> > +#include <linux/printk.h>
> > +#include <linux/security.h>
> > +#include <linux/sched.h>
> > +#include <linux/cred.h>
> > +#include <linux/namei.h>
> > +#include <linux/mm.h>
> > +#include <linux/module.h>
> > +#include <linux/bpf-cgroup.h>
> > +#include <linux/mount.h>
> > +#include <linux/dnotify.h>
> > +#include <linux/pipe_fs_i.h>
> > +#include <linux/aio.h>
> > +#include <linux/inotify.h>
> > +#include <linux/kmemleak.h>
> > +#include <linux/binfmts.h>
> > +
> > +static unsigned long zero_ul;
> > +static unsigned long long_max = LONG_MAX;
> > +
> > +static struct ctl_table fs_table[] = {
> > + {
> > + .procname = "inode-nr",
> > + .data = &inodes_stat,
> > + .maxlen = 2*sizeof(long),
> > + .mode = 0444,
> > + .proc_handler = proc_nr_inodes,
> > + },
> > + {
> > + .procname = "inode-state",
> > + .data = &inodes_stat,
> > + .maxlen = 7*sizeof(long),
> > + .mode = 0444,
> > + .proc_handler = proc_nr_inodes,
> > + },
> > + {
> > + .procname = "file-nr",
> > + .data = &files_stat,
> > + .maxlen = sizeof(files_stat),
> > + .mode = 0444,
> > + .proc_handler = proc_nr_files,
> > + },
> > + {
> > + .procname = "file-max",
> > + .data = &files_stat.max_files,
> > + .maxlen = sizeof(files_stat.max_files),
> > + .mode = 0644,
> > + .proc_handler = proc_doulongvec_minmax,
> > + .extra1 = &zero_ul,
> > + .extra2 = &long_max,
> > + },
> > + {
> > + .procname = "nr_open",
> > + .data = &sysctl_nr_open,
> > + .maxlen = sizeof(unsigned int),
> > + .mode = 0644,
> > + .proc_handler = proc_dointvec_minmax,
> > + .extra1 = &sysctl_nr_open_min,
> > + .extra2 = &sysctl_nr_open_max,
> > + },
> > + {
> > + .procname = "dentry-state",
> > + .data = &dentry_stat,
> > + .maxlen = 6*sizeof(long),
> > + .mode = 0444,
> > + .proc_handler = proc_nr_dentry,
> > + },
> > + { }
> > +};
> > +
> > +static struct ctl_table fs_base_table[] = {
> > + {
> > + .procname = "fs",
> > + .mode = 0555,
> > + .child = fs_table,
> > + },
> > + { }
> > +};
> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
> > > +static int __init fs_procsys_init(void)
> > +{
> > + struct ctl_table_header *hdr;
> > +
> > + hdr = register_sysctl_table(fs_base_table);
> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
> AKA
> hdr = register_sysctl("fs", fs_table);

Ah, much cleaner thanks!

Luis

2020-05-12 17:48:26

by Eric W. Biederman

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

Luis Chamberlain <[email protected]> writes:

> On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
>> Luis Chamberlain <[email protected]> writes:
>>
>> > +static struct ctl_table fs_base_table[] = {
>> > + {
>> > + .procname = "fs",
>> > + .mode = 0555,
>> > + .child = fs_table,
>> > + },
>> > + { }
>> > +};
>> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
>> > > +static int __init fs_procsys_init(void)
>> > +{
>> > + struct ctl_table_header *hdr;
>> > +
>> > + hdr = register_sysctl_table(fs_base_table);
>> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
>> AKA
>> hdr = register_sysctl("fs", fs_table);
>
> Ah, much cleaner thanks!

It is my hope that we can get rid of register_sysctl_table one of these
days. It was the original interface but today it is just a
compatibility wrapper.

I unfortunately ran out of steam last time before I finished converting
everything over.

Eric

2020-05-12 22:08:08

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
> Luis Chamberlain <[email protected]> writes:
>
> > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
> >> Luis Chamberlain <[email protected]> writes:
> >>
> >> > +static struct ctl_table fs_base_table[] = {
> >> > + {
> >> > + .procname = "fs",
> >> > + .mode = 0555,
> >> > + .child = fs_table,
> >> > + },
> >> > + { }
> >> > +};
> >> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
> >> > > +static int __init fs_procsys_init(void)
> >> > +{
> >> > + struct ctl_table_header *hdr;
> >> > +
> >> > + hdr = register_sysctl_table(fs_base_table);
> >> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
> >> AKA
> >> hdr = register_sysctl("fs", fs_table);
> >
> > Ah, much cleaner thanks!
>
> It is my hope you we can get rid of register_sysctl_table one of these
> days. It was the original interface but today it is just a
> compatibility wrapper.
>
> I unfortunately ran out of steam last time before I finished converting
> everything over.

Let's give it one more go. I'll start with the fs stuff.

Luis

2020-05-13 04:13:48

by Xiaoming Ni

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On 2020/5/13 6:03, Luis Chamberlain wrote:
> On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
>> Luis Chamberlain <[email protected]> writes:
>>
>>> On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
>>>> Luis Chamberlain <[email protected]> writes:
>>>>
>>>>> +static struct ctl_table fs_base_table[] = {
>>>>> + {
>>>>> + .procname = "fs",
>>>>> + .mode = 0555,
>>>>> + .child = fs_table,
>>>>> + },
>>>>> + { }
>>>>> +};
>>>> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
>>>>>> +static int __init fs_procsys_init(void)
>>>>> +{
>>>>> + struct ctl_table_header *hdr;
>>>>> +
>>>>> + hdr = register_sysctl_table(fs_base_table);
>>>> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
>>>> AKA
>>>> hdr = register_sysctl("fs", fs_table);
>>>
>>> Ah, much cleaner thanks!
>>
>> It is my hope you we can get rid of register_sysctl_table one of these
>> days. It was the original interface but today it is just a
>> compatibility wrapper.
>>
>> I unfortunately ran out of steam last time before I finished converting
>> everything over.
>
> Let's give it one more go. I'll start with the fs stuff.
>
> Luis
>
> .
>

If we register each feature's sysctl interface in its own feature code
file using register(), we can avoid merge conflicts when different
features modify sysctl.c at the same time.
That is, try to avoid mixing code for multiple features in the same
code file.

For example, the multiple file interfaces defined in sysctl.c by the
hung_task feature can be moved to hung_task.c.

Perhaps later we could even do without a centralized sysctl.c?
Is this better?

Thanks
Xiaoming Ni

---
include/linux/sched/sysctl.h | 8 +----
kernel/hung_task.c | 78
+++++++++++++++++++++++++++++++++++++++++++-
kernel/sysctl.c | 50 ----------------------------
3 files changed, 78 insertions(+), 58 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215..bb4e0d3 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,14 +7,8 @@
struct ctl_table;

#ifdef CONFIG_DETECT_HUNG_TASK
-extern int sysctl_hung_task_check_count;
-extern unsigned int sysctl_hung_task_panic;
+/* used for block/ */
extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int
write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c..53589f2 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -20,10 +20,10 @@
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
+#include <linux/kmemleak.h>
#include <linux/sched/sysctl.h>

#include <trace/events/sched.h>
-
/*
* The number of tasks checked:
*/
@@ -296,8 +296,84 @@ static int watchdog(void *dummy)
return 0;
}

+/*
+ * This is needed for proc_doulongvec_minmax of
sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static int __maybe_unused neg_one = -1;
+static struct ctl_table hung_task_sysctls[] = {
+ {
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "hung_task_check_count",
+ .data = &sysctl_hung_task_check_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "hung_task_timeout_secs",
+ .data = &sysctl_hung_task_timeout_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_warnings",
+ .data = &sysctl_hung_task_warnings,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &neg_one,
+ },
+ {}
+};
+
+/* get /proc/sys/kernel root */
+static struct ctl_table sysctls_root[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = hung_task_sysctls,
+ },
+ {}
+};
+
+static int __init hung_task_sysctl_init(void)
+{
+ struct ctl_table_header *srt = register_sysctl_table(sysctls_root);
+
+ if (!srt)
+ return -ENOMEM;
+ kmemleak_not_leak(srt);
+ return 0;
+}
+
static int __init hung_task_init(void)
{
+ int ret = hung_task_sysctl_init();
+
+ if (ret != 0)
+ return ret;
+
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

/* Disable hung task detector on suspend */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8..45a1153 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -149,13 +149,6 @@
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;

-/*
- * This is needed for proc_doulongvec_minmax of
sysctl_hung_task_timeout_secs
- * and hung_task_check_interval_secs
- */
-#ifdef CONFIG_DETECT_HUNG_TASK
-static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
-#endif

#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
@@ -1085,49 +1078,6 @@ static int sysrq_sysctl_handler(struct ctl_table
*table, int write,
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_DETECT_HUNG_TASK
- {
- .procname = "hung_task_panic",
- .data = &sysctl_hung_task_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "hung_task_check_count",
- .data = &sysctl_hung_task_check_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "hung_task_timeout_secs",
- .data = &sysctl_hung_task_timeout_secs,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_dohung_task_timeout_secs,
- .extra2 = &hung_task_timeout_max,
- },
- {
- .procname = "hung_task_check_interval_secs",
- .data = &sysctl_hung_task_check_interval_secs,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_dohung_task_timeout_secs,
- .extra2 = &hung_task_timeout_max,
- },
- {
- .procname = "hung_task_warnings",
- .data = &sysctl_hung_task_warnings,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
- },
-#endif
#ifdef CONFIG_RT_MUTEXES
{
.procname = "max_lock_depth",
--
1.8.5.6


2020-05-13 12:52:58

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Wed, May 13, 2020 at 12:04:02PM +0800, Xiaoming Ni wrote:
> On 2020/5/13 6:03, Luis Chamberlain wrote:
> > On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
> > > Luis Chamberlain <[email protected]> writes:
> > >
> > > > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
> > > > > Luis Chamberlain <[email protected]> writes:
> > > > >
> > > > > > +static struct ctl_table fs_base_table[] = {
> > > > > > + {
> > > > > > + .procname = "fs",
> > > > > > + .mode = 0555,
> > > > > > + .child = fs_table,
> > > > > > + },
> > > > > > + { }
> > > > > > +};
> > > > > ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
> > > > > > > +static int __init fs_procsys_init(void)
> > > > > > +{
> > > > > > + struct ctl_table_header *hdr;
> > > > > > +
> > > > > > + hdr = register_sysctl_table(fs_base_table);
> > > > > ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
> > > > > AKA
> > > > > hdr = register_sysctl("fs", fs_table);
> > > >
> > > > Ah, much cleaner thanks!
> > >
> > > It is my hope you we can get rid of register_sysctl_table one of these
> > > days. It was the original interface but today it is just a
> > > compatibility wrapper.
> > >
> > > I unfortunately ran out of steam last time before I finished converting
> > > everything over.
> >
> > Let's give it one more go. I'll start with the fs stuff.
> >
> > Luis
> >
> > .
> >
>
> If we register each feature in its own feature code file using register() to
> register the sysctl interface. To avoid merge conflicts when different
> features modify sysctl.c at the same time.
> that is, try to Avoid mixing code with multiple features in the same code
> file.
>
> For example, the multiple file interfaces defined in sysctl.c by the
> hung_task feature can be moved to hung_task.c.
>
> Perhaps later, without centralized sysctl.c ?
> Is this better?
>
> Thanks
> Xiaoming Ni
>
> ---
> include/linux/sched/sysctl.h | 8 +----
> kernel/hung_task.c | 78
> +++++++++++++++++++++++++++++++++++++++++++-
> kernel/sysctl.c | 50 ----------------------------
> 3 files changed, 78 insertions(+), 58 deletions(-)
>
> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> index d4f6215..bb4e0d3 100644
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -7,14 +7,8 @@
> struct ctl_table;
>
> #ifdef CONFIG_DETECT_HUNG_TASK
> -extern int sysctl_hung_task_check_count;
> -extern unsigned int sysctl_hung_task_panic;
> +/* used for block/ */
> extern unsigned long sysctl_hung_task_timeout_secs;
> -extern unsigned long sysctl_hung_task_check_interval_secs;
> -extern int sysctl_hung_task_warnings;
> -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int
> write,
> - void __user *buffer,
> - size_t *lenp, loff_t *ppos);
> #else
> /* Avoid need for ifdefs elsewhere in the code */
> enum { sysctl_hung_task_timeout_secs = 0 };
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 14a625c..53589f2 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -20,10 +20,10 @@
> #include <linux/utsname.h>
> #include <linux/sched/signal.h>
> #include <linux/sched/debug.h>
> +#include <linux/kmemleak.h>
> #include <linux/sched/sysctl.h>
>
> #include <trace/events/sched.h>
> -
> /*
> * The number of tasks checked:
> */
> @@ -296,8 +296,84 @@ static int watchdog(void *dummy)
> return 0;
> }
>
> +/*
> + * This is needed for proc_doulongvec_minmax of
> sysctl_hung_task_timeout_secs
> + * and hung_task_check_interval_secs
> + */
> +static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);

This is not generic so it can stay in this file.

> +static int __maybe_unused neg_one = -1;

This is generic so we can share it. I suggest we just rename this
for now to sysctl_neg_one, export it to a symbol namespace with
EXPORT_SYMBOL_NS_GPL(sysctl_neg_one, SYSCTL), and then import it with
MODULE_IMPORT_NS(SYSCTL).


> +static struct ctl_table hung_task_sysctls[] = {

We want to wrap this around with CONFIG_SYSCTL, so a cleaner solution
is something like this:

diff --git a/kernel/Makefile b/kernel/Makefile
index a42ac3a58994..689718351754 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -88,7 +88,9 @@ obj-$(CONFIG_KCOV) += kcov.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
obj-$(CONFIG_KGDB) += debug/
-obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_tasks.o
+hung_tasks-y := hung_task.o
+hung_tasks-$(CONFIG_SYSCTL) += hung_task_sysctl.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o

> +/* get /proc/sys/kernel root */
> +static struct ctl_table sysctls_root[] = {
> + {
> + .procname = "kernel",
> + .mode = 0555,
> + .child = hung_task_sysctls,
> + },
> + {}
> +};
> +

And as per Eric, this is not needed, we can simplify this more, as noted
below.

> +static int __init hung_task_sysctl_init(void)
> +{
> + struct ctl_table_header *srt = register_sysctl_table(sysctls_root);

You want instead something like::

struct ctl_table_header *srt;

srt = register_sysctl("kernel", hung_task_sysctls);
> +
> + if (!srt)
> + return -ENOMEM;
> + kmemleak_not_leak(srt);
> + return 0;
> +}
> +

> static int __init hung_task_init(void)
> {
> + int ret = hung_task_sysctl_init();
> +
> + if (ret != 0)
> + return ret;
> +

And just #ifdef this around CONFIG_SYSCTL.

Luis

2020-05-13 14:16:31

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Wed, May 13, 2020 at 08:42:30AM -0500, Eric W. Biederman wrote:
> Luis Chamberlain <[email protected]> writes:
>
> > On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
> >> Luis Chamberlain <[email protected]> writes:
> >>
> >> > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
> >> >> Luis Chamberlain <[email protected]> writes:
> >> >>
> >> >> > +static struct ctl_table fs_base_table[] = {
> >> >> > + {
> >> >> > + .procname = "fs",
> >> >> > + .mode = 0555,
> >> >> > + .child = fs_table,
> >> >> > + },
> >> >> > + { }
> >> >> > +};
> >> >> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
> >> >> > > +static int __init fs_procsys_init(void)
> >> >> > +{
> >> >> > + struct ctl_table_header *hdr;
> >> >> > +
> >> >> > + hdr = register_sysctl_table(fs_base_table);
> >> >> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
> >> >> AKA
> >> >> hdr = register_sysctl("fs", fs_table);
> >> >
> >> > Ah, much cleaner thanks!
> >>
> >> It is my hope you we can get rid of register_sysctl_table one of these
> >> days. It was the original interface but today it is just a
> >> compatibility wrapper.
> >>
> >> I unfortunately ran out of steam last time before I finished converting
> >> everything over.
> >
> > Let's give it one more go. I'll start with the fs stuff.
>
> Just to be clear moving the tables out of kernel/sysctl.c is a related
> but slightly different problem.

Sure, but also before we go on this crusade, how about we add a few
helpers:

register_sysctl_kernel()
register_sysctl_vm()
register_sysctl_fs()
register_sysctl_debug()
register_sysctl_dev()

That should make it easier to look for these, and shorter. We *know*
this is a common path, given the size of the existing table.

> Today it looks like there are 35 calls of register_sysctl_table
> and 9 calls of register_sysctl_paths.
>
> Among them is lib/sysctl_test.c and check-sysctl-docs.
>
> Meanwhile I can only find 5 calls to register_sysctl in the tree
> so it looks like I didn't get very far converting things over.

While we're on the spring cleaning topic, I've tried to put what I can
think of for TODO items here. Anything else? Feel free to edit; it's a
wiki after all.

https://kernelnewbies.org/KernelProjects/proc

Feel free to add wishlist items.

Luis

2020-05-13 14:50:43

by Eric W. Biederman

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

Luis Chamberlain <[email protected]> writes:

> On Wed, May 13, 2020 at 08:42:30AM -0500, Eric W. Biederman wrote:
>> Luis Chamberlain <[email protected]> writes:
>>
>> > On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
>> >> Luis Chamberlain <[email protected]> writes:
>> >>
>> >> > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
>> >> >> Luis Chamberlain <[email protected]> writes:
>> >> >>
>> >> >> > +static struct ctl_table fs_base_table[] = {
>> >> >> > + {
>> >> >> > + .procname = "fs",
>> >> >> > + .mode = 0555,
>> >> >> > + .child = fs_table,
>> >> >> > + },
>> >> >> > + { }
>> >> >> > +};
>> >> >> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
>> >> >> > > +static int __init fs_procsys_init(void)
>> >> >> > +{
>> >> >> > + struct ctl_table_header *hdr;
>> >> >> > +
>> >> >> > + hdr = register_sysctl_table(fs_base_table);
>> >> >> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
>> >> >> AKA
>> >> >> hdr = register_sysctl("fs", fs_table);
>> >> >
>> >> > Ah, much cleaner thanks!
>> >>
>> >> It is my hope you we can get rid of register_sysctl_table one of these
>> >> days. It was the original interface but today it is just a
>> >> compatibility wrapper.
>> >>
>> >> I unfortunately ran out of steam last time before I finished converting
>> >> everything over.
>> >
>> > Let's give it one more go. I'll start with the fs stuff.
>>
>> Just to be clear moving the tables out of kernel/sysctl.c is a related
>> but slightly different problem.
>
> Sure, but also before we go on this crusade, how about we add a few
> helpers:
>
> register_sysctl_kernel()
> register_sysctl_vm()
> register_sysctl_fs()
> register_sysctl_debug()
> register_sysctl_dev()

Hmm.

register_sysctl("kernel")

> That should make it easier to look for these, and shorter. We *know*
> this is a common path, given the size of the existing table.

I don't really care, but one character shorter doesn't look like it
really helps: not for grepping, and not for maintenance, as we would get a
bunch of trivial one-line implementations.

Eric

2020-05-13 20:47:20

by Eric W. Biederman

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

Luis Chamberlain <[email protected]> writes:

> On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
>> Luis Chamberlain <[email protected]> writes:
>>
>> > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
>> >> Luis Chamberlain <[email protected]> writes:
>> >>
>> >> > +static struct ctl_table fs_base_table[] = {
>> >> > + {
>> >> > + .procname = "fs",
>> >> > + .mode = 0555,
>> >> > + .child = fs_table,
>> >> > + },
>> >> > + { }
>> >> > +};
>> >> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
>> >> > > +static int __init fs_procsys_init(void)
>> >> > +{
>> >> > + struct ctl_table_header *hdr;
>> >> > +
>> >> > + hdr = register_sysctl_table(fs_base_table);
>> >> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
>> >> AKA
>> >> hdr = register_sysctl("fs", fs_table);
>> >
>> > Ah, much cleaner thanks!
>>
>> It is my hope you we can get rid of register_sysctl_table one of these
>> days. It was the original interface but today it is just a
>> compatibility wrapper.
>>
>> I unfortunately ran out of steam last time before I finished converting
>> everything over.
>
> Let's give it one more go. I'll start with the fs stuff.

Just to be clear moving the tables out of kernel/sysctl.c is a related
but slightly different problem.

Today it looks like there are 35 calls of register_sysctl_table
and 9 calls of register_sysctl_paths.

Among them is lib/sysctl_test.c and check-sysctl-docs.

Meanwhile I can only find 5 calls to register_sysctl in the tree
so it looks like I didn't get very far converting things over.

Eric

2020-05-13 20:49:00

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Wed, May 13, 2020 at 09:44:40AM -0500, Eric W. Biederman wrote:
> Luis Chamberlain <[email protected]> writes:
>
> > On Wed, May 13, 2020 at 08:42:30AM -0500, Eric W. Biederman wrote:
> >> Luis Chamberlain <[email protected]> writes:
> >>
> >> > On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
> >> >> Luis Chamberlain <[email protected]> writes:
> >> >>
> >> >> > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
> >> >> >> Luis Chamberlain <[email protected]> writes:
> >> >> >>
> >> >> >> > +static struct ctl_table fs_base_table[] = {
> >> >> >> > + {
> >> >> >> > + .procname = "fs",
> >> >> >> > + .mode = 0555,
> >> >> >> > + .child = fs_table,
> >> >> >> > + },
> >> >> >> > + { }
> >> >> >> > +};
> >> >> >> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
> >> >> >> > > +static int __init fs_procsys_init(void)
> >> >> >> > +{
> >> >> >> > + struct ctl_table_header *hdr;
> >> >> >> > +
> >> >> >> > + hdr = register_sysctl_table(fs_base_table);
> >> >> >> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
> >> >> >> AKA
> >> >> >> hdr = register_sysctl("fs", fs_table);
> >> >> >
> >> >> > Ah, much cleaner thanks!
> >> >>
> >> >> It is my hope you we can get rid of register_sysctl_table one of these
> >> >> days. It was the original interface but today it is just a
> >> >> compatibility wrapper.
> >> >>
> >> >> I unfortunately ran out of steam last time before I finished converting
> >> >> everything over.
> >> >
> >> > Let's give it one more go. I'll start with the fs stuff.
> >>
> >> Just to be clear moving the tables out of kernel/sysctl.c is a related
> >> but slightly different problem.
> >
> > Sure, but also before we go on this crusade, how about we add a few
> > helpers:
> >
> > register_sysctl_kernel()
> > register_sysctl_vm()
> > register_sysctl_fs()
> > register_sysctl_debug()
> > register_sysctl_dev()
>
> Hmm.
>
> register_sysctl("kernel")
>
> > That should make it easier to look for these, and shorter. We *know*
> > this is a common path, given the size of the existing table.
>
> I don't really care but one character shorter doesn't look like it
> really helps. Not really for grepping and not maintenance as we get a
> bunch of trivial one line implementations.

Alright, let's skip the helpers for now.

Luis

2020-05-14 06:08:02

by Xiaoming Ni

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On 2020/5/13 20:50, Luis Chamberlain wrote:
> On Wed, May 13, 2020 at 12:04:02PM +0800, Xiaoming Ni wrote:
>> On 2020/5/13 6:03, Luis Chamberlain wrote:
>>> On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
>>>> Luis Chamberlain <[email protected]> writes:
>>>>
>>>>> On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
>>>>>> Luis Chamberlain <[email protected]> writes:
>>>>>>
>>>>>>> +static struct ctl_table fs_base_table[] = {
>>>>>>> + {
>>>>>>> + .procname = "fs",
>>>>>>> + .mode = 0555,
>>>>>>> + .child = fs_table,
>>>>>>> + },
>>>>>>> + { }
>>>>>>> +};
>>>>>> ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
>>>>>>>> +static int __init fs_procsys_init(void)
>>>>>>> +{
>>>>>>> + struct ctl_table_header *hdr;
>>>>>>> +
>>>>>>> + hdr = register_sysctl_table(fs_base_table);
>>>>>> ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl instead.
>>>>>> AKA
>>>>>> hdr = register_sysctl("fs", fs_table);
>>>>>
>>>>> Ah, much cleaner thanks!
>>>>
>>>> It is my hope you we can get rid of register_sysctl_table one of these
>>>> days. It was the original interface but today it is just a
>>>> compatibility wrapper.
>>>>
>>>> I unfortunately ran out of steam last time before I finished converting
>>>> everything over.
>>>
>>> Let's give it one more go. I'll start with the fs stuff.
>>>
>>> Luis
>>>
>>> .
>>>
>>
>> If we register each feature in its own feature code file using register() to
>> register the sysctl interface. To avoid merge conflicts when different
>> features modify sysctl.c at the same time.
>> that is, try to Avoid mixing code with multiple features in the same code
>> file.
>>
>> For example, the multiple file interfaces defined in sysctl.c by the
>> hung_task feature can be moved to hung_task.c.
>>
>> Perhaps later, without centralized sysctl.c ?
>> Is this better?
>>
>> Thanks
>> Xiaoming Ni
>>
>> ---
>> include/linux/sched/sysctl.h | 8 +----
>> kernel/hung_task.c | 78
>> +++++++++++++++++++++++++++++++++++++++++++-
>> kernel/sysctl.c | 50 ----------------------------
>> 3 files changed, 78 insertions(+), 58 deletions(-)
>>
>> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
>> index d4f6215..bb4e0d3 100644
>> --- a/include/linux/sched/sysctl.h
>> +++ b/include/linux/sched/sysctl.h
>> @@ -7,14 +7,8 @@
>> struct ctl_table;
>>
>> #ifdef CONFIG_DETECT_HUNG_TASK
>> -extern int sysctl_hung_task_check_count;
>> -extern unsigned int sysctl_hung_task_panic;
>> +/* used for block/ */
>> extern unsigned long sysctl_hung_task_timeout_secs;
>> -extern unsigned long sysctl_hung_task_check_interval_secs;
>> -extern int sysctl_hung_task_warnings;
>> -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int
>> write,
>> - void __user *buffer,
>> - size_t *lenp, loff_t *ppos);
>> #else
>> /* Avoid need for ifdefs elsewhere in the code */
>> enum { sysctl_hung_task_timeout_secs = 0 };
>> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
>> index 14a625c..53589f2 100644
>> --- a/kernel/hung_task.c
>> +++ b/kernel/hung_task.c
>> @@ -20,10 +20,10 @@
>> #include <linux/utsname.h>
>> #include <linux/sched/signal.h>
>> #include <linux/sched/debug.h>
>> +#include <linux/kmemleak.h>
>> #include <linux/sched/sysctl.h>
>>
>> #include <trace/events/sched.h>
>> -
>> /*
>> * The number of tasks checked:
>> */
>> @@ -296,8 +296,84 @@ static int watchdog(void *dummy)
>> return 0;
>> }
>>
>> +/*
>> + * This is needed for proc_doulongvec_minmax of
>> sysctl_hung_task_timeout_secs
>> + * and hung_task_check_interval_secs
>> + */
>> +static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
>
> This is not generic so it can stay in this file.
>
>> +static int __maybe_unused neg_one = -1;
>
> This is generic so we can share it, I suggest we just rename this
> for now to sysctl_neg_one, export it to a symbol namespace,
> EXPORT_SYMBOL_NS_GPL(sysctl_neg_one, SYSCTL) and then import it with
> MODULE_IMPORT_NS(SYSCTL)
>
>
>> +static struct ctl_table hung_task_sysctls[] = {
>
> We want to wrap this around with CONFIG_SYSCTL, so a cleaner solution
> is something like this:
>
> diff --git a/kernel/Makefile b/kernel/Makefile
> index a42ac3a58994..689718351754 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -88,7 +88,9 @@ obj-$(CONFIG_KCOV) += kcov.o
> obj-$(CONFIG_KPROBES) += kprobes.o
> obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
> obj-$(CONFIG_KGDB) += debug/
> -obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
> +obj-$(CONFIG_DETECT_HUNG_TASK) += hung_tasks.o
> +hung_tasks-y := hung_task.o
> +hung_tasks-$(CONFIG_SYSCTL) += hung_task_sysctl.o
> obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
> obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
> obj-$(CONFIG_SECCOMP) += seccomp.o
>
>> +/* get /proc/sys/kernel root */
>> +static struct ctl_table sysctls_root[] = {
>> + {
>> + .procname = "kernel",
>> + .mode = 0555,
>> + .child = hung_task_sysctls,
>> + },
>> + {}
>> +};
>> +
>
> And as per Eric, this is not needed, we can simplify this more, as noted
> below.
>
>> +static int __init hung_task_sysctl_init(void)
>> +{
>> + struct ctl_table_header *srt = register_sysctl_table(sysctls_root);
>
> You want instead something like::
>
> struct ctl_table_header *srt;
>
> srt = register_sysctl("kernel", hung_task_sysctls);
>> +
>> + if (!srt)
>> + return -ENOMEM;
>> + kmemleak_not_leak(srt);
>> + return 0;
>> +}
>> +
>
>> static int __init hung_task_init(void)
>> {
>> + int ret = hung_task_sysctl_init();
>> +
>> + if (ret != 0)
>> + return ret;
>> +
>
> And just #ifdef this around CONFIG_SYSCTL.
>
> Luis
>
> .
>

Thank you for your guidance, I will send the patch later

Xiaoming Ni


2020-05-14 16:21:03

by Xiaoming Ni

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On 2020/5/14 14:05, Xiaoming Ni wrote:
> On 2020/5/13 20:50, Luis Chamberlain wrote:
>> On Wed, May 13, 2020 at 12:04:02PM +0800, Xiaoming Ni wrote:
>>> On 2020/5/13 6:03, Luis Chamberlain wrote:
>>>> On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
>>>>> Luis Chamberlain <[email protected]> writes:
>>>>>
>>>>>> On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
>>>>>>> Luis Chamberlain <[email protected]> writes:
>>>>>>>
>>>>>>>> +static struct ctl_table fs_base_table[] = {
>>>>>>>> +    {
>>>>>>>> +        .procname    = "fs",
>>>>>>>> +        .mode        = 0555,
>>>>>>>> +        .child        = fs_table,
>>>>>>>> +    },
>>>>>>>> +    { }
>>>>>>>> +};
>>>>>>>     ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
>>>>>>>>> +static int __init fs_procsys_init(void)
>>>>>>>> +{
>>>>>>>> +    struct ctl_table_header *hdr;
>>>>>>>> +
>>>>>>>> +    hdr = register_sysctl_table(fs_base_table);
>>>>>>>                 ^^^^^^^^^^^^^^^^^^^^^ Please use register_sysctl
>>>>>>> instead.
>>>>>>>     AKA
>>>>>>>           hdr = register_sysctl("fs", fs_table);
>>>>>>
>>>>>> Ah, much cleaner thanks!
>>>>>
>>>>> It is my hope you we can get rid of register_sysctl_table one of these
>>>>> days.  It was the original interface but today it is just a
>>>>> compatibility wrapper.
>>>>>
>>>>> I unfortunately ran out of steam last time before I finished
>>>>> converting
>>>>> everything over.
>>>>
>>>> Let's give it one more go. I'll start with the fs stuff.
>>>>
>>>>     Luis
>>>>
>>>> .
>>>>
>>>
>>> If we register each feature in its own feature code file using
>>> register() to
>>> register the sysctl interface. To avoid merge conflicts when different
>>> features modify sysctl.c at the same time.
>>> that is, try to Avoid mixing code with multiple features in the same
>>> code
>>> file.
>>>
>>> For example, the multiple file interfaces defined in sysctl.c by the
>>> hung_task feature can  be moved to hung_task.c.
>>>
>>> Perhaps later, without centralized sysctl.c ?
>>> Is this better?
>>>
>>> Thanks
>>> Xiaoming Ni
>>>
>>> ---
>>>   include/linux/sched/sysctl.h |  8 +----
>>>   kernel/hung_task.c           | 78
>>> +++++++++++++++++++++++++++++++++++++++++++-
>>>   kernel/sysctl.c              | 50 ----------------------------
>>>   3 files changed, 78 insertions(+), 58 deletions(-)
>>>
>>> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
>>> index d4f6215..bb4e0d3 100644
>>> --- a/include/linux/sched/sysctl.h
>>> +++ b/include/linux/sched/sysctl.h
>>> @@ -7,14 +7,8 @@
>>>   struct ctl_table;
>>>
>>>   #ifdef CONFIG_DETECT_HUNG_TASK
>>> -extern int         sysctl_hung_task_check_count;
>>> -extern unsigned int  sysctl_hung_task_panic;
>>> +/* used for block/ */
>>>   extern unsigned long sysctl_hung_task_timeout_secs;
>>> -extern unsigned long sysctl_hung_task_check_interval_secs;
>>> -extern int sysctl_hung_task_warnings;
>>> -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int
>>> write,
>>> -                     void __user *buffer,
>>> -                     size_t *lenp, loff_t *ppos);
>>>   #else
>>>   /* Avoid need for ifdefs elsewhere in the code */
>>>   enum { sysctl_hung_task_timeout_secs = 0 };
>>> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
>>> index 14a625c..53589f2 100644
>>> --- a/kernel/hung_task.c
>>> +++ b/kernel/hung_task.c
>>> @@ -20,10 +20,10 @@
>>>   #include <linux/utsname.h>
>>>   #include <linux/sched/signal.h>
>>>   #include <linux/sched/debug.h>
>>> +#include <linux/kmemleak.h>
>>>   #include <linux/sched/sysctl.h>
>>>
>>>   #include <trace/events/sched.h>
>>> -
>>>   /*
>>>    * The number of tasks checked:
>>>    */
>>> @@ -296,8 +296,84 @@ static int watchdog(void *dummy)
>>>       return 0;
>>>   }
>>>
>>> +/*
>>> + * This is needed for proc_doulongvec_minmax of
>>> sysctl_hung_task_timeout_secs
>>> + * and hung_task_check_interval_secs
>>> + */
>>> +static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
>>
>> This is not generic so it can stay in this file.
>>
>>> +static int __maybe_unused neg_one = -1;
>>
>> This is generic so we can share it, I suggest we just rename this
>> for now to sysctl_neg_one, export it to a symbol namespace,
>> EXPORT_SYMBOL_NS_GPL(sysctl_neg_one, SYSCTL) and then import it with
>> MODULE_IMPORT_NS(SYSCTL)

When I made the patch, I found that only sysctl_writes_strict and
hung_task_warnings use the neg_one variable, so is it necessary to merge
and generate the SYSCTL_NEG_ONE variable?

In addition, the SYSCTL symbol namespace has not been created yet. Do I
just need to add a new member -1 to the sysctl_vals array?

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b6f5d45..acae1fa 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,7 +23,7 @@
static const struct inode_operations proc_sys_dir_operations;

/* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { 0, 1, INT_MAX, -1 };
EXPORT_SYMBOL(sysctl_vals);

/* Support for permanently empty directories */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 02fa844..6d741d6 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -41,6 +41,7 @@
#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
#define SYSCTL_ONE ((void *)&sysctl_vals[1])
#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
+#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[3])

extern const int sysctl_vals[];

Thanks
Xiaoming Ni


>>
>>
>>> +static struct ctl_table hung_task_sysctls[] = {
>>
>> We want to wrap this around with CONFIG_SYSCTL, so a cleaner solution
>> is something like this:
>>
>> diff --git a/kernel/Makefile b/kernel/Makefile
>> index a42ac3a58994..689718351754 100644
>> --- a/kernel/Makefile
>> +++ b/kernel/Makefile
>> @@ -88,7 +88,9 @@ obj-$(CONFIG_KCOV) += kcov.o
>>   obj-$(CONFIG_KPROBES) += kprobes.o
>>   obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
>>   obj-$(CONFIG_KGDB) += debug/
>> -obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
>> +obj-$(CONFIG_DETECT_HUNG_TASK) += hung_tasks.o
>> +hung_tasks-y := hung_task.o
>> +hung_tasks-$(CONFIG_SYSCTL) += hung_task_sysctl.o
>>   obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
>>   obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
>>   obj-$(CONFIG_SECCOMP) += seccomp.o
>>
>>> +/* get /proc/sys/kernel root */
>>> +static struct ctl_table sysctls_root[] = {
>>> +    {
>>> +        .procname       = "kernel",
>>> +        .mode           = 0555,
>>> +        .child          = hung_task_sysctls,
>>> +    },
>>> +    {}
>>> +};
>>> +
>>
>> And as per Eric, this is not needed, we can simplify this more, as noted
>> below.
>>
>>> +static int __init hung_task_sysctl_init(void)
>>> +{
>>> +    struct ctl_table_header *srt = register_sysctl_table(sysctls_root);
>>
>> You want instead something like::
>>
>>          struct ctl_table_header *srt;
>>
>>     srt = register_sysctl("kernel", hung_task_sysctls);
>>> +
>>> +    if (!srt)
>>> +        return -ENOMEM;
>>> +    kmemleak_not_leak(srt);
>>> +    return 0;
>>> +}
>>> +
>>
>>>   static int __init hung_task_init(void)
>>>   {
>>> +    int ret = hung_task_sysctl_init();
>>> +
>>> +    if (ret != 0)
>>> +        return ret;
>>> +
>>
>> And just #ifdef this around CONFIG_SYSCTL.
>>
>>    Luis
>>
>> .
>>
>
> Thank you for your guidance, I will send the patch later
>
> Xiaoming Ni
>


2020-05-15 16:10:52

by Luis Chamberlain

[permalink] [raw]
Subject: Re: linux-next: manual merge of the vfs tree with the parisc-hd tree

On Fri, May 15, 2020 at 12:17:52AM +0800, Xiaoming Ni wrote:
> On 2020/5/14 14:05, Xiaoming Ni wrote:
> > On 2020/5/13 20:50, Luis Chamberlain wrote:
> > > On Wed, May 13, 2020 at 12:04:02PM +0800, Xiaoming Ni wrote:
> > > > On 2020/5/13 6:03, Luis Chamberlain wrote:
> > > > > On Tue, May 12, 2020 at 12:40:55PM -0500, Eric W. Biederman wrote:
> > > > > > Luis Chamberlain <[email protected]> writes:
> > > > > >
> > > > > > > On Tue, May 12, 2020 at 06:52:35AM -0500, Eric W. Biederman wrote:
> > > > > > > > Luis Chamberlain <[email protected]> writes:
> > > > > > > >
> > > > > > > > > +static struct ctl_table fs_base_table[] = {
> > > > > > > > > +??? {
> > > > > > > > > +??????? .procname??? = "fs",
> > > > > > > > > +??????? .mode??????? = 0555,
> > > > > > > > > +??????? .child??????? = fs_table,
> > > > > > > > > +??? },
> > > > > > > > > +??? { }
> > > > > > > > > +};
> > > > > > > > ??? ^^^^^^^^^^^^^^^^^^^^^^^^ You don't need this at all.
> > > > > > > > > > +static int __init fs_procsys_init(void)
> > > > > > > > > +{
> > > > > > > > > > +    struct ctl_table_header *hdr;
> > > > > > > > > > +
> > > > > > > > > > +    hdr = register_sysctl_table(fs_base_table);
> > > > > > > > >                 ^^^^^^^^^^^^^^^^^^^^^ Please use
> > > > > > > > > register_sysctl instead.
> > > > > > > > >     AKA
> > > > > > > > >           hdr = register_sysctl("fs", fs_table);
> > > > > > >
> > > > > > > Ah, much cleaner thanks!
> > > > > >
> > > > > > It is my hope you we can get rid of register_sysctl_table one of these
> > > > > > > days.  It was the original interface but today it is just a
> > > > > > compatibility wrapper.
> > > > > >
> > > > > > I unfortunately ran out of steam last time before I
> > > > > > finished converting
> > > > > > everything over.
> > > > >
> > > > > Let's give it one more go. I'll start with the fs stuff.
> > > > >
> > > > > >     Luis
> > > > >
> > > > > .
> > > > >
> > > >
> > > > If we register each feature in its own feature code file using
> > > > register() to
> > > > register the sysctl interface. To avoid merge conflicts when different
> > > > features modify sysctl.c at the same time.
> > > > that is, try to Avoid mixing code with multiple features in the
> > > > same code
> > > > file.
> > > >
> > > > For example, the multiple file interfaces defined in sysctl.c by the
> > > > > hung_task feature can be moved to hung_task.c.
> > > >
> > > > Perhaps later, without centralized sysctl.c ?
> > > > Is this better?
> > > >
> > > > Thanks
> > > > Xiaoming Ni
> > > >
> > > > ---
> > > > >   include/linux/sched/sysctl.h |  8 +----
> > > > >   kernel/hung_task.c           | 78
> > > > > +++++++++++++++++++++++++++++++++++++++++++-
> > > > >   kernel/sysctl.c              | 50 ----------------------------
> > > > >   3 files changed, 78 insertions(+), 58 deletions(-)
> > > >
> > > > diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> > > > index d4f6215..bb4e0d3 100644
> > > > --- a/include/linux/sched/sysctl.h
> > > > +++ b/include/linux/sched/sysctl.h
> > > > @@ -7,14 +7,8 @@
> > > > >   struct ctl_table;
> > > > >
> > > > >   #ifdef CONFIG_DETECT_HUNG_TASK
> > > > > -extern int         sysctl_hung_task_check_count;
> > > > > -extern unsigned int  sysctl_hung_task_panic;
> > > > > +/* used for block/ */
> > > > >   extern unsigned long sysctl_hung_task_timeout_secs;
> > > > > -extern unsigned long sysctl_hung_task_check_interval_secs;
> > > > > -extern int sysctl_hung_task_warnings;
> > > > > -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int
> > > > > write,
> > > > > -                     void __user *buffer,
> > > > > -                     size_t *lenp, loff_t *ppos);
> > > > >   #else
> > > > >   /* Avoid need for ifdefs elsewhere in the code */
> > > > >   enum { sysctl_hung_task_timeout_secs = 0 };
> > > > diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> > > > index 14a625c..53589f2 100644
> > > > --- a/kernel/hung_task.c
> > > > +++ b/kernel/hung_task.c
> > > > @@ -20,10 +20,10 @@
> > > > >   #include <linux/utsname.h>
> > > > >   #include <linux/sched/signal.h>
> > > > >   #include <linux/sched/debug.h>
> > > > > +#include <linux/kmemleak.h>
> > > > >   #include <linux/sched/sysctl.h>
> > > > >
> > > > >   #include <trace/events/sched.h>
> > > > > -
> > > > >   /*
> > > > >    * The number of tasks checked:
> > > > >    */
> > > > > @@ -296,8 +296,84 @@ static int watchdog(void *dummy)
> > > > >       return 0;
> > > > >   }
> > > >
> > > > +/*
> > > > + * This is needed for proc_doulongvec_minmax of
> > > > sysctl_hung_task_timeout_secs
> > > > + * and hung_task_check_interval_secs
> > > > + */
> > > > +static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
> > >
> > > This is not generic so it can stay in this file.
> > >
> > > > +static int __maybe_unused neg_one = -1;
> > >
> > > This is generic so we can share it, I suggest we just rename this
> > > for now to sysctl_neg_one, export it to a symbol namespace,
> > > EXPORT_SYMBOL_NS_GPL(sysctl_neg_one, SYSCTL) and then import it with
> > > MODULE_IMPORT_NS(SYSCTL)
>
> When I made the patch, I found that only sysctl_writes_strict and
> hung_task_warnings use the neg_one variable, so is it necessary to merge and
> generate the SYSCTL_NEG_ONE variable?

Yes.


> In addition, the SYSCTL symbol namespace has not been created yet. Do I just
> need to add a new member -1 to the sysctl_vals array?

I had forgotten about our sysctl_vals, so disregard my request
to use EXPORT_SYMBOL_NS_GPL(sysctl_neg_one, SYSCTL) and using
MODULE_IMPORT_NS(SYSCTL). Since we are already using these and
have a prefix on the define we should be good.

> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index b6f5d45..acae1fa 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -23,7 +23,7 @@
> static const struct inode_operations proc_sys_dir_operations;
>
> /* shared constants to be used in various sysctls */
> -const int sysctl_vals[] = { 0, 1, INT_MAX };
> +const int sysctl_vals[] = { 0, 1, INT_MAX, -1 };
> EXPORT_SYMBOL(sysctl_vals);
>
> /* Support for permanently empty directories */
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index 02fa844..6d741d6 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -41,6 +41,7 @@
> #define SYSCTL_ZERO ((void *)&sysctl_vals[0])
> #define SYSCTL_ONE ((void *)&sysctl_vals[1])
> #define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
> +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[3])
>
> extern const int sysctl_vals[];

This looks good.

Luis