Subject: Re: [v3 1/2] sched/clock: interface to allow timestamps early in boot
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
To: Pavel Tatashin <pasha.tatashin@oracle.com>, ...
Date: Mon, 14 Aug 2017 12:38:17 +0800
Message-ID: <966f910e-f6be-f4df-2600-37133075dbde@cn.fujitsu.com>
In-Reply-To: <1502477455-314781-2-git-send-email-pasha.tatashin@oracle.com>
X-Mailing-List: linux-kernel@vger.kernel.org

Hi Pavel,

At 08/12/2017 02:50 AM, Pavel Tatashin wrote:
> In Linux, printk() can output timestamps next to every line. This is very
> useful for tracking regressions, and for finding places that can be
> optimized. However, the timestamps are available only later in boot. On
> smaller machines it is an insignificant amount of time, but on larger ones
> it can be many seconds or even minutes into the boot process.
>
> This patch adds an interface for platforms with an unstable sched clock to
> show timestamps early in boot. In order to get this functionality a
> platform must do the following:
>
> - Implement u64 sched_clock_early()
>   Clock that returns monotonic time
>
> - Call sched_clock_early_init()
>   Tells sched clock that the early clock can be used
>
> - Call sched_clock_early_fini()
>   Tells sched clock that the early clock is finished, and sched clock
>   should hand over the operation to the permanent clock
>
> - Use the weak sched_clock_early() interface to determine time from boot
>   in arch-specific read_boot_clock64()
>
> Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
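(A side note for anyone wiring this up on another platform: the four steps
above amount to something like the sketch below. The names tsc_early_init()
and early_counter_to_ns() are illustrative only -- the real x86 hookup lives
in patch 2 -- and the one hard requirement is that the early clock returns
monotonic nanoseconds since boot.)

/* Illustrative sketch of the platform side; not part of this patch. */

/* Strong definition overriding the __weak one in kernel/sched/clock.c */
u64 sched_clock_early(void)
{
        /* early_counter_to_ns() is hypothetical: an early-calibrated
         * platform counter converted to nanoseconds */
        return early_counter_to_ns();
}

void __init tsc_early_init(void)
{
        /* ... calibrate the early counter ... */
        sched_clock_early_init();       /* early timestamps usable from here */
}

void __init tsc_early_fini(void)
{
        sched_clock_early_fini();       /* hand over to the permanent clock */
}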
> ---
>  arch/x86/kernel/time.c      | 22 ++++++++++++++++
>  include/linux/sched/clock.h |  4 +++
>  kernel/sched/clock.c        | 61 ++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 86 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
> index e0754cdbad37..6ede0da7041a 100644
> --- a/arch/x86/kernel/time.c
> +++ b/arch/x86/kernel/time.c
> @@ -14,6 +14,7 @@
>  #include
>  #include
>  #include
> +#include
>
>  #include
>  #include
> @@ -85,6 +86,7 @@ static __init void x86_late_time_init(void)
>  {
>         x86_init.timers.timer_init();
>         tsc_init();
> +       tsc_early_fini();

tsc_early_fini() is defined in patch 2; I guess you may have missed it when
you split your patches.

>  }
>
>  /*
> @@ -95,3 +97,23 @@ void __init time_init(void)
>  {
>         late_time_init = x86_late_time_init;
>  }
> +
> +/*
> + * Called once during boot to initialize boot time.
> + */
> +void read_boot_clock64(struct timespec64 *ts)
> +{
> +       u64 ns_boot = sched_clock_early();      /* nsec from boot */
> +       struct timespec64 ts_now;
> +       bool valid_clock;
> +
> +       /* Time from epoch */
> +       read_persistent_clock64(&ts_now);
> +       valid_clock = ns_boot && timespec64_valid_strict(&ts_now) &&
> +                       (ts_now.tv_sec || ts_now.tv_nsec);
> +
> +       if (!valid_clock)
> +               *ts = (struct timespec64){0, 0};
> +       else
> +               *ts = ns_to_timespec64(timespec64_to_ns(&ts_now) - ns_boot);
> +}
> diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
> index a55600ffdf4b..f8291fa28c0c 100644
> --- a/include/linux/sched/clock.h
> +++ b/include/linux/sched/clock.h
> @@ -63,6 +63,10 @@ extern void sched_clock_tick_stable(void);
>  extern void sched_clock_idle_sleep_event(void);
>  extern void sched_clock_idle_wakeup_event(void);
>
> +void sched_clock_early_init(void);
> +void sched_clock_early_fini(void);
> +u64 sched_clock_early(void);
> +
>  /*
>   * As outlined in clock.c, provides a fast, high resolution, nanosecond
>   * time source that is monotonic per cpu argument and has bounded drift
> diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
> index ca0f8fc945c6..be5b60af4ca9 100644
> --- a/kernel/sched/clock.c
> +++ b/kernel/sched/clock.c
> @@ -80,9 +80,24 @@ EXPORT_SYMBOL_GPL(sched_clock);
>
>  __read_mostly int sched_clock_running;
>
> +/*
> + * We start with the sched clock early static branch enabled, and the global
> + * status disabled. Early in boot it is decided whether to enable the global
> + * status as well (set sched_clock_early_running to true), and later, when
> + * the early clock is no longer needed, the static branch is disabled.
> + */
> +static DEFINE_STATIC_KEY_TRUE(__use_sched_clock_early);
> +static bool __read_mostly sched_clock_early_running;
> +

In my opinion, these two are redundant; I suggest removing one, e.g.
removing sched_clock_early_running, like below.

First,

  static DEFINE_STATIC_KEY_FALSE(__use_sched_clock_early);
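(For reference, so the intent is clear -- this is the standard jump-label
API from <linux/jump_label.h>:

  DEFINE_STATIC_KEY_FALSE(key);    /* branch compiled as "false" by default */
  static_branch_unlikely(&key)     /* near-free test, patched at runtime */
  static_branch_enable(&key);      /* patch the branch to "true" */
  static_branch_disable(&key);     /* patch the branch back to "false" */

With the key defaulting to false, sched_clock_early_init() just enables it
and sched_clock_early_fini() disables it, so the separate bool carries no
extra information.)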
>  void sched_clock_init(void)

We can also make sched_clock_init() __init.

>  {
> -       sched_clock_running = 1;
> +       /*
> +        * We start the clock only once the early clock is finished, or if
> +        * the early clock was not running.
> +        */
> +       if (!sched_clock_early_running)

s/!sched_clock_early_running/!static_branch_unlikely(&__use_sched_clock_early)/

> +               sched_clock_running = 1;
> +
>  }
>
>  #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
> @@ -362,6 +377,11 @@ u64 sched_clock_cpu(int cpu)
>         if (sched_clock_stable())
>                 return sched_clock() + __sched_clock_offset;
>
> +       if (static_branch_unlikely(&__use_sched_clock_early)) {
> +               if (sched_clock_early_running)

s/if (sched_clock_early_running)//

> +                       return sched_clock_early();
> +       }
> +
>         if (unlikely(!sched_clock_running))
>                 return 0ull;
>
> @@ -444,6 +464,45 @@ void sched_clock_idle_wakeup_event(void)
>  }
>  EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
>
> +u64 __weak sched_clock_early(void)
> +{
> +       return 0;
> +}
> +
> +/*
> + * Called when sched_clock_early() is about to be finished; notifies sched
> + * clock that after this call sched_clock_early() can't be used.
> + */
> +void __init sched_clock_early_fini(void)
> +{
> +       struct sched_clock_data *scd = this_scd();
> +       u64 now_early, now_sched;
> +
> +       now_early = sched_clock_early();
> +       now_sched = sched_clock();
> +
> +       __gtod_offset = now_early - scd->tick_gtod;
> +       __sched_clock_offset = now_early - now_sched;
> +
> +       sched_clock_early_running = false;

s/sched_clock_early_running = false;//

> +       static_branch_disable(&__use_sched_clock_early);
> +
> +       /* Now that the early clock is finished, start regular sched clock */
> +       sched_clock_init();
> +}
> +
> +/*
> + * Notifies sched clock that an early boot clocksource is available; it
> + * means that the current platform has implemented sched_clock_early().
> + *
> + * The early clock is running until we switch to a stable clock, or when we
> + * learn that the stable clock is not available.
> + */
> +void __init sched_clock_early_init(void)
> +{
> +       sched_clock_early_running = true;

s/sched_clock_early_running = true/static_branch_enable(&__use_sched_clock_early)/

Thanks,
dou.

> +}
> +
> +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
>
>  u64 sched_clock_cpu(int cpu)
>
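P.S. Putting the substitutions above together, the clock.c side would look
roughly like this (an untested sketch, showing only the affected functions):

  static DEFINE_STATIC_KEY_FALSE(__use_sched_clock_early);

  void __init sched_clock_init(void)
  {
          /* Start only once the early clock, if any, has finished */
          if (!static_branch_unlikely(&__use_sched_clock_early))
                  sched_clock_running = 1;
  }

  u64 sched_clock_cpu(int cpu)
  {
          ...
          if (static_branch_unlikely(&__use_sched_clock_early))
                  return sched_clock_early();
          ...
  }

  void __init sched_clock_early_init(void)
  {
          static_branch_enable(&__use_sched_clock_early);
  }

  void __init sched_clock_early_fini(void)
  {
          ...
          static_branch_disable(&__use_sched_clock_early);

          /* the key is off now, so this starts the regular clock */
          sched_clock_init();
  }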