On Fri, Oct 30, 2015 at 07:26:37PM -0400, Waiman Long wrote:
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -23,6 +23,19 @@
> #define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
>
> /*
> + * Queue Node Adaptive Spinning
> + *
> + * A queue node vCPU will stop spinning if the vCPU in the previous node is
> + * not running. The one lock stealing attempt allowed at slowpath entry
> + * mitigates the slight slowdown for non-overcommitted guest with this
> + * aggressive wait-early mechanism.
> + *
> + * The status of the previous node will be checked at fixed interval
> + * controlled by PV_PREV_CHECK_MASK.
> + */
> +#define PV_PREV_CHECK_MASK 0xff
> +
> +/*
> * Queue node uses: vcpu_running & vcpu_halted.
> * Queue head uses: vcpu_running & vcpu_hashed.
> */
> @@ -202,6 +215,20 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
> }
>
> /*
> + * Return true if when it is time to check the previous node which is not
> + * in a running state.
> + */
> +static inline bool
> +pv_wait_early(struct pv_node *prev, int loop)
> +{
> +
> + if ((loop & PV_PREV_CHECK_MASK) != 0)
> + return false;
> +
> + return READ_ONCE(prev->state) != vcpu_running;
> +}
So it appears to me the sole purpose of PV_PREV_CHECK_MASK is to avoid
touching the prev->state cacheline too hard. Yet that is not mentioned
anywhere above.
> +static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
> {
> struct pv_node *pn = (struct pv_node *)node;
> + struct pv_node *pp = (struct pv_node *)prev;
> int waitcnt = 0;
> int loop;
> + bool wait_early;
>
> /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
> for (;; waitcnt++) {
> - for (loop = SPIN_THRESHOLD; loop; loop--) {
> + for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
> if (READ_ONCE(node->locked))
> return;
> + if (pv_wait_early(pp, loop)) {
> + wait_early = true;
> + break;
> + }
> cpu_relax();
> }
>
So if prev points to another node, it will never see vcpu_running. Was
that fully intended?
FYI, I think I've now seen all patches ;-)
On 11/06/2015 10:01 AM, Peter Zijlstra wrote:
> On Fri, Oct 30, 2015 at 07:26:37PM -0400, Waiman Long wrote:
>> +++ b/kernel/locking/qspinlock_paravirt.h
>> @@ -23,6 +23,19 @@
>> #define _Q_SLOW_VAL (3U<< _Q_LOCKED_OFFSET)
>>
>> /*
>> + * Queue Node Adaptive Spinning
>> + *
>> + * A queue node vCPU will stop spinning if the vCPU in the previous node is
>> + * not running. The one lock stealing attempt allowed at slowpath entry
>> + * mitigates the slight slowdown for non-overcommitted guest with this
>> + * aggressive wait-early mechanism.
>> + *
>> + * The status of the previous node will be checked at fixed interval
>> + * controlled by PV_PREV_CHECK_MASK.
>> + */
>> +#define PV_PREV_CHECK_MASK 0xff
>> +
>> +/*
>> * Queue node uses: vcpu_running& vcpu_halted.
>> * Queue head uses: vcpu_running& vcpu_hashed.
>> */
>> @@ -202,6 +215,20 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
>> }
>>
>> /*
>> + * Return true if when it is time to check the previous node which is not
>> + * in a running state.
>> + */
>> +static inline bool
>> +pv_wait_early(struct pv_node *prev, int loop)
>> +{
>> +
>> + if ((loop& PV_PREV_CHECK_MASK) != 0)
>> + return false;
>> +
>> + return READ_ONCE(prev->state) != vcpu_running;
>> +}
> So it appears to me the sole purpose of PV_PREV_CHECK_MASK is to avoid
> touching the prev->state cacheline too hard. Yet that is not mentioned
> anywhere above.
Yes, that is true. I will add a comment to that effect.
>
>> +static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>> {
>> struct pv_node *pn = (struct pv_node *)node;
>> + struct pv_node *pp = (struct pv_node *)prev;
>> int waitcnt = 0;
>> int loop;
>> + bool wait_early;
>>
>> /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
>> for (;; waitcnt++) {
>> - for (loop = SPIN_THRESHOLD; loop; loop--) {
>> + for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
>> if (READ_ONCE(node->locked))
>> return;
>> + if (pv_wait_early(pp, loop)) {
>> + wait_early = true;
>> + break;
>> + }
>> cpu_relax();
>> }
>>
> So if prev points to another node, it will never see vcpu_running. Was
> that fully intended?
I had added code in pv_wait_head_or_lock to set the state appropriately
for the queue head vCPU.
for (;; waitcnt++) {
/*
+ * Set correct vCPU state to be used by queue node
wait-early
+ * mechanism.
+ */
+ WRITE_ONCE(pn->state, vcpu_running);
+
+ /*
* Set the pending bit in the active lock spinning loop to
* disable lock stealing. However, the pending bit check in
* pv_queued_spin_trylock_unfair() and the setting/clearing
@@ -374,6 +414,7 @@ static u32 pv_wait_head_lock(struct qspinlock *lock,
struct mcs_spinlock *node)
goto gotlock;
}
}
+ WRITE_ONCE(pn->state, vcpu_halted);
> FYI, I think I've now seen all patches ;-)
Thanks for the review. I will work on fixing the issues you identified
and issue a new patch series next week.
Cheers,
Longman
On Fri, Nov 06, 2015 at 12:54:06PM -0500, Waiman Long wrote:
> >>+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
> >> {
> >> struct pv_node *pn = (struct pv_node *)node;
> >>+ struct pv_node *pp = (struct pv_node *)prev;
> >> int waitcnt = 0;
> >> int loop;
> >>+ bool wait_early;
> >>
> >> /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
> >> for (;; waitcnt++) {
> >>- for (loop = SPIN_THRESHOLD; loop; loop--) {
> >>+ for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
> >> if (READ_ONCE(node->locked))
> >> return;
> >>+ if (pv_wait_early(pp, loop)) {
> >>+ wait_early = true;
> >>+ break;
> >>+ }
> >> cpu_relax();
> >> }
> >>
> >So if prev points to another node, it will never see vcpu_running. Was
> >that fully intended?
>
> I had added code in pv_wait_head_or_lock to set the state appropriately for
> the queue head vCPU.
Yes, but that's the head, for nodes we'll always have halted or hashed.
On 11/06/2015 03:37 PM, Peter Zijlstra wrote:
> On Fri, Nov 06, 2015 at 12:54:06PM -0500, Waiman Long wrote:
>>>> +static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>>>> {
>>>> struct pv_node *pn = (struct pv_node *)node;
>>>> + struct pv_node *pp = (struct pv_node *)prev;
>>>> int waitcnt = 0;
>>>> int loop;
>>>> + bool wait_early;
>>>>
>>>> /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
>>>> for (;; waitcnt++) {
>>>> - for (loop = SPIN_THRESHOLD; loop; loop--) {
>>>> + for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
>>>> if (READ_ONCE(node->locked))
>>>> return;
>>>> + if (pv_wait_early(pp, loop)) {
>>>> + wait_early = true;
>>>> + break;
>>>> + }
>>>> cpu_relax();
>>>> }
>>>>
>>> So if prev points to another node, it will never see vcpu_running. Was
>>> that fully intended?
>> I had added code in pv_wait_head_or_lock to set the state appropriately for
>> the queue head vCPU.
> Yes, but that's the head, for nodes we'll always have halted or hashed.
The node state was initialized to be vcpu_running. In pv_wait_node(), it
will be changed to vcpu_halted before sleeping and back to vcpu_running
after that. So it is not true that it is either halted or hashed.
In case, it was changed to vcpu_hashed, it will be changed back to
vcpu_running in pv_wait_head_lock before entering the active spinning
loop. There is definitely a small amount of time during which the node
state does not reflect the actual vCPU state, but that is the best we
can do so far.
Cheers,
Longman
On Mon, Nov 09, 2015 at 11:51:20AM -0500, Waiman Long wrote:
> On 11/06/2015 03:37 PM, Peter Zijlstra wrote:
> >On Fri, Nov 06, 2015 at 12:54:06PM -0500, Waiman Long wrote:
> >>>>+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
> >>>> {
> >>>> struct pv_node *pn = (struct pv_node *)node;
> >>>>+ struct pv_node *pp = (struct pv_node *)prev;
> >>>> int waitcnt = 0;
> >>>> int loop;
> >>>>+ bool wait_early;
> >>>>
> >>>> /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
> >>>> for (;; waitcnt++) {
> >>>>- for (loop = SPIN_THRESHOLD; loop; loop--) {
> >>>>+ for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
> >>>> if (READ_ONCE(node->locked))
> >>>> return;
> >>>>+ if (pv_wait_early(pp, loop)) {
> >>>>+ wait_early = true;
> >>>>+ break;
> >>>>+ }
> >>>> cpu_relax();
> >>>> }
> >>>>
> >>>So if prev points to another node, it will never see vcpu_running. Was
> >>>that fully intended?
> >>I had added code in pv_wait_head_or_lock to set the state appropriately for
> >>the queue head vCPU.
> >Yes, but that's the head, for nodes we'll always have halted or hashed.
>
> The node state was initialized to be vcpu_running. In pv_wait_node(), it
> will be changed to vcpu_halted before sleeping and back to vcpu_running
> after that. So it is not true that it is either halted or hashed.
Durh,.. I mixed up pv_wait_node() and pv_wait_head() I think. Sorry for
the noise.