Received: by 2002:a05:6358:a55:b0:ec:fcf4:3ecf with SMTP id 21csp2829796rwb; Fri, 20 Jan 2023 07:54:52 -0800 (PST) X-Google-Smtp-Source: AMrXdXsKqjPcCJXcaO61o5dHtR+qOOEbxzkMsLk/QklLjdykHica87UJWxeJnELOPGLg6eb83y6y X-Received: by 2002:a17:906:1605:b0:870:2f70:c631 with SMTP id m5-20020a170906160500b008702f70c631mr19921589ejd.2.1674230091806; Fri, 20 Jan 2023 07:54:51 -0800 (PST) ARC-Seal: i=1; a=rsa-sha256; t=1674230091; cv=none; d=google.com; s=arc-20160816; b=kE/Pggb4UrVBZ82d7IOC/m672LyhbZobm9cHPPQGasiQz6FqmcLmY4L+sTJXr4a9ka qmUhVUX85pAU8VMTiNTk2f/XVu4N8q8/p1tYOGE76CpmK9CwxmUkGrffjDxMLkLdrsR8 gRHlz3Kb/IwduTJZWzWwLKNSI7r39X+cudQw8aq83FW31EkR3wNQ8iwRJkK7vXDA2gsO +G6s5JG1o7M4LQGtfZFk1+E3Cu3Ohj8TGqkSJoQV0iYuO0J8I5d3nT5B1hzRxnJsLiFC LuB6mJUpZk1kyRPQaRIyMwabHLmmA7S53Fimg+4uz6cBNDCnfXrgCBukjBZz/z4mznMR VC3g== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:content-transfer-encoding:mime-version :references:in-reply-to:message-id:subject:cc:to:from:date :dkim-signature; bh=XdGTjgHxLkuAm9R++CIRfNtbyu6RO93fJhWZnkRBU8I=; b=d4mKXEYJ63mIxi1QCDUQTDSfCkTolE98JQjKVCqz8I+QeESY0DH0YRIwZUObENh/Uu s6cpHBfEsbhMAebqFPOfbsSwJDn6XHZpeEOi7uyglWKhv51L+ksVFdBOB7XipABXMDuu mTYda6/MQoWrxHJUM463K5+eQsPZxoZE2qIZHhpfh1HEw6WXJ9yrrESZlLjZ7pR5YaAd EoHy0ldgFscRCwDEDv8ZpLfNhL8SgiNzOkNqCbdiH5AbX6GIZWFfSrAnwXU6XKSktifR 6kUQQMmueVFnrYFVOpw1mVvS2qi4lrH52wR7mKJBTWn/eWuEElMnMucBbKpxAwY8PMdf GM3A== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@mailbox.org header.s=mail20150812 header.b=FUkOC+AO; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org; dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=mailbox.org Return-Path: Received: from out1.vger.email (out1.vger.email. 
[2620:137:e000::1:20]) by mx.google.com with ESMTP id sb20-20020a170906edd400b0087326de2122si9374593ejb.200.2023.01.20.07.54.38; Fri, 20 Jan 2023 07:54:51 -0800 (PST) Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; Authentication-Results: mx.google.com; dkim=pass header.i=@mailbox.org header.s=mail20150812 header.b=FUkOC+AO; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org; dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=mailbox.org Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231314AbjATPUe (ORCPT + 51 others); Fri, 20 Jan 2023 10:20:34 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52418 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231339AbjATPUc (ORCPT ); Fri, 20 Jan 2023 10:20:32 -0500 Received: from mout-p-202.mailbox.org (mout-p-202.mailbox.org [80.241.56.172]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E21AFDF944 for ; Fri, 20 Jan 2023 07:20:26 -0800 (PST) Received: from smtp1.mailbox.org (smtp1.mailbox.org [10.196.197.1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange ECDHE (P-384) server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-202.mailbox.org (Postfix) with ESMTPS id 4Nz38j68ZBz9sSL; Fri, 20 Jan 2023 16:20:21 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=mailbox.org; s=mail20150812; t=1674228021; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=XdGTjgHxLkuAm9R++CIRfNtbyu6RO93fJhWZnkRBU8I=; 
b=FUkOC+AOJHCS0fzYpQZHrILFJSsZRVY2h6ff0FbckgyKgO18SKbd6uqrz5gTanhvKAUvXl hwpW6ZdbqqlsEOkECnQAL2MHfrc84KbA+o1MOep9F037yVH2d2zS0WY84S4VVWeZC32xXU p0q6UV8HA1toTsnPOZRe+EdoZZmiS9AUB9M18ODvg3hPIfWlfeDWgIoYv0cZnYJkUq3Jxu x3neiOKemIe+n3WSmGqD6UoyLf958gIrTA5+WCCggGznqLoNqYBKMxBpyCkAS6/7MDr5C6 WeXRYuVXzRsYmcjRXjBpEZHD4ig9yMmzzib3DmyGy2uJM+2nYI4bDgE3t7zcOw== Date: Fri, 20 Jan 2023 16:20:18 +0100 From: "Erhard F." To: Peter Zijlstra Cc: Sandipan Das , linux-kernel@vger.kernel.org, Nick Desaulniers , Joao Moreira , Masami Hiramatsu , x86@kernel.org, Josh Poimboeuf Subject: Re: [bisected] clang 15 built kernel fails to boot, stuck at "Loading Linux 6.1.1 ...", gcc 12 built kernel with same config boots fine Message-ID: <20230120162018.4e48f78c@yea> In-Reply-To: References: <20230119022303.177052e4@yea> <178000f1-1464-03cb-2335-a01b77e70692@amd.com> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit X-MBO-RS-META: pztd4m9df7ono1oz3zbn7s1dtubu8byf X-MBO-RS-ID: a51ba47983a6a9a7749 X-Spam-Status: No, score=-2.8 required=5.0 tests=BAYES_00,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,RCVD_IN_DNSWL_LOW,SPF_HELO_NONE, SPF_PASS autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on lindbergh.monkeyblade.net Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Fri, 20 Jan 2023 15:57:47 +0100 Peter Zijlstra wrote: > diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h > index f4b87f08f5c5..29832c338cdc 100644 > --- a/arch/x86/include/asm/text-patching.h > +++ b/arch/x86/include/asm/text-patching.h > @@ -184,6 +184,37 @@ void int3_emulate_ret(struct pt_regs *regs) > unsigned long ip = int3_emulate_pop(regs); > int3_emulate_jmp(regs, ip); > } > + > +static __always_inline > +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) > +{ > + static const unsigned long jcc_mask[6] = { > + [0] = 
X86_EFLAGS_OF, > + [1] = X86_EFLAGS_CF, > + [2] = X86_EFLAGS_ZF, > + [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, > + [4] = X86_EFLAGS_SF, > + [5] = X86_EFLAGS_PF, > + }; > + > + bool invert = cc & 1; > + bool match; > + > + if (cc < 0xc) { > + match = regs->flags & jcc_mask[cc >> 1]; > + } else { > + match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ > + ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); > + if (cc >= 0xe) > + match = match || (regs->flags & X86_EFLAGS_ZF); > + } > + > + if ((match && !invert) || (!match && invert)) > + ip += disp; > + > + int3_emulate_jmp(regs, ip); > +} > + > #endif /* !CONFIG_UML_X86 */ > > #endif /* _ASM_X86_TEXT_PATCHING_H */ > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c > index 7d8c3cbde368..cd632a0171b4 100644 > --- a/arch/x86/kernel/alternative.c > +++ b/arch/x86/kernel/alternative.c > @@ -1772,6 +1772,11 @@ void text_poke_sync(void) > on_each_cpu(do_sync_core, NULL, 1); > } > > +/* > + * NOTE: crazy scheme to allow patching Jcc.d32 but not increase the size of > + * this thing. When len == 6 everything is prefixed with 0x0f and we map > + * opcode to Jcc.d8, using len to distinguish. > + */ > struct text_poke_loc { > /* addr := _stext + rel_addr */ > s32 rel_addr; > @@ -1893,6 +1898,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs) > int3_emulate_jmp(regs, (long)ip + tp->disp); > break; > > + case 0x70 ... 0x7f: /* Jcc */ > + int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp); > + break; > + > default: > BUG(); > } > @@ -1966,16 +1975,26 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries > * Second step: update all but the first byte of the patched range. 
> */ > for (do_sync = 0, i = 0; i < nr_entries; i++) { > - u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, }; > + u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, }; > + u8 _new[POKE_MAX_OPCODE_SIZE+1]; > + const u8 *new = tp[i].text; > int len = tp[i].len; > > if (len - INT3_INSN_SIZE > 0) { > memcpy(old + INT3_INSN_SIZE, > text_poke_addr(&tp[i]) + INT3_INSN_SIZE, > len - INT3_INSN_SIZE); > + > + if (len == 6) { > + _new[0] = 0x0f; > + memcpy(_new + 1, new, 5); > + new = _new; > + } > + > text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, > - (const char *)tp[i].text + INT3_INSN_SIZE, > + new + INT3_INSN_SIZE, > len - INT3_INSN_SIZE); > + > do_sync++; > } > > @@ -2003,8 +2022,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries > * The old instruction is recorded so that the event can be > * processed forwards or backwards. > */ > - perf_event_text_poke(text_poke_addr(&tp[i]), old, len, > - tp[i].text, len); > + perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len); > } > > if (do_sync) { > @@ -2021,10 +2039,15 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries > * replacing opcode. 
> */ > for (do_sync = 0, i = 0; i < nr_entries; i++) { > - if (tp[i].text[0] == INT3_INSN_OPCODE) > + u8 byte = tp[i].text[0]; > + > + if (tp[i].len == 6) > + byte = 0x0f; > + > + if (byte == INT3_INSN_OPCODE) > continue; > > - text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE); > + text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE); > do_sync++; > } > > @@ -2042,9 +2065,11 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, > const void *opcode, size_t len, const void *emulate) > { > struct insn insn; > - int ret, i; > + int ret, i = 0; > > - memcpy((void *)tp->text, opcode, len); > + if (len == 6) > + i = 1; > + memcpy((void *)tp->text, opcode+i, len-i); > if (!emulate) > emulate = opcode; > > @@ -2055,6 +2080,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, > tp->len = len; > tp->opcode = insn.opcode.bytes[0]; > > + if (is_jcc32(&insn)) { > + /* > + * Map Jcc.d32 onto Jcc.d8 and use len to distinguish. > + */ > + tp->opcode = insn.opcode.bytes[1] - 0x10; > + } > + > switch (tp->opcode) { > case RET_INSN_OPCODE: > case JMP32_INSN_OPCODE: > @@ -2071,7 +2103,6 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, > BUG_ON(len != insn.length); > } > > - > switch (tp->opcode) { > case INT3_INSN_OPCODE: > case RET_INSN_OPCODE: > @@ -2080,6 +2111,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, > case CALL_INSN_OPCODE: > case JMP32_INSN_OPCODE: > case JMP8_INSN_OPCODE: > + case 0x70 ... 
0x7f: /* Jcc */ > tp->disp = insn.immediate.value; > break; > > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c > index b36f3c367cb2..f2b2f7545ecb 100644 > --- a/arch/x86/kernel/kprobes/core.c > +++ b/arch/x86/kernel/kprobes/core.c > @@ -464,50 +464,26 @@ static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs) > } > NOKPROBE_SYMBOL(kprobe_emulate_call); > > -static nokprobe_inline > -void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond) > +static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) > { > unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; > > - if (cond) > - ip += p->ainsn.rel32; > + ip += p->ainsn.rel32; > int3_emulate_jmp(regs, ip); > } > - > -static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) > -{ > - __kprobe_emulate_jmp(p, regs, true); > -} > NOKPROBE_SYMBOL(kprobe_emulate_jmp); > > -static const unsigned long jcc_mask[6] = { > - [0] = X86_EFLAGS_OF, > - [1] = X86_EFLAGS_CF, > - [2] = X86_EFLAGS_ZF, > - [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, > - [4] = X86_EFLAGS_SF, > - [5] = X86_EFLAGS_PF, > -}; > - > static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) > { > - bool invert = p->ainsn.jcc.type & 1; > - bool match; > - > - if (p->ainsn.jcc.type < 0xc) { > - match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1]; > - } else { > - match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ > - ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); > - if (p->ainsn.jcc.type >= 0xe) > - match = match || (regs->flags & X86_EFLAGS_ZF); > - } > - __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); > + int3_emulate_jcc(regs, p->ainsn.jcc.type, > + regs->ip - INT3_INSN_SIZE + p->ainsn.size, > + p->ainsn.rel32); > } > NOKPROBE_SYMBOL(kprobe_emulate_jcc); > > static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) > { > + unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; > bool 
match; > > if (p->ainsn.loop.type != 3) { /* LOOP* */ > @@ -535,7 +511,9 @@ static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) > else if (p->ainsn.loop.type == 1) /* LOOPE */ > match = match && (regs->flags & X86_EFLAGS_ZF); > > - __kprobe_emulate_jmp(p, regs, match); > + if (match) > + ip += p->ainsn.rel32; > + int3_emulate_jmp(regs, ip); > } > NOKPROBE_SYMBOL(kprobe_emulate_loop); > > diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c > index 2ebc338980bc..deb909f2b62f 100644 > --- a/arch/x86/kernel/static_call.c > +++ b/arch/x86/kernel/static_call.c > @@ -9,6 +9,7 @@ enum insn_type { > NOP = 1, /* site cond-call */ > JMP = 2, /* tramp / site tail-call */ > RET = 3, /* tramp / site cond-tail-call */ > + JCC = 4, > }; > > /* > @@ -25,12 +26,38 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; > > static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; > > +static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */ > +{ > + u8 ret = 0; > + > + if (insn[0] == 0x0f) { > + u8 tmp = insn[1]; > + if ((tmp & 0xf0) == 0x80) > + ret = tmp; > + } > + > + return ret; > +} > + > +extern void __static_call_return(void); > + > +asm (".global __static_call_return\n\t" > + ".type __static_call_return, @function\n\t" > + ASM_FUNC_ALIGN "\n\t" > + "__static_call_return:\n\t" > + "ret; int3\n\t" > + ".size __static_call_return, . 
- __static_call_return \n\t"); > + > static void __ref __static_call_transform(void *insn, enum insn_type type, > void *func, bool modinit) > { > const void *emulate = NULL; > int size = CALL_INSN_SIZE; > const void *code; > + u8 op, buf[6]; > + > + if ((type == JMP || type == RET) && (op = __is_Jcc(insn))) > + type = JCC; > > switch (type) { > case CALL: > @@ -57,6 +84,20 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, > else > code = &retinsn; > break; > + > + case JCC: > + if (!func) { > + func = __static_call_return; > + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) > + func = x86_return_thunk; > + } > + > + buf[0] = 0x0f; > + __text_gen_insn(buf+1, op, insn+1, func, 5); > + code = buf; > + size = 6; > + > + break; > } > > if (memcmp(insn, code, size) == 0) > @@ -79,7 +120,8 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) > > if (tail) { > if (opcode == JMP32_INSN_OPCODE || > - opcode == RET_INSN_OPCODE) > + opcode == RET_INSN_OPCODE || > + __is_Jcc(insn)) > return; > } else { > if (opcode == CALL_INSN_OPCODE || Success! You nailed it. Applied your patch on top of v6.2-rc4 and with that I got ye olde Thinkpad A275 with its borked custom config + clang -Os(stupid) booting again. No side effects to be seen so far. Many thanks! Regards, Erhard