Received: by 2002:ab2:7903:0:b0:1fb:b500:807b with SMTP id a3csp21772lqj; Sat, 1 Jun 2024 06:37:46 -0700 (PDT) X-Forwarded-Encrypted: i=3; AJvYcCUZb+hmqFNU76War0dz5BsUaDSCipR+xYGXdh1eR5gnUB/6NyOM6kQjHVxcbXqDHRVTpRzIb+08/dtnmHlPTF7nj2xlA0ZJt+32s84RDA== X-Google-Smtp-Source: AGHT+IGQAbLCB6tWiz1gu0lLcaNKya7oDFrAys5E3IHR1gsS+Y10SJlo9+toW1pFMIYkZKaWM6+D X-Received: by 2002:ac8:5a04:0:b0:43a:7b64:2fcc with SMTP id d75a77b69052e-43febe6980fmr124525031cf.7.1717249066032; Sat, 01 Jun 2024 06:37:46 -0700 (PDT) ARC-Seal: i=2; a=rsa-sha256; t=1717249066; cv=pass; d=google.com; s=arc-20160816; b=uxJJvGpRIPwDjKHmi+QfVa4Zectqfv+fBeLGIUVu1B16v/HK+A3ot06pAthlK5zfRR 87JFf8DvDEpmy1dzHPJDmAJUvFKPRmO+Je9ACPYBoIoy0UEwONU8JKAfFQlQU9FUTv0h g0pBVLDvL+LeZyJQZVT65cPno6H5PvvxPfhje8z3MsKGxlYLV5LGgziDFRIGd7PZDvlc U8bcnKPCw+aTDjbJ5abyZh0XuLBqyv8gk0xmxrGOwlh7MikzUxGELBxENUc+FEe8jrY/ IagKiOKC+Ueu3fnNlXJzOx+lI02WgPovONg1HpLbQTFZFPE+B6z8aNHt3C5fYxpuiwf1 HMpw== ARC-Message-Signature: i=2; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=content-transfer-encoding:mime-version:list-unsubscribe :list-subscribe:list-id:precedence:message-id:date:references :in-reply-to:subject:cc:to:from:dkim-signature; bh=Ye/eFIIg5/G1qr1KkNcR29iqsvNe7JD8BDAscA4I/gc=; fh=XG56EC/8IjWs8K1UhL7jZpYrUCBKFFW0YdPU1rd4FLU=; b=TFmmP98PD7pM2YRfGm6t22XYd4RdFgnR/5W8OOo1IOnn5/Q8REfm0THYuLsbkEuWB7 n+VQHHZODBxzzkJoOu8AhVhOWv+oiaCi3kb+0a4WtITRHIK6qwXl4aUEQg4oXKesQ/FU tiGEmOSJm6EuWeJmZ86W+jgLY5mqoRkoUbmuP6KY2jMaIlQEG/4fmUK3wrbFFJ/v4//2 2+TS7fAH5Ru17px/geRlpmBYA50po50FFNDDNwo1+xaHR1O8MvUnLER2rXQGpupwn5AS grawiU5y31E1iFz0fOPDfSv+PL/OYvjY2QiSoBYXuXG629wHalLiP1KyRNXKd6RUPOaW 2HNw==; dara=google.com ARC-Authentication-Results: i=2; mx.google.com; dkim=pass header.i=@metaspace-dk.20230601.gappssmtp.com header.s=20230601 header.b=tF1gPEep; arc=pass (i=1 dkim=pass dkdomain=metaspace-dk.20230601.gappssmtp.com); spf=pass (google.com: domain of linux-kernel+bounces-197805-linux.lists.archive=gmail.com@vger.kernel.org designates 2604:1380:45d1:ec00::1 as permitted sender) smtp.mailfrom="linux-kernel+bounces-197805-linux.lists.archive=gmail.com@vger.kernel.org" Return-Path: Received: from ny.mirrors.kernel.org (ny.mirrors.kernel.org. [2604:1380:45d1:ec00::1]) by mx.google.com with ESMTPS id d75a77b69052e-43ff259ba7asi40551951cf.553.2024.06.01.06.37.45 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sat, 01 Jun 2024 06:37:46 -0700 (PDT) Received-SPF: pass (google.com: domain of linux-kernel+bounces-197805-linux.lists.archive=gmail.com@vger.kernel.org designates 2604:1380:45d1:ec00::1 as permitted sender) client-ip=2604:1380:45d1:ec00::1; Authentication-Results: mx.google.com; dkim=pass header.i=@metaspace-dk.20230601.gappssmtp.com header.s=20230601 header.b=tF1gPEep; arc=pass (i=1 dkim=pass dkdomain=metaspace-dk.20230601.gappssmtp.com); spf=pass (google.com: domain of linux-kernel+bounces-197805-linux.lists.archive=gmail.com@vger.kernel.org designates 2604:1380:45d1:ec00::1 as permitted sender) smtp.mailfrom="linux-kernel+bounces-197805-linux.lists.archive=gmail.com@vger.kernel.org" Received: from smtp.subspace.kernel.org (wormhole.subspace.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ny.mirrors.kernel.org (Postfix) with ESMTPS id 0FEA51C21035 for ; Sat, 1 Jun 2024 11:11:37 +0000 (UTC) Received: from localhost.localdomain (localhost.localdomain [127.0.0.1]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 2AC7F14EC4A; Sat, 1 Jun 2024 11:11:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=metaspace-dk.20230601.gappssmtp.com header.i=@metaspace-dk.20230601.gappssmtp.com header.b="tF1gPEep" Received: from mail-ej1-f42.google.com (mail-ej1-f42.google.com [209.85.218.42]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4272C208B0 for ; Sat, 1 Jun 2024 11:11:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.218.42 ARC-Seal:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1717240281; cv=none; b=E7SuXIPfS4aqqdY1XRwipSMgFoWfnjndlsPfILyTEJ1hwgHGMSygG/p5cd851ZPJzWYsP/QgYliruo28uFPV05g8Gix0+ZQs+8NKYbYmpNXbUlEzcKpvSdoWFvIkMQpXkJtz3wk/0UGmCceN/NV1k5PJlPndtJDS+TzJ6KEZTMU= ARC-Message-Signature:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1717240281; c=relaxed/simple; bh=21N+PVuembHrqSBNJBTVoVgQuu3mMVXmFN4nRBYEHgs=; h=From:To:Cc:Subject:In-Reply-To:References:Date:Message-ID: MIME-Version:Content-Type; b=ZVuch5TmE/D6B3ivUpa7YGcg6RaeMINyGCSWQ6EakEDnGdgnSgA5cY+4it02Bwn/GRF1spnMvTrna3lCERw0r3WprUQw0o2HGYAjkRGmZnPaegapbSz8wUOkRk+purM5JgYZhCn9ln6I2C0HDIuL0FZLzLtJU8Kssr72IHTnXPQ= ARC-Authentication-Results:i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=metaspace.dk; spf=none smtp.mailfrom=metaspace.dk; dkim=pass (2048-bit key) header.d=metaspace-dk.20230601.gappssmtp.com header.i=@metaspace-dk.20230601.gappssmtp.com header.b=tF1gPEep; arc=none smtp.client-ip=209.85.218.42 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=metaspace.dk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=metaspace.dk Received: by mail-ej1-f42.google.com with SMTP id a640c23a62f3a-a6858bdc9ddso167590166b.2 for ; Sat, 01 Jun 2024 04:11:18 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=metaspace-dk.20230601.gappssmtp.com; s=20230601; t=1717240276; x=1717845076; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:message-id:date:references :in-reply-to:subject:cc:to:from:from:to:cc:subject:date:message-id :reply-to; bh=Ye/eFIIg5/G1qr1KkNcR29iqsvNe7JD8BDAscA4I/gc=; b=tF1gPEepT/610atnyrTn82H9m4bozGy+zY3hUxcwfmljPhx6+2W0Me5oRr9IihWDEQ +IqC21zl4RDWXMyTGlqRWLaZ93iGmbtXZ+ar5XVPlAsy2zQQVptdsmDqWLNGECoF8SpX TGFBYqaoU11LrTkfdt5H04a4SGwJkUSVaXZrfV97tuBJwk3T6WFUXKJFX2PLbtAZXrMm wswcMJR3f5Gl2IMgxnxOfhBkW8sTeTAsWuZlO9pRxWPwrFKKA09LlJZnYD/4i+6GVwnz CMYFRRUWpCjQYdMkwWANwgzv3ODR2gJOl2IxFsVjBqzX4j3SUDdEjk32hPI3dG5QLKUD 7QoA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1717240276; x=1717845076; h=content-transfer-encoding:mime-version:message-id:date:references :in-reply-to:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=Ye/eFIIg5/G1qr1KkNcR29iqsvNe7JD8BDAscA4I/gc=; b=FvM7or2eSrcXFQld9RnZhb9sRvnZt/kI+VjfMkeboHWWkl1Gouc6sBRUfHGQj2pxuf 41DBQkxSKTh6tvDyzlCjKjQljE5oDA4dLihI2oWvhj7MZ9Ao46oHxPd9bsMezer6IuTt qsUfah4/KS8eqsYRsBTjTdsQYxbNzuXPafMOe5HTxH1XP7rqrD2YOccIABo4/xkKynt4 +NwOKqY7rocNVbmn5UhX8XvuT6/FyHFoU7fLEWuTAd5XZG8TXQsZf/OTgefYazoPjkWW l5N58VeMKmq64X+xLHCKAkBAc/HhiV6asnOPhMxqvK0S24M93aqEdopS4T2UA8Hoxt97 UHUw== X-Forwarded-Encrypted: i=1; AJvYcCVcOVooS8lAjfe2MhBZoFbbHKgvfJGylG7PL0bA3jbQ3jMbdqzNm/qDUtb7QjgRPXHbF9rOC+AAkH2C5C2gvs25VNx8pciPfDGFyjTa X-Gm-Message-State: AOJu0YwtG0yFUEd3QAc30YNS3ZHFfZW8zQxM5Tc4vlOFkXeAYGCQnYwE NVD+29ygyYJTZQ3oqX4JkwOCdwiwlf65/M19rwvmNOcALovD899fqSpA3+1ShdE= X-Received: by 2002:a17:906:6d06:b0:a68:b0f2:1d95 with SMTP id a640c23a62f3a-a68b0f21f73mr81382566b.71.1717240276238; Sat, 01 Jun 2024 04:11:16 -0700 (PDT) Received: from localhost ([79.142.230.34]) by smtp.gmail.com with ESMTPSA id a640c23a62f3a-a68c5539b80sm24885166b.168.2024.06.01.04.11.15 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sat, 01 Jun 2024 04:11:15 -0700 (PDT) From: Andreas Hindborg To: Benno Lossin Cc: Jens Axboe , Christoph Hellwig , Keith Busch , Damien Le Moal , Bart Van Assche , Hannes Reinecke , Ming Lei , "linux-block@vger.kernel.org" , Andreas Hindborg , Wedson Almeida Filho , Greg KH , Matthew Wilcox , Miguel Ojeda , Alex Gaynor , Boqun Feng , Gary Guo , =?utf-8?Q?Bj=C3=B6rn?= Roy Baron , Alice Ryhl , Chaitanya Kulkarni , Luis Chamberlain , Yexuan Yang <1182282462@bupt.edu.cn>, Sergio =?utf-8?Q?Gonz=C3=A1lez?= Collado , Joel Granados , "Pankaj Raghav (Samsung)" , Daniel Gomez , Niklas Cassel , Philipp Stanner , Conor Dooley , Johannes Thumshirn , Matias =?utf-8?Q?Bj=C3=B8rling?= , open list , "rust-for-linux@vger.kernel.org" , "lsf-pc@lists.linux-foundation.org" , "gost.dev@samsung.com" Subject: Re: [PATCH v3 1/3] rust: block: introduce `kernel::block::mq` module In-Reply-To: <47a8ce04-3901-49ae-abac-a7d85901f980@proton.me> (Benno Lossin's message of "Sat, 01 Jun 2024 09:43:47 +0000") References: <20240601081806.531954-1-nmi@metaspace.dk> <20240601081806.531954-2-nmi@metaspace.dk> <47a8ce04-3901-49ae-abac-a7d85901f980@proton.me> Date: Sat, 01 Jun 2024 13:11:05 +0200 Message-ID: <87ikysor3q.fsf@metaspace.dk> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable Benno Lossin writes: > On 01.06.24 10:18, Andreas Hindborg wrote: >> From: Andreas Hindborg > > [...] > >> +impl GenDisk { >> + /// Try to create a new `GenDisk`. >> + pub fn try_new(tagset: Arc>) -> Result { >> + let lock_class_key =3D crate::sync::LockClassKey::new(); >> + >> + // SAFETY: `tagset.raw_tag_set()` points to a valid and initial= ized tag set >> + let gendisk =3D from_err_ptr(unsafe { >> + bindings::__blk_mq_alloc_disk( >> + tagset.raw_tag_set(), >> + core::ptr::null_mut(), // TODO: We can pass queue limit= s right here >> + core::ptr::null_mut(), >> + lock_class_key.as_ptr(), >> + ) >> + })?; >> + >> + const TABLE: bindings::block_device_operations =3D bindings::bl= ock_device_operations { >> + submit_bio: None, >> + open: None, >> + release: None, >> + ioctl: None, >> + compat_ioctl: None, >> + check_events: None, >> + unlock_native_capacity: None, >> + getgeo: None, >> + set_read_only: None, >> + swap_slot_free_notify: None, >> + report_zones: None, >> + devnode: None, >> + alternative_gpt_sector: None, >> + get_unique_id: None, >> + // TODO: Set to THIS_MODULE. Waiting for const_refs_to_stat= ic feature to >> + // be merged (unstable in rustc 1.78 which is staged for li= nux 6.10) >> + // https://github.com/rust-lang/rust/issues/119618 > > AFAIK the 1.78 upgrade already is in rust-next (and also should appear > in v6.10-rc2, right?) do you have this on your radar? I am tracking this and I plan to add support in a later patch. > >> + owner: core::ptr::null_mut(), >> + pr_ops: core::ptr::null_mut(), >> + free_disk: None, >> + poll_bio: None, >> + }; >> + >> + // SAFETY: gendisk is a valid pointer as we initialized it above >> + unsafe { (*gendisk).fops =3D &TABLE }; >> + >> + // INVARIANT: `gendisk` was initialized above. >> + // INVARIANT: `gendisk.queue.queue_data` is set to `data` in th= e call to >> + // `__blk_mq_alloc_disk` above. >> + Ok(GenDisk { >> + tagset, >> + gendisk, >> + _phantom: PhantomData, >> + }) >> + } > > [...] > >> + /// This function is called by the C kernel. A pointer to this func= tion is >> + /// installed in the `blk_mq_ops` vtable for the driver. >> + /// >> + /// # Safety >> + /// >> + /// This function may only be called by blk-mq C infrastructure. >> + unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_= mq_hw_ctx) { >> + T::commit_rqs() >> + } >> + >> + /// This function is called by the C kernel. It is not currently >> + /// implemented, and there is no way to exercise this code path. > > Is it also possible to completely remove it? ie use `None` in the > VTABLE, or will the C side error? No, this pointer is not allowed to be null. It must be a callable function, hence the stub. It will be populated soon enough when I send patches for the remote completion logic. > >> + /// >> + /// # Safety >> + /// >> + /// This function may only be called by blk-mq C infrastructure. >> + unsafe extern "C" fn complete_callback(_rq: *mut bindings::request)= {} > > [...] > >> +impl<'a> RawWriter<'a> { >> + /// Create a new `RawWriter` instance. >> + fn new(buffer: &'a mut [u8]) -> Result> { >> + *(buffer.last_mut().ok_or(EINVAL)?) =3D 0; >> + >> + // INVARIANT: We null terminated the buffer above. >> + Ok(Self { buffer, pos: 0 }) >> + } >> + >> + pub(crate) fn from_array( >> + a: &'a mut [core::ffi::c_char; N], >> + ) -> Result> { > > You could change the return type to be `RawWriter<'a>` and check using > `build_assert!` that `N > 0`. Then you can also call `unwrap_unchecked` > on the result that you get below. > > I don't know if we want that, but it is a possibility. I guess we could potentially make the type generic over a const buffer size. But let's put a pin in that for now. I'll look into that down the roa= d. > >> + Self::new( >> + // SAFETY: the buffer of `a` is valid for read and write as= `u8` for >> + // at least `N` bytes. >> + unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cas= t::(), N) }, >> + ) >> + } >> +} > > [...] > >> +/// Store the result of `op(target.load())` in target, returning new va= lue of >> +/// taret. >> +fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64= ) -> u64 { >> + let old =3D target.fetch_update(Ordering::Relaxed, Ordering::Relaxe= d, |x| Some(op(x))); >> + >> + // SAFETY: Because the operation passed to `fetch_update` above alw= ays >> + // return `Some`, `old` will always be `Ok`. >> + let old =3D unsafe { old.unwrap_unchecked() }; >> + >> + op(old) >> +} >> + >> +/// Store the result of `op(target.load)` in `target` if `target.load()= !=3D >> +/// pred`, returning previous value of target > > The function returns a bool, not a u64 (value). From the body I read > that you return whether the value was updated. Thanks =F0=9F=91=8D > >> +fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64= , pred: u64) -> bool { >> + let x =3D target.load(Ordering::Relaxed); >> + loop { >> + if x =3D=3D pred { >> + break; >> + } >> + if target >> + .compare_exchange_weak(x, op(x), Ordering::Relaxed, Orderin= g::Relaxed) >> + .is_ok() >> + { >> + break; >> + } > > If this fails, you are not re-reading the value of `target`, so if > someone else just set it to `pred`, you will still continue to try to > set it from `x` to `op(x)`, but it might never have the value `x` again. > This would lead to a potentially infinite loop, right? Yea, that is not good. I should have moved the assignment of `x` into the l= oop > >> + } > > Do you think you can also implement this using `fetch_update`? I guess > this would do what you want, right?: > > target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { > if x =3D=3D pred { > None > } else { > Some(op(x)) > } > }).is_ok() Makes sense, I will steal that. > >> + >> + x =3D=3D pred >> +} > > [...] > >> +impl TagSet { >> + /// Try to create a new tag set >> + pub fn try_new( >> + nr_hw_queues: u32, >> + num_tags: u32, >> + num_maps: u32, >> + ) -> impl PinInit { >> + try_pin_init!( TagSet { >> + // INVARIANT: We initialize `inner` here and it is valid af= ter the >> + // initializer has run. >> + inner <- unsafe {kernel::init::pin_init_from_closure(move |= place: *mut Opaque| -> Result<()> { >> + let place =3D place.cast::(); >> + >> + // SAFETY: pin_init_from_closure promises that `place` = is writable, and >> + // zeroes is a valid bit pattern for this structure. >> + core::ptr::write_bytes(place, 0, 1); >> + >> + /// For a raw pointer to a struct, write a struct field= without >> + /// creating a reference to the field >> + macro_rules! write_ptr_field { >> + ($target:ident, $field:ident, $value:expr) =3D> { >> + ::core::ptr::write(::core::ptr::addr_of_mut!((*= $target).$field), $value) >> + }; >> + } >> + >> + // SAFETY: pin_init_from_closure promises that `place` = is writable >> + write_ptr_field!(place, ops, OperationsVTable:::= :build()); >> + write_ptr_field!(place, nr_hw_queues , nr_hw_queues= ); >> + write_ptr_field!(place, timeout , 0); // 0 means de= fault which is 30 * HZ in C >> + write_ptr_field!(place, numa_node , bindings::NUMA_= NO_NODE); >> + write_ptr_field!(place, queue_depth , num_tags); >> + write_ptr_field!(place, cmd_size , core::mem::size_= of::().try_into()?); >> + write_ptr_field!(place, flags , bindings::BLK_MQ_F_= SHOULD_MERGE); >> + write_ptr_field!(place, driver_data , core::ptr::nu= ll_mut::()); >> + write_ptr_field!(place, nr_maps , num_maps); > > Did something not work with my suggestion? I did not want to change it if we are rewriting it to `Opaque::init` in a cycle or two, which I think is a better solution. Best regards, Andreas