2019-06-07 13:20:18

by Naohiro Aota

[permalink] [raw]
Subject: [PATCH 01/12] btrfs-progs: build: Check zoned block device support

If the kernel supports zoned block devices, the file
/usr/include/linux/blkzoned.h will be present. Check this and define
BTRFS_ZONED if the file is present.

If it is present, enable the HMZONED feature; if not, disable it.

Signed-off-by: Damien Le Moal <[email protected]>
Signed-off-by: Naohiro Aota <[email protected]>
---
configure.ac | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/configure.ac b/configure.ac
index cf792eb5488b..c637f72a8fe6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -206,6 +206,18 @@ else
AC_DEFINE([HAVE_OWN_FIEMAP_EXTENT_SHARED_DEFINE], [0], [We did not define FIEMAP_EXTENT_SHARED])
fi

+AC_CHECK_HEADER(linux/blkzoned.h, [blkzoned_found=yes], [blkzoned_found=no])
+AC_ARG_ENABLE([zoned],
+ AS_HELP_STRING([--disable-zoned], [disable zoned block device support]),
+ [], [enable_zoned=$blkzoned_found]
+)
+
+AS_IF([test "x$enable_zoned" = xyes], [
+ AC_CHECK_HEADER(linux/blkzoned.h, [],
+ [AC_MSG_ERROR([Couldn't find linux/blkzoned.h])])
+ AC_DEFINE([BTRFS_ZONED], [1], [enable zoned block device support])
+])
+
dnl Define <NAME>_LIBS= and <NAME>_CFLAGS= by pkg-config
dnl
dnl The default PKG_CHECK_MODULES() action-if-not-found is end the
@@ -307,6 +319,7 @@ AC_MSG_RESULT([
btrfs-restore zstd: ${enable_zstd}
Python bindings: ${enable_python}
Python interpreter: ${PYTHON}
+ zoned device: ${enable_zoned}

Type 'make' to compile.
])
--
2.21.0


2019-06-07 13:20:19

by Naohiro Aota

[permalink] [raw]
Subject: [PATCH 11/12] btrfs-progs: device-add: support HMZONED device

This patch checks if the target file system is flagged as HMZONED. If it is,
the device to be added is flagged PREP_DEVICE_HMZONED. Also add checks to
prevent mixing non-zoned devices and zoned devices.

Signed-off-by: Naohiro Aota <[email protected]>
---
cmds-device.c | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/cmds-device.c b/cmds-device.c
index e3e30b6d5ded..86ffb1a2a5c2 100644
--- a/cmds-device.c
+++ b/cmds-device.c
@@ -57,6 +57,9 @@ static int cmd_device_add(int argc, char **argv)
int discard = 1;
int force = 0;
int last_dev;
+ int res;
+ int hmzoned;
+ struct btrfs_ioctl_feature_flags feature_flags;

optind = 0;
while (1) {
@@ -92,12 +95,33 @@ static int cmd_device_add(int argc, char **argv)
if (fdmnt < 0)
return 1;

+ res = ioctl(fdmnt, BTRFS_IOC_GET_FEATURES, &feature_flags);
+ if (res) {
+ error("error getting feature flags '%s': %m", mntpnt);
+ return 1;
+ }
+ hmzoned = feature_flags.incompat_flags & BTRFS_FEATURE_INCOMPAT_HMZONED;
+
for (i = optind; i < last_dev; i++){
struct btrfs_ioctl_vol_args ioctl_args;
- int devfd, res;
+ int devfd;
u64 dev_block_count = 0;
char *path;

+ if (hmzoned && zoned_model(argv[i]) == ZONED_NONE) {
+ error("cannot add non-zoned device to HMZONED file system '%s'",
+ argv[i]);
+ ret++;
+ continue;
+ }
+
+ if (!hmzoned && zoned_model(argv[i]) == ZONED_HOST_MANAGED) {
+ error("cannot add host managed zoned device to non-HMZONED file system '%s'",
+ argv[i]);
+ ret++;
+ continue;
+ }
+
res = test_dev_for_mkfs(argv[i], force);
if (res) {
ret++;
@@ -113,7 +137,8 @@ static int cmd_device_add(int argc, char **argv)

res = btrfs_prepare_device(devfd, argv[i], &dev_block_count, 0,
PREP_DEVICE_ZERO_END | PREP_DEVICE_VERBOSE |
- (discard ? PREP_DEVICE_DISCARD : 0));
+ (discard ? PREP_DEVICE_DISCARD : 0) |
+ (hmzoned ? PREP_DEVICE_HMZONED : 0));
close(devfd);
if (res) {
ret++;
--
2.21.0

2019-06-07 13:20:39

by Naohiro Aota

[permalink] [raw]
Subject: [PATCH 08/12] btrfs-progs: volume: align chunk allocation to zones

To facilitate support for zoned block devices in the extent buffer
allocation, a zoned block device chunk is always aligned to a zone of the
device. With this, the zone write pointer location simply becomes a hint to
allocate new buffers.

Signed-off-by: Naohiro Aota <[email protected]>
---
volumes.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 75 insertions(+), 4 deletions(-)

diff --git a/volumes.c b/volumes.c
index f6d1b1e9dc7f..64b42643390b 100644
--- a/volumes.c
+++ b/volumes.c
@@ -399,6 +399,34 @@ int btrfs_scan_one_device(int fd, const char *path,
return ret;
}

+/* zone size is ensured to be power of 2 */
+static u64 btrfs_zone_align(struct btrfs_zone_info *zinfo, u64 val)
+{
+ if (zinfo && zinfo->zone_size)
+ return (val + zinfo->zone_size - 1) & ~(zinfo->zone_size - 1);
+ return val;
+}
+
+static bool check_dev_zone(struct btrfs_zone_info *zinfo, u64 physical,
+ u64 num_bytes)
+{
+ u64 zone_size = zinfo->zone_size;
+ int zone_is_random;
+
+ WARN_ON(!IS_ALIGNED(num_bytes, zone_size));
+ zone_is_random = zone_is_random_write(zinfo, physical);
+
+ while (num_bytes) {
+ if (zone_is_random != zone_is_random_write(zinfo, physical))
+ return false;
+
+ physical += zone_size;
+ num_bytes -= zone_size;
+ }
+
+ return true;
+}
+
/*
* find_free_dev_extent_start - find free space in the specified device
* @device: the device which we search the free space in
@@ -428,6 +456,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
struct btrfs_root *root = device->dev_root;
struct btrfs_dev_extent *dev_extent;
struct btrfs_path *path;
+ struct btrfs_zone_info *zinfo = &device->zinfo;
u64 hole_size;
u64 max_hole_start;
u64 max_hole_size;
@@ -445,6 +474,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
*/
min_search_start = max(root->fs_info->alloc_start, (u64)SZ_1M);
search_start = max(search_start, min_search_start);
+ search_start = btrfs_zone_align(zinfo, search_start);

path = btrfs_alloc_path();
if (!path)
@@ -497,6 +527,18 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
goto next;

if (key.offset > search_start) {
+ if (zinfo && zinfo->zone_size) {
+ while (key.offset > search_start) {
+ hole_size = key.offset - search_start;
+ if (hole_size < num_bytes)
+ break;
+ if (check_dev_zone(zinfo, search_start,
+ num_bytes))
+ break;
+ search_start += zinfo->zone_size;
+ }
+ }
+
hole_size = key.offset - search_start;

/*
@@ -527,7 +569,8 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
extent_end = key.offset + btrfs_dev_extent_length(l,
dev_extent);
if (extent_end > search_start)
- search_start = extent_end;
+ search_start = btrfs_zone_align(&device->zinfo,
+ extent_end);
next:
path->slots[0]++;
cond_resched();
@@ -539,6 +582,18 @@ next:
* search_end may be smaller than search_start.
*/
if (search_end > search_start) {
+ if (zinfo && zinfo->zone_size) {
+ while (search_end > search_start) {
+ hole_size = search_end - search_start;
+ if (hole_size < num_bytes)
+ break;
+ if (check_dev_zone(zinfo, search_start,
+ num_bytes))
+ break;
+ search_start += zinfo->zone_size;
+ }
+ }
+
hole_size = search_end - search_start;

if (hole_size > max_hole_size) {
@@ -582,6 +637,9 @@ int btrfs_insert_dev_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;

+ /* Align to zone for a zoned block device */
+ start = btrfs_zone_align(&device->zinfo, start);
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1065,9 +1123,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
btrfs_super_stripesize(info->super_copy));
}

- /* we don't want a chunk larger than 10% of the FS */
- percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
- max_chunk_size = min(percent_max, max_chunk_size);
+ if (info->fs_devices->hmzoned) {
+ /* Zoned mode uses zone aligned chunks */
+ calc_size = info->fs_devices->zone_size;
+ max_chunk_size = calc_size * num_stripes;
+ } else {
+ /* we don't want a chunk larger than 10% of the FS */
+ percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
+ max_chunk_size = min(percent_max, max_chunk_size);
+ }

again:
if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
@@ -1147,7 +1211,9 @@ again:
*num_bytes = chunk_bytes_by_type(type, calc_size,
num_stripes, sub_stripes);
index = 0;
+ dev_offset = 0;
while(index < num_stripes) {
+ size_t zone_size = device->zinfo.zone_size;
struct btrfs_stripe *stripe;
BUG_ON(list_empty(&private_devs));
cur = private_devs.next;
@@ -1158,11 +1224,16 @@ again:
(index == num_stripes - 1))
list_move_tail(&device->dev_list, dev_list);

+ if (device->zinfo.zone_size)
+ calc_size = device->zinfo.zone_size;
+
ret = btrfs_alloc_dev_extent(trans, device, key.offset,
calc_size, &dev_offset);
if (ret < 0)
goto out_chunk_map;

+ WARN_ON(zone_size && !IS_ALIGNED(dev_offset, zone_size));
+
device->bytes_used += calc_size;
ret = btrfs_update_device(trans, device);
if (ret < 0)
--
2.21.0

2019-06-07 13:21:03

by Naohiro Aota

[permalink] [raw]
Subject: [PATCH 09/12] btrfs-progs: do sequential allocation

Ensure that block allocation in sequential-write-required zones is always
done sequentially using an allocation pointer which is the zone write
pointer plus the number of blocks already allocated but not yet written.
For conventional zones, the legacy behavior is used.

Signed-off-by: Naohiro Aota <[email protected]>
---
ctree.h | 17 +++++
extent-tree.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++
transaction.c | 16 +++++
3 files changed, 219 insertions(+)

diff --git a/ctree.h b/ctree.h
index 9f79686690e0..2e828bf1250e 100644
--- a/ctree.h
+++ b/ctree.h
@@ -1068,15 +1068,32 @@ struct btrfs_space_info {
struct list_head list;
};

+/* Block group allocation types */
+enum btrfs_alloc_type {
+
+ /* Regular first fit allocation */
+ BTRFS_ALLOC_FIT = 0,
+
+ /*
+ * Sequential allocation: this is for HMZONED mode and
+ * will result in ignoring free space before a block
+ * group allocation offset.
+ */
+ BTRFS_ALLOC_SEQ = 1,
+};
+
struct btrfs_block_group_cache {
struct cache_extent cache;
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_space_info *space_info;
struct btrfs_free_space_ctl *free_space_ctl;
+ enum btrfs_alloc_type alloc_type;
u64 bytes_super;
u64 pinned;
u64 flags;
+ u64 alloc_offset;
+ u64 write_offset;
int cached;
int ro;
/*
diff --git a/extent-tree.c b/extent-tree.c
index e62ee8c2ba13..528c6875c8fb 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -251,6 +251,14 @@ again:
if (cache->ro || !block_group_bits(cache, data))
goto new_group;

+ if (cache->alloc_type == BTRFS_ALLOC_SEQ) {
+ if (cache->key.offset - cache->alloc_offset < num)
+ goto new_group;
+ *start_ret = cache->key.objectid + cache->alloc_offset;
+ cache->alloc_offset += num;
+ return 0;
+ }
+
while(1) {
ret = find_first_extent_bit(&root->fs_info->free_space_cache,
last, &start, &end, EXTENT_DIRTY);
@@ -277,6 +285,7 @@ out:
(unsigned long long)search_start);
return -ENOENT;
}
+ printf("nospace\n");
return -ENOSPC;

new_group:
@@ -3039,6 +3048,176 @@ error:
return ret;
}

+#ifdef BTRFS_ZONED
+static int
+btrfs_get_block_group_alloc_offset(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_device *device;
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct cache_extent *ce;
+ struct map_lookup *map;
+ u64 logical = cache->key.objectid;
+ u64 length = cache->key.offset;
+ u64 physical = 0;
+ int ret = 0;
+ int i;
+ u64 zone_size = fs_info->fs_devices->zone_size;
+ u64 *alloc_offsets = NULL;
+
+ if (!btrfs_fs_incompat(fs_info, HMZONED))
+ return 0;
+
+ /* Sanity check */
+ if (!IS_ALIGNED(length, zone_size)) {
+ fprintf(stderr, "unaligned block group at %llu", logical);
+ return -EIO;
+ }
+
+ /* Get the chunk mapping */
+ ce = search_cache_extent(&map_tree->cache_tree, logical);
+ if (!ce) {
+ fprintf(stderr, "failed to find block group at %llu", logical);
+ return -ENOENT;
+ }
+ map = container_of(ce, struct map_lookup, ce);
+
+ /*
+ * Get the zone type: if the group is mapped to a non-sequential zone,
+ * there is no need for the allocation offset (fit allocation is OK).
+ */
+ device = map->stripes[0].dev;
+ physical = map->stripes[0].physical;
+ if (!zone_is_random_write(&device->zinfo, physical))
+ cache->alloc_type = BTRFS_ALLOC_SEQ;
+
+ /* check block group mapping */
+ alloc_offsets = calloc(map->num_stripes, sizeof(*alloc_offsets));
+ for (i = 0; i < map->num_stripes; i++) {
+ int is_sequential;
+ struct blk_zone zone;
+
+ device = map->stripes[i].dev;
+ physical = map->stripes[i].physical;
+
+ is_sequential = !zone_is_random_write(&device->zinfo, physical);
+ if ((is_sequential && cache->alloc_type != BTRFS_ALLOC_SEQ) ||
+ (!is_sequential && cache->alloc_type == BTRFS_ALLOC_SEQ)) {
+ fprintf(stderr,
+ "found block group of mixed zone types");
+ ret = -EIO;
+ goto out;
+ }
+
+ if (!is_sequential)
+ continue;
+
+ WARN_ON(!IS_ALIGNED(physical, zone_size));
+ zone = device->zinfo.zones[physical / zone_size];
+
+ /*
+ * The group is mapped to a sequential zone. Get the zone write
+ * pointer to determine the allocation offset within the zone.
+ */
+ switch (zone.cond) {
+ case BLK_ZONE_COND_OFFLINE:
+ case BLK_ZONE_COND_READONLY:
+ fprintf(stderr, "Offline/readonly zone %llu",
+ physical / fs_info->fs_devices->zone_size);
+ ret = -EIO;
+ goto out;
+ case BLK_ZONE_COND_EMPTY:
+ alloc_offsets[i] = 0;
+ break;
+ case BLK_ZONE_COND_FULL:
+ alloc_offsets[i] = zone_size;
+ break;
+ default:
+ /* Partially used zone */
+ alloc_offsets[i] = ((zone.wp - zone.start) << 9);
+ break;
+ }
+ }
+
+ if (cache->alloc_type != BTRFS_ALLOC_SEQ)
+ goto out;
+
+ switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case 0: /* single */
+ case BTRFS_BLOCK_GROUP_DUP:
+ case BTRFS_BLOCK_GROUP_RAID1:
+ for (i = 1; i < map->num_stripes; i++) {
+ if (alloc_offsets[i] != alloc_offsets[0]) {
+ fprintf(stderr,
+ "zones' write pointers mismatch\n");
+ ret = -EIO;
+ goto out;
+ }
+ }
+ cache->alloc_offset = alloc_offsets[0];
+ break;
+ case BTRFS_BLOCK_GROUP_RAID0:
+ cache->alloc_offset = alloc_offsets[0];
+ for (i = 1; i < map->num_stripes; i++) {
+ cache->alloc_offset += alloc_offsets[i];
+ if (alloc_offsets[0] < alloc_offsets[i]) {
+ fprintf(stderr,
+ "zones' write pointers mismatch\n");
+ ret = -EIO;
+ goto out;
+ }
+ }
+ break;
+ case BTRFS_BLOCK_GROUP_RAID10:
+ cache->alloc_offset = 0;
+ for (i = 0; i < map->num_stripes / map->sub_stripes; i++) {
+ int j;
+ int base;
+
+ base = i*map->sub_stripes;
+ for (j = 1; j < map->sub_stripes; j++) {
+ if (alloc_offsets[base] !=
+ alloc_offsets[base+j]) {
+ fprintf(stderr,
+ "zones' write pointer mismatch\n");
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ if (alloc_offsets[0] < alloc_offsets[base]) {
+ fprintf(stderr,
+ "zones' write pointer mismatch\n");
+ ret = -EIO;
+ goto out;
+ }
+ cache->alloc_offset += alloc_offsets[base];
+ }
+ break;
+ case BTRFS_BLOCK_GROUP_RAID5:
+ case BTRFS_BLOCK_GROUP_RAID6:
+ /* RAID5/6 is not supported yet */
+ default:
+ fprintf(stderr, "Unsupported profile %llu\n",
+ map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ ret = -EINVAL;
+ goto out;
+ }
+
+out:
+ cache->write_offset = cache->alloc_offset;
+ free(alloc_offsets);
+ return ret;
+}
+#else
+static int
+btrfs_get_block_group_alloc_offset(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache)
+{
+ return 0;
+}
+#endif
+
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -3122,6 +3301,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
BUG_ON(ret);
cache->space_info = space_info;

+ ret = btrfs_get_block_group_alloc_offset(info, cache);
+ if (ret)
+ goto error;
+
/* use EXTENT_LOCKED to prevent merging */
set_extent_bits(block_group_cache, found_key.objectid,
found_key.objectid + found_key.offset - 1,
@@ -3151,6 +3334,9 @@ btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
cache->key.objectid = chunk_offset;
cache->key.offset = size;

+ ret = btrfs_get_block_group_alloc_offset(fs_info, cache);
+ BUG_ON(ret);
+
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
btrfs_set_block_group_used(&cache->item, bytes_used);
btrfs_set_block_group_chunk_objectid(&cache->item,
diff --git a/transaction.c b/transaction.c
index 138e10f0d6cc..39a52732bc71 100644
--- a/transaction.c
+++ b/transaction.c
@@ -129,16 +129,32 @@ int __commit_transaction(struct btrfs_trans_handle *trans,
{
u64 start;
u64 end;
+ u64 next = 0;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *eb;
struct extent_io_tree *tree = &fs_info->extent_cache;
+ struct btrfs_block_group_cache *bg = NULL;
int ret;

while(1) {
+again:
ret = find_first_extent_bit(tree, 0, &start, &end,
EXTENT_DIRTY);
if (ret)
break;
+ bg = btrfs_lookup_first_block_group(fs_info, start);
+ BUG_ON(!bg);
+ if (bg->alloc_type == BTRFS_ALLOC_SEQ &&
+ bg->key.objectid + bg->write_offset < start) {
+ next = bg->key.objectid + bg->write_offset;
+ BUG_ON(next + fs_info->nodesize > start);
+ eb = btrfs_find_create_tree_block(fs_info, next);
+ btrfs_mark_buffer_dirty(eb);
+ free_extent_buffer(eb);
+ goto again;
+ }
+ if (bg->alloc_type == BTRFS_ALLOC_SEQ)
+ bg->write_offset += (end + 1 - start);
while(start <= end) {
eb = find_first_extent_buffer(tree, start);
BUG_ON(!eb || eb->start != start);
--
2.21.0

2019-06-07 13:22:04

by Naohiro Aota

[permalink] [raw]
Subject: [PATCH 05/12] btrfs-progs: load and check zone information

This patch checks if a device added to btrfs is a zoned block device. If it
is, load the zone information and the zone size for the device.

For a btrfs volume composed of multiple zoned block devices, all devices
must have the same zone size.

Signed-off-by: Naohiro Aota <[email protected]>
---
utils.c | 10 ++++++++++
volumes.c | 18 ++++++++++++++++++
volumes.h | 3 +++
3 files changed, 31 insertions(+)

diff --git a/utils.c b/utils.c
index d50304b1be80..a26fe7a5743c 100644
--- a/utils.c
+++ b/utils.c
@@ -250,6 +250,16 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
goto out;
}

+ ret = btrfs_get_zone_info(fd, path, fs_info->fs_devices->hmzoned,
+ &device->zinfo);
+ if (ret)
+ goto out;
+ if (device->zinfo.zone_size != fs_info->fs_devices->zone_size) {
+ error("Device zone size differ\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
disk_super = (struct btrfs_super_block *)buf;
dev_item = &disk_super->dev_item;

diff --git a/volumes.c b/volumes.c
index 3a91b43b378b..f6d1b1e9dc7f 100644
--- a/volumes.c
+++ b/volumes.c
@@ -168,6 +168,8 @@ static int device_list_add(const char *path,
u64 found_transid = btrfs_super_generation(disk_super);
bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+ int hmzoned = btrfs_super_incompat_flags(disk_super) &
+ BTRFS_FEATURE_INCOMPAT_HMZONED;

if (metadata_uuid)
fs_devices = find_fsid(disk_super->fsid,
@@ -257,6 +259,8 @@ static int device_list_add(const char *path,
if (fs_devices->lowest_devid > devid) {
fs_devices->lowest_devid = devid;
}
+ if (hmzoned)
+ fs_devices->hmzoned = 1;
*fs_devices_ret = fs_devices;
return 0;
}
@@ -327,6 +331,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
struct btrfs_device *device;
int ret;

+ fs_devices->zone_size = 0;
+
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (!device->name) {
printk("no name for device %llu, skip it now\n", device->devid);
@@ -350,6 +356,18 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
device->fd = fd;
if (flags & O_RDWR)
device->writeable = 1;
+
+ ret = btrfs_get_zone_info(fd, device->name, fs_devices->hmzoned,
+ &device->zinfo);
+ if (ret != 0)
+ goto fail;
+ if (!fs_devices->zone_size) {
+ fs_devices->zone_size = device->zinfo.zone_size;
+ } else if (device->zinfo.zone_size != fs_devices->zone_size) {
+ fprintf(stderr, "Device zone size differ\n");
+ ret = -EINVAL;
+ goto fail;
+ }
}
return 0;
fail:
diff --git a/volumes.h b/volumes.h
index c9262ceaea93..6ec83fe43cfe 100644
--- a/volumes.h
+++ b/volumes.h
@@ -115,6 +115,9 @@ struct btrfs_fs_devices {

int seeding;
struct btrfs_fs_devices *seed;
+
+ u64 zone_size;
+ unsigned int hmzoned:1;
};

struct btrfs_bio_stripe {
--
2.21.0

2019-06-07 14:00:01

by Naohiro Aota

[permalink] [raw]
Subject: [PATCH 02/12] btrfs-progs: utils: Introduce queue_param

Introduce the queue_param function to get a device request queue
parameter and use this function to test if the device is an SSD in
is_ssd().

Signed-off-by: Damien Le Moal <[email protected]>
[Naohiro] fixed error return value
Signed-off-by: Naohiro Aota <[email protected]>
---
mkfs/main.c | 40 ++--------------------------------------
utils.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
utils.h | 1 +
3 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/mkfs/main.c b/mkfs/main.c
index b442e6e40c37..93c0b71c864e 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -404,49 +404,13 @@ static int zero_output_file(int out_fd, u64 size)

static int is_ssd(const char *file)
{
- blkid_probe probe;
- char wholedisk[PATH_MAX];
- char sysfs_path[PATH_MAX];
- dev_t devno;
- int fd;
char rotational;
int ret;

- probe = blkid_new_probe_from_filename(file);
- if (!probe)
+ ret = queue_param(file, "rotational", &rotational, 1);
+ if (ret < 1)
return 0;

- /* Device number of this disk (possibly a partition) */
- devno = blkid_probe_get_devno(probe);
- if (!devno) {
- blkid_free_probe(probe);
- return 0;
- }
-
- /* Get whole disk name (not full path) for this devno */
- ret = blkid_devno_to_wholedisk(devno,
- wholedisk, sizeof(wholedisk), NULL);
- if (ret) {
- blkid_free_probe(probe);
- return 0;
- }
-
- snprintf(sysfs_path, PATH_MAX, "/sys/block/%s/queue/rotational",
- wholedisk);
-
- blkid_free_probe(probe);
-
- fd = open(sysfs_path, O_RDONLY);
- if (fd < 0) {
- return 0;
- }
-
- if (read(fd, &rotational, 1) < 1) {
- close(fd);
- return 0;
- }
- close(fd);
-
return rotational == '0';
}

diff --git a/utils.c b/utils.c
index c6cdc8f01dc1..7d5a1f3b7f8d 100644
--- a/utils.c
+++ b/utils.c
@@ -65,6 +65,52 @@ static unsigned short rand_seed[3];

struct btrfs_config bconf;

+/*
+ * Get a device request queue parameter.
+ */
+int queue_param(const char *file, const char *param, char *buf, size_t len)
+{
+ blkid_probe probe;
+ char wholedisk[PATH_MAX];
+ char sysfs_path[PATH_MAX];
+ dev_t devno;
+ int fd;
+ int ret;
+
+ probe = blkid_new_probe_from_filename(file);
+ if (!probe)
+ return 0;
+
+ /* Device number of this disk (possibly a partition) */
+ devno = blkid_probe_get_devno(probe);
+ if (!devno) {
+ blkid_free_probe(probe);
+ return 0;
+ }
+
+ /* Get whole disk name (not full path) for this devno */
+ ret = blkid_devno_to_wholedisk(devno,
+ wholedisk, sizeof(wholedisk), NULL);
+ if (ret) {
+ blkid_free_probe(probe);
+ return 0;
+ }
+
+ snprintf(sysfs_path, PATH_MAX, "/sys/block/%s/queue/%s",
+ wholedisk, param);
+
+ blkid_free_probe(probe);
+
+ fd = open(sysfs_path, O_RDONLY);
+ if (fd < 0)
+ return 0;
+
+ len = read(fd, buf, len);
+ close(fd);
+
+ return len;
+}
+
/*
* Discard the given range in one go
*/
diff --git a/utils.h b/utils.h
index 7c5eb798557d..47321f62c8e0 100644
--- a/utils.h
+++ b/utils.h
@@ -121,6 +121,7 @@ int get_label(const char *btrfs_dev, char *label);
int set_label(const char *btrfs_dev, const char *label);

char *__strncpy_null(char *dest, const char *src, size_t n);
+int queue_param(const char *file, const char *param, char *buf, size_t len);
int is_block_device(const char *file);
int is_mount_point(const char *file);
int is_path_exist(const char *file);
--
2.21.0