Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752289AbdHCHuA (ORCPT ); Thu, 3 Aug 2017 03:50:00 -0400 Received: from mail-pg0-f67.google.com ([74.125.83.67]:36636 "EHLO mail-pg0-f67.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752265AbdHCHt4 (ORCPT ); Thu, 3 Aug 2017 03:49:56 -0400 From: Steven Swanson X-Google-Original-From: Steven Swanson Subject: [RFC 15/16] NOVA: Performance measurement To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org Cc: Steven Swanson , dan.j.williams@intel.com Date: Thu, 03 Aug 2017 00:49:53 -0700 Message-ID: <150174659344.104003.4768103912078807362.stgit@hn> In-Reply-To: <150174646416.104003.14042713459553361884.stgit@hn> References: <150174646416.104003.14042713459553361884.stgit@hn> User-Agent: StGit/0.17.1-27-g0d46-dirty MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 44427 Lines: 1629 Signed-off-by: Steven Swanson --- fs/nova/perf.c | 594 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nova/perf.h | 96 ++++++++ fs/nova/stats.c | 685 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nova/stats.h | 218 ++++++++++++++++++ 4 files changed, 1593 insertions(+) create mode 100644 fs/nova/perf.c create mode 100644 fs/nova/perf.h create mode 100644 fs/nova/stats.c create mode 100644 fs/nova/stats.h diff --git a/fs/nova/perf.c b/fs/nova/perf.c new file mode 100644 index 000000000000..35a4c6a490c3 --- /dev/null +++ b/fs/nova/perf.c @@ -0,0 +1,594 @@ +/* + * BRIEF DESCRIPTION + * + * Performance test routines + * + * Copyright 2015-2016 Regents of the University of California, + * UCSD Non-Volatile Systems Lab, Andiry Xu + * Copyright 2012-2013 Intel Corporation + * Copyright 2009-2011 Marco Stornelli + * Copyright 2003 Sony Corporation + * Copyright 2003 Matsushita 
Electric Industrial Co., Ltd. + * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam + * + * This program is free software; you can redistribute it and/or modify it + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include "perf.h" + +/* normal memcpy functions */ +static int memcpy_read_call(char *dst, char *src, size_t off, size_t size) +{ + /* pin dst address to cache most writes, if size fits */ + memcpy(dst, src + off, size); + return 0; +} + +static int memcpy_write_call(char *dst, char *src, size_t off, size_t size) +{ + /* pin src address to cache most reads, if size fits */ + memcpy(dst + off, src, size); + return 0; +} + +static int memcpy_bidir_call(char *dst, char *src, size_t off, size_t size) +{ + /* minimize caching by forwarding both src and dst */ + memcpy(dst + off, src + off, size); + return 0; +} + +static const memcpy_call_t memcpy_calls[] = { + /* order should match enum memcpy_call_id */ + { "memcpy (mostly read)", memcpy_read_call }, + { "memcpy (mostly write)", memcpy_write_call }, + { "memcpy (read write)", memcpy_bidir_call } +}; + +/* copy from pmem functions */ +static int from_pmem_call(char *dst, char *src, size_t off, size_t size) +{ + /* pin dst address to cache most writes, if size fits */ + /* src address should point to pmem */ + memcpy_mcsafe(dst, src + off, size); + return 0; +} + +static const memcpy_call_t from_pmem_calls[] = { + /* order should match enum from_pmem_call_id */ + { "memcpy_mcsafe", from_pmem_call } +}; + +/* copy to pmem functions */ +static int to_pmem_nocache_call(char *dst, char *src, size_t off, size_t size) +{ + /* pin src address to cache most reads, if size fits */ + /* dst address should point to pmem */ + memcpy_to_pmem_nocache(dst + off, src, size); + return 0; +} + +static int to_flush_call(char *dst, char *src, size_t off, size_t size) +{ + 
/* pin src address to cache most reads, if size fits */ + /* dst address should point to pmem */ + nova_flush_buffer(dst + off, size, 0); + return 0; +} + +static int to_pmem_flush_call(char *dst, char *src, size_t off, size_t size) +{ + /* pin src address to cache most reads, if size fits */ + /* dst address should point to pmem */ + memcpy(dst + off, src, size); + nova_flush_buffer(dst + off, size, 0); + return 0; +} + +static const memcpy_call_t to_pmem_calls[] = { + /* order should match enum to_pmem_call_id */ + { "memcpy_to_pmem_nocache", to_pmem_nocache_call }, + { "flush buffer", to_flush_call }, + { "memcpy + flush buffer", to_pmem_flush_call } +}; + +/* checksum functions */ +static u64 zlib_adler32_call(u64 init, char *data, size_t size) +{ + u64 csum; + + /* include/linux/zutil.h */ + csum = zlib_adler32(init, data, size); + return csum; +} + +static u64 nd_fletcher64_call(u64 init, char *data, size_t size) +{ + u64 csum; + + /* drivers/nvdimm/core.c */ + csum = nd_fletcher64(data, size, 1); + return csum; +} + +static u64 libcrc32c_call(u64 init, char *data, size_t size) +{ + u32 crc = (u32) init; + + crc = crc32c(crc, data, size); + return (u64) crc; +} + +static u64 nova_crc32c_call(u64 init, char *data, size_t size) +{ + u32 crc = (u32) init; + + crc = nova_crc32c(crc, data, size); + return (u64) crc; +} + +static u64 plain_xor64_call(u64 init, char *data, size_t size) +{ + u64 csum = init; + u64 *word = (u64 *) data; + + while (size >= 8) { /* consume every full 8-byte word, including the last */ + csum ^= *word; + word += 1; + size -= 8; + } + + /* for perf testing ignore trailing bytes, if any */ + + return csum; +} + +static const checksum_call_t checksum_calls[] = { + /* order should match enum checksum_call_id */ + { "zlib_adler32", zlib_adler32_call }, + { "nd_fletcher64", nd_fletcher64_call }, + { "libcrc32c", libcrc32c_call }, + { "nova_crc32c", nova_crc32c_call }, + { "plain_xor64", plain_xor64_call } +}; + +/* raid5 functions */ +static u64 nova_block_parity_call(char **data, char *parity, + 
size_t size, int disks) +{ + int i, j, strp, num_strps = disks; + size_t strp_size = size; + char *block = *data; + u64 xor; + + /* FIXME: using same code as in parity.c; need a way to reuse that */ + + if (static_cpu_has(X86_FEATURE_XMM2)) { // sse2 128b + for (i = 0; i < strp_size; i += 16) { + asm volatile("movdqa %0, %%xmm0" : : "m" (block[i])); + for (strp = 1; strp < num_strps; strp++) { + j = strp * strp_size + i; + asm volatile( + "movdqa %0, %%xmm1\n" + "pxor %%xmm1, %%xmm0\n" + : : "m" (block[j]) + ); + } + asm volatile("movntdq %%xmm0, %0" : "=m" (parity[i])); + } + } else { // common 64b + for (i = 0; i < strp_size; i += 8) { + xor = *((u64 *) &block[i]); + for (strp = 1; strp < num_strps; strp++) { + j = strp * strp_size + i; + xor ^= *((u64 *) &block[j]); + } + *((u64 *) &parity[i]) = xor; + } + } + + return *((u64 *) parity); +} + +static u64 nova_block_csum_parity_call(char **data, char *parity, + size_t size, int disks) +{ + int i; + size_t strp_size = size; + char *block = *data; + u32 volatile crc[8]; // avoid results being optimized out + u64 qwd[8]; + u64 acc[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + + /* FIXME: using same code as in parity.c; need a way to reuse that */ + + for (i = 0; i < strp_size / 8; i++) { + qwd[0] = *((u64 *) (block)); + qwd[1] = *((u64 *) (block + 1 * strp_size)); + qwd[2] = *((u64 *) (block + 2 * strp_size)); + qwd[3] = *((u64 *) (block + 3 * strp_size)); + qwd[4] = *((u64 *) (block + 4 * strp_size)); + qwd[5] = *((u64 *) (block + 5 * strp_size)); + qwd[6] = *((u64 *) (block + 6 * strp_size)); + qwd[7] = *((u64 *) (block + 7 * strp_size)); + + // if (data_csum > 0 && unroll_csum) { + nova_crc32c_qword(qwd[0], acc[0]); + nova_crc32c_qword(qwd[1], acc[1]); + nova_crc32c_qword(qwd[2], acc[2]); + nova_crc32c_qword(qwd[3], acc[3]); + nova_crc32c_qword(qwd[4], acc[4]); + nova_crc32c_qword(qwd[5], acc[5]); + nova_crc32c_qword(qwd[6], acc[6]); + nova_crc32c_qword(qwd[7], acc[7]); + // } + + // if (data_parity > 0) { + /* parity is a byte pointer: store the whole 64-bit word at word index i */ + ((u64 *) parity)[i] = 
qwd[0] ^ qwd[1] ^ qwd[2] ^ qwd[3] ^ + qwd[4] ^ qwd[5] ^ qwd[6] ^ qwd[7]; + // } + + block += 8; + } + // if (data_csum > 0 && unroll_csum) { + crc[0] = cpu_to_le32((u32) acc[0]); + crc[1] = cpu_to_le32((u32) acc[1]); + crc[2] = cpu_to_le32((u32) acc[2]); + crc[3] = cpu_to_le32((u32) acc[3]); + crc[4] = cpu_to_le32((u32) acc[4]); + crc[5] = cpu_to_le32((u32) acc[5]); + crc[6] = cpu_to_le32((u32) acc[6]); + crc[7] = cpu_to_le32((u32) acc[7]); + // } + + return *((u64 *) parity); +} + +#if 0 // some test machines do not have this function (need CONFIG_MD_RAID456) +static u64 xor_blocks_call(char **data, char *parity, + size_t size, int disks) +{ + int xor_cnt, disk_id; + + memcpy(parity, data[0], size); /* init parity with the first disk */ + disks--; + disk_id = 1; + while (disks > 0) { + /* each xor_blocks call can do at most MAX_XOR_BLOCKS (4) */ + xor_cnt = min(disks, MAX_XOR_BLOCKS); + /* crypto/xor.c, used in lib/raid6 and fs/btrfs */ + xor_blocks(xor_cnt, size, parity, (void **)(data + disk_id)); + + disks -= xor_cnt; + disk_id += xor_cnt; + } + + return *((u64 *) parity); +} +#endif + +static const raid5_call_t raid5_calls[] = { + /* order should match enum raid5_call_id */ + { "nova_block_parity", nova_block_parity_call }, + { "nova_block_csum_parity", nova_block_csum_parity_call }, +// { "xor_blocks", xor_blocks_call }, +}; + +/* memory pools for perf testing */ +static void *nova_alloc_vmem_pool(size_t poolsize) +{ + void *pool = vmalloc(poolsize); + + if (pool == NULL) + return NULL; + + /* init pool to verify some checksum results */ + // memset(pool, 0xAC, poolsize); + + /* to have a clean start, flush the data cache for the given virtual + * address range in the vmap area + */ + flush_kernel_vmap_range(pool, poolsize); + + return pool; +} + +static void nova_free_vmem_pool(void *pool) +{ + if (pool != NULL) + vfree(pool); +} + +static void *nova_alloc_pmem_pool(struct super_block *sb, + struct nova_inode_info_header *sih, int cpu, size_t poolsize, + 
unsigned long *blocknr, int *allocated) +{ + int num; + void *pool; + size_t blocksize, blockoff; + u8 blocktype = NOVA_BLOCK_TYPE_4K; + + blocksize = blk_type_to_size[blocktype]; + num = poolsize / blocksize; + if (poolsize % blocksize) + num++; + + sih->ino = NOVA_TEST_PERF_INO; + sih->i_blk_type = blocktype; + sih->log_head = 0; + sih->log_tail = 0; + + *allocated = nova_new_data_blocks(sb, sih, blocknr, 0, num, + ALLOC_NO_INIT, cpu, ALLOC_FROM_HEAD); + if (*allocated < num) { + nova_dbg("%s: allocated pmem blocks %d < requested blocks %d\n", + __func__, *allocated, num); + if (*allocated > 0) + nova_free_data_blocks(sb, sih, *blocknr, *allocated); + /* nothing is held any more; keeps the caller's cleanup from freeing twice */ + *allocated = 0; + + return NULL; + } + + blockoff = nova_get_block_off(sb, *blocknr, blocktype); + pool = nova_get_block(sb, blockoff); + + return pool; +} + +static void nova_free_pmem_pool(struct super_block *sb, + struct nova_inode_info_header *sih, char **pmem, + unsigned long blocknr, int num) +{ + if (num > 0) + nova_free_data_blocks(sb, sih, blocknr, num); + *pmem = NULL; +} + +static int nova_test_func_perf(struct super_block *sb, unsigned int func_id, + size_t poolsize, size_t size, unsigned int disks) +{ + u64 csum = 12345, xor = 0; + + u64 volatile result; // avoid results being optimized out + const char *fname = NULL; + char *src = NULL, *dst = NULL, *pmem = NULL; + char **data = NULL, *parity; + size_t off = 0; + int cpu, i, j, reps, err = 0, allocated = 0; + unsigned int call_id = 0, call_gid = 0; + unsigned long blocknr = 0, nsec, lat, thru; + struct nova_inode_info_header perf_sih; + const memcpy_call_t *fmemcpy = NULL; + const checksum_call_t *fchecksum = NULL; + const raid5_call_t *fraid5 = NULL; + timing_t perf_time; + + cpu = get_cpu(); /* get cpu id and disable preemption */ + reps = poolsize / size; /* raid calls will adjust this number */ + call_id = func_id - 1; /* individual function id starting from 1 */ + + /* normal memcpy */ + if (call_id < NUM_MEMCPY_CALLS) { + src = nova_alloc_vmem_pool(poolsize); + 
dst = nova_alloc_vmem_pool(poolsize); + if (src == NULL || dst == NULL) { + err = -ENOMEM; + goto out; + } + + fmemcpy = &memcpy_calls[call_id]; + fname = fmemcpy->name; + call_gid = memcpy_gid; + + goto test; + } + call_id -= NUM_MEMCPY_CALLS; + + /* memcpy from pmem */ + if (call_id < NUM_FROM_PMEM_CALLS) { + pmem = nova_alloc_pmem_pool(sb, &perf_sih, cpu, poolsize, + &blocknr, &allocated); + dst = nova_alloc_vmem_pool(poolsize); + if (pmem == NULL || dst == NULL) { + err = -ENOMEM; + goto out; + } + + fmemcpy = &from_pmem_calls[call_id]; + fname = fmemcpy->name; + call_gid = from_pmem_gid; + + goto test; + } + call_id -= NUM_FROM_PMEM_CALLS; + + /* memcpy to pmem */ + if (call_id < NUM_TO_PMEM_CALLS) { + src = nova_alloc_vmem_pool(poolsize); + pmem = nova_alloc_pmem_pool(sb, &perf_sih, cpu, poolsize, + &blocknr, &allocated); + if (src == NULL || pmem == NULL) { + err = -ENOMEM; + goto out; + } + + fmemcpy = &to_pmem_calls[call_id]; + fname = fmemcpy->name; + call_gid = to_pmem_gid; + + goto test; + } + call_id -= NUM_TO_PMEM_CALLS; + + /* checksum */ + if (call_id < NUM_CHECKSUM_CALLS) { + src = nova_alloc_vmem_pool(poolsize); + if (src == NULL) { /* the checksum loop reads src directly */ + err = -ENOMEM; + goto out; + } + + fchecksum = &checksum_calls[call_id]; + fname = fchecksum->name; + call_gid = checksum_gid; + + goto test; + } + call_id -= NUM_CHECKSUM_CALLS; + + /* raid5 */ + if (call_id < NUM_RAID5_CALLS) { + src = nova_alloc_vmem_pool(poolsize); + data = kcalloc(disks, sizeof(char *), GFP_NOFS); + if (src == NULL || data == NULL) { /* both pools are needed */ + err = -ENOMEM; + goto out; + } + + reps = poolsize / ((disks + 1) * size); /* +1 for parity */ + + fraid5 = &raid5_calls[call_id]; + fname = fraid5->name; + call_gid = raid5_gid; + + if (call_id == nova_block_csum_parity_id && disks != 8) { + nova_dbg("%s only for 8 disks, skip testing\n", fname); + goto out; + } + + goto test; + } + call_id -= NUM_RAID5_CALLS; + + /* continue with the next call group */ + +test: + if (fmemcpy == NULL && fchecksum == NULL && fraid5 == NULL) { + nova_dbg("%s: function struct error\n", __func__); + 
err = -EFAULT; + goto out; + } + + reset_perf_timer(); + NOVA_START_TIMING(perf_t, perf_time); + + switch (call_gid) { + case memcpy_gid: + for (i = 0; i < reps; i++, off += size) + err = fmemcpy->call(dst, src, off, size); + break; + case from_pmem_gid: + for (i = 0; i < reps; i++, off += size) + err = fmemcpy->call(dst, pmem, off, size); + break; + case to_pmem_gid: + nova_memunlock_range(sb, pmem, poolsize); + for (i = 0; i < reps; i++, off += size) + err = fmemcpy->call(pmem, src, off, size); + nova_memlock_range(sb, pmem, poolsize); + break; + case checksum_gid: + for (i = 0; i < reps; i++, off += size) + /* checksum calls are memory-read intensive */ + csum = fchecksum->call(csum, src + off, size); + result = csum; + break; + case raid5_gid: + for (i = 0; i < reps; i++, off += (disks + 1) * size) { + for (j = 0; j < disks; j++) + data[j] = &src[off + j * size]; + parity = src + off + disks * size; + xor = fraid5->call(data, parity, size, disks); + } + result = xor; + break; + default: + nova_dbg("%s: invalid function group %d\n", __func__, call_gid); + break; + } + + NOVA_END_TIMING(perf_t, perf_time); + nsec = read_perf_timer(); + + // nova_info("checksum value: 0x%016llx\n", csum); + + /* the raid5 setup can compute zero repetitions; never divide by it */ + lat = (err || reps == 0) ? 0 : nsec / reps; + if (call_gid == raid5_gid) + thru = (err) ? 0 : mb_per_sec(reps * disks * size, nsec); + else + thru = (err) ? 
0 : mb_per_sec(reps * size, nsec); + + if (cpu != smp_processor_id()) /* scheduling shouldn't happen */ + nova_dbg("cpu was %d, now %d\n", cpu, smp_processor_id()); + + nova_info("%4u %25s %4u %8lu %8lu\n", func_id, fname, cpu, lat, thru); + +out: + nova_free_vmem_pool(src); + nova_free_vmem_pool(dst); + nova_free_pmem_pool(sb, &perf_sih, &pmem, blocknr, allocated); + + if (data != NULL) + kfree(data); + + put_cpu(); /* enable preemption */ + + if (err) + nova_dbg("%s: performance test aborted\n", __func__); + return err; +} + +int nova_test_perf(struct super_block *sb, unsigned int func_id, + unsigned int poolmb, size_t size, unsigned int disks) +{ + int id, ret = 0; + size_t poolsize = poolmb * 1024 * 1024; + + if (!measure_timing) { + nova_dbg("%s: measure_timing not set!\n", __func__); + ret = -EFAULT; + goto out; + } + if (func_id > NUM_PERF_CALLS) { + nova_dbg("%s: invalid function id %d!\n", __func__, func_id); + ret = -EFAULT; + goto out; + } + if (poolmb < 1 || 1024 < poolmb) { /* limit pool size to 1GB */ + nova_dbg("%s: invalid pool size %u MB!\n", __func__, poolmb); + ret = -EFAULT; + goto out; + } + if (size < 64 || poolsize < size || (size % 64)) { + nova_dbg("%s: invalid data size %zu!\n", __func__, size); + ret = -EFAULT; + goto out; + } + if (disks < 1 || 32 < disks) { /* limit number of disks */ + nova_dbg("%s: invalid disk count %u!\n", __func__, disks); + ret = -EFAULT; + goto out; + } + + nova_info("test function performance\n"); + nova_info("pool size %u MB, work size %zu, disks %u\n", + poolmb, size, disks); + + nova_info("%4s %25s %4s %8s %8s\n", "id", "name", "cpu", "ns", "MB/s"); + nova_info("-------------------------------------------------------\n"); + if (func_id == 0) { + /* individual function id starting from 1 */ + for (id = 1; id <= NUM_PERF_CALLS; id++) { + ret = nova_test_func_perf(sb, id, poolsize, + size, disks); + if (ret < 0) + goto out; + } + } else { + ret = nova_test_func_perf(sb, func_id, poolsize, size, disks); + } + 
nova_info("-------------------------------------------------------\n"); + +out: + return ret; +} diff --git a/fs/nova/perf.h b/fs/nova/perf.h new file mode 100644 index 000000000000..94bee4674f2e --- /dev/null +++ b/fs/nova/perf.h @@ -0,0 +1,96 @@ +/* + * BRIEF DESCRIPTION + * + * Performance test + * + * Copyright 2015-2016 Regents of the University of California, + * UCSD Non-Volatile Systems Lab, Andiry Xu + * Copyright 2012-2013 Intel Corporation + * Copyright 2009-2011 Marco Stornelli + * Copyright 2003 Sony Corporation + * Copyright 2003 Matsushita Electric Industrial Co., Ltd. + * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam + * + * This program is free software; you can redistribute it and/or modify it + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/zutil.h> +#include <linux/libnvdimm.h> +#include <linux/raid/xor.h> +#include "nova.h" + +#define reset_perf_timer() __this_cpu_write(Timingstats_percpu[perf_t], 0) +#define read_perf_timer() __this_cpu_read(Timingstats_percpu[perf_t]) + +#define mb_per_sec(size, nsec) (nsec == 0 ? 
0 : \ + (size * (1000000000 / 1024 / 1024) / nsec)) + +enum memcpy_call_id { + memcpy_read_id = 0, + memcpy_write_id, + memcpy_bidir_id, + NUM_MEMCPY_CALLS +}; + +enum from_pmem_call_id { + memcpy_mcsafe_id = 0, + NUM_FROM_PMEM_CALLS +}; + +enum to_pmem_call_id { + memcpy_to_pmem_nocache_id = 0, + flush_buffer_id, + memcpy_to_pmem_flush_id, + NUM_TO_PMEM_CALLS +}; + +enum checksum_call_id { + zlib_adler32_id = 0, + nd_fletcher64_id, + libcrc32c_id, + nova_crc32c_id, + plain_xor64_id, + NUM_CHECKSUM_CALLS +}; + +enum raid5_call_id { + nova_block_parity_id = 0, + nova_block_csum_parity_id, +// xor_blocks_id, + NUM_RAID5_CALLS +}; + +#define NUM_PERF_CALLS \ + (NUM_MEMCPY_CALLS + NUM_FROM_PMEM_CALLS + NUM_TO_PMEM_CALLS + \ + NUM_CHECKSUM_CALLS + NUM_RAID5_CALLS) + +enum call_group_id { + memcpy_gid = 0, + from_pmem_gid, + to_pmem_gid, + checksum_gid, + raid5_gid +}; + +typedef struct { + const char *name; /* name of this call */ +// int (*valid)(void); /* might need for availability check */ + int (*call)(char *, char *, size_t, size_t); /* dst, src, off, size */ +} memcpy_call_t; + +typedef struct { + const char *name; /* name of this call */ +// int (*valid)(void); /* might need for availability check */ + u64 (*call)(u64, char *, size_t); /* init, data, size */ +} checksum_call_t; + +typedef struct { + const char *name; /* name of this call */ +// int (*valid)(void); /* might need for availability check */ + u64 (*call)(char **, char *, /* data, parity */ + size_t, int); /* per-disk-size, data disks */ +} raid5_call_t; diff --git a/fs/nova/stats.c b/fs/nova/stats.c new file mode 100644 index 000000000000..cacf76f0d16d --- /dev/null +++ b/fs/nova/stats.c @@ -0,0 +1,685 @@ +/* + * NOVA File System statistics + * + * Copyright 2015-2016 Regents of the University of California, + * UCSD Non-Volatile Systems Lab, Andiry Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, 
+ * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "nova.h" + +const char *Timingstring[TIMING_NUM] = { + /* Init */ + "================ Initialization ================", + "init", + "mount", + "ioremap", + "new_init", + "recovery", + + /* Namei operations */ + "============= Directory operations =============", + "create", + "lookup", + "link", + "unlink", + "symlink", + "mkdir", + "rmdir", + "mknod", + "rename", + "readdir", + "add_dentry", + "remove_dentry", + "setattr", + "setsize", + + /* I/O operations */ + "================ I/O operations ================", + "dax_read", + "cow_write", + "inplace_write", + "copy_to_nvmm", + "dax_get_block", + "read_iter", + "write_iter", + + /* Memory operations */ + "============== Memory operations ===============", + "memcpy_read_nvmm", + "memcpy_write_nvmm", + "memcpy_write_back_to_nvmm", + "handle_partial_block", + + /* Memory management */ + "============== Memory management ===============", + "alloc_blocks", + "new_data_blocks", + "new_log_blocks", + "free_blocks", + "free_data_blocks", + "free_log_blocks", + + /* Transaction */ + "================= Transaction ==================", + "transaction_new_inode", + "transaction_link_change", + "update_tail", + + /* Logging */ + "============= Logging operations ===============", + "append_dir_entry", + "append_file_entry", + "append_mmap_entry", + "append_link_change", + "append_setattr", + "append_snapshot_info", + "inplace_update_entry", + + /* Tree */ + "=============== Tree operations 
================", + "checking_entry", + "assign_blocks", + + /* GC */ + "============= Garbage collection ===============", + "log_fast_gc", + "log_thorough_gc", + "check_invalid_log", + + /* Integrity */ + "============ Integrity operations ==============", + "block_csum", + "block_parity", + "block_csum_parity", + "protect_memcpy", + "protect_file_data", + "verify_entry_csum", + "verify_data_csum", + "calc_entry_csum", + "restore_file_data", + "reset_mapping", + "reset_vma", + + /* Others */ + "================ Miscellaneous =================", + "find_cache_page", + "fsync", + "write_pages", + "fallocate", + "direct_IO", + "free_old_entry", + "delete_file_tree", + "delete_dir_tree", + "new_vfs_inode", + "new_nova_inode", + "free_inode", + "free_inode_log", + "evict_inode", + "test_perf", + "wprotect", + + /* Mmap */ + "=============== MMap operations ================", + "mmap_page_fault", + "mmap_pmd_fault", + "mmap_pfn_mkwrite", + "insert_vma", + "remove_vma", + "set_vma_readonly", + "mmap_cow", + "udpate_mapping", + "udpate_pfn", + "mmap_handler", + + /* Rebuild */ + "=================== Rebuild ====================", + "rebuild_dir", + "rebuild_file", + "rebuild_snapshot_table", + + /* Snapshot */ + "=================== Snapshot ===================", + "create_snapshot", + "init_snapshot_info", + "delete_snapshot", + "append_snapshot_filedata", + "append_snapshot_inode", +}; + +u64 Timingstats[TIMING_NUM]; +DEFINE_PER_CPU(u64[TIMING_NUM], Timingstats_percpu); +u64 Countstats[TIMING_NUM]; +DEFINE_PER_CPU(u64[TIMING_NUM], Countstats_percpu); +u64 IOstats[STATS_NUM]; +DEFINE_PER_CPU(u64[STATS_NUM], IOstats_percpu); + +static void nova_print_alloc_stats(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct free_list *free_list; + unsigned long alloc_log_count = 0; + unsigned long alloc_log_pages = 0; + unsigned long alloc_data_count = 0; + unsigned long alloc_data_pages = 0; + unsigned long free_log_count = 0; + unsigned long 
freed_log_pages = 0; + unsigned long free_data_count = 0; + unsigned long freed_data_pages = 0; + int i; + + nova_info("=========== NOVA allocation stats ===========\n"); + nova_info("Alloc %llu, alloc steps %llu, average %llu\n", + Countstats[new_data_blocks_t], IOstats[alloc_steps], + Countstats[new_data_blocks_t] ? + IOstats[alloc_steps] / Countstats[new_data_blocks_t] + : 0); + nova_info("Free %llu\n", Countstats[free_data_t]); + nova_info("Fast GC %llu, check pages %llu, free pages %llu, average %llu\n", + Countstats[fast_gc_t], IOstats[fast_checked_pages], + IOstats[fast_gc_pages], Countstats[fast_gc_t] ? + IOstats[fast_gc_pages] / Countstats[fast_gc_t] : 0); + nova_info("Thorough GC %llu, checked pages %llu, free pages %llu, average %llu\n", + Countstats[thorough_gc_t], + IOstats[thorough_checked_pages], IOstats[thorough_gc_pages], + Countstats[thorough_gc_t] ? + IOstats[thorough_gc_pages] / Countstats[thorough_gc_t] + : 0); + + for (i = 0; i < sbi->cpus; i++) { + free_list = nova_get_free_list(sb, i); + + alloc_log_count += free_list->alloc_log_count; + alloc_log_pages += free_list->alloc_log_pages; + alloc_data_count += free_list->alloc_data_count; + alloc_data_pages += free_list->alloc_data_pages; + free_log_count += free_list->free_log_count; + freed_log_pages += free_list->freed_log_pages; + free_data_count += free_list->free_data_count; + freed_data_pages += free_list->freed_data_pages; + } + + nova_info("alloc log count %lu, allocated log pages %lu, alloc data count %lu, allocated data pages %lu, free log count %lu, freed log pages %lu, free data count %lu, freed data pages %lu\n", + alloc_log_count, alloc_log_pages, + alloc_data_count, alloc_data_pages, + free_log_count, freed_log_pages, + free_data_count, freed_data_pages); +} + +static void nova_print_IO_stats(struct super_block *sb) +{ + nova_info("=========== NOVA I/O stats ===========\n"); + nova_info("Read %llu, bytes %llu, average %llu\n", + Countstats[dax_read_t], IOstats[read_bytes], + 
Countstats[dax_read_t] ? + IOstats[read_bytes] / Countstats[dax_read_t] : 0); + nova_info("COW write %llu, bytes %llu, average %llu, write breaks %llu, average %llu\n", + Countstats[cow_write_t], IOstats[cow_write_bytes], + Countstats[cow_write_t] ? + IOstats[cow_write_bytes] / Countstats[cow_write_t] : 0, + IOstats[cow_write_breaks], Countstats[cow_write_t] ? + IOstats[cow_write_breaks] / Countstats[cow_write_t] + : 0); + nova_info("Inplace write %llu, bytes %llu, average %llu, write breaks %llu, average %llu\n", + Countstats[inplace_write_t], IOstats[inplace_write_bytes], + Countstats[inplace_write_t] ? + IOstats[inplace_write_bytes] / + Countstats[inplace_write_t] : 0, + IOstats[inplace_write_breaks], Countstats[inplace_write_t] ? + IOstats[inplace_write_breaks] / + Countstats[inplace_write_t] : 0); +} + +void nova_get_timing_stats(void) +{ + int i; + int cpu; + + for (i = 0; i < TIMING_NUM; i++) { + Timingstats[i] = 0; + Countstats[i] = 0; + for_each_possible_cpu(cpu) { + Timingstats[i] += per_cpu(Timingstats_percpu[i], cpu); + Countstats[i] += per_cpu(Countstats_percpu[i], cpu); + } + } +} + +void nova_get_IO_stats(void) +{ + int i; + int cpu; + + for (i = 0; i < STATS_NUM; i++) { + IOstats[i] = 0; + for_each_possible_cpu(cpu) + IOstats[i] += per_cpu(IOstats_percpu[i], cpu); + } +} + +void nova_print_timing_stats(struct super_block *sb) +{ + int i; + + nova_get_timing_stats(); + nova_get_IO_stats(); + + nova_info("=========== NOVA kernel timing stats ============\n"); + for (i = 0; i < TIMING_NUM; i++) { + /* Title */ + if (Timingstring[i][0] == '=') { + nova_info("\n%s\n\n", Timingstring[i]); + continue; + } + + if (measure_timing || Timingstats[i]) { + nova_info("%s: count %llu, timing %llu, average %llu\n", + Timingstring[i], + Countstats[i], + Timingstats[i], + Countstats[i] ? 
+				Timingstats[i] / Countstats[i] : 0);
+		} else {
+			nova_info("%s: count %llu\n",
+				Timingstring[i],
+				Countstats[i]);
+		}
+	}
+
+	nova_info("\n");
+	nova_print_alloc_stats(sb);
+	nova_print_IO_stats(sb);
+}
+
+/* Zero the global and per-CPU timing and count accumulators of every category. */
+static void nova_clear_timing_stats(void)
+{
+	int i;
+	int cpu;
+
+	for (i = 0; i < TIMING_NUM; i++) {
+		Countstats[i] = 0;
+		Timingstats[i] = 0;
+		for_each_possible_cpu(cpu) {
+			per_cpu(Timingstats_percpu[i], cpu) = 0;
+			per_cpu(Countstats_percpu[i], cpu) = 0;
+		}
+	}
+}
+
+/*
+ * Zero the global and per-CPU I/O counters, then the allocation/free
+ * counters kept in each of the sbi->cpus free lists of @sb.
+ */
+static void nova_clear_IO_stats(struct super_block *sb)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	struct free_list *free_list;
+	int i;
+	int cpu;
+
+	for (i = 0; i < STATS_NUM; i++) {
+		IOstats[i] = 0;
+		for_each_possible_cpu(cpu)
+			per_cpu(IOstats_percpu[i], cpu) = 0;
+	}
+
+	for (i = 0; i < sbi->cpus; i++) {
+		free_list = nova_get_free_list(sb, i);
+
+		free_list->alloc_log_count = 0;
+		free_list->alloc_log_pages = 0;
+		free_list->alloc_data_count = 0;
+		free_list->alloc_data_pages = 0;
+		free_list->free_log_count = 0;
+		free_list->freed_log_pages = 0;
+		free_list->free_data_count = 0;
+		free_list->freed_data_pages = 0;
+	}
+}
+
+/* Reset the timing statistics, then the I/O and allocator statistics of @sb. */
+void nova_clear_stats(struct super_block *sb)
+{
+	nova_clear_timing_stats();
+	nova_clear_IO_stats(sb);
+}
+
+/*
+ * Dump the fields of the NOVA inode @pi through nova_dbg().
+ *
+ * NOTE(review): the fields are printed as stored, without le*_to_cpu()
+ * conversion, whereas nova_print_dentry() converts ino and de_len.
+ * Confirm the on-media endianness of struct nova_inode.
+ */
+void nova_print_inode(struct nova_inode *pi)
+{
+	nova_dbg("%s: NOVA inode %llu\n", __func__, pi->nova_ino);
+	nova_dbg("valid %u, deleted %u, blk type %u, flags %u\n",
+		pi->valid, pi->deleted, pi->i_blk_type, pi->i_flags);
+	nova_dbg("size %llu, ctime %u, mtime %u, atime %u\n",
+		pi->i_size, pi->i_ctime, pi->i_mtime, pi->i_atime);
+	nova_dbg("mode %u, links %u, xattr 0x%llx, csum %u\n",
+		pi->i_mode, pi->i_links_count, pi->i_xattr, pi->csum);
+	nova_dbg("uid %u, gid %u, gen %u, create time %u\n",
+		pi->i_uid, pi->i_gid, pi->i_generation, pi->i_create_time);
+	nova_dbg("head 0x%llx, tail 0x%llx, alter head 0x%llx, tail 0x%llx\n",
+		pi->log_head, pi->log_tail, pi->alter_log_head,
+		pi->alter_log_tail);
+	nova_dbg("create epoch id %llu, delete epoch id %llu\n",
+		pi->create_epoch_id, pi->delete_epoch_id);
+}
+
+/* Print the file write entry @entry found at log offset @curr; @sb is unused. */
+static inline void nova_print_file_write_entry(struct super_block *sb,
+	u64 curr, struct nova_file_write_entry *entry)
+{
+	nova_dbg("file write entry @ 0x%llx: epoch %llu, trans %llu, pgoff %llu, pages %u, blocknr %llu, reassigned %u, updating %u, invalid count %u, size %llu, mtime %u\n",
+			curr, entry->epoch_id, entry->trans_id,
+			entry->pgoff, entry->num_pages,
+			entry->block >> PAGE_SHIFT,
+			entry->reassigned, entry->updating,
+			entry->invalid_pages, entry->size, entry->mtime);
+}
+
+/* Print the setattr entry @entry found at log offset @curr; @sb is unused. */
+static inline void nova_print_set_attr_entry(struct super_block *sb,
+	u64 curr, struct nova_setattr_logentry *entry)
+{
+	nova_dbg("set attr entry @ 0x%llx: epoch %llu, trans %llu, invalid %u, mode %u, size %llu, atime %u, mtime %u, ctime %u\n",
+			curr, entry->epoch_id, entry->trans_id,
+			entry->invalid, entry->mode,
+			entry->size, entry->atime, entry->mtime, entry->ctime);
+}
+
+/* Print the link change entry @entry found at log offset @curr; @sb is unused. */
+static inline void nova_print_link_change_entry(struct super_block *sb,
+	u64 curr, struct nova_link_change_entry *entry)
+{
+	nova_dbg("link change entry @ 0x%llx: epoch %llu, trans %llu, invalid %u, links %u, flags %u, ctime %u\n",
+			curr, entry->epoch_id, entry->trans_id,
+			entry->invalid, entry->links,
+			entry->flags, entry->ctime);
+}
+
+/* Print the mmap write entry @entry found at log offset @curr; @sb is unused. */
+static inline void nova_print_mmap_entry(struct super_block *sb,
+	u64 curr, struct nova_mmap_entry *entry)
+{
+	nova_dbg("mmap write entry @ 0x%llx: epoch %llu, invalid %u, pgoff %llu, pages %llu\n",
+			curr, entry->epoch_id, entry->invalid,
+			entry->pgoff, entry->num_pages);
+}
+
+/* Print the snapshot info entry @entry found at log offset @curr; @sb is unused. */
+static inline void nova_print_snapshot_info_entry(struct super_block *sb,
+	u64 curr, struct nova_snapshot_info_entry *entry)
+{
+	nova_dbg("snapshot info entry @ 0x%llx: epoch %llu, deleted %u, timestamp %llu\n",
+			curr, entry->epoch_id, entry->deleted,
+			entry->timestamp);
+}
+
+/*
+ * Print the directory entry @entry found at log offset @curr.
+ *
+ * Returns the entry's record length (de_len converted to CPU order), which
+ * the caller uses to step to the next entry. @sb is unused.
+ */
+static inline size_t nova_print_dentry(struct super_block *sb,
+	u64 curr, struct nova_dentry *entry)
+{
+	nova_dbg("dir logentry @ 0x%llx: epoch %llu, trans %llu, reassigned %u, invalid %u, inode %llu, links %u, namelen %u, rec len %u, name %s, mtime %u\n",
+			curr, entry->epoch_id, entry->trans_id,
+			entry->reassigned, entry->invalid,
+			le64_to_cpu(entry->ino),
+			entry->links_count, entry->name_len,
+			le16_to_cpu(entry->de_len), entry->name,
+			entry->mtime);
+
+	return le16_to_cpu(entry->de_len);
+}
+
+/*
+ * Print the log entry located at NOVA offset @curr and return the offset at
+ * which the caller should continue.
+ *
+ * Fixed-size entries advance by their structure size, directory entries by
+ * their record length, and a NEXT_PAGE marker jumps to PAGE_TAIL(curr).
+ * An unknown type or a zero-length directory entry is reported, skipped by
+ * sizeof(struct nova_file_write_entry) and flagged with NOVA_ASSERT(0).
+ */
+u64 nova_print_log_entry(struct super_block *sb, u64 curr)
+{
+	void *addr;
+	size_t size;
+	u8 type;
+
+	addr = (void *)nova_get_block(sb, curr);
+	type = nova_get_entry_type(addr);
+	switch (type) {
+	case SET_ATTR:
+		nova_print_set_attr_entry(sb, curr, addr);
+		curr += sizeof(struct nova_setattr_logentry);
+		break;
+	case LINK_CHANGE:
+		nova_print_link_change_entry(sb, curr, addr);
+		curr += sizeof(struct nova_link_change_entry);
+		break;
+	case MMAP_WRITE:
+		nova_print_mmap_entry(sb, curr, addr);
+		curr += sizeof(struct nova_mmap_entry);
+		break;
+	case SNAPSHOT_INFO:
+		nova_print_snapshot_info_entry(sb, curr, addr);
+		curr += sizeof(struct nova_snapshot_info_entry);
+		break;
+	case FILE_WRITE:
+		nova_print_file_write_entry(sb, curr, addr);
+		curr += sizeof(struct nova_file_write_entry);
+		break;
+	case DIR_LOG:
+		size = nova_print_dentry(sb, curr, addr);
+		curr += size;
+		if (size == 0) {
+			/* A zero record length would never advance; force progress. */
+			nova_dbg("%s: dentry with size 0 @ 0x%llx\n",
+					__func__, curr);
+			curr += sizeof(struct nova_file_write_entry);
+			NOVA_ASSERT(0);
+		}
+		break;
+	case NEXT_PAGE:
+		nova_dbg("%s: next page sign @ 0x%llx\n", __func__, curr);
+		curr = PAGE_TAIL(curr);
+		break;
+	default:
+		nova_dbg("%s: unknown type %d, 0x%llx\n", __func__, type, curr);
+		curr += sizeof(struct nova_file_write_entry);
+		NOVA_ASSERT(0);
+		break;
+	}
+
+	return curr;
+}
+
+/*
+ * Print every entry of the log page containing @curr, from BLOCK_OFF(curr)
+ * up to PAGE_TAIL(curr), followed by the contents of the page tail.
+ */
+void nova_print_curr_log_page(struct super_block *sb, u64 curr)
+{
+	struct nova_inode_page_tail *tail;
+	u64 start, end;
+
+	start = BLOCK_OFF(curr);
+	end = PAGE_TAIL(curr);
+
+	while (start < end)
+		start = nova_print_log_entry(sb, start);
+
+	tail = nova_get_block(sb, end);
+	nova_dbg("Page tail. curr 0x%llx, next page 0x%llx, %u entries, %u invalid\n",
+			start, tail->next_page,
+			tail->num_entries, tail->invalid_entries);
+}
+
+/*
+ * Print all entries of the log described by @sih, from log_head up to (but
+ * not including) log_tail, following page tails from page to page.
+ *
+ * Nothing is printed when either log_head or log_tail is 0.
+ */
+void nova_print_nova_log(struct super_block *sb,
+	struct nova_inode_info_header *sih)
+{
+	u64 curr;
+
+	if (sih->log_tail == 0 || sih->log_head == 0)
+		return;
+
+	curr = sih->log_head;
+	nova_dbg("Pi %lu: log head 0x%llx, tail 0x%llx\n",
+			sih->ino, curr, sih->log_tail);
+	while (curr != sih->log_tail) {
+		if ((curr & (PAGE_SIZE - 1)) == LOG_BLOCK_TAIL) {
+			struct nova_inode_page_tail *tail =
+					nova_get_block(sb, curr);
+			nova_dbg("Log tail, curr 0x%llx, next page 0x%llx, %u entries, %u invalid\n",
+					curr, tail->next_page,
+					tail->num_entries,
+					tail->invalid_entries);
+			curr = tail->next_page;
+			/*
+			 * next_page == 0 ends the page chain, as in
+			 * nova_get_nova_log_pages(). Reaching it before
+			 * log_tail means the chain is broken; stop rather
+			 * than decode "entries" starting at offset 0.
+			 */
+			if (curr == 0) {
+				nova_dbg("%s: log ends before tail 0x%llx\n",
+						__func__, sih->log_tail);
+				break;
+			}
+		} else {
+			curr = nova_print_log_entry(sb, curr);
+		}
+	}
+}
+
+/* Print the log of the NOVA inode backing the VFS inode @inode. */
+void nova_print_inode_log(struct super_block *sb, struct inode *inode)
+{
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+
+	nova_print_nova_log(sb, sih);
+}
+
+/*
+ * Count the pages linked from pi->log_head by following next_page until it
+ * is 0.
+ *
+ * Returns 0 when @pi has no log (log_head or log_tail is 0), otherwise the
+ * number of pages in the chain. @sih is only used for the debug message.
+ */
+int nova_get_nova_log_pages(struct super_block *sb,
+	struct nova_inode_info_header *sih, struct nova_inode *pi)
+{
+	struct nova_inode_log_page *curr_page;
+	u64 curr, next;
+	int count = 1;
+
+	if (pi->log_head == 0 || pi->log_tail == 0) {
+		nova_dbg("Pi %lu has no log\n", sih->ino);
+		return 0;
+	}
+
+	curr = pi->log_head;
+	curr_page = (struct nova_inode_log_page *)nova_get_block(sb, curr);
+	while ((next = curr_page->page_tail.next_page) != 0) {
+		curr = next;
+		curr_page = (struct nova_inode_log_page *)
+			nova_get_block(sb, curr);
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Print one line per linked log page of @sih, then a summary with the
+ * number of pages up to the one holding log_tail ("used"), the length of
+ * the whole chain, and sih->log_pages.
+ */
+void nova_print_nova_log_pages(struct super_block *sb,
+	struct nova_inode_info_header *sih)
+{
+	struct nova_inode_log_page *curr_page;
+	u64 curr, next;
+	int count = 1;
+	int used = count;
+
+	if (sih->log_head == 0 || sih->log_tail == 0) {
+		nova_dbg("Pi %lu has no log\n", sih->ino);
+		return;
+	}
+
+	curr = sih->log_head;
+	nova_dbg("Pi %lu: log head @ 0x%llx, tail @ 0x%llx\n",
+			sih->ino, curr, sih->log_tail);
+	curr_page = (struct nova_inode_log_page *)nova_get_block(sb, curr);
+	while ((next = curr_page->page_tail.next_page) != 0) {
+		nova_dbg("Current page 0x%llx, next page 0x%llx, %u entries, %u invalid\n",
+			curr >> PAGE_SHIFT, next >> PAGE_SHIFT,
+			curr_page->page_tail.num_entries,
+			curr_page->page_tail.invalid_entries);
+		if (sih->log_tail >> PAGE_SHIFT == curr >> PAGE_SHIFT)
+			used = count;
+		curr = next;
+		curr_page = (struct nova_inode_log_page *)
+			nova_get_block(sb, curr);
+		count++;
+	}
+	/* The last page of the chain is not visited by the loop above. */
+	if (sih->log_tail >> PAGE_SHIFT == curr >> PAGE_SHIFT)
+		used = count;
+	nova_dbg("Pi %lu: log used %d pages, has %d pages, si reports %lu pages\n",
+		sih->ino, used, count,
+		sih->log_pages);
+}
+
+/* Print the log page chain of the NOVA inode backing the VFS inode @inode. */
+void nova_print_inode_log_pages(struct super_block *sb, struct inode *inode)
+{
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+
+	nova_print_nova_log_pages(sb, sih);
+}
+
+/*
+ * Walk the main and the alternate log of @pi in lock step and report every
+ * page pair that alter_log_page() does not map onto each other, then print
+ * the length of each log and the position of the page holding its tail.
+ *
+ * Always returns 0; mismatches are only reported through nova_dbg().
+ */
+int nova_check_inode_logs(struct super_block *sb, struct nova_inode *pi)
+{
+	int count1 = 0;
+	int count2 = 0;
+	int tail1_at = 0;
+	int tail2_at = 0;
+	u64 curr, alter_curr;
+
+	curr = pi->log_head;
+	alter_curr = pi->alter_log_head;
+
+	while (curr && alter_curr) {
+		if (alter_log_page(sb, curr) != alter_curr ||
+				alter_log_page(sb, alter_curr) != curr)
+			nova_dbg("Inode %llu page %d: curr 0x%llx, alter 0x%llx, alter_curr 0x%llx, alter 0x%llx\n",
+					pi->nova_ino, count1,
+					curr, alter_log_page(sb, curr),
+					alter_curr,
+					alter_log_page(sb, alter_curr));
+
+		count1++;
+		count2++;
+		if ((curr >> PAGE_SHIFT) == (pi->log_tail >> PAGE_SHIFT))
+			tail1_at = count1;
+		if ((alter_curr >> PAGE_SHIFT) ==
+				(pi->alter_log_tail >> PAGE_SHIFT))
+			tail2_at = count2;
+		curr = next_log_page(sb, curr);
+		alter_curr = next_log_page(sb, alter_curr);
+	}
+
+	/* Count whatever remains of the longer log, if the lengths differ. */
+	while (curr) {
+		count1++;
+		if ((curr >> PAGE_SHIFT) == (pi->log_tail >> PAGE_SHIFT))
+			tail1_at = count1;
+		curr = next_log_page(sb, curr);
+	}
+
+	while (alter_curr) {
+		count2++;
+		if ((alter_curr >> PAGE_SHIFT) ==
+				(pi->alter_log_tail >> PAGE_SHIFT))
+			tail2_at = count2;
+		alter_curr = next_log_page(sb, alter_curr);
+	}
+
+	nova_dbg("Log1 %d pages, tail @ page %d\n", count1, tail1_at);
+	nova_dbg("Log2 %d pages, tail @ page %d\n", count2, tail2_at);
+
+	return 0;
+}
+
+/*
+ * Print, for each of the sbi->cpus free lists of @sb, its block range,
+ * its checksum/parity areas and its allocation/free counters.
+ */
+void nova_print_free_lists(struct super_block *sb)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	struct free_list *free_list;
+	int i;
+
+	nova_dbg("======== NOVA per-CPU free list allocation stats ========\n");
+	for (i = 0; i < sbi->cpus; i++) {
+		free_list = nova_get_free_list(sb, i);
+		nova_dbg("Free list %d: block start %lu, block end %lu, num_blocks %lu, num_free_blocks %lu, blocknode %lu\n",
+			i, free_list->block_start, free_list->block_end,
+			free_list->block_end - free_list->block_start + 1,
+			free_list->num_free_blocks, free_list->num_blocknode);
+
+		nova_dbg("Free list %d: csum start %lu, replica csum start %lu, csum blocks %lu, parity start %lu, parity blocks %lu\n",
+			i, free_list->csum_start, free_list->replica_csum_start,
+			free_list->num_csum_blocks,
+			free_list->parity_start, free_list->num_parity_blocks);
+
+		nova_dbg("Free list %d: alloc log count %lu, allocated log pages %lu, alloc data count %lu, allocated data pages %lu, free log count %lu, freed log pages %lu, free data count %lu, freed data pages %lu\n",
+			 i,
+			 free_list->alloc_log_count,
+			 free_list->alloc_log_pages,
+			 free_list->alloc_data_count,
+			 free_list->alloc_data_pages,
+			 free_list->free_log_count,
+			 free_list->freed_log_pages,
+			 free_list->free_data_count,
+			 free_list->freed_data_pages);
+	}
+}
diff --git a/fs/nova/stats.h b/fs/nova/stats.h
new file mode 100644
index 000000000000..766ba0a77872
--- /dev/null
+++ b/fs/nova/stats.h
@@ -0,0 +1,218 @@
+/*
+ * NOVA File System statistics
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * 
under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+
+/* ======================= Timing ========================= */
+/*
+ * Index into Timingstring[], Timingstats[], Countstats[] and their per-CPU
+ * counterparts. TIMING_NUM must stay last: it sizes those arrays.
+ */
+enum timing_category {
+	/* Init */
+	init_title_t,
+	init_t,
+	mount_t,
+	ioremap_t,
+	new_init_t,
+	recovery_t,
+
+	/* Namei operations */
+	namei_title_t,
+	create_t,
+	lookup_t,
+	link_t,
+	unlink_t,
+	symlink_t,
+	mkdir_t,
+	rmdir_t,
+	mknod_t,
+	rename_t,
+	readdir_t,
+	add_dentry_t,
+	remove_dentry_t,
+	setattr_t,
+	setsize_t,
+
+	/* I/O operations */
+	io_title_t,
+	dax_read_t,
+	cow_write_t,
+	inplace_write_t,
+	copy_to_nvmm_t,
+	dax_get_block_t,
+	read_iter_t,
+	write_iter_t,
+
+	/* Memory operations */
+	memory_title_t,
+	memcpy_r_nvmm_t,
+	memcpy_w_nvmm_t,
+	memcpy_w_wb_t,
+	partial_block_t,
+
+	/* Memory management */
+	mm_title_t,
+	new_blocks_t,
+	new_data_blocks_t,
+	new_log_blocks_t,
+	free_blocks_t,
+	free_data_t,
+	free_log_t,
+
+	/* Transaction */
+	trans_title_t,
+	create_trans_t,
+	link_trans_t,
+	update_tail_t,
+
+	/* Logging */
+	logging_title_t,
+	append_dir_entry_t,
+	append_file_entry_t,
+	append_mmap_entry_t,
+	append_link_change_t,
+	append_setattr_t,
+	append_snapshot_info_t,
+	update_entry_t,
+
+	/* Tree */
+	tree_title_t,
+	check_entry_t,
+	assign_t,
+
+	/* GC */
+	gc_title_t,
+	fast_gc_t,
+	thorough_gc_t,
+	check_invalid_t,
+
+	/* Integrity */
+	integrity_title_t,
+	block_csum_t,
+	block_parity_t,
+	block_csum_parity_t,
+	protect_memcpy_t,
+	protect_file_data_t,
+	verify_entry_csum_t,
+	verify_data_csum_t,
+	calc_entry_csum_t,
+	restore_data_t,
+	reset_mapping_t,
+	reset_vma_t,
+
+	/* Others */
+	others_title_t,
+	find_cache_t,
+	fsync_t,
+	write_pages_t,
+	fallocate_t,
+	direct_IO_t,
+	free_old_t,
+	delete_file_tree_t,
+	delete_dir_tree_t,
+	new_vfs_inode_t,
+	new_nova_inode_t,
+	free_inode_t,
+	free_inode_log_t,
+	evict_inode_t,
+	perf_t,
+	wprotect_t,
+
+	/* Mmap */
+	mmap_title_t,
+	mmap_fault_t,
+	pmd_fault_t,
+	pfn_mkwrite_t,
+	insert_vma_t,
+	remove_vma_t,
+	set_vma_read_t,
+	mmap_cow_t,
+	update_mapping_t,
+	update_pfn_t,
+	mmap_handler_t,
+
+	/* Rebuild */
+	rebuild_title_t,
+	rebuild_dir_t,
+	rebuild_file_t,
+	rebuild_snapshot_t,
+
+	/* Snapshot */
+	snapshot_title_t,
+	create_snapshot_t,
+	init_snapshot_info_t,
+	delete_snapshot_t,
+	append_snapshot_file_t,
+	append_snapshot_inode_t,
+
+	/* Sentinel */
+	TIMING_NUM,
+};
+
+/*
+ * Index into IOstats[] and IOstats_percpu. STATS_NUM must stay last: it
+ * sizes those arrays.
+ */
+enum stats_category {
+	alloc_steps,
+	cow_write_breaks,
+	inplace_write_breaks,
+	read_bytes,
+	cow_write_bytes,
+	inplace_write_bytes,
+	fast_checked_pages,
+	thorough_checked_pages,
+	fast_gc_pages,
+	thorough_gc_pages,
+	dirty_pages,
+	protect_head,
+	protect_tail,
+	block_csum_parity,
+	dax_cow_during_snapshot,
+	mapping_updated_pages,
+	cow_overlap_mmap,
+	dax_new_blocks,
+	inplace_new_blocks,
+	fdatasync,
+
+	/* Sentinel */
+	STATS_NUM,
+};
+
+extern const char *Timingstring[TIMING_NUM];
+extern u64 Timingstats[TIMING_NUM];
+DECLARE_PER_CPU(u64[TIMING_NUM], Timingstats_percpu);
+extern u64 Countstats[TIMING_NUM];
+DECLARE_PER_CPU(u64[TIMING_NUM], Countstats_percpu);
+extern u64 IOstats[STATS_NUM];
+DECLARE_PER_CPU(u64[STATS_NUM], IOstats_percpu);
+
+typedef struct timespec timing_t;
+
+/*
+ * The three macros below are wrapped in do { } while (0) so that each one
+ * expands to a single statement and stays correct in an unbraced
+ * if/else body.
+ */
+
+/*
+ * Record the start time of an operation in @start (a timing_t lvalue) when
+ * measure_timing is set. @name is unused; it mirrors NOVA_END_TIMING().
+ */
+#define NOVA_START_TIMING(name, start) \
+	do { \
+		if (measure_timing) \
+			getrawmonotonic(&(start)); \
+	} while (0)
+
+/*
+ * Account one occurrence of timing category @name on this CPU and, when
+ * measure_timing is set, add the nanoseconds elapsed since @start.
+ *
+ * The seconds delta is multiplied as unsigned long long so the product
+ * cannot overflow a 32-bit long; a negative tv_nsec delta is absorbed by
+ * the modular 64-bit addition.
+ */
+#define NOVA_END_TIMING(name, start) \
+	do { \
+		if (measure_timing) { \
+			timing_t end; \
+			getrawmonotonic(&end); \
+			__this_cpu_add(Timingstats_percpu[name], \
+				(end.tv_sec - (start).tv_sec) * 1000000000ULL + \
+				(end.tv_nsec - (start).tv_nsec)); \
+		} \
+		__this_cpu_add(Countstats_percpu[name], 1); \
+	} while (0)
+
+/* Add @value to this CPU's counter for stats category @name. */
+#define NOVA_STATS_ADD(name, value) \
+	do { \
+		__this_cpu_add(IOstats_percpu[name], value); \
+	} while (0)
+
+