2020-04-20 20:56:53

by Raphael Moreira Zinsly

[permalink] [raw]
Subject: [PATCH V4 0/5] selftests/powerpc: Add NX-GZIP engine testcase


This patch series are intended to test the POWER9 Nest
Accelerator (NX) GZIP engine that is being introduced by
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-March/205659.html
More information about how to access the NX can be found in that patch,
also a complete userspace library and more documentation can be found at:
https://github.com/libnxz/power-gzip

Changes in V4:
- Removed nx-helpers.h and moved relevant code to copy-paste.h.
- Removed nx-gzip.h and symlinked the vas-api.h uapi instead.
- Renamed inc to include and fixed warnings.
- Proper integrated the code to the selftests Makefile system
with help from Michael Ellerman.


Thanks,
Raphael


2020-04-20 20:56:53

by Raphael Moreira Zinsly

[permalink] [raw]
Subject: [PATCH V4 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase

Add a compression testcase for the powerpc NX-GZIP engine.

Signed-off-by: Bulent Abali <[email protected]>
Signed-off-by: Raphael Moreira Zinsly <[email protected]>
---
tools/testing/selftests/powerpc/Makefile | 1 +
.../selftests/powerpc/nx-gzip/Makefile | 8 +
.../selftests/powerpc/nx-gzip/gzfht_test.c | 433 ++++++++++++++++++
.../selftests/powerpc/nx-gzip/gzip_vas.c | 315 +++++++++++++
.../selftests/powerpc/nx-gzip/nx-gzip-test.sh | 45 ++
5 files changed, 802 insertions(+)
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
create mode 100755 tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 644770c3b754..0830e63818c1 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -19,6 +19,7 @@ SUB_DIRS = alignment \
copyloops \
dscr \
mm \
+ nx-gzip \
pmu \
signal \
primitives \
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 000000000000..016e528a0a94
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -O3 -m64 -I./include
+
+TEST_GEN_FILES := gzfht_test
+TEST_PROGS := nx-gzip-test.sh
+
+include ../../lib.mk
+
+$(TEST_GEN_FILES): gzip_vas.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 000000000000..7496a83f9c9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for productive uses or for performance or compression
+ * ratio measurements. For simplicity of demonstration, this sample
+ * code compresses in to fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management. Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <[email protected]>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE // For aligned_alloc()
+#define _DEFAULT_SOURCE // For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/*
+ * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
+ */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+ uint32_t dstlen, int with_count,
+ struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ uint32_t fc;
+
+ assert(!!cmdp);
+
+ put32(cmdp->crb, gzip_fc, 0); /* clear */
+ fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+ GZIP_FC_COMPRESS_RESUME_FHT;
+ putnn(cmdp->crb, gzip_fc, fc);
+ putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+ memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+ /* Section 6.6 programming notes; spbc may be in two different
+ * places depending on FC.
+ */
+ if (!with_count)
+ put32(cmdp->cpb, out_spbc_comp, 0);
+ else
+ put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+ /* Figure 6-3 6-4; CSB location */
+ put64(cmdp->crb, csb_address, 0);
+ put64(cmdp->crb, csb_address,
+ (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+ /* Source direct dde (scatter-gather list) */
+ clear_dde(cmdp->crb.source_dde);
+ putnn(cmdp->crb.source_dde, dde_count, 0);
+ put32(cmdp->crb.source_dde, ddebc, srclen);
+ put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+ /* Target direct dde (scatter-gather list) */
+ clear_dde(cmdp->crb.target_dde);
+ putnn(cmdp->crb.target_dde, dde_count, 0);
+ put32(cmdp->crb.target_dde, ddebc, dstlen);
+ put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+ /* Submit the crb, the job descriptor, to the accelerator */
+ return nxu_submit_job(cmdp, handle);
+}
+
+/*
+ * Prepares a blank no filename no timestamp gzip header and returns
+ * the number of bytes written to buf.
+ * Gzip specification at https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+ int i = 0;
+
+ buf[i++] = 0x1f; /* ID1 */
+ buf[i++] = 0x8b; /* ID2 */
+ buf[i++] = 0x08; /* CM */
+ buf[i++] = 0x00; /* FLG */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x04; /* XFL 4=fastest */
+ buf[i++] = 0x03; /* OS UNIX */
+
+ return i;
+}
+
+/* Caller must free the allocated buffer return nonzero on error. */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+ struct stat statbuf;
+ FILE *fp;
+ char *p;
+ size_t num_bytes;
+
+ if (stat(fname, &statbuf)) {
+ perror(fname);
+ return(-1);
+ }
+ fp = fopen(fname, "r");
+ if (fp == NULL) {
+ perror(fname);
+ return(-1);
+ }
+ assert(NULL != (p = (char *) malloc(statbuf.st_size)));
+ num_bytes = fread(p, 1, statbuf.st_size, fp);
+ if (ferror(fp) || (num_bytes != statbuf.st_size)) {
+ perror(fname);
+ return(-1);
+ }
+ *buf = p;
+ *bufsize = num_bytes;
+ return 0;
+}
+
+/* Returns nonzero on error */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+ FILE *fp;
+ size_t num_bytes;
+
+ fp = fopen(fname, "w");
+ if (fp == NULL) {
+ perror(fname);
+ return(-1);
+ }
+ num_bytes = fwrite(buf, 1, bufsize, fp);
+ if (ferror(fp) || (num_bytes != bufsize)) {
+ perror(fname);
+ return(-1);
+ }
+ fclose(fp);
+ return 0;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h.
+ * Returns number of appended bytes
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+ uint64_t flush;
+ int shift = (tebc & 0x7);
+
+ if (tebc > 0) {
+ /* Last byte is partially full */
+ buf = buf - 1;
+ *buf = *buf & (unsigned char) ((1<<tebc)-1);
+ } else
+ *buf = 0;
+ flush = ((0x1ULL & final) << shift) | *buf;
+ shift = shift + 3; /* BFINAL and BTYPE written */
+ shift = (shift <= 8) ? 8 : 16;
+ flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+ shift = shift + 32;
+ while (shift > 0) {
+ *buf++ = (unsigned char) (flush & 0xffULL);
+ flush = flush >> 8;
+ shift = shift - 8;
+ }
+ return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
+}
+
+/*
+ * Final deflate block bit. This call assumes the block
+ * beginning is byte aligned.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+ char *b = buf;
+
+ if (bfinal)
+ *b = *b | (unsigned char) 0x01;
+ else
+ *b = *b & (unsigned char) 0xfe;
+}
+
+int compress_file(int argc, char **argv, void *handle)
+{
+ char *inbuf, *outbuf, *srcbuf, *dstbuf;
+ char outname[FNAME_MAX];
+ uint32_t srclen, dstlen;
+ uint32_t flushlen, chunk;
+ size_t inlen, outlen, dsttotlen, srctotlen;
+ uint32_t crc, spbc, tpbc, tebc;
+ int lzcounts = 0;
+ int cc;
+ int num_hdr_bytes;
+ struct nx_gzip_crb_cpb_t *cmdp;
+ uint32_t pagelen = 65536;
+ int fault_tries = NX_MAX_FAULTS;
+
+ cmdp = (void *)(uintptr_t)
+ aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+ sizeof(struct nx_gzip_crb_cpb_t));
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+ exit(-1);
+ }
+ if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+ exit(-1);
+ fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
+
+ /* Generous output buffer for header/trailer */
+ outlen = 2 * inlen + 1024;
+
+ assert(NULL != (outbuf = (char *)malloc(outlen)));
+ nxu_touch_pages(outbuf, outlen, pagelen, 1);
+
+ /* Compress piecemeal in smallish chunks */
+ chunk = 1<<22;
+
+ /* Write the gzip header to the stream */
+ num_hdr_bytes = gzip_header_blank(outbuf);
+ dstbuf = outbuf + num_hdr_bytes;
+ outlen = outlen - num_hdr_bytes;
+ dsttotlen = num_hdr_bytes;
+
+ srcbuf = inbuf;
+ srctotlen = 0;
+
+ /* Init the CRB, the coprocessor request block */
+ memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+ /* Initial gzip crc32 */
+ put32(cmdp->cpb, in_crc, 0);
+
+ while (inlen > 0) {
+
+ /* Submit chunk size source data per job */
+ srclen = NX_MIN(chunk, inlen);
+ /* Supply large target in case data expands */
+ dstlen = NX_MIN(2*srclen, outlen);
+
+ /* Page faults are handled by the user code */
+
+ /* Fault-in pages; an improved code wouldn't touch so
+ * many pages but would try to estimate the
+ * compression ratio and adjust both the src and dst
+ * touch amounts.
+ */
+ nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+ 1);
+ nxu_touch_pages(srcbuf, srclen, pagelen, 0);
+ nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+ cc = compress_fht_sample(
+ srcbuf, srclen,
+ dstbuf, dstlen,
+ lzcounts, cmdp, handle);
+
+ if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+ cc != ERR_NX_TRANSLATION) {
+ fprintf(stderr, "nx error: cc= %d\n", cc);
+ exit(-1);
+ }
+
+ /* Page faults are handled by the user code */
+ if (cc == ERR_NX_TRANSLATION) {
+ NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+ NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+ fault_tries,
+ (unsigned long long) cmdp->crb.csb.fsaddr));
+ fault_tries--;
+ if (fault_tries > 0) {
+ continue;
+ } else {
+ fprintf(stderr, "error: cannot progress; ");
+ fprintf(stderr, "too many faults\n");
+ exit(-1);
+ };
+ }
+
+ fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
+
+ inlen = inlen - srclen;
+ srcbuf = srcbuf + srclen;
+ srctotlen = srctotlen + srclen;
+
+ /* Two possible locations for spbc depending on the function
+ * code.
+ */
+ spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+ get32(cmdp->cpb, out_spbc_comp_with_count);
+ assert(spbc == srclen);
+
+ /* Target byte count */
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ /* Target ending bit count */
+ tebc = getnn(cmdp->cpb, out_tebc);
+ NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+ NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+ if (inlen > 0) { /* More chunks to go */
+ set_bfinal(dstbuf, 0);
+ dstbuf = dstbuf + tpbc;
+ dsttotlen = dsttotlen + tpbc;
+ outlen = outlen - tpbc;
+ /* Round up to the next byte with a flush
+ * block; do not set the BFINAqL bit.
+ */
+ flushlen = append_sync_flush(dstbuf, tebc, 0);
+ dsttotlen = dsttotlen + flushlen;
+ outlen = outlen - flushlen;
+ dstbuf = dstbuf + flushlen;
+ NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+ flushlen));
+ } else { /* Done */
+ /* Set the BFINAL bit of the last block per Deflate
+ * specification.
+ */
+ set_bfinal(dstbuf, 1);
+ dstbuf = dstbuf + tpbc;
+ dsttotlen = dsttotlen + tpbc;
+ outlen = outlen - tpbc;
+ }
+
+ /* Resuming crc32 for the next chunk */
+ crc = get32(cmdp->cpb, out_crc);
+ put32(cmdp->cpb, in_crc, crc);
+ crc = be32toh(crc);
+ }
+
+ /* Append crc32 and ISIZE to the end */
+ memcpy(dstbuf, &crc, 4);
+ memcpy(dstbuf+4, &srctotlen, 4);
+ dsttotlen = dsttotlen + 8;
+ outlen = outlen - 8;
+
+ assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
+ strcpy(outname, argv[1]);
+ strcat(outname, FEXT);
+ if (write_output_file(outname, outbuf, dsttotlen)) {
+ fprintf(stderr, "write error: %s\n", outname);
+ exit(-1);
+ }
+
+ fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
+ dsttotlen);
+ fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+ if (inbuf != NULL)
+ free(inbuf);
+
+ if (outbuf != NULL)
+ free(outbuf);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int rc;
+ struct sigaction act;
+ void *handle;
+
+ nx_dbg = 0;
+ nx_gzip_log = NULL;
+ act.sa_handler = 0;
+ act.sa_sigaction = nxu_sigsegv_handler;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+
+ handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+ if (!handle) {
+ fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+ exit(-1);
+ }
+
+ rc = compress_file(argc, argv, handle);
+
+ nx_function_end(handle);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 000000000000..23787d5743db
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <[email protected]>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "vas-api.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync() ({ asm volatile("sync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+ int fd;
+ int function;
+ void *paste_addr;
+};
+
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+ int rc, fd;
+ void *addr;
+ struct vas_tx_win_open_attr txattr;
+
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, " open device name %s\n", devname);
+ return -errno;
+ }
+
+ memset(&txattr, 0, sizeof(txattr));
+ txattr.version = 1;
+ txattr.vas_id = pri;
+ rc = ioctl(fd, VAS_TX_WIN_OPEN, (unsigned long)&txattr);
+ if (rc < 0) {
+ fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
+ rc = -errno;
+ goto out;
+ }
+
+ addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+ if (addr == MAP_FAILED) {
+ fprintf(stderr, "mmap() failed, errno %d\n", errno);
+ rc = -errno;
+ goto out;
+ }
+ handle->fd = fd;
+ handle->paste_addr = (void *)((char *)addr + 0x400);
+
+ rc = 0;
+out:
+ close(fd);
+ return rc;
+}
+
+void *nx_function_begin(int function, int pri)
+{
+ int rc;
+ char *devname = "/dev/crypto/nx-gzip";
+ struct nx_handle *nxhandle;
+
+ if (function != NX_FUNC_COMP_GZIP) {
+ errno = EINVAL;
+ fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+ return NULL;
+ }
+
+
+ nxhandle = malloc(sizeof(*nxhandle));
+ if (!nxhandle) {
+ errno = ENOMEM;
+ fprintf(stderr, " No memory\n");
+ return NULL;
+ }
+
+ nxhandle->function = function;
+ rc = open_device_nodes(devname, pri, nxhandle);
+ if (rc < 0) {
+ errno = -rc;
+ fprintf(stderr, " open_device_nodes failed\n");
+ return NULL;
+ }
+
+ return nxhandle;
+}
+
+int nx_function_end(void *handle)
+{
+ int rc = 0;
+ struct nx_handle *nxhandle = handle;
+
+ rc = munmap(nxhandle->paste_addr - 0x400, 4096);
+ if (rc < 0) {
+ fprintf(stderr, "munmap() failed, errno %d\n", errno);
+ return rc;
+ }
+ close(nxhandle->fd);
+ free(nxhandle);
+
+ return rc;
+}
+
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+ long poll = 0;
+ uint64_t t;
+
+ /* Save power and let other threads use the h/w. top may show
+ * 100% but only because OS doesn't know we slowed the this
+ * h/w thread while polling. We're letting other threads have
+ * higher throughput on the core.
+ */
+ cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH 300000UL
+
+ t = __ppc_get_timebase();
+
+ while (getnn(cmdp->crb.csb, csb_v) == 0) {
+ ++poll;
+ hwsync();
+
+ cpu_pri_low();
+
+ /* usleep(0) takes around 29000 ticks ~60 us.
+ * 300000 is spinning for about 600 us then
+ * start sleeping.
+ */
+ if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+ cpu_pri_default();
+ usleep(1);
+ }
+
+ if (poll > CSB_MAX_POLL)
+ break;
+
+ /* Fault address from signal handler */
+ if (nx_fault_storage_address) {
+ cpu_pri_default();
+ return -EAGAIN;
+ }
+
+ }
+
+ cpu_pri_default();
+
+ /* hw has updated csb and output buffer */
+ hwsync();
+
+ /* Check CSB flags. */
+ if (getnn(cmdp->crb.csb, csb_v) == 0) {
+ fprintf(stderr, "CSB still not valid after %d polls.\n",
+ (int) poll);
+ prt_err("CSB still not valid after %d polls, giving up.\n",
+ (int) poll);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ int i, ret, retries;
+ struct nx_handle *nxhandle = handle;
+
+ assert(handle != NULL);
+ i = 0;
+ retries = 5000;
+ while (i++ < retries) {
+ hwsync();
+ vas_copy(&cmdp->crb, 0);
+ ret = vas_paste(nxhandle->paste_addr, 0);
+ hwsync();
+
+ NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+ i, retries, ret));
+
+ if ((ret == 2) || (ret == 3)) {
+
+ ret = nx_wait_for_csb(cmdp);
+ if (!ret) {
+ goto out;
+ } else if (ret == -EAGAIN) {
+ long x;
+
+ prt_err("Touching address %p, 0x%lx\n",
+ nx_fault_storage_address,
+ *(long *) nx_fault_storage_address);
+ x = *(long *) nx_fault_storage_address;
+ *(long *) nx_fault_storage_address = x;
+ nx_fault_storage_address = 0;
+ continue;
+ } else {
+ prt_err("wait_for_csb() returns %d\n", ret);
+ break;
+ }
+ } else {
+ if (i < 10) {
+ /* spin for few ticks */
+#define SPIN_TH 500UL
+ uint64_t fail_spin;
+
+ fail_spin = __ppc_get_timebase();
+ while ((__ppc_get_timebase() - fail_spin) <
+ SPIN_TH)
+ ;
+ } else {
+ /* sleep */
+ unsigned int pr = 0;
+
+ if (pr++ % 100 == 0) {
+ prt_err("Paste attempt %d/", i);
+ prt_err("%d, failed pid= %d\n", retries,
+ getpid());
+ }
+ usleep(1);
+ }
+ continue;
+ }
+ }
+
+out:
+ cpu_pri_default();
+
+ return ret;
+}
+
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ int cc;
+
+ cc = nxu_run_job(cmdp, handle);
+
+ if (!cc)
+ cc = getnn(cmdp->crb.csb, csb_cc); /* CC Table 6-8 */
+
+ return cc;
+}
+
+
+void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+ fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+ sig, info->si_code, info->si_addr);
+
+ nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Fault in pages prior to NX job submission. wr=1 may be required to
+ * touch writeable pages. System zero pages do not fault-in the page as
+ * intended. Typically set wr=1 for NX target pages and set wr=0 for NX
+ * source pages.
+ */
+int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+ char *begin = buf;
+ char *end = (char *) buf + buf_len - 1;
+ volatile char t;
+
+ assert(buf_len >= 0 && !!buf);
+
+ NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+ (buf + buf_len), buf_len, wr));
+
+ if (buf_len <= 0 || buf == NULL)
+ return -1;
+
+ do {
+ t = *begin;
+ if (wr)
+ *begin = t;
+ begin = begin + page_len;
+ } while (begin < end);
+
+ /* When buf_sz is small or buf tail is in another page */
+ t = *end;
+ if (wr)
+ *end = t;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
new file mode 100755
index 000000000000..7cc7256ba1c7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+if [[ ! -w /dev/crypto/nx-gzip ]]; then
+ echo "Can't access /dev/crypto/nx-gzip, skipping"
+ echo "skip: $0"
+ exit 4
+fi
+
+set -e
+
+function cleanup
+{
+ rm -f nx-tempfile*
+}
+
+trap cleanup EXIT
+
+function test_sizes
+{
+ local n=$1
+ local fname="nx-tempfile.$n"
+
+ for size in 4K 64K 1M 64M
+ do
+ echo "Testing $size ($n) ..."
+ dd if=/dev/urandom of=$fname bs=$size count=1
+ ./gzfht_test $fname
+ done
+}
+
+echo "Doing basic test of different sizes ..."
+test_sizes 0
+
+echo "Running tests in parallel ..."
+for i in {1..16}
+do
+ test_sizes $i &
+done
+
+wait
+
+echo "OK"
+
+exit 0
--
2.21.0

2020-04-20 20:56:53

by Raphael Moreira Zinsly

[permalink] [raw]
Subject: [PATCH V4 4/5] selftests/powerpc: Add NX-GZIP engine decompress testcase

Include a decompression testcase for the powerpc NX-GZIP
engine.

Signed-off-by: Bulent Abali <[email protected]>
Signed-off-by: Raphael Moreira Zinsly <[email protected]>
---
.../selftests/powerpc/nx-gzip/Makefile | 2 +-
.../selftests/powerpc/nx-gzip/gunz_test.c | 1028 +++++++++++++++++
.../selftests/powerpc/nx-gzip/nx-gzip-test.sh | 1 +
3 files changed, 1030 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gunz_test.c

diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index 016e528a0a94..640fad6cc2c7 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,6 +1,6 @@
CFLAGS = -O3 -m64 -I./include

-TEST_GEN_FILES := gzfht_test
+TEST_GEN_FILES := gzfht_test gunz_test
TEST_PROGS := nx-gzip-test.sh

include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
new file mode 100644
index 000000000000..6ee0fded0391
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gunzip sample code for demonstrating the P9 NX hardware
+ * interface. Not intended for productive uses or for performance or
+ * compression ratio measurements. Note also that /dev/crypto/gzip,
+ * VAS and skiboot support are required
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <[email protected]>
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE // For aligned_alloc()
+#define _DEFAULT_SOURCE // For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+#include "crb.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
+#define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
+
+#define GETINPC(X) fgetc(X)
+#define FNAME_MAX 1024
+
+/* fifo queue management */
+#define fifo_used_bytes(used) (used)
+#define fifo_free_bytes(used, len) ((len)-(used))
+/* amount of free bytes in the first and last parts */
+#define fifo_free_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (len)-((cur)+(used)) : 0)
+#define fifo_free_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (cur) : (len)-(used))
+/* amount of used bytes in the first and last parts */
+#define fifo_used_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (used) : (len)-(cur))
+#define fifo_used_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? 0 : ((used)+(cur))-(len))
+/* first and last free parts start here */
+#define fifo_free_first_offset(cur, used) ((cur)+(used))
+#define fifo_free_last_offset(cur, used, len) \
+ fifo_used_last_bytes(cur, used, len)
+/* first and last used parts start here */
+#define fifo_used_first_offset(cur) (cur)
+#define fifo_used_last_offset(cur) (0)
+
+const int fifo_in_len = 1<<24;
+const int fifo_out_len = 1<<24;
+const int page_sz = 1<<16;
+const int line_sz = 1<<7;
+const int window_max = 1<<15;
+
+/*
+ * Adds an (address, len) pair to the list of ddes (ddl) and updates
+ * the base dde. ddl[0] is the only dde in a direct dde which
+ * contains a single (addr,len) pair. For more pairs, ddl[0] becomes
+ * the indirect (base) dde that points to a list of direct ddes.
+ * See Section 6.4 of the NX-gzip user manual for DDE description.
+ * Addr=NULL, len=0 clears the ddl[0]. Returns the total number of
+ * bytes in ddl. Caller is responsible for allocting the array of
+ * nx_dde_t *ddl. If N addresses are required in the scatter-gather
+ * list, the ddl array must have N+1 entries minimum.
+ */
+static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
+ uint32_t len)
+{
+ uint32_t ddecnt;
+ uint32_t bytes;
+
+ if (addr == NULL && len == 0) {
+ clearp_dde(ddl);
+ return 0;
+ }
+
+ NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, addr,
+ __func__, len));
+
+ /* Number of ddes in the dde list ; == 0 when it is a direct dde */
+ ddecnt = getpnn(ddl, dde_count);
+ bytes = getp32(ddl, ddebc);
+
+ if (ddecnt == 0 && bytes == 0) {
+ /* First dde is unused; make it a direct dde */
+ bytes = len;
+ putp32(ddl, ddebc, bytes);
+ putp64(ddl, ddead, (uint64_t) addr);
+ } else if (ddecnt == 0) {
+ /* Converting direct to indirect dde
+ * ddl[0] becomes head dde of ddl
+ * copy direct to indirect first.
+ */
+ ddl[1] = ddl[0];
+
+ /* Add the new dde next */
+ clear_dde(ddl[2]);
+ put32(ddl[2], ddebc, len);
+ put64(ddl[2], ddead, (uint64_t) addr);
+
+ /* Ddl head points to 2 direct ddes */
+ ddecnt = 2;
+ putpnn(ddl, dde_count, ddecnt);
+ bytes = bytes + len;
+ putp32(ddl, ddebc, bytes);
+ /* Pointer to the first direct dde */
+ putp64(ddl, ddead, (uint64_t) &ddl[1]);
+ } else {
+ /* Append a dde to an existing indirect ddl */
+ ++ddecnt;
+ clear_dde(ddl[ddecnt]);
+ put64(ddl[ddecnt], ddead, (uint64_t) addr);
+ put32(ddl[ddecnt], ddebc, len);
+
+ putpnn(ddl, dde_count, ddecnt);
+ bytes = bytes + len;
+ putp32(ddl, ddebc, bytes); /* byte sum of all dde */
+ }
+ return bytes;
+}
+
+/*
+ * Touch specified number of pages represented in number bytes
+ * beginning from the first buffer in a dde list.
+ * Do not touch the pages past buf_sz-th byte's page.
+ *
+ * Set buf_sz = 0 to touch all pages described by the ddep.
+ */
+static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
+ int wr)
+{
+ uint32_t indirect_count;
+ uint32_t buf_len;
+ long total;
+ uint64_t buf_addr;
+ struct nx_dde_t *dde_list;
+ int i;
+
+ assert(!!ddep);
+
+ indirect_count = getpnn(ddep, dde_count);
+
+ NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
+ indirect_count));
+ NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
+
+ if (indirect_count == 0) {
+ /* Direct dde */
+ buf_len = getp32(ddep, ddebc);
+ buf_addr = getp64(ddep, ddead);
+
+ NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
+ buf_len, (void *)buf_addr));
+
+ if (buf_sz == 0)
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ else
+ nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
+ buf_sz), page_sz, wr);
+
+ return ERR_NX_OK;
+ }
+
+ /* Indirect dde */
+ if (indirect_count > MAX_DDE_COUNT)
+ return ERR_NX_EXCESSIVE_DDE;
+
+ /* First address of the list */
+ dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
+
+ if (buf_sz == 0)
+ buf_sz = getp32(ddep, ddebc);
+
+ total = 0;
+ for (i = 0; i < indirect_count; i++) {
+ buf_len = get32(dde_list[i], ddebc);
+ buf_addr = get64(dde_list[i], ddead);
+ total += buf_len;
+
+ NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
+ buf_len, (void *)buf_addr));
+ NXPRT(fprintf(stderr, "0x%lx\n", total));
+
+ /* Touching fewer pages than encoded in the ddebc */
+ if (total > buf_sz) {
+ buf_len = NX_MIN(buf_len, total - buf_sz);
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
+ buf_len));
+ NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
+ break;
+ }
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ }
+ return ERR_NX_OK;
+}
+
+/*
+ * Src and dst buffers are supplied in scatter gather lists.
+ * NX function code and other parameters supplied in cmdp.
+ */
+static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
+ struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ uint64_t csbaddr;
+
+ memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+ cmdp->crb.source_dde = *src;
+ cmdp->crb.target_dde = *dst;
+
+ /* Status, output byte count in tpbc */
+ csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
+ put64(cmdp->crb, csb_address, csbaddr);
+
+ /* NX reports input bytes in spbc; cleared */
+ cmdp->cpb.out_spbc_comp_wrap = 0;
+ cmdp->cpb.out_spbc_comp_with_count = 0;
+ cmdp->cpb.out_spbc_decomp = 0;
+
+ /* Clear output */
+ put32(cmdp->cpb, out_crc, INIT_CRC);
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+ /* Submit the crb, the job descriptor, to the accelerator. */
+ return nxu_submit_job(cmdp, handle);
+}
+
+int decompress_file(int argc, char **argv, void *devhandle)
+{
+ FILE *inpf = NULL;
+ FILE *outf = NULL;
+
+ int c, expect, i, cc, rc = 0;
+ char gzfname[FNAME_MAX];
+
+ /* Queuing, file ops, byte counting */
+ char *fifo_in, *fifo_out;
+ int used_in, cur_in, used_out, cur_out, read_sz, n;
+ int first_free, last_free, first_used, last_used;
+ int first_offset, last_offset;
+ int write_sz, free_space, source_sz;
+ int source_sz_estimate, target_sz_estimate;
+ uint64_t last_comp_ratio = 0; /* 1000 max */
+ uint64_t total_out = 0;
+ int is_final, is_eof;
+
+ /* nx hardware */
+ int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
+ int history_len = 0;
+ struct nx_gzip_crb_cpb_t cmd, *cmdp;
+ struct nx_dde_t *ddl_in;
+ struct nx_dde_t dde_in[6] __aligned(128);
+ struct nx_dde_t *ddl_out;
+ struct nx_dde_t dde_out[6] __aligned(128);
+ int pgfault_retries;
+
+ /* when using mmap'ed files */
+ off_t input_file_offset;
+
+ if (argc > 2) {
+ fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
+ fprintf(stderr, " writes to stdout or <fname>.nx.gunzip\n");
+ return -1;
+ }
+
+ if (argc == 1) {
+ inpf = stdin;
+ outf = stdout;
+ } else if (argc == 2) {
+ char w[1024];
+ char *wp;
+
+ inpf = fopen(argv[1], "r");
+ if (inpf == NULL) {
+ perror(argv[1]);
+ return -1;
+ }
+
+ /* Make a new file name to write to. Ignoring '.gz' */
+ wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
+ strcpy(w, wp);
+ strcat(w, ".nx.gunzip");
+
+ outf = fopen(w, "w");
+ if (outf == NULL) {
+ perror(w);
+ return -1;
+ }
+ }
+
+ /* Decode the gzip header */
+ c = GETINPC(inpf); expect = 0x1f; /* ID1 */
+ if (c != expect)
+ goto err1;
+
+ c = GETINPC(inpf); expect = 0x8b; /* ID2 */
+ if (c != expect)
+ goto err1;
+
+ c = GETINPC(inpf); expect = 0x08; /* CM */
+ if (c != expect)
+ goto err1;
+
+ int flg = GETINPC(inpf); /* FLG */
+
+ if (flg & 0xE0 || flg & 0x4 || flg == EOF)
+ goto err2;
+
+ fprintf(stderr, "gzHeader FLG %x\n", flg);
+
+ /* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
+ * sample code.
+ */
+ for (i = 0; i < 6; i++) {
+ char tmp[10];
+
+ tmp[i] = GETINPC(inpf);
+ if (tmp[i] == EOF)
+ goto err3;
+ fprintf(stderr, "%02x ", tmp[i]);
+ if (i == 5)
+ fprintf(stderr, "\n");
+ }
+ fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
+
+ /* FNAME */
+ if (flg & 0x8) {
+ int k = 0;
+
+ do {
+ c = GETINPC(inpf);
+ if (c == EOF || k >= FNAME_MAX)
+ goto err3;
+ gzfname[k++] = c;
+ } while (c);
+ fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
+ }
+
+ /* FHCRC */
+ if (flg & 0x2) {
+ c = GETINPC(inpf);
+ if (c == EOF)
+ goto err3;
+ c = GETINPC(inpf);
+ if (c == EOF)
+ goto err3;
+ fprintf(stderr, "gzHeader FHCRC: ignored\n");
+ }
+
+ used_in = cur_in = used_out = cur_out = 0;
+ is_final = is_eof = 0;
+
+ /* Allocate one page larger to prevent page faults due to NX
+ * overfetching.
+ * Either do this (char*)(uintptr_t)aligned_alloc or use
+ * -std=c11 flag to make the int-to-pointer warning go away.
+ */
+ assert((fifo_in = (char *)(uintptr_t)aligned_alloc(line_sz,
+ fifo_in_len + page_sz)) != NULL);
+ assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
+ fifo_out_len + page_sz + line_sz)) != NULL);
+ /* Leave unused space due to history rounding rules */
+ fifo_out = fifo_out + line_sz;
+ nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
+
+ ddl_in = &dde_in[0];
+ ddl_out = &dde_out[0];
+ cmdp = &cmd;
+ memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+read_state:
+
+ /* Read from .gz file */
+
+ NXPRT(fprintf(stderr, "read_state:\n"));
+
+ if (is_eof != 0)
+ goto write_state;
+
+ /* We read in to fifo_in in two steps: first: read in to from
+ * cur_in to the end of the buffer. last: if free space wrapped
+ * around, read from fifo_in offset 0 to offset cur_in.
+ */
+
+ /* Reset fifo head to reduce unnecessary wrap arounds */
+ cur_in = (used_in == 0) ? 0 : cur_in;
+
+ /* Free space total is reduced by a gap */
+ free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
+ - line_sz);
+
+ /* Free space may wrap around as first and last */
+ first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
+ last_free = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
+
+ /* Start offsets of the free memory */
+ first_offset = fifo_free_first_offset(cur_in, used_in);
+ last_offset = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
+
+ /* Reduce read_sz because of the line_sz gap */
+ read_sz = NX_MIN(free_space, first_free);
+ n = 0;
+ if (read_sz > 0) {
+ /* Read in to offset cur_in + used_in */
+ n = fread(fifo_in + first_offset, 1, read_sz, inpf);
+ used_in = used_in + n;
+ free_space = free_space - n;
+ assert(n <= read_sz);
+ if (n != read_sz) {
+ /* Either EOF or error; exit the read loop */
+ is_eof = 1;
+ goto write_state;
+ }
+ }
+
+ /* If free space wrapped around */
+ if (last_free > 0) {
+ /* Reduce read_sz because of the line_sz gap */
+ read_sz = NX_MIN(free_space, last_free);
+ n = 0;
+ if (read_sz > 0) {
+ n = fread(fifo_in + last_offset, 1, read_sz, inpf);
+ used_in = used_in + n; /* Increase used space */
+ free_space = free_space - n; /* Decrease free space */
+ assert(n <= read_sz);
+ if (n != read_sz) {
+ /* Either EOF or error; exit the read loop */
+ is_eof = 1;
+ goto write_state;
+ }
+ }
+ }
+
+ /* At this point we have used_in bytes in fifo_in with the
+ * data head starting at cur_in and possibly wrapping around.
+ */
+
+write_state:
+
+ /* Write decompressed data to output file */
+
+ NXPRT(fprintf(stderr, "write_state:\n"));
+
+ if (used_out == 0)
+ goto decomp_state;
+
+ /* If fifo_out has data waiting, write it out to the file to
+ * make free target space for the accelerator used bytes in
+ * the first and last parts of fifo_out.
+ */
+
+ first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
+ last_used = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
+
+ write_sz = first_used;
+
+ n = 0;
+ if (write_sz > 0) {
+ n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
+ used_out = used_out - n;
+ /* Move head of the fifo */
+ cur_out = (cur_out + n) % fifo_out_len;
+ assert(n <= write_sz);
+ if (n != write_sz) {
+ fprintf(stderr, "error: write\n");
+ rc = -1;
+ goto err5;
+ }
+ }
+
+ if (last_used > 0) { /* If more data available in the last part */
+ write_sz = last_used; /* Keep it here for later */
+ n = 0;
+ if (write_sz > 0) {
+ n = fwrite(fifo_out, 1, write_sz, outf);
+ used_out = used_out - n;
+ cur_out = (cur_out + n) % fifo_out_len;
+ assert(n <= write_sz);
+ if (n != write_sz) {
+ fprintf(stderr, "error: write\n");
+ rc = -1;
+ goto err5;
+ }
+ }
+ }
+
+decomp_state:
+
+ /* NX decompresses input data */
+
+ NXPRT(fprintf(stderr, "decomp_state:\n"));
+
+ if (is_final)
+ goto finish_state;
+
+ /* Address/len lists */
+ clearp_dde(ddl_in);
+ clearp_dde(ddl_out);
+
+ /* FC, CRC, HistLen, Table 6-6 */
+ if (resuming) {
+ /* Resuming a partially decompressed input.
+ * The key to resume is supplying the 32KB
+ * dictionary (history) to NX, which is basically
+ * the last 32KB of output produced.
+ */
+ fc = GZIP_FC_DECOMPRESS_RESUME;
+
+ cmdp->cpb.in_crc = cmdp->cpb.out_crc;
+ cmdp->cpb.in_adler = cmdp->cpb.out_adler;
+
+ /* Round up the history size to quadword. Section 2.10 */
+ history_len = (history_len + 15) / 16;
+ putnn(cmdp->cpb, in_histlen, history_len);
+ history_len = history_len * 16; /* bytes */
+
+ if (history_len > 0) {
+ /* Chain in the history buffer to the DDE list */
+ if (cur_out >= history_len) {
+ nx_append_dde(ddl_in, fifo_out
+ + (cur_out - history_len),
+ history_len);
+ } else {
+ nx_append_dde(ddl_in, fifo_out
+ + ((fifo_out_len + cur_out)
+ - history_len),
+ history_len - cur_out);
+ /* Up to 32KB history wraps around fifo_out */
+ nx_append_dde(ddl_in, fifo_out, cur_out);
+ }
+
+ }
+ } else {
+ /* First decompress job */
+ fc = GZIP_FC_DECOMPRESS;
+
+ history_len = 0;
+ /* Writing 0 clears out subc as well */
+ cmdp->cpb.in_histlen = 0;
+ total_out = 0;
+
+ put32(cmdp->cpb, in_crc, INIT_CRC);
+ put32(cmdp->cpb, in_adler, INIT_ADLER);
+ put32(cmdp->cpb, out_crc, INIT_CRC);
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+ /* Assuming 10% compression ratio initially; use the
+ * most recently measured compression ratio as a
+ * heuristic to estimate the input and output
+ * sizes. If we give too much input, the target buffer
+ * overflows and NX cycles are wasted, and then we
+ * must retry with smaller input size. 1000 is 100%.
+ */
+ last_comp_ratio = 100UL;
+ }
+ cmdp->crb.gzip_fc = 0;
+ putnn(cmdp->crb, gzip_fc, fc);
+
+ /*
+ * NX source buffers
+ */
+ first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
+ last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
+
+ if (first_used > 0)
+ nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
+
+ if (last_used > 0)
+ nx_append_dde(ddl_in, fifo_in, last_used);
+
+ /*
+ * NX target buffers
+ */
+ first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
+ last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
+
+ /* Reduce output free space amount not to overwrite the history */
+ int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
+ - (1<<16));
+
+ NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
+ target_max));
+
+ first_free = NX_MIN(target_max, first_free);
+ if (first_free > 0) {
+ first_offset = fifo_free_first_offset(cur_out, used_out);
+ nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
+ }
+
+ if (last_free > 0) {
+ last_free = NX_MIN(target_max - first_free, last_free);
+ if (last_free > 0) {
+ last_offset = fifo_free_last_offset(cur_out, used_out,
+ fifo_out_len);
+ nx_append_dde(ddl_out, fifo_out + last_offset,
+ last_free);
+ }
+ }
+
+ /* Target buffer size is used to limit the source data size
+ * based on previous measurements of compression ratio.
+ */
+
+ /* source_sz includes history */
+ source_sz = getp32(ddl_in, ddebc);
+ assert(source_sz > history_len);
+ source_sz = source_sz - history_len;
+
+ /* Estimating how much source is needed to 3/4 fill a
+ * target_max size target buffer. If we overshoot, then NX
+ * must repeat the job with smaller input and we waste
+ * bandwidth. If we undershoot then we use more NX calls than
+ * necessary.
+ */
+
+ source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
+ / 4000;
+
+ if (source_sz_estimate < source_sz) {
+ /* Target might be small, therefore limiting the
+ * source data.
+ */
+ source_sz = source_sz_estimate;
+ target_sz_estimate = target_max;
+ } else {
+ /* Source file might be small, therefore limiting target
+ * touch pages to a smaller value to save processor cycles.
+ */
+ target_sz_estimate = ((uint64_t)source_sz * 1000UL)
+ / (last_comp_ratio + 1);
+ target_sz_estimate = NX_MIN(2 * target_sz_estimate,
+ target_max);
+ }
+
+ source_sz = source_sz + history_len;
+
+ /* Some NX condition codes require submitting the NX job again.
+ * Kernel doesn't handle NX page faults. Expects user code to
+ * touch pages.
+ */
+ pgfault_retries = NX_MAX_FAULTS;
+
+restart_nx:
+
+ putp32(ddl_in, ddebc, source_sz);
+
+ /* Fault in pages */
+ nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
+ nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
+ nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
+
+ /* Send job to NX */
+ cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
+
+ switch (cc) {
+
+ case ERR_NX_TRANSLATION:
+
+ /* We touched the pages ahead of time. In the most common case
+ * we shouldn't be here. But may be some pages were paged out.
+ * Kernel should have placed the faulting address to fsaddr.
+ */
+ NXPRT(fprintf(stderr, "ERR_NX_TRANSLATION %p\n",
+ (void *)cmdp->crb.csb.fsaddr));
+
+ if (pgfault_retries == NX_MAX_FAULTS) {
+ /* Try once with exact number of pages */
+ --pgfault_retries;
+ goto restart_nx;
+ } else if (pgfault_retries > 0) {
+ /* If still faulting try fewer input pages
+ * assuming memory outage
+ */
+ if (source_sz > page_sz)
+ source_sz = NX_MAX(source_sz / 2, page_sz);
+ --pgfault_retries;
+ goto restart_nx;
+ } else {
+ fprintf(stderr, "cannot make progress; too many ");
+ fprintf(stderr, "page fault retries cc= %d\n", cc);
+ rc = -1;
+ goto err5;
+ }
+
+ case ERR_NX_DATA_LENGTH:
+
+ NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
+ NXPRT(fprintf(stderr, "stream may have trailing data\n"));
+
+ /* Not an error in the most common case; it just says
+ * there is trailing data that we must examine.
+ *
+ * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
+ * Fig.6-7 and Table 6-8.
+ */
+ nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
+
+ if (!csb_ce_termination(nx_ce) &&
+ csb_ce_partial_completion(nx_ce)) {
+ /* Check CPB for more information
+ * spbc and tpbc are valid
+ */
+ sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
+ subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
+ spbc = get32(cmdp->cpb, out_spbc_decomp);
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ assert(target_max >= tpbc);
+
+ goto ok_cc3; /* not an error */
+ } else {
+ /* History length error when CE(1)=1 CE(0)=0. */
+ rc = -1;
+ fprintf(stderr, "history length error cc= %d\n", cc);
+ goto err5;
+ }
+
+ case ERR_NX_TARGET_SPACE:
+
+ /* Target buffer not large enough; retry smaller input
+ * data; give at least 1 byte. SPBC/TPBC are not valid.
+ */
+ assert(source_sz > history_len);
+ source_sz = ((source_sz - history_len + 2) / 2) + history_len;
+ NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
+ NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
+ source_sz, history_len));
+ goto restart_nx;
+
+ case ERR_NX_OK:
+
+ /* This should not happen for gzip formatted data;
+ * we need trailing crc and isize
+ */
+ fprintf(stderr, "ERR_NX_OK\n");
+ spbc = get32(cmdp->cpb, out_spbc_decomp);
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ assert(target_max >= tpbc);
+ assert(spbc >= history_len);
+ source_sz = spbc - history_len;
+ goto offsets_state;
+
+ default:
+ fprintf(stderr, "error: cc= %d\n", cc);
+ rc = -1;
+ goto err5;
+ }
+
+ok_cc3:
+
+ NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
+
+ assert(spbc > history_len);
+ source_sz = spbc - history_len;
+
+ /* Table 6-4: Source Final Block Type (SFBT) describes the
+ * last processed deflate block and clues the software how to
+ * resume the next job. SUBC indicates how many input bits NX
+ * consumed but did not process. SPBC indicates how many
+ * bytes of source were given to the accelerator including
+ * history bytes.
+ */
+
+ switch (sfbt) {
+ int dhtlen;
+
+ case 0x0: /* Deflate final EOB received */
+
+ /* Calculating the checksum start position. */
+
+ source_sz = source_sz - subc / 8;
+ is_final = 1;
+ break;
+
+ /* Resume decompression cases are below. Basically
+ * indicates where NX has suspended and how to resume
+ * the input stream.
+ */
+
+ case 0x8: /* Within a literal block; use rembytecount */
+ case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
+
+ /* Supply the partially processed source byte again */
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* SUBC LS 3bits: number of bits in the first source byte need
+ * to be processed.
+ * 000 means all 8 bits; Table 6-3
+ * Clear subc, histlen, sfbt, rembytecnt, dhtlen
+ */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+ putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
+ out_rembytecnt));
+ break;
+
+ case 0xA: /* Within a FH block; */
+ case 0xB: /* Within a FH block; bfinal=1 */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+ break;
+
+ case 0xC: /* Within a DH block; */
+ case 0xD: /* Within a DH block; bfinal=1 */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+
+ dhtlen = getnn(cmdp->cpb, out_dhtlen);
+ putnn(cmdp->cpb, in_dhtlen, dhtlen);
+ assert(dhtlen >= 42);
+
+ /* Round up to a qword */
+ dhtlen = (dhtlen + 127) / 128;
+
+ while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
+ --dhtlen;
+ cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
+ }
+ break;
+
+ case 0xE: /* Within a block header; bfinal=0; */
+ /* Also given if source data exactly ends (SUBC=0) with
+ * EOB code with BFINAL=0. Means the next byte will
+ * contain a block header.
+ */
+ case 0xF: /* within a block header with BFINAL=1. */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+
+ /* Engine did not process any data */
+ if (is_eof && (source_sz == 0))
+ is_final = 1;
+ }
+
+offsets_state:
+
+ /* Adjust the source and target buffer offsets and lengths */
+
+ NXPRT(fprintf(stderr, "offsets_state:\n"));
+
+ /* Delete input data from fifo_in */
+ used_in = used_in - source_sz;
+ cur_in = (cur_in + source_sz) % fifo_in_len;
+ input_file_offset = input_file_offset + source_sz;
+
+ /* Add output data to fifo_out */
+ used_out = used_out + tpbc;
+
+ assert(used_out <= fifo_out_len);
+
+ total_out = total_out + tpbc;
+
+ /* Deflate history is 32KB max. No need to supply more
+ * than 32KB on a resume.
+ */
+ history_len = (total_out > window_max) ? window_max : total_out;
+
+ /* To estimate expected expansion in the next NX job; 500 means 50%.
+ * Deflate best case is around 1 to 1000.
+ */
+ last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
+ / ((uint64_t)tpbc + 1);
+ last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
+ NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
+ last_comp_ratio, source_sz, spbc, tpbc));
+
+ resuming = 1;
+
+finish_state:
+
+ NXPRT(fprintf(stderr, "finish_state:\n"));
+
+ if (is_final) {
+ if (used_out)
+ goto write_state; /* More data to write out */
+ else if (used_in < 8) {
+ /* Need at least 8 more bytes containing gzip crc
+ * and isize.
+ */
+ rc = -1;
+ goto err4;
+ } else {
+ /* Compare checksums and exit */
+ int i;
+ unsigned char tail[8];
+ uint32_t cksum, isize;
+
+ for (i = 0; i < 8; i++)
+ tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
+ fprintf(stderr, "computed checksum %08x isize %08x\n",
+ cmdp->cpb.out_crc, (uint32_t) (total_out
+ % (1ULL<<32)));
+ cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
+ | (uint32_t) tail[2]<<16
+ | (uint32_t) tail[3]<<24);
+ isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
+ | (uint32_t) tail[6]<<16
+ | (uint32_t) tail[7]<<24);
+ fprintf(stderr, "stored checksum %08x isize %08x\n",
+ cksum, isize);
+
+ if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
+ (total_out % (1ULL<<32))) {
+ rc = 0; goto ok1;
+ } else {
+ rc = -1; goto err4;
+ }
+ }
+ } else
+ goto read_state;
+
+ return -1;
+
+err1:
+ fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
+ expect, c);
+ return -1;
+
+err2:
+ fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
+ return -1;
+
+err3:
+ fprintf(stderr, "error: gzip header\n");
+ return -1;
+
+err4:
+ fprintf(stderr, "error: checksum missing or mismatch\n");
+
+err5:
+ok1:
+ fprintf(stderr, "decomp is complete: fclose\n");
+ fclose(outf);
+
+ return rc;
+}
+
+
+int main(int argc, char **argv)
+{
+ int rc;
+ struct sigaction act;
+ void *handle;
+
+ nx_dbg = 0;
+ nx_gzip_log = NULL;
+ act.sa_handler = 0;
+ act.sa_sigaction = nxu_sigsegv_handler;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+
+ handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+ if (!handle) {
+ fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+ exit(-1);
+ }
+
+ rc = decompress_file(argc, argv, handle);
+
+ nx_function_end(handle);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
index 18949237a001..8d336cdf75c0 100755
--- a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
+++ b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
@@ -25,6 +25,7 @@ function test_sizes
echo "Testing $size ($n) ..."
dd if=/dev/urandom of=$fname bs=$size count=1
./gzfht_test $fname
+ ./gunz_test ${fname}.nx.gz
done
}

--
2.21.0

2020-04-20 20:56:55

by Raphael Moreira Zinsly

[permalink] [raw]
Subject: [PATCH V4 1/5] selftests/powerpc: Add header files for GZIP engine test

Add files to access the powerpc NX-GZIP engine in user space.

Signed-off-by: Bulent Abali <[email protected]>
Signed-off-by: Raphael Moreira Zinsly <[email protected]>
---
.../selftests/powerpc/nx-gzip/include/crb.h | 155 ++++++++++++++++++
.../selftests/powerpc/nx-gzip/include/nx.h | 38 +++++
.../powerpc/nx-gzip/include/vas-api.h | 1 +
3 files changed, 194 insertions(+)
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/include/crb.h
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/include/nx.h
create mode 120000 tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h

diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/crb.h b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
new file mode 100644
index 000000000000..ab101085fa7e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __CRB_H
+#define __CRB_H
+#include <linux/types.h>
+#include "nx.h"
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842 (0x00003ff8)
+#define CCW_FC_842 (0x00000007)
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE (0x3fffffffffffffff)
+#define CCB_ADDRESS (0xfffffffffffffff8)
+#define CCB_CM (0x0000000000000007)
+#define CCB_CM0 (0x0000000000000004)
+#define CCB_CM12 (0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS (0x0)
+#define CCB_CM0_LAST_IN_CHAIN (0x4)
+#define CCB_CM12_STORE (0x0)
+#define CCB_CM12_INTERRUPT (0x1)
+
+#define CCB_SIZE (0x10)
+#define CCB_ALIGN CCB_SIZE
+
+struct coprocessor_completion_block {
+ __be64 value;
+ __be64 address;
+} __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V (0x80)
+#define CSB_F (0x04)
+#define CSB_CH (0x03)
+#define CSB_CE_INCOMPLETE (0x80)
+#define CSB_CE_TERMINATION (0x40)
+#define CSB_CE_TPBC (0x20)
+
+#define CSB_CC_SUCCESS (0)
+#define CSB_CC_INVALID_ALIGN (1)
+#define CSB_CC_OPERAND_OVERLAP (2)
+#define CSB_CC_DATA_LENGTH (3)
+#define CSB_CC_TRANSLATION (5)
+#define CSB_CC_PROTECTION (6)
+#define CSB_CC_RD_EXTERNAL (7)
+#define CSB_CC_INVALID_OPERAND (8)
+#define CSB_CC_PRIVILEGE (9)
+#define CSB_CC_INTERNAL (10)
+#define CSB_CC_WR_EXTERNAL (12)
+#define CSB_CC_NOSPC (13)
+#define CSB_CC_EXCESSIVE_DDE (14)
+#define CSB_CC_WR_TRANSLATION (15)
+#define CSB_CC_WR_PROTECTION (16)
+#define CSB_CC_UNKNOWN_CODE (17)
+#define CSB_CC_ABORT (18)
+#define CSB_CC_TRANSPORT (20)
+#define CSB_CC_SEGMENTED_DDL (31)
+#define CSB_CC_PROGRESS_POINT (32)
+#define CSB_CC_DDE_OVERFLOW (33)
+#define CSB_CC_SESSION (34)
+#define CSB_CC_PROVISION (36)
+#define CSB_CC_CHAIN (37)
+#define CSB_CC_SEQUENCE (38)
+#define CSB_CC_HW (39)
+
+#define CSB_SIZE (0x10)
+#define CSB_ALIGN CSB_SIZE
+
+struct coprocessor_status_block {
+ __u8 flags;
+ __u8 cs;
+ __u8 cc;
+ __u8 ce;
+ __be32 count;
+ __be64 address;
+} __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P (0x8000)
+
+#define DDE_SIZE (0x10)
+#define DDE_ALIGN DDE_SIZE
+
+struct data_descriptor_entry {
+ __be16 flags;
+ __u8 count;
+ __u8 index;
+ __be32 length;
+ __be64 address;
+} __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE (0x80)
+#define CRB_ALIGN (0x100) /* Errata: requires 256 alignment */
+
+
+/* Coprocessor Status Block field
+ * ADDRESS address of CSB
+ * C CCB is valid
+ * AT 0 = addrs are virtual, 1 = addrs are phys
+ * M enable perf monitor
+ */
+#define CRB_CSB_ADDRESS (0xfffffffffffffff0)
+#define CRB_CSB_C (0x0000000000000008)
+#define CRB_CSB_AT (0x0000000000000002)
+#define CRB_CSB_M (0x0000000000000001)
+
+struct coprocessor_request_block {
+ __be32 ccw;
+ __be32 flags;
+ __be64 csb_addr;
+
+ struct data_descriptor_entry source;
+ struct data_descriptor_entry target;
+
+ struct coprocessor_completion_block ccb;
+
+ __u8 reserved[48];
+
+ struct coprocessor_status_block csb;
+} __aligned(CRB_ALIGN);
+
+#define crb_csb_addr(c) __be64_to_cpu(c->csb_addr)
+#define crb_nx_fault_addr(c) __be64_to_cpu(c->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c) c->stamp.nx.flags
+#define crb_nx_fault_status(c) c->stamp.nx.fault_status
+#define crb_nx_pswid(c) c->stamp.nx.pswid
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS (0xff000000)
+#define CCW_CT (0x00ff0000)
+#define CCW_CD (0x0000ffff)
+#define CCW_CL (0x0000c000)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
new file mode 100644
index 000000000000..78171b43464d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ */
+#ifndef _NX_H
+#define _NX_H
+
+#include <stdbool.h>
+
+#define NX_FUNC_COMP_842 1
+#define NX_FUNC_COMP_GZIP 2
+
+#ifndef __aligned
+#define __aligned(x) __attribute__((aligned(x)))
+#endif
+
+struct nx842_func_args {
+ bool use_crc;
+ bool decompress; /* true decompress; false compress */
+ bool move_data;
+ int timeout; /* seconds */
+};
+
+struct nxbuf_t {
+ int len;
+ char *buf;
+};
+
+/* @function should be EFT (aka 842), GZIP etc */
+void *nx_function_begin(int function, int pri);
+
+int nx_function(void *handle, struct nxbuf_t *in, struct nxbuf_t *out,
+ void *arg);
+
+int nx_function_end(void *handle);
+
+#endif /* _NX_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
new file mode 120000
index 000000000000..77fb4c7236d0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/uapi/asm/vas-api.h
\ No newline at end of file
--
2.21.0

2020-04-20 20:57:01

by Raphael Moreira Zinsly

[permalink] [raw]
Subject: [PATCH V4 5/5] selftests/powerpc: Add README for GZIP engine tests

Include a README file with the instructions to use the
testcases at selftests/powerpc/nx-gzip.

Signed-off-by: Bulent Abali <[email protected]>
Signed-off-by: Raphael Moreira Zinsly <[email protected]>
---
.../powerpc/nx-gzip/99-nx-gzip.rules | 1 +
.../testing/selftests/powerpc/nx-gzip/README | 45 +++++++++++++++++++
2 files changed, 46 insertions(+)
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/README

diff --git a/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
new file mode 100644
index 000000000000..5a7118495cb3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="nxgzip", KERNEL=="nx-gzip", MODE="0666"
diff --git a/tools/testing/selftests/powerpc/nx-gzip/README b/tools/testing/selftests/powerpc/nx-gzip/README
new file mode 100644
index 000000000000..9809dbaa1905
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/README
@@ -0,0 +1,45 @@
+Test the nx-gzip function:
+=========================
+
+Verify that following device exists:
+ /dev/crypto/nx-gzip
+If you get a permission error run as sudo or set the device permissions:
+ sudo chmod go+rw /dev/crypto/nx-gzip
+However, chmod may not survive across boots. You may create a udev file such
+as:
+ /etc/udev/rules.d/99-nx-gzip.rules
+
+
+To manually build and run:
+$ gcc -O3 -I./include -o gzfht_test gzfht_test.c gzip_vas.c
+$ gcc -O3 -I./include -o gunz_test gunz_test.c gzip_vas.c
+
+
+Compress any file using Fixed Huffman mode. Output will have a .nx.gz suffix:
+$ ./gzfht_test gzip_vas.c
+file gzip_vas.c read, 6413 bytes
+compressed 6413 to 3124 bytes total, crc32 checksum = abd15e8a
+
+
+Uncompress the previous output. Output will have a .nx.gunzip suffix:
+./gunz_test gzip_vas.c.nx.gz
+gzHeader FLG 0
+00 00 00 00 04 03
+gzHeader MTIME, XFL, OS ignored
+computed checksum abd15e8a isize 0000190d
+stored checksum abd15e8a isize 0000190d
+decomp is complete: fclose
+
+
+Compare two files:
+$ sha1sum gzip_vas.c.nx.gz.nx.gunzip gzip_vas.c
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6 gzip_vas.c.nx.gz.nx.gunzip
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6 gzip_vas.c
+
+
+Note that the code here are intended for testing the nx-gzip hardware function.
+They are not intended for demonstrating performance or compression ratio.
+By being simplistic these selftests expect to allocate the entire set of source
+and target pages in the memory so it needs enough memory to work.
+For more information and source code consider using:
+https://github.com/libnxz/power-gzip
--
2.21.0

2020-04-20 20:57:47

by Raphael Moreira Zinsly

[permalink] [raw]
Subject: [PATCH V4 2/5] selftests/powerpc: Add header files for NX compresion/decompression

Add files to be able to compress and decompress files using the
powerpc NX-GZIP engine.

Signed-off-by: Bulent Abali <[email protected]>
Signed-off-by: Raphael Moreira Zinsly <[email protected]>
---
.../powerpc/nx-gzip/include/copy-paste.h | 56 ++
.../powerpc/nx-gzip/include/nx_dbg.h | 95 +++
.../selftests/powerpc/nx-gzip/include/nxu.h | 650 ++++++++++++++++++
3 files changed, 801 insertions(+)
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
create mode 100644 tools/testing/selftests/powerpc/nx-gzip/include/nxu.h

diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
new file mode 100644
index 000000000000..0db2d6485037
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* From asm-compat.h */
+#define __stringify_in_c(...) #__VA_ARGS__
+#define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+
+/*
+ * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
+ * header files.
+ */
+#define ___PPC_RA(a) (((a) & 0x1f) << 16)
+#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+
+#define PPC_INST_COPY 0x7c20060c
+#define PPC_INST_PASTE 0x7c20070d
+
+#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define CR0_SHIFT 28
+#define CR0_MASK 0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * paste RA,RB
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+ asm volatile(PPC_COPY(%0, %1)";"
+ :
+ : "b" (offset), "b" (crb)
+ : "memory");
+
+ return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+ __u32 cr;
+
+ cr = 0;
+ asm volatile(PPC_PASTE(%1, %2)";"
+ "mfocrf %0, 0x80;"
+ : "=r" (cr)
+ : "b" (offset), "b" (paste_address)
+ : "memory", "cr0");
+
+ return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
new file mode 100644
index 000000000000..f2c0eee2317e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright 2020 IBM Corporation
+ *
+ */
+
+#ifndef _NXU_DBG_H_
+#define _NXU_DBG_H_
+
+#include <sys/file.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <pthread.h>
+
+extern FILE * nx_gzip_log;
+extern int nx_gzip_trace;
+extern unsigned int nx_gzip_inflate_impl;
+extern unsigned int nx_gzip_deflate_impl;
+extern unsigned int nx_gzip_inflate_flags;
+extern unsigned int nx_gzip_deflate_flags;
+
+extern int nx_dbg;
+pthread_mutex_t mutex_log;
+
+#define nx_gzip_trace_enabled() (nx_gzip_trace & 0x1)
+#define nx_gzip_hw_trace_enabled() (nx_gzip_trace & 0x2)
+#define nx_gzip_sw_trace_enabled() (nx_gzip_trace & 0x4)
+#define nx_gzip_gather_statistics() (nx_gzip_trace & 0x8)
+#define nx_gzip_per_stream_stat() (nx_gzip_trace & 0x10)
+
+#define prt(fmt, ...) do { \
+ pthread_mutex_lock(&mutex_log); \
+ flock(nx_gzip_log->_fileno, LOCK_EX); \
+ time_t t; struct tm *m; time(&t); m = localtime(&t); \
+ fprintf(nx_gzip_log, "[%04d/%02d/%02d %02d:%02d:%02d] " \
+ "pid %d: " fmt, \
+ (int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
+ (int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec, \
+ (int)getpid(), ## __VA_ARGS__); \
+ fflush(nx_gzip_log); \
+ flock(nx_gzip_log->_fileno, LOCK_UN); \
+ pthread_mutex_unlock(&mutex_log); \
+} while (0)
+
+/* Use in case of an error */
+#define prt_err(fmt, ...) do { if (nx_dbg >= 0) { \
+ prt("%s:%u: Error: "fmt, \
+ __FILE__, __LINE__, ## __VA_ARGS__); \
+}} while (0)
+
+/* Use in case of an warning */
+#define prt_warn(fmt, ...) do { if (nx_dbg >= 1) { \
+ prt("%s:%u: Warning: "fmt, \
+ __FILE__, __LINE__, ## __VA_ARGS__); \
+}} while (0)
+
+/* Informational printouts */
+#define prt_info(fmt, ...) do { if (nx_dbg >= 2) { \
+ prt("Info: "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace zlib wrapper code */
+#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) { \
+ prt("### "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace statistics */
+#define prt_stat(fmt, ...) do { if (nx_gzip_gather_statistics()) { \
+ prt("### "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace zlib hardware implementation */
+#define hw_trace(fmt, ...) do { \
+ if (nx_gzip_hw_trace_enabled()) \
+ fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
+ } while (0)
+
+/* Trace zlib software implementation */
+#define sw_trace(fmt, ...) do { \
+ if (nx_gzip_sw_trace_enabled()) \
+ fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
+ } while (0)
+
+
+/**
+ * str_to_num - Convert string into number and copy with endings like
+ * KiB for kilobyte
+ * MiB for megabyte
+ * GiB for gigabyte
+ */
+uint64_t str_to_num(char *str);
+void nx_lib_debug(int onoff);
+
+#endif /* _NXU_DBG_H_ */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
new file mode 100644
index 000000000000..a9f07367b3e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
@@ -0,0 +1,650 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Hardware interface of the NX-GZIP compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2020
+ *
+ * Author: Bulent Abali <[email protected]>
+ *
+ */
+
+#ifndef _NXU_H
+#define _NXU_H
+
+#include <stdint.h>
+#include <endian.h>
+#include "nx.h"
+
+/* deflate */
+#define LLSZ 286
+#define DSZ 30
+
+/* nx */
+#define DHTSZ 18
+#define DHT_MAXSZ 288
+#define MAX_DDE_COUNT 256
+
+/* util */
+#ifdef NXDBG
+#define NXPRT(X) X
+#else
+#define NXPRT(X)
+#endif
+
+#ifdef NXTIMER
+#include <sys/platform/ppc.h>
+#define NX_CLK(X) X
+#define nx_get_time() __ppc_get_timebase()
+#define nx_get_freq() __ppc_get_timebase_freq()
+#else
+#define NX_CLK(X)
+#define nx_get_time() (-1)
+#define nx_get_freq() (-1)
+#endif
+
+#define NX_MAX_FAULTS 500
+
+/*
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+union nx_qw_t {
+ uint32_t word[4];
+ uint64_t dword[2];
+} __aligned(16);
+
+/*
+ * Note: NX registers with fewer than 32 bits are declared by
+ * convention as uint32_t variables in unions. If *_offset and *_mask
+ * are defined for a variable, then use get_ put_ macros to
+ * conveniently access the register fields for endian conversions.
+ */
+
+struct nx_dde_t {
+ /* Data Descriptor Element, Section 6.4 */
+ union {
+ uint32_t dde_count;
+ /* When dde_count == 0 ddead is a pointer to a data buffer;
+ * ddebc is the buffer length bytes.
+ * When dde_count > 0 dde is an indirect dde; ddead is a
+ * pointer to a contiguous list of direct ddes; ddebc is the
+ * total length of all data pointed to by the list of direct
+ * ddes. Note that only one level of indirection is permitted.
+ * See Section 6.4 of the user manual for additional details.
+ */
+ };
+ uint32_t ddebc; /* dde byte count */
+ uint64_t ddead; /* dde address */
+} __aligned(16);
+
+struct nx_csb_t {
+ /* Coprocessor Status Block, Section 6.6 */
+ union {
+ uint32_t csb_v;
+ /* Valid bit. v must be set to 0 by the program
+ * before submitting the coprocessor command.
+ * Software can poll for the v bit
+ */
+
+ uint32_t csb_f;
+ /* 16B CSB size. Written to 0 by DMA when it writes the CPB */
+
+ uint32_t csb_cs;
+ /* cs completion sequence; unused */
+
+ uint32_t csb_cc;
+ /* cc completion code; cc != 0 exception occurred */
+
+ uint32_t csb_ce;
+ /* ce completion extension */
+
+ };
+ uint32_t tpbc;
+ /* target processed byte count TPBC */
+
+ uint64_t fsaddr;
+ /* Section 6.12.1 CSB NonZero error summary. FSA Failing storage
+ * address. Address where error occurred. When available, written
+ * to A field of CSB
+ */
+} __aligned(16);
+
+struct nx_ccb_t {
+ /* Coprocessor Completion Block, Section 6.7 */
+
+ uint32_t reserved[3];
+ union {
+ /* When crb.c==0 (no ccb defined) it is reserved;
+ * When crb.c==1 (ccb defined) it is cm
+ */
+
+ uint32_t ccb_cm;
+ /* Signal interrupt of crb.c==1 and cm==1 */
+
+ uint32_t word;
+ /* generic access to the 32bit word */
+ };
+} __aligned(16);
+
+struct vas_stamped_crb_t {
+ /*
+ * CRB operand of the paste coprocessor instruction is stamped
+ * in quadword 4 with the information shown here as its written
+ * in to the receive FIFO of the coprocessor
+ */
+
+ union {
+ uint32_t vas_buf_num;
+ /* Verification only vas buffer number which correlates to
+ * the low order bits of the atag in the paste command
+ */
+
+ uint32_t send_wc_id;
+ /* Pointer to Send Window Context that provides for NX address
+ * translation information, such as MSR and LPCR bits, job
+ * completion interrupt RA, PSWID, and job utilization counter.
+ */
+
+ };
+ union {
+ uint32_t recv_wc_id;
+ /* Pointer to Receive Window Context. NX uses this to return
+ * credits to a Receive FIFO as entries are dequeued.
+ */
+
+ };
+ uint32_t reserved2;
+ union {
+ uint32_t vas_invalid;
+ /* Invalid bit. If this bit is 1 the CRB is discarded by
+ * NX upon fetching from the receive FIFO. If this bit is 0
+ * the CRB is processed normally. The bit is stamped to 0
+ * by VAS and may be written to 1 by hypervisor while
+ * the CRB is in the receive FIFO (in memory).
+ */
+
+ };
+};
+
+struct nx_stamped_fault_crb_t {
+ /*
+ * A CRB that has a translation fault is stamped by NX in quadword 4
+ * and pasted to the Fault Send Window in VAS.
+ */
+ uint64_t fsa;
+ union {
+ uint32_t nxsf_t;
+ uint32_t nxsf_fs;
+ };
+ uint32_t pswid;
+};
+
+union stamped_crb_t {
+ struct vas_stamped_crb_t vas;
+ struct nx_stamped_fault_crb_t nx;
+};
+
+struct nx_gzip_cpb_t {
+ /*
+ * Coprocessor Parameter Block In/Out are used to pass metadata
+ * to/from accelerator. Tables 6.5 and 6.6 of the user manual.
+ */
+
+ /* CPBInput */
+
+ struct {
+ union {
+ union nx_qw_t qw0;
+ struct {
+ uint32_t in_adler; /* bits 0:31 */
+ uint32_t in_crc; /* bits 32:63 */
+ union {
+ uint32_t in_histlen; /* bits 64:75 */
+ uint32_t in_subc; /* bits 93:95 */
+ };
+ union {
+ /* bits 108:111 */
+ uint32_t in_sfbt;
+ /* bits 112:127 */
+ uint32_t in_rembytecnt;
+ /* bits 116:127 */
+ uint32_t in_dhtlen;
+ };
+ };
+ };
+ union {
+ union nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */
+ char in_dht_char[DHT_MAXSZ]; /* byte access */
+ };
+ union nx_qw_t reserved[5]; /* qw[19:23] */
+ };
+
+ /* CPBOutput */
+
+ volatile struct {
+ union {
+ union nx_qw_t qw24;
+ struct {
+ uint32_t out_adler; /* bits 0:31 qw[24] */
+ uint32_t out_crc; /* bits 32:63 qw[24] */
+ union {
+ /* bits 77:79 qw[24] */
+ uint32_t out_tebc;
+ /* bits 80:95 qw[24] */
+ uint32_t out_subc;
+ };
+ union {
+ /* bits 108:111 qw[24] */
+ uint32_t out_sfbt;
+ /* bits 112:127 qw[24] */
+ uint32_t out_rembytecnt;
+ /* bits 116:127 qw[24] */
+ uint32_t out_dhtlen;
+ };
+ };
+ };
+ union {
+ union nx_qw_t qw25[79]; /* qw[25:103] */
+ /* qw[25] compress no lzcounts or wrap */
+ uint32_t out_spbc_comp_wrap;
+ uint32_t out_spbc_wrap; /* qw[25] wrap */
+ /* qw[25] compress no lzcounts */
+ uint32_t out_spbc_comp;
+ /* 286 LL and 30 D symbol counts */
+ uint32_t out_lzcount[LLSZ+DSZ];
+ struct {
+ union nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */
+ /* qw[43] decompress */
+ uint32_t out_spbc_decomp;
+ };
+ };
+ /* qw[104] compress with lzcounts */
+ uint32_t out_spbc_comp_with_count;
+ };
+} __aligned(128);
+
+struct nx_gzip_crb_t {
+ union { /* byte[0:3] */
+ uint32_t gzip_fc; /* bits[24-31] */
+ };
+ uint32_t reserved1; /* byte[4:7] */
+ union {
+ uint64_t csb_address; /* byte[8:15] */
+ struct {
+ uint32_t reserved2;
+ union {
+ uint32_t crb_c;
+ /* c==0 no ccb defined */
+
+ uint32_t crb_at;
+ /* at==0 address type is ignored;
+ * all addrs effective assumed.
+ */
+
+ };
+ };
+ };
+ struct nx_dde_t source_dde; /* byte[16:31] */
+ struct nx_dde_t target_dde; /* byte[32:47] */
+ volatile struct nx_ccb_t ccb; /* byte[48:63] */
+ volatile union {
+ /* byte[64:239] shift csb by 128 bytes out of the crb; csb was
+ * in crb earlier; JReilly says csb written with partial inject
+ */
+ union nx_qw_t reserved64[11];
+ union stamped_crb_t stamp; /* byte[64:79] */
+ };
+ volatile struct nx_csb_t csb;
+} __aligned(128);
+
+struct nx_gzip_crb_cpb_t {
+ struct nx_gzip_crb_t crb;
+ struct nx_gzip_cpb_t cpb;
+} __aligned(2048);
+
+
+/*
+ * NX hardware convention has the msb bit on the left numbered 0.
+ * The defines below has *_offset defined as the right most bit
+ * position of a field. x of size_mask(x) is the field width in bits.
+ */
+
+#define size_mask(x) ((1U<<(x))-1)
+
+/*
+ * Offsets and Widths within the containing 32 bits of the various NX
+ * gzip hardware registers. Use the getnn/putnn macros to access
+ * these regs
+ */
+
+#define dde_count_mask size_mask(8)
+#define dde_count_offset 23
+
+/* CSB */
+
+#define csb_v_mask size_mask(1)
+#define csb_v_offset 0
+#define csb_f_mask size_mask(1)
+#define csb_f_offset 6
+#define csb_cs_mask size_mask(8)
+#define csb_cs_offset 15
+#define csb_cc_mask size_mask(8)
+#define csb_cc_offset 23
+#define csb_ce_mask size_mask(8)
+#define csb_ce_offset 31
+
+/* CCB */
+
+#define ccb_cm_mask size_mask(3)
+#define ccb_cm_offset 31
+
+/* VAS stamped CRB fields */
+
+#define vas_buf_num_mask size_mask(6)
+#define vas_buf_num_offset 5
+#define send_wc_id_mask size_mask(16)
+#define send_wc_id_offset 31
+#define recv_wc_id_mask size_mask(16)
+#define recv_wc_id_offset 31
+#define vas_invalid_mask size_mask(1)
+#define vas_invalid_offset 31
+
+/* NX stamped fault CRB fields */
+
+#define nxsf_t_mask size_mask(1)
+#define nxsf_t_offset 23
+#define nxsf_fs_mask size_mask(8)
+#define nxsf_fs_offset 31
+
+/* CPB input */
+
+#define in_histlen_mask size_mask(12)
+#define in_histlen_offset 11
+#define in_dhtlen_mask size_mask(12)
+#define in_dhtlen_offset 31
+#define in_subc_mask size_mask(3)
+#define in_subc_offset 31
+#define in_sfbt_mask size_mask(4)
+#define in_sfbt_offset 15
+#define in_rembytecnt_mask size_mask(16)
+#define in_rembytecnt_offset 31
+
+/* CPB output */
+
+#define out_tebc_mask size_mask(3)
+#define out_tebc_offset 15
+#define out_subc_mask size_mask(16)
+#define out_subc_offset 31
+#define out_sfbt_mask size_mask(4)
+#define out_sfbt_offset 15
+#define out_rembytecnt_mask size_mask(16)
+#define out_rembytecnt_offset 31
+#define out_dhtlen_mask size_mask(12)
+#define out_dhtlen_offset 31
+
+/* CRB */
+
+#define gzip_fc_mask size_mask(8)
+#define gzip_fc_offset 31
+#define crb_c_mask size_mask(1)
+#define crb_c_offset 28
+#define crb_at_mask size_mask(1)
+#define crb_at_offset 30
+#define csb_address_mask ~(15UL) /* mask off bottom 4b */
+
+/*
+ * Access macros for the registers. Do not access registers directly
+ * because of the endian conversion. P9 processor may run either as
+ * Little or Big endian. However the NX coprocessor regs are always
+ * big endian.
+ * Use the 32 and 64b macros to access respective
+ * register sizes.
+ * Use nn forms for the register fields shorter than 32 bits.
+ */
+
+#define getnn(ST, REG) ((be32toh(ST.REG) >> (31-REG##_offset)) \
+ & REG##_mask)
+#define getpnn(ST, REG) ((be32toh((ST)->REG) >> (31-REG##_offset)) \
+ & REG##_mask)
+#define get32(ST, REG) (be32toh(ST.REG))
+#define getp32(ST, REG) (be32toh((ST)->REG))
+#define get64(ST, REG) (be64toh(ST.REG))
+#define getp64(ST, REG) (be64toh((ST)->REG))
+
+#define unget32(ST, REG) (get32(ST, REG) & ~((REG##_mask) \
+ << (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define ungetp32(ST, REG) (getp32(ST, REG) & ~((REG##_mask) \
+ << (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define clear_regs(ST) memset((void *)(&(ST)), 0, sizeof(ST))
+#define clear_dde(ST) do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \
+ } while (0)
+#define clearp_dde(ST) do { (ST)->dde_count = (ST)->ddebc = 0; \
+ (ST)->ddead = 0; \
+ } while (0)
+#define clear_struct(ST) memset((void *)(&(ST)), 0, sizeof(ST))
+#define putnn(ST, REG, X) (ST.REG = htobe32(unget32(ST, REG) | (((X) \
+ & REG##_mask) << (31-REG##_offset))))
+#define putpnn(ST, REG, X) ((ST)->REG = htobe32(ungetp32(ST, REG) \
+ | (((X) & REG##_mask) << (31-REG##_offset))))
+
+#define put32(ST, REG, X) (ST.REG = htobe32(X))
+#define putp32(ST, REG, X) ((ST)->REG = htobe32(X))
+#define put64(ST, REG, X) (ST.REG = htobe64(X))
+#define putp64(ST, REG, X) ((ST)->REG = htobe64(X))
+
+/*
+ * Completion extension ce(0) ce(1) ce(2). Bits ce(3-7)
+ * unused. Section 6.6 Figure 6.7.
+ */
+
+#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
+#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
+#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
+
+#define CSB_CE_PARTIAL 0x4
+#define CSB_CE_TERMINATE 0x2
+#define CSB_CE_TPBC_VALID 0x1
+
+#define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE))
+/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
+
+#define csb_ce_check_completion(X) (!csb_ce_termination(X))
+/* if not terminated then check full or partial completion */
+
+#define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL))
+#define csb_ce_full_completion(X) (!csb_ce_partial_completion(X))
+#define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID))
+/* TPBC indicates successfully stored data count */
+
+#define csb_ce_default_err(X) csb_ce_termination(X)
+/* most error CEs have CE(0)=0 and CE(1)=1 */
+
+#define csb_ce_cc3_partial(X) csb_ce_partial_completion(X)
+/* some CC=3 are partially completed, Table 6-8 */
+
+#define csb_ce_cc64(X) ((X)&(CSB_CE_PARTIAL \
+ | CSB_CE_TERMINATE) == 0)
+/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
+ * compress smaller than source.
+ */
+
+/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
+
+#define SFBT_BFINAL 0x1
+#define SFBT_LIT 0x4
+#define SFBT_FHT 0x5
+#define SFBT_DHT 0x6
+#define SFBT_HDR 0x7
+
+/*
+ * NX gzip function codes. Table 6.2.
+ * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
+ * select one of the two Byte Count Limits.
+ */
+
+#define GZIP_FC_LIMIT_MASK 0x01
+#define GZIP_FC_COMPRESS_FHT 0x00
+#define GZIP_FC_COMPRESS_DHT 0x02
+#define GZIP_FC_COMPRESS_FHT_COUNT 0x04
+#define GZIP_FC_COMPRESS_DHT_COUNT 0x06
+#define GZIP_FC_COMPRESS_RESUME_FHT 0x08
+#define GZIP_FC_COMPRESS_RESUME_DHT 0x0a
+#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT 0x0c
+#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT 0x0e
+#define GZIP_FC_DECOMPRESS 0x10
+#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND 0x12
+#define GZIP_FC_DECOMPRESS_RESUME 0x14
+#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND 0x16
+#define GZIP_FC_WRAP 0x1e
+
+#define fc_is_compress(fc) (((fc) & 0x10) == 0)
+#define fc_has_count(fc) (fc_is_compress(fc) && (((fc) & 0x4) != 0))
+
+/* CSB.CC Error codes */
+
+#define ERR_NX_OK 0
+#define ERR_NX_ALIGNMENT 1
+#define ERR_NX_OPOVERLAP 2
+#define ERR_NX_DATA_LENGTH 3
+#define ERR_NX_TRANSLATION 5
+#define ERR_NX_PROTECTION 6
+#define ERR_NX_EXTERNAL_UE7 7
+#define ERR_NX_INVALID_OP 8
+#define ERR_NX_PRIVILEGE 9
+#define ERR_NX_INTERNAL_UE 10
+#define ERR_NX_EXTERN_UE_WR 12
+#define ERR_NX_TARGET_SPACE 13
+#define ERR_NX_EXCESSIVE_DDE 14
+#define ERR_NX_TRANSL_WR 15
+#define ERR_NX_PROTECT_WR 16
+#define ERR_NX_SUBFUNCTION 17
+#define ERR_NX_FUNC_ABORT 18
+#define ERR_NX_BYTE_MAX 19
+#define ERR_NX_CORRUPT_CRB 20
+#define ERR_NX_INVALID_CRB 21
+#define ERR_NX_INVALID_DDE 30
+#define ERR_NX_SEGMENTED_DDL 31
+#define ERR_NX_DDE_OVERFLOW 33
+#define ERR_NX_TPBC_GT_SPBC 64
+#define ERR_NX_MISSING_CODE 66
+#define ERR_NX_INVALID_DIST 67
+#define ERR_NX_INVALID_DHT 68
+#define ERR_NX_EXTERNAL_UE90 90
+#define ERR_NX_WDOG_TIMER 224
+#define ERR_NX_AT_FAULT 250
+#define ERR_NX_INTR_SERVER 252
+#define ERR_NX_UE253 253
+#define ERR_NX_NO_HW 254
+#define ERR_NX_HUNG_OP 255
+#define ERR_NX_END 256
+
+/* initial values for non-resume operations */
+#define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */
+#define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */
+
+/* prototypes */
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
+
+extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
+extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
+
+/* caller supplies a print buffer 4*sizeof(crb) */
+
+char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
+char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_prt_hex(void *cp, int sz, char *prbuf);
+char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_strerror(int e);
+
+#ifdef NX_SIM
+#include <stdio.h>
+int nx_sim_init(void *ctx);
+int nx_sim_end(void *ctx);
+int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
+#endif /* NX_SIM */
+
+/* Deflate stream manipulation */
+
+#define set_final_bit(x) (x |= (unsigned char)1)
+#define clr_final_bit(x) (x &= ~(unsigned char)1)
+
+#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
+ } while (0)
+/* append 10 bits 0000001b 00...... ;
+ * assumes appending starts on a byte boundary; b is the final bit.
+ */
+
+
+#ifdef NX_842
+
+/* 842 Engine */
+
+struct nx_eft_crb_t {
+ union { /* byte[0:3] */
+ uint32_t eft_fc; /* bits[29-31] */
+ };
+ uint32_t reserved1; /* byte[4:7] */
+ union {
+ uint64_t csb_address; /* byte[8:15] */
+ struct {
+ uint32_t reserved2;
+ union {
+ uint32_t crb_c;
+ /* c==0 no ccb defined */
+
+ uint32_t crb_at;
+ /* at==0 address type is ignored;
+ * all addrs effective assumed.
+ */
+
+ };
+ };
+ };
+ struct nx_dde_t source_dde; /* byte[16:31] */
+ struct nx_dde_t target_dde; /* byte[32:47] */
+ struct nx_ccb_t ccb; /* byte[48:63] */
+ union {
+ union nx_qw_t reserved64[3]; /* byte[64:96] */
+ };
+ struct nx_csb_t csb;
+} __aligned(128);
+
+/* 842 CRB */
+
+#define EFT_FC_MASK size_mask(3)
+#define EFT_FC_OFFSET 31
+#define EFT_FC_COMPRESS 0x0
+#define EFT_FC_COMPRESS_WITH_CRC 0x1
+#define EFT_FC_DECOMPRESS 0x2
+#define EFT_FC_DECOMPRESS_WITH_CRC 0x3
+#define EFT_FC_BLK_DATA_MOVE 0x4
+#endif /* NX_842 */
+
+#endif /* _NXU_H */
--
2.21.0

2020-04-21 14:10:57

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH V4 1/5] selftests/powerpc: Add header files for GZIP engine test

On Mon, 2020-04-20 at 20:55:34 UTC, Raphael Moreira Zinsly wrote:
> Add files to access the powerpc NX-GZIP engine in user space.
>
> Signed-off-by: Bulent Abali <[email protected]>
> Signed-off-by: Raphael Moreira Zinsly <[email protected]>

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d53979b589609d87036d8daf9500f7eccb0c6317

cheers