Hi,
Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
device, as requested by Arnd Bergmann.
The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
kernel in 2.6.29-rc1.
Ideally, we think it would be best if the existing MTD-based ps3vram driver
would be replaced by the new block-based ps3vram driver before 2.6.29 is
released. This would relieve the burden of supporting two different swap space
schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
maintainer's shoulders, as in that case there would never have been a stable
kernel version containing the MTD-based ps3vram driver.
What do you think? If this is accepted, I'll submit a patch to remove the MTD
ps3vram and add the new driver as ps3vram (instead of ps3vram-ng).
Thanks for your (review and other) comments!
---
>From e19ce619675bc150cd82701e7b272f7018c36527 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <[email protected]>
Date: Wed, 25 Feb 2009 18:32:10 +0100
Subject: [PATCH] ps3/block: Add ps3vram-ng driver for accessing video RAM as block device
Add ps3vram-ng driver, which exposes unused video RAM on the PS3 as a block
device suitable for storage or swap. Fast data transfer is achieved
using a local cache in system RAM and DMA transfers via the GPU.
Notes:
- As both the existing MTD ps3vram and ps3vram-ng bind to the same PS3 system
bus device, the driver which is loaded first by udev wins. However, you can
unload ps3vram, and load ps3vram-ng later.
- ps3vram-ng is faster than ps3vram:
o 1 MiB blocks: +50% (read), +5% (write)
o 4 KiB blocks: +50% (read), +5% (write)
o 512 B blocks: +10% (read), +10% (write)
Signed-off-by: Geert Uytterhoeven <[email protected]>
Cc: Jim Paris <[email protected]>
Cc: Vivien Chappelier <[email protected]>
---
arch/powerpc/platforms/ps3/Kconfig | 7 +
drivers/block/Makefile | 1 +
drivers/block/ps3vram_ng.c | 916 ++++++++++++++++++++++++++++++++++++
3 files changed, 924 insertions(+), 0 deletions(-)
create mode 100644 drivers/block/ps3vram_ng.c
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index 920cf7a..b3b7a20 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -128,6 +128,13 @@ config PS3_FLASH
be disabled on the kernel command line using "ps3flash=off", to
not allocate this fixed buffer.
+config PS3_VRAM
+ tristate "PS3 Video RAM Storage Driver"
+ depends on PPC_PS3 && BLOCK
+ help
+ This driver allows you to use excess PS3 video RAM as volatile
+ storage or system swap.
+
config PS3_LPM
tristate "PS3 Logical Performance Monitor support"
depends on PPC_PS3
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 204332b..ad3e45e 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
obj-$(CONFIG_PS3_DISK) += ps3disk.o
+obj-$(CONFIG_PS3_VRAM) += ps3vram_ng.o
obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
diff --git a/drivers/block/ps3vram_ng.c b/drivers/block/ps3vram_ng.c
new file mode 100644
index 0000000..fa24a17
--- /dev/null
+++ b/drivers/block/ps3vram_ng.c
@@ -0,0 +1,916 @@
+/*
+ * ps3vram - Use extra PS3 video ram as MTD block device.
+ *
+ * Copyright 2009 Sony Corporation
+ *
+ * Based on the MTD ps3vram driver, which is
+ * Copyright (c) 2007-2008 Jim Paris <[email protected]>
+ * Added support RSX DMA Vivien Chappelier <[email protected]>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+
+#define DEVICE_NAME "ps3vram"
+
+
+#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
+#define XDR_IOIF 0x0c000000
+
+#define FIFO_BASE XDR_IOIF
+#define FIFO_SIZE (64 * 1024)
+
+#define DMA_PAGE_SIZE (4 * 1024)
+
+#define CACHE_PAGE_SIZE (256 * 1024)
+#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
+
+#define CACHE_OFFSET CACHE_PAGE_SIZE
+#define FIFO_OFFSET 0
+
+#define CTRL_PUT 0x10
+#define CTRL_GET 0x11
+#define CTRL_TOP 0x15
+
+#define UPLOAD_SUBCH 1
+#define DOWNLOAD_SUBCH 2
+
+#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
+#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
+
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT 0x601
+
+#define CACHE_PAGE_PRESENT 1
+#define CACHE_PAGE_DIRTY 2
+
+struct ps3vram_tag {
+ unsigned int address;
+ unsigned int flags;
+};
+
+struct ps3vram_cache {
+ unsigned int page_count;
+ unsigned int page_size;
+ struct ps3vram_tag *tags;
+ unsigned int hit;
+ unsigned int miss;
+};
+
+struct ps3vram_priv {
+ spinlock_t lock; /* Request queue spinlock */
+ struct task_struct *thread;
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+
+ u64 size;
+
+ u64 memory_handle;
+ u64 context_handle;
+ u32 *ctrl;
+ u32 *reports;
+ u8 __iomem *ddr_base;
+ u8 *xdr_buf;
+
+ u32 *fifo_base;
+ u32 *fifo_ptr;
+
+ struct ps3vram_cache cache;
+};
+
+
+static int ps3vram_major;
+
+
+static struct block_device_operations ps3vram_fops = {
+ .owner = THIS_MODULE,
+};
+
+
+#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
+#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
+#define DMA_NOTIFIER_SIZE 0x40
+#define NOTIFIER 7 /* notifier used for completion report */
+
+/* A trailing '-' means to subtract off ps3fb_videomemory.size */
+static char *size = "256M-";
+module_param(size, charp, 0);
+MODULE_PARM_DESC(size, "memory size");
+
+static u32 *ps3vram_get_notifier(u32 *reports, int notifier)
+{
+ return (void *)reports + DMA_NOTIFIER_OFFSET_BASE +
+ DMA_NOTIFIER_SIZE * notifier;
+}
+
+static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ int i;
+
+ for (i = 0; i < 4; i++)
+ notify[i] = 0xffffffff;
+}
+
+static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
+ unsigned int timeout_ms)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+ do {
+ if (!notify[3])
+ return 0;
+ msleep(1);
+ } while (time_before(jiffies, timeout));
+
+ return -ETIMEDOUT;
+}
+
+static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+ priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+}
+
+static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
+ unsigned int timeout_ms)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+ do {
+ if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+ return 0;
+ msleep(1);
+ } while (time_before(jiffies, timeout));
+
+ dev_dbg(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
+ priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
+ priv->ctrl[CTRL_TOP]);
+
+ return -ETIMEDOUT;
+}
+
+static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
+{
+ *(priv->fifo_ptr)++ = data;
+}
+
+static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan, u32 tag,
+ u32 size)
+{
+ ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
+}
+
+static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ u64 status;
+
+ ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+
+ /* asking the HV for a blit will kick the FIFO */
+ status = lv1_gpu_context_attribute(priv->context_handle,
+ L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT, 0,
+ 0, 0, 0);
+ if (status)
+ dev_err(&dev->core,
+ "%s: lv1_gpu_context_attribute failed %llu\n",
+ __func__, status);
+
+ priv->fifo_ptr = priv->fifo_base;
+}
+
+static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ u64 status;
+
+ mutex_lock(&ps3_gpu_mutex);
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
+ (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+
+ /* asking the HV for a blit will kick the FIFO */
+ status = lv1_gpu_context_attribute(priv->context_handle,
+ L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT, 0,
+ 0, 0, 0);
+ if (status)
+ dev_err(&dev->core,
+ "%s: lv1_gpu_context_attribute failed %llu\n",
+ __func__, status);
+
+ if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
+ FIFO_SIZE - 1024) {
+ dev_dbg(&dev->core, "FIFO full, rewinding\n");
+ ps3vram_wait_ring(dev, 200);
+ ps3vram_rewind_ring(dev);
+ }
+
+ mutex_unlock(&ps3_gpu_mutex);
+}
+
+static void ps3vram_bind(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
+ ps3vram_out_ring(priv, 0x31337303);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
+ ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+ ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
+ ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
+
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
+ ps3vram_out_ring(priv, 0x3137c0de);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
+ ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+ ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
+ ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
+
+ ps3vram_fire_ring(dev);
+}
+
+static int ps3vram_upload(struct ps3_system_bus_device *dev,
+ unsigned int src_offset, unsigned int dst_offset,
+ int len, int count)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ ps3vram_out_ring(priv, XDR_IOIF + src_offset);
+ ps3vram_out_ring(priv, dst_offset);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, count);
+ ps3vram_out_ring(priv, (1 << 8) | 1);
+ ps3vram_out_ring(priv, 0);
+
+ ps3vram_notifier_reset(dev);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_fire_ring(dev);
+ if (ps3vram_notifier_wait(dev, 200) < 0) {
+ dev_dbg(&dev->core, "%s: Notifier timeout\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ps3vram_download(struct ps3_system_bus_device *dev,
+ unsigned int src_offset, unsigned int dst_offset,
+ int len, int count)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ ps3vram_out_ring(priv, src_offset);
+ ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, count);
+ ps3vram_out_ring(priv, (1 << 8) | 1);
+ ps3vram_out_ring(priv, 0);
+
+ ps3vram_notifier_reset(dev);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_fire_ring(dev);
+ if (ps3vram_notifier_wait(dev, 200) < 0) {
+ dev_dbg(&dev->core, "%s: Notifier timeout\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void ps3vram_cache_evict(struct ps3_system_bus_device *dev, int entry)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+
+ if (!(cache->tags[entry].flags & CACHE_PAGE_DIRTY))
+ return;
+
+ dev_dbg(&dev->core, "Flushing %d: 0x%08x\n", entry,
+ cache->tags[entry].address);
+ if (ps3vram_upload(dev, CACHE_OFFSET + entry * cache->page_size,
+ cache->tags[entry].address, DMA_PAGE_SIZE,
+ cache->page_size / DMA_PAGE_SIZE) < 0) {
+ dev_err(&dev->core,
+ "Failed to upload from 0x%x to " "0x%x size 0x%x\n",
+ entry * cache->page_size, cache->tags[entry].address,
+ cache->page_size);
+ }
+ cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
+}
+
+static void ps3vram_cache_load(struct ps3_system_bus_device *dev, int entry,
+ unsigned int address)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+
+ dev_dbg(&dev->core, "Fetching %d: 0x%08x\n", entry, address);
+ if (ps3vram_download(dev, address,
+ CACHE_OFFSET + entry * cache->page_size,
+ DMA_PAGE_SIZE,
+ cache->page_size / DMA_PAGE_SIZE) < 0) {
+ dev_err(&dev->core,
+ "Failed to download from 0x%x to 0x%x size 0x%x\n",
+ address, entry * cache->page_size, cache->page_size);
+ }
+
+ cache->tags[entry].address = address;
+ cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
+}
+
+
+static void ps3vram_cache_flush(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+ int i;
+
+ dev_dbg(&dev->core, "FLUSH\n");
+ for (i = 0; i < cache->page_count; i++) {
+ ps3vram_cache_evict(dev, i);
+ cache->tags[i].flags = 0;
+ }
+}
+
+static unsigned int ps3vram_cache_match(struct ps3_system_bus_device *dev,
+ loff_t address)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+ unsigned int base;
+ unsigned int offset;
+ int i;
+ static int counter;
+
+ offset = (unsigned int) (address & (cache->page_size - 1));
+ base = (unsigned int) (address - offset);
+
+ /* fully associative check */
+ for (i = 0; i < cache->page_count; i++) {
+ if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
+ cache->tags[i].address == base) {
+ cache->hit++;
+ dev_dbg(&dev->core, "Found entry %d: 0x%08x\n", i,
+ cache->tags[i].address);
+ return i;
+ }
+ }
+
+ /* choose a random entry */
+ i = (jiffies + (counter++)) % cache->page_count;
+ dev_dbg(&dev->core, "Using entry %d\n", i);
+
+ ps3vram_cache_evict(dev, i);
+ ps3vram_cache_load(dev, i, base);
+
+ cache->miss++;
+ return i;
+}
+
+static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ priv->cache.page_count = CACHE_PAGE_COUNT;
+ priv->cache.page_size = CACHE_PAGE_SIZE;
+ priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
+ CACHE_PAGE_COUNT, GFP_KERNEL);
+ if (priv->cache.tags == NULL) {
+ dev_err(&dev->core, "Could not allocate cache tags\n");
+ return -ENOMEM;
+ }
+
+ dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
+ CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
+
+ return 0;
+}
+
+static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_cache_flush(dev);
+ kfree(priv->cache.tags);
+}
+
+static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+ size_t len, size_t *retlen, u_char *buf)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ unsigned int cached, count;
+
+ dev_dbg(&dev->core, "%s: from=0x%08x len=0x%zx\n", __func__,
+ (unsigned int)from, len);
+
+ if (from >= priv->size)
+ return -EINVAL;
+
+ if (len > priv->size - from)
+ len = priv->size - from;
+
+ /* Copy from vram to buf */
+ count = len;
+ while (count) {
+ unsigned int offset, avail;
+ unsigned int entry;
+
+ offset = (unsigned int) (from & (priv->cache.page_size - 1));
+ avail = priv->cache.page_size - offset;
+
+ entry = ps3vram_cache_match(dev, from);
+ cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+ dev_dbg(&dev->core, "%s: from=%08x cached=%08x offset=%08x "
+ "avail=%08x count=%08x\n", __func__,
+ (unsigned int)from, cached, offset, avail, count);
+
+ if (avail > count)
+ avail = count;
+ memcpy(buf, priv->xdr_buf + cached, avail);
+
+ buf += avail;
+ count -= avail;
+ from += avail;
+ }
+
+ *retlen = len;
+ return 0;
+}
+
+static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+ size_t len, size_t *retlen, const u_char *buf)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ unsigned int cached, count;
+
+ if (to >= priv->size)
+ return -EINVAL;
+
+ if (len > priv->size - to)
+ len = priv->size - to;
+
+ /* Copy from buf to vram */
+ count = len;
+ while (count) {
+ unsigned int offset, avail;
+ unsigned int entry;
+
+ offset = (unsigned int) (to & (priv->cache.page_size - 1));
+ avail = priv->cache.page_size - offset;
+
+ entry = ps3vram_cache_match(dev, to);
+ cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+ dev_dbg(&dev->core, "%s: to=%08x cached=%08x offset=%08x "
+ "avail=%08x count=%08x\n", __func__, (unsigned int)to,
+ cached, offset, avail, count);
+
+ if (avail > count)
+ avail = count;
+ memcpy(priv->xdr_buf + cached, buf, avail);
+
+ priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
+
+ buf += avail;
+ count -= avail;
+ to += avail;
+ }
+
+ *retlen = len;
+ return 0;
+}
+
+static int ps3vram_proc_show(struct seq_file *m, void *v)
+{
+ struct ps3vram_priv *priv = m->private;
+
+ seq_printf(m, "hit:%u\nmiss:%u\n", priv->cache.hit, priv->cache.miss);
+ return 0;
+}
+
+static int ps3vram_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ps3vram_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations ps3vram_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ps3vram_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void __devinit ps3vram_proc_init(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct proc_dir_entry *pde;
+
+ pde = proc_create(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops);
+ if (!pde) {
+ dev_warn(&dev->core, "failed to create /proc entry\n");
+ return;
+ }
+
+ pde->owner = THIS_MODULE;
+ pde->data = priv;
+}
+
+static void ps3vram_do_request(struct ps3_system_bus_device *dev,
+ struct request *req)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ int write, res, uptodate = 0;
+ const char *op;
+ sector_t start_sector;
+ unsigned int sectors;
+ loff_t offset;
+ size_t len, retlen;
+
+ write = rq_data_dir(req);
+ op = write ? "write" : "read";
+
+ start_sector = req->sector;
+ sectors = req->current_nr_sectors;
+ dev_dbg(&dev->core, "%s %u sectors starting at %lu\n", op, sectors,
+ start_sector);
+
+ offset = start_sector << 9;
+ len = sectors << 9;
+
+ if (write)
+ res = ps3vram_write(dev, offset, len, &retlen, req->buffer);
+ else
+ res = ps3vram_read(dev, offset, len, &retlen, req->buffer);
+
+ if (res) {
+ dev_err(&dev->core, "%s failed\n", op);
+ goto out;
+ }
+
+ if (retlen != len) {
+ dev_err(&dev->core, "Short %s\n", op);
+ goto out;
+ }
+
+ dev_dbg(&dev->core, "%s completed\n", op);
+
+ uptodate = 1;
+
+out:
+ spin_lock_irq(&priv->lock);
+ end_request(req, uptodate);
+ spin_unlock_irq(&priv->lock);
+}
+
+static int ps3vram_thread(void *data)
+{
+ struct ps3_system_bus_device *dev = data;
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct request_queue *q = priv->queue;
+ struct request *req;
+
+ dev_dbg(&dev->core, "%s init\n", __func__);
+
+ current->flags |= PF_NOFREEZE;
+
+ while (!kthread_should_stop()) {
+ spin_lock_irq(&priv->lock);
+ set_current_state(TASK_INTERRUPTIBLE);
+ req = elv_next_request(q);
+ if (!req) {
+ spin_unlock_irq(&priv->lock);
+ schedule();
+ continue;
+ }
+ if (!blk_fs_request(req)) {
+ blk_dump_rq_flags(req, DEVICE_NAME " bad request");
+ end_request(req, 0);
+ spin_unlock_irq(&priv->lock);
+ continue;
+ }
+ spin_unlock_irq(&priv->lock);
+ ps3vram_do_request(dev, req);
+ }
+
+ dev_dbg(&dev->core, "%s exit\n", __func__);
+ return 0;
+}
+
+static void ps3vram_request(struct request_queue *q)
+{
+ struct ps3_system_bus_device *dev = q->queuedata;
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ dev_dbg(&dev->core, "Waking up thread\n");
+ wake_up_process(priv->thread);
+}
+
+
+static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv;
+ int error, status;
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+ u64 ddr_lpar, ctrl_lpar, info_lpar, reports_lpar, ddr_size,
+ reports_size;
+ char *rest;
+ struct task_struct *task;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ error = -ENOMEM;
+ goto fail;
+ }
+
+ dev->core.driver_data = priv;
+ spin_lock_init(&priv->lock);
+
+ priv = dev->core.driver_data;
+
+ /* Allocate XDR buffer (1MiB aligned) */
+ priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(XDR_BUF_SIZE));
+ if (priv->xdr_buf == NULL) {
+ dev_dbg(&dev->core, "Could not allocate XDR buffer\n");
+ error = -ENOMEM;
+ goto fail_free_priv;
+ }
+
+ /* Put FIFO at begginning of XDR buffer */
+ priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
+ priv->fifo_ptr = priv->fifo_base;
+
+ /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
+ if (ps3_open_hv_device(dev)) {
+ dev_err(&dev->core, "ps3_open_hv_device failed\n");
+ error = -EAGAIN;
+ goto out_close_gpu;
+ }
+
+ /* Request memory */
+ status = -1;
+ ddr_size = memparse(size, &rest);
+ if (*rest == '-')
+ ddr_size -= ps3fb_videomemory.size;
+ ddr_size = ALIGN(ddr_size, 1024*1024);
+ if (ddr_size <= 0) {
+ dev_err(&dev->core, "Specified size is too small\n");
+ error = -EINVAL;
+ goto out_close_gpu;
+ }
+
+ while (ddr_size > 0) {
+ status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
+ &priv->memory_handle,
+ &ddr_lpar);
+ if (!status)
+ break;
+ ddr_size -= 1024*1024;
+ }
+ if (status || ddr_size <= 0) {
+ dev_err(&dev->core, "lv1_gpu_memory_allocate failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_xdr_buf;
+ }
+
+ /* Request context */
+ status = lv1_gpu_context_allocate(priv->memory_handle, 0,
+ &priv->context_handle, &ctrl_lpar,
+ &info_lpar, &reports_lpar,
+ &reports_size);
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_context_allocate failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_memory;
+ }
+
+ /* Map XDR buffer to RSX */
+ status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
+ ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
+ XDR_BUF_SIZE, 0);
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_context_iomap failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_context;
+ }
+
+ priv->ddr_base = ioremap_flags(ddr_lpar, ddr_size, _PAGE_NO_CACHE);
+
+ if (!priv->ddr_base) {
+ dev_err(&dev->core, "ioremap DDR failed\n");
+ error = -ENOMEM;
+ goto out_free_context;
+ }
+
+ priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
+ if (!priv->ctrl) {
+ dev_err(&dev->core, "ioremap CTRL failed\n");
+ error = -ENOMEM;
+ goto out_unmap_vram;
+ }
+
+ priv->reports = ioremap(reports_lpar, reports_size);
+ if (!priv->reports) {
+ dev_err(&dev->core, "ioremap REPORTS failed\n");
+ error = -ENOMEM;
+ goto out_unmap_ctrl;
+ }
+
+ mutex_lock(&ps3_gpu_mutex);
+ ps3vram_init_ring(dev);
+ mutex_unlock(&ps3_gpu_mutex);
+
+ priv->size = ddr_size;
+
+ ps3vram_bind(dev);
+
+ mutex_lock(&ps3_gpu_mutex);
+ error = ps3vram_wait_ring(dev, 100);
+ mutex_unlock(&ps3_gpu_mutex);
+ if (error < 0) {
+ dev_err(&dev->core, "Failed to initialize channels\n");
+ error = -ETIMEDOUT;
+ goto out_unmap_reports;
+ }
+
+ ps3vram_cache_init(dev);
+ ps3vram_proc_init(dev);
+
+ queue = blk_init_queue(ps3vram_request, &priv->lock);
+ if (!queue) {
+ dev_err(&dev->core, "blk_init_queue failed\n");
+ error = -ENOMEM;
+ goto out_cache_cleanup;
+ }
+
+ priv->queue = queue;
+ queue->queuedata = dev;
+
+ gendisk = alloc_disk(1);
+ if (!gendisk) {
+ dev_err(&dev->core, "alloc_disk failed\n");
+ error = -ENOMEM;
+ goto fail_cleanup_queue;
+ }
+
+ priv->gendisk = gendisk;
+ gendisk->major = ps3vram_major;
+ gendisk->first_minor = 0;
+ gendisk->fops = &ps3vram_fops;
+ gendisk->queue = queue;
+ gendisk->private_data = dev;
+ gendisk->driverfs_dev = &dev->core;
+ strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
+ set_capacity(gendisk, priv->size >> 9);
+
+ task = kthread_run(ps3vram_thread, dev, DEVICE_NAME);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ goto fail_free_disk;
+ }
+ priv->thread = task;
+
+ dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
+ gendisk->disk_name, get_capacity(gendisk) >> 11);
+
+ add_disk(gendisk);
+ return 0;
+
+fail_free_disk:
+ put_disk(priv->gendisk);
+fail_cleanup_queue:
+ blk_cleanup_queue(queue);
+out_cache_cleanup:
+ remove_proc_entry(DEVICE_NAME, NULL);
+ ps3vram_cache_cleanup(dev);
+out_unmap_reports:
+ iounmap(priv->reports);
+out_unmap_ctrl:
+ iounmap(priv->ctrl);
+out_unmap_vram:
+ iounmap(priv->ddr_base);
+out_free_context:
+ lv1_gpu_context_free(priv->context_handle);
+out_free_memory:
+ lv1_gpu_memory_free(priv->memory_handle);
+out_close_gpu:
+ ps3_close_hv_device(dev);
+out_free_xdr_buf:
+ free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+fail_free_priv:
+ kfree(priv);
+ dev->core.driver_data = NULL;
+fail:
+ return error;
+}
+
+static int ps3vram_remove(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ BUG_ON(!elv_queue_empty(priv->queue));
+
+ kthread_stop(priv->thread);
+ del_gendisk(priv->gendisk);
+ blk_cleanup_queue(priv->queue);
+ put_disk(priv->gendisk);
+ remove_proc_entry(DEVICE_NAME, NULL);
+ ps3vram_cache_cleanup(dev);
+ iounmap(priv->reports);
+ iounmap(priv->ctrl);
+ iounmap(priv->ddr_base);
+ lv1_gpu_context_free(priv->context_handle);
+ lv1_gpu_memory_free(priv->memory_handle);
+ ps3_close_hv_device(dev);
+ free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+ kfree(priv);
+ dev->core.driver_data = NULL;
+ return 0;
+}
+
+static struct ps3_system_bus_driver ps3vram = {
+ .match_id = PS3_MATCH_ID_GPU,
+ .match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK,
+ .core.name = DEVICE_NAME,
+ .core.owner = THIS_MODULE,
+ .probe = ps3vram_probe,
+ .remove = ps3vram_remove,
+ .shutdown = ps3vram_remove,
+};
+
+
+static int __init ps3vram_init(void)
+{
+ int error;
+
+ if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+ return -ENODEV;
+
+ error = register_blkdev(0, DEVICE_NAME);
+ if (error <= 0) {
+ pr_err("%s: register_blkdev failed %d\n", DEVICE_NAME, error);
+ return error;
+ }
+ ps3vram_major = error;
+
+ pr_info("%s: registered block device major %d\n", DEVICE_NAME,
+ ps3vram_major);
+
+ error = ps3_system_bus_driver_register(&ps3vram);
+ if (error)
+ unregister_blkdev(ps3vram_major, DEVICE_NAME);
+
+ return error;
+}
+
+static void __exit ps3vram_exit(void)
+{
+ ps3_system_bus_driver_unregister(&ps3vram);
+ unregister_blkdev(ps3vram_major, DEVICE_NAME);
+}
+
+module_init(ps3vram_init);
+module_exit(ps3vram_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PS3 Video RAM Storage Driver");
+MODULE_AUTHOR("Sony Corporation");
+MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
--
1.6.0.4
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Wed, 2009-03-04 at 14:57 +0100, Geert Uytterhoeven wrote:
> Hi,
>
> Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> device, as requested by Arnd Bergmann.
>
> The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
> kernel in 2.6.29-rc1.
>
> Ideally, we think it would be best if the existing MTD-based ps3vram driver
> would be replaced by the new block-based ps3vram driver before 2.6.29 is
> released. This would relieve the burden of supporting two different swap space
> schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> maintainer's shoulders, as in that case there would never have been a stable
> kernel version containing the MTD-based ps3vram driver.
This is very very very late ... we are at rc7, probably one rc before
final... as much as I like integrating drivers later, I'll ask Linus
opinion on this one.
Linus ? What do you reckon ? Maybe a better option is just to remove
ps3nvram from .29 and merge the new one in .30 ?
Cheers,
Ben.
> What do you think? If this is accepted, I'll submit a patch to remove the MTD
> ps3vram and add the new driver as ps3vram (instead of ps3vram-ng).
>
> Thanks for your (review and other) comments!
>
> ---
> >From e19ce619675bc150cd82701e7b272f7018c36527 Mon Sep 17 00:00:00 2001
> From: Geert Uytterhoeven <[email protected]>
> Date: Wed, 25 Feb 2009 18:32:10 +0100
> Subject: [PATCH] ps3/block: Add ps3vram-ng driver for accessing video RAM as block device
>
> Add ps3vram-ng driver, which exposes unused video RAM on the PS3 as a block
> device suitable for storage or swap. Fast data transfer is achieved
> using a local cache in system RAM and DMA transfers via the GPU.
>
> Notes:
> - As both the existing MTD ps3vram and ps3vram-ng bind to the same PS3 system
> bus device, the driver which is loaded first by udev wins. However, you can
> unload ps3vram, and load ps3vram-ng later.
> - ps3vram-ng is faster than ps3vram:
> o 1 MiB blocks: +50% (read), +5% (write)
> o 4 KiB blocks: +50% (read), +5% (write)
> o 512 B blocks: +10% (read), +10% (write)
>
> Signed-off-by: Geert Uytterhoeven <[email protected]>
> Cc: Jim Paris <[email protected]>
> Cc: Vivien Chappelier <[email protected]>
> ---
> arch/powerpc/platforms/ps3/Kconfig | 7 +
> drivers/block/Makefile | 1 +
> drivers/block/ps3vram_ng.c | 916 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 924 insertions(+), 0 deletions(-)
> create mode 100644 drivers/block/ps3vram_ng.c
>
> diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
> index 920cf7a..b3b7a20 100644
> --- a/arch/powerpc/platforms/ps3/Kconfig
> +++ b/arch/powerpc/platforms/ps3/Kconfig
> @@ -128,6 +128,13 @@ config PS3_FLASH
> be disabled on the kernel command line using "ps3flash=off", to
> not allocate this fixed buffer.
>
> +config PS3_VRAM
> + tristate "PS3 Video RAM Storage Driver"
> + depends on PPC_PS3 && BLOCK
> + help
> + This driver allows you to use excess PS3 video RAM as volatile
> + storage or system swap.
> +
> config PS3_LPM
> tristate "PS3 Logical Performance Monitor support"
> depends on PPC_PS3
> diff --git a/drivers/block/Makefile b/drivers/block/Makefile
> index 204332b..ad3e45e 100644
> --- a/drivers/block/Makefile
> +++ b/drivers/block/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_MAC_FLOPPY) += swim3.o
> obj-$(CONFIG_BLK_DEV_FD) += floppy.o
> obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
> obj-$(CONFIG_PS3_DISK) += ps3disk.o
> +obj-$(CONFIG_PS3_VRAM) += ps3vram_ng.o
> obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
> obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
> obj-$(CONFIG_BLK_DEV_RAM) += brd.o
> diff --git a/drivers/block/ps3vram_ng.c b/drivers/block/ps3vram_ng.c
> new file mode 100644
> index 0000000..fa24a17
> --- /dev/null
> +++ b/drivers/block/ps3vram_ng.c
> @@ -0,0 +1,916 @@
> +/*
> + * ps3vram - Use extra PS3 video ram as MTD block device.
> + *
> + * Copyright 2009 Sony Corporation
> + *
> + * Based on the MTD ps3vram driver, which is
> + * Copyright (c) 2007-2008 Jim Paris <[email protected]>
> + * Added support RSX DMA Vivien Chappelier <[email protected]>
> + */
> +
> +#include <linux/blkdev.h>
> +#include <linux/delay.h>
> +#include <linux/kthread.h>
> +#include <linux/proc_fs.h>
> +#include <linux/seq_file.h>
> +
> +#include <asm/firmware.h>
> +#include <asm/lv1call.h>
> +#include <asm/ps3.h>
> +
> +
> +#define DEVICE_NAME "ps3vram"
> +
> +
> +#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
> +#define XDR_IOIF 0x0c000000
> +
> +#define FIFO_BASE XDR_IOIF
> +#define FIFO_SIZE (64 * 1024)
> +
> +#define DMA_PAGE_SIZE (4 * 1024)
> +
> +#define CACHE_PAGE_SIZE (256 * 1024)
> +#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
> +
> +#define CACHE_OFFSET CACHE_PAGE_SIZE
> +#define FIFO_OFFSET 0
> +
> +#define CTRL_PUT 0x10
> +#define CTRL_GET 0x11
> +#define CTRL_TOP 0x15
> +
> +#define UPLOAD_SUBCH 1
> +#define DOWNLOAD_SUBCH 2
> +
> +#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
> +#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
> +
> +#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT 0x601
> +
> +#define CACHE_PAGE_PRESENT 1
> +#define CACHE_PAGE_DIRTY 2
> +
> +struct ps3vram_tag {
> + unsigned int address;
> + unsigned int flags;
> +};
> +
> +struct ps3vram_cache {
> + unsigned int page_count;
> + unsigned int page_size;
> + struct ps3vram_tag *tags;
> + unsigned int hit;
> + unsigned int miss;
> +};
> +
> +struct ps3vram_priv {
> + spinlock_t lock; /* Request queue spinlock */
> + struct task_struct *thread;
> + struct request_queue *queue;
> + struct gendisk *gendisk;
> +
> + u64 size;
> +
> + u64 memory_handle;
> + u64 context_handle;
> + u32 *ctrl;
> + u32 *reports;
> + u8 __iomem *ddr_base;
> + u8 *xdr_buf;
> +
> + u32 *fifo_base;
> + u32 *fifo_ptr;
> +
> + struct ps3vram_cache cache;
> +};
> +
> +
> +static int ps3vram_major;
> +
> +
> +static struct block_device_operations ps3vram_fops = {
> + .owner = THIS_MODULE,
> +};
> +
> +
> +#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
> +#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
> +#define DMA_NOTIFIER_SIZE 0x40
> +#define NOTIFIER 7 /* notifier used for completion report */
> +
> +/* A trailing '-' means to subtract off ps3fb_videomemory.size */
> +static char *size = "256M-";
> +module_param(size, charp, 0);
> +MODULE_PARM_DESC(size, "memory size");
> +
> +static u32 *ps3vram_get_notifier(u32 *reports, int notifier)
> +{
> + return (void *)reports + DMA_NOTIFIER_OFFSET_BASE +
> + DMA_NOTIFIER_SIZE * notifier;
> +}
> +
> +static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
> + int i;
> +
> + for (i = 0; i < 4; i++)
> + notify[i] = 0xffffffff;
> +}
> +
> +static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
> + unsigned int timeout_ms)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
> + unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
> +
> + do {
> + if (!notify[3])
> + return 0;
> + msleep(1);
> + } while (time_before(jiffies, timeout));
> +
> + return -ETIMEDOUT;
> +}
> +
> +static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
> + priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
> +}
> +
> +static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
> + unsigned int timeout_ms)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
> +
> + do {
> + if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
> + return 0;
> + msleep(1);
> + } while (time_before(jiffies, timeout));
> +
> + dev_dbg(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
> + priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
> + priv->ctrl[CTRL_TOP]);
> +
> + return -ETIMEDOUT;
> +}
> +
> +static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
> +{
> + *(priv->fifo_ptr)++ = data;
> +}
> +
> +static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan, u32 tag,
> + u32 size)
> +{
> + ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
> +}
> +
> +static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + u64 status;
> +
> + ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
> +
> + priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
> +
> + /* asking the HV for a blit will kick the FIFO */
> + status = lv1_gpu_context_attribute(priv->context_handle,
> + L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT, 0,
> + 0, 0, 0);
> + if (status)
> + dev_err(&dev->core,
> + "%s: lv1_gpu_context_attribute failed %llu\n",
> + __func__, status);
> +
> + priv->fifo_ptr = priv->fifo_base;
> +}
> +
> +static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + u64 status;
> +
> + mutex_lock(&ps3_gpu_mutex);
> +
> + priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
> + (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
> +
> + /* asking the HV for a blit will kick the FIFO */
> + status = lv1_gpu_context_attribute(priv->context_handle,
> + L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT, 0,
> + 0, 0, 0);
> + if (status)
> + dev_err(&dev->core,
> + "%s: lv1_gpu_context_attribute failed %llu\n",
> + __func__, status);
> +
> + if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
> + FIFO_SIZE - 1024) {
> + dev_dbg(&dev->core, "FIFO full, rewinding\n");
> + ps3vram_wait_ring(dev, 200);
> + ps3vram_rewind_ring(dev);
> + }
> +
> + mutex_unlock(&ps3_gpu_mutex);
> +}
> +
> +static void ps3vram_bind(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
> + ps3vram_out_ring(priv, 0x31337303);
> + ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
> + ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
> + ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
> + ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
> +
> + ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
> + ps3vram_out_ring(priv, 0x3137c0de);
> + ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
> + ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
> + ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
> + ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
> +
> + ps3vram_fire_ring(dev);
> +}
> +
> +static int ps3vram_upload(struct ps3_system_bus_device *dev,
> + unsigned int src_offset, unsigned int dst_offset,
> + int len, int count)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + ps3vram_begin_ring(priv, UPLOAD_SUBCH,
> + NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
> + ps3vram_out_ring(priv, XDR_IOIF + src_offset);
> + ps3vram_out_ring(priv, dst_offset);
> + ps3vram_out_ring(priv, len);
> + ps3vram_out_ring(priv, len);
> + ps3vram_out_ring(priv, len);
> + ps3vram_out_ring(priv, count);
> + ps3vram_out_ring(priv, (1 << 8) | 1);
> + ps3vram_out_ring(priv, 0);
> +
> + ps3vram_notifier_reset(dev);
> + ps3vram_begin_ring(priv, UPLOAD_SUBCH,
> + NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
> + ps3vram_out_ring(priv, 0);
> + ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
> + ps3vram_out_ring(priv, 0);
> + ps3vram_fire_ring(dev);
> + if (ps3vram_notifier_wait(dev, 200) < 0) {
> + dev_dbg(&dev->core, "%s: Notifier timeout\n", __func__);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int ps3vram_download(struct ps3_system_bus_device *dev,
> + unsigned int src_offset, unsigned int dst_offset,
> + int len, int count)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
> + NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
> + ps3vram_out_ring(priv, src_offset);
> + ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
> + ps3vram_out_ring(priv, len);
> + ps3vram_out_ring(priv, len);
> + ps3vram_out_ring(priv, len);
> + ps3vram_out_ring(priv, count);
> + ps3vram_out_ring(priv, (1 << 8) | 1);
> + ps3vram_out_ring(priv, 0);
> +
> + ps3vram_notifier_reset(dev);
> + ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
> + NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
> + ps3vram_out_ring(priv, 0);
> + ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
> + ps3vram_out_ring(priv, 0);
> + ps3vram_fire_ring(dev);
> + if (ps3vram_notifier_wait(dev, 200) < 0) {
> + dev_dbg(&dev->core, "%s: Notifier timeout\n", __func__);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static void ps3vram_cache_evict(struct ps3_system_bus_device *dev, int entry)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + struct ps3vram_cache *cache = &priv->cache;
> +
> + if (!(cache->tags[entry].flags & CACHE_PAGE_DIRTY))
> + return;
> +
> + dev_dbg(&dev->core, "Flushing %d: 0x%08x\n", entry,
> + cache->tags[entry].address);
> + if (ps3vram_upload(dev, CACHE_OFFSET + entry * cache->page_size,
> + cache->tags[entry].address, DMA_PAGE_SIZE,
> + cache->page_size / DMA_PAGE_SIZE) < 0) {
> + dev_err(&dev->core,
> + "Failed to upload from 0x%x to " "0x%x size 0x%x\n",
> + entry * cache->page_size, cache->tags[entry].address,
> + cache->page_size);
> + }
> + cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
> +}
> +
> +static void ps3vram_cache_load(struct ps3_system_bus_device *dev, int entry,
> + unsigned int address)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + struct ps3vram_cache *cache = &priv->cache;
> +
> + dev_dbg(&dev->core, "Fetching %d: 0x%08x\n", entry, address);
> + if (ps3vram_download(dev, address,
> + CACHE_OFFSET + entry * cache->page_size,
> + DMA_PAGE_SIZE,
> + cache->page_size / DMA_PAGE_SIZE) < 0) {
> + dev_err(&dev->core,
> + "Failed to download from 0x%x to 0x%x size 0x%x\n",
> + address, entry * cache->page_size, cache->page_size);
> + }
> +
> + cache->tags[entry].address = address;
> + cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
> +}
> +
> +
> +static void ps3vram_cache_flush(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + struct ps3vram_cache *cache = &priv->cache;
> + int i;
> +
> + dev_dbg(&dev->core, "FLUSH\n");
> + for (i = 0; i < cache->page_count; i++) {
> + ps3vram_cache_evict(dev, i);
> + cache->tags[i].flags = 0;
> + }
> +}
> +
> +static unsigned int ps3vram_cache_match(struct ps3_system_bus_device *dev,
> + loff_t address)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + struct ps3vram_cache *cache = &priv->cache;
> + unsigned int base;
> + unsigned int offset;
> + int i;
> + static int counter;
> +
> + offset = (unsigned int) (address & (cache->page_size - 1));
> + base = (unsigned int) (address - offset);
> +
> + /* fully associative check */
> + for (i = 0; i < cache->page_count; i++) {
> + if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
> + cache->tags[i].address == base) {
> + cache->hit++;
> + dev_dbg(&dev->core, "Found entry %d: 0x%08x\n", i,
> + cache->tags[i].address);
> + return i;
> + }
> + }
> +
> + /* choose a random entry */
> + i = (jiffies + (counter++)) % cache->page_count;
> + dev_dbg(&dev->core, "Using entry %d\n", i);
> +
> + ps3vram_cache_evict(dev, i);
> + ps3vram_cache_load(dev, i, base);
> +
> + cache->miss++;
> + return i;
> +}
> +
> +static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + priv->cache.page_count = CACHE_PAGE_COUNT;
> + priv->cache.page_size = CACHE_PAGE_SIZE;
> + priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
> + CACHE_PAGE_COUNT, GFP_KERNEL);
> + if (priv->cache.tags == NULL) {
> + dev_err(&dev->core, "Could not allocate cache tags\n");
> + return -ENOMEM;
> + }
> +
> + dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
> + CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
> +
> + return 0;
> +}
> +
> +static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + ps3vram_cache_flush(dev);
> + kfree(priv->cache.tags);
> +}
> +
> +static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
> + size_t len, size_t *retlen, u_char *buf)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + unsigned int cached, count;
> +
> + dev_dbg(&dev->core, "%s: from=0x%08x len=0x%zx\n", __func__,
> + (unsigned int)from, len);
> +
> + if (from >= priv->size)
> + return -EINVAL;
> +
> + if (len > priv->size - from)
> + len = priv->size - from;
> +
> + /* Copy from vram to buf */
> + count = len;
> + while (count) {
> + unsigned int offset, avail;
> + unsigned int entry;
> +
> + offset = (unsigned int) (from & (priv->cache.page_size - 1));
> + avail = priv->cache.page_size - offset;
> +
> + entry = ps3vram_cache_match(dev, from);
> + cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
> +
> + dev_dbg(&dev->core, "%s: from=%08x cached=%08x offset=%08x "
> + "avail=%08x count=%08x\n", __func__,
> + (unsigned int)from, cached, offset, avail, count);
> +
> + if (avail > count)
> + avail = count;
> + memcpy(buf, priv->xdr_buf + cached, avail);
> +
> + buf += avail;
> + count -= avail;
> + from += avail;
> + }
> +
> + *retlen = len;
> + return 0;
> +}
> +
> +static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
> + size_t len, size_t *retlen, const u_char *buf)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + unsigned int cached, count;
> +
> + if (to >= priv->size)
> + return -EINVAL;
> +
> + if (len > priv->size - to)
> + len = priv->size - to;
> +
> + /* Copy from buf to vram */
> + count = len;
> + while (count) {
> + unsigned int offset, avail;
> + unsigned int entry;
> +
> + offset = (unsigned int) (to & (priv->cache.page_size - 1));
> + avail = priv->cache.page_size - offset;
> +
> + entry = ps3vram_cache_match(dev, to);
> + cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
> +
> + dev_dbg(&dev->core, "%s: to=%08x cached=%08x offset=%08x "
> + "avail=%08x count=%08x\n", __func__, (unsigned int)to,
> + cached, offset, avail, count);
> +
> + if (avail > count)
> + avail = count;
> + memcpy(priv->xdr_buf + cached, buf, avail);
> +
> + priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
> +
> + buf += avail;
> + count -= avail;
> + to += avail;
> + }
> +
> + *retlen = len;
> + return 0;
> +}
> +
> +static int ps3vram_proc_show(struct seq_file *m, void *v)
> +{
> + struct ps3vram_priv *priv = m->private;
> +
> + seq_printf(m, "hit:%u\nmiss:%u\n", priv->cache.hit, priv->cache.miss);
> + return 0;
> +}
> +
> +static int ps3vram_proc_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, ps3vram_proc_show, PDE(inode)->data);
> +}
> +
> +static const struct file_operations ps3vram_proc_fops = {
> + .owner = THIS_MODULE,
> + .open = ps3vram_proc_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +
> +static void __devinit ps3vram_proc_init(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + struct proc_dir_entry *pde;
> +
> + pde = proc_create(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops);
> + if (!pde) {
> + dev_warn(&dev->core, "failed to create /proc entry\n");
> + return;
> + }
> +
> + pde->owner = THIS_MODULE;
> + pde->data = priv;
> +}
> +
> +static void ps3vram_do_request(struct ps3_system_bus_device *dev,
> + struct request *req)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + int write, res, uptodate = 0;
> + const char *op;
> + sector_t start_sector;
> + unsigned int sectors;
> + loff_t offset;
> + size_t len, retlen;
> +
> + write = rq_data_dir(req);
> + op = write ? "write" : "read";
> +
> + start_sector = req->sector;
> + sectors = req->current_nr_sectors;
> + dev_dbg(&dev->core, "%s %u sectors starting at %lu\n", op, sectors,
> + start_sector);
> +
> + offset = start_sector << 9;
> + len = sectors << 9;
> +
> + if (write)
> + res = ps3vram_write(dev, offset, len, &retlen, req->buffer);
> + else
> + res = ps3vram_read(dev, offset, len, &retlen, req->buffer);
> +
> + if (res) {
> + dev_err(&dev->core, "%s failed\n", op);
> + goto out;
> + }
> +
> + if (retlen != len) {
> + dev_err(&dev->core, "Short %s\n", op);
> + goto out;
> + }
> +
> + dev_dbg(&dev->core, "%s completed\n", op);
> +
> + uptodate = 1;
> +
> +out:
> + spin_lock_irq(&priv->lock);
> + end_request(req, uptodate);
> + spin_unlock_irq(&priv->lock);
> +}
> +
> +static int ps3vram_thread(void *data)
> +{
> + struct ps3_system_bus_device *dev = data;
> + struct ps3vram_priv *priv = dev->core.driver_data;
> + struct request_queue *q = priv->queue;
> + struct request *req;
> +
> + dev_dbg(&dev->core, "%s init\n", __func__);
> +
> + current->flags |= PF_NOFREEZE;
> +
> + while (!kthread_should_stop()) {
> + spin_lock_irq(&priv->lock);
> + set_current_state(TASK_INTERRUPTIBLE);
> + req = elv_next_request(q);
> + if (!req) {
> + spin_unlock_irq(&priv->lock);
> + schedule();
> + continue;
> + }
> + if (!blk_fs_request(req)) {
> + blk_dump_rq_flags(req, DEVICE_NAME " bad request");
> + end_request(req, 0);
> + spin_unlock_irq(&priv->lock);
> + continue;
> + }
> + spin_unlock_irq(&priv->lock);
> + ps3vram_do_request(dev, req);
> + }
> +
> + dev_dbg(&dev->core, "%s exit\n", __func__);
> + return 0;
> +}
> +
> +static void ps3vram_request(struct request_queue *q)
> +{
> + struct ps3_system_bus_device *dev = q->queuedata;
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + dev_dbg(&dev->core, "Waking up thread\n");
> + wake_up_process(priv->thread);
> +}
> +
> +
> +static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv;
> + int error, status;
> + struct request_queue *queue;
> + struct gendisk *gendisk;
> + u64 ddr_lpar, ctrl_lpar, info_lpar, reports_lpar, ddr_size,
> + reports_size;
> + char *rest;
> + struct task_struct *task;
> +
> + priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> + if (!priv) {
> + error = -ENOMEM;
> + goto fail;
> + }
> +
> + dev->core.driver_data = priv;
> + spin_lock_init(&priv->lock);
> +
> + priv = dev->core.driver_data;
> +
> + /* Allocate XDR buffer (1MiB aligned) */
> + priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
> + get_order(XDR_BUF_SIZE));
> + if (priv->xdr_buf == NULL) {
> + dev_dbg(&dev->core, "Could not allocate XDR buffer\n");
> + error = -ENOMEM;
> + goto fail_free_priv;
> + }
> +
> + /* Put FIFO at begginning of XDR buffer */
> + priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
> + priv->fifo_ptr = priv->fifo_base;
> +
> + /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
> + if (ps3_open_hv_device(dev)) {
> + dev_err(&dev->core, "ps3_open_hv_device failed\n");
> + error = -EAGAIN;
> + goto out_close_gpu;
> + }
> +
> + /* Request memory */
> + status = -1;
> + ddr_size = memparse(size, &rest);
> + if (*rest == '-')
> + ddr_size -= ps3fb_videomemory.size;
> + ddr_size = ALIGN(ddr_size, 1024*1024);
> + if (ddr_size <= 0) {
> + dev_err(&dev->core, "Specified size is too small\n");
> + error = -EINVAL;
> + goto out_close_gpu;
> + }
> +
> + while (ddr_size > 0) {
> + status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
> + &priv->memory_handle,
> + &ddr_lpar);
> + if (!status)
> + break;
> + ddr_size -= 1024*1024;
> + }
> + if (status || ddr_size <= 0) {
> + dev_err(&dev->core, "lv1_gpu_memory_allocate failed %d\n",
> + status);
> + error = -ENOMEM;
> + goto out_free_xdr_buf;
> + }
> +
> + /* Request context */
> + status = lv1_gpu_context_allocate(priv->memory_handle, 0,
> + &priv->context_handle, &ctrl_lpar,
> + &info_lpar, &reports_lpar,
> + &reports_size);
> + if (status) {
> + dev_err(&dev->core, "lv1_gpu_context_allocate failed %d\n",
> + status);
> + error = -ENOMEM;
> + goto out_free_memory;
> + }
> +
> + /* Map XDR buffer to RSX */
> + status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
> + ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
> + XDR_BUF_SIZE, 0);
> + if (status) {
> + dev_err(&dev->core, "lv1_gpu_context_iomap failed %d\n",
> + status);
> + error = -ENOMEM;
> + goto out_free_context;
> + }
> +
> + priv->ddr_base = ioremap_flags(ddr_lpar, ddr_size, _PAGE_NO_CACHE);
> +
> + if (!priv->ddr_base) {
> + dev_err(&dev->core, "ioremap DDR failed\n");
> + error = -ENOMEM;
> + goto out_free_context;
> + }
> +
> + priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
> + if (!priv->ctrl) {
> + dev_err(&dev->core, "ioremap CTRL failed\n");
> + error = -ENOMEM;
> + goto out_unmap_vram;
> + }
> +
> + priv->reports = ioremap(reports_lpar, reports_size);
> + if (!priv->reports) {
> + dev_err(&dev->core, "ioremap REPORTS failed\n");
> + error = -ENOMEM;
> + goto out_unmap_ctrl;
> + }
> +
> + mutex_lock(&ps3_gpu_mutex);
> + ps3vram_init_ring(dev);
> + mutex_unlock(&ps3_gpu_mutex);
> +
> + priv->size = ddr_size;
> +
> + ps3vram_bind(dev);
> +
> + mutex_lock(&ps3_gpu_mutex);
> + error = ps3vram_wait_ring(dev, 100);
> + mutex_unlock(&ps3_gpu_mutex);
> + if (error < 0) {
> + dev_err(&dev->core, "Failed to initialize channels\n");
> + error = -ETIMEDOUT;
> + goto out_unmap_reports;
> + }
> +
> + ps3vram_cache_init(dev);
> + ps3vram_proc_init(dev);
> +
> + queue = blk_init_queue(ps3vram_request, &priv->lock);
> + if (!queue) {
> + dev_err(&dev->core, "blk_init_queue failed\n");
> + error = -ENOMEM;
> + goto out_cache_cleanup;
> + }
> +
> + priv->queue = queue;
> + queue->queuedata = dev;
> +
> + gendisk = alloc_disk(1);
> + if (!gendisk) {
> + dev_err(&dev->core, "alloc_disk failed\n");
> + error = -ENOMEM;
> + goto fail_cleanup_queue;
> + }
> +
> + priv->gendisk = gendisk;
> + gendisk->major = ps3vram_major;
> + gendisk->first_minor = 0;
> + gendisk->fops = &ps3vram_fops;
> + gendisk->queue = queue;
> + gendisk->private_data = dev;
> + gendisk->driverfs_dev = &dev->core;
> + strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
> + set_capacity(gendisk, priv->size >> 9);
> +
> + task = kthread_run(ps3vram_thread, dev, DEVICE_NAME);
> + if (IS_ERR(task)) {
> + error = PTR_ERR(task);
> + goto fail_free_disk;
> + }
> + priv->thread = task;
> +
> + dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
> + gendisk->disk_name, get_capacity(gendisk) >> 11);
> +
> + add_disk(gendisk);
> + return 0;
> +
> +fail_free_disk:
> + put_disk(priv->gendisk);
> +fail_cleanup_queue:
> + blk_cleanup_queue(queue);
> +out_cache_cleanup:
> + remove_proc_entry(DEVICE_NAME, NULL);
> + ps3vram_cache_cleanup(dev);
> +out_unmap_reports:
> + iounmap(priv->reports);
> +out_unmap_ctrl:
> + iounmap(priv->ctrl);
> +out_unmap_vram:
> + iounmap(priv->ddr_base);
> +out_free_context:
> + lv1_gpu_context_free(priv->context_handle);
> +out_free_memory:
> + lv1_gpu_memory_free(priv->memory_handle);
> +out_close_gpu:
> + ps3_close_hv_device(dev);
> +out_free_xdr_buf:
> + free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
> +fail_free_priv:
> + kfree(priv);
> + dev->core.driver_data = NULL;
> +fail:
> + return error;
> +}
> +
> +static int ps3vram_remove(struct ps3_system_bus_device *dev)
> +{
> + struct ps3vram_priv *priv = dev->core.driver_data;
> +
> + BUG_ON(!elv_queue_empty(priv->queue));
> +
> + kthread_stop(priv->thread);
> + del_gendisk(priv->gendisk);
> + blk_cleanup_queue(priv->queue);
> + put_disk(priv->gendisk);
> + remove_proc_entry(DEVICE_NAME, NULL);
> + ps3vram_cache_cleanup(dev);
> + iounmap(priv->reports);
> + iounmap(priv->ctrl);
> + iounmap(priv->ddr_base);
> + lv1_gpu_context_free(priv->context_handle);
> + lv1_gpu_memory_free(priv->memory_handle);
> + ps3_close_hv_device(dev);
> + free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
> + kfree(priv);
> + dev->core.driver_data = NULL;
> + return 0;
> +}
> +
> +static struct ps3_system_bus_driver ps3vram = {
> + .match_id = PS3_MATCH_ID_GPU,
> + .match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK,
> + .core.name = DEVICE_NAME,
> + .core.owner = THIS_MODULE,
> + .probe = ps3vram_probe,
> + .remove = ps3vram_remove,
> + .shutdown = ps3vram_remove,
> +};
> +
> +
> +static int __init ps3vram_init(void)
> +{
> + int error;
> +
> + if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
> + return -ENODEV;
> +
> + error = register_blkdev(0, DEVICE_NAME);
> + if (error <= 0) {
> + pr_err("%s: register_blkdev failed %d\n", DEVICE_NAME, error);
> + return error;
> + }
> + ps3vram_major = error;
> +
> + pr_info("%s: registered block device major %d\n", DEVICE_NAME,
> + ps3vram_major);
> +
> + error = ps3_system_bus_driver_register(&ps3vram);
> + if (error)
> + unregister_blkdev(ps3vram_major, DEVICE_NAME);
> +
> + return error;
> +}
> +
> +static void __exit ps3vram_exit(void)
> +{
> + ps3_system_bus_driver_unregister(&ps3vram);
> + unregister_blkdev(ps3vram_major, DEVICE_NAME);
> +}
> +
> +module_init(ps3vram_init);
> +module_exit(ps3vram_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("PS3 Video RAM Storage Driver");
> +MODULE_AUTHOR("Sony Corporation");
> +MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
Geert Uytterhoeven wrote:
> Ideally, we think it would be best if the existing MTD-based ps3vram driver
> would be replaced by the new block-based ps3vram driver before 2.6.29 is
> released. This would relieve the burden of supporting two different swap space
> schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> maintainer's shoulders, as in that case there would never have been a stable
> kernel version containing the MTD-based ps3vram driver.
> [..]
> - ps3vram-ng is faster than ps3vram:
> o 1 MiB blocks: +50% (read), +5% (write)
> o 4 KiB blocks: +50% (read), +5% (write)
> o 512 B blocks: +10% (read), +10% (write)
>
Fwiw, it's a useful swap device, since space is so limited -- the
performance improvements are certainly appealing too.
Marcus
On Thu, Mar 05 2009, Benjamin Herrenschmidt wrote:
> On Wed, 2009-03-04 at 14:57 +0100, Geert Uytterhoeven wrote:
> > Hi,
> >
> > Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> > device, as requested by Arnd Bergmann.
> >
> > The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
> > kernel in 2.6.29-rc1.
> >
> > Ideally, we think it would be best if the existing MTD-based ps3vram driver
> > would be replaced by the new block-based ps3vram driver before 2.6.29 is
> > released. This would relieve the burden of supporting two different swap space
> > schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> > maintainer's shoulders, as in that case there would never have been a stable
> > kernel version containing the MTD-based ps3vram driver.
>
> This is very very very late ... we are at rc7, probably one rc before
> final... as much as I like integrating drivers later, I'll ask Linus
> opinion on this one.
>
> Linus ? What do you reckon ? Maybe a better option is just to remove
> ps3nvram from .29 and merge the new one in .30 ?
It's an isolated driver for a special purpose platform, I would have
zero problems merging it :-)
--
Jens Axboe
Am 04.03.2009 um 14:57 schrieb Geert Uytterhoeven:
> Ideally, we think it would be best if the existing MTD-based ps3vram
> driver
> would be replaced by the new block-based ps3vram driver before
> 2.6.29 is
> released. This would relieve the burden of supporting two different
> swap space
> schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the
> distro
> maintainer's shoulders, as in that case there would never have been
> a stable
> kernel version containing the MTD-based ps3vram driver.
openSuSE already ships with the ps3vram driver since a two releases.
A simple name based udev rule could symlink ps3vram to mtdblock0, so
an upgrade
will not break existing setups.
> +obj-$(CONFIG_PS3_VRAM) += ps3vram_ng.o
Please give the driver the obvious name "ps3vram", that way upgrading
will be smooth.
I see our old mtddriver does not have modalias support for autoloading.
Hopefully the new driver has this feature.
Olaf
On Thu, 5 Mar 2009, Olaf Hering wrote:
> Am 04.03.2009 um 14:57 schrieb Geert Uytterhoeven:
> >Ideally, we think it would be best if the existing MTD-based ps3vram driver
> >would be replaced by the new block-based ps3vram driver before 2.6.29 is
> >released. This would relieve the burden of supporting two different swap
> >space
> >schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> >maintainer's shoulders, as in that case there would never have been a stable
> >kernel version containing the MTD-based ps3vram driver.
>
> openSuSE already ships with the ps3vram driver since a two releases.
> A simple name based udev rule could symlink ps3vram to mtdblock0, so an
> upgrade
> will not break existing setups.
OK.
> >+obj-$(CONFIG_PS3_VRAM) += ps3vram_ng.o
>
> Please give the driver the obvious name "ps3vram", that way upgrading will be
> smooth.
Sure, as I said:
| I'll submit a patch to remove the MTD
| ps3vram and add the new driver as ps3vram (instead of ps3vram-ng).
> I see our old mtddriver does not have modalias support for autoloading.
You forgot to backport commit 0a2d15b928e0b1673d4ed5f48d95af211b6fcc06
("mtd/ps3vram: Add modalias support to the ps3vram driver") to the openSuSE
tree?
> Hopefully the new driver has this feature.
Yes, it has.
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Wed, Mar 04 2009, Geert Uytterhoeven wrote:
> Hi,
>
> Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> device, as requested by Arnd Bergmann.
>
> The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
> kernel in 2.6.29-rc1.
>
> Ideally, we think it would be best if the existing MTD-based ps3vram driver
> would be replaced by the new block-based ps3vram driver before 2.6.29 is
> released. This would relieve the burden of supporting two different swap space
> schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> maintainer's shoulders, as in that case there would never have been a stable
> kernel version containing the MTD-based ps3vram driver.
>
> What do you think? If this is accepted, I'll submit a patch to remove the MTD
> ps3vram and add the new driver as ps3vram (instead of ps3vram-ng).
>
> Thanks for your (review and other) comments!
I'd rewrite this as a ->make_request_fn handler instead. Then you can
get rid of the kernel thread. IOW, change
queue = blk_init_queue(ps3vram_request, &priv->lock);
to
queue = blk_alloc_queue(GFP_KERNEL);
blk_queue_make_request(queue, ps3vram_make_request);
Add error handling of course, and call blk_queue_max_*() to set your
limits for this device. Then add a ps3vram_make_request() ala:
static void ps3vram_do_request(struct request_queue *q, struct bio *bio)
{
struct ps3_system_bus_device *dev = q->queuedata;
struct ps3vram_priv *priv = dev->core.driver_data;
int write, res, err = -EIO;
struct bio_vec *bv;
sector_t sector;
loff_t offset;
size_t len, retlen;
int i;
write = bio_data_dir(bio) == WRITE;
sector = bio->bi_sector;
bio_for_each_segment(bv, bio, i) {
char *ptr = page_address(bv->bv_page) + bv->bv_offset;
len = bv->bv_len;
offset = sector << 9;
if (write)
res = ps3vram_write(dev, offset, len, &retlen, ptr);
else
res = ps3vram_read(dev, offset, len, &retlen, ptr);
if (res) {
dev_err(&dev->core, "%s failed\n", op);
goto out;
}
if (retlen != len) {
dev_err(&dev->core, "Short %s\n", op);
goto out;
}
sector += (len >> 9);
}
dev_dbg(&dev->core, "%s completed\n", op);
err = 0;
out:
bio_endio(bio, err);
}
I just typed it here, so if it doesn't compile you get to keep the
pieces :-)
Since ps3 is very RAM limited, I didn't bother with any highmem mapping
for the bio, since I gather that isn't an issue on that platform. You
may want to detail that in a comment above the page_addres() thing at
the top of the loop, though.
--
Jens Axboe
On Thu, 5 Mar 2009, Geert Uytterhoeven wrote:
> On Thu, 5 Mar 2009, Olaf Hering wrote:
> > I see our old mtddriver does not have modalias support for autoloading.
>
> You forgot to backport commit 0a2d15b928e0b1673d4ed5f48d95af211b6fcc06
> ("mtd/ps3vram: Add modalias support to the ps3vram driver") to the openSuSE
> tree?
Or do you mean that udev doesn't load mtdblock automatically?
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
Hi Jens,
On Thu, 5 Mar 2009, Jens Axboe wrote:
> On Wed, Mar 04 2009, Geert Uytterhoeven wrote:
> > Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> > device, as requested by Arnd Bergmann.
> >
> > The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
> > kernel in 2.6.29-rc1.
> >
> > Ideally, we think it would be best if the existing MTD-based ps3vram driver
> > would be replaced by the new block-based ps3vram driver before 2.6.29 is
> > released. This would relieve the burden of supporting two different swap space
> > schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> > maintainer's shoulders, as in that case there would never have been a stable
> > kernel version containing the MTD-based ps3vram driver.
> >
> > What do you think? If this is accepted, I'll submit a patch to remove the MTD
> > ps3vram and add the new driver as ps3vram (instead of ps3vram-ng).
> >
> > Thanks for your (review and other) comments!
>
> I'd rewrite this as a ->make_request_fn handler instead. Then you can
> get rid of the kernel thread. IOW, change
>
> queue = blk_init_queue(ps3vram_request, &priv->lock);
>
> to
>
> queue = blk_alloc_queue(GFP_KERNEL);
> blk_queue_make_request(queue, ps3vram_make_request);
Thanks, I didn't know that part...
> Add error handling of course, and call blk_queue_max_*() to set your
> limits for this device.
I took out the blk_queue_max_*() calls (compared to ps3disk.c), as none of the
limits apply, and the defaults are fine.
Is that OK, or is it better to make it explicit?
> Then add a ps3vram_make_request() ala:
> static void ps3vram_do_request(struct request_queue *q, struct bio *bio)
> {
> struct ps3_system_bus_device *dev = q->queuedata;
> struct ps3vram_priv *priv = dev->core.driver_data;
> int write, res, err = -EIO;
> struct bio_vec *bv;
> sector_t sector;
> loff_t offset;
> size_t len, retlen;
> int i;
>
> write = bio_data_dir(bio) == WRITE;
>
> sector = bio->bi_sector;
> bio_for_each_segment(bv, bio, i) {
> char *ptr = page_address(bv->bv_page) + bv->bv_offset;
>
> len = bv->bv_len;
> offset = sector << 9;
>
> if (write)
> res = ps3vram_write(dev, offset, len, &retlen, ptr);
> else
> res = ps3vram_read(dev, offset, len, &retlen, ptr);
>
> if (res) {
> dev_err(&dev->core, "%s failed\n", op);
> goto out;
> }
>
> if (retlen != len) {
> dev_err(&dev->core, "Short %s\n", op);
> goto out;
> }
> sector += (len >> 9);
> }
>
> dev_dbg(&dev->core, "%s completed\n", op);
> err = 0;
> out:
> bio_endio(bio, err);
> }
>
> I just typed it here, so if it doesn't compile you get to keep the
> pieces :-)
OK, I'll give it a try...
BTW, does this mean the `simple' way, which I used based on LDD3, is
deprecated?
> Since ps3 is very RAM limited, I didn't bother with any highmem mapping
> for the bio, since I gather that isn't an issue on that platform. You
> may want to detail that in a comment above the page_addres() thing at
> the top of the loop, though.
PS3 is ppc64, so no highmem. But I guess I best add the code for that anyway,
in case people will copy the driver in the future (I remember receiving a
similar comment for ps3disk).
Thanks for your comments!
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> Hi Jens,
>
> On Thu, 5 Mar 2009, Jens Axboe wrote:
> > On Wed, Mar 04 2009, Geert Uytterhoeven wrote:
> > > Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> > > device, as requested by Arnd Bergmann.
> > >
> > > The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
> > > kernel in 2.6.29-rc1.
> > >
> > > Ideally, we think it would be best if the existing MTD-based ps3vram driver
> > > would be replaced by the new block-based ps3vram driver before 2.6.29 is
> > > released. This would relieve the burden of supporting two different swap space
> > > schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> > > maintainer's shoulders, as in that case there would never have been a stable
> > > kernel version containing the MTD-based ps3vram driver.
> > >
> > > What do you think? If this is accepted, I'll submit a patch to remove the MTD
> > > ps3vram and add the new driver as ps3vram (instead of ps3vram-ng).
> > >
> > > Thanks for your (review and other) comments!
> >
> > I'd rewrite this as a ->make_request_fn handler instead. Then you can
> > get rid of the kernel thread. IOW, change
> >
> > queue = blk_init_queue(ps3vram_request, &priv->lock);
> >
> > to
> >
> > queue = blk_alloc_queue(GFP_KERNEL);
> > blk_queue_make_request(queue, ps3vram_make_request);
>
> Thanks, I didn't know that part...
>
> > Add error handling of course, and call blk_queue_max_*() to set your
> > limits for this device.
>
> I took out the blk_queue_max_*() calls (compared to ps3disk.c), as
> none of the limits apply, and the defaults are fine.
>
> Is that OK, or is it better to make it explicit?
I think it's always good to make it explicit. Plus for this case you
definitely need it, as blk_init_queue() wont do it for you anymore.
> > Then add a ps3vram_make_request() ala:
>
> > static void ps3vram_do_request(struct request_queue *q, struct bio *bio)
> > {
> > struct ps3_system_bus_device *dev = q->queuedata;
> > struct ps3vram_priv *priv = dev->core.driver_data;
> > int write, res, err = -EIO;
> > struct bio_vec *bv;
> > sector_t sector;
> > loff_t offset;
> > size_t len, retlen;
> > int i;
> >
> > write = bio_data_dir(bio) == WRITE;
> >
> > sector = bio->bi_sector;
> > bio_for_each_segment(bv, bio, i) {
> > char *ptr = page_address(bv->bv_page) + bv->bv_offset;
> >
> > len = bv->bv_len;
> > offset = sector << 9;
> >
> > if (write)
> > res = ps3vram_write(dev, offset, len, &retlen, ptr);
> > else
> > res = ps3vram_read(dev, offset, len, &retlen, ptr);
> >
> > if (res) {
> > dev_err(&dev->core, "%s failed\n", op);
> > goto out;
> > }
> >
> > if (retlen != len) {
> > dev_err(&dev->core, "Short %s\n", op);
> > goto out;
> > }
> > sector += (len >> 9);
> > }
> >
> > dev_dbg(&dev->core, "%s completed\n", op);
> > err = 0;
> > out:
> > bio_endio(bio, err);
> > }
> >
> > I just typed it here, so if it doesn't compile you get to keep the
> > pieces :-)
>
> OK, I'll give it a try...
>
> BTW, does this mean the `simple' way, which I used based on LDD3, is
> deprecated?
Depends.. It's obviously not a very effective approach, since you punt
to a thread for each request. But if you need the IO scheduler helping
you with merging and sorting (for a rotational device), it still has
some merit. For this particular case, the ->make_request_fn approach is
much better.
> > Since ps3 is very RAM limited, I didn't bother with any highmem mapping
> > for the bio, since I gather that isn't an issue on that platform. You
> > may want to detail that in a comment above the page_addres() thing at
> > the top of the loop, though.
>
> PS3 is ppc64, so no highmem. But I guess I best add the code for that anyway,
> in case people will copy the driver in the future (I remember receiving a
> similar comment for ps3disk).
I'd prefer just the comment, since you have to be able to handle
disabled interrupts on return from bvec_kmap_irq() if you use that. And
that would just complicate your driver, for something that you don't
need. So I'd put the comment in there stating why it's OK to just use
page_address() instead. If people copy-paste the code to some other
driver, then they will get the comment as well.
--
Jens Axboe
On Thu, 5 Mar 2009, Jens Axboe wrote:
> On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > On Thu, 5 Mar 2009, Jens Axboe wrote:
> > > On Wed, Mar 04 2009, Geert Uytterhoeven wrote:
> > > > Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> > > > device, as requested by Arnd Bergmann.
> > > I'd rewrite this as a ->make_request_fn handler instead. Then you can
> > > get rid of the kernel thread. IOW, change
> > >
> > > queue = blk_init_queue(ps3vram_request, &priv->lock);
> > >
> > > to
> > >
> > > queue = blk_alloc_queue(GFP_KERNEL);
> > > blk_queue_make_request(queue, ps3vram_make_request);
> >
> > Thanks, I didn't know that part...
> >
> > > Add error handling of course, and call blk_queue_max_*() to set your
> > > limits for this device.
> >
> > I took out the blk_queue_max_*() calls (compared to ps3disk.c), as
> > none of the limits apply, and the defaults are fine.
> >
> > Is that OK, or is it better to make it explicit?
>
> I think it's always good to make it explicit. Plus for this case you
> definitely need it, as blk_init_queue() wont do it for you anymore.
blk_queue_make_request() does it for me, too:
void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{
...
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
...
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
...
blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
...
}
struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
...
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
...
}
> > > Then add a ps3vram_make_request() ala:
> >
> > > static void ps3vram_do_request(struct request_queue *q, struct bio *bio)
> > > {
> > > }
> > >
> > > I just typed it here, so if it doesn't compile you get to keep the
> > > pieces :-)
> >
> > OK, I'll give it a try...
> >
> > BTW, does this mean the `simple' way, which I used based on LDD3, is
> > deprecated?
>
> Depends.. It's obviously not a very effective approach, since you punt
> to a thread for each request. But if you need the IO scheduler helping
> you with merging and sorting (for a rotational device), it still has
> some merit. For this particular case, the ->make_request_fn approach is
> much better.
Without the thread, performance indeed increased.
But then I noticed ps3vram_make_request() may be called concurrently, so I had
to add a mutex to avoid data corruption. This slows the driver down, and in the
end, the version with a thread turns out to be ca. 1% faster. The version
without a thread is about 50 lines less code, though.
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Thu, Mar 05, Geert Uytterhoeven wrote:
> On Thu, 5 Mar 2009, Geert Uytterhoeven wrote:
> > On Thu, 5 Mar 2009, Olaf Hering wrote:
> > > I see our old mtddriver does not have modalias support for autoloading.
> >
> > You forgot to backport commit 0a2d15b928e0b1673d4ed5f48d95af211b6fcc06
> > ("mtd/ps3vram: Add modalias support to the ps3vram driver") to the openSuSE
> > tree?
>
> Or do you mean that udev doesn't load mtdblock automatically?
I have not tried current kernels.
Our older versions of mtd based ps3vram do not have the autoloading feature.
So, please merge the block based ps3vram.ko.
Thanks for pushing it into mainline.
Olaf
On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> On Thu, 5 Mar 2009, Jens Axboe wrote:
> > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > On Thu, 5 Mar 2009, Jens Axboe wrote:
> > > > On Wed, Mar 04 2009, Geert Uytterhoeven wrote:
> > > > > Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> > > > > device, as requested by Arnd Bergmann.
>
> > > > I'd rewrite this as a ->make_request_fn handler instead. Then you can
> > > > get rid of the kernel thread. IOW, change
> > > >
> > > > queue = blk_init_queue(ps3vram_request, &priv->lock);
> > > >
> > > > to
> > > >
> > > > queue = blk_alloc_queue(GFP_KERNEL);
> > > > blk_queue_make_request(queue, ps3vram_make_request);
> > >
> > > Thanks, I didn't know that part...
> > >
> > > > Add error handling of course, and call blk_queue_max_*() to set your
> > > > limits for this device.
> > >
> > > I took out the blk_queue_max_*() calls (compared to ps3disk.c), as
> > > none of the limits apply, and the defaults are fine.
> > >
> > > Is that OK, or is it better to make it explicit?
> >
> > I think it's always good to make it explicit. Plus for this case you
> > definitely need it, as blk_init_queue() wont do it for you anymore.
>
> blk_queue_make_request() does it for me, too:
>
> void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
> {
> ...
> blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
> blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
> ...
> blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
> ...
> blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
> ...
> }
>
> struct request_queue *
> blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
> {
> ...
> blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
>
> blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
> blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
> ...
> }
Indeed, there's some duplicated code in blk_init_queue_node(), I'll make
sure to get rid of that!
> > > > Then add a ps3vram_make_request() ala:
> > >
> > > > static void ps3vram_do_request(struct request_queue *q, struct bio *bio)
> > > > {
>
> > > > }
> > > >
> > > > I just typed it here, so if it doesn't compile you get to keep the
> > > > pieces :-)
> > >
> > > OK, I'll give it a try...
> > >
> > > BTW, does this mean the `simple' way, which I used based on LDD3, is
> > > deprecated?
> >
> > Depends.. It's obviously not a very effective approach, since you punt
> > to a thread for each request. But if you need the IO scheduler helping
> > you with merging and sorting (for a rotational device), it still has
> > some merit. For this particular case, the ->make_request_fn approach is
> > much better.
>
> Without the thread, performance indeed increased.
>
> But then I noticed ps3vram_make_request() may be called concurrently,
> so I had to add a mutex to avoid data corruption. This slows the
> driver down, and in the end, the version with a thread turns out to be
> ca. 1% faster. The version without a thread is about 50 lines less
> code, though.
That is correct, ->make_request_fn may get reentered. I'm not surprised
that performance dropped if you just shoved everything under a mutex.
You could be a little more smart and queue concurrent bio's for
processing when the current one is complete though, there are several
approaches there that be a lot faster than going all the way through the
IO stack and scheduler just to avoid concurrency.
--
Jens Axboe
On Fri, 6 Mar 2009, Jens Axboe wrote:
> On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > But then I noticed ps3vram_make_request() may be called concurrently,
> > so I had to add a mutex to avoid data corruption. This slows the
> > driver down, and in the end, the version with a thread turns out to be
> > ca. 1% faster. The version without a thread is about 50 lines less
> > code, though.
>
> That is correct, ->make_request_fn may get reentered. I'm not surprised
> that performance dropped if you just shoved everything under a mutex.
> You could be a little more smart and queue concurrent bio's for
> processing when the current one is complete though, there are several
> approaches there that be a lot faster than going all the way through the
> IO stack and scheduler just to avoid concurrency.
Yes, using a spinlock and queueing requests on a list if the driver is busy can
be done after 2.6.29...
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Thu, 5 Mar 2009, Jens Axboe wrote:
> On Thu, Mar 05 2009, Benjamin Herrenschmidt wrote:
> > On Wed, 2009-03-04 at 14:57 +0100, Geert Uytterhoeven wrote:
> > > Below is the rewrite of the PS3 Video RAM Storage Driver as a plain block
> > > device, as requested by Arnd Bergmann.
> > >
> > > The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
> > > kernel in 2.6.29-rc1.
> > >
> > > Ideally, we think it would be best if the existing MTD-based ps3vram driver
> > > would be replaced by the new block-based ps3vram driver before 2.6.29 is
> > > released. This would relieve the burden of supporting two different swap space
> > > schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
> > > maintainer's shoulders, as in that case there would never have been a stable
> > > kernel version containing the MTD-based ps3vram driver.
> >
> > This is very very very late ... we are at rc7, probably one rc before
> > final... as much as I like integrating drivers later, I'll ask Linus
> > opinion on this one.
> >
> > Linus ? What do you reckon ? Maybe a better option is just to remove
> > ps3nvram from .29 and merge the new one in .30 ?
>
> It's an isolated driver for a special purpose platform, I would have
> zero problems merging it :-)
Here's the new version, incorporating all the review comments.
Please apply for 2.6.29, thanks!
---
>From d7ddc1aaee1ff6dd6a73bd3663b6c390800e0500 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <[email protected]>
Date: Wed, 25 Feb 2009 18:32:10 +0100
Subject: [PATCH] ps3/block: Replace mtd/ps3vram by block/ps3vram
Convert the PS3 Video RAM Storage Driver from an MTD driver to a plain block
device driver.
The ps3vram driver exposes unused video RAM on the PS3 as a block device
suitable for storage or swap. Fast data transfer is achieved using a local
cache in system RAM and DMA transfers via the GPU.
The new driver is ca. 50% faster for reading, and ca. 10% for writing.
Signed-off-by: Geert Uytterhoeven <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Vivien Chappelier <[email protected]>
Cc: Jim Paris <[email protected]>
---
The MTD-based PS3 Video RAM Storage Driver was integrated into the mainline
kernel in 2.6.29-rc1.
Ideally, we think it would be best if the existing MTD-based ps3vram driver
would be replaced by the new block-based ps3vram driver before 2.6.29 is
released. This would relieve the burden of supporting two different swap space
schemes on PS3 (swap on /dev/mtdblock0 vs. /dev/ps3vram) from the distro
maintainer's shoulders, as in that case there would never have been a stable
kernel version containing the MTD-based ps3vram driver.
Changes since previous submission (Wed, 4 Mar 2009 14:57:20 +0100 (CET)):
- Use blk_queue_make_request() to get rid of the thread
- Add a mutex (cfr. the old driver), as ps3vram_make_request() may be called
concurrently
TO DO (after 2.6.29): use a spinlock and a list to queue requests while the
driver is busy
- Remove the old MTD-based ps3vram driver and rename ps3vram-ng to ps3vram
- Make PS3_VRAM depend on FB_PS3=y and m for now
ps3vram relies on ps3fb being initialized first. The easiest way to do this
is by making ps3vram modular, and ps3fb builtin
- Remove the dependency on ps3fb_videomemory.size
The loop to reduce ddr_size until it succeeds does the right thing anyway,
and a few MiB of DDR RAM are reserved by the hypervisor
- lv1 return codes can be int
- Correct a few debug annotations
arch/powerpc/platforms/ps3/Kconfig | 7 +
drivers/block/Makefile | 1 +
drivers/block/ps3vram.c | 865 ++++++++++++++++++++++++++++++++++++
drivers/mtd/devices/Kconfig | 7 -
drivers/mtd/devices/Makefile | 1 -
drivers/mtd/devices/ps3vram.c | 768 --------------------------------
6 files changed, 873 insertions(+), 776 deletions(-)
create mode 100644 drivers/block/ps3vram.c
delete mode 100644 drivers/mtd/devices/ps3vram.c
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index 920cf7a..740ef56 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -128,6 +128,13 @@ config PS3_FLASH
be disabled on the kernel command line using "ps3flash=off", to
not allocate this fixed buffer.
+config PS3_VRAM
+ tristate "PS3 Video RAM Storage Driver"
+ depends on FB_PS3=y && BLOCK && m
+ help
+ This driver allows you to use excess PS3 video RAM as volatile
+ storage or system swap.
+
config PS3_LPM
tristate "PS3 Logical Performance Monitor support"
depends on PPC_PS3
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 204332b..87e120e 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
obj-$(CONFIG_PS3_DISK) += ps3disk.o
+obj-$(CONFIG_PS3_VRAM) += ps3vram.o
obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
new file mode 100644
index 0000000..393ed67
--- /dev/null
+++ b/drivers/block/ps3vram.c
@@ -0,0 +1,865 @@
+/*
+ * ps3vram - Use extra PS3 video ram as MTD block device.
+ *
+ * Copyright 2009 Sony Corporation
+ *
+ * Based on the MTD ps3vram driver, which is
+ * Copyright (c) 2007-2008 Jim Paris <[email protected]>
+ * Added support RSX DMA Vivien Chappelier <[email protected]>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+
+#define DEVICE_NAME "ps3vram"
+
+
+#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
+#define XDR_IOIF 0x0c000000
+
+#define FIFO_BASE XDR_IOIF
+#define FIFO_SIZE (64 * 1024)
+
+#define DMA_PAGE_SIZE (4 * 1024)
+
+#define CACHE_PAGE_SIZE (256 * 1024)
+#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
+
+#define CACHE_OFFSET CACHE_PAGE_SIZE
+#define FIFO_OFFSET 0
+
+#define CTRL_PUT 0x10
+#define CTRL_GET 0x11
+#define CTRL_TOP 0x15
+
+#define UPLOAD_SUBCH 1
+#define DOWNLOAD_SUBCH 2
+
+#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
+#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
+
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT 0x601
+
+#define CACHE_PAGE_PRESENT 1
+#define CACHE_PAGE_DIRTY 2
+
+struct ps3vram_tag {
+ unsigned int address;
+ unsigned int flags;
+};
+
+struct ps3vram_cache {
+ unsigned int page_count;
+ unsigned int page_size;
+ struct ps3vram_tag *tags;
+ unsigned int hit;
+ unsigned int miss;
+};
+
+struct ps3vram_priv {
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+
+ u64 size;
+
+ u64 memory_handle;
+ u64 context_handle;
+ u32 *ctrl;
+ u32 *reports;
+ u8 __iomem *ddr_base;
+ u8 *xdr_buf;
+
+ u32 *fifo_base;
+ u32 *fifo_ptr;
+
+ struct ps3vram_cache cache;
+
+ /* Used to serialize cache/DMA operations */
+ struct mutex lock;
+};
+
+
+static int ps3vram_major;
+
+
+static struct block_device_operations ps3vram_fops = {
+ .owner = THIS_MODULE,
+};
+
+
+#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
+#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
+#define DMA_NOTIFIER_SIZE 0x40
+#define NOTIFIER 7 /* notifier used for completion report */
+
+static char *size = "256M";
+module_param(size, charp, 0);
+MODULE_PARM_DESC(size, "memory size");
+
+static u32 *ps3vram_get_notifier(u32 *reports, int notifier)
+{
+ return (void *)reports + DMA_NOTIFIER_OFFSET_BASE +
+ DMA_NOTIFIER_SIZE * notifier;
+}
+
+static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ int i;
+
+ for (i = 0; i < 4; i++)
+ notify[i] = 0xffffffff;
+}
+
+static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
+ unsigned int timeout_ms)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+ do {
+ if (!notify[3])
+ return 0;
+ msleep(1);
+ } while (time_before(jiffies, timeout));
+
+ return -ETIMEDOUT;
+}
+
+static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+ priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+}
+
+static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
+ unsigned int timeout_ms)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+ do {
+ if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+ return 0;
+ msleep(1);
+ } while (time_before(jiffies, timeout));
+
+ dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
+ priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
+ priv->ctrl[CTRL_TOP]);
+
+ return -ETIMEDOUT;
+}
+
+static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
+{
+ *(priv->fifo_ptr)++ = data;
+}
+
+static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan, u32 tag,
+ u32 size)
+{
+ ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
+}
+
+static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ int status;
+
+ ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+
+ /* asking the HV for a blit will kick the FIFO */
+ status = lv1_gpu_context_attribute(priv->context_handle,
+ L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT, 0,
+ 0, 0, 0);
+ if (status)
+ dev_err(&dev->core,
+ "%s: lv1_gpu_context_attribute failed %d\n", __func__,
+ status);
+
+ priv->fifo_ptr = priv->fifo_base;
+}
+
+static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ int status;
+
+ mutex_lock(&ps3_gpu_mutex);
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
+ (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+
+ /* asking the HV for a blit will kick the FIFO */
+ status = lv1_gpu_context_attribute(priv->context_handle,
+ L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT, 0,
+ 0, 0, 0);
+ if (status)
+ dev_err(&dev->core,
+ "%s: lv1_gpu_context_attribute failed %d\n", __func__,
+ status);
+
+ if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
+ FIFO_SIZE - 1024) {
+ dev_dbg(&dev->core, "FIFO full, rewinding\n");
+ ps3vram_wait_ring(dev, 200);
+ ps3vram_rewind_ring(dev);
+ }
+
+ mutex_unlock(&ps3_gpu_mutex);
+}
+
+static void ps3vram_bind(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
+ ps3vram_out_ring(priv, 0x31337303);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
+ ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+ ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
+ ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
+
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
+ ps3vram_out_ring(priv, 0x3137c0de);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
+ ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+ ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
+ ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
+
+ ps3vram_fire_ring(dev);
+}
+
+static int ps3vram_upload(struct ps3_system_bus_device *dev,
+ unsigned int src_offset, unsigned int dst_offset,
+ int len, int count)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ ps3vram_out_ring(priv, XDR_IOIF + src_offset);
+ ps3vram_out_ring(priv, dst_offset);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, count);
+ ps3vram_out_ring(priv, (1 << 8) | 1);
+ ps3vram_out_ring(priv, 0);
+
+ ps3vram_notifier_reset(dev);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_fire_ring(dev);
+ if (ps3vram_notifier_wait(dev, 200) < 0) {
+ dev_warn(&dev->core, "%s: Notifier timeout\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ps3vram_download(struct ps3_system_bus_device *dev,
+ unsigned int src_offset, unsigned int dst_offset,
+ int len, int count)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ ps3vram_out_ring(priv, src_offset);
+ ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, count);
+ ps3vram_out_ring(priv, (1 << 8) | 1);
+ ps3vram_out_ring(priv, 0);
+
+ ps3vram_notifier_reset(dev);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_fire_ring(dev);
+ if (ps3vram_notifier_wait(dev, 200) < 0) {
+ dev_warn(&dev->core, "%s: Notifier timeout\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void ps3vram_cache_evict(struct ps3_system_bus_device *dev, int entry)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+
+ if (!(cache->tags[entry].flags & CACHE_PAGE_DIRTY))
+ return;
+
+ dev_dbg(&dev->core, "Flushing %d: 0x%08x\n", entry,
+ cache->tags[entry].address);
+ if (ps3vram_upload(dev, CACHE_OFFSET + entry * cache->page_size,
+ cache->tags[entry].address, DMA_PAGE_SIZE,
+ cache->page_size / DMA_PAGE_SIZE) < 0) {
+ dev_err(&dev->core,
+ "Failed to upload from 0x%x to " "0x%x size 0x%x\n",
+ entry * cache->page_size, cache->tags[entry].address,
+ cache->page_size);
+ }
+ cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
+}
+
+static void ps3vram_cache_load(struct ps3_system_bus_device *dev, int entry,
+ unsigned int address)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+
+ dev_dbg(&dev->core, "Fetching %d: 0x%08x\n", entry, address);
+ if (ps3vram_download(dev, address,
+ CACHE_OFFSET + entry * cache->page_size,
+ DMA_PAGE_SIZE,
+ cache->page_size / DMA_PAGE_SIZE) < 0) {
+ dev_err(&dev->core,
+ "Failed to download from 0x%x to 0x%x size 0x%x\n",
+ address, entry * cache->page_size, cache->page_size);
+ }
+
+ cache->tags[entry].address = address;
+ cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
+}
+
+
+static void ps3vram_cache_flush(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+ int i;
+
+ dev_dbg(&dev->core, "FLUSH\n");
+ for (i = 0; i < cache->page_count; i++) {
+ ps3vram_cache_evict(dev, i);
+ cache->tags[i].flags = 0;
+ }
+}
+
+static unsigned int ps3vram_cache_match(struct ps3_system_bus_device *dev,
+ loff_t address)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct ps3vram_cache *cache = &priv->cache;
+ unsigned int base;
+ unsigned int offset;
+ int i;
+ static int counter;
+
+ offset = (unsigned int) (address & (cache->page_size - 1));
+ base = (unsigned int) (address - offset);
+
+ /* fully associative check */
+ for (i = 0; i < cache->page_count; i++) {
+ if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
+ cache->tags[i].address == base) {
+ cache->hit++;
+ dev_dbg(&dev->core, "Found entry %d: 0x%08x\n", i,
+ cache->tags[i].address);
+ return i;
+ }
+ }
+
+ /* choose a random entry */
+ i = (jiffies + (counter++)) % cache->page_count;
+ dev_dbg(&dev->core, "Using entry %d\n", i);
+
+ ps3vram_cache_evict(dev, i);
+ ps3vram_cache_load(dev, i, base);
+
+ cache->miss++;
+ return i;
+}
+
+static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ priv->cache.page_count = CACHE_PAGE_COUNT;
+ priv->cache.page_size = CACHE_PAGE_SIZE;
+ priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
+ CACHE_PAGE_COUNT, GFP_KERNEL);
+ if (priv->cache.tags == NULL) {
+ dev_err(&dev->core, "Could not allocate cache tags\n");
+ return -ENOMEM;
+ }
+
+ dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
+ CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
+
+ return 0;
+}
+
+static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ ps3vram_cache_flush(dev);
+ kfree(priv->cache.tags);
+}
+
+static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+ size_t len, size_t *retlen, u_char *buf)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ unsigned int cached, count;
+
+ dev_dbg(&dev->core, "%s: from=0x%08x len=0x%zx\n", __func__,
+ (unsigned int)from, len);
+
+ if (from >= priv->size)
+ return -EIO;
+
+ if (len > priv->size - from)
+ len = priv->size - from;
+
+ /* Copy from vram to buf */
+ count = len;
+ while (count) {
+ unsigned int offset, avail;
+ unsigned int entry;
+
+ offset = (unsigned int) (from & (priv->cache.page_size - 1));
+ avail = priv->cache.page_size - offset;
+
+ mutex_lock(&priv->lock);
+
+ entry = ps3vram_cache_match(dev, from);
+ cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+ dev_dbg(&dev->core, "%s: from=%08x cached=%08x offset=%08x "
+ "avail=%08x count=%08x\n", __func__,
+ (unsigned int)from, cached, offset, avail, count);
+
+ if (avail > count)
+ avail = count;
+ memcpy(buf, priv->xdr_buf + cached, avail);
+
+ mutex_unlock(&priv->lock);
+
+ buf += avail;
+ count -= avail;
+ from += avail;
+ }
+
+ *retlen = len;
+ return 0;
+}
+
+static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+ size_t len, size_t *retlen, const u_char *buf)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ unsigned int cached, count;
+
+ if (to >= priv->size)
+ return -EIO;
+
+ if (len > priv->size - to)
+ len = priv->size - to;
+
+ /* Copy from buf to vram */
+ count = len;
+ while (count) {
+ unsigned int offset, avail;
+ unsigned int entry;
+
+ offset = (unsigned int) (to & (priv->cache.page_size - 1));
+ avail = priv->cache.page_size - offset;
+
+ mutex_lock(&priv->lock);
+
+ entry = ps3vram_cache_match(dev, to);
+ cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+ dev_dbg(&dev->core, "%s: to=%08x cached=%08x offset=%08x "
+ "avail=%08x count=%08x\n", __func__, (unsigned int)to,
+ cached, offset, avail, count);
+
+ if (avail > count)
+ avail = count;
+ memcpy(priv->xdr_buf + cached, buf, avail);
+
+ priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
+
+ mutex_unlock(&priv->lock);
+
+ buf += avail;
+ count -= avail;
+ to += avail;
+ }
+
+ *retlen = len;
+ return 0;
+}
+
+static int ps3vram_proc_show(struct seq_file *m, void *v)
+{
+ struct ps3vram_priv *priv = m->private;
+
+ seq_printf(m, "hit:%u\nmiss:%u\n", priv->cache.hit, priv->cache.miss);
+ return 0;
+}
+
+static int ps3vram_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ps3vram_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations ps3vram_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ps3vram_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void __devinit ps3vram_proc_init(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+ struct proc_dir_entry *pde;
+
+ pde = proc_create(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops);
+ if (!pde) {
+ dev_warn(&dev->core, "failed to create /proc entry\n");
+ return;
+ }
+
+ pde->owner = THIS_MODULE;
+ pde->data = priv;
+}
+
+static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct ps3_system_bus_device *dev = q->queuedata;
+ int write = bio_data_dir(bio) == WRITE;
+ const char *op = write ? "write" : "read";
+ loff_t offset = bio->bi_sector << 9;
+ int error = 0;
+ struct bio_vec *bvec;
+ unsigned int i;
+
+ dev_dbg(&dev->core, "%s\n", __func__);
+
+ bio_for_each_segment(bvec, bio, i) {
+ /* PS3 is ppc64, so we don't handle highmem */
+ char *ptr = page_address(bvec->bv_page) + bvec->bv_offset;
+ size_t len = bvec->bv_len, retlen;
+
+ dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
+ len, offset);
+ if (write)
+ error = ps3vram_write(dev, offset, len, &retlen, ptr);
+ else
+ error = ps3vram_read(dev, offset, len, &retlen, ptr);
+
+ if (error) {
+ dev_err(&dev->core, "%s failed\n", op);
+ goto out;
+ }
+
+ if (retlen != len) {
+ dev_err(&dev->core, "Short %s\n", op);
+ goto out;
+ }
+
+ offset += len;
+ }
+
+ dev_dbg(&dev->core, "%s completed\n", op);
+
+out:
+ bio_endio(bio, error);
+ return 0;
+}
+
+static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv;
+ int error, status;
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+ u64 ddr_lpar, ctrl_lpar, info_lpar, reports_lpar, ddr_size,
+ reports_size;
+ char *rest;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ error = -ENOMEM;
+ goto fail;
+ }
+
+ mutex_init(&priv->lock);
+ dev->core.driver_data = priv;
+
+ priv = dev->core.driver_data;
+
+ /* Allocate XDR buffer (1MiB aligned) */
+ priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(XDR_BUF_SIZE));
+ if (priv->xdr_buf == NULL) {
+ dev_err(&dev->core, "Could not allocate XDR buffer\n");
+ error = -ENOMEM;
+ goto fail_free_priv;
+ }
+
+ /* Put FIFO at begginning of XDR buffer */
+ priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
+ priv->fifo_ptr = priv->fifo_base;
+
+ /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
+ if (ps3_open_hv_device(dev)) {
+ dev_err(&dev->core, "ps3_open_hv_device failed\n");
+ error = -EAGAIN;
+ goto out_close_gpu;
+ }
+
+ /* Request memory */
+ status = -1;
+ ddr_size = ALIGN(memparse(size, &rest), 1024*1024);
+ if (!ddr_size) {
+ dev_err(&dev->core, "Specified size is too small\n");
+ error = -EINVAL;
+ goto out_close_gpu;
+ }
+
+ while (ddr_size > 0) {
+ status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
+ &priv->memory_handle,
+ &ddr_lpar);
+ if (!status)
+ break;
+ ddr_size -= 1024*1024;
+ }
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_memory_allocate failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_xdr_buf;
+ }
+
+ /* Request context */
+ status = lv1_gpu_context_allocate(priv->memory_handle, 0,
+ &priv->context_handle, &ctrl_lpar,
+ &info_lpar, &reports_lpar,
+ &reports_size);
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_context_allocate failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_memory;
+ }
+
+ /* Map XDR buffer to RSX */
+ status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
+ ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
+ XDR_BUF_SIZE, 0);
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_context_iomap failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_context;
+ }
+
+ priv->ddr_base = ioremap_flags(ddr_lpar, ddr_size, _PAGE_NO_CACHE);
+
+ if (!priv->ddr_base) {
+ dev_err(&dev->core, "ioremap DDR failed\n");
+ error = -ENOMEM;
+ goto out_free_context;
+ }
+
+ priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
+ if (!priv->ctrl) {
+ dev_err(&dev->core, "ioremap CTRL failed\n");
+ error = -ENOMEM;
+ goto out_unmap_vram;
+ }
+
+ priv->reports = ioremap(reports_lpar, reports_size);
+ if (!priv->reports) {
+ dev_err(&dev->core, "ioremap REPORTS failed\n");
+ error = -ENOMEM;
+ goto out_unmap_ctrl;
+ }
+
+ mutex_lock(&ps3_gpu_mutex);
+ ps3vram_init_ring(dev);
+ mutex_unlock(&ps3_gpu_mutex);
+
+ priv->size = ddr_size;
+
+ ps3vram_bind(dev);
+
+ mutex_lock(&ps3_gpu_mutex);
+ error = ps3vram_wait_ring(dev, 100);
+ mutex_unlock(&ps3_gpu_mutex);
+ if (error < 0) {
+ dev_err(&dev->core, "Failed to initialize channels\n");
+ error = -ETIMEDOUT;
+ goto out_unmap_reports;
+ }
+
+ ps3vram_cache_init(dev);
+ ps3vram_proc_init(dev);
+
+ queue = blk_alloc_queue(GFP_KERNEL);
+ if (!queue) {
+ dev_err(&dev->core, "blk_alloc_queue failed\n");
+ error = -ENOMEM;
+ goto out_cache_cleanup;
+ }
+
+ priv->queue = queue;
+ queue->queuedata = dev;
+ blk_queue_make_request(queue, ps3vram_make_request);
+ blk_queue_max_phys_segments(queue, MAX_PHYS_SEGMENTS);
+ blk_queue_max_hw_segments(queue, MAX_HW_SEGMENTS);
+ blk_queue_max_segment_size(queue, MAX_SEGMENT_SIZE);
+ blk_queue_max_sectors(queue, SAFE_MAX_SECTORS);
+
+ gendisk = alloc_disk(1);
+ if (!gendisk) {
+ dev_err(&dev->core, "alloc_disk failed\n");
+ error = -ENOMEM;
+ goto fail_cleanup_queue;
+ }
+
+ priv->gendisk = gendisk;
+ gendisk->major = ps3vram_major;
+ gendisk->first_minor = 0;
+ gendisk->fops = &ps3vram_fops;
+ gendisk->queue = queue;
+ gendisk->private_data = dev;
+ gendisk->driverfs_dev = &dev->core;
+ strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
+ set_capacity(gendisk, priv->size >> 9);
+
+ dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
+ gendisk->disk_name, get_capacity(gendisk) >> 11);
+
+ add_disk(gendisk);
+ return 0;
+
+fail_cleanup_queue:
+ blk_cleanup_queue(queue);
+out_cache_cleanup:
+ remove_proc_entry(DEVICE_NAME, NULL);
+ ps3vram_cache_cleanup(dev);
+out_unmap_reports:
+ iounmap(priv->reports);
+out_unmap_ctrl:
+ iounmap(priv->ctrl);
+out_unmap_vram:
+ iounmap(priv->ddr_base);
+out_free_context:
+ lv1_gpu_context_free(priv->context_handle);
+out_free_memory:
+ lv1_gpu_memory_free(priv->memory_handle);
+out_close_gpu:
+ ps3_close_hv_device(dev);
+out_free_xdr_buf:
+ free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+fail_free_priv:
+ kfree(priv);
+ dev->core.driver_data = NULL;
+fail:
+ return error;
+}
+
+static int ps3vram_remove(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = dev->core.driver_data;
+
+ del_gendisk(priv->gendisk);
+ put_disk(priv->gendisk);
+ blk_cleanup_queue(priv->queue);
+ remove_proc_entry(DEVICE_NAME, NULL);
+ ps3vram_cache_cleanup(dev);
+ iounmap(priv->reports);
+ iounmap(priv->ctrl);
+ iounmap(priv->ddr_base);
+ lv1_gpu_context_free(priv->context_handle);
+ lv1_gpu_memory_free(priv->memory_handle);
+ ps3_close_hv_device(dev);
+ free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+ kfree(priv);
+ dev->core.driver_data = NULL;
+ return 0;
+}
+
+static struct ps3_system_bus_driver ps3vram = {
+ .match_id = PS3_MATCH_ID_GPU,
+ .match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK,
+ .core.name = DEVICE_NAME,
+ .core.owner = THIS_MODULE,
+ .probe = ps3vram_probe,
+ .remove = ps3vram_remove,
+ .shutdown = ps3vram_remove,
+};
+
+
+static int __init ps3vram_init(void)
+{
+ int error;
+
+ if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+ return -ENODEV;
+
+ error = register_blkdev(0, DEVICE_NAME);
+ if (error <= 0) {
+ pr_err("%s: register_blkdev failed %d\n", DEVICE_NAME, error);
+ return error;
+ }
+ ps3vram_major = error;
+
+ pr_info("%s: registered block device major %d\n", DEVICE_NAME,
+ ps3vram_major);
+
+ error = ps3_system_bus_driver_register(&ps3vram);
+ if (error)
+ unregister_blkdev(ps3vram_major, DEVICE_NAME);
+
+ return error;
+}
+
+static void __exit ps3vram_exit(void)
+{
+ ps3_system_bus_driver_unregister(&ps3vram);
+ unregister_blkdev(ps3vram_major, DEVICE_NAME);
+}
+
+module_init(ps3vram_init);
+module_exit(ps3vram_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PS3 Video RAM Storage Driver");
+MODULE_AUTHOR("Sony Corporation");
+MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index bc33200..6fde0a2 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -120,13 +120,6 @@ config MTD_PHRAM
doesn't have access to, memory beyond the mem=xxx limit, nvram,
memory on the video card, etc...
-config MTD_PS3VRAM
- tristate "PS3 video RAM"
- depends on FB_PS3
- help
- This driver allows you to use excess PS3 video RAM as volatile
- storage or system swap.
-
config MTD_LART
tristate "28F160xx flash driver for LART"
depends on SA1100_LART
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index e51521d..0993d5c 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,4 +16,3 @@ obj-$(CONFIG_MTD_LART) += lart.o
obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o
obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o
obj-$(CONFIG_MTD_M25P80) += m25p80.o
-obj-$(CONFIG_MTD_PS3VRAM) += ps3vram.o
diff --git a/drivers/mtd/devices/ps3vram.c b/drivers/mtd/devices/ps3vram.c
deleted file mode 100644
index d21e9be..0000000
--- a/drivers/mtd/devices/ps3vram.c
+++ /dev/null
@@ -1,768 +0,0 @@
-/**
- * ps3vram - Use extra PS3 video ram as MTD block device.
- *
- * Copyright (c) 2007-2008 Jim Paris <[email protected]>
- * Added support RSX DMA Vivien Chappelier <[email protected]>
- */
-
-#include <linux/io.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-#include <linux/version.h>
-#include <linux/gfp.h>
-#include <linux/delay.h>
-#include <linux/mtd/mtd.h>
-
-#include <asm/lv1call.h>
-#include <asm/ps3.h>
-
-#define DEVICE_NAME "ps3vram"
-
-#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
-#define XDR_IOIF 0x0c000000
-
-#define FIFO_BASE XDR_IOIF
-#define FIFO_SIZE (64 * 1024)
-
-#define DMA_PAGE_SIZE (4 * 1024)
-
-#define CACHE_PAGE_SIZE (256 * 1024)
-#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
-
-#define CACHE_OFFSET CACHE_PAGE_SIZE
-#define FIFO_OFFSET 0
-
-#define CTRL_PUT 0x10
-#define CTRL_GET 0x11
-#define CTRL_TOP 0x15
-
-#define UPLOAD_SUBCH 1
-#define DOWNLOAD_SUBCH 2
-
-#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
-#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
-
-#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT 0x601
-
-struct mtd_info ps3vram_mtd;
-
-#define CACHE_PAGE_PRESENT 1
-#define CACHE_PAGE_DIRTY 2
-
-struct ps3vram_tag {
- unsigned int address;
- unsigned int flags;
-};
-
-struct ps3vram_cache {
- unsigned int page_count;
- unsigned int page_size;
- struct ps3vram_tag *tags;
-};
-
-struct ps3vram_priv {
- u64 memory_handle;
- u64 context_handle;
- u32 *ctrl;
- u32 *reports;
- u8 __iomem *ddr_base;
- u8 *xdr_buf;
-
- u32 *fifo_base;
- u32 *fifo_ptr;
-
- struct device *dev;
- struct ps3vram_cache cache;
-
- /* Used to serialize cache/DMA operations */
- struct mutex lock;
-};
-
-#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
-#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
-#define DMA_NOTIFIER_SIZE 0x40
-#define NOTIFIER 7 /* notifier used for completion report */
-
-/* A trailing '-' means to subtract off ps3fb_videomemory.size */
-char *size = "256M-";
-module_param(size, charp, 0);
-MODULE_PARM_DESC(size, "memory size");
-
-static u32 *ps3vram_get_notifier(u32 *reports, int notifier)
-{
- return (void *) reports +
- DMA_NOTIFIER_OFFSET_BASE +
- DMA_NOTIFIER_SIZE * notifier;
-}
-
-static void ps3vram_notifier_reset(struct mtd_info *mtd)
-{
- int i;
-
- struct ps3vram_priv *priv = mtd->priv;
- u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
- for (i = 0; i < 4; i++)
- notify[i] = 0xffffffff;
-}
-
-static int ps3vram_notifier_wait(struct mtd_info *mtd, unsigned int timeout_ms)
-{
- struct ps3vram_priv *priv = mtd->priv;
- u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
- unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
-
- do {
- if (!notify[3])
- return 0;
- msleep(1);
- } while (time_before(jiffies, timeout));
-
- return -ETIMEDOUT;
-}
-
-static void ps3vram_init_ring(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
- priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
-}
-
-static int ps3vram_wait_ring(struct mtd_info *mtd, unsigned int timeout_ms)
-{
- struct ps3vram_priv *priv = mtd->priv;
- unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
-
- do {
- if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
- return 0;
- msleep(1);
- } while (time_before(jiffies, timeout));
-
- dev_dbg(priv->dev, "%s:%d: FIFO timeout (%08x/%08x/%08x)\n", __func__,
- __LINE__, priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
- priv->ctrl[CTRL_TOP]);
-
- return -ETIMEDOUT;
-}
-
-static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
-{
- *(priv->fifo_ptr)++ = data;
-}
-
-static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan,
- u32 tag, u32 size)
-{
- ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
-}
-
-static void ps3vram_rewind_ring(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
- u64 status;
-
- ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
-
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
-
- /* asking the HV for a blit will kick the fifo */
- status = lv1_gpu_context_attribute(priv->context_handle,
- L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
- 0, 0, 0, 0);
- if (status)
- dev_err(priv->dev, "%s:%d: lv1_gpu_context_attribute failed\n",
- __func__, __LINE__);
-
- priv->fifo_ptr = priv->fifo_base;
-}
-
-static void ps3vram_fire_ring(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
- u64 status;
-
- mutex_lock(&ps3_gpu_mutex);
-
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
- (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
-
- /* asking the HV for a blit will kick the fifo */
- status = lv1_gpu_context_attribute(priv->context_handle,
- L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
- 0, 0, 0, 0);
- if (status)
- dev_err(priv->dev, "%s:%d: lv1_gpu_context_attribute failed\n",
- __func__, __LINE__);
-
- if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
- FIFO_SIZE - 1024) {
- dev_dbg(priv->dev, "%s:%d: fifo full, rewinding\n", __func__,
- __LINE__);
- ps3vram_wait_ring(mtd, 200);
- ps3vram_rewind_ring(mtd);
- }
-
- mutex_unlock(&ps3_gpu_mutex);
-}
-
-static void ps3vram_bind(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
- ps3vram_out_ring(priv, 0x31337303);
- ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
- ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
- ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
- ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
-
- ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
- ps3vram_out_ring(priv, 0x3137c0de);
- ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
- ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
- ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
- ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
-
- ps3vram_fire_ring(mtd);
-}
-
-static int ps3vram_upload(struct mtd_info *mtd, unsigned int src_offset,
- unsigned int dst_offset, int len, int count)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- ps3vram_begin_ring(priv, UPLOAD_SUBCH,
- NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
- ps3vram_out_ring(priv, XDR_IOIF + src_offset);
- ps3vram_out_ring(priv, dst_offset);
- ps3vram_out_ring(priv, len);
- ps3vram_out_ring(priv, len);
- ps3vram_out_ring(priv, len);
- ps3vram_out_ring(priv, count);
- ps3vram_out_ring(priv, (1 << 8) | 1);
- ps3vram_out_ring(priv, 0);
-
- ps3vram_notifier_reset(mtd);
- ps3vram_begin_ring(priv, UPLOAD_SUBCH,
- NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
- ps3vram_out_ring(priv, 0);
- ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
- ps3vram_out_ring(priv, 0);
- ps3vram_fire_ring(mtd);
- if (ps3vram_notifier_wait(mtd, 200) < 0) {
- dev_dbg(priv->dev, "%s:%d: notifier timeout\n", __func__,
- __LINE__);
- return -1;
- }
-
- return 0;
-}
-
-static int ps3vram_download(struct mtd_info *mtd, unsigned int src_offset,
- unsigned int dst_offset, int len, int count)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
- NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
- ps3vram_out_ring(priv, src_offset);
- ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
- ps3vram_out_ring(priv, len);
- ps3vram_out_ring(priv, len);
- ps3vram_out_ring(priv, len);
- ps3vram_out_ring(priv, count);
- ps3vram_out_ring(priv, (1 << 8) | 1);
- ps3vram_out_ring(priv, 0);
-
- ps3vram_notifier_reset(mtd);
- ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
- NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
- ps3vram_out_ring(priv, 0);
- ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
- ps3vram_out_ring(priv, 0);
- ps3vram_fire_ring(mtd);
- if (ps3vram_notifier_wait(mtd, 200) < 0) {
- dev_dbg(priv->dev, "%s:%d: notifier timeout\n", __func__,
- __LINE__);
- return -1;
- }
-
- return 0;
-}
-
-static void ps3vram_cache_evict(struct mtd_info *mtd, int entry)
-{
- struct ps3vram_priv *priv = mtd->priv;
- struct ps3vram_cache *cache = &priv->cache;
-
- if (cache->tags[entry].flags & CACHE_PAGE_DIRTY) {
- dev_dbg(priv->dev, "%s:%d: flushing %d : 0x%08x\n", __func__,
- __LINE__, entry, cache->tags[entry].address);
- if (ps3vram_upload(mtd,
- CACHE_OFFSET + entry * cache->page_size,
- cache->tags[entry].address,
- DMA_PAGE_SIZE,
- cache->page_size / DMA_PAGE_SIZE) < 0) {
- dev_dbg(priv->dev, "%s:%d: failed to upload from "
- "0x%x to 0x%x size 0x%x\n", __func__, __LINE__,
- entry * cache->page_size,
- cache->tags[entry].address, cache->page_size);
- }
- cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
- }
-}
-
-static void ps3vram_cache_load(struct mtd_info *mtd, int entry,
- unsigned int address)
-{
- struct ps3vram_priv *priv = mtd->priv;
- struct ps3vram_cache *cache = &priv->cache;
-
- dev_dbg(priv->dev, "%s:%d: fetching %d : 0x%08x\n", __func__, __LINE__,
- entry, address);
- if (ps3vram_download(mtd,
- address,
- CACHE_OFFSET + entry * cache->page_size,
- DMA_PAGE_SIZE,
- cache->page_size / DMA_PAGE_SIZE) < 0) {
- dev_err(priv->dev, "%s:%d: failed to download from "
- "0x%x to 0x%x size 0x%x\n", __func__, __LINE__, address,
- entry * cache->page_size, cache->page_size);
- }
-
- cache->tags[entry].address = address;
- cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
-}
-
-
-static void ps3vram_cache_flush(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
- struct ps3vram_cache *cache = &priv->cache;
- int i;
-
- dev_dbg(priv->dev, "%s:%d: FLUSH\n", __func__, __LINE__);
- for (i = 0; i < cache->page_count; i++) {
- ps3vram_cache_evict(mtd, i);
- cache->tags[i].flags = 0;
- }
-}
-
-static unsigned int ps3vram_cache_match(struct mtd_info *mtd, loff_t address)
-{
- struct ps3vram_priv *priv = mtd->priv;
- struct ps3vram_cache *cache = &priv->cache;
- unsigned int base;
- unsigned int offset;
- int i;
- static int counter;
-
- offset = (unsigned int) (address & (cache->page_size - 1));
- base = (unsigned int) (address - offset);
-
- /* fully associative check */
- for (i = 0; i < cache->page_count; i++) {
- if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
- cache->tags[i].address == base) {
- dev_dbg(priv->dev, "%s:%d: found entry %d : 0x%08x\n",
- __func__, __LINE__, i, cache->tags[i].address);
- return i;
- }
- }
-
- /* choose a random entry */
- i = (jiffies + (counter++)) % cache->page_count;
- dev_dbg(priv->dev, "%s:%d: using entry %d\n", __func__, __LINE__, i);
-
- ps3vram_cache_evict(mtd, i);
- ps3vram_cache_load(mtd, i, base);
-
- return i;
-}
-
-static int ps3vram_cache_init(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- priv->cache.page_count = CACHE_PAGE_COUNT;
- priv->cache.page_size = CACHE_PAGE_SIZE;
- priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
- CACHE_PAGE_COUNT, GFP_KERNEL);
- if (priv->cache.tags == NULL) {
- dev_err(priv->dev, "%s:%d: could not allocate cache tags\n",
- __func__, __LINE__);
- return -ENOMEM;
- }
-
- dev_info(priv->dev, "created ram cache: %d entries, %d KiB each\n",
- CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
-
- return 0;
-}
-
-static void ps3vram_cache_cleanup(struct mtd_info *mtd)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- ps3vram_cache_flush(mtd);
- kfree(priv->cache.tags);
-}
-
-static int ps3vram_erase(struct mtd_info *mtd, struct erase_info *instr)
-{
- struct ps3vram_priv *priv = mtd->priv;
-
- if (instr->addr + instr->len > mtd->size)
- return -EINVAL;
-
- mutex_lock(&priv->lock);
-
- ps3vram_cache_flush(mtd);
-
- /* Set bytes to 0xFF */
- memset_io(priv->ddr_base + instr->addr, 0xFF, instr->len);
-
- mutex_unlock(&priv->lock);
-
- instr->state = MTD_ERASE_DONE;
- mtd_erase_callback(instr);
-
- return 0;
-}
-
-static int ps3vram_read(struct mtd_info *mtd, loff_t from, size_t len,
- size_t *retlen, u_char *buf)
-{
- struct ps3vram_priv *priv = mtd->priv;
- unsigned int cached, count;
-
- dev_dbg(priv->dev, "%s:%d: from=0x%08x len=0x%zx\n", __func__, __LINE__,
- (unsigned int)from, len);
-
- if (from >= mtd->size)
- return -EINVAL;
-
- if (len > mtd->size - from)
- len = mtd->size - from;
-
- /* Copy from vram to buf */
- count = len;
- while (count) {
- unsigned int offset, avail;
- unsigned int entry;
-
- offset = (unsigned int) (from & (priv->cache.page_size - 1));
- avail = priv->cache.page_size - offset;
-
- mutex_lock(&priv->lock);
-
- entry = ps3vram_cache_match(mtd, from);
- cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
-
- dev_dbg(priv->dev, "%s:%d: from=%08x cached=%08x offset=%08x "
- "avail=%08x count=%08x\n", __func__, __LINE__,
- (unsigned int)from, cached, offset, avail, count);
-
- if (avail > count)
- avail = count;
- memcpy(buf, priv->xdr_buf + cached, avail);
-
- mutex_unlock(&priv->lock);
-
- buf += avail;
- count -= avail;
- from += avail;
- }
-
- *retlen = len;
- return 0;
-}
-
-static int ps3vram_write(struct mtd_info *mtd, loff_t to, size_t len,
- size_t *retlen, const u_char *buf)
-{
- struct ps3vram_priv *priv = mtd->priv;
- unsigned int cached, count;
-
- if (to >= mtd->size)
- return -EINVAL;
-
- if (len > mtd->size - to)
- len = mtd->size - to;
-
- /* Copy from buf to vram */
- count = len;
- while (count) {
- unsigned int offset, avail;
- unsigned int entry;
-
- offset = (unsigned int) (to & (priv->cache.page_size - 1));
- avail = priv->cache.page_size - offset;
-
- mutex_lock(&priv->lock);
-
- entry = ps3vram_cache_match(mtd, to);
- cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
-
- dev_dbg(priv->dev, "%s:%d: to=%08x cached=%08x offset=%08x "
- "avail=%08x count=%08x\n", __func__, __LINE__,
- (unsigned int)to, cached, offset, avail, count);
-
- if (avail > count)
- avail = count;
- memcpy(priv->xdr_buf + cached, buf, avail);
-
- priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
-
- mutex_unlock(&priv->lock);
-
- buf += avail;
- count -= avail;
- to += avail;
- }
-
- *retlen = len;
- return 0;
-}
-
-static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
-{
- struct ps3vram_priv *priv;
- int status;
- u64 ddr_lpar;
- u64 ctrl_lpar;
- u64 info_lpar;
- u64 reports_lpar;
- u64 ddr_size;
- u64 reports_size;
- int ret = -ENOMEM;
- char *rest;
-
- ret = -EIO;
- ps3vram_mtd.priv = kzalloc(sizeof(struct ps3vram_priv), GFP_KERNEL);
- if (!ps3vram_mtd.priv)
- goto out;
- priv = ps3vram_mtd.priv;
-
- mutex_init(&priv->lock);
- priv->dev = &dev->core;
-
- /* Allocate XDR buffer (1MiB aligned) */
- priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
- get_order(XDR_BUF_SIZE));
- if (priv->xdr_buf == NULL) {
- dev_dbg(&dev->core, "%s:%d: could not allocate XDR buffer\n",
- __func__, __LINE__);
- ret = -ENOMEM;
- goto out_free_priv;
- }
-
- /* Put FIFO at begginning of XDR buffer */
- priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
- priv->fifo_ptr = priv->fifo_base;
-
- /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
- if (ps3_open_hv_device(dev)) {
- dev_err(&dev->core, "%s:%d: ps3_open_hv_device failed\n",
- __func__, __LINE__);
- ret = -EAGAIN;
- goto out_close_gpu;
- }
-
- /* Request memory */
- status = -1;
- ddr_size = memparse(size, &rest);
- if (*rest == '-')
- ddr_size -= ps3fb_videomemory.size;
- ddr_size = ALIGN(ddr_size, 1024*1024);
- if (ddr_size <= 0) {
- dev_err(&dev->core, "%s:%d: specified size is too small\n",
- __func__, __LINE__);
- ret = -EINVAL;
- goto out_close_gpu;
- }
-
- while (ddr_size > 0) {
- status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
- &priv->memory_handle,
- &ddr_lpar);
- if (!status)
- break;
- ddr_size -= 1024*1024;
- }
- if (status || ddr_size <= 0) {
- dev_err(&dev->core, "%s:%d: lv1_gpu_memory_allocate failed\n",
- __func__, __LINE__);
- ret = -ENOMEM;
- goto out_free_xdr_buf;
- }
-
- /* Request context */
- status = lv1_gpu_context_allocate(priv->memory_handle,
- 0,
- &priv->context_handle,
- &ctrl_lpar,
- &info_lpar,
- &reports_lpar,
- &reports_size);
- if (status) {
- dev_err(&dev->core, "%s:%d: lv1_gpu_context_allocate failed\n",
- __func__, __LINE__);
- ret = -ENOMEM;
- goto out_free_memory;
- }
-
- /* Map XDR buffer to RSX */
- status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
- ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
- XDR_BUF_SIZE, 0);
- if (status) {
- dev_err(&dev->core, "%s:%d: lv1_gpu_context_iomap failed\n",
- __func__, __LINE__);
- ret = -ENOMEM;
- goto out_free_context;
- }
-
- priv->ddr_base = ioremap_flags(ddr_lpar, ddr_size, _PAGE_NO_CACHE);
-
- if (!priv->ddr_base) {
- dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
- __LINE__);
- ret = -ENOMEM;
- goto out_free_context;
- }
-
- priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
- if (!priv->ctrl) {
- dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
- __LINE__);
- ret = -ENOMEM;
- goto out_unmap_vram;
- }
-
- priv->reports = ioremap(reports_lpar, reports_size);
- if (!priv->reports) {
- dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
- __LINE__);
- ret = -ENOMEM;
- goto out_unmap_ctrl;
- }
-
- mutex_lock(&ps3_gpu_mutex);
- ps3vram_init_ring(&ps3vram_mtd);
- mutex_unlock(&ps3_gpu_mutex);
-
- ps3vram_mtd.name = "ps3vram";
- ps3vram_mtd.size = ddr_size;
- ps3vram_mtd.flags = MTD_CAP_RAM;
- ps3vram_mtd.erase = ps3vram_erase;
- ps3vram_mtd.point = NULL;
- ps3vram_mtd.unpoint = NULL;
- ps3vram_mtd.read = ps3vram_read;
- ps3vram_mtd.write = ps3vram_write;
- ps3vram_mtd.owner = THIS_MODULE;
- ps3vram_mtd.type = MTD_RAM;
- ps3vram_mtd.erasesize = CACHE_PAGE_SIZE;
- ps3vram_mtd.writesize = 1;
-
- ps3vram_bind(&ps3vram_mtd);
-
- mutex_lock(&ps3_gpu_mutex);
- ret = ps3vram_wait_ring(&ps3vram_mtd, 100);
- mutex_unlock(&ps3_gpu_mutex);
- if (ret < 0) {
- dev_err(&dev->core, "%s:%d: failed to initialize channels\n",
- __func__, __LINE__);
- ret = -ETIMEDOUT;
- goto out_unmap_reports;
- }
-
- ps3vram_cache_init(&ps3vram_mtd);
-
- if (add_mtd_device(&ps3vram_mtd)) {
- dev_err(&dev->core, "%s:%d: add_mtd_device failed\n",
- __func__, __LINE__);
- ret = -EAGAIN;
- goto out_cache_cleanup;
- }
-
- dev_info(&dev->core, "reserved %u MiB of gpu memory\n",
- (unsigned int)(ddr_size / 1024 / 1024));
-
- return 0;
-
-out_cache_cleanup:
- ps3vram_cache_cleanup(&ps3vram_mtd);
-out_unmap_reports:
- iounmap(priv->reports);
-out_unmap_ctrl:
- iounmap(priv->ctrl);
-out_unmap_vram:
- iounmap(priv->ddr_base);
-out_free_context:
- lv1_gpu_context_free(priv->context_handle);
-out_free_memory:
- lv1_gpu_memory_free(priv->memory_handle);
-out_close_gpu:
- ps3_close_hv_device(dev);
-out_free_xdr_buf:
- free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
-out_free_priv:
- kfree(ps3vram_mtd.priv);
- ps3vram_mtd.priv = NULL;
-out:
- return ret;
-}
-
-static int ps3vram_shutdown(struct ps3_system_bus_device *dev)
-{
- struct ps3vram_priv *priv;
-
- priv = ps3vram_mtd.priv;
-
- del_mtd_device(&ps3vram_mtd);
- ps3vram_cache_cleanup(&ps3vram_mtd);
- iounmap(priv->reports);
- iounmap(priv->ctrl);
- iounmap(priv->ddr_base);
- lv1_gpu_context_free(priv->context_handle);
- lv1_gpu_memory_free(priv->memory_handle);
- ps3_close_hv_device(dev);
- free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
- kfree(priv);
- return 0;
-}
-
-static struct ps3_system_bus_driver ps3vram_driver = {
- .match_id = PS3_MATCH_ID_GPU,
- .match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK,
- .core.name = DEVICE_NAME,
- .core.owner = THIS_MODULE,
- .probe = ps3vram_probe,
- .remove = ps3vram_shutdown,
- .shutdown = ps3vram_shutdown,
-};
-
-static int __init ps3vram_init(void)
-{
- return ps3_system_bus_driver_register(&ps3vram_driver);
-}
-
-static void __exit ps3vram_exit(void)
-{
- ps3_system_bus_driver_unregister(&ps3vram_driver);
-}
-
-module_init(ps3vram_init);
-module_exit(ps3vram_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jim Paris <[email protected]>");
-MODULE_DESCRIPTION("MTD driver for PS3 video RAM");
-MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
--
1.6.0.4
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> On Fri, 6 Mar 2009, Jens Axboe wrote:
> > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > so I had to add a mutex to avoid data corruption. This slows the
> > > driver down, and in the end, the version with a thread turns out to be
> > > ca. 1% faster. The version without a thread is about 50 lines less
> > > code, though.
> >
> > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > that performance dropped if you just shoved everything under a mutex.
> > You could be a little more smart and queue concurrent bio's for
> > processing when the current one is complete though, there are several
> > approaches there that be a lot faster than going all the way through the
> > IO stack and scheduler just to avoid concurrency.
>
> Yes, using a spinlock and queueing requests on a list if the driver is
> busy can be done after 2.6.29...
Certainly. Even just replacing your current mutex with a spinlock during
the memcpy() would surely be a lot faster. Or even just grabbing the
mutex before calling into the write for the duration of the bio. The way
you do it is certain context switch death :-)
--
Jens Axboe
On Fri, 6 Mar 2009, Jens Axboe wrote:
> On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > driver down, and in the end, the version with a thread turns out to be
> > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > code, though.
> > >
> > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > that performance dropped if you just shoved everything under a mutex.
> > > You could be a little more smart and queue concurrent bio's for
> > > processing when the current one is complete though, there are several
> > > approaches there that be a lot faster than going all the way through the
> > > IO stack and scheduler just to avoid concurrency.
> >
> > Yes, using a spinlock and queueing requests on a list if the driver is
> > busy can be done after 2.6.29...
>
> Certainly. Even just replacing your current mutex with a spinlock during
> the memcpy() would surely be a lot faster. Or even just grabbing the
> mutex before calling into the write for the duration of the bio. The way
> you do it is certain context switch death :-)
It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so I cannot
use a spinlock.
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> On Fri, 6 Mar 2009, Jens Axboe wrote:
> > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > > driver down, and in the end, the version with a thread turns out to be
> > > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > > code, though.
> > > >
> > > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > > that performance dropped if you just shoved everything under a mutex.
> > > > You could be a little more smart and queue concurrent bio's for
> > > > processing when the current one is complete though, there are several
> > > > approaches there that be a lot faster than going all the way through the
> > > > IO stack and scheduler just to avoid concurrency.
> > >
> > > Yes, using a spinlock and queueing requests on a list if the driver is
> > > busy can be done after 2.6.29...
> >
> > Certainly. Even just replacing your current mutex with a spinlock during
> > the memcpy() would surely be a lot faster. Or even just grabbing the
> > mutex before calling into the write for the duration of the bio. The way
> > you do it is certain context switch death :-)
>
> It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so
> I cannot use a spinlock.
Ah right, I hadn't looked close enough. But putting the mutex_lock()
outside of the bio_for_each_segment() is going to be much faster than
getting/releasing it for each segment.
--
Jens Axboe
On Fri, 6 Mar 2009, Jens Axboe wrote:
> On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > > > driver down, and in the end, the version with a thread turns out to be
> > > > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > > > code, though.
> > > > >
> > > > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > > > that performance dropped if you just shoved everything under a mutex.
> > > > > You could be a little more smart and queue concurrent bio's for
> > > > > processing when the current one is complete though, there are several
> > > > > approaches there that be a lot faster than going all the way through the
> > > > > IO stack and scheduler just to avoid concurrency.
> > > >
> > > > Yes, using a spinlock and queueing requests on a list if the driver is
> > > > busy can be done after 2.6.29...
> > >
> > > Certainly. Even just replacing your current mutex with a spinlock during
> > > the memcpy() would surely be a lot faster. Or even just grabbing the
> > > mutex before calling into the write for the duration of the bio. The way
> > > you do it is certain context switch death :-)
> >
> > It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so
> > I cannot use a spinlock.
>
> Ah right, I hadn't looked close enough. But putting the mutex_lock()
> outside of the bio_for_each_segment() is going to be much faster than
> getting/releasing it for each segment.
It doesn't seem to make any measurable difference, so I'm gonna leave it for
now.
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Mon, Mar 09 2009, Geert Uytterhoeven wrote:
> On Fri, 6 Mar 2009, Jens Axboe wrote:
> > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > > > > driver down, and in the end, the version with a thread turns out to be
> > > > > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > > > > code, though.
> > > > > >
> > > > > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > > > > that performance dropped if you just shoved everything under a mutex.
> > > > > > You could be a little more smart and queue concurrent bio's for
> > > > > > processing when the current one is complete though, there are several
> > > > > > approaches there that be a lot faster than going all the way through the
> > > > > > IO stack and scheduler just to avoid concurrency.
> > > > >
> > > > > Yes, using a spinlock and queueing requests on a list if the driver is
> > > > > busy can be done after 2.6.29...
> > > >
> > > > Certainly. Even just replacing your current mutex with a spinlock during
> > > > the memcpy() would surely be a lot faster. Or even just grabbing the
> > > > mutex before calling into the write for the duration of the bio. The way
> > > > you do it is certain context switch death :-)
> > >
> > > It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so
> > > I cannot use a spinlock.
> >
> > Ah right, I hadn't looked close enough. But putting the mutex_lock()
> > outside of the bio_for_each_segment() is going to be much faster than
> > getting/releasing it for each segment.
>
> It doesn't seem to make any measurable difference, so I'm gonna leave it for
> now.
It will depend on where the bio's are coming from. If they are all
single segment, then there will be no difference. If they contain
multiple segments, you reduce the lock/release by that amount.
But yeah, just leave it as-is for now. You can send a final patch for
inclusion though. Unless I'm mistaken, I only saw the original and then
an incremental patch for changing it to ->make_request_fn?
--
Jens Axboe
On Mon, Mar 09 2009, Jens Axboe wrote:
> On Mon, Mar 09 2009, Geert Uytterhoeven wrote:
> > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > > > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > > > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > > > > > driver down, and in the end, the version with a thread turns out to be
> > > > > > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > > > > > code, though.
> > > > > > >
> > > > > > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > > > > > that performance dropped if you just shoved everything under a mutex.
> > > > > > > You could be a little more smart and queue concurrent bio's for
> > > > > > > processing when the current one is complete though, there are several
> > > > > > > approaches there that be a lot faster than going all the way through the
> > > > > > > IO stack and scheduler just to avoid concurrency.
> > > > > >
> > > > > > Yes, using a spinlock and queueing requests on a list if the driver is
> > > > > > busy can be done after 2.6.29...
> > > > >
> > > > > Certainly. Even just replacing your current mutex with a spinlock during
> > > > > the memcpy() would surely be a lot faster. Or even just grabbing the
> > > > > mutex before calling into the write for the duration of the bio. The way
> > > > > you do it is certain context switch death :-)
> > > >
> > > > It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so
> > > > I cannot use a spinlock.
> > >
> > > Ah right, I hadn't looked close enough. But putting the mutex_lock()
> > > outside of the bio_for_each_segment() is going to be much faster than
> > > getting/releasing it for each segment.
> >
> > It doesn't seem to make any measurable difference, so I'm gonna leave it for
> > now.
>
> It will depend on where the bio's are coming from. If they are all
> single segment, then there will be no difference. If they contain
> multiple segments, you reduce the lock/release by that amount.
>
> But yeah, just leave it as-is for now. You can send a final patch for
> inclusion though. Unless I'm mistaken, I only saw the original and then
> an incremental patch for changing it to ->make_request_fn?
There was a full version, my mistake. I got confused by the removal of
the old driver in another directory :-)
--
Jens Axboe
On Mon, 9 Mar 2009, Jens Axboe wrote:
> On Mon, Mar 09 2009, Jens Axboe wrote:
> > On Mon, Mar 09 2009, Geert Uytterhoeven wrote:
> > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > > > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > > > > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > > > > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > > > > > > driver down, and in the end, the version with a thread turns out to be
> > > > > > > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > > > > > > code, though.
> > > > > > > >
> > > > > > > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > > > > > > that performance dropped if you just shoved everything under a mutex.
> > > > > > > > You could be a little more smart and queue concurrent bio's for
> > > > > > > > processing when the current one is complete though, there are several
> > > > > > > > approaches there that be a lot faster than going all the way through the
> > > > > > > > IO stack and scheduler just to avoid concurrency.
> > > > > > >
> > > > > > > Yes, using a spinlock and queueing requests on a list if the driver is
> > > > > > > busy can be done after 2.6.29...
> > > > > >
> > > > > > Certainly. Even just replacing your current mutex with a spinlock during
> > > > > > the memcpy() would surely be a lot faster. Or even just grabbing the
> > > > > > mutex before calling into the write for the duration of the bio. The way
> > > > > > you do it is certain context switch death :-)
> > > > >
> > > > > It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so
> > > > > I cannot use a spinlock.
> > > >
> > > > Ah right, I hadn't looked close enough. But putting the mutex_lock()
> > > > outside of the bio_for_each_segment() is going to be much faster than
> > > > getting/releasing it for each segment.
> > >
> > > It doesn't seem to make any measurable difference, so I'm gonna leave it for
> > > now.
> >
> > It will depend on where the bio's are coming from. If they are all
> > single segment, then there will be no difference. If they contain
> > multiple segments, you reduce the lock/release by that amount.
> >
> > But yeah, just leave it as-is for now. You can send a final patch for
> > inclusion though. Unless I'm mistaken, I only saw the original and then
> > an incremental patch for changing it to ->make_request_fn?
>
> There was a full version, my mistake. I got confused by the removal of
Indeed.
> the old driver in another directory :-)
Can you please ack it? Thx!
With kind regards,
Geert Uytterhoeven
Software Architect
Sony Techsoft Centre Europe
The Corporate Village ? Da Vincilaan 7-D1 ? B-1935 Zaventem ? Belgium
Phone: +32 (0)2 700 8453
Fax: +32 (0)2 700 8622
E-mail: [email protected]
Internet: http://www.sony-europe.com/
A division of Sony Europe (Belgium) N.V.
VAT BE 0413.825.160 ? RPR Brussels
Fortis ? BIC GEBABEBB ? IBAN BE41293037680010
On Mon, Mar 09 2009, Geert Uytterhoeven wrote:
> On Mon, 9 Mar 2009, Jens Axboe wrote:
> > On Mon, Mar 09 2009, Jens Axboe wrote:
> > > On Mon, Mar 09 2009, Geert Uytterhoeven wrote:
> > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > > > On Fri, Mar 06 2009, Geert Uytterhoeven wrote:
> > > > > > > > On Fri, 6 Mar 2009, Jens Axboe wrote:
> > > > > > > > > On Thu, Mar 05 2009, Geert Uytterhoeven wrote:
> > > > > > > > > > But then I noticed ps3vram_make_request() may be called concurrently,
> > > > > > > > > > so I had to add a mutex to avoid data corruption. This slows the
> > > > > > > > > > driver down, and in the end, the version with a thread turns out to be
> > > > > > > > > > ca. 1% faster. The version without a thread is about 50 lines less
> > > > > > > > > > code, though.
> > > > > > > > >
> > > > > > > > > That is correct, ->make_request_fn may get reentered. I'm not surprised
> > > > > > > > > that performance dropped if you just shoved everything under a mutex.
> > > > > > > > > You could be a little more smart and queue concurrent bio's for
> > > > > > > > > processing when the current one is complete though, there are several
> > > > > > > > > approaches there that be a lot faster than going all the way through the
> > > > > > > > > IO stack and scheduler just to avoid concurrency.
> > > > > > > >
> > > > > > > > Yes, using a spinlock and queueing requests on a list if the driver is
> > > > > > > > busy can be done after 2.6.29...
> > > > > > >
> > > > > > > Certainly. Even just replacing your current mutex with a spinlock during
> > > > > > > the memcpy() would surely be a lot faster. Or even just grabbing the
> > > > > > > mutex before calling into the write for the duration of the bio. The way
> > > > > > > you do it is certain context switch death :-)
> > > > > >
> > > > > > It's not just the memcpy(). ps3vram_{up,down}load() call msleep(), so
> > > > > > I cannot use a spinlock.
> > > > >
> > > > > Ah right, I hadn't looked close enough. But putting the mutex_lock()
> > > > > outside of the bio_for_each_segment() is going to be much faster than
> > > > > getting/releasing it for each segment.
> > > >
> > > > It doesn't seem to make any measurable difference, so I'm gonna leave it for
> > > > now.
> > >
> > > It will depend on where the bio's are coming from. If they are all
> > > single segment, then there will be no difference. If they contain
> > > multiple segments, you reduce the lock/release by that amount.
> > >
> > > But yeah, just leave it as-is for now. You can send a final patch for
> > > inclusion though. Unless I'm mistaken, I only saw the original and then
> > > an incremental patch for changing it to ->make_request_fn?
> >
> > There was a full version, my mistake. I got confused by the removal of
>
> Indeed.
>
> > the old driver in another directory :-)
>
> Can you please ack it? Thx!
Sure, I thought we had agreed to queue it up for 2.6.29?
--
Jens Axboe
On 03/06/2009 04:54 AM, Geert Uytterhoeven wrote:
> Convert the PS3 Video RAM Storage Driver from an MTD driver to a plain block
> device driver.
>
> The ps3vram driver exposes unused video RAM on the PS3 as a block device
> suitable for storage or swap. Fast data transfer is achieved using a local
> cache in system RAM and DMA transfers via the GPU.
>
> The new driver is ca. 50% faster for reading, and ca. 10% for writing.
>
> arch/powerpc/platforms/ps3/Kconfig | 7 +
> drivers/block/Makefile | 1 +
> drivers/block/ps3vram.c | 865 ++++++++++++++++++++++++++++++++++++
> drivers/mtd/devices/Kconfig | 7 -
> drivers/mtd/devices/Makefile | 1 -
> drivers/mtd/devices/ps3vram.c | 768 --------------------------------
> 6 files changed, 873 insertions(+), 776 deletions(-)
> create mode 100644 drivers/block/ps3vram.c
> delete mode 100644 drivers/mtd/devices/ps3vram.c
Looks good, certainly an improvement.
Acked-by: Geoff Levand <[email protected]>