Hi James, Geert, lkml and mm,
This patch adds support for the Hecuba/E-Ink display with deferred IO.
The changes from the previous version are to switch to using a mutex
and lock_page. I welcome your feedback and advice.
Signed-off-by: Jaya Kumar <[email protected]>
---
drivers/video/Kconfig | 13 +
drivers/video/Makefile | 1
drivers/video/hecubafb.c | 590 +++++++++++++++++++++++++++++++++++++++++++++++
mm/rmap.c | 1
4 files changed, 605 insertions(+)
---
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 8874cf2..151b6e0 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -540,6 +540,19 @@ config FB_IMAC
help
This is the frame buffer device driver for the Intel-based Macintosh
+config FB_HECUBA
+ tristate "Hecuba board support"
+ depends on FB && X86 && MMU
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This enables support for the Hecuba board. This driver was tested
+ with an E-Ink 800x600 display and x86 SBCs through a 16 bit GPIO
+ interface (8 bit data, 4 bit control). If you anticipate using
+ this driver, say Y or M; otherwise say N. You must specify the
+ GPIO IO address to be used for setting control and data.
+
config FB_HGA
tristate "Hercules mono graphics support"
depends on FB && X86
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 6801edf..e9edf8e 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -65,6 +65,7 @@ obj-$(CONFIG_FB_SGIVW) += sgivwfb.o
obj-$(CONFIG_FB_ACORN) += acornfb.o
obj-$(CONFIG_FB_ATARI) += atafb.o
obj-$(CONFIG_FB_MAC) += macfb.o
+obj-$(CONFIG_FB_HECUBA) += hecubafb.o
obj-$(CONFIG_FB_HGA) += hgafb.o
obj-$(CONFIG_FB_IGA) += igafb.o
obj-$(CONFIG_FB_APOLLO) += dnfb.o
diff --git a/drivers/video/hecubafb.c b/drivers/video/hecubafb.c
new file mode 100644
index 0000000..f0f538d
--- /dev/null
+++ b/drivers/video/hecubafb.c
@@ -0,0 +1,590 @@
+/*
+ * linux/drivers/video/hecubafb.c -- FB driver for Hecuba controller
+ *
+ * Copyright (C) 2006, Jaya Kumar
+ * This work was sponsored by CIS(M) Sdn Bhd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ *
+ * Layout is based on skeletonfb.c by James Simmons and Geert Uytterhoeven.
+ * This work was possible because of apollo display code from E-Ink's website
+ * http://support.eink.com/community
+ * All information used to write this code is from public material made
+ * available by E-Ink on its support site. Some commands such as 0xA4
+ * were found by looping through cmd=0x00 thru 0xFF and supplying random
+ * values. There are other commands that the display is capable of,
+ * beyond the 5 used here but they are more complex.
+ *
+ * This driver is written to be used with the Hecuba display controller
+ * board, and tested with the EInk 800x600 display in 1 bit mode.
+ * The interface between Hecuba and the host is TTL based GPIO. The
+ * GPIO requirements are 8 writable data lines and 6 lines for control.
+ * Only 4 of the control lines are actually used here; the remaining two
+ * are reserved for future use.
+ * The driver requires the IO addresses for data and control GPIO at
+ * load time. It is also possible to use this display with a standard
+ * PC parallel port.
+ *
+ * General notes:
+ * - User must set hecubafb_enable=1 to enable it
+ * - User must set dio_addr=0xIOADDR cio_addr=0xIOADDR c2io_addr=0xIOADDR
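+ *   e.g.: modprobe hecubafb hecubafb_enable=1 dio_addr=0x480 \
+ *             cio_addr=0x400 c2io_addr=0x408
+ *   (the addresses above are only the example values given in the module
+ *   parameter descriptions at the bottom of this file)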
+ *
+ * Explanation of the deferred IO scheme:
+ * - a userspace app like Xfbdev mmaps the framebuffer
+ * - the driver sets up the nopage and page_mkwrite handlers
+ * - the app writes to the mmaped virtual address
+ * - this faults and reaches the driver's nopage handler
+ * - the nopage handler finds and returns the physical page (the
+ *   framebuffer lives in vmalloced system memory, not device memory)
+ * - since it is a write, page_mkwrite is called and adds the page to a list
+ * - page_mkwrite also schedules a workqueue task to run after a delay
+ * - the app keeps writing to that page at no additional cost
+ * - the workqueue task then does page_mkclean on the pages on the
+ *   list, and completes the update of the framebuffer
+ * - when the app next writes to an address that was just cleaned,
+ *   it faults again and the above sequence repeats
+ *
+ * The intent is roughly to allow bursty framebuffer writes to occur.
+ * Then, after some time, when things have hopefully gone quiet, we go
+ * and really update the framebuffer. For this type of nonvolatile,
+ * high latency display, the desired image is the final image rather
+ * than the intermediate stages, which is why it is okay not to update
+ * the display for every write that occurs.
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/list.h>
+
+/* to support deferred IO */
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+
+/* Apollo controller specific defines */
+#define APOLLO_START_NEW_IMG 0xA0
+#define APOLLO_STOP_IMG_DATA 0xA1
+#define APOLLO_DISPLAY_IMG 0xA2
+#define APOLLO_ERASE_DISPLAY 0xA3
+#define APOLLO_INIT_DISPLAY 0xA4
+
+/* Hecuba interface specific defines */
+/* WUP is inverted, CD is inverted, DS is inverted */
+#define HCB_NWUP_BIT 0x01
+#define HCB_NDS_BIT 0x02
+#define HCB_RW_BIT 0x04
+#define HCB_NCD_BIT 0x08
+#define HCB_ACK_BIT 0x80
+
+/* Display specific information */
+#define DPY_W 600
+#define DPY_H 800
+
+struct hecubafb_par {
+ struct delayed_work deferred_work;
+ unsigned long dio_addr;
+ unsigned long cio_addr;
+ unsigned long c2io_addr;
+ unsigned char ctl;
+ atomic_t ref_count;
+ atomic_t vma_count;
+ struct fb_info *info;
+ unsigned int irq;
+ struct mutex lock;
+ struct list_head pagelist;
+};
+
+struct page_list {
+ struct list_head list;
+ struct page *page;
+};
+
+static struct fb_fix_screeninfo hecubafb_fix __initdata = {
+ .id = "hecubafb",
+ .type = FB_TYPE_PACKED_PIXELS,
+ .visual = FB_VISUAL_MONO01,
+ .xpanstep = 0,
+ .ypanstep = 0,
+ .ywrapstep = 0,
+ .accel = FB_ACCEL_NONE,
+};
+
+static struct fb_var_screeninfo hecubafb_var __initdata = {
+ .xres = DPY_W,
+ .yres = DPY_H,
+ .xres_virtual = DPY_W,
+ .yres_virtual = DPY_H,
+ .bits_per_pixel = 1,
+ .nonstd = 1,
+};
+
+static unsigned long dio_addr;
+static unsigned long cio_addr;
+static unsigned long c2io_addr;
+static unsigned long splashval;
+static unsigned int nosplash;
+static unsigned int hecubafb_enable;
+static unsigned int irq;
+
+static DECLARE_WAIT_QUEUE_HEAD(hecubafb_waitq);
+
+static void hcb_set_ctl(struct hecubafb_par *par)
+{
+ outb(par->ctl, par->cio_addr);
+}
+
+static unsigned char hcb_get_ctl(struct hecubafb_par *par)
+{
+ return inb(par->c2io_addr);
+}
+
+static void hcb_set_data(struct hecubafb_par *par, unsigned char value)
+{
+ outb(value, par->dio_addr);
+}
+
+static int __devinit apollo_init_control(struct hecubafb_par *par)
+{
+ unsigned char ctl;
+ /* for init, we want the following setup to be set:
+ WUP = lo
+ ACK = hi
+ DS = hi
+ RW = hi
+ CD = lo
+ */
+
+ /* write WUP to lo, DS to hi, RW to hi, CD to lo */
+ par->ctl = HCB_NWUP_BIT | HCB_RW_BIT | HCB_NCD_BIT ;
+ par->ctl &= ~HCB_NDS_BIT;
+ hcb_set_ctl(par);
+
+ /* check that ACK is not already asserted (low) */
+ ctl = hcb_get_ctl(par);
+ if ((ctl & HCB_ACK_BIT)) {
+ printk(KERN_ERR "Fail because ACK is already low\n");
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+void hcb_wait_for_ack(struct hecubafb_par *par)
+{
+ int timeout;
+ unsigned char ctl;
+
+ timeout = 500;
+ do {
+ ctl = hcb_get_ctl(par);
+ if ((ctl & HCB_ACK_BIT))
+ return;
+ udelay(1);
+ } while (timeout--);
+ printk(KERN_ERR "timed out waiting for ack\n");
+}
+
+void hcb_wait_for_ack_clear(struct hecubafb_par *par)
+{
+ int timeout;
+ unsigned char ctl;
+
+ timeout = 500;
+ do {
+ ctl = hcb_get_ctl(par);
+ if (!(ctl & HCB_ACK_BIT))
+ return;
+ udelay(1);
+ } while (timeout--);
+ printk(KERN_ERR "timed out waiting for clear\n");
+}
+
+void apollo_send_data(struct hecubafb_par *par, unsigned char data)
+{
+ /* set data */
+ hcb_set_data(par, data);
+
+ /* set DS low */
+ par->ctl |= HCB_NDS_BIT;
+ hcb_set_ctl(par);
+
+ hcb_wait_for_ack(par);
+
+ /* set DS hi */
+ par->ctl &= ~(HCB_NDS_BIT);
+ hcb_set_ctl(par);
+
+ hcb_wait_for_ack_clear(par);
+}
+
+void apollo_send_command(struct hecubafb_par *par, unsigned char data)
+{
+ /* command so set CD to high */
+ par->ctl &= ~(HCB_NCD_BIT);
+ hcb_set_ctl(par);
+
+ /* actually strobe with command */
+ apollo_send_data(par, data);
+
+ /* clear CD back to low */
+ par->ctl |= (HCB_NCD_BIT);
+ hcb_set_ctl(par);
+}
+
+/* main hecubafb functions */
+
+static void hecubafb_dpy_update(struct hecubafb_par *par)
+{
+ int i;
+ unsigned char *buf = par->info->screen_base;
+
+ apollo_send_command(par, APOLLO_START_NEW_IMG);
+
+ for (i = 0; i < (DPY_W*DPY_H/8); i++) {
+ apollo_send_data(par, *(buf++));
+ }
+
+ apollo_send_command(par, APOLLO_STOP_IMG_DATA);
+ apollo_send_command(par, APOLLO_DISPLAY_IMG);
+}
+
+static void hecubafb_fillrect(struct fb_info *info,
+ const struct fb_fillrect *rect)
+{
+ struct hecubafb_par *par = info->par;
+
+ cfb_fillrect(info, rect);
+
+ hecubafb_dpy_update(par);
+}
+
+static void hecubafb_copyarea(struct fb_info *info,
+ const struct fb_copyarea *area)
+{
+ struct hecubafb_par *par = info->par;
+
+ cfb_copyarea(info, area);
+
+ hecubafb_dpy_update(par);
+}
+
+static void hecubafb_imageblit(struct fb_info *info,
+ const struct fb_image *image)
+{
+ struct hecubafb_par *par = info->par;
+
+ cfb_imageblit(info, image);
+
+ hecubafb_dpy_update(par);
+}
+
+/*
+ * this is the slow path from userspace. they can seek and write to
+ * the fb. it's inefficient to do anything less than a full screen draw
+ */
+static ssize_t hecubafb_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode;
+ int fbidx;
+ struct fb_info *info;
+ unsigned long p;
+ int err=-EINVAL;
+ struct hecubafb_par *par;
+ unsigned int xres;
+ unsigned int fbmemlength;
+
+ p = *ppos;
+ inode = file->f_dentry->d_inode;
+ fbidx = iminor(inode);
+ info = registered_fb[fbidx];
+
+ if (!info || !info->screen_base)
+ return -ENODEV;
+
+ par = info->par;
+ xres = info->var.xres;
+ fbmemlength = (xres * info->var.yres)/8;
+
+ if (p > fbmemlength)
+ return -ENOSPC;
+
+ err = 0;
+ if ((count + p) > fbmemlength) {
+ count = fbmemlength - p;
+ err = -ENOSPC;
+ }
+
+ if (count) {
+ char *base_addr;
+
+ base_addr = info->screen_base;
+ count -= copy_from_user(base_addr + p, buf, count);
+ *ppos += count;
+ err = -EFAULT;
+ }
+
+ hecubafb_dpy_update(par);
+
+ if (count)
+ return count;
+
+ return err;
+}
+
+/* this is to find and return the vmalloc-ed fb pages */
+static struct page* hecubafb_vm_nopage(struct vm_area_struct *vma,
+ unsigned long vaddr, int *type)
+{
+ unsigned long offset;
+ struct page *page;
+ struct fb_info *info = vma->vm_private_data;
+
+ offset = (vaddr - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
+ if (offset >= (DPY_W*DPY_H)/8)
+ return NOPAGE_SIGBUS;
+
+ page = vmalloc_to_page(info->screen_base + offset);
+ if (!page)
+ return NOPAGE_OOM;
+
+ get_page(page);
+ if (type)
+ *type = VM_FAULT_MINOR;
+ return page;
+}
+
+static void hecubafb_work(struct work_struct *work)
+{
+ struct hecubafb_par *par = container_of(work, struct hecubafb_par,
+ deferred_work.work);
+ struct list_head *node, *next;
+ struct page_list *cur;
+
+ /* here we unmap the pages, then do all deferred IO */
+ mutex_lock(&par->lock);
+ list_for_each_safe(node, next, &par->pagelist) {
+ cur = list_entry(node, struct page_list, list);
+ list_del(node);
+ lock_page(cur->page);
+ page_mkclean(cur->page);
+ unlock_page(cur->page);
+ kfree(cur);
+ }
+ mutex_unlock(&par->lock);
+ hecubafb_dpy_update(par);
+}
+
+static int hecubafb_page_mkwrite(struct vm_area_struct *vma,
+ struct page *page)
+{
+ struct fb_info *info = vma->vm_private_data;
+ struct hecubafb_par *par = info->par;
+ struct page_list *new;
+
+ /* this is a callback we get when userspace first tries to
+ write to the page. we schedule a workqueue. that workqueue
+ will eventually unmap the touched pages and execute the
+ deferred framebuffer IO. then if userspace touches a page
+ again, we repeat the same scheme */
+
+ new = kzalloc(sizeof(struct page_list), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+ new->page = page;
+
+ /* protect against the workqueue changing the page list */
+ mutex_lock(&par->lock);
+ list_add(&new->list, &par->pagelist);
+ mutex_unlock(&par->lock);
+
+ /* come back in 1s to process the deferred IO */
+ schedule_delayed_work(&par->deferred_work, HZ);
+ return 0;
+}
+
+static struct vm_operations_struct hecubafb_vm_ops = {
+ .nopage = hecubafb_vm_nopage,
+ .page_mkwrite = hecubafb_page_mkwrite,
+};
+
+static int hecubafb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &hecubafb_vm_ops;
+ vma->vm_flags |= ( VM_IO | VM_RESERVED | VM_DONTEXPAND );
+ vma->vm_private_data = info;
+ return 0;
+}
+
+static struct fb_ops hecubafb_ops = {
+ .owner = THIS_MODULE,
+ .fb_write = hecubafb_write,
+ .fb_fillrect = hecubafb_fillrect,
+ .fb_copyarea = hecubafb_copyarea,
+ .fb_imageblit = hecubafb_imageblit,
+ .fb_mmap = hecubafb_mmap,
+};
+
+static int __devinit hecubafb_probe(struct platform_device *dev)
+{
+ struct fb_info *info;
+ int retval = -ENOMEM;
+ int videomemorysize;
+ unsigned char *videomemory;
+ struct hecubafb_par *par;
+
+ videomemorysize = (DPY_W*DPY_H)/8;
+
+ if (!(videomemory = vmalloc(videomemorysize)))
+ return retval;
+
+ memset(videomemory, 0, videomemorysize);
+
+ info = framebuffer_alloc(sizeof(struct hecubafb_par), &dev->dev);
+ if (!info)
+ goto err;
+
+ info->screen_base = (char __iomem *) videomemory;
+ info->fbops = &hecubafb_ops;
+
+ info->var = hecubafb_var;
+ info->fix = hecubafb_fix;
+ par = info->par;
+ par->info = info;
+
+ if (!dio_addr || !cio_addr || !c2io_addr) {
+ printk(KERN_WARNING "no IO addresses supplied\n");
+ goto err1;
+ }
+ par->dio_addr = dio_addr;
+ par->cio_addr = cio_addr;
+ par->c2io_addr = c2io_addr;
+ info->flags = FBINFO_FLAG_DEFAULT;
+ mutex_init(&par->lock);
+ INIT_DELAYED_WORK(&par->deferred_work, hecubafb_work);
+ INIT_LIST_HEAD(&par->pagelist);
+ retval = register_framebuffer(info);
+ if (retval < 0)
+ goto err1;
+ platform_set_drvdata(dev, info);
+
+ printk(KERN_INFO
+ "fb%d: Hecuba frame buffer device, using %dK of video memory\n",
+ info->node, videomemorysize >> 10);
+
+ /* this inits the dpy */
+ apollo_init_control(par);
+
+ apollo_send_command(par, APOLLO_INIT_DISPLAY);
+ apollo_send_data(par, 0x81);
+
+ /* have to wait while display resets */
+ udelay(1000);
+
+ /* if we were told to splash the screen, we just clear it */
+ if (!nosplash) {
+ apollo_send_command(par, APOLLO_ERASE_DISPLAY);
+ apollo_send_data(par, splashval);
+ }
+
+ return 0;
+err1:
+ framebuffer_release(info);
+err:
+ vfree(videomemory);
+ return retval;
+}
+
+static int __devexit hecubafb_remove(struct platform_device *dev)
+{
+ struct fb_info *info = platform_get_drvdata(dev);
+ struct hecubafb_par *par;
+
+ if (info) {
+ par = info->par;
+ cancel_delayed_work(&par->deferred_work);
+ flush_scheduled_work();
+ unregister_framebuffer(info);
+ vfree(info->screen_base);
+ framebuffer_release(info);
+ }
+ return 0;
+}
+
+static struct platform_driver hecubafb_driver = {
+ .probe = hecubafb_probe,
+ .remove = hecubafb_remove,
+ .driver = {
+ .name = "hecubafb",
+ },
+};
+
+static struct platform_device *hecubafb_device;
+
+static int __init hecubafb_init(void)
+{
+ int ret;
+
+ if (!hecubafb_enable) {
+ printk(KERN_ERR "Use hecubafb_enable to enable the device\n");
+ return -ENXIO;
+ }
+
+ ret = platform_driver_register(&hecubafb_driver);
+ if (!ret) {
+ hecubafb_device = platform_device_alloc("hecubafb", 0);
+ if (hecubafb_device)
+ ret = platform_device_add(hecubafb_device);
+ else
+ ret = -ENOMEM;
+
+ if (ret) {
+ platform_device_put(hecubafb_device);
+ platform_driver_unregister(&hecubafb_driver);
+ }
+ }
+ return ret;
+
+}
+
+static void __exit hecubafb_exit(void)
+{
+ platform_device_unregister(hecubafb_device);
+ platform_driver_unregister(&hecubafb_driver);
+}
+
+module_param(nosplash, uint, 0);
+MODULE_PARM_DESC(nosplash, "Disable doing the splash screen");
+module_param(hecubafb_enable, uint, 0);
+MODULE_PARM_DESC(hecubafb_enable, "Enable communication with Hecuba board");
+module_param(dio_addr, ulong, 0);
+MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480");
+module_param(cio_addr, ulong, 0);
+MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400");
+module_param(c2io_addr, ulong, 0);
+MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408");
+module_param(splashval, ulong, 0);
+MODULE_PARM_DESC(splashval, "Splash pattern: 0x00 is black, 0x01 is white");
+module_param(irq, uint, 0);
+MODULE_PARM_DESC(irq, "IRQ for the Hecuba board");
+
+module_init(hecubafb_init);
+module_exit(hecubafb_exit);
+
+MODULE_DESCRIPTION("fbdev driver for Hecuba board");
+MODULE_AUTHOR("Jaya Kumar");
+MODULE_LICENSE("GPL");
diff --git a/mm/rmap.c b/mm/rmap.c
index 669acb2..0fa0521 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -496,6 +496,7 @@ int page_mkclean(struct page *page)
return ret;
}
+EXPORT_SYMBOL_GPL(page_mkclean);
/**
* page_set_anon_rmap - setup new anonymous rmap
On Sat, 2007-02-17 at 11:42 +0100, Jaya Kumar wrote:
> Hi James, Geert, lkml and mm,
Hi Jaya,
> This patch adds support for the Hecuba/E-Ink display with deferred IO.
> The changes from the previous version are to switch to using a mutex
> and lock_page. I welcome your feedback and advice.
This changelog ought to be a little more extensive; esp. because you're
using these fancy new functions ->page_mkwrite() and page_mkclean() in a
novel way.
Also, I'd still like to see a way to call msync() on the mmap'ed region
to force a flush. I think providing a fb_fsync() method in fbmem.c and a
hook down to the driver ought to work.
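For illustration only, such a hook might look roughly like this, where
fb_fsync() and the fb_ops member are sketched names, not existing API
(untested):

/* struct fb_ops would gain something like
 *   int (*fb_fsync)(struct fb_info *info, int datasync);
 * and fbmem.c's file_operations would gain .fsync = fb_fsync */
static int fb_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	struct fb_info *info = registered_fb[iminor(dentry->d_inode)];

	if (info && info->fbops->fb_fsync)
		return info->fbops->fb_fsync(info, datasync);
	return 0;
}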
Also, you now seem to use a fixed 1 second delay, perhaps provide an
ioctl or something to customize this?
And, as Andrew suggested last time around, could you perhaps push this
fancy new idea into the FB layer so that more drivers can make use of it?
On 2/17/07, Peter Zijlstra <[email protected]> wrote:
> On Sat, 2007-02-17 at 11:42 +0100, Jaya Kumar wrote:
> > Hi James, Geert, lkml and mm,
>
> Hi Jaya,
>
> > This patch adds support for the Hecuba/E-Ink display with deferred IO.
> > The changes from the previous version are to switch to using a mutex
> > and lock_page. I welcome your feedback and advice.
>
> This changelog ought to be a little more extensive; esp. because you're
> using these fancy new functions ->page_mkwrite() and page_mkclean() in a
> novel way.
Hi Peter,
I had put the comment explaining the usage of mkwrite/mkclean in the
.c file. Oh, I see, in the changelog message. Ok, I'll update with a
changelog message mentioning mkwrite/mkclean.
>
> Also, I'd still like to see a way to call msync() on the mmap'ed region
> to force a flush. I think providing a fb_fsync() method in fbmem.c and a
> hook down to the driver ought to work.
I'm hoping fbdev folk will give feedback if this is okay. James,
Geert, what do you think?
>
> Also, you now seem to use a fixed 1 second delay, perhaps provide an
> ioctl or something to customize this?
Ok. Will do.
>
> And, as Andrew suggested last time around, could you perhaps push this
> fancy new idea into the FB layer so that more drivers can make us of it?
I would like to do that very much. I have some ideas how it could work
for devices that support clean partial updates by tracking touched
pages. But I wonder if it is too early to try to abstract this out.
James, Geert, what do you think?
Thanks,
jaya
On Sat, Feb 17, 2007 at 08:25:07AM -0500, Jaya Kumar wrote:
> On 2/17/07, Peter Zijlstra <[email protected]> wrote:
> >And, as Andrew suggested last time around, could you perhaps push this
> >fancy new idea into the FB layer so that more drivers can make use of it?
>
> I would like to do that very much. I have some ideas how it could work
> for devices that support clean partial updates by tracking touched
> pages. But I wonder if it is too early to try to abstract this out.
> James, Geert, what do you think?
>
This would also provide an interesting hook for setting up chained DMA
for the real framebuffer updates when there's more than a couple of pages
that have been touched, which would also be nice to have. There's more
than a few drivers that could take advantage of that.
On 2/17/07, Paul Mundt <[email protected]> wrote:
> On Sat, Feb 17, 2007 at 08:25:07AM -0500, Jaya Kumar wrote:
> > On 2/17/07, Peter Zijlstra <[email protected]> wrote:
> > >And, as Andrew suggested last time around, could you perhaps push this
> > >fancy new idea into the FB layer so that more drivers can make use of it?
> >
> > I would like to do that very much. I have some ideas how it could work
> > for devices that support clean partial updates by tracking touched
> > pages. But I wonder if it is too early to try to abstract this out.
> > James, Geert, what do you think?
> >
> This would also provide an interesting hook for setting up chained DMA
> for the real framebuffer updates when there's more than a couple of pages
> that have been touched, which would also be nice to have. There's more
> than a few drivers that could take advantage of that.
>
Hi Paul,
I could benefit from knowing which driver and display device you are
considering to be applicable.
I was thinking the method used in hecubafb would only be useful to
devices with very slow update paths, where "losing" some of the
display activity is not an issue since the device would not have been
able to update fast enough to show that activity anyway.
What you described with chained DMA sounds different to this. I
suppose one could use this technique to coalesce framebuffer IO to get
better performance/utilization even for fast display devices. Sounds
interesting to try. Did I understand you correctly?
Thanks,
jaya
On Sun, Feb 18, 2007 at 06:31:23AM -0500, Jaya Kumar wrote:
> On 2/17/07, Paul Mundt <[email protected]> wrote:
> >This would also provide an interesting hook for setting up chained DMA
> >for the real framebuffer updates when there's more than a couple of pages
> >that have been touched, which would also be nice to have. There's more
> >than a few drivers that could take advantage of that.
>
> I could benefit from knowing which driver and display device you are
> considering to be applicable.
>
> I was thinking the method used in hecubafb would only be useful to
> devices with very slow update paths, where "losing" some of the
> display activity is not an issue since the device would not have been
> able to update fast enough to show that activity anyway.
>
> What you described with chained DMA sounds different to this. I
> suppose one could use this technique to coalesce framebuffer IO to get
> better performance/utilization even for fast display devices. Sounds
> interesting to try. Did I understand you correctly?
>
Yes, that's what I'm interested in trying. In the SH case we can
basically make use of the on-chip DMAC for any non-PCI device. Some of
these permit scatterlists and chained DMA in hardware, others do not. The
general problem is that since we have to go and poke at the dcache prior
to kicking off the DMA, it's rarely a win for a small number of pages;
memory bursts just end up being faster.
The other issue is that most of the "big" writers are doing so via mmap()
anyways, so it's futile to attempt to handle the DMA case in the
->write() path. Your approach seems like it might be an appropriate
interface for building something like this on top of.
Given that, this would have to be something that's dealt with at the
subsystem level rather than in individual drivers, hence the desire to
see something like this more generically visible.
On 2/18/07, Paul Mundt <[email protected]> wrote:
> Given that, this would have to be something that's dealt with at the
> subsystem level rather than in individual drivers, hence the desire to
> see something like this more generically visible.
>
Hi Peter, Paul, fbdev folk,
Ok. Here's what I'm thinking for abstracting this:
fbdev drivers would setup fb_mmap with their own_mmap as usual. In
own_mmap, they would do what they normally do and setup a vm_ops. They
are free to have their own nopage handler but would set the
page_mkwrite handler to be fbdev_deferred_io_mkwrite().
fbdev_deferred_io_mkwrite would build up the list of touched pages and
pass it to a delayed workqueue which would then mkclean on each page
and then pass a copy of that page list down to a driver's callback
function. The fbdev driver's callback function can then do the actual
IO to the framebuffer or coalesce DMA based on the provided page list.
I would like to add something like the following to struct fb_info:
#ifdef CONFIG_FB_DEFERRED_IO
struct fb_deferred_io *defio;
#endif
to store the mutex (to protect the page list), the touched page list,
and the driver's callback function.
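To make that concrete, a very rough sketch of the shared page_mkwrite half
might look like the following; fb_deferred_io_mkwrite, the defio members and
the placement of the delayed work are all provisional names from the proposal
above, and this is untested:

static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
				  struct page *page)
{
	struct fb_info *info = vma->vm_private_data;
	struct fb_deferred_io *fbdefio = info->defio;
	struct page_list *new;

	/* track the touched page, as hecubafb_page_mkwrite does today */
	new = kzalloc(sizeof(struct page_list), GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	new->page = page;

	mutex_lock(&fbdefio->lock);
	list_add(&new->list, &fbdefio->pagelist);
	mutex_unlock(&fbdefio->lock);

	/* the deferred work later mkcleans the listed pages and then calls
	   the driver's callback with the touched page list */
	schedule_delayed_work(&fbdefio->deferred_work, HZ);
	return 0;
}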
I hope this sounds sufficiently generic to meet everyone's (the two of
us? :) needs.
Thanks,
jaya
ps: I've added James and Geert to the CC list. I would appreciate any
advice on whether this is a suitable approach.
On Mon, Feb 19, 2007 at 11:13:04PM -0500, Jaya Kumar wrote:
> On 2/18/07, Paul Mundt <[email protected]> wrote:
> >Given that, this would have to be something that's dealt with at the
> >subsystem level rather than in individual drivers, hence the desire to
> >see something like this more generically visible.
> >
>
> Hi Peter, Paul, fbdev folk,
>
> Ok. Here's what I'm thinking for abstracting this:
>
> fbdev drivers would setup fb_mmap with their own_mmap as usual. In
> own_mmap, they would do what they normally do and setup a vm_ops. They
> are free to have their own nopage handler but would set the
> page_mkwrite handler to be fbdev_deferred_io_mkwrite().
The vast majority of drivers do not implement ->fb_mmap(), and with
proper abstraction, this should be something that's possible as a direct
alternative to drivers/video/fbmem.c:fb_mmap() for the people that want
it. Of course it's just as easy to do something like the sbuslib.c route
and then have drivers set their ->fb_mmap() from that too.
> fbdev_deferred_io_mkwrite would build up the list of touched pages and
> pass it to a delayed workqueue which would then mkclean on each page
> and then pass a copy of that page list down to a driver's callback
> function. The fbdev driver's callback function can then do the actual
> IO to the framebuffer or coalesce DMA based on the provided page list.
>
That works for me, though I'd prefer for struct page_list to be done with
a scatterlist, then it's trivial to setup from the workqueue context
without having to shuffle things around.
On 2/19/07, Paul Mundt <[email protected]> wrote:
> On Mon, Feb 19, 2007 at 11:13:04PM -0500, Jaya Kumar wrote:
> >
> > Ok. Here's what I'm thinking for abstracting this:
> >
> > fbdev drivers would setup fb_mmap with their own_mmap as usual. In
> > own_mmap, they would do what they normally do and setup a vm_ops. They
> > are free to have their own nopage handler but would set the
> > page_mkwrite handler to be fbdev_deferred_io_mkwrite().
>
> The vast majority of drivers do not implement ->fb_mmap(), and with
> proper abstraction, this should be something that's possible as a direct
> alternative to drivers/video/fbmem.c:fb_mmap() for the people that want
> it. Of course it's just as easy to do something like the sbuslib.c route
> and then have drivers set their ->fb_mmap() from that too.
>
I was thinking about having that fb_mmap replacement too. But then I
got worried because a generic implementation of nopage/etc. would need
to handle whether the driver's fb memory was vmalloced, kmalloced, or
a mixture, if some drivers do that. So I figured let's aim low and
just pull in the core part that does the setup and page tracking. I
hope that's okay.
> That works for me, though I'd prefer for struct page_list to be done with
> a scatterlist, then it's trivial to setup from the workqueue context
> without having to shuffle things around.
>
Ok. Will check out when implementing.
Thanks,
jaya
On Mon, 19 Feb 2007, Jaya Kumar wrote:
> On 2/18/07, Paul Mundt <[email protected]> wrote:
> > Given that, this would have to be something that's dealt with at the
> > subsystem level rather than in individual drivers, hence the desire to
> > see something like this more generically visible.
> >
>
> Hi Peter, Paul, fbdev folk,
>
> Ok. Here's what I'm thinking for abstracting this:
>
> fbdev drivers would setup fb_mmap with their own_mmap as usual. In
> own_mmap, they would do what they normally do and setup a vm_ops. They
> are free to have their own nopage handler but would set the
> page_mkwrite handler to be fbdev_deferred_io_mkwrite().
> fbdev_deferred_io_mkwrite would build up the list of touched pages and
> pass it to a delayed workqueue which would then mkclean on each page
> and then pass a copy of that page list down to a driver's callback
> function. The fbdev driver's callback function can then do the actual
> IO to the framebuffer or coalesce DMA based on the provided page list.
>
> I would like to add something like the following to struct fb_info:
>
> #ifdef CONFIG_FB_DEFERRED_IO
> struct fb_deferred_io *defio;
> #endif
Don't you need a way to specify the maximum deferral time? E.g. a field in
fb_info.
> to store the mutex (to protect the page list), the touched page list,
> and the driver's callback function.
>
> I hope this sounds sufficiently generic to meet everyone's (the two of
> us? :) needs.
Looks fine!
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On 2/20/07, Jaya Kumar <[email protected]> wrote:
> On 2/19/07, Paul Mundt <[email protected]> wrote:
> > That works for me, though I'd prefer for struct page_list to be done with
> > a scatterlist, then it's trivial to setup from the workqueue context
> > without having to shuffle things around.
> >
>
> Ok. Will check out when implementing.
>
Took a quick look. If I used scatterlists, I'd still need to build a
list of scatterlists to pass to the driver callback. The alternative
would be a preallocated array of scatterlists sized to the page count
of the framebuffer, which seems expensive since each scatterlist entry
carries a page, offset, dma address and length.
On a separate note, Peter pointed out that it may be possible to reuse
page->lru instead of using a struct page_list. This would enable
something like:
in mkwrite:
mutex_lock
list_add(page->lru, defio->pagelist)
mutex_unlock
in deferred handler:
mutex_lock
for_each page {
lock_page
mkclean
unlock_page
}
callback(fb_info, pagelist)
for_each page {
list_del
}
mutex_unlock
The advantage of reusing page->lru is that it avoids needing the struct
page_list and the allocation in mkwrite. Is the above exploitation of
->lru ok with the mm folk?
In the above, we iterate over the page list twice. I have to mkclean
before calling the callback to avoid the situation where a touched
page is missed by the callback. I don't see a way around that part.
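To make the page->lru idea concrete, here is roughly how the hecubafb pair
above would look without struct page_list (an untested sketch, reusing the
existing hecubafb names):

static int hecubafb_page_mkwrite(struct vm_area_struct *vma,
				 struct page *page)
{
	struct fb_info *info = vma->vm_private_data;
	struct hecubafb_par *par = info->par;

	/* no allocation: chain the touched page on its lru field */
	mutex_lock(&par->lock);
	list_add(&page->lru, &par->pagelist);
	mutex_unlock(&par->lock);

	schedule_delayed_work(&par->deferred_work, HZ);
	return 0;
}

static void hecubafb_work(struct work_struct *work)
{
	struct hecubafb_par *par = container_of(work, struct hecubafb_par,
						deferred_work.work);
	struct page *page, *next;

	mutex_lock(&par->lock);
	/* first pass: clean every touched page */
	list_for_each_entry(page, &par->pagelist, lru) {
		lock_page(page);
		page_mkclean(page);
		unlock_page(page);
	}
	/* do the deferred IO (or hand the list to a generic callback) */
	hecubafb_dpy_update(par);
	/* second pass: empty the list */
	list_for_each_entry_safe(page, next, &par->pagelist, lru)
		list_del(&page->lru);
	mutex_unlock(&par->lock);
}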
Thanks,
jaya
On 2/20/07, Geert Uytterhoeven <[email protected]> wrote:
> Don't you need a way to specify the maximum deferral time? E.g. a field in
> fb_info.
>
You are right. I will need that. I could put that into struct
fb_deferred_io. So drivers would setup like:
static struct fb_deferred_io hecubafb_defio = {
.delay = HZ,
.deferred_io = hecubafb_dpy_update,
};
where that would be:
struct fb_deferred_io {
	unsigned long delay;		/* delay between mkwrite and deferred handler */
	struct mutex lock;		/* mutex that protects the page list */
	struct list_head pagelist;	/* list of touched pages */
	struct delayed_work deferred_work;
	void (*deferred_io)(struct fb_info *info,
			    struct list_head *pagelist); /* driver callback */
};
and the driver would do:
...
info->fbdefio = &hecubafb_defio;
register_framebuffer...
When the driver calls register_framebuffer and unregister_framebuffer,
I can then do the init and destruction of the other members of that
struct. Does this sound okay?
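For illustration, the init and teardown might look roughly like this, with
fb_deferred_io_work being the subsystem handler that mkcleans the pages and
invokes fbdefio->deferred_io (provisional names, untested):

static void fb_deferred_io_init(struct fb_info *info)
{
	struct fb_deferred_io *fbdefio = info->fbdefio;

	mutex_init(&fbdefio->lock);
	INIT_LIST_HEAD(&fbdefio->pagelist);
	INIT_DELAYED_WORK(&fbdefio->deferred_work, fb_deferred_io_work);
	if (fbdefio->delay == 0)
		fbdefio->delay = HZ;	/* fall back to 1s if unset */
}

static void fb_deferred_io_cleanup(struct fb_info *info)
{
	struct fb_deferred_io *fbdefio = info->fbdefio;

	cancel_delayed_work(&fbdefio->deferred_work);
	flush_scheduled_work();
}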
Thanks,
jaya
Could you make it work without the framebuffer? There are embedded LCD
displays that have internal memory that needs data flushed to them.
On Wed, 21 Feb 2007, Jaya Kumar wrote:
> On 2/20/07, Geert Uytterhoeven <[email protected]> wrote:
> > Don't you need a way to specify the maximum deferral time? E.g. a field in
> > fb_info.
> >
>
> You are right. I will need that. I could put that into struct
> fb_deferred_io. So drivers would setup like:
>
> static struct fb_deferred_io hecubafb_defio = {
> .delay = HZ,
> .deferred_io = hecubafb_dpy_update,
> };
>
> where that would be:
> struct fb_deferred_io {
> unsigned long delay; /* delay between mkwrite and deferred handler
> */
> struct mutex lock; /* mutex that protects the page list */
> struct list_head pagelist; /* list of touched pages */
> struct delayed_work deferred_work;
> void (*deferred_io)(struct fb_info *info, struct list_head
> *pagelist); /* callback */
> };
>
> and the driver would do:
> ...
> info->fbdefio = &hecubafb_defio;
> register_framebuffer...
>
> When the driver calls register_framebuffer and unregister_framebuffer,
> I can then do the init and destruction of the other members of that
> struct. Does this sound okay?
>
> Thanks,
> jaya
On 2/21/07, James Simmons <[email protected]> wrote:
>
> Could you make it work without the framebuffer. There are embedded LCD
> displays that have internal memory that need data flushed to them.
>
I'm not sure I understand. What the current implementation does is to
use host based framebuffer memory. Apps mmap that memory and draw to
that. Then after the delay, that framebuffer is written to the
device's memory. That's the scenario for hecubafb, where the Apollo
controller maintains its own internal framebuffer.
When you say without the framebuffer, if you meant without the host
memory, then this method doesn't work. If you mean without the
device's internal memory, then yes, I think we can do that, because it
would be up to the driver to use the touched pagelist to then perform
IO as suitable for its device.
Thanks,
jaya
On Mon, 2007-02-19 at 23:13 -0500, Jaya Kumar wrote:
> On 2/18/07, Paul Mundt <[email protected]> wrote:
> > Given that, this would have to be something that's dealt with at the
> > subsystem level rather than in individual drivers, hence the desire to
> > see something like this more generically visible.
> >
>
> Hi Peter, Paul, fbdev folk,
>
> Ok. Here's what I'm thinking for abstracting this:
>
> fbdev drivers would setup fb_mmap with their own_mmap as usual. In
> own_mmap, they would do what they normally do and setup a vm_ops. They
> are free to have their own nopage handler but would set the
> page_mkwrite handler to be fbdev_deferred_io_mkwrite().
> fbdev_deferred_io_mkwrite would build up the list of touched pages and
> pass it to a delayed workqueue which would then mkclean on each page
Yes, this functionality is sorely needed by more than a few driver
writers.
> and then pass a copy of that page list down to a driver's callback
> function. The fbdev driver's callback function can then do the actual
> IO to the framebuffer or coalesce DMA based on the provided page list.
> I would like to add something like the following to struct fb_info:
>
> #ifdef CONFIG_FB_DEFERRED_IO
> struct fb_deferred_io *defio;
> #endif
>
> to store the mutex (to protect the page list), the touched page list,
> and the driver's callback function.
>
> I hope this sounds sufficiently generic to meet everyone's (the two of
> us? :) needs.
There's definitely more than two :-). For the past several years,
various people have been asking for this functionality. So yes,
implementing this in a generic manner will be a big help.
Tony
On Wed, 2007-02-21 at 11:55 -0500, Jaya Kumar wrote:
> On 2/20/07, Geert Uytterhoeven <[email protected]> wrote:
> > Don't you need a way to specify the maximum deferral time? E.g. a field in
> > fb_info.
> >
>
> You are right. I will need that. I could put that into struct
> fb_deferred_io. So drivers would setup like:
>
Is it also possible to let the drivers do the 'deferred_io'
themselves? Say, a driver that would flush the dirty pages on
every VBLANK interrupt.
> static struct fb_deferred_io hecubafb_defio = {
> .delay = HZ,
> .deferred_io = hecubafb_dpy_update,
> };
>
> where that would be:
> struct fb_deferred_io {
> unsigned long delay; /* delay between mkwrite and deferred handler */
> struct mutex lock; /* mutex that protects the page list */
> struct list_head pagelist; /* list of touched pages */
> struct delayed_work deferred_work;
> void (*deferred_io)(struct fb_info *info, struct list_head
> *pagelist); /* callback */
> };
>
> and the driver would do:
> ...
> info->fbdefio = &hecubafb_defio;
> register_framebuffer...
>
> When the driver calls register_framebuffer and unregister_framebuffer,
> I can then do the init and destruction of the other members of that
> struct. Does this sound okay?
It would be better if separate registering functions are created for
this functionality (ie deferred_io_register/unregister).
Tony
On 2/21/07, Antonino A. Daplas <[email protected]> wrote:
> On Wed, 2007-02-21 at 11:55 -0500, Jaya Kumar wrote:
> >
> > You are right. I will need that. I could put that into struct
> > fb_deferred_io. So drivers would setup like:
> >
>
> Is it also possible to let the drivers do the 'deferred_io'
> themselves? Say, a driver that would flush the dirty pages on
> every VBLANK interrupt.
Yes, I think so. The deferred_io callback would hand the driver the
dirty page list. The driver could then use that list to handle the
on-vblank work.
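For example, a driver's callback might only push the touched pages,
something like the untested sketch below; mydrv_flush_page() and the way a
page is mapped back to a framebuffer offset are entirely hypothetical and
left to the driver:

static void mydrv_deferred_io(struct fb_info *info,
			      struct list_head *pagelist)
{
	struct page *page;

	list_for_each_entry(page, pagelist, lru) {
		/* map the page back to a framebuffer offset (however the
		 * driver chooses to record that) and flush only that
		 * region, e.g. from a vblank-driven context or via DMA */
		mydrv_flush_page(info, page);
	}
}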
> > When the driver calls register_framebuffer and unregister_framebuffer,
> > I can then do the init and destruction of the other members of that
> > struct. Does this sound okay?
>
> It would be better if separate registering functions are created for
> this functionality (ie deferred_io_register/unregister).
>
Ok. Will do it that way.
Thanks,
jaya
> I'm not sure I understand. What the current implementation does is to
> use host based framebuffer memory. Apps mmap that memory and draw to
> that. Then after the delay, that framebuffer is written to the
> device's memory. That's the scenario for hecubafb, where the Apollo
> controller maintains its own internal framebuffer.
>
> When you say without the framebuffer, if you meant without the host
> memory, then this method doesn't work. If you mean without the
> device's internal memory, then yes, I think we can do that, because it
> would be up to the driver to use the touched pagelist to then perform
> IO as suitable for its device.
I meant for it to work for non-framebuffer devices. I realized that's not
such a great idea.