2009-11-10 17:18:32

by Ben Dooks

[permalink] [raw]
Subject: SM501: Implement acceleration features

From: Vincent Sanders <[email protected]>

This patch provides the acceleration entry points for the SM501
framebuffer driver.

This patch provides the sync, copyarea and fillrect entry points,
using the SM501's 2D acceleration engine to perform the operations
in-chip rather than across the bus.

Signed-off-by: Vincent Sanders <[email protected]>
Signed-off-by: Simtec Linux Team <[email protected]>
Signed-off-by: Ben Dooks <[email protected]>

---
drivers/video/sm501fb.c | 238 ++++++++++++++++++++++++++++++++++++++++++---
include/linux/sm501-regs.h | 2
2 files changed, 226 insertions(+), 14 deletions(-)

Index: b/drivers/video/sm501fb.c
===================================================================
--- a/drivers/video/sm501fb.c 2009-11-03 11:19:44.000000000 +0000
+++ b/drivers/video/sm501fb.c 2009-11-03 11:19:46.000000000 +0000
@@ -66,6 +66,7 @@ struct sm501fb_info {
struct fb_info *fb[2]; /* fb info for both heads */
struct resource *fbmem_res; /* framebuffer resource */
struct resource *regs_res; /* registers resource */
+ struct resource *regs2d_res; /* 2d registers resource */
struct sm501_platdata_fb *pdata; /* our platform data */

unsigned long pm_crt_ctrl; /* pm: crt ctrl save */
@@ -73,6 +74,7 @@ struct sm501fb_info {
int irq;
int swap_endian; /* set to swap rgb=>bgr */
void __iomem *regs; /* remapped registers */
+ void __iomem *regs2d; /* 2d remapped registers */
void __iomem *fbmem; /* remapped framebuffer */
size_t fbmem_len; /* length of remapped region */
};
@@ -123,9 +125,9 @@ static inline void sm501fb_sync_regs(str
* This is an attempt to lay out memory for the two framebuffers and
* everything else
*
- * |fbmem_res->start fbmem_res->end|
- * | |
- * |fb[0].fix.smem_start | |fb[1].fix.smem_start | 2K |
+ * |fbmem_res->start fbmem_res->end|
+ * | |
+ * |fb[0].fix.smem_start | |fb[1].fix.smem_start | 2K |
* |-> fb[0].fix.smem_len <-| spare |-> fb[1].fix.smem_len <-|-> cursors <-|
*
* The "spare" space is for the 2d engine data
@@ -486,7 +488,7 @@ static int sm501fb_set_par_common(struct
/* update fb layer with actual clock used */
var->pixclock = sm501fb_hz_to_ps(sm501pixclock);

- dev_dbg(fbi->dev, "%s: pixclock(ps) = %u, pixclock(Hz) = %lu, "
+ dev_dbg(fbi->dev, "%s: pixclock(ps) = %u, pixclock(Hz) = %lu, "
"sm501pixclock = %lu, error = %ld%%\n",
__func__, var->pixclock, pixclock, sm501pixclock,
((pixclock - sm501pixclock)*100)/pixclock);
@@ -1246,7 +1248,173 @@ static ssize_t sm501fb_debug_show_pnl(st

static DEVICE_ATTR(fbregs_pnl, 0444, sm501fb_debug_show_pnl, NULL);

-/* framebuffer ops */
+/* acceleration operations */
+static int sm501fb_sync(struct fb_info *info)
+{
+ int count = 1000000;
+ struct sm501fb_par *par = info->par;
+ struct sm501fb_info *fbi = par->info;
+
+ /* wait for the 2d engine to be ready */
+ while ((count > 0) &&
+ (readl(fbi->regs + SM501_SYSTEM_CONTROL) &
+ SM501_SYSCTRL_2D_ENGINE_STATUS) != 0)
+ count--;
+
+ if (count <= 0) {
+ dev_err(info->dev, "Timeout waiting for 2d engine sync\n");
+ return 1;
+ }
+ return 0;
+}
+
+static void sm501fb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+ struct sm501fb_par *par = info->par;
+ struct sm501fb_info *fbi = par->info;
+ int width = area->width;
+ int height = area->height;
+ int sx = area->sx;
+ int sy = area->sy;
+ int dx = area->dx;
+ int dy = area->dy;
+ unsigned long rtl = 0;
+
+ /* source clip */
+ if ((sx >= info->var.xres_virtual) ||
+ (sy >= info->var.yres_virtual))
+ /* source Area not within virtual screen, skipping */
+ return;
+ if ((sx + width) >= info->var.xres_virtual)
+ width = info->var.xres_virtual - sx - 1;
+ if ((sy + height) >= info->var.yres_virtual)
+ height = info->var.yres_virtual - sy - 1;
+
+ /* dest clip */
+ if ((dx >= info->var.xres_virtual) ||
+ (dy >= info->var.yres_virtual))
+ /* Destination Area not within virtual screen, skipping */
+ return;
+ if ((dx + width) >= info->var.xres_virtual)
+ width = info->var.xres_virtual - dx - 1;
+ if ((dy + height) >= info->var.yres_virtual)
+ height = info->var.yres_virtual - dy - 1;
+
+ if ((sx < dx) || (sy < dy)) {
+ rtl = 1 << 27;
+ sx += width - 1;
+ dx += width - 1;
+ sy += height - 1;
+ dy += height - 1;
+ }
+
+ if (sm501fb_sync(info))
+ return;
+
+ /* set the base addresses */
+ writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_SOURCE_BASE);
+ writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_DESTINATION_BASE);
+
+ /* set the window width */
+ writel((info->var.xres << 16) | info->var.xres,
+ fbi->regs2d + SM501_2D_WINDOW_WIDTH);
+
+ /* set window stride */
+ writel((info->var.xres_virtual << 16) | info->var.xres_virtual,
+ fbi->regs2d + SM501_2D_PITCH);
+
+ /* set data format */
+ switch (info->var.bits_per_pixel) {
+ case 8:
+ writel(0, fbi->regs2d + SM501_2D_STRETCH);
+ break;
+ case 16:
+ writel(0x00100000, fbi->regs2d + SM501_2D_STRETCH);
+ break;
+ case 32:
+ writel(0x00200000, fbi->regs2d + SM501_2D_STRETCH);
+ break;
+ }
+
+ /* 2d compare mask */
+ writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
+
+ /* 2d mask */
+ writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);
+
+ /* source and destination x y */
+ writel((sx << 16) | sy, fbi->regs2d + SM501_2D_SOURCE);
+ writel((dx << 16) | dy, fbi->regs2d + SM501_2D_DESTINATION);
+
+ /* w/h */
+ writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
+
+ /* do area move */
+ writel(0x800000cc | rtl, fbi->regs2d + SM501_2D_CONTROL);
+}
+
+static void sm501fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+ struct sm501fb_par *par = info->par;
+ struct sm501fb_info *fbi = par->info;
+ int width = rect->width, height = rect->height;
+
+ if ((rect->dx >= info->var.xres_virtual) ||
+ (rect->dy >= info->var.yres_virtual))
+ /* Rectangle not within virtual screen, skipping */
+ return;
+ if ((rect->dx + width) >= info->var.xres_virtual)
+ width = info->var.xres_virtual - rect->dx - 1;
+ if ((rect->dy + height) >= info->var.yres_virtual)
+ height = info->var.yres_virtual - rect->dy - 1;
+
+ if (sm501fb_sync(info))
+ return;
+
+ /* set the base addresses */
+ writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_SOURCE_BASE);
+ writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_DESTINATION_BASE);
+
+ /* set the window width */
+ writel((info->var.xres << 16) | info->var.xres,
+ fbi->regs2d + SM501_2D_WINDOW_WIDTH);
+
+ /* set window stride */
+ writel((info->var.xres_virtual << 16) | info->var.xres_virtual,
+ fbi->regs2d + SM501_2D_PITCH);
+
+ /* set data format */
+ switch (info->var.bits_per_pixel) {
+ case 8:
+ writel(0, fbi->regs2d + SM501_2D_STRETCH);
+ break;
+ case 16:
+ writel(0x00100000, fbi->regs2d + SM501_2D_STRETCH);
+ break;
+ case 32:
+ writel(0x00200000, fbi->regs2d + SM501_2D_STRETCH);
+ break;
+ }
+
+ /* 2d compare mask */
+ writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
+
+ /* 2d mask */
+ writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);
+
+ /* colour */
+ writel(rect->color, fbi->regs2d + SM501_2D_FOREGROUND);
+
+ /* x y */
+ writel((rect->dx << 16) | rect->dy, fbi->regs2d + SM501_2D_DESTINATION);
+
+ /* w/h */
+ writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
+
+ /* do rectangle fill */
+ writel(0x800100cc, fbi->regs2d + SM501_2D_CONTROL);
+}
+

static struct fb_ops sm501fb_ops_crt = {
.owner = THIS_MODULE,
@@ -1256,9 +1424,10 @@ static struct fb_ops sm501fb_ops_crt = {
.fb_setcolreg = sm501fb_setcolreg,
.fb_pan_display = sm501fb_pan_crt,
.fb_cursor = sm501fb_cursor,
- .fb_fillrect = cfb_fillrect,
- .fb_copyarea = cfb_copyarea,
+ .fb_fillrect = sm501fb_fillrect,
+ .fb_copyarea = sm501fb_copyarea,
.fb_imageblit = cfb_imageblit,
+ .fb_sync = sm501fb_sync,
};

static struct fb_ops sm501fb_ops_pnl = {
@@ -1269,9 +1438,10 @@ static struct fb_ops sm501fb_ops_pnl = {
.fb_blank = sm501fb_blank_pnl,
.fb_setcolreg = sm501fb_setcolreg,
.fb_cursor = sm501fb_cursor,
- .fb_fillrect = cfb_fillrect,
- .fb_copyarea = cfb_copyarea,
+ .fb_fillrect = sm501fb_fillrect,
+ .fb_copyarea = sm501fb_copyarea,
.fb_imageblit = cfb_imageblit,
+ .fb_sync = sm501fb_sync,
};

/* sm501_init_cursor
@@ -1329,7 +1499,8 @@ static int sm501fb_start(struct sm501fb_
dev_warn(dev, "no irq for device\n");
}

- /* allocate, reserve and remap resources for registers */
+ /* allocate, reserve and remap resources for display
+ * controller registers */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (res == NULL) {
dev_err(dev, "no resource definition for registers\n");
@@ -1354,12 +1525,38 @@ static int sm501fb_start(struct sm501fb_
goto err_regs_res;
}

+ /* allocate, reserve and remap resources for 2d
+ * controller registers */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (res == NULL) {
+ dev_err(dev, "no resource definition for 2d registers\n");
+ ret = -ENOENT;
+ goto err_regs_map;
+ }
+
+ info->regs2d_res = request_mem_region(res->start,
+ resource_size(res),
+ pdev->name);
+
+ if (info->regs2d_res == NULL) {
+ dev_err(dev, "cannot claim registers\n");
+ ret = -ENXIO;
+ goto err_regs_map;
+ }
+
+ info->regs2d = ioremap(res->start, resource_size(res));
+ if (info->regs2d == NULL) {
+ dev_err(dev, "cannot remap registers\n");
+ ret = -ENXIO;
+ goto err_regs2d_res;
+ }
+
/* allocate, reserve resources for framebuffer */
res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
if (res == NULL) {
dev_err(dev, "no memory resource defined\n");
ret = -ENXIO;
- goto err_regs_map;
+ goto err_regs2d_map;
}

info->fbmem_res = request_mem_region(res->start,
@@ -1368,7 +1565,7 @@ static int sm501fb_start(struct sm501fb_
if (info->fbmem_res == NULL) {
dev_err(dev, "cannot claim framebuffer\n");
ret = -ENXIO;
- goto err_regs_map;
+ goto err_regs2d_map;
}

info->fbmem = ioremap(res->start, (res->end - res->start)+1);
@@ -1389,8 +1586,10 @@ static int sm501fb_start(struct sm501fb_
/* enable display controller */
sm501_unit_power(dev->parent, SM501_GATE_DISPLAY, 1);

- /* setup cursors */
+ /* enable 2d controller */
+ sm501_unit_power(dev->parent, SM501_GATE_2D_ENGINE, 1);

+ /* setup cursors */
sm501_init_cursor(info->fb[HEAD_CRT], SM501_DC_CRT_HWC_ADDR);
sm501_init_cursor(info->fb[HEAD_PANEL], SM501_DC_PANEL_HWC_ADDR);

@@ -1400,6 +1599,13 @@ static int sm501fb_start(struct sm501fb_
release_resource(info->fbmem_res);
kfree(info->fbmem_res);

+ err_regs2d_map:
+ iounmap(info->regs2d);
+
+ err_regs2d_res:
+ release_resource(info->regs2d_res);
+ kfree(info->regs2d_res);
+
err_regs_map:
iounmap(info->regs);

@@ -1420,6 +1626,10 @@ static void sm501fb_stop(struct sm501fb_
release_resource(info->fbmem_res);
kfree(info->fbmem_res);

+ iounmap(info->regs2d);
+ release_resource(info->regs2d_res);
+ kfree(info->regs2d_res);
+
iounmap(info->regs);
release_resource(info->regs_res);
kfree(info->regs_res);
@@ -1504,7 +1714,7 @@ static int sm501fb_init_fb(struct fb_inf
fb->var.activate = FB_ACTIVATE_NOW;
fb->var.accel_flags = 0;
fb->var.vmode = FB_VMODE_NONINTERLACED;
- fb->var.bits_per_pixel = 16;
+ fb->var.bits_per_pixel = 16;

if (enable && (pd->flags & SM501FB_FLAG_USE_INIT_MODE) && 0) {
/* TODO read the mode from the current display */
Index: b/include/linux/sm501-regs.h
===================================================================
--- a/include/linux/sm501-regs.h 2009-11-03 11:17:35.000000000 +0000
+++ b/include/linux/sm501-regs.h 2009-11-03 11:19:46.000000000 +0000
@@ -31,6 +31,8 @@
#define SM501_SYSCTRL_PCI_SUBSYS_LOCK (1<<11)
#define SM501_SYSCTRL_PCI_BURST_READ_EN (1<<15)

+#define SM501_SYSCTRL_2D_ENGINE_STATUS (1<<19)
+
/* miscellaneous control */

#define SM501_MISC_CONTROL (0x000004)

--
Ben ([email protected], http://www.fluff.org/)

'a smiley only costs 4 bytes'


2009-11-10 22:21:36

by Ben Dooks

[permalink] [raw]
Subject: Re: SM501: Implement acceleration features

Just noticed there's a couple of bits that didn't get properly fixed
up before releasing. Will fix, test and repost both the patches in the
series.

--
Ben ([email protected], http://www.fluff.org/)

'a smiley only costs 4 bytes'

2009-11-10 23:01:32

by Krzysztof Helt

[permalink] [raw]
Subject: Re: SM501: Implement acceleration features

On Tue, 10 Nov 2009 17:18:10 +0000
Ben Dooks <[email protected]> wrote:

> From: Vincent Sanders <[email protected]>
>
> This patch provides the acceleration entry points for the SM501
> framebuffer driver.
>
> This patch provides the sync, copyarea and fillrect entry points,
> using the SM501's 2D acceleration engine to perform the operations
> in-chip rather than across the bus.
>
> Signed-off-by: Vincent Sanders <[email protected]>
> Signed-off-by: Simtec Linux Team <[email protected]>
> Signed-off-by: Ben Dooks <[email protected]>
>
> ---
> drivers/video/sm501fb.c | 238 ++++++++++++++++++++++++++++++++++++++++++---
> include/linux/sm501-regs.h | 2
> 2 files changed, 226 insertions(+), 14 deletions(-)
>
> Index: b/drivers/video/sm501fb.c
> ===================================================================
> --- a/drivers/video/sm501fb.c 2009-11-03 11:19:44.000000000 +0000
> +++ b/drivers/video/sm501fb.c 2009-11-03 11:19:46.000000000 +0000

(snip)

> @@ -1246,7 +1248,173 @@ static ssize_t sm501fb_debug_show_pnl(st
>
> static DEVICE_ATTR(fbregs_pnl, 0444, sm501fb_debug_show_pnl, NULL);
>
> -/* framebuffer ops */
> +/* acceleration operations */
> +static int sm501fb_sync(struct fb_info *info)
> +{
> + int count = 1000000;
> + struct sm501fb_par *par = info->par;
> + struct sm501fb_info *fbi = par->info;
> +
> + /* wait for the 2d engine to be ready */
> + while ((count > 0) &&
> + (readl(fbi->regs + SM501_SYSTEM_CONTROL) &
> + SM501_SYSCTRL_2D_ENGINE_STATUS) != 0)
> + count--;
> +

You may add cpu_relax() inside this loop.

> + if (count <= 0) {
> + dev_err(info->dev, "Timeout waiting for 2d engine sync\n");
> + return 1;
> + }
> + return 0;
> +}
> +
> +static void sm501fb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
> +{
> + struct sm501fb_par *par = info->par;
> + struct sm501fb_info *fbi = par->info;
> + int width = area->width;
> + int height = area->height;
> + int sx = area->sx;
> + int sy = area->sy;
> + int dx = area->dx;
> + int dy = area->dy;
> + unsigned long rtl = 0;
> +
> + /* source clip */
> + if ((sx >= info->var.xres_virtual) ||
> + (sy >= info->var.yres_virtual))
> + /* source Area not within virtual screen, skipping */
> + return;
> + if ((sx + width) >= info->var.xres_virtual)
> + width = info->var.xres_virtual - sx - 1;
> + if ((sy + height) >= info->var.yres_virtual)
> + height = info->var.yres_virtual - sy - 1;
> +
> + /* dest clip */
> + if ((dx >= info->var.xres_virtual) ||
> + (dy >= info->var.yres_virtual))
> + /* Destination Area not within virtual screen, skipping */
> + return;
> + if ((dx + width) >= info->var.xres_virtual)
> + width = info->var.xres_virtual - dx - 1;
> + if ((dy + height) >= info->var.yres_virtual)
> + height = info->var.yres_virtual - dy - 1;
> +
> + if ((sx < dx) || (sy < dy)) {
> + rtl = 1 << 27;
> + sx += width - 1;
> + dx += width - 1;
> + sy += height - 1;
> + dy += height - 1;
> + }
> +
> + if (sm501fb_sync(info))
> + return;
> +

Please check if you need to wait for the blit engine before writting to any register.
Usually, the values in the bit engine registers are shadowed after the engine is started
(ie. the blitting operation is started) and the next set of values can be written into the regs.

> + /* set the base addresses */
> + writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_SOURCE_BASE);
> + writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_DESTINATION_BASE);
> +
> + /* set the window width */
> + writel((info->var.xres << 16) | info->var.xres,
> + fbi->regs2d + SM501_2D_WINDOW_WIDTH);
> +
> + /* set window stride */
> + writel((info->var.xres_virtual << 16) | info->var.xres_virtual,
> + fbi->regs2d + SM501_2D_PITCH);
> +
> + /* set data format */
> + switch (info->var.bits_per_pixel) {
> + case 8:
> + writel(0, fbi->regs2d + SM501_2D_STRETCH);
> + break;
> + case 16:
> + writel(0x00100000, fbi->regs2d + SM501_2D_STRETCH);
> + break;
> + case 32:
> + writel(0x00200000, fbi->regs2d + SM501_2D_STRETCH);
> + break;
> + }
> +
> + /* 2d compare mask */
> + writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
> +
> + /* 2d mask */
> + writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);

All writes above do not use any copyarea specific value. They should be done
only once in the sm501fb_set_par() function. It is enough to initialize the blitting
engine when a mode is set (xres/yres and bpp values are known then). The only
exception is when these values are reset by every blit operation.

> +
> + /* source and destination x y */
> + writel((sx << 16) | sy, fbi->regs2d + SM501_2D_SOURCE);
> + writel((dx << 16) | dy, fbi->regs2d + SM501_2D_DESTINATION);
> +
> + /* w/h */
> + writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
> +
> + /* do area move */
> + writel(0x800000cc | rtl, fbi->regs2d + SM501_2D_CONTROL);
> +}
> +

The same comments apply for the fillrect() method.

> +static void sm501fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
> +{
> + struct sm501fb_par *par = info->par;
> + struct sm501fb_info *fbi = par->info;
> + int width = rect->width, height = rect->height;
> +
> + if ((rect->dx >= info->var.xres_virtual) ||
> + (rect->dy >= info->var.yres_virtual))
> + /* Rectangle not within virtual screen, skipping */
> + return;
> + if ((rect->dx + width) >= info->var.xres_virtual)
> + width = info->var.xres_virtual - rect->dx - 1;
> + if ((rect->dy + height) >= info->var.yres_virtual)
> + height = info->var.yres_virtual - rect->dy - 1;
> +
> + if (sm501fb_sync(info))
> + return;
> +
> + /* set the base addresses */
> + writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_SOURCE_BASE);
> + writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_DESTINATION_BASE);
> +
> + /* set the window width */
> + writel((info->var.xres << 16) | info->var.xres,
> + fbi->regs2d + SM501_2D_WINDOW_WIDTH);
> +
> + /* set window stride */
> + writel((info->var.xres_virtual << 16) | info->var.xres_virtual,
> + fbi->regs2d + SM501_2D_PITCH);
> +
> + /* set data format */
> + switch (info->var.bits_per_pixel) {
> + case 8:
> + writel(0, fbi->regs2d + SM501_2D_STRETCH);
> + break;
> + case 16:
> + writel(0x00100000, fbi->regs2d + SM501_2D_STRETCH);
> + break;
> + case 32:
> + writel(0x00200000, fbi->regs2d + SM501_2D_STRETCH);
> + break;
> + }
> +
> + /* 2d compare mask */
> + writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
> +
> + /* 2d mask */
> + writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);
> +
> + /* colour */
> + writel(rect->color, fbi->regs2d + SM501_2D_FOREGROUND);
> +
> + /* x y */
> + writel((rect->dx << 16) | rect->dy, fbi->regs2d + SM501_2D_DESTINATION);
> +
> + /* w/h */
> + writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
> +
> + /* do rectangle fill */
> + writel(0x800100cc, fbi->regs2d + SM501_2D_CONTROL);
> +}
> +
>
> static struct fb_ops sm501fb_ops_crt = {
> .owner = THIS_MODULE,
> @@ -1256,9 +1424,10 @@ static struct fb_ops sm501fb_ops_crt = {
> .fb_setcolreg = sm501fb_setcolreg,
> .fb_pan_display = sm501fb_pan_crt,
> .fb_cursor = sm501fb_cursor,
> - .fb_fillrect = cfb_fillrect,
> - .fb_copyarea = cfb_copyarea,
> + .fb_fillrect = sm501fb_fillrect,
> + .fb_copyarea = sm501fb_copyarea,
> .fb_imageblit = cfb_imageblit,
> + .fb_sync = sm501fb_sync,
> };
>

You may want to try if setting the info->flags bit FBINFO_READS_FAST gives
you a faster scrolling on the fb device. This flags says that you prefer to use
the copyarea function over the imageblit function for scrolling. The latter is
not accelerated in this driver.

Regards,
Krzysztof

2009-11-11 21:49:25

by Vincent Sanders

[permalink] [raw]
Subject: Re: SM501: Implement acceleration features

On Wed, Nov 11, 2009 at 12:58:25AM +0100, Krzysztof Helt wrote:
> On Tue, 10 Nov 2009 17:18:10 +0000
> Ben Dooks <[email protected]> wrote:
>
> > From: Vincent Sanders <[email protected]>
> >
> > This patch provides the acceleration entry points for the SM501
> > framebuffer driver.
> >
> > This patch provides the sync, copyarea and fillrect entry points,
> > using the SM501's 2D acceleration engine to perform the operations
> > in-chip rather than across the bus.
> >
> > Signed-off-by: Vincent Sanders <[email protected]>
> > Signed-off-by: Simtec Linux Team <[email protected]>
> > Signed-off-by: Ben Dooks <[email protected]>
> >
> > ---
> > drivers/video/sm501fb.c | 238 ++++++++++++++++++++++++++++++++++++++++++---
> > include/linux/sm501-regs.h | 2
> > 2 files changed, 226 insertions(+), 14 deletions(-)
> >
> > Index: b/drivers/video/sm501fb.c
> > ===================================================================
> > --- a/drivers/video/sm501fb.c 2009-11-03 11:19:44.000000000 +0000
> > +++ b/drivers/video/sm501fb.c 2009-11-03 11:19:46.000000000 +0000
>

I have snipped all but small amount for context

> > + /* wait for the 2d engine to be ready */
> > + while ((count > 0) &&
> > + (readl(fbi->regs + SM501_SYSTEM_CONTROL) &
> > + SM501_SYSCTRL_2D_ENGINE_STATUS) != 0)
> > + count--;
> > +
>
> You may add cpu_relax() inside this loop.
>

ok, would need to test this thoroughly as the SM501 has some...odd
behaviours (see later on).

> > +
> > + if (sm501fb_sync(info))
> > + return;
> > +
>
> Please check if you need to wait for the blit engine before writting
> to any register. Usually, the values in the bit engine registers
> are shadowed after the engine is started (ie. the blitting operation
> is started) and the next set of values can be written into the regs.

indeed, if it were sane I would agree, it isnt in all circumstances, see later

>
> > + }
> > +
> > + /* 2d compare mask */
> > + writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
> > +
> > + /* 2d mask */
> > + writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);
>
> All writes above do not use any copyarea specific value. They should
> be done only once in the sm501fb_set_par() function. It is enough to
> initialize the blitting engine when a mode is set (xres/yres and bpp
> values are known then). The only exception is when these values are
> reset by every blit operation.

not reset, but possibly corrupted depending on bus attachment.

>
> > +
> > + /* source and destination x y */
> > + writel((sx << 16) | sy, fbi->regs2d + SM501_2D_SOURCE);
> > + writel((dx << 16) | dy, fbi->regs2d + SM501_2D_DESTINATION);
> > +
> > + /* w/h */
> > + writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
> > +
> > + /* do area move */
> > + writel(0x800000cc | rtl, fbi->regs2d + SM501_2D_CONTROL);
> > +}
> > +
>
> The same comments apply for the fillrect() method.
>

I will explain at the end

> >
> > static struct fb_ops sm501fb_ops_crt = {
> > .owner = THIS_MODULE,
> > @@ -1256,9 +1424,10 @@ static struct fb_ops sm501fb_ops_crt = {
> > .fb_setcolreg = sm501fb_setcolreg,
> > .fb_pan_display = sm501fb_pan_crt,
> > .fb_cursor = sm501fb_cursor,
> > - .fb_fillrect = cfb_fillrect,
> > - .fb_copyarea = cfb_copyarea,
> > + .fb_fillrect = sm501fb_fillrect,
> > + .fb_copyarea = sm501fb_copyarea,
> > .fb_imageblit = cfb_imageblit,
> > + .fb_sync = sm501fb_sync,
> > };
> >
>
> You may want to try if setting the info->flags bit FBINFO_READS_FAST
> gives you a faster scrolling on the fb device. This flags says that
> you prefer to use the copyarea function over the imageblit function
> for scrolling. The latter is not accelerated in this driver.

thanks! will try that.

To clarify the idiocy and nasty issues with the SM501/2 range (this
isnt for the weak stomached) :-(

The Silicon Motion SM501 has three revisions (A, B and C. Revision C
is confusingly renamed as SM502 but keeps the 501 identifiers) it
also has the unusual ability to be connected to the host both by PCI
and local 32bit parallel bus in several flavours, Also in both bus mastering
and non bus mastering modes.

Revisions prior to C had a number of subtle bugs related to their
external memory interfaces and the interaction with interrupts which
means posted writes and reads of the irq status register may not
actually match up with reality. There are additional constraints on
clock domains and internal bus configurations which we already address
elsewhere in the driver.

The *only* reliable way to be sure if any of the asynchronous engines
are ready to use is to poll the 2D engine status bit in the system
controller register (not the one in the 2d engine itself!) and if its
in local bus mode be sure your bus timings are conservative enough not
to lock the device up solid with it driving i/o lines against the CPU
(not that the magic smoke escapes or anything :-(. Hopefully this
explains why we wait explicitly for the device as we do (Oh and
revision C fixes those problems but introduces others because of its
weaker bus drive)

Next I discovered that although the 2d engine nominally has a shadowed
register set, what actually happens if you attempt an operation while
the engine is busy is: play a guessing game as to whether the device
will lock up solid, corrupt both requests or work fine.

Finally the revision C introduces an amusing issue where certain
operations followed by certain other operations cause register
corruption. To avoid this issue I specifically program all registers
to do with an operation each time. There is a compromise between the
overhead of a couple of register writes against *much* less complexity
than either the conditional execution depending of current device
configuration or working out which operations may follow which and
fixing up as appropriate (I have coded both these approaches and it was
ugly in comparison).

Thankyou for your review, I hope I explained why I have dones some of
these seemingly odd things. If you think there is another approach I
should take based on this explanation please let me know.

--
Vincent Sanders
Simtec Electronics


Attachments:
(No filename) (6.02 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments

2009-11-11 22:27:44

by Ben Dooks

[permalink] [raw]
Subject: Re: SM501: Implement acceleration features

Vincent Sanders wrote:
> On Wed, Nov 11, 2009 at 12:58:25AM +0100, Krzysztof Helt wrote:
>> On Tue, 10 Nov 2009 17:18:10 +0000
>> Ben Dooks <[email protected]> wrote:
>>
>>> From: Vincent Sanders <[email protected]>
>>>
>>> This patch provides the acceleration entry points for the SM501
>>> framebuffer driver.
>>>
>>> This patch provides the sync, copyarea and fillrect entry points,
>>> using the SM501's 2D acceleration engine to perform the operations
>>> in-chip rather than across the bus.
>>>
>>> Signed-off-by: Vincent Sanders <[email protected]>
>>> Signed-off-by: Simtec Linux Team <[email protected]>
>>> Signed-off-by: Ben Dooks <[email protected]>
>>>
>>> ---
>>> drivers/video/sm501fb.c | 238 ++++++++++++++++++++++++++++++++++++++++++---
>>> include/linux/sm501-regs.h | 2
>>> 2 files changed, 226 insertions(+), 14 deletions(-)
>>>
>>> Index: b/drivers/video/sm501fb.c
>>> ===================================================================
>>> --- a/drivers/video/sm501fb.c 2009-11-03 11:19:44.000000000 +0000
>>> +++ b/drivers/video/sm501fb.c 2009-11-03 11:19:46.000000000 +0000
>
> I have snipped all but small amount for context
>
>>> + /* wait for the 2d engine to be ready */
>>> + while ((count > 0) &&
>>> + (readl(fbi->regs + SM501_SYSTEM_CONTROL) &
>>> + SM501_SYSCTRL_2D_ENGINE_STATUS) != 0)
>>> + count--;
>>> +
>> You may add cpu_relax() inside this loop.
>>
>
> ok, would need to test this thoroughly as the SM501 has some...odd
> behaviours (see later on).
>
>>> +
>>> + if (sm501fb_sync(info))
>>> + return;
>>> +
>> Please check if you need to wait for the blit engine before writting
>> to any register. Usually, the values in the bit engine registers
>> are shadowed after the engine is started (ie. the blitting operation
>> is started) and the next set of values can be written into the regs.
>
> indeed, if it were sane I would agree, it isnt in all circumstances, see later

It might be worth caching the data that relies on bpp after set_par
call, and thus avoiding another swtich on bpp in the acceleration
routines.

Otherwise not much else to say.

--
Ben Dooks, Software Engineer, Simtec Electronics

http://www.simtec.co.uk/