On ia64, the various functions that make up cn_proc.c cause kernel
unaligned access errors.
If you are using these, for example, to get notification about
all tasks forking and exiting, you get multiple unaligned access errors
per process.
Here, we just adjust how the variables are declared and use memcopy to
avoid the error messages.
Signed-off-by: Erik Jacobson <[email protected]>
---
cn_proc.c | 94 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 47 insertions(+), 47 deletions(-)
--- linux.orig/drivers/connector/cn_proc.c 2006-11-29 15:57:37.000000000 -0600
+++ linux/drivers/connector/cn_proc.c 2006-12-07 16:50:03.195035791 -0600
@@ -49,7 +49,7 @@
void proc_fork_connector(struct task_struct *task)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -57,19 +57,19 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->what = PROC_EVENT_FORK;
- ev->event_data.fork.parent_pid = task->real_parent->pid;
- ev->event_data.fork.parent_tgid = task->real_parent->tgid;
- ev->event_data.fork.child_pid = task->pid;
- ev->event_data.fork.child_tgid = task->tgid;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.what = PROC_EVENT_FORK;
+ ev.event_data.fork.parent_pid = task->real_parent->pid;
+ ev.event_data.fork.parent_tgid = task->real_parent->tgid;
+ ev.event_data.fork.child_pid = task->pid;
+ ev.event_data.fork.child_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
/* If cn_netlink_send() failed, the data is not sent */
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
@@ -77,7 +77,7 @@
void proc_exec_connector(struct task_struct *task)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
struct timespec ts;
__u8 buffer[CN_PROC_MSG_SIZE];
@@ -85,24 +85,24 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->what = PROC_EVENT_EXEC;
- ev->event_data.exec.process_pid = task->pid;
- ev->event_data.exec.process_tgid = task->tgid;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.what = PROC_EVENT_EXEC;
+ ev.event_data.exec.process_pid = task->pid;
+ ev.event_data.exec.process_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
void proc_id_connector(struct task_struct *task, int which_id)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -110,32 +110,32 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- ev->what = which_id;
- ev->event_data.id.process_pid = task->pid;
- ev->event_data.id.process_tgid = task->tgid;
+ ev.what = which_id;
+ ev.event_data.id.process_pid = task->pid;
+ ev.event_data.id.process_tgid = task->tgid;
if (which_id == PROC_EVENT_UID) {
- ev->event_data.id.r.ruid = task->uid;
- ev->event_data.id.e.euid = task->euid;
+ ev.event_data.id.r.ruid = task->uid;
+ ev.event_data.id.e.euid = task->euid;
} else if (which_id == PROC_EVENT_GID) {
- ev->event_data.id.r.rgid = task->gid;
- ev->event_data.id.e.egid = task->egid;
+ ev.event_data.id.r.rgid = task->gid;
+ ev.event_data.id.e.egid = task->egid;
} else
return;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ ev.timestamp_ns = timespec_to_ns(&ts);
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
void proc_exit_connector(struct task_struct *task)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -143,19 +143,19 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->what = PROC_EVENT_EXIT;
- ev->event_data.exit.process_pid = task->pid;
- ev->event_data.exit.process_tgid = task->tgid;
- ev->event_data.exit.exit_code = task->exit_code;
- ev->event_data.exit.exit_signal = task->exit_signal;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.what = PROC_EVENT_EXIT;
+ ev.event_data.exit.process_pid = task->pid;
+ ev.event_data.exit.process_tgid = task->tgid;
+ ev.event_data.exit.exit_code = task->exit_code;
+ ev.event_data.exit.exit_signal = task->exit_signal;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
@@ -170,7 +170,7 @@
static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -178,16 +178,16 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
msg->seq = rcvd_seq;
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->cpu = -1;
- ev->what = PROC_EVENT_NONE;
- ev->event_data.ack.err = err;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.cpu = -1;
+ ev.what = PROC_EVENT_NONE;
+ ev.event_data.ack.err = err;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = rcvd_ack + 1;
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
On Thu, 7 Dec 2006 17:22:13 -0600, Erik Jacobson <[email protected]> wrote:
> Here, we just adjust how the variables are declared and use memcopy to
> avoid the error messages.
> - ev->timestamp_ns = timespec_to_ns(&ts);
> + ev.timestamp_ns = timespec_to_ns(&ts);
Please try to declare u64 timestamp_ns, then copy it into the *ev
instead of copying whole *ev. This ought to fix the problem if
buffer[] ends aligned to 32 bits or better.
Also... Since Linus does not take patches in general off l-k anymore,
you have to find whoever herds the connector and feed the patch through
him/her.
-- Pete
On Fri, 2006-12-08 at 19:20 -0800, Pete Zaitcev wrote:
> On Thu, 7 Dec 2006 17:22:13 -0600, Erik Jacobson <[email protected]> wrote:
Erik,
Thanks for cc'ing me on this patch.
> > Here, we just adjust how the variables are declared and use memcopy to
> > avoid the error messages.
> > - ev->timestamp_ns = timespec_to_ns(&ts);
> > + ev.timestamp_ns = timespec_to_ns(&ts);
>
> Please try to declare u64 timestamp_ns, then copy it into the *ev
> instead of copying whole *ev. This ought to fix the problem if
> buffer[] ends aligned to 32 bits or better.
Yes, I like this suggestion.
> Also... Since Linus does not take patches in general off l-k anymore,
> you have to find whoever herds the connector and feed the patch through
> him/her.
>
> -- Pete
I contributed and continue to follow this particular connector (Evgeniy
Polyakov wrote the generic connector code). In the past I've given my
patches to Andrew Morton and requested inclusion in -mm -- that may be
the best route (Andrew?). I'd be happy to Ack a patch implementing
Pete's suggestion.
Cheers,
-Matt Helsley
> > Here, we just adjust how the variables are declared and use memcopy to
> > avoid the error messages.
> > - ev->timestamp_ns = timespec_to_ns(&ts);
> > + ev.timestamp_ns = timespec_to_ns(&ts);
> Please try to declare u64 timestamp_ns, then copy it into the *ev
> instead of copying whole *ev. This ought to fix the problem if
> buffer[] ends aligned to 32 bits or better.
So I took this suggestion for a spin and met with the same result.
Please confirm I understood what you were asking.
The unaligned access messages are still produced.
Here is how I changed the function (declared timestamp_ns in the
top of the function, and used memcpy to copy it in place).
The case of fork is provided below but I did it to all the appropriate
functions. See the removed and added comments to see what changed.
void proc_fork_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
u64 timestamp_ns; // added
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = (struct cn_msg*)buffer;
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
//ev->timestamp_ns = timespec_to_ns(&ts); removed
printk("dbg fork before timestamp_to_ns call\n");
timestamp_ns = timespec_to_ns(&ts); //added
printk("dbg fork after timespec_to_ns call, b4 memcpy\n");
memcpy(&ev->timestamp_ns, ×tamp_ns, sizeof(ev->timestamp_ns)); //added
printk("dbg fork after memcpy, b4 other ev settings...\n");
ev->what = PROC_EVENT_FORK;
ev->event_data.fork.parent_pid = task->real_parent->pid;
ev->event_data.fork.parent_tgid = task->real_parent->tgid;
ev->event_data.fork.child_pid = task->pid;
ev->event_data.fork.child_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
/* If cn_netlink_send() failed, the data is not sent */
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
Here is the output from this kernel when the container program tracking
tasks is running. You can see the unaligned access messages along with
my debug statements for the case of fork. The TASK and EXIT messages are
from the container program running in debug mode in the background of the
same tty.
[root@minime1 ~]# /bin/true
dbg fork before timestamp_to_ns call
dbg fork after timespec_to_ns call, b4 memcpy
kernel unaligned access to 0xe000003076b6fbe4, ip=0xa0000001004f1480
dbg fork after memcpy, b4 other ev settings...
FORK parent 3389 (bash) child 3418 (bash)
kernel unaligned access to 0xe0000030732dfe24, ip=0xa0000001004f17c1
kernel unaligned access to 0xe0000030732dfe04, ip=0xa0000001004f1241
[root@minime1 ~]# EXIT pid 3418 (No-Info)
I had built this kernel with KDB and I checked the IPs.
ip 0xa0000001004f1480 is in proc_fork_connector, between the two
printk calls. That should make it the memcpy. That jives with the
console output too.
0xa0000001004f17c1 is proc_exec_connector
0xa0000001004f1241 is proc_exit_connector
I'm not able to read the disassembly well, but I've included the full
disassembly of proc_fork_connector below in case that's helpful.
I could try some memory dumps if needed. I really don't have lots of
experience fixing kernel unaligned access issues so if there are other
things I should try that are better than my original patch, I'm all ears.
vmlinux: file format elf64-ia64-little
Disassembly of section .text:
proc_fork_connector():
a0000001004f1280 <proc_fork_connector> [MMB] alloc r36=ar.pfs,9,6,0
a0000001004f1286 <proc_fork_connector+0x6> adds r12=-80,r12
a0000001004f128c <proc_fork_connector+0xc> nop.b 0x0
a0000001004f1290 <proc_fork_connector+0x10> [MMI] adds r20=3200,r13
a0000001004f1296 <proc_fork_connector+0x16> addl r2=-1981124,r1
a0000001004f129c <proc_fork_connector+0x1c> mov r35=b0;;
a0000001004f12a0 <proc_fork_connector+0x20> [MMI] ld4.acq r14=[r2]
a0000001004f12a6 <proc_fork_connector+0x26> adds r19=40,r20
a0000001004f12ac <proc_fork_connector+0x2c> adds r34=40,r12;;
a0000001004f12b0 <proc_fork_connector+0x30> [MIB] nop.m 0x0
a0000001004f12b6 <proc_fork_connector+0x36> cmp4.lt p7,p6=0,r14
a0000001004f12bc <proc_fork_connector+0x3c> (p06) br.cond.dpnt.few a0000001004f1570 <proc_fork_connector+0x2f0>
a0000001004f12c0 <proc_fork_connector+0x40> [MMI] ld4 r15=[r19];;
a0000001004f12c6 <proc_fork_connector+0x46> adds r8=1,r15
a0000001004f12cc <proc_fork_connector+0x4c> nop.i 0x0
a0000001004f12d0 <proc_fork_connector+0x50> [MMI] nop.m 0x0;;
a0000001004f12d6 <proc_fork_connector+0x56> st4 [r19]=r8
a0000001004f12dc <proc_fork_connector+0x5c> nop.i 0x0;;
a0000001004f12e0 <proc_fork_connector+0x60> [MMI] mov r3=-65496
a0000001004f12e6 <proc_fork_connector+0x66> mov r14=-35804
a0000001004f12ec <proc_fork_connector+0x6c> adds r40=48,r12
a0000001004f12f0 <proc_fork_connector+0x70> [MII] adds r30=20,r20
a0000001004f12f6 <proc_fork_connector+0x76> adds r28=64,r12;;
a0000001004f12fc <proc_fork_connector+0x7c> nop.i 0x0
a0000001004f1300 <proc_fork_connector+0x80> [MMI] ld8 r2=[r3]
a0000001004f1306 <proc_fork_connector+0x86> nop.m 0x0
a0000001004f130c <proc_fork_connector+0x8c> nop.i 0x0;;
a0000001004f1310 <proc_fork_connector+0x90> [MMI] add r31=r14,r2;;
a0000001004f1316 <proc_fork_connector+0x96> ld4 r38=[r31]
a0000001004f131c <proc_fork_connector+0x9c> nop.i 0x0;;
a0000001004f1320 <proc_fork_connector+0xa0> [MMI] adds r39=1,r38
a0000001004f1326 <proc_fork_connector+0xa6> st4 [r40]=r38
a0000001004f132c <proc_fork_connector+0xac> nop.i 0x0;;
a0000001004f1330 <proc_fork_connector+0xb0> [MMI] st4 [r31]=r39
a0000001004f1336 <proc_fork_connector+0xb6> ld4 r29=[r30]
a0000001004f133c <proc_fork_connector+0xbc> nop.i 0x0
a0000001004f1340 <proc_fork_connector+0xc0> [MMI] nop.m 0x0;;
a0000001004f1346 <proc_fork_connector+0xc6> st4 [r28]=r29
a0000001004f134c <proc_fork_connector+0xcc> nop.i 0x0;;
a0000001004f1350 <proc_fork_connector+0xd0> [MMI] ld4 r27=[r19];;
a0000001004f1356 <proc_fork_connector+0xd6> adds r26=-1,r27
a0000001004f135c <proc_fork_connector+0xdc> nop.i 0x0
a0000001004f1360 <proc_fork_connector+0xe0> [MMI] nop.m 0x0;;
a0000001004f1366 <proc_fork_connector+0xe6> st4 [r19]=r26
a0000001004f136c <proc_fork_connector+0xec> nop.i 0x0;;
a0000001004f1370 <proc_fork_connector+0xf0> [MMI] adds r25=16,r20
a0000001004f1376 <proc_fork_connector+0xf6> nop.m 0x0
a0000001004f137c <proc_fork_connector+0xfc> adds r33=24,r12;;
a0000001004f1380 <proc_fork_connector+0x100> [MII] ld4.acq r24=[r25]
a0000001004f1386 <proc_fork_connector+0x106> nop.i 0x0;;
a0000001004f138c <proc_fork_connector+0x10c> tbit.z p8,p9=r24,2
a0000001004f1390 <proc_fork_connector+0x110> [MMB] nop.m 0x0
a0000001004f1396 <proc_fork_connector+0x116> nop.m 0x0
a0000001004f139c <proc_fork_connector+0x11c> (p09) br.call.spnt.many b0=a0000001007a8ee0 <preempt_schedule>;;
a0000001004f13a0 <proc_fork_connector+0x120> [MIB] mov r38=r33
a0000001004f13a6 <proc_fork_connector+0x126> nop.i 0x0
a0000001004f13ac <proc_fork_connector+0x12c> br.call.sptk.many b0=a0000001000dbbe0 <ktime_get_ts>;;
a0000001004f13b0 <proc_fork_connector+0x130> [MMI] nop.m 0x0
a0000001004f13b6 <proc_fork_connector+0x136> addl r23=-2061232,r1
a0000001004f13bc <proc_fork_connector+0x13c> nop.i 0x0;;
a0000001004f13c0 <proc_fork_connector+0x140> [MIB] ld8 r38=[r23]
a0000001004f13c6 <proc_fork_connector+0x146> nop.i 0x0
a0000001004f13cc <proc_fork_connector+0x14c> br.call.sptk.many b0=a0000001000a36c0 <printk>;;
a0000001004f13d0 <proc_fork_connector+0x150> [MMI] addl r22=-2061224,r1
a0000001004f13d6 <proc_fork_connector+0x156> ld8 r17=[r33],8
a0000001004f13dc <proc_fork_connector+0x15c> nop.i 0x0
a0000001004f13e0 <proc_fork_connector+0x160> [MMI] adds r2=16,r12;;
a0000001004f13e6 <proc_fork_connector+0x166> nop.m 0x0
a0000001004f13ec <proc_fork_connector+0x16c> shl r19=r17,5
a0000001004f13f0 <proc_fork_connector+0x170> [MMI] ld8 r8=[r33]
a0000001004f13f6 <proc_fork_connector+0x176> ld8 r38=[r22]
a0000001004f13fc <proc_fork_connector+0x17c> adds r33=400,r32;;
a0000001004f1400 <proc_fork_connector+0x180> [MII] nop.m 0x0
a0000001004f1406 <proc_fork_connector+0x186> sub r21=r19,r17;;
a0000001004f140c <proc_fork_connector+0x18c> shl r18=r21,6;;
a0000001004f1410 <proc_fork_connector+0x190> [MII] nop.m 0x0
a0000001004f1416 <proc_fork_connector+0x196> sub r11=r18,r21;;
a0000001004f141c <proc_fork_connector+0x19c> shladd r10=r11,3,r17;;
a0000001004f1420 <proc_fork_connector+0x1a0> [MII] nop.m 0x0
a0000001004f1426 <proc_fork_connector+0x1a6> shladd r16=r10,2,r10;;
a0000001004f142c <proc_fork_connector+0x1ac> shladd r9=r16,2,r16;;
a0000001004f1430 <proc_fork_connector+0x1b0> [MII] nop.m 0x0
a0000001004f1436 <proc_fork_connector+0x1b6> shladd r15=r9,2,r9;;
a0000001004f143c <proc_fork_connector+0x1bc> shl r20=r15,9;;
a0000001004f1440 <proc_fork_connector+0x1c0> [MMI] nop.m 0x0
a0000001004f1446 <proc_fork_connector+0x1c6> add r3=r20,r8
a0000001004f144c <proc_fork_connector+0x1cc> nop.i 0x0;;
a0000001004f1450 <proc_fork_connector+0x1d0> [MIB] st8 [r2]=r3
a0000001004f1456 <proc_fork_connector+0x1d6> nop.i 0x0
a0000001004f145c <proc_fork_connector+0x1dc> br.call.sptk.many b0=a0000001000a36c0 <printk>;;
a0000001004f1460 <proc_fork_connector+0x1e0> [MMI] addl r38=-2061216,r1
a0000001004f1466 <proc_fork_connector+0x1e6> adds r14=16,r12
a0000001004f146c <proc_fork_connector+0x1ec> adds r39=68,r12;;
a0000001004f1470 <proc_fork_connector+0x1f0> [MMI] ld8 r40=[r14]
a0000001004f1476 <proc_fork_connector+0x1f6> ld8 r38=[r38]
a0000001004f147c <proc_fork_connector+0x1fc> nop.i 0x0;;
a0000001004f1480 <proc_fork_connector+0x200> [MIB] st8 [r39]=r40
a0000001004f1486 <proc_fork_connector+0x206> nop.i 0x0
a0000001004f148c <proc_fork_connector+0x20c> br.call.sptk.many b0=a0000001000a36c0 <printk>;;
a0000001004f1490 <proc_fork_connector+0x210> [MMI] addl r25=-1986756,r1
a0000001004f1496 <proc_fork_connector+0x216> ld8 r29=[r33]
a0000001004f149c <proc_fork_connector+0x21c> mov r31=1
a0000001004f14a0 <proc_fork_connector+0x220> [MMI] adds r30=60,r12
a0000001004f14a6 <proc_fork_connector+0x226> adds r27=392,r32
a0000001004f14ac <proc_fork_connector+0x22c> adds r28=388,r32;;
a0000001004f14b0 <proc_fork_connector+0x230> [MMI] st4 [r30]=r31
a0000001004f14b6 <proc_fork_connector+0x236> adds r26=388,r29
a0000001004f14bc <proc_fork_connector+0x23c> adds r23=76,r12
a0000001004f14c0 <proc_fork_connector+0x240> [MMI] adds r22=392,r29
a0000001004f14c6 <proc_fork_connector+0x246> adds r21=84,r12
a0000001004f14cc <proc_fork_connector+0x24c> adds r17=88,r12
a0000001004f14d0 <proc_fork_connector+0x250> [MMI] ld4 r20=[r25],4
a0000001004f14d6 <proc_fork_connector+0x256> ld4 r19=[r28]
a0000001004f14dc <proc_fork_connector+0x25c> adds r10=80,r12
a0000001004f14e0 <proc_fork_connector+0x260> [MMI] adds r9=44,r12
a0000001004f14e6 <proc_fork_connector+0x266> mov r15=32
a0000001004f14ec <proc_fork_connector+0x26c> adds r8=56,r12;;
a0000001004f14f0 <proc_fork_connector+0x270> [MMI] ld4 r24=[r26]
a0000001004f14f6 <proc_fork_connector+0x276> ld4 r18=[r27]
a0000001004f14fc <proc_fork_connector+0x27c> adds r3=52,r12
a0000001004f1500 <proc_fork_connector+0x280> [MMI] mov r38=r34
a0000001004f1506 <proc_fork_connector+0x286> mov r39=1
a0000001004f150c <proc_fork_connector+0x28c> mov r40=208
a0000001004f1510 <proc_fork_connector+0x290> [MII] ld4 r16=[r25]
a0000001004f1516 <proc_fork_connector+0x296> nop.i 0x0;;
a0000001004f151c <proc_fork_connector+0x29c> nop.i 0x0
a0000001004f1520 <proc_fork_connector+0x2a0> [MMI] st4 [r23]=r24
a0000001004f1526 <proc_fork_connector+0x2a6> ld4 r11=[r22]
a0000001004f152c <proc_fork_connector+0x2ac> nop.i 0x0
a0000001004f1530 <proc_fork_connector+0x2b0> [MMI] st4 [r21]=r19
a0000001004f1536 <proc_fork_connector+0x2b6> st4 [r17]=r18
a0000001004f153c <proc_fork_connector+0x2bc> nop.i 0x0;;
a0000001004f1540 <proc_fork_connector+0x2c0> [MMI] st4 [r10]=r11
a0000001004f1546 <proc_fork_connector+0x2c6> st4 [r9]=r16
a0000001004f154c <proc_fork_connector+0x2cc> nop.i 0x0
a0000001004f1550 <proc_fork_connector+0x2d0> [MMI] st4 [r34]=r20
a0000001004f1556 <proc_fork_connector+0x2d6> st2 [r8]=r15
a0000001004f155c <proc_fork_connector+0x2dc> nop.i 0x0
a0000001004f1560 <proc_fork_connector+0x2e0> [MIB] st4 [r3]=r0
a0000001004f1566 <proc_fork_connector+0x2e6> nop.i 0x0
a0000001004f156c <proc_fork_connector+0x2ec> br.call.sptk.many b0=a0000001004f02e0 <cn_netlink_send>;;
a0000001004f1570 <proc_fork_connector+0x2f0> [MII] nop.m 0x0
a0000001004f1576 <proc_fork_connector+0x2f6> mov.i ar.pfs=r36
a0000001004f157c <proc_fork_connector+0x2fc> mov b0=r35
a0000001004f1580 <proc_fork_connector+0x300> [MIB] nop.m 0x0
a0000001004f1586 <proc_fork_connector+0x306> adds r12=80,r12
a0000001004f158c <proc_fork_connector+0x30c> br.ret.sptk.many b0;;
a0000001004f1590 <proc_fork_connector+0x310> [MMI] nop.m 0x0
a0000001004f1596 <proc_fork_connector+0x316> nop.m 0x0
a0000001004f159c <proc_fork_connector+0x31c> nop.i 0x0
Disassembly of section .init.text:
Disassembly of section .data.page_aligned:
On Sat, 9 Dec 2006 15:09:13 -0600, Erik Jacobson <[email protected]> wrote:
> > Please try to declare u64 timestamp_ns, then copy it into the *ev
> > instead of copying whole *ev. This ought to fix the problem if
> > buffer[] ends aligned to 32 bits or better.
>
> So I took this suggestion for a spin and met with the same result.
> The unaligned access messages are still produced.
I see. And I see you went a few steps forward with dignosing it:
> dbg fork after timespec_to_ns call, b4 memcpy
> kernel unaligned access to 0xe000003076b6fbe4, ip=0xa0000001004f1480
> dbg fork after memcpy, b4 other ev settings...
> a0000001004f1470 <proc_fork_connector+0x1f0> [MMI] ld8 r40=[r14]
> a0000001004f1476 <proc_fork_connector+0x1f6> ld8 r38=[r38]
> a0000001004f147c <proc_fork_connector+0x1fc> nop.i 0x0;;
> a0000001004f1480 <proc_fork_connector+0x200> [MIB] st8 [r39]=r40
> a0000001004f1486 <proc_fork_connector+0x206> nop.i 0x0
> a0000001004f148c <proc_fork_connector+0x20c> br.call.sptk.many b0=a0000001000a36c0 <printk>;;
It seems rather strange that memcpy gets optimized this way. I could
not have foreseen it. Still, it was worth a try, even if putting 32
extra bytes on stack and running memcpy on them does not seem too
onerous, for a fork(). Thanks for doing it, and let's go with your
original patch then... if Matt Helsley does not mind.
Thank you,
-- Pete
P.S. This syntax seems to cry for being expressed in diff -u:
> ktime_get_ts(&ts); /* get high res monotonic timestamp */
> //ev->timestamp_ns = timespec_to_ns(&ts); removed
> printk("dbg fork before timestamp_to_ns call\n");
> timestamp_ns = timespec_to_ns(&ts); //added
> printk("dbg fork after timespec_to_ns call, b4 memcpy\n");
> memcpy(&ev->timestamp_ns, ×tamp_ns, sizeof(ev->timestamp_ns)); //added
> printk("dbg fork after memcpy, b4 other ev settings...\n");
On Sat, 2006-12-09 at 18:34 -0800, Pete Zaitcev wrote:
> On Sat, 9 Dec 2006 15:09:13 -0600, Erik Jacobson <[email protected]> wrote:
>
> > > Please try to declare u64 timestamp_ns, then copy it into the *ev
> > > instead of copying whole *ev. This ought to fix the problem if
> > > buffer[] ends aligned to 32 bits or better.
> >
> > So I took this suggestion for a spin and met with the same result.
> > The unaligned access messages are still produced.
>
> I see. And I see you went a few steps forward with dignosing it:
>
> > dbg fork after timespec_to_ns call, b4 memcpy
> > kernel unaligned access to 0xe000003076b6fbe4, ip=0xa0000001004f1480
> > dbg fork after memcpy, b4 other ev settings...
>
> > a0000001004f1470 <proc_fork_connector+0x1f0> [MMI] ld8 r40=[r14]
> > a0000001004f1476 <proc_fork_connector+0x1f6> ld8 r38=[r38]
> > a0000001004f147c <proc_fork_connector+0x1fc> nop.i 0x0;;
> > a0000001004f1480 <proc_fork_connector+0x200> [MIB] st8 [r39]=r40
> > a0000001004f1486 <proc_fork_connector+0x206> nop.i 0x0
> > a0000001004f148c <proc_fork_connector+0x20c> br.call.sptk.many b0=a0000001000a36c0 <printk>;;
I'm not very familiar with ia64 asm but it looks like its loading and
storying 8 bytes at a time for the memcpy().
> It seems rather strange that memcpy gets optimized this way. I could
> not have foreseen it. Still, it was worth a try, even if putting 32
> extra bytes on stack and running memcpy on them does not seem too
> onerous, for a fork(). Thanks for doing it, and let's go with your
> original patch then... if Matt Helsley does not mind.
OK, I'll ack the original.
> Thank you,
> -- Pete
I'm shocked memcpy() introduces 8-byte stores that violate architecture
alignment rules. Is there any chance this a bug in ia64's memcpy()
implementation? I've tried to read it but since I'm not familiar with
ia64 asm I can't make out significant parts of it in
arch/ia64/lib/memcpy.S.
<snip>
Cheers,
-Matt Helsley
On Thu, 2006-12-07 at 17:22 -0600, Erik Jacobson wrote:
> On ia64, the various functions that make up cn_proc.c cause kernel
> unaligned access errors.
>
> If you are using these, for example, to get notification about
> all tasks forking and exiting, you get multiple unaligned access errors
> per process.
>
> Here, we just adjust how the variables are declared and use memcopy to
> avoid the error messages.
>
> Signed-off-by: Erik Jacobson <[email protected]>
Acked-by: Matt Helsley <[email protected]>
> ---
>
> cn_proc.c | 94 +++++++++++++++++++++++++++++++-------------------------------
> 1 file changed, 47 insertions(+), 47 deletions(-)
> --- linux.orig/drivers/connector/cn_proc.c 2006-11-29 15:57:37.000000000 -0600
> +++ linux/drivers/connector/cn_proc.c 2006-12-07 16:50:03.195035791 -0600
> @@ -49,7 +49,7 @@
> void proc_fork_connector(struct task_struct *task)
> {
> struct cn_msg *msg;
> - struct proc_event *ev;
> + struct proc_event ev;
> __u8 buffer[CN_PROC_MSG_SIZE];
> struct timespec ts;
>
> @@ -57,19 +57,19 @@
> return;
>
> msg = (struct cn_msg*)buffer;
> - ev = (struct proc_event*)msg->data;
> - get_seq(&msg->seq, &ev->cpu);
> + get_seq(&msg->seq, &ev.cpu);
> ktime_get_ts(&ts); /* get high res monotonic timestamp */
> - ev->timestamp_ns = timespec_to_ns(&ts);
> - ev->what = PROC_EVENT_FORK;
> - ev->event_data.fork.parent_pid = task->real_parent->pid;
> - ev->event_data.fork.parent_tgid = task->real_parent->tgid;
> - ev->event_data.fork.child_pid = task->pid;
> - ev->event_data.fork.child_tgid = task->tgid;
> + ev.timestamp_ns = timespec_to_ns(&ts);
> + ev.what = PROC_EVENT_FORK;
> + ev.event_data.fork.parent_pid = task->real_parent->pid;
> + ev.event_data.fork.parent_tgid = task->real_parent->tgid;
> + ev.event_data.fork.child_pid = task->pid;
> + ev.event_data.fork.child_tgid = task->tgid;
>
> memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
> msg->ack = 0; /* not used */
> - msg->len = sizeof(*ev);
> + msg->len = sizeof(ev);
> + memcpy(msg->data, &ev, sizeof(ev));
> /* If cn_netlink_send() failed, the data is not sent */
> cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
> }
> @@ -77,7 +77,7 @@
> void proc_exec_connector(struct task_struct *task)
> {
> struct cn_msg *msg;
> - struct proc_event *ev;
> + struct proc_event ev;
> struct timespec ts;
> __u8 buffer[CN_PROC_MSG_SIZE];
>
> @@ -85,24 +85,24 @@
> return;
>
> msg = (struct cn_msg*)buffer;
> - ev = (struct proc_event*)msg->data;
> - get_seq(&msg->seq, &ev->cpu);
> + get_seq(&msg->seq, &ev.cpu);
> ktime_get_ts(&ts); /* get high res monotonic timestamp */
> - ev->timestamp_ns = timespec_to_ns(&ts);
> - ev->what = PROC_EVENT_EXEC;
> - ev->event_data.exec.process_pid = task->pid;
> - ev->event_data.exec.process_tgid = task->tgid;
> + ev.timestamp_ns = timespec_to_ns(&ts);
> + ev.what = PROC_EVENT_EXEC;
> + ev.event_data.exec.process_pid = task->pid;
> + ev.event_data.exec.process_tgid = task->tgid;
>
> memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
> msg->ack = 0; /* not used */
> - msg->len = sizeof(*ev);
> + msg->len = sizeof(ev);
> + memcpy(msg->data, &ev, sizeof(ev));
> cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
> }
>
> void proc_id_connector(struct task_struct *task, int which_id)
> {
> struct cn_msg *msg;
> - struct proc_event *ev;
> + struct proc_event ev;
> __u8 buffer[CN_PROC_MSG_SIZE];
> struct timespec ts;
>
> @@ -110,32 +110,32 @@
> return;
>
> msg = (struct cn_msg*)buffer;
> - ev = (struct proc_event*)msg->data;
> - ev->what = which_id;
> - ev->event_data.id.process_pid = task->pid;
> - ev->event_data.id.process_tgid = task->tgid;
> + ev.what = which_id;
> + ev.event_data.id.process_pid = task->pid;
> + ev.event_data.id.process_tgid = task->tgid;
> if (which_id == PROC_EVENT_UID) {
> - ev->event_data.id.r.ruid = task->uid;
> - ev->event_data.id.e.euid = task->euid;
> + ev.event_data.id.r.ruid = task->uid;
> + ev.event_data.id.e.euid = task->euid;
> } else if (which_id == PROC_EVENT_GID) {
> - ev->event_data.id.r.rgid = task->gid;
> - ev->event_data.id.e.egid = task->egid;
> + ev.event_data.id.r.rgid = task->gid;
> + ev.event_data.id.e.egid = task->egid;
> } else
> return;
> - get_seq(&msg->seq, &ev->cpu);
> + get_seq(&msg->seq, &ev.cpu);
> ktime_get_ts(&ts); /* get high res monotonic timestamp */
> - ev->timestamp_ns = timespec_to_ns(&ts);
> + ev.timestamp_ns = timespec_to_ns(&ts);
>
> memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
> msg->ack = 0; /* not used */
> - msg->len = sizeof(*ev);
> + msg->len = sizeof(ev);
> + memcpy(msg->data, &ev, sizeof(ev));
> cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
> }
>
> void proc_exit_connector(struct task_struct *task)
> {
> struct cn_msg *msg;
> - struct proc_event *ev;
> + struct proc_event ev;
> __u8 buffer[CN_PROC_MSG_SIZE];
> struct timespec ts;
>
> @@ -143,19 +143,19 @@
> return;
>
> msg = (struct cn_msg*)buffer;
> - ev = (struct proc_event*)msg->data;
> - get_seq(&msg->seq, &ev->cpu);
> + get_seq(&msg->seq, &ev.cpu);
> ktime_get_ts(&ts); /* get high res monotonic timestamp */
> - ev->timestamp_ns = timespec_to_ns(&ts);
> - ev->what = PROC_EVENT_EXIT;
> - ev->event_data.exit.process_pid = task->pid;
> - ev->event_data.exit.process_tgid = task->tgid;
> - ev->event_data.exit.exit_code = task->exit_code;
> - ev->event_data.exit.exit_signal = task->exit_signal;
> + ev.timestamp_ns = timespec_to_ns(&ts);
> + ev.what = PROC_EVENT_EXIT;
> + ev.event_data.exit.process_pid = task->pid;
> + ev.event_data.exit.process_tgid = task->tgid;
> + ev.event_data.exit.exit_code = task->exit_code;
> + ev.event_data.exit.exit_signal = task->exit_signal;
>
> memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
> msg->ack = 0; /* not used */
> - msg->len = sizeof(*ev);
> + msg->len = sizeof(ev);
> + memcpy(msg->data, &ev, sizeof(ev));
> cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
> }
>
> @@ -170,7 +170,7 @@
> static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
> {
> struct cn_msg *msg;
> - struct proc_event *ev;
> + struct proc_event ev;
> __u8 buffer[CN_PROC_MSG_SIZE];
> struct timespec ts;
>
> @@ -178,16 +178,16 @@
> return;
>
> msg = (struct cn_msg*)buffer;
> - ev = (struct proc_event*)msg->data;
> msg->seq = rcvd_seq;
> ktime_get_ts(&ts); /* get high res monotonic timestamp */
> - ev->timestamp_ns = timespec_to_ns(&ts);
> - ev->cpu = -1;
> - ev->what = PROC_EVENT_NONE;
> - ev->event_data.ack.err = err;
> + ev.timestamp_ns = timespec_to_ns(&ts);
> + ev.cpu = -1;
> + ev.what = PROC_EVENT_NONE;
> + ev.event_data.ack.err = err;
> memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
> msg->ack = rcvd_ack + 1;
> - msg->len = sizeof(*ev);
> + msg->len = sizeof(ev);
> + memcpy(msg->data, &ev, sizeof(ev));
> cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
> }
>
On Mon, 11 Dec 2006 15:52:47 -0800, Matt Helsley <[email protected]> wrote:
> I'm shocked memcpy() introduces 8-byte stores that violate architecture
> alignment rules. Is there any chance this a bug in ia64's memcpy()
> implementation? I've tried to read it but since I'm not familiar with
> ia64 asm I can't make out significant parts of it in
> arch/ia64/lib/memcpy.S.
The arch/ia64/lib/memcpy.S is probably fine, it must be gcc doing
an inline substitution of a well-known function.
A commenter on my blog mentioned seeing the same thing in the past.
(http://zaitcev.livejournal.com/107185.html?thread=128945#t128945)
It's possible that applying (void *) cast to the first argument of memcpy
would disrupt this optimization. But since we have a well understood
patch by Erik, which only adds a penalty of 32 bytes of stack waste
and 32 bytes of memcpy, I thought it best not to bother with heaping
workarounds.
-- Pete
From: Pete Zaitcev <[email protected]>
Date: Mon, 11 Dec 2006 17:29:07 -0800
> On Mon, 11 Dec 2006 15:52:47 -0800, Matt Helsley <[email protected]> wrote:
>
> > I'm shocked memcpy() introduces 8-byte stores that violate architecture
> > alignment rules. Is there any chance this a bug in ia64's memcpy()
> > implementation? I've tried to read it but since I'm not familiar with
> > ia64 asm I can't make out significant parts of it in
> > arch/ia64/lib/memcpy.S.
>
> The arch/ia64/lib/memcpy.S is probably fine, it must be gcc doing
> an inline substitution of a well-known function.
>
> A commenter on my blog mentioned seeing the same thing in the past.
> (http://zaitcev.livejournal.com/107185.html?thread=128945#t128945)
>
> It's possible that applying (void *) cast to the first argument of memcpy
> would disrupt this optimization. But since we have a well understood
> patch by Erik, which only adds a penalty of 32 bytes of stack waste
> and 32 bytes of memcpy, I thought it best not to bother with heaping
> workarounds.
Yes GCC can assume the object is aligned because of the type
of the argument to memcpy().
I tried myself some games with adding a "packed" attribute to the
pointer declaration (trying to tell it that "the thing pointed to"
might be unaligned), but to no avail.
Pete Zaitcev wrote on Monday, December 11, 2006 5:29 PM
> On Mon, 11 Dec 2006 15:52:47 -0800, Matt Helsley <[email protected]> wrote:
>
> > I'm shocked memcpy() introduces 8-byte stores that violate architecture
> > alignment rules. Is there any chance this a bug in ia64's memcpy()
> > implementation? I've tried to read it but since I'm not familiar with
> > ia64 asm I can't make out significant parts of it in
> > arch/ia64/lib/memcpy.S.
>
> The arch/ia64/lib/memcpy.S is probably fine, it must be gcc doing
> an inline substitution of a well-known function.
arch/ia64/lib/memcpy.S is fine because it does alignment check at the very
beginning of the function and depends on the alignment of dst/src alignment,
it does different thing. The unaligned access is coming from gcc inlined
version of memcpy.
But looking deeply, memory allocation for proc_event in proc_for_connector
doesn't looked correct at all:
In drivers/connector/cn_proc.c:
#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event))
void proc_fork_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE];
You can't do that because gcc assumes struct proc_event aligns on certain
boundary. Doing fancy hand crafting like that breaks code generated by gcc.
- Ken
On Mon, 2006-12-11 at 17:50 -0800, David Miller wrote:
> From: Pete Zaitcev <[email protected]>
> Date: Mon, 11 Dec 2006 17:29:07 -0800
>
> > On Mon, 11 Dec 2006 15:52:47 -0800, Matt Helsley <[email protected]> wrote:
> >
> > > I'm shocked memcpy() introduces 8-byte stores that violate architecture
> > > alignment rules. Is there any chance this a bug in ia64's memcpy()
> > > implementation? I've tried to read it but since I'm not familiar with
> > > ia64 asm I can't make out significant parts of it in
> > > arch/ia64/lib/memcpy.S.
> >
> > The arch/ia64/lib/memcpy.S is probably fine, it must be gcc doing
> > an inline substitution of a well-known function.
> >
> > A commenter on my blog mentioned seeing the same thing in the past.
> > (http://zaitcev.livejournal.com/107185.html?thread=128945#t128945)
> >
> > It's possible that applying (void *) cast to the first argument of memcpy
> > would disrupt this optimization. But since we have a well understood
> > patch by Erik, which only adds a penalty of 32 bytes of stack waste
> > and 32 bytes of memcpy, I thought it best not to bother with heaping
> > workarounds.
>
> Yes GCC can assume the object is aligned because of the type
> of the argument to memcpy().
Hmm, that GCC assumption conflicts with the prototypes of memcpy() I've
seen.
Does the code really check the type or just the size argument? If the
latter then I don't think assuming alignment is correct -- we could be
copying a non-nul-terminated string that happens to be a power of 2 in
length.
Cheers,
-Matt Helsley
From: Matt Helsley <[email protected]>
Date: Mon, 11 Dec 2006 19:09:16 -0800
> Hmm, that GCC assumption conflicts with the prototypes of memcpy() I've
> seen.
When GCC expands __builtin_memcpy() internally it looks at the types
of the arguments, and what it knows about their guarenteed alignment.
memcpy()'s declaration of the first argument as "void *" has
zero influence upon any of this.
Hi Andrew.
There was some discussion on this patch but I believe we've agreed
on the first version I sent. This was ACKed by Matt Helsley.
Would you consider taking this in to -mm?
I've included my original patch email at the bottom.
On Mon, Dec 11, 2006 at 03:52:46PM -0800, Matt Helsley wrote:
(snipped up)
> On Thu, 2006-12-07 at 17:22 -0600, Erik Jacobson wrote:
> > Signed-off-by: Erik Jacobson <[email protected]>
> Acked-by: Matt Helsley <[email protected]>
> > ---
> > cn_proc.c | 94 +++++++++++++++++++++++++++++++-------------------------------
Original patch email:
Date: Thu, 7 Dec 2006 17:22:13 -0600
From: Erik Jacobson <[email protected]>
To: [email protected]
Cc: [email protected], [email protected]
Subject: [PATCH] connector: Some fixes for ia64 unaligned access errors
On ia64, the various functions that make up cn_proc.c cause kernel
unaligned access errors.
If you are using these, for example, to get notification about
all tasks forking and exiting, you get multiple unaligned access errors
per process.
Here, we just adjust how the variables are declared and use memcopy to
avoid the error messages.
Signed-off-by: Erik Jacobson <[email protected]>
---
cn_proc.c | 94 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 47 insertions(+), 47 deletions(-)
--- linux.orig/drivers/connector/cn_proc.c 2006-11-29 15:57:37.000000000 -0600
+++ linux/drivers/connector/cn_proc.c 2006-12-07 16:50:03.195035791 -0600
@@ -49,7 +49,7 @@
void proc_fork_connector(struct task_struct *task)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -57,19 +57,19 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->what = PROC_EVENT_FORK;
- ev->event_data.fork.parent_pid = task->real_parent->pid;
- ev->event_data.fork.parent_tgid = task->real_parent->tgid;
- ev->event_data.fork.child_pid = task->pid;
- ev->event_data.fork.child_tgid = task->tgid;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.what = PROC_EVENT_FORK;
+ ev.event_data.fork.parent_pid = task->real_parent->pid;
+ ev.event_data.fork.parent_tgid = task->real_parent->tgid;
+ ev.event_data.fork.child_pid = task->pid;
+ ev.event_data.fork.child_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
/* If cn_netlink_send() failed, the data is not sent */
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
@@ -77,7 +77,7 @@
void proc_exec_connector(struct task_struct *task)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
struct timespec ts;
__u8 buffer[CN_PROC_MSG_SIZE];
@@ -85,24 +85,24 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->what = PROC_EVENT_EXEC;
- ev->event_data.exec.process_pid = task->pid;
- ev->event_data.exec.process_tgid = task->tgid;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.what = PROC_EVENT_EXEC;
+ ev.event_data.exec.process_pid = task->pid;
+ ev.event_data.exec.process_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
void proc_id_connector(struct task_struct *task, int which_id)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -110,32 +110,32 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- ev->what = which_id;
- ev->event_data.id.process_pid = task->pid;
- ev->event_data.id.process_tgid = task->tgid;
+ ev.what = which_id;
+ ev.event_data.id.process_pid = task->pid;
+ ev.event_data.id.process_tgid = task->tgid;
if (which_id == PROC_EVENT_UID) {
- ev->event_data.id.r.ruid = task->uid;
- ev->event_data.id.e.euid = task->euid;
+ ev.event_data.id.r.ruid = task->uid;
+ ev.event_data.id.e.euid = task->euid;
} else if (which_id == PROC_EVENT_GID) {
- ev->event_data.id.r.rgid = task->gid;
- ev->event_data.id.e.egid = task->egid;
+ ev.event_data.id.r.rgid = task->gid;
+ ev.event_data.id.e.egid = task->egid;
} else
return;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ ev.timestamp_ns = timespec_to_ns(&ts);
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
void proc_exit_connector(struct task_struct *task)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -143,19 +143,19 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
- get_seq(&msg->seq, &ev->cpu);
+ get_seq(&msg->seq, &ev.cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->what = PROC_EVENT_EXIT;
- ev->event_data.exit.process_pid = task->pid;
- ev->event_data.exit.process_tgid = task->tgid;
- ev->event_data.exit.exit_code = task->exit_code;
- ev->event_data.exit.exit_signal = task->exit_signal;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.what = PROC_EVENT_EXIT;
+ ev.event_data.exit.process_pid = task->pid;
+ ev.event_data.exit.process_tgid = task->tgid;
+ ev.event_data.exit.exit_code = task->exit_code;
+ ev.event_data.exit.exit_signal = task->exit_signal;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
@@ -170,7 +170,7 @@
static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
{
struct cn_msg *msg;
- struct proc_event *ev;
+ struct proc_event ev;
__u8 buffer[CN_PROC_MSG_SIZE];
struct timespec ts;
@@ -178,16 +178,16 @@
return;
msg = (struct cn_msg*)buffer;
- ev = (struct proc_event*)msg->data;
msg->seq = rcvd_seq;
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
- ev->cpu = -1;
- ev->what = PROC_EVENT_NONE;
- ev->event_data.ack.err = err;
+ ev.timestamp_ns = timespec_to_ns(&ts);
+ ev.cpu = -1;
+ ev.what = PROC_EVENT_NONE;
+ ev.event_data.ack.err = err;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = rcvd_ack + 1;
- msg->len = sizeof(*ev);
+ msg->len = sizeof(ev);
+ memcpy(msg->data, &ev, sizeof(ev));
cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
}
On Tue, 12 Dec 2006 11:54:11 -0600
Erik Jacobson <[email protected]> wrote:
> Hi Andrew.
>
> There was some discussion on this patch but I believe we've agreed
> on the first version I sent. This was ACKed by Matt Helsley.
>
> Would you consider taking this in to -mm?
>
> I've included my original patch email at the bottom.
>
I'm a bit late to the party.
> cn_proc.c | 94 +++++++++++++++++++++++++++++++-------------------------------
But it's rather a lot of churn for such a thing. Did you consider simply using
put_unaligned() against the specific offending field(s)?
On Tue, 12 Dec 2006 20:31:32 -0600
Erik Jacobson <[email protected]> wrote:
> > But it's rather a lot of churn for such a thing. Did you consider simply using
> > put_unaligned() against the specific offending field(s)?
>
> Hi. This was not considered.
>
> I wanted to give you some quick feedback, so I tried your suggestion in the
> fork path. It seemed to fix the problem as well.
OK.
> put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
>
> Is what I tried.
>
> I'm on vacation tomorrow but on Thursday, if you like, I can whip up
> a patch that does this and test it more thoroughly. Is this the
> direction you prefer? What I did just now was really quick and dirty
> to see if it has a shot or not but it looks like put_unaligned will
> fix it too.
>
Well it's a one-liner and it makes it very clear what's going on. So
unless there's some undiscovered downside, yes, I think it's a good way to
go. It'll be an easier patch for the -stable guys to swallow too.
There's no particular hurry on it.
> But it's rather a lot of churn for such a thing. Did you consider simply using
> put_unaligned() against the specific offending field(s)?
Hi. This was not considered.
I wanted to give you some quick feedback, so I tried your suggestion in the
fork path. It seemed to fix the problem as well.
put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
Is what I tried.
I'm on vacation tomorrow but on Thursday, if you like, I can whip up
a patch that does this and test it more thoroughly. Is this the
direction you prefer? What I did just now was really quick and dirty
to see if it has a shot or not but it looks like put_unaligned will
fix it too.
--
Erik Jacobson - Linux System Software - SGI - Eagan, Minnesota
Hi. I didn't want to leave this hanging and it stayed in my head so I
thought I'd better just finish it and test it.
I tried out this patch and it got rid of all three unaligned acces errors
I was seeing with process connectors and the patch is indeed much smaller.
I ran our container daemon program in debug mode to be sure the forks
and exits still worked right when the patch was applied and all seemed
well.
I applied this patch to x86_64 as well as a sanity check and it seems
working fine.
Things look good to me. I'm ok if you prefer this patch, or the one
already in -mm.
Signed-off-by: Erik Jacobson <[email protected]>
---
cn_proc.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
--- linux.orig/drivers/connector/cn_proc.c 2006-12-12 23:03:31.000000000 -0600
+++ linux/drivers/connector/cn_proc.c 2006-12-12 23:06:34.243535000 -0600
@@ -28,6 +28,7 @@
#include <linux/init.h>
#include <linux/connector.h>
#include <asm/atomic.h>
+#include <asm/unaligned.h>
#include <linux/cn_proc.h>
@@ -60,7 +61,7 @@
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
ev->what = PROC_EVENT_FORK;
ev->event_data.fork.parent_pid = task->real_parent->pid;
ev->event_data.fork.parent_tgid = task->real_parent->tgid;
@@ -88,7 +89,7 @@
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
ev->what = PROC_EVENT_EXEC;
ev->event_data.exec.process_pid = task->pid;
ev->event_data.exec.process_tgid = task->tgid;
@@ -124,7 +125,7 @@
return;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
@@ -146,7 +147,7 @@
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
ev->what = PROC_EVENT_EXIT;
ev->event_data.exit.process_pid = task->pid;
ev->event_data.exit.process_tgid = task->tgid;
@@ -181,7 +182,7 @@
ev = (struct proc_event*)msg->data;
msg->seq = rcvd_seq;
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *) &ev->timestamp_ns);
ev->cpu = -1;
ev->what = PROC_EVENT_NONE;
ev->event_data.ack.err = err;