Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id ; Fri, 10 Jan 2003 13:05:58 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id ; Fri, 10 Jan 2003 13:05:58 -0500 Received: from 216-239-45-4.google.com ([216.239.45.4]:21190 "EHLO 216-239-45-4.google.com") by vger.kernel.org with ESMTP id ; Fri, 10 Jan 2003 13:04:47 -0500 Message-ID: <3E1F0CF5.4000304@google.com> Date: Fri, 10 Jan 2003 10:12:05 -0800 From: Ross Biro User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.0.1) Gecko/20020826 X-Accept-Language: en-us, en MIME-Version: 1.0 To: Alan Cox , Andre Hedrick , Marcelo Tosatti CC: Linux Kernel Mailing List Subject: PATCH: [2.4.21-pre3] Fix for SMP race condition in IDE code References: <1042207998.28469.98.camel@irongate.swansea.linux.org.uk> <20030110164834.GM843@suse.de> <1042222339.32175.3.camel@irongate.swansea.linux.org.uk> Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14482 Lines: 380 There is a race condition in all versions of the IDE code that I've looked at including 2.4.18 and 2.4.21-pre3. Basically on an SMP system if mutiple IDE channels are on the same interrupt and 1 channel sends has an interrupt pending on 1 processor while the other processor is calling ide_set_handler, then the interrupt can be mistaken for command completion on both channels, and a command can be completed before it is even issued. This problem can be triggered with the following code cd /proc/ide (while true; do for i in hd[a-z]; do for j in 1 2 3 ; do cat $i/smart_values >/dev/null; done; done; done) & (while true; do for i in hd[a-z]; do dd if=/dev/$i of=/dev/null bs=4096k skip=0 & done; wait; done) & And can be seen by properly instrumenting drive_cmd_intr to check for errors. On a dual proc machine with 4 channels on a single interrupt and 2.4.18 I expec to see an error about once every twenty minutes with the above code. I believe it should occur much less often on 2.4.20 and above. Drives react to this problem in different ways. Often they simply lock up and refuse to talk to the host until they have been properly reset. Some drives require a power cycle before they will work properly again. This problem required the use of over 200 machines, approximately 2000 drives, a bus analyzer, and a lot of cooperation from a couple of drive manufacturers to go from "something goes wrong once in a while" to something we could easily reproduce. This patch has only been minimally tested and then only with 1 brand of ide hard drive. ----- snip ------ diff -durbB linux-2.4.20/drivers/ide/ide-cd.c linux-2.4.20-p1/drivers/ide/ide-cd.c --- linux-2.4.20/drivers/ide/ide-cd.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-cd.c Wed Jan 8 16:25:04 2003 @@ -863,11 +864,15 @@ HWIF(drive)->OUTB(drive->ctl, IDE_CONTROL_REG); if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) { + unsigned long flags; if (HWGROUP(drive)->handler != NULL) BUG(); - ide_set_handler (drive, handler, WAIT_CMD, cdrom_timer_expiry); + spin_lock_irqsave(&io_request_lock, flags); /* packet command */ HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG); + ide_set_handler_nolock (drive, handler, WAIT_CMD, cdrom_timer_expiry); + ide_delay_400ns(); + spin_unlock_irqrestore(&io_request_lock, flags); return ide_started; } else { /* packet command */ diff -durbB linux-2.4.20/drivers/ide/ide-disk.c linux-2.4.20-p1/drivers/ide/ide-disk.c --- linux-2.4.20/drivers/ide/ide-disk.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-disk.c Wed Jan 8 16:25:17 2003 @@ -467,12 +467,15 @@ #endif /* CONFIG_BLK_DEV_IDEDMA */ if (HWGROUP(drive)->handler != NULL) BUG(); - ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); command = ((drive->mult_count) ? ((lba48) ? WIN_MULTREAD_EXT : WIN_MULTREAD) : ((lba48) ? WIN_READ_EXT : WIN_READ)); + spin_lock_irqsave(&io_request_lock, flags); hwif->OUTB(command, IDE_COMMAND_REG); + ide_set_handler_nolock(drive, &read_intr, WAIT_CMD, NULL); + ide_delay_400ns(); + spin_unlock_irqrestore(&io_request_lock, flags); return ide_started; } else if (rq_data_dir(rq) == WRITE) { ide_startstop_t startstop; diff -durbB linux-2.4.20/drivers/ide/ide-dma.c linux-2.4.20-p1/drivers/ide/ide-dma.c --- linux-2.4.20/drivers/ide/ide-dma.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-dma.c Thu Jan 9 15:32:09 2003 @@ -655,6 +655,7 @@ unsigned int count = 0; u8 dma_stat = 0, lba48 = (drive->addressing == 1) ? 1 : 0; task_ioreg_t command = WIN_NOP; + unsigned long flags; if (!(count = ide_build_dmatable(drive, rq, PCI_DMA_FROMDEVICE))) /* try PIO instead of DMA */ @@ -673,7 +674,6 @@ /* paranoia check */ if (HWGROUP(drive)->handler != NULL) BUG(); - ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, dma_timer_expiry); /* * FIX ME to use only ACB ide_task_t args Struct @@ -691,7 +691,11 @@ } #endif /* issue cmd to drive */ + spin_lock_irqsave(&io_request_lock, flags); hwif->OUTB(command, IDE_COMMAND_REG); + ide_set_handler_nolock(drive, &ide_dma_intr, 2*WAIT_CMD, dma_timer_expiry); + ide_delay_400ns(); + spin_unlock_irqrestore(&io_request_lock, flags); return HWIF(drive)->ide_dma_count(drive); } @@ -707,6 +711,7 @@ unsigned int count = 0; u8 dma_stat = 0, lba48 = (drive->addressing == 1) ? 1 : 0; task_ioreg_t command = WIN_NOP; + unsigned long flags; if (!(count = ide_build_dmatable(drive, rq, PCI_DMA_TODEVICE))) /* try PIO instead of DMA */ @@ -725,7 +730,6 @@ /* paranoia check */ if (HWGROUP(drive)->handler != NULL) BUG(); - ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, dma_timer_expiry); /* * FIX ME to use only ACB ide_task_t args Struct */ @@ -742,7 +746,13 @@ } #endif /* issue cmd to drive */ + spin_lock_irqsave(&io_request_lock, flags); hwif->OUTB(command, IDE_COMMAND_REG); + ide_set_handler_nolock(drive, &ide_dma_intr, + 2*WAIT_CMD, dma_timer_expiry); + ide_delay_400ns(); + spin_unlock_irqrestore(&io_request_lock, flags); + return HWIF(drive)->ide_dma_count(drive); } diff -durbB linux-2.4.20/drivers/ide/ide-floppy.c linux-2.4.20-p1/drivers/ide/ide-floppy.c --- linux-2.4.20/drivers/ide/ide-floppy.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-floppy.c Wed Jan 8 16:15:17 2003 @@ -1123,14 +1123,17 @@ } if (test_bit(IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags)) { + unsigned long flags; if (HWGROUP(drive)->handler != NULL) BUG(); + /* Issue the packet command */ + spin_lock_irqsave(&io_request_lock, flags); + HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG); ide_set_handler(drive, pkt_xfer_routine, IDEFLOPPY_WAIT_CMD, NULL); - /* Issue the packet command */ - HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG); + spin_unlock_irqrestore(&io_request_lock, flags); return ide_started; } else { /* Issue the packet command */ diff -durbB linux-2.4.20/drivers/ide/ide-io.c linux-2.4.20-p1/drivers/ide/ide-io.c --- linux-2.4.20/drivers/ide/ide-io.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-io.c Wed Jan 8 16:25:37 2003 @@ -363,14 +363,21 @@ void ide_cmd (ide_drive_t *drive, u8 cmd, u8 nsect, ide_handler_t *handler) { ide_hwif_t *hwif = HWIF(drive); + unsigned long flags; + if (HWGROUP(drive)->handler != NULL) BUG(); - ide_set_handler(drive, handler, WAIT_CMD, NULL); if (IDE_CONTROL_REG) hwif->OUTB(drive->ctl,IDE_CONTROL_REG); /* clear nIEN */ SELECT_MASK(drive,0); hwif->OUTB(nsect,IDE_NSECTOR_REG); + + spin_lock_irqsave(&io_request_lock, flags); hwif->OUTB(cmd,IDE_COMMAND_REG); + ide_set_handler_nolock(drive, handler, WAIT_CMD, NULL); + ide_delay_400ns(); + spin_unlock_irqrestore(&io_request_lock, flags); + } EXPORT_SYMBOL(ide_cmd); diff -durbB linux-2.4.20/drivers/ide/ide-iops.c linux-2.4.20-p1/drivers/ide/ide-iops.c --- linux-2.4.20/drivers/ide/ide-iops.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-iops.c Wed Jan 8 15:54:18 2003 @@ -908,13 +908,14 @@ * timer is started to prevent us from waiting forever in case * something goes wrong (see the ide_timer_expiry() handler later on). */ -void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, + +/* This version doesn't get the spinlock, so you must call it with a spinlock + on io_request_lock. */ +void ide_set_handler_nolock (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry) { - unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; " "old=%p, new=%p\n", @@ -924,6 +925,15 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); +} + +/* This version grabs and releases the io_request_lock, so must be called + with out the spinlock grabbed. */ +void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, + unsigned int timeout, ide_expiry_t *expiry) { + unsigned long flags; + spin_lock_irqsave(&io_request_lock, flags); + ide_set_handler_nolock(drive, handler, timeout, expiry); spin_unlock_irqrestore(&io_request_lock, flags); } diff -durbB linux-2.4.20/drivers/ide/ide-tape.c linux-2.4.20-p1/drivers/ide/ide-tape.c --- linux-2.4.20/drivers/ide/ide-tape.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-tape.c Wed Jan 8 16:20:09 2003 @@ -2457,13 +2457,17 @@ set_bit(PC_DMA_IN_PROGRESS, &pc->flags); #endif /* CONFIG_BLK_DEV_IDEDMA */ if (test_bit(IDETAPE_DRQ_INTERRUPT, &tape->flags)) { + unsigned long flags; if (HWGROUP(drive)->handler != NULL) BUG(); - ide_set_handler(drive, + + spin_lock_irqsave(&io_request_lock, flags); + HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG); + ide_set_handler_nolock(drive, &idetape_transfer_pc, IDETAPE_WAIT_CMD, NULL); - HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG); + spin_unlock_irqrestore(&io_request_lock, flags); return ide_started; } else { HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG); diff -durbB linux-2.4.20/drivers/ide/ide-taskfile.c linux-2.4.20-p1/drivers/ide/ide-taskfile.c --- linux-2.4.20/drivers/ide/ide-taskfile.c Thu Jan 9 11:14:01 2003 +++ linux-2.4.20-p1/drivers/ide/ide-taskfile.c Wed Jan 8 16:25:43 2003 @@ -173,6 +173,7 @@ task_struct_t *taskfile = (task_struct_t *) task->tfRegister; hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister; u8 HIHI = (drive->addressing == 1) ? 0xE0 : 0xEF; + unsigned long flags; #ifdef CONFIG_IDE_TASK_IOCTL_DEBUG void debug_taskfile(drive, task); @@ -201,8 +202,14 @@ hwif->OUTB((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG); if (task->handler != NULL) { - ide_set_handler(drive, task->handler, WAIT_WORSTCASE, NULL); + spin_lock_irqsave(&io_request_lock, flags); hwif->OUTB(taskfile->command, IDE_COMMAND_REG); + /* We need to give the drive time to set the busy + flag, or we may mistake an interrupt from another drive + for the command completion on this drive. */ + ide_set_handler_nolock(drive, task->handler, WAIT_WORSTCASE, NULL); + ide_delay_400ns(); + spin_unlock_irqrestore(&io_request_lock, flags); if (task->prehandler != NULL) return task->prehandler(drive, task->rq); return ide_started; @@ -1832,6 +1839,7 @@ ide_hwif_t *hwif = HWIF(drive); task_struct_t *taskfile = (task_struct_t *) task->tfRegister; hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister; + unsigned long flags; #if DEBUG_TASKFILE u8 status; #endif @@ -1929,9 +1937,13 @@ if (task->handler == NULL) return ide_stopped; - ide_set_handler(drive, task->handler, WAIT_WORSTCASE, NULL); + /* Issue the command */ + spin_lock_irqsave(&io_request_lock, flags); hwif->OUTB(taskfile->command, IDE_COMMAND_REG); + ide_set_handler_nolock(drive, task->handler, + WAIT_WORSTCASE, NULL); + spin_unlock_irqrestore(&io_request_lock, flags); if (task->prehandler != NULL) return task->prehandler(drive, HWGROUP(drive)->rq); } diff -durbB linux-2.4.20/include/asm-i386/ide.h linux-2.4.20-p1/include/asm-i386/ide.h --- linux-2.4.20/include/asm-i386/ide.h Thu Jan 9 11:17:05 2003 +++ linux-2.4.20-p1/include/asm-i386/ide.h Fri Jan 10 09:54:07 2003 @@ -14,6 +14,7 @@ #ifdef __KERNEL__ #include +#include #ifndef MAX_HWIFS # ifdef CONFIG_BLK_DEV_IDEPCI @@ -22,6 +23,16 @@ #define MAX_HWIFS 6 # endif #endif + + + +/* The ATA spec requires 400ns delays all over the place. */ +/* Do the same fixed point trick the udelay does to get our delay. */ +#define IDE_DELAY_400NS +static __inline__ void ide_delay_400ns(void) +{ + __const_udelay (400 * 4); +} static __inline__ int ide_default_irq(ide_ioreg_t base) { diff -durbB linux-2.4.20/include/linux/ide.h linux-2.4.20-p1/include/linux/ide.h --- linux-2.4.20/include/linux/ide.h Thu Jan 9 11:17:05 2003 +++ linux-2.4.20-p1/include/linux/ide.h Thu Jan 9 15:37:22 2003 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -354,6 +355,11 @@ #include +#ifndef IDE_DELAY_400NS +#define IDE_DELAY_400NS +static inline void ide_delay_400ns(void) { udelay(1); } +#endif + /* Currently only m68k, apus and m8xx need it */ #ifdef IDE_ARCH_ACK_INTR extern int ide_irq_lock; @@ -1282,6 +1288,7 @@ * and also to start the safety timer. */ extern void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int, ide_expiry_t *); +extern void ide_set_handler_nolock(ide_drive_t *, ide_handler_t *, unsigned int, ide_expiry_t *); /* * Error reporting, in human readable form (luxurious, but a memory hog). - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/