2010-02-26 21:57:59

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 0/9] cciss driver SCSI updates

The following series mainly fixes the scsi tape code in the cciss
driver to take advantage of more scatter gather elements, enabling
larger transfer sizes. The first eight patches are mainly cleaning
things up and getting ready for the 9th patch, which enables the
SCSI half of the driver to use more than 31 scatter gather elements.

I've tested on 32-bit and 64-bit systems, and written out tapes
with block sizes up to 2Mb.

These patches are vs. Jens's linux-2.6-block tree.

Stephen M. Cameron (9):
cciss: clarify command list padding calculation
cciss: detect bad alignment of scsi commands at build time
cciss: factor out scatter gather chain block allocation and freeing
cciss: simplify scatter gather code
cciss: fix scatter gather chain block dma direction kludge
cciss: factor out scatter gather chain block mapping code
cciss: do not use void pointer for scsi hba data
cciss: eliminate unnecessary pointer use in cciss scsi code
cciss: Fix problem with scatter gather elements in the scsi half of the driver


drivers/block/cciss.c | 174 ++++++++++++++++++++++----------------------
drivers/block/cciss.h | 12 +--
drivers/block/cciss_cmd.h | 14 +++-
drivers/block/cciss_scsi.c | 145 +++++++++++++++++++++++--------------
4 files changed, 188 insertions(+), 157 deletions(-)

--
-- steve


2010-02-26 21:58:11

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 2/9] cciss: detect bad alignment of scsi commands at build time

From: Stephen M. Cameron <[email protected]>

cciss: detect bad alignment of scsi commands at build time
Incidentally fix some nearby c++ style comments.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss.c | 2 +-
drivers/block/cciss_cmd.h | 9 ++++++---
drivers/block/cciss_scsi.c | 12 ++++++++----
3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a29e694..cd8c7c2 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4496,7 +4496,7 @@ static int __init cciss_init(void)
* boundary. Given that we use pci_alloc_consistent() to allocate an
* array of them, the size must be a multiple of 8 bytes.
*/
- BUILD_BUG_ON(sizeof(CommandList_struct) % 8);
+ BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT);

printk(KERN_INFO DRIVER_NAME "\n");

diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 515c9f0..e624ff9 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -167,10 +167,13 @@ typedef struct _SGDescriptor_struct {
#define CMD_MSG_TIMEOUT 0x05
#define CMD_MSG_STALE 0xff

-/* This structure needs to be divisible by 8 for new
- * indexing method. PAD_32 and PAD_64 can be adjusted
- * independently as needed for 32-bit and 64-bits systems.
+/* This structure needs to be divisible by COMMANDLIST_ALIGNMENT
+ * because low bits of the address are used to to indicate that
+ * whether the tag contains an index or an address. PAD_32 and
+ * PAD_64 can be adjusted independently as needed for 32-bit
+ * and 64-bits systems.
*/
+#define COMMANDLIST_ALIGNMENT (8)
#define IS_64_BIT ((sizeof(long) - 4)/4)
#define IS_32_BIT (!IS_64_BIT)
#define PAD_32 (0)
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 5d0e46d..f203606 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -93,11 +93,15 @@ static struct scsi_host_template cciss_driver_template = {
};

#pragma pack(1)
+
+#define SCSI_PAD_32 4
+#define SCSI_PAD_64 4
+
struct cciss_scsi_cmd_stack_elem_t {
CommandList_struct cmd;
ErrorInfo_struct Err;
__u32 busaddr;
- __u32 pad;
+ u8 pad[IS_32_BIT * SCSI_PAD_32 + IS_64_BIT * SCSI_PAD_64];
};

#pragma pack()
@@ -202,9 +206,9 @@ scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
stk = &sa->cmd_stack;
size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;

- // pci_alloc_consistent guarantees 32-bit DMA address will
- // be used
-
+ /* Check alignment, see cciss_cmd.h near CommandList_struct def. */
+ BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
+ /* pci_alloc_consistent guarantees 32-bit DMA address will be used */
stk->pool = (struct cciss_scsi_cmd_stack_elem_t *)
pci_alloc_consistent(hba[ctlr]->pdev, size, &stk->cmd_pool_handle);

2010-02-26 21:58:22

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 4/9] cciss: simplify scatter gather code

From: Stephen M. Cameron <[email protected]>

cciss: simplify scatter gather code.
Instead of allocating an array of pointers to a structure
containing an SGDescriptor structure, and two other elements
that aren't really used, just allocate SGDescriptor structs.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss.c | 43 +++++++++++++++----------------------------
drivers/block/cciss.h | 8 +-------
2 files changed, 16 insertions(+), 35 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index eddb916..adc517c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -257,7 +257,7 @@ static inline void removeQ(CommandList_struct *c)
hlist_del_init(&c->list);
}

-static void cciss_free_sg_chain_blocks(struct Cmd_sg_list **cmd_sg_list,
+static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list,
int nr_cmds)
{
int i;
@@ -265,20 +265,17 @@ static void cciss_free_sg_chain_blocks(struct Cmd_sg_list **cmd_sg_list,
if (!cmd_sg_list)
return;
for (i = 0; i < nr_cmds; i++) {
- if (cmd_sg_list[i]) {
- kfree(cmd_sg_list[i]->sgchain);
- kfree(cmd_sg_list[i]);
- cmd_sg_list[i] = NULL;
- }
+ kfree(cmd_sg_list[i]);
+ cmd_sg_list[i] = NULL;
}
kfree(cmd_sg_list);
}

-static struct Cmd_sg_list **cciss_allocate_sg_chain_blocks(ctlr_info_t *h,
- int chainsize, int nr_cmds)
+static SGDescriptor_struct **cciss_allocate_sg_chain_blocks(
+ ctlr_info_t *h, int chainsize, int nr_cmds)
{
int j;
- struct Cmd_sg_list **cmd_sg_list;
+ SGDescriptor_struct **cmd_sg_list;

if (chainsize <= 0)
return NULL;
@@ -289,16 +286,10 @@ static struct Cmd_sg_list **cciss_allocate_sg_chain_blocks(ctlr_info_t *h,

/* Build up chain blocks for each command */
for (j = 0; j < nr_cmds; j++) {
- cmd_sg_list[j] = kmalloc(sizeof(*cmd_sg_list[j]), GFP_KERNEL);
- if (!cmd_sg_list[j]) {
- dev_err(&h->pdev->dev, "Cannot get memory "
- "for chain block.\n");
- goto clean;
- }
/* Need a block of chainsized s/g elements. */
- cmd_sg_list[j]->sgchain = kmalloc((chainsize *
- sizeof(SGDescriptor_struct)), GFP_KERNEL);
- if (!cmd_sg_list[j]->sgchain) {
+ cmd_sg_list[j] = kmalloc((chainsize *
+ sizeof(*cmd_sg_list[j])), GFP_KERNEL);
+ if (!cmd_sg_list[j]) {
dev_err(&h->pdev->dev, "Cannot get memory "
"for s/g chains.\n");
goto clean;
@@ -1731,7 +1722,7 @@ static void cciss_softirq_done(struct request *rq)
pci_unmap_single(h->pdev, temp64.val,
cmd->SG[i].Len, ddir);
/* Point to the next block */
- curr_sg = h->cmd_sg_list[cmd->cmdindex]->sgchain;
+ curr_sg = h->cmd_sg_list[cmd->cmdindex];
sg_index = 0;
}
temp64.val32.lower = curr_sg[sg_index].Addr.lower;
@@ -3206,7 +3197,7 @@ static void do_cciss_request(struct request_queue *q)
curr_sg[sg_index].Ext = CCISS_SG_CHAIN;

/* Point to next chain block. */
- curr_sg = h->cmd_sg_list[c->cmdindex]->sgchain;
+ curr_sg = h->cmd_sg_list[c->cmdindex];
sg_index = 0;
chained = 1;
}
@@ -3223,6 +3214,7 @@ static void do_cciss_request(struct request_queue *q)

if (chained) {
int len;
+ dma_addr_t dma_addr;
curr_sg = c->SG;
sg_index = h->max_cmd_sgentries - 1;
len = curr_sg[sg_index].Len;
@@ -3231,16 +3223,11 @@ static void do_cciss_request(struct request_queue *q)
* block with address of next chain block.
*/
temp64.val = pci_map_single(h->pdev,
- h->cmd_sg_list[c->cmdindex]->sgchain,
- len, dir);
-
- h->cmd_sg_list[c->cmdindex]->sg_chain_dma = temp64.val;
+ h->cmd_sg_list[c->cmdindex], len, dir);
+ dma_addr = temp64.val;
curr_sg[sg_index].Addr.lower = temp64.val32.lower;
curr_sg[sg_index].Addr.upper = temp64.val32.upper;
-
- pci_dma_sync_single_for_device(h->pdev,
- h->cmd_sg_list[c->cmdindex]->sg_chain_dma,
- len, dir);
+ pci_dma_sync_single_for_device(h->pdev, dma_addr, len, dir);
}

/* track how many SG entries we are using */
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 2b07bda..ac454fd 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -55,12 +55,6 @@ typedef struct _drive_info_struct
char device_initialized; /* indicates whether dev is initialized */
} drive_info_struct;

-struct Cmd_sg_list {
- SGDescriptor_struct *sgchain;
- dma_addr_t sg_chain_dma;
- int chain_block_size;
-};
-
struct ctlr_info
{
int ctlr;
@@ -89,7 +83,7 @@ struct ctlr_info
int maxsgentries;
int chainsize;
int max_cmd_sgentries;
- struct Cmd_sg_list **cmd_sg_list;
+ SGDescriptor_struct **cmd_sg_list;

# define DOORBELL_INT 0
# define PERF_MODE_INT 1

2010-02-26 21:58:28

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 5/9] cciss: fix scatter gather chain block dma direction kludge

From: Stephen M. Cameron <[email protected]>

cciss: fix scatter gather chain block dma direction kludge
The data direction for the chained block of scatter gather
elements should always be PCI_DMA_TODEVICE, but was mistakenly
set to the direction of the data transfer, then a kludge to
fix it was added, in which pci_dma_sync_single_for_device or
pci_dma_sync_single_for_cpu was called. If the correct direction
is used in the first place, the kludge isn't needed.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss.c | 8 +++-----
1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index adc517c..c0d794c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1717,10 +1717,8 @@ static void cciss_softirq_done(struct request *rq)
if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
temp64.val32.lower = cmd->SG[i].Addr.lower;
temp64.val32.upper = cmd->SG[i].Addr.upper;
- pci_dma_sync_single_for_cpu(h->pdev, temp64.val,
- cmd->SG[i].Len, ddir);
pci_unmap_single(h->pdev, temp64.val,
- cmd->SG[i].Len, ddir);
+ cmd->SG[i].Len, PCI_DMA_TODEVICE);
/* Point to the next block */
curr_sg = h->cmd_sg_list[cmd->cmdindex];
sg_index = 0;
@@ -3223,11 +3221,11 @@ static void do_cciss_request(struct request_queue *q)
* block with address of next chain block.
*/
temp64.val = pci_map_single(h->pdev,
- h->cmd_sg_list[c->cmdindex], len, dir);
+ h->cmd_sg_list[c->cmdindex], len,
+ PCI_DMA_TODEVICE);
dma_addr = temp64.val;
curr_sg[sg_index].Addr.lower = temp64.val32.lower;
curr_sg[sg_index].Addr.upper = temp64.val32.upper;
- pci_dma_sync_single_for_device(h->pdev, dma_addr, len, dir);
}

/* track how many SG entries we are using */

2010-02-26 21:58:36

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 6/9] cciss: factor out scatter gather chain block mapping code

From: Stephen M. Cameron <[email protected]>

cciss: factor out scatter gather chain block mapping code
Rationale is I want to use this code from the scsi half of the
driver.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss.c | 63 ++++++++++++++++++++++++++-----------------------
1 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index c0d794c..9e3af30 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -301,6 +301,35 @@ clean:
return NULL;
}

+static void cciss_unmap_sg_chain_block(ctlr_info_t *h, CommandList_struct *c)
+{
+ SGDescriptor_struct *chain_sg;
+ u64bit temp64;
+
+ if (c->Header.SGTotal <= h->max_cmd_sgentries)
+ return;
+
+ chain_sg = &c->SG[h->max_cmd_sgentries - 1];
+ temp64.val32.lower = chain_sg->Addr.lower;
+ temp64.val32.upper = chain_sg->Addr.upper;
+ pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE);
+}
+
+static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c,
+ SGDescriptor_struct *chain_block, int len)
+{
+ SGDescriptor_struct *chain_sg;
+ u64bit temp64;
+
+ chain_sg = &c->SG[h->max_cmd_sgentries - 1];
+ chain_sg->Ext = CCISS_SG_CHAIN;
+ chain_sg->Len = len;
+ temp64.val = pci_map_single(h->pdev, chain_block, len,
+ PCI_DMA_TODEVICE);
+ chain_sg->Addr.lower = temp64.val32.lower;
+ chain_sg->Addr.upper = temp64.val32.upper;
+}
+
#include "cciss_scsi.c" /* For SCSI tape support */

static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
@@ -1715,10 +1744,7 @@ static void cciss_softirq_done(struct request *rq)
/* unmap the DMA mapping for all the scatter gather elements */
for (i = 0; i < cmd->Header.SGList; i++) {
if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
- temp64.val32.lower = cmd->SG[i].Addr.lower;
- temp64.val32.upper = cmd->SG[i].Addr.upper;
- pci_unmap_single(h->pdev, temp64.val,
- cmd->SG[i].Len, PCI_DMA_TODEVICE);
+ cciss_unmap_sg_chain_block(h, cmd);
/* Point to the next block */
curr_sg = h->cmd_sg_list[cmd->cmdindex];
sg_index = 0;
@@ -3122,7 +3148,6 @@ static void do_cciss_request(struct request_queue *q)
SGDescriptor_struct *curr_sg;
drive_info_struct *drv;
int i, dir;
- int nseg = 0;
int sg_index = 0;
int chained = 0;

@@ -3189,11 +3214,6 @@ static void do_cciss_request(struct request_queue *q)
for (i = 0; i < seg; i++) {
if (((sg_index+1) == (h->max_cmd_sgentries)) &&
!chained && ((seg - i) > 1)) {
- nseg = seg - i;
- curr_sg[sg_index].Len = (nseg) *
- sizeof(SGDescriptor_struct);
- curr_sg[sg_index].Ext = CCISS_SG_CHAIN;
-
/* Point to next chain block. */
curr_sg = h->cmd_sg_list[c->cmdindex];
sg_index = 0;
@@ -3206,27 +3226,12 @@ static void do_cciss_request(struct request_queue *q)
curr_sg[sg_index].Addr.lower = temp64.val32.lower;
curr_sg[sg_index].Addr.upper = temp64.val32.upper;
curr_sg[sg_index].Ext = 0; /* we are not chaining */
-
++sg_index;
}
-
- if (chained) {
- int len;
- dma_addr_t dma_addr;
- curr_sg = c->SG;
- sg_index = h->max_cmd_sgentries - 1;
- len = curr_sg[sg_index].Len;
- /* Setup pointer to next chain block.
- * Fill out last element in current chain
- * block with address of next chain block.
- */
- temp64.val = pci_map_single(h->pdev,
- h->cmd_sg_list[c->cmdindex], len,
- PCI_DMA_TODEVICE);
- dma_addr = temp64.val;
- curr_sg[sg_index].Addr.lower = temp64.val32.lower;
- curr_sg[sg_index].Addr.upper = temp64.val32.upper;
- }
+ if (chained)
+ cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
+ (seg - (h->max_cmd_sgentries - 1)) *
+ sizeof(SGDescriptor_struct));

/* track how many SG entries we are using */
if (seg > h->maxSG)

2010-02-26 21:58:44

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 8/9] cciss: eliminate unnecessary pointer use in cciss scsi code

From: Stephen M. Cameron <[email protected]>

cciss: eliminate unnecessary pointer use in cciss scsi code
An extra level of indirection was being used in some places
for no real reason.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss_scsi.c | 31 +++++++++++++++----------------
1 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index c458232..6dc15b6 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -1400,7 +1400,7 @@ cciss_scatter_gather(struct pci_dev *pdev,
static int
cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
{
- ctlr_info_t **c;
+ ctlr_info_t *c;
int ctlr, rc;
unsigned char scsi3addr[8];
CommandList_struct *cp;
@@ -1408,8 +1408,8 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd

// Get the ptr to our adapter structure (hba[i]) out of cmd->host.
// We violate cmd->host privacy here. (Is there another way?)
- c = (ctlr_info_t **) &cmd->device->host->hostdata[0];
- ctlr = (*c)->ctlr;
+ c = (ctlr_info_t *) cmd->device->host->hostdata[0];
+ ctlr = c->ctlr;

rc = lookup_scsi3addr(ctlr, cmd->device->channel, cmd->device->id,
cmd->device->lun, scsi3addr);
@@ -1432,7 +1432,7 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
see what the device thinks of it. */

spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
- cp = scsi_cmd_alloc(*c);
+ cp = scsi_cmd_alloc(c);
spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
if (cp == NULL) { /* trouble... */
printk("scsi_cmd_alloc returned NULL!\n");
@@ -1490,15 +1490,14 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
BUG();
break;
}
-
- cciss_scatter_gather((*c)->pdev, cp, cmd); // Fill the SG list
+ cciss_scatter_gather(c->pdev, cp, cmd);

/* Put the request on the tail of the request queue */

spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
- addQ(&(*c)->reqQ, cp);
- (*c)->Qdepth++;
- start_io(*c);
+ addQ(&c->reqQ, cp);
+ c->Qdepth++;
+ start_io(c);
spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);

/* the cmd'll come back via intr handler in complete_scsi_command() */
@@ -1655,14 +1654,14 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
int rc;
CommandList_struct *cmd_in_trouble;
unsigned char lunaddr[8];
- ctlr_info_t **c;
+ ctlr_info_t *c;
int ctlr;

/* find the controller to which the command to be aborted was sent */
- c = (ctlr_info_t **) &scsicmd->device->host->hostdata[0];
+ c = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
if (c == NULL) /* paranoia */
return FAILED;
- ctlr = (*c)->ctlr;
+ ctlr = c->ctlr;
printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr);
/* find the command that's giving us trouble */
cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
@@ -1672,7 +1671,7 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
/* send a reset to the SCSI LUN which the command was sent to */
rc = sendcmd_withirq(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
TYPE_MSG);
- if (rc == 0 && wait_for_device_to_become_ready(*c, lunaddr) == 0)
+ if (rc == 0 && wait_for_device_to_become_ready(c, lunaddr) == 0)
return SUCCESS;
printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
return FAILED;
@@ -1683,14 +1682,14 @@ static int cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
int rc;
CommandList_struct *cmd_to_abort;
unsigned char lunaddr[8];
- ctlr_info_t **c;
+ ctlr_info_t *c;
int ctlr;

/* find the controller to which the command to be aborted was sent */
- c = (ctlr_info_t **) &scsicmd->device->host->hostdata[0];
+ c = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
if (c == NULL) /* paranoia */
return FAILED;
- ctlr = (*c)->ctlr;
+ ctlr = c->ctlr;
printk(KERN_WARNING "cciss%d: aborting tardy SCSI cmd\n", ctlr);

/* find the command to be aborted */

2010-02-26 21:58:55

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 9/9] cciss: Fix problem with scatter gather elements in the scsi half of the driver

From: Stephen M. Cameron <[email protected]>

cciss: Fix problem with scatter gather elements in the scsi half of the driver
When support for more than 31 scatter gather elements was added to the block
half of the driver, the SCSI half of the driver was not addressed, and the bump
from 31 to 32 scatter gather elements in the command block itself (not chained)
actually broke the SCSI half of the driver, so that any transfer requiring 32
scatter gather elements wouldn't work. This fix also increases the max transfer
size and size of the scatter gather table to the limit supported by the controller

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss_scsi.c | 85 +++++++++++++++++++++++++++++++-------------
1 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 6dc15b6..e1d0e2c 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = {
.queuecommand = cciss_scsi_queue_command,
.can_queue = SCSI_CCISS_CAN_QUEUE,
.this_id = 7,
- .sg_tablesize = MAXSGENTRIES,
.cmd_per_lun = 1,
.use_clustering = DISABLE_CLUSTERING,
/* Can't have eh_bus_reset_handler or eh_host_reset_handler for cciss */
@@ -94,13 +93,14 @@ static struct scsi_host_template cciss_driver_template = {

#pragma pack(1)

-#define SCSI_PAD_32 4
-#define SCSI_PAD_64 4
+#define SCSI_PAD_32 0
+#define SCSI_PAD_64 0

struct cciss_scsi_cmd_stack_elem_t {
CommandList_struct cmd;
ErrorInfo_struct Err;
__u32 busaddr;
+ int cmdindex;
u8 pad[IS_32_BIT * SCSI_PAD_32 + IS_64_BIT * SCSI_PAD_64];
};

@@ -122,6 +122,7 @@ struct cciss_scsi_cmd_stack_t {
struct cciss_scsi_adapter_data_t {
struct Scsi_Host *scsi_host;
struct cciss_scsi_cmd_stack_t cmd_stack;
+ SGDescriptor_struct **cmd_sg_list;
int registered;
spinlock_t lock; // to protect ccissscsi[ctlr];
};
@@ -156,6 +157,7 @@ scsi_cmd_alloc(ctlr_info_t *h)
memset(&c->Err, 0, sizeof(c->Err));
/* set physical addr of cmd and addr of scsi parameters */
c->cmd.busaddr = c->busaddr;
+ c->cmd.cmdindex = c->cmdindex;
/* (__u32) (stk->cmd_pool_handle +
(sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */

@@ -201,6 +203,11 @@ scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
struct cciss_scsi_cmd_stack_t *stk;
size_t size;

+ sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(hba[ctlr],
+ hba[ctlr]->chainsize, CMD_STACK_SIZE);
+ if (!sa->cmd_sg_list && hba[ctlr]->chainsize > 0)
+ return -ENOMEM;
+
stk = &sa->cmd_stack;
size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;

@@ -211,14 +218,16 @@ scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
pci_alloc_consistent(hba[ctlr]->pdev, size, &stk->cmd_pool_handle);

if (stk->pool == NULL) {
- printk("stk->pool is null\n");
- return -1;
+ cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
+ sa->cmd_sg_list = NULL;
+ return -ENOMEM;
}

for (i=0; i<CMD_STACK_SIZE; i++) {
stk->elem[i] = &stk->pool[i];
stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
(sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
+ stk->elem[i]->cmdindex = i;
}
stk->top = CMD_STACK_SIZE-1;
return 0;
@@ -243,6 +252,7 @@ scsi_cmd_stack_free(int ctlr)

pci_free_consistent(hba[ctlr]->pdev, size, stk->pool, stk->cmd_pool_handle);
stk->pool = NULL;
+ cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
}

#if 0
@@ -726,6 +736,8 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
ctlr = hba[cp->ctlr];

scsi_dma_unmap(cmd);
+ if (cp->Header.SGTotal > ctlr->max_cmd_sgentries)
+ cciss_unmap_sg_chain_block(ctlr, cp);

cmd->result = (DID_OK << 16); /* host byte */
cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */
@@ -848,6 +860,7 @@ cciss_scsi_detect(int ctlr)
sh->io_port = 0; // good enough? FIXME,
sh->n_io_port = 0; // I don't think we use these two...
sh->this_id = SELF_SCSI_ID;
+ sh->sg_tablesize = hba[ctlr]->maxsgentries;

((struct cciss_scsi_adapter_data_t *)
hba[ctlr]->scsi_ctlr)->scsi_host = sh;
@@ -1365,34 +1378,54 @@ cciss_scsi_proc_info(struct Scsi_Host *sh,
dma mapping and fills in the scatter gather entries of the
cciss command, cp. */

-static void
-cciss_scatter_gather(struct pci_dev *pdev,
- CommandList_struct *cp,
- struct scsi_cmnd *cmd)
+static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
+ struct scsi_cmnd *cmd)
{
unsigned int len;
struct scatterlist *sg;
__u64 addr64;
- int use_sg, i;
-
- BUG_ON(scsi_sg_count(cmd) > MAXSGENTRIES);
-
- use_sg = scsi_dma_map(cmd);
- if (use_sg) { /* not too many addrs? */
- scsi_for_each_sg(cmd, sg, use_sg, i) {
+ int request_nsgs, i, chained, sg_index;
+ struct cciss_scsi_adapter_data_t *sa = h->scsi_ctlr;
+ SGDescriptor_struct *curr_sg;
+
+ BUG_ON(scsi_sg_count(cmd) > h->maxsgentries);
+
+ chained = 0;
+ sg_index = 0;
+ curr_sg = cp->SG;
+ request_nsgs = scsi_dma_map(cmd);
+ if (request_nsgs) {
+ scsi_for_each_sg(cmd, sg, request_nsgs, i) {
+ if (sg_index + 1 == h->max_cmd_sgentries &&
+ !chained && request_nsgs - i > 1) {
+ chained = 1;
+ sg_index = 0;
+ curr_sg = sa->cmd_sg_list[cp->cmdindex];
+ }
addr64 = (__u64) sg_dma_address(sg);
len = sg_dma_len(sg);
- cp->SG[i].Addr.lower =
- (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
- cp->SG[i].Addr.upper =
- (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
- cp->SG[i].Len = len;
- cp->SG[i].Ext = 0; // we are not chaining
+ curr_sg[sg_index].Addr.lower =
+ (__u32) (addr64 & 0x0FFFFFFFFULL);
+ curr_sg[sg_index].Addr.upper =
+ (__u32) ((addr64 >> 32) & 0x0FFFFFFFFULL);
+ curr_sg[sg_index].Len = len;
+ curr_sg[sg_index].Ext = 0;
+ ++sg_index;
}
+ if (chained)
+ cciss_map_sg_chain_block(h, cp,
+ sa->cmd_sg_list[cp->cmdindex],
+ (request_nsgs - (h->max_cmd_sgentries - 1)) *
+ sizeof(SGDescriptor_struct));
}
-
- cp->Header.SGList = (__u8) use_sg; /* no. SGs contig in this cmd */
- cp->Header.SGTotal = (__u16) use_sg; /* total sgs in this cmd list */
+ /* track how many SG entries we are using */
+ if (request_nsgs > h->maxSG)
+ h->maxSG = request_nsgs;
+ cp->Header.SGTotal = (__u8) request_nsgs + chained;
+ if (request_nsgs > h->max_cmd_sgentries)
+ cp->Header.SGList = h->max_cmd_sgentries;
+ else
+ cp->Header.SGList = cp->Header.SGTotal;
return;
}

@@ -1490,7 +1523,7 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
BUG();
break;
}
- cciss_scatter_gather(c->pdev, cp, cmd);
+ cciss_scatter_gather(c, cp, cmd);

/* Put the request on the tail of the request queue */

2010-02-26 21:59:24

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 7/9] cciss: do not use void pointer for scsi hba data

From: Stephen M. Cameron <[email protected]>

cciss: do not use void pointer for scsi hba data
and get rid of related unnecessary type casting
and delete some superfluous and misleading comments nearby.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss.h | 4 +---
drivers/block/cciss_scsi.c | 23 ++++++++++-------------
2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index ac454fd..c5d4111 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -131,9 +131,7 @@ struct ctlr_info
/* Disk structures we need to pass back */
struct gendisk *gendisk[CISS_MAX_LUN];
#ifdef CONFIG_CISS_SCSI_TAPE
- void *scsi_ctlr; /* ptr to structure containing scsi related stuff */
- /* list of block side commands the scsi error handling sucked up */
- /* and saved for later processing */
+ struct cciss_scsi_adapter_data_t *scsi_ctlr;
#endif
unsigned char alive;
struct list_head scan_list;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index f203606..c458232 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -127,11 +127,9 @@ struct cciss_scsi_adapter_data_t {
};

#define CPQ_TAPE_LOCK(ctlr, flags) spin_lock_irqsave( \
- &(((struct cciss_scsi_adapter_data_t *) \
- hba[ctlr]->scsi_ctlr)->lock), flags);
+ &hba[ctlr]->scsi_ctlr->lock, flags);
#define CPQ_TAPE_UNLOCK(ctlr, flags) spin_unlock_irqrestore( \
- &(((struct cciss_scsi_adapter_data_t *) \
- hba[ctlr]->scsi_ctlr)->lock), flags);
+ &hba[ctlr]->scsi_ctlr->lock, flags);

static CommandList_struct *
scsi_cmd_alloc(ctlr_info_t *h)
@@ -147,7 +145,7 @@ scsi_cmd_alloc(ctlr_info_t *h)
struct cciss_scsi_cmd_stack_t *stk;
u64bit temp64;

- sa = (struct cciss_scsi_adapter_data_t *) h->scsi_ctlr;
+ sa = h->scsi_ctlr;
stk = &sa->cmd_stack;

if (stk->top < 0)
@@ -186,7 +184,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
struct cciss_scsi_adapter_data_t *sa;
struct cciss_scsi_cmd_stack_t *stk;

- sa = (struct cciss_scsi_adapter_data_t *) h->scsi_ctlr;
+ sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
if (stk->top >= CMD_STACK_SIZE) {
printk("cciss: scsi_cmd_free called too many times.\n");
@@ -233,7 +231,7 @@ scsi_cmd_stack_free(int ctlr)
struct cciss_scsi_cmd_stack_t *stk;
size_t size;

- sa = (struct cciss_scsi_adapter_data_t *) hba[ctlr]->scsi_ctlr;
+ sa = hba[ctlr]->scsi_ctlr;
stk = &sa->cmd_stack;
if (stk->top != CMD_STACK_SIZE-1) {
printk( "cciss: %d scsi commands are still outstanding.\n",
@@ -534,8 +532,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
CPQ_TAPE_LOCK(ctlr, flags);

if (hostno != -1) /* if it's not the first time... */
- sh = ((struct cciss_scsi_adapter_data_t *)
- hba[ctlr]->scsi_ctlr)->scsi_host;
+ sh = hba[ctlr]->scsi_ctlr->scsi_host;

/* find any devices in ccissscsi[] that are not in
sd[] and remove them from ccissscsi[] */
@@ -706,7 +703,7 @@ cciss_scsi_setup(int cntl_num)
kfree(shba);
shba = NULL;
}
- hba[cntl_num]->scsi_ctlr = (void *) shba;
+ hba[cntl_num]->scsi_ctlr = shba;
return;
}

@@ -853,7 +850,7 @@ cciss_scsi_detect(int ctlr)
sh->this_id = SELF_SCSI_ID;

((struct cciss_scsi_adapter_data_t *)
- hba[ctlr]->scsi_ctlr)->scsi_host = (void *) sh;
+ hba[ctlr]->scsi_ctlr)->scsi_host = sh;
sh->hostdata[0] = (unsigned long) hba[ctlr];
sh->irq = hba[ctlr]->intr[SIMPLE_MODE_INT];
sh->unique_id = sh->irq;
@@ -1518,7 +1515,7 @@ cciss_unregister_scsi(int ctlr)
/* we are being forcibly unloaded, and may not refuse. */

spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
- sa = (struct cciss_scsi_adapter_data_t *) hba[ctlr]->scsi_ctlr;
+ sa = hba[ctlr]->scsi_ctlr;
stk = &sa->cmd_stack;

/* if we weren't ever actually registered, don't unregister */
@@ -1545,7 +1542,7 @@ cciss_engage_scsi(int ctlr)
unsigned long flags;

spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
- sa = (struct cciss_scsi_adapter_data_t *) hba[ctlr]->scsi_ctlr;
+ sa = hba[ctlr]->scsi_ctlr;
stk = &sa->cmd_stack;

if (sa->registered) {

2010-02-26 21:58:06

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 1/9] cciss: clarify command list padding calculation

From: Stephen M. Cameron <[email protected]>

cciss: clarify command list padding calculation

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss_cmd.h | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 25f9762..515c9f0 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -168,9 +168,14 @@ typedef struct _SGDescriptor_struct {
#define CMD_MSG_STALE 0xff

/* This structure needs to be divisible by 8 for new
- * indexing method.
+ * indexing method. PAD_32 and PAD_64 can be adjusted
+ * independently as needed for 32-bit and 64-bits systems.
*/
-#define PADSIZE (sizeof(long) - 4)
+#define IS_64_BIT ((sizeof(long) - 4)/4)
+#define IS_32_BIT (!IS_64_BIT)
+#define PAD_32 (0)
+#define PAD_64 (4)
+#define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
typedef struct _CommandList_struct {
CommandListHeader_struct Header;
RequestBlock_struct Request;

2010-02-26 21:58:17

by Stephen M. Cameron

[permalink] [raw]
Subject: [PATCH 3/9] cciss: factor out scatter gather chain block allocation and freeing

From: Stephen M. Cameron <[email protected]>

cciss: factor out scatter gather chain block allocation and freeing
Rationale is that I want to use this code from the scsi half of the
driver.

Signed-off-by: Stephen M. Cameron <[email protected]>
---
drivers/block/cciss.c | 108 ++++++++++++++++++++++++++-----------------------
1 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd8c7c2..eddb916 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -257,6 +257,59 @@ static inline void removeQ(CommandList_struct *c)
hlist_del_init(&c->list);
}

+static void cciss_free_sg_chain_blocks(struct Cmd_sg_list **cmd_sg_list,
+ int nr_cmds)
+{
+ int i;
+
+ if (!cmd_sg_list)
+ return;
+ for (i = 0; i < nr_cmds; i++) {
+ if (cmd_sg_list[i]) {
+ kfree(cmd_sg_list[i]->sgchain);
+ kfree(cmd_sg_list[i]);
+ cmd_sg_list[i] = NULL;
+ }
+ }
+ kfree(cmd_sg_list);
+}
+
+static struct Cmd_sg_list **cciss_allocate_sg_chain_blocks(ctlr_info_t *h,
+ int chainsize, int nr_cmds)
+{
+ int j;
+ struct Cmd_sg_list **cmd_sg_list;
+
+ if (chainsize <= 0)
+ return NULL;
+
+ cmd_sg_list = kmalloc(sizeof(*cmd_sg_list) * nr_cmds, GFP_KERNEL);
+ if (!cmd_sg_list)
+ return NULL;
+
+ /* Build up chain blocks for each command */
+ for (j = 0; j < nr_cmds; j++) {
+ cmd_sg_list[j] = kmalloc(sizeof(*cmd_sg_list[j]), GFP_KERNEL);
+ if (!cmd_sg_list[j]) {
+ dev_err(&h->pdev->dev, "Cannot get memory "
+ "for chain block.\n");
+ goto clean;
+ }
+ /* Need a block of chainsized s/g elements. */
+ cmd_sg_list[j]->sgchain = kmalloc((chainsize *
+ sizeof(SGDescriptor_struct)), GFP_KERNEL);
+ if (!cmd_sg_list[j]->sgchain) {
+ dev_err(&h->pdev->dev, "Cannot get memory "
+ "for s/g chains.\n");
+ goto clean;
+ }
+ }
+ return cmd_sg_list;
+clean:
+ cciss_free_sg_chain_blocks(cmd_sg_list, nr_cmds);
+ return NULL;
+}
+
#include "cciss_scsi.c" /* For SCSI tape support */

static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
@@ -4238,37 +4291,10 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
goto clean4;
}
}
- hba[i]->cmd_sg_list = kmalloc(sizeof(struct Cmd_sg_list *) *
- hba[i]->nr_cmds,
- GFP_KERNEL);
- if (!hba[i]->cmd_sg_list) {
- printk(KERN_ERR "cciss%d: Cannot get memory for "
- "s/g chaining.\n", i);
+ hba[i]->cmd_sg_list = cciss_allocate_sg_chain_blocks(hba[i],
+ hba[i]->chainsize, hba[i]->nr_cmds);
+ if (!hba[i]->cmd_sg_list && hba[i]->chainsize > 0)
goto clean4;
- }
- /* Build up chain blocks for each command */
- if (hba[i]->chainsize > 0) {
- for (j = 0; j < hba[i]->nr_cmds; j++) {
- hba[i]->cmd_sg_list[j] =
- kmalloc(sizeof(struct Cmd_sg_list),
- GFP_KERNEL);
- if (!hba[i]->cmd_sg_list[j]) {
- printk(KERN_ERR "cciss%d: Cannot get memory "
- "for chain block.\n", i);
- goto clean4;
- }
- /* Need a block of chainsized s/g elements. */
- hba[i]->cmd_sg_list[j]->sgchain =
- kmalloc((hba[i]->chainsize *
- sizeof(SGDescriptor_struct)),
- GFP_KERNEL);
- if (!hba[i]->cmd_sg_list[j]->sgchain) {
- printk(KERN_ERR "cciss%d: Cannot get memory "
- "for s/g chains\n", i);
- goto clean4;
- }
- }
- }

spin_lock_init(&hba[i]->lock);

@@ -4327,16 +4353,7 @@ clean4:
for (k = 0; k < hba[i]->nr_cmds; k++)
kfree(hba[i]->scatter_list[k]);
kfree(hba[i]->scatter_list);
- /* Only free up extra s/g lists if controller supports them */
- if (hba[i]->chainsize > 0) {
- for (j = 0; j < hba[i]->nr_cmds; j++) {
- if (hba[i]->cmd_sg_list[j]) {
- kfree(hba[i]->cmd_sg_list[j]->sgchain);
- kfree(hba[i]->cmd_sg_list[j]);
- }
- }
- kfree(hba[i]->cmd_sg_list);
- }
+ cciss_free_sg_chain_blocks(hba[i]->cmd_sg_list, hba[i]->nr_cmds);
if (hba[i]->cmd_pool)
pci_free_consistent(hba[i]->pdev,
hba[i]->nr_cmds * sizeof(CommandList_struct),
@@ -4454,16 +4471,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
for (j = 0; j < hba[i]->nr_cmds; j++)
kfree(hba[i]->scatter_list[j]);
kfree(hba[i]->scatter_list);
- /* Only free up extra s/g lists if controller supports them */
- if (hba[i]->chainsize > 0) {
- for (j = 0; j < hba[i]->nr_cmds; j++) {
- if (hba[i]->cmd_sg_list[j]) {
- kfree(hba[i]->cmd_sg_list[j]->sgchain);
- kfree(hba[i]->cmd_sg_list[j]);
- }
- }
- kfree(hba[i]->cmd_sg_list);
- }
+ cciss_free_sg_chain_blocks(hba[i]->cmd_sg_list, hba[i]->nr_cmds);
/*
* Deliberately omit pci_disable_device(): it does something nasty to
* Smart Array controllers that pci_enable_device does not undo

2010-02-28 18:43:55

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 0/9] cciss driver SCSI updates

On Fri, Feb 26 2010, Stephen M. Cameron wrote:
> The following series mainly fixes the scsi tape code in the cciss
> driver to take advantage of more scatter gather elements, enabling
> larger transfer sizes. The first eight patches are mainly cleaning
> things up and getting ready for the 9th patch, which enables the
> SCSI half of the driver to use more than 31 scatter gather elements.
>
> I've tested on 32-bit and 64-bit systems, and written out tapes
> with block sizes up to 2Mb.
>
> These patches are vs. Jens's linux-2.6-block tree.

Thanks Stephen, applied.

--
Jens Axboe