2007-10-22 18:12:38

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 00/10] SG updates

Hi,

I split the patch up into a few pieces, so it can be applied safely.
It builds with allyesconfig on i386 and x86-64, and it's been booted
and tested on both those archs and ppc64 as well.

The same patch series can also be applied by pulling

git://git.kernel.dk/linux-2.6-block.git sg




2007-10-22 18:12:52

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 02/10] [SG] Update block layer to use sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
block/ll_rw_blk.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 8025d64..61c2e39 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1354,8 +1354,9 @@ new_segment:
else
sg = sg_next(sg);

- memset(sg, 0, sizeof(*sg));
- sg->page = bvec->bv_page;
+ sg_dma_len(sg) = 0;
+ sg_dma_address(sg) = 0;
+ sg_set_page(sg, bvec->bv_page);
sg->length = nbytes;
sg->offset = bvec->bv_offset;
nsegs++;
@@ -1363,6 +1364,9 @@ new_segment:
bvprv = bvec;
} /* segments in rq */

+ if (sg)
+ __sg_mark_end(sg);
+
return nsegs;
}

--
1.5.3.GIT

2007-10-22 18:13:08

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 03/10] [SG] Update crypto/ to sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
crypto/digest.c | 2 +-
crypto/hmac.c | 3 ++-
crypto/scatterwalk.c | 2 +-
crypto/scatterwalk.h | 6 +++---
crypto/tcrypt.c | 4 ++--
crypto/xcbc.c | 2 +-
6 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/crypto/digest.c b/crypto/digest.c
index e56de67..8871dec 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -41,7 +41,7 @@ static int update2(struct hash_desc *desc,
return 0;

for (;;) {
- struct page *pg = sg->page;
+ struct page *pg = sg_page(sg);
unsigned int offset = sg->offset;
unsigned int l = sg->length;

diff --git a/crypto/hmac.c b/crypto/hmac.c
index 8802fb6..e4eb6ac 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -159,7 +159,8 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;

sg_set_buf(sg1, ipad, bs);
- sg1[1].page = (void *)sg;
+
+ sg_set_page(&sg1[1], (void *) sg);
sg1[1].length = 0;
sg_set_buf(sg2, opad, bs + ds);

diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index d6852c3..b9bbda0 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -54,7 +54,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
if (out) {
struct page *page;

- page = walk->sg->page + ((walk->offset - 1) >> PAGE_SHIFT);
+ page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT);
flush_dcache_page(page);
}

diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index 9c73e37..87ed681 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -22,13 +22,13 @@

static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg)
{
- return (++sg)->length ? sg : (void *)sg->page;
+ return (++sg)->length ? sg : (void *) sg_page(sg);
}

static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
struct scatter_walk *walk_out)
{
- return !(((walk_in->sg->page - walk_out->sg->page) << PAGE_SHIFT) +
+ return !(((sg_page(walk_in->sg) - sg_page(walk_out->sg)) << PAGE_SHIFT) +
(int)(walk_in->offset - walk_out->offset));
}

@@ -60,7 +60,7 @@ static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,

static inline struct page *scatterwalk_page(struct scatter_walk *walk)
{
- return walk->sg->page + (walk->offset >> PAGE_SHIFT);
+ return sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT);
}

static inline void scatterwalk_unmap(void *vaddr, int out)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 18d489c..d741c63 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -317,7 +317,7 @@ static void test_cipher(char *algo, int enc,
goto out;
}

- q = kmap(sg[0].page) + sg[0].offset;
+ q = kmap(sg_page(&sg[0])) + sg[0].offset;
hexdump(q, cipher_tv[i].rlen);

printk("%s\n",
@@ -390,7 +390,7 @@ static void test_cipher(char *algo, int enc,
temp = 0;
for (k = 0; k < cipher_tv[i].np; k++) {
printk("page %u\n", k);
- q = kmap(sg[k].page) + sg[k].offset;
+ q = kmap(sg_page(&sg[k])) + sg[k].offset;
hexdump(q, cipher_tv[i].tap[k]);
printk("%s\n",
memcmp(q, cipher_tv[i].result + temp,
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index 9f502b8..ac68f3b 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -120,7 +120,7 @@ static int crypto_xcbc_digest_update2(struct hash_desc *pdesc,

do {

- struct page *pg = sg[i].page;
+ struct page *pg = sg_page(&sg[i]);
unsigned int offset = sg[i].offset;
unsigned int slen = sg[i].length;

--
1.5.3.GIT

2007-10-22 18:13:28

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 05/10] [SG] Update fs/ to use sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
fs/ecryptfs/crypto.c | 16 +++++++++++-----
fs/ecryptfs/keystore.c | 3 +++
fs/nfsd/nfs4recover.c | 8 +++-----
3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 1ae90ef..0a9882e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -283,7 +283,7 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
pg = virt_to_page(addr);
offset = offset_in_page(addr);
if (sg) {
- sg[i].page = pg;
+ sg_set_page(&sg[i], pg);
sg[i].offset = offset;
}
remainder_of_page = PAGE_CACHE_SIZE - offset;
@@ -713,10 +713,13 @@ ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
{
struct scatterlist src_sg, dst_sg;

- src_sg.page = src_page;
+ sg_init_table(&src_sg, 1);
+ sg_init_table(&dst_sg, 1);
+
+ sg_set_page(&src_sg, src_page);
src_sg.offset = src_offset;
src_sg.length = size;
- dst_sg.page = dst_page;
+ sg_set_page(&dst_sg, dst_page);
dst_sg.offset = dst_offset;
dst_sg.length = size;
return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
@@ -742,10 +745,13 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
{
struct scatterlist src_sg, dst_sg;

- src_sg.page = src_page;
+ sg_init_table(&src_sg, 1);
+ sg_init_table(&dst_sg, 1);
+
+ sg_set_page(&src_sg, src_page);
src_sg.offset = src_offset;
src_sg.length = size;
- dst_sg.page = dst_page;
+ sg_set_page(&dst_sg, dst_page);
dst_sg.offset = dst_offset;
dst_sg.length = size;
return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 89d9710..263fed8 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1040,6 +1040,9 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
};
int rc = 0;

+ sg_init_table(&dst_sg, 1);
+ sg_init_table(&src_sg, 1);
+
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(
KERN_DEBUG, "Session key encryption key (size [%d]):\n",
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ebd03cc..6f03918 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -88,7 +88,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
{
struct xdr_netobj cksum;
struct hash_desc desc;
- struct scatterlist sg[1];
+ struct scatterlist sg;
__be32 status = nfserr_resource;

dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
@@ -102,11 +102,9 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
if (cksum.data == NULL)
goto out;

- sg[0].page = virt_to_page(clname->data);
- sg[0].offset = offset_in_page(clname->data);
- sg[0].length = clname->len;
+ sg_init_one(&sg, clname->data, clname->len);

- if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
+ if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data))
goto out;

md5_to_hex(dname, cksum.data);
--
1.5.3.GIT

2007-10-22 18:13:41

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 01/10] [SG] Add helpers for manipulating SG entries

We can then transition drivers without changing the generated code.

Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/scatterlist.h | 112 +++++++++++++++++++++++++++++++++++++++---
1 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 2dc7464..1645795 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -2,24 +2,37 @@
#define _LINUX_SCATTERLIST_H

#include <asm/scatterlist.h>
+#include <asm/io.h>
#include <linux/mm.h>
#include <linux/string.h>

+/**
+ * sg_set_page - Set sg entry to point at given page
+ * @sg: SG entry
+ * @page: The page
+ *
+ * Description:
+ * Use this function to set an sg entry pointing at a page, never assign
+ * the page directly. We encode sg table information in the lower bits
+ * of the page pointer. See sg_page() for looking up the page belonging
+ * to an sg entry.
+ *
+ **/
+static inline void sg_set_page(struct scatterlist *sg, struct page *page)
+{
+ sg->page = page;
+}
+
+#define sg_page(sg) ((sg)->page)
+
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
- sg->page = virt_to_page(buf);
+ sg_set_page(sg, virt_to_page(buf));
sg->offset = offset_in_page(buf);
sg->length = buflen;
}

-static inline void sg_init_one(struct scatterlist *sg, const void *buf,
- unsigned int buflen)
-{
- memset(sg, 0, sizeof(*sg));
- sg_set_buf(sg, buf, buflen);
-}
-
/*
* We overload the LSB of the page pointer to indicate whether it's
* a valid sg entry, or whether it points to the start of a new scatterlist.
@@ -104,4 +117,87 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
prv[prv_nents - 1].page = (struct page *) ((unsigned long) sgl | 0x01);
}

+/**
+ * sg_mark_end - Mark the end of the scatterlist
+ * @sgl: Scatterlist
+ * @nents: Number of entries in sgl
+ *
+ * Description:
+ * Marks the last entry as the termination point for sg_next()
+ *
+ **/
+static inline void sg_mark_end(struct scatterlist *sgl, unsigned int nents)
+{
+}
+
+static inline void __sg_mark_end(struct scatterlist *sg)
+{
+}
+
+
+/**
+ * sg_init_one - Initialize a single entry sg list
+ * @sg: SG entry
+ * @buf: Virtual address for IO
+ * @buflen: IO length
+ *
+ * Notes:
+ * This should not be used on a single entry that is part of a larger
+ * table. Use sg_init_table() for that.
+ *
+ **/
+static inline void sg_init_one(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ memset(sg, 0, sizeof(*sg));
+ sg_mark_end(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+
+/**
+ * sg_init_table - Initialize SG table
+ * @sgl: The SG table
+ * @nents: Number of entries in table
+ *
+ * Notes:
+ * If this is part of a chained sg table, sg_mark_end() should be
+ * used only on the last table part.
+ *
+ **/
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+ sg_mark_end(sgl, nents);
+}
+
+/**
+ * sg_phys - Return physical address of an sg entry
+ * @sg: SG entry
+ *
+ * Description:
+ * This calls page_to_phys() on the page in this sg entry, and adds the
+ * sg offset. The caller must know that it is legal to call page_to_phys()
+ * on the sg page.
+ *
+ **/
+static inline unsigned long sg_phys(struct scatterlist *sg)
+{
+ return page_to_phys(sg_page(sg)) + sg->offset;
+}
+
+/**
+ * sg_virt - Return virtual address of an sg entry
+ * @sg: SG entry
+ *
+ * Description:
+ * This calls page_address() on the page in this sg entry, and adds the
+ * sg offset. The caller must know that the sg page has a valid virtual
+ * mapping.
+ *
+ **/
+static inline void *sg_virt(struct scatterlist *sg)
+{
+ return page_address(sg_page(sg)) + sg->offset;
+}
+
#endif /* _LINUX_SCATTERLIST_H */
--
1.5.3.GIT

2007-10-22 18:14:00

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 07/10] [SG] Update swiotlb to use sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
lib/swiotlb.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 752fd95..1a8050a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -35,7 +35,7 @@
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))

-#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))

/*
--
1.5.3.GIT

2007-10-22 18:14:29

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 06/10] [SG] Update net/ to use sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
net/core/skbuff.c | 4 ++--
net/ieee80211/ieee80211_crypt_tkip.c | 13 +++++--------
net/ieee80211/ieee80211_crypt_wep.c | 8 ++------
net/mac80211/wep.c | 8 ++------
net/sctp/auth.c | 3 ++-
net/sctp/sm_make_chunk.c | 6 ++++--
net/sunrpc/auth_gss/gss_krb5_crypto.c | 10 +++++-----
net/sunrpc/xdr.c | 2 +-
net/xfrm/xfrm_algo.c | 4 ++--
9 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 70d9b5d..4e2c84f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2045,7 +2045,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
if (copy > 0) {
if (copy > len)
copy = len;
- sg[elt].page = virt_to_page(skb->data + offset);
+ sg_set_page(&sg[elt], virt_to_page(skb->data + offset));
sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
sg[elt].length = copy;
elt++;
@@ -2065,7 +2065,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)

if (copy > len)
copy = len;
- sg[elt].page = frag->page;
+ sg_set_page(&sg[elt], frag->page);
sg[elt].offset = frag->page_offset+offset-start;
sg[elt].length = copy;
elt++;
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 72e6ab6..c796661 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -390,9 +390,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[3] = crc >> 24;

crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
- sg.page = virt_to_page(pos);
- sg.offset = offset_in_page(pos);
- sg.length = len + 4;
+ sg_init_one(&sg, pos, len + 4);
return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
}

@@ -485,9 +483,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
plen = skb->len - hdr_len - 12;

crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
- sg.page = virt_to_page(pos);
- sg.offset = offset_in_page(pos);
- sg.length = plen + 4;
+ sg_init_one(&sg, pos, plen + 4);
if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
if (net_ratelimit()) {
printk(KERN_DEBUG ": TKIP: failed to decrypt "
@@ -539,11 +535,12 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n");
return -1;
}
- sg[0].page = virt_to_page(hdr);
+ sg_init_table(sg, 2);
+ sg_set_page(&sg[0], virt_to_page(hdr));
sg[0].offset = offset_in_page(hdr);
sg[0].length = 16;

- sg[1].page = virt_to_page(data);
+ sg_set_page(&sg[1], virt_to_page(data));
sg[1].offset = offset_in_page(data);
sg[1].length = data_len;

diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 8d18245..0af6103 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -170,9 +170,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[3] = crc >> 24;

crypto_blkcipher_setkey(wep->tx_tfm, key, klen);
- sg.page = virt_to_page(pos);
- sg.offset = offset_in_page(pos);
- sg.length = len + 4;
+ sg_init_one(&sg, pos, len + 4);
return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
}

@@ -212,9 +210,7 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
plen = skb->len - hdr_len - 8;

crypto_blkcipher_setkey(wep->rx_tfm, key, klen);
- sg.page = virt_to_page(pos);
- sg.offset = offset_in_page(pos);
- sg.length = plen + 4;
+ sg_init_one(&sg, pos, plen + 4);
if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
return -7;

diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 6675261..cc806d6 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -138,9 +138,7 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
*icv = cpu_to_le32(~crc32_le(~0, data, data_len));

crypto_blkcipher_setkey(tfm, rc4key, klen);
- sg.page = virt_to_page(data);
- sg.offset = offset_in_page(data);
- sg.length = data_len + WEP_ICV_LEN;
+ sg_init_one(&sg, data, data_len + WEP_ICV_LEN);
crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length);
}

@@ -204,9 +202,7 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
__le32 crc;

crypto_blkcipher_setkey(tfm, rc4key, klen);
- sg.page = virt_to_page(data);
- sg.offset = offset_in_page(data);
- sg.length = data_len + WEP_ICV_LEN;
+ sg_init_one(&sg, data, data_len + WEP_ICV_LEN);
crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length);

crc = cpu_to_le32(~crc32_le(~0, data, data_len));
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 7818107..cbd64b2 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -726,7 +726,8 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,

/* set up scatter list */
end = skb_tail_pointer(skb);
- sg.page = virt_to_page(auth);
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, virt_to_page(auth));
sg.offset = (unsigned long)(auth) % PAGE_SIZE;
sg.length = end - (unsigned char *)auth;

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f983a36..d5a9785 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1513,7 +1513,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
struct hash_desc desc;

/* Sign the message. */
- sg.page = virt_to_page(&cookie->c);
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, virt_to_page(&cookie->c));
sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE;
sg.length = bodysize;
keylen = SCTP_SECRET_SIZE;
@@ -1585,7 +1586,8 @@ struct sctp_association *sctp_unpack_cookie(

/* Check the signature. */
keylen = SCTP_SECRET_SIZE;
- sg.page = virt_to_page(bear_cookie);
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, virt_to_page(bear_cookie));
sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE;
sg.length = bodysize;
key = (char *)ep->secret_key[ep->current_key];
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index bfb6a29..32be431 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -197,9 +197,9 @@ encryptor(struct scatterlist *sg, void *data)
int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
in_page = desc->pages[i];
} else {
- in_page = sg->page;
+ in_page = sg_page(sg);
}
- desc->infrags[desc->fragno].page = in_page;
+ sg_set_page(&desc->infrags[desc->fragno], in_page);
desc->fragno++;
desc->fraglen += sg->length;
desc->pos += sg->length;
@@ -215,11 +215,11 @@ encryptor(struct scatterlist *sg, void *data)
if (ret)
return ret;
if (fraglen) {
- desc->outfrags[0].page = sg->page;
+ sg_set_page(&desc->outfrags[0], sg_page(sg));
desc->outfrags[0].offset = sg->offset + sg->length - fraglen;
desc->outfrags[0].length = fraglen;
desc->infrags[0] = desc->outfrags[0];
- desc->infrags[0].page = in_page;
+ sg_set_page(&desc->infrags[0], in_page);
desc->fragno = 1;
desc->fraglen = fraglen;
} else {
@@ -287,7 +287,7 @@ decryptor(struct scatterlist *sg, void *data)
if (ret)
return ret;
if (fraglen) {
- desc->frags[0].page = sg->page;
+ sg_set_page(&desc->frags[0], sg_page(sg));
desc->frags[0].offset = sg->offset + sg->length - fraglen;
desc->frags[0].length = fraglen;
desc->fragno = 1;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6a59180..3d1f7cd 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1059,7 +1059,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
do {
if (thislen > page_len)
thislen = page_len;
- sg->page = buf->pages[i];
+ sg_set_page(sg, buf->pages[i]);
sg->offset = page_offset;
sg->length = thislen;
ret = actor(sg, data);
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 5ced62c..fb2220a 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -552,7 +552,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
if (copy > len)
copy = len;

- sg.page = virt_to_page(skb->data + offset);
+ sg_set_page(&sg, virt_to_page(skb->data + offset));
sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
sg.length = copy;

@@ -577,7 +577,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
if (copy > len)
copy = len;

- sg.page = frag->page;
+ sg_set_page(&sg, frag->page);
sg.offset = frag->page_offset + offset-start;
sg.length = copy;

--
1.5.3.GIT

2007-10-22 18:14:45

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 10/10] Add CONFIG_DEBUG_SG sg validation

Add a Kconfig entry which will toggle some sanity checks on the sg
entries and tables.

Signed-off-by: Jens Axboe <[email protected]>
---
include/asm-alpha/scatterlist.h | 3 +++
include/asm-arm/scatterlist.h | 3 +++
include/asm-avr32/scatterlist.h | 3 +++
include/asm-blackfin/scatterlist.h | 3 +++
include/asm-cris/scatterlist.h | 3 +++
include/asm-frv/scatterlist.h | 3 +++
include/asm-h8300/scatterlist.h | 3 +++
include/asm-ia64/scatterlist.h | 3 +++
include/asm-m32r/scatterlist.h | 3 +++
include/asm-m68k/scatterlist.h | 3 +++
include/asm-m68knommu/scatterlist.h | 3 +++
include/asm-mips/scatterlist.h | 3 +++
include/asm-parisc/scatterlist.h | 3 +++
include/asm-powerpc/scatterlist.h | 3 +++
include/asm-s390/scatterlist.h | 3 +++
include/asm-sh/scatterlist.h | 3 +++
include/asm-sh64/scatterlist.h | 3 +++
include/asm-sparc/scatterlist.h | 3 +++
include/asm-sparc64/scatterlist.h | 3 +++
include/asm-v850/scatterlist.h | 3 +++
include/asm-x86/scatterlist_32.h | 3 +++
include/asm-x86/scatterlist_64.h | 3 +++
include/asm-xtensa/scatterlist.h | 3 +++
include/linux/scatterlist.h | 22 ++++++++++++++++++++++
lib/Kconfig.debug | 10 ++++++++++
25 files changed, 101 insertions(+), 0 deletions(-)

diff --git a/include/asm-alpha/scatterlist.h b/include/asm-alpha/scatterlist.h
index b764706..440747c 100644
--- a/include/asm-alpha/scatterlist.h
+++ b/include/asm-alpha/scatterlist.h
@@ -5,6 +5,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;

diff --git a/include/asm-arm/scatterlist.h b/include/asm-arm/scatterlist.h
index ab1d85d..ca0a37d 100644
--- a/include/asm-arm/scatterlist.h
+++ b/include/asm-arm/scatterlist.h
@@ -5,6 +5,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset; /* buffer offset */
dma_addr_t dma_address; /* dma address */
diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h
index 1356f29..377320e 100644
--- a/include/asm-avr32/scatterlist.h
+++ b/include/asm-avr32/scatterlist.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/asm-blackfin/scatterlist.h b/include/asm-blackfin/scatterlist.h
index 384af54..32128d5 100644
--- a/include/asm-blackfin/scatterlist.h
+++ b/include/asm-blackfin/scatterlist.h
@@ -4,6 +4,9 @@
#include <linux/mm.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/asm-cris/scatterlist.h b/include/asm-cris/scatterlist.h
index 5a8a834..faff53a 100644
--- a/include/asm-cris/scatterlist.h
+++ b/include/asm-cris/scatterlist.h
@@ -2,6 +2,9 @@
#define __ASM_CRIS_SCATTERLIST_H

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
char * address; /* Location data is to be transferred to */
unsigned int length;

diff --git a/include/asm-frv/scatterlist.h b/include/asm-frv/scatterlist.h
index 53dade7..f7da007 100644
--- a/include/asm-frv/scatterlist.h
+++ b/include/asm-frv/scatterlist.h
@@ -22,6 +22,9 @@
* and that's it. There's no excuse for not highmem enabling YOUR driver. /jens
*/
struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset; /* for highmem, page offset */

diff --git a/include/asm-h8300/scatterlist.h b/include/asm-h8300/scatterlist.h
index 7e41983..d3ecdd8 100644
--- a/include/asm-h8300/scatterlist.h
+++ b/include/asm-h8300/scatterlist.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/asm-ia64/scatterlist.h b/include/asm-ia64/scatterlist.h
index 2f76ce3..d6f5787 100644
--- a/include/asm-ia64/scatterlist.h
+++ b/include/asm-ia64/scatterlist.h
@@ -9,6 +9,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
unsigned int length; /* buffer length */
diff --git a/include/asm-m32r/scatterlist.h b/include/asm-m32r/scatterlist.h
index 33b4b4d..1ed372c 100644
--- a/include/asm-m32r/scatterlist.h
+++ b/include/asm-m32r/scatterlist.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
char * address; /* Location data is to be transferred to, NULL for
* highmem page */
unsigned long page_link;
diff --git a/include/asm-m68k/scatterlist.h b/include/asm-m68k/scatterlist.h
index e06bb89..d3a7a0e 100644
--- a/include/asm-m68k/scatterlist.h
+++ b/include/asm-m68k/scatterlist.h
@@ -4,6 +4,9 @@
#include <linux/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
unsigned int length;
diff --git a/include/asm-m68knommu/scatterlist.h b/include/asm-m68knommu/scatterlist.h
index 28bed41..1094284 100644
--- a/include/asm-m68knommu/scatterlist.h
+++ b/include/asm-m68knommu/scatterlist.h
@@ -5,6 +5,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/asm-mips/scatterlist.h b/include/asm-mips/scatterlist.h
index 787797c..83d69fe 100644
--- a/include/asm-mips/scatterlist.h
+++ b/include/asm-mips/scatterlist.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/asm-parisc/scatterlist.h b/include/asm-parisc/scatterlist.h
index 26da914..cd3cfdf 100644
--- a/include/asm-parisc/scatterlist.h
+++ b/include/asm-parisc/scatterlist.h
@@ -5,6 +5,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;

diff --git a/include/asm-powerpc/scatterlist.h b/include/asm-powerpc/scatterlist.h
index b9f1dbc..fcf7d55 100644
--- a/include/asm-powerpc/scatterlist.h
+++ b/include/asm-powerpc/scatterlist.h
@@ -14,6 +14,9 @@
#include <asm/dma.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
unsigned int length;
diff --git a/include/asm-s390/scatterlist.h b/include/asm-s390/scatterlist.h
index eb39486..29ec8e2 100644
--- a/include/asm-s390/scatterlist.h
+++ b/include/asm-s390/scatterlist.h
@@ -2,6 +2,9 @@
#define _ASMS390_SCATTERLIST_H

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
unsigned int length;
diff --git a/include/asm-sh/scatterlist.h b/include/asm-sh/scatterlist.h
index bc7c809..a7d0d18 100644
--- a/include/asm-sh/scatterlist.h
+++ b/include/asm-sh/scatterlist.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;/* for highmem, page offset */
dma_addr_t dma_address;
diff --git a/include/asm-sh64/scatterlist.h b/include/asm-sh64/scatterlist.h
index 0afd856..5109251 100644
--- a/include/asm-sh64/scatterlist.h
+++ b/include/asm-sh64/scatterlist.h
@@ -14,6 +14,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;/* for highmem, page offset */
dma_addr_t dma_address;
diff --git a/include/asm-sparc/scatterlist.h b/include/asm-sparc/scatterlist.h
index 45b16f1..e08d3d7 100644
--- a/include/asm-sparc/scatterlist.h
+++ b/include/asm-sparc/scatterlist.h
@@ -5,6 +5,9 @@
#include <linux/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;

diff --git a/include/asm-sparc64/scatterlist.h b/include/asm-sparc64/scatterlist.h
index 4cbaf7c..6df23f0 100644
--- a/include/asm-sparc64/scatterlist.h
+++ b/include/asm-sparc64/scatterlist.h
@@ -6,6 +6,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;

diff --git a/include/asm-v850/scatterlist.h b/include/asm-v850/scatterlist.h
index db91feb..02d27b3 100644
--- a/include/asm-v850/scatterlist.h
+++ b/include/asm-v850/scatterlist.h
@@ -17,6 +17,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned offset;
dma_addr_t dma_address;
diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h
index 140a5b3..0e7d997 100644
--- a/include/asm-x86/scatterlist_32.h
+++ b/include/asm-x86/scatterlist_32.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h
index e344784..1847c72 100644
--- a/include/asm-x86/scatterlist_64.h
+++ b/include/asm-x86/scatterlist_64.h
@@ -4,6 +4,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
unsigned int length;
diff --git a/include/asm-xtensa/scatterlist.h b/include/asm-xtensa/scatterlist.h
index 3b8aba5..810080b 100644
--- a/include/asm-xtensa/scatterlist.h
+++ b/include/asm-xtensa/scatterlist.h
@@ -14,6 +14,9 @@
#include <asm/types.h>

struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index c6136e8..42daf5e 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -23,6 +23,8 @@
*
*/

+#define SG_MAGIC 0x87654321
+
/**
* sg_set_page - Set sg entry to point at given page
* @sg: SG entry
@@ -39,6 +41,9 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page)
{
unsigned long page_link = sg->page_link & 0x3;

+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
sg->page_link = page_link | (unsigned long) page;
}

@@ -81,6 +86,9 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
**/
static inline struct scatterlist *sg_next(struct scatterlist *sg)
{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
if (sg_is_last(sg))
return NULL;

@@ -124,6 +132,10 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl,
ret = sg;

#endif
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sgl[0].sg_magic != SG_MAGIC);
+ BUG_ON(!sg_is_last(ret));
+#endif
return ret;
}

@@ -180,6 +192,9 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
memset(sg, 0, sizeof(*sg));
+#ifdef CONFIG_DEBUG_SG
+ sg->sg_magic = SG_MAGIC;
+#endif
sg_mark_end(sg, 1);
sg_set_buf(sg, buf, buflen);
}
@@ -198,6 +213,13 @@ static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
memset(sgl, 0, sizeof(*sgl) * nents);
sg_mark_end(sgl, nents);
+#ifdef CONFIG_DEBUG_SG
+ {
+ int i;
+ for (i = 0; i < nents; i++)
+ sgl[i].sg_magic = SG_MAGIC;
+ }
+#endif
}

/**
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c567f21..1faa508 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -389,6 +389,16 @@ config DEBUG_LIST

If unsure, say N.

+config DEBUG_SG
+ bool "Debug SG table operations"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on checks on scatter-gather tables. This can
+ help find problems with drivers that do not properly initialize
+ their sg tables.
+
+ If unsure, say N.
+
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32 || SUPERH || BFIN)
--
1.5.3.GIT

2007-10-22 18:15:04

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 09/10] Change table chaining layout

Change the page member of the scatterlist structure to be an unsigned
long, and encode more stuff in the lower bits:

- Bits 0 and 1 zero: this is a normal sg entry. Next sg entry is located
at sg + 1.
- Bit 0 set: this is a chain entry, the next real entry is at ->page_link
with the two low bits masked off.
- Bit 1 set: this is the final entry in the sg table. sg_next() will return
NULL when passed such an entry.

It's thus important that sg table users use the proper accessors to get
and set the page member.

Signed-off-by: Jens Axboe <[email protected]>
---
include/asm-alpha/scatterlist.h | 2 +-
include/asm-arm/scatterlist.h | 2 +-
include/asm-avr32/scatterlist.h | 2 +-
include/asm-blackfin/scatterlist.h | 2 +-
include/asm-cris/scatterlist.h | 2 +-
include/asm-frv/scatterlist.h | 2 +-
include/asm-h8300/scatterlist.h | 2 +-
include/asm-ia64/scatterlist.h | 2 +-
include/asm-m32r/scatterlist.h | 2 +-
include/asm-m68k/scatterlist.h | 2 +-
include/asm-m68knommu/scatterlist.h | 2 +-
include/asm-mips/scatterlist.h | 2 +-
include/asm-parisc/scatterlist.h | 2 +-
include/asm-powerpc/scatterlist.h | 2 +-
include/asm-s390/scatterlist.h | 2 +-
include/asm-sh/scatterlist.h | 2 +-
include/asm-sh64/scatterlist.h | 2 +-
include/asm-sparc/scatterlist.h | 2 +-
include/asm-sparc64/scatterlist.h | 2 +-
include/asm-v850/scatterlist.h | 2 +-
include/asm-x86/dma-mapping_32.h | 4 +-
include/asm-x86/scatterlist_32.h | 2 +-
include/asm-x86/scatterlist_64.h | 2 +-
include/asm-xtensa/scatterlist.h | 2 +-
include/linux/scatterlist.h | 78 ++++++++++++++++++++++++-----------
25 files changed, 79 insertions(+), 49 deletions(-)

diff --git a/include/asm-alpha/scatterlist.h b/include/asm-alpha/scatterlist.h
index 9173654..b764706 100644
--- a/include/asm-alpha/scatterlist.h
+++ b/include/asm-alpha/scatterlist.h
@@ -5,7 +5,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;

unsigned int length;
diff --git a/include/asm-arm/scatterlist.h b/include/asm-arm/scatterlist.h
index de2f65e..ab1d85d 100644
--- a/include/asm-arm/scatterlist.h
+++ b/include/asm-arm/scatterlist.h
@@ -5,7 +5,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page; /* buffer page */
+ unsigned long page_link;
unsigned int offset; /* buffer offset */
dma_addr_t dma_address; /* dma address */
unsigned int length; /* length */
diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h
index c6d5ce3..1356f29 100644
--- a/include/asm-avr32/scatterlist.h
+++ b/include/asm-avr32/scatterlist.h
@@ -4,7 +4,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-blackfin/scatterlist.h b/include/asm-blackfin/scatterlist.h
index 60e07b9..384af54 100644
--- a/include/asm-blackfin/scatterlist.h
+++ b/include/asm-blackfin/scatterlist.h
@@ -4,7 +4,7 @@
#include <linux/mm.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-cris/scatterlist.h b/include/asm-cris/scatterlist.h
index 4bdc44c..5a8a834 100644
--- a/include/asm-cris/scatterlist.h
+++ b/include/asm-cris/scatterlist.h
@@ -6,7 +6,7 @@ struct scatterlist {
unsigned int length;

/* The following is i386 highmem junk - not used by us */
- struct page * page; /* Location for highmem page, if any */
+ unsigned long page_link;
unsigned int offset;/* for highmem, page offset */

};
diff --git a/include/asm-frv/scatterlist.h b/include/asm-frv/scatterlist.h
index 8e827fa..53dade7 100644
--- a/include/asm-frv/scatterlist.h
+++ b/include/asm-frv/scatterlist.h
@@ -22,7 +22,7 @@
* and that's it. There's no excuse for not highmem enabling YOUR driver. /jens
*/
struct scatterlist {
- struct page *page; /* Location for highmem page, if any */
+ unsigned long page_link;
unsigned int offset; /* for highmem, page offset */

dma_addr_t dma_address;
diff --git a/include/asm-h8300/scatterlist.h b/include/asm-h8300/scatterlist.h
index 985fdf5..7e41983 100644
--- a/include/asm-h8300/scatterlist.h
+++ b/include/asm-h8300/scatterlist.h
@@ -4,7 +4,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-ia64/scatterlist.h b/include/asm-ia64/scatterlist.h
index 7d5234d..2f76ce3 100644
--- a/include/asm-ia64/scatterlist.h
+++ b/include/asm-ia64/scatterlist.h
@@ -9,7 +9,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
unsigned int length; /* buffer length */

diff --git a/include/asm-m32r/scatterlist.h b/include/asm-m32r/scatterlist.h
index 352415f..33b4b4d 100644
--- a/include/asm-m32r/scatterlist.h
+++ b/include/asm-m32r/scatterlist.h
@@ -6,7 +6,7 @@
struct scatterlist {
char * address; /* Location data is to be transferred to, NULL for
* highmem page */
- struct page * page; /* Location for highmem page, if any */
+ unsigned long page_link;
unsigned int offset;/* for highmem, page offset */

dma_addr_t dma_address;
diff --git a/include/asm-m68k/scatterlist.h b/include/asm-m68k/scatterlist.h
index 24887a2..e06bb89 100644
--- a/include/asm-m68k/scatterlist.h
+++ b/include/asm-m68k/scatterlist.h
@@ -4,7 +4,7 @@
#include <linux/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
unsigned int length;

diff --git a/include/asm-m68knommu/scatterlist.h b/include/asm-m68knommu/scatterlist.h
index 4da79d3..28bed41 100644
--- a/include/asm-m68knommu/scatterlist.h
+++ b/include/asm-m68knommu/scatterlist.h
@@ -5,7 +5,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-mips/scatterlist.h b/include/asm-mips/scatterlist.h
index 7af104c..787797c 100644
--- a/include/asm-mips/scatterlist.h
+++ b/include/asm-mips/scatterlist.h
@@ -4,7 +4,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page * page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-parisc/scatterlist.h b/include/asm-parisc/scatterlist.h
index e7211c7..26da914 100644
--- a/include/asm-parisc/scatterlist.h
+++ b/include/asm-parisc/scatterlist.h
@@ -5,7 +5,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;

unsigned int length;
diff --git a/include/asm-powerpc/scatterlist.h b/include/asm-powerpc/scatterlist.h
index b075f61..b9f1dbc 100644
--- a/include/asm-powerpc/scatterlist.h
+++ b/include/asm-powerpc/scatterlist.h
@@ -14,7 +14,7 @@
#include <asm/dma.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
unsigned int length;

diff --git a/include/asm-s390/scatterlist.h b/include/asm-s390/scatterlist.h
index a43b3af..eb39486 100644
--- a/include/asm-s390/scatterlist.h
+++ b/include/asm-s390/scatterlist.h
@@ -2,7 +2,7 @@
#define _ASMS390_SCATTERLIST_H

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
unsigned int length;
};
diff --git a/include/asm-sh/scatterlist.h b/include/asm-sh/scatterlist.h
index b9ae53c..bc7c809 100644
--- a/include/asm-sh/scatterlist.h
+++ b/include/asm-sh/scatterlist.h
@@ -4,7 +4,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page * page; /* Location for highmem page, if any */
+ unsigned long page_link;
unsigned int offset;/* for highmem, page offset */
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-sh64/scatterlist.h b/include/asm-sh64/scatterlist.h
index 1c723f2..0afd856 100644
--- a/include/asm-sh64/scatterlist.h
+++ b/include/asm-sh64/scatterlist.h
@@ -14,7 +14,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page * page; /* Location for highmem page, if any */
+ unsigned long page_link;
unsigned int offset;/* for highmem, page offset */
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-sparc/scatterlist.h b/include/asm-sparc/scatterlist.h
index 4055af9..45b16f1 100644
--- a/include/asm-sparc/scatterlist.h
+++ b/include/asm-sparc/scatterlist.h
@@ -5,7 +5,7 @@
#include <linux/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;

unsigned int length;
diff --git a/include/asm-sparc64/scatterlist.h b/include/asm-sparc64/scatterlist.h
index 703c5bb..4cbaf7c 100644
--- a/include/asm-sparc64/scatterlist.h
+++ b/include/asm-sparc64/scatterlist.h
@@ -6,7 +6,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;

unsigned int length;
diff --git a/include/asm-v850/scatterlist.h b/include/asm-v850/scatterlist.h
index 56f4029..db91feb 100644
--- a/include/asm-v850/scatterlist.h
+++ b/include/asm-v850/scatterlist.h
@@ -17,7 +17,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned offset;
dma_addr_t dma_address;
unsigned length;
diff --git a/include/asm-x86/dma-mapping_32.h b/include/asm-x86/dma-mapping_32.h
index 6a2d26c..55f01bd 100644
--- a/include/asm-x86/dma-mapping_32.h
+++ b/include/asm-x86/dma-mapping_32.h
@@ -45,9 +45,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
WARN_ON(nents == 0 || sglist[0].length == 0);

for_each_sg(sglist, sg, nents, i) {
- BUG_ON(!sg->page);
+ BUG_ON(!sg_page(sg));

- sg->dma_address = page_to_phys(sg->page) + sg->offset;
+ sg->dma_address = sg_phys(sg);
}

flush_write_buffers();
diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h
index bd5164a..140a5b3 100644
--- a/include/asm-x86/scatterlist_32.h
+++ b/include/asm-x86/scatterlist_32.h
@@ -4,7 +4,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h
index ef3986b..e344784 100644
--- a/include/asm-x86/scatterlist_64.h
+++ b/include/asm-x86/scatterlist_64.h
@@ -4,7 +4,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
unsigned int length;
dma_addr_t dma_address;
diff --git a/include/asm-xtensa/scatterlist.h b/include/asm-xtensa/scatterlist.h
index ca337a2..3b8aba5 100644
--- a/include/asm-xtensa/scatterlist.h
+++ b/include/asm-xtensa/scatterlist.h
@@ -14,7 +14,7 @@
#include <asm/types.h>

struct scatterlist {
- struct page *page;
+ unsigned long page_link;
unsigned int offset;
dma_addr_t dma_address;
unsigned int length;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 1645795..c6136e8 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -2,9 +2,26 @@
#define _LINUX_SCATTERLIST_H

#include <asm/scatterlist.h>
-#include <asm/io.h>
#include <linux/mm.h>
#include <linux/string.h>
+#include <asm/io.h>
+
+/*
+ * Notes on SG table design.
+ *
+ * Architectures must provide an unsigned long page_link field in the
+ * scatterlist struct. We use that to place the page pointer AND encode
+ * information about the sg table as well. The two lower bits are reserved
+ * for this information.
+ *
+ * If bit 0 is set, then the page_link contains a pointer to the next sg
+ * table list. Otherwise the next entry is at sg + 1.
+ *
+ * If bit 1 is set, then this sg entry is the last element in a list.
+ *
+ * See sg_next().
+ *
+ */

/**
* sg_set_page - Set sg entry to point at given page
@@ -20,11 +37,20 @@
**/
static inline void sg_set_page(struct scatterlist *sg, struct page *page)
{
- sg->page = page;
+ unsigned long page_link = sg->page_link & 0x3;
+
+ sg->page_link = page_link | (unsigned long) page;
}

-#define sg_page(sg) ((sg)->page)
+#define sg_page(sg) ((struct page *) ((sg)->page_link & ~0x3))

+/**
+ * sg_set_buf - Set sg entry to point at given data
+ * @sg: SG entry
+ * @buf: Data
+ * @buflen: Data length
+ *
+ **/
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
@@ -38,26 +64,27 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
* a valid sg entry, or whether it points to the start of a new scatterlist.
* Those low bits are there for everyone! (thanks mason :-)
*/
-#define sg_is_chain(sg) ((unsigned long) (sg)->page & 0x01)
+#define sg_is_chain(sg) ((sg)->page_link & 0x01)
+#define sg_is_last(sg) ((sg)->page_link & 0x02)
#define sg_chain_ptr(sg) \
- ((struct scatterlist *) ((unsigned long) (sg)->page & ~0x01))
+ ((struct scatterlist *) ((sg)->page_link & ~0x03))

/**
* sg_next - return the next scatterlist entry in a list
* @sg: The current sg entry
*
- * Usually the next entry will be @sg@ + 1, but if this sg element is part
- * of a chained scatterlist, it could jump to the start of a new
- * scatterlist array.
- *
- * Note that the caller must ensure that there are further entries after
- * the current entry, this function will NOT return NULL for an end-of-list.
+ * Description:
+ * Usually the next entry will be @sg@ + 1, but if this sg element is part
+ * of a chained scatterlist, it could jump to the start of a new
+ * scatterlist array.
*
- */
+ **/
static inline struct scatterlist *sg_next(struct scatterlist *sg)
{
- sg++;
+ if (sg_is_last(sg))
+ return NULL;

+ sg++;
if (unlikely(sg_is_chain(sg)))
sg = sg_chain_ptr(sg);

@@ -75,14 +102,15 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg)
* @sgl: First entry in the scatterlist
* @nents: Number of entries in the scatterlist
*
- * Should only be used casually, it (currently) scan the entire list
- * to get the last entry.
+ * Description:
+ * Should only be used casually, it (currently) scan the entire list
+ * to get the last entry.
*
- * Note that the @sgl@ pointer passed in need not be the first one,
- * the important bit is that @nents@ denotes the number of entries that
- * exist from @sgl@.
+ * Note that the @sgl@ pointer passed in need not be the first one,
+ * the important bit is that @nents@ denotes the number of entries that
+ * exist from @sgl@.
*
- */
+ **/
static inline struct scatterlist *sg_last(struct scatterlist *sgl,
unsigned int nents)
{
@@ -105,16 +133,17 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl,
* @prv_nents: Number of entries in prv
* @sgl: Second scatterlist
*
- * Links @prv@ and @sgl@ together, to form a longer scatterlist.
+ * Description:
+ * Links @prv@ and @sgl@ together, to form a longer scatterlist.
*
- */
+ **/
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
#ifndef ARCH_HAS_SG_CHAIN
BUG();
#endif
- prv[prv_nents - 1].page = (struct page *) ((unsigned long) sgl | 0x01);
+ prv[prv_nents - 1].page_link = (unsigned long) sgl | 0x01;
}

/**
@@ -128,13 +157,14 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
**/
static inline void sg_mark_end(struct scatterlist *sgl, unsigned int nents)
{
+ sgl[nents - 1].page_link = 0x02;
}

static inline void __sg_mark_end(struct scatterlist *sg)
{
+ sg->page_link |= 0x02;
}

-
/**
* sg_init_one - Initialize a single entry sg list
* @sg: SG entry
@@ -187,7 +217,7 @@ static inline unsigned long sg_phys(struct scatterlist *sg)

/**
* sg_virt - Return virtual address of an sg entry
- * @sg: SG entry
+ * @sg: SG entry
*
* Description:
* This calls page_address() on the page in this sg entry, and adds the
--
1.5.3.GIT

2007-10-22 18:15:29

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 08/10] [SG] Update arch/ to use sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
arch/alpha/kernel/pci_iommu.c | 2 +-
arch/arm/common/dmabounce.c | 2 +-
arch/blackfin/kernel/dma-mapping.c | 3 +--
arch/ia64/hp/common/sba_iommu.c | 2 +-
arch/ia64/hp/sim/simscsi.c | 4 ++--
arch/ia64/sn/pci/pci_dma.c | 2 +-
arch/m68k/kernel/dma.c | 2 +-
arch/mips/mm/dma-default.c | 16 +++++++---------
arch/powerpc/kernel/dma_64.c | 3 +--
arch/powerpc/kernel/ibmebus.c | 3 +--
arch/powerpc/kernel/iommu.c | 2 +-
arch/powerpc/platforms/ps3/system-bus.c | 5 ++---
arch/sparc/kernel/ioport.c | 17 ++++++++---------
arch/sparc/mm/io-unit.c | 2 +-
arch/sparc/mm/iommu.c | 8 ++++----
arch/sparc/mm/sun4c.c | 2 +-
arch/sparc64/kernel/iommu.c | 7 ++-----
arch/sparc64/kernel/iommu_common.c | 13 ++++++-------
arch/sparc64/kernel/ldc.c | 2 +-
arch/sparc64/kernel/pci_sun4v.c | 7 ++-----
arch/x86/kernel/pci-calgary_64.c | 10 ++++++----
arch/x86/kernel/pci-gart_64.c | 4 ++--
arch/x86/kernel/pci-nommu_64.c | 4 ++--
23 files changed, 55 insertions(+), 67 deletions(-)

diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index e1c4707..ee07dce 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -465,7 +465,7 @@ EXPORT_SYMBOL(pci_free_consistent);
Write dma_length of each leader with the combined lengths of
the mergable followers. */

-#define SG_ENT_VIRT_ADDRESS(SG) (page_address((SG)->page) + (SG)->offset)
+#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG)))
#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG))

static void
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 44ab0da..9d371e4 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -442,7 +442,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
BUG_ON(dir == DMA_NONE);

for (i = 0; i < nents; i++, sg++) {
- struct page *page = sg->page;
+ struct page *page = sg_page(sg);
unsigned int offset = sg->offset;
unsigned int length = sg->length;
void *ptr = page_address(page) + offset;
diff --git a/arch/blackfin/kernel/dma-mapping.c b/arch/blackfin/kernel/dma-mapping.c
index 94d7b11..a16cb03 100644
--- a/arch/blackfin/kernel/dma-mapping.c
+++ b/arch/blackfin/kernel/dma-mapping.c
@@ -160,8 +160,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
BUG_ON(direction == DMA_NONE);

for (i = 0; i < nents; i++, sg++) {
- sg->dma_address = (dma_addr_t)(page_address(sg->page) +
- sg->offset);
+ sg->dma_address = (dma_addr_t) sg_virt(sg);

invalidate_dcache_range(sg_dma_address(sg),
sg_dma_address(sg) +
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 3c95f41..bc859a3 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -246,7 +246,7 @@ static int reserve_sba_gart = 1;
static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);

-#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset)
+#define sba_sg_address(sg) sg_virt((sg))

#ifdef FULL_VALID_PDIR
static u64 prefetch_spill_page;
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index a3a558a..6ef9b52 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -131,7 +131,7 @@ simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
stat.fd = desc[sc->device->id];

scsi_for_each_sg(sc, sl, scsi_sg_count(sc), i) {
- req.addr = __pa(page_address(sl->page) + sl->offset);
+ req.addr = __pa(sg_virt(sl));
req.len = sl->length;
if (DBG)
printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n",
@@ -212,7 +212,7 @@ static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
if (!len)
break;
thislen = min(len, slp->length);
- memcpy(page_address(slp->page) + slp->offset, buf, thislen);
+ memcpy(sg_virt(slp), buf, thislen);
len -= thislen;
}
}
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index ecd8a52..511db2f 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -16,7 +16,7 @@
#include <asm/sn/pcidev.h>
#include <asm/sn/sn_sal.h>

-#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))

/**
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 9d4e4b5..ef490e1 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -121,7 +121,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
int i;

for (i = 0; i < nents; sg++, i++) {
- sg->dma_address = page_to_phys(sg->page) + sg->offset;
+ sg->dma_address = sg_phys(sg);
dma_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
}
return nents;
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 98b5e5b..b0b034c 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -165,12 +165,11 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
for (i = 0; i < nents; i++, sg++) {
unsigned long addr;

- addr = (unsigned long) page_address(sg->page);
+ addr = (unsigned long) sg_virt(sg);
if (!plat_device_is_coherent(dev) && addr)
- __dma_sync(addr + sg->offset, sg->length, direction);
+ __dma_sync(addr, sg->length, direction);
sg->dma_address = plat_map_dma_mem(dev,
- (void *)(addr + sg->offset),
- sg->length);
+ (void *)addr, sg->length);
}

return nents;
@@ -223,10 +222,9 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
for (i = 0; i < nhwentries; i++, sg++) {
if (!plat_device_is_coherent(dev) &&
direction != DMA_TO_DEVICE) {
- addr = (unsigned long) page_address(sg->page);
+ addr = (unsigned long) sg_virt(sg);
if (addr)
- __dma_sync(addr + sg->offset, sg->length,
- direction);
+ __dma_sync(addr, sg->length, direction);
}
plat_unmap_dma_mem(sg->dma_address);
}
@@ -304,7 +302,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
/* Make sure that gcc doesn't leave the empty loop body. */
for (i = 0; i < nelems; i++, sg++) {
if (cpu_is_noncoherent_r10000(dev))
- __dma_sync((unsigned long)page_address(sg->page),
+ __dma_sync((unsigned long)page_address(sg_page(sg)),
sg->length, direction);
plat_unmap_dma_mem(sg->dma_address);
}
@@ -322,7 +320,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele
/* Make sure that gcc doesn't leave the empty loop body. */
for (i = 0; i < nelems; i++, sg++) {
if (!plat_device_is_coherent(dev))
- __dma_sync((unsigned long)page_address(sg->page),
+ __dma_sync((unsigned long)page_address(sg_page(sg)),
sg->length, direction);
plat_unmap_dma_mem(sg->dma_address);
}
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 9001104..14206e3 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -161,8 +161,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
int i;

for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = (page_to_phys(sg->page) + sg->offset) |
- dma_direct_offset;
+ sg->dma_address = sg_phys(sg) | dma_direct_offset;
sg->dma_length = sg->length;
}

diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 289d7e9..72fd871 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -102,8 +102,7 @@ static int ibmebus_map_sg(struct device *dev,
int i;

for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = (dma_addr_t)page_address(sg->page)
- + sg->offset;
+ sg->dma_address = (dma_addr_t) sg_virt(sg);
sg->dma_length = sg->length;
}

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 306a6f7..2d0c9ef 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -307,7 +307,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
continue;
}
/* Allocate iommu entries for that segment */
- vaddr = (unsigned long)page_address(s->page) + s->offset;
+ vaddr = (unsigned long) sg_virt(s);
npages = iommu_num_pages(vaddr, slen);
entry = iommu_range_alloc(tbl, npages, &handle, mask >> IOMMU_PAGE_SHIFT, 0);

diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 07e64b4..6405f4a 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -628,9 +628,8 @@ static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
int i;

for_each_sg(sgl, sg, nents, i) {
- int result = ps3_dma_map(dev->d_region,
- page_to_phys(sg->page) + sg->offset, sg->length,
- &sg->dma_address, 0);
+ int result = ps3_dma_map(dev->d_region, sg_phys(sg),
+ sg->length, &sg->dma_address, 0);

if (result) {
pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 9c3ed88..97aa50d 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -727,9 +727,8 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents,
BUG_ON(direction == PCI_DMA_NONE);
/* IIep is write-through, not flushing. */
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg->page) == NULL);
- sg->dvma_address =
- virt_to_phys(page_address(sg->page)) + sg->offset;
+ BUG_ON(page_address(sg_page(sg)) == NULL);
+ sg->dvma_address = virt_to_phys(sg_virt(sg));
sg->dvma_length = sg->length;
}
return nents;
@@ -748,9 +747,9 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents,
BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg->page) == NULL);
+ BUG_ON(page_address(sg_page(sg)) == NULL);
mmu_inval_dma_area(
- (unsigned long) page_address(sg->page),
+ (unsigned long) page_address(sg_page(sg)),
(sg->length + PAGE_SIZE-1) & PAGE_MASK);
}
}
@@ -798,9 +797,9 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int
BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg->page) == NULL);
+ BUG_ON(page_address(sg_page(sg)) == NULL);
mmu_inval_dma_area(
- (unsigned long) page_address(sg->page),
+ (unsigned long) page_address(sg_page(sg)),
(sg->length + PAGE_SIZE-1) & PAGE_MASK);
}
}
@@ -814,9 +813,9 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl,
BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg->page) == NULL);
+ BUG_ON(page_address(sg_page(sg)) == NULL);
mmu_inval_dma_area(
- (unsigned long) page_address(sg->page),
+ (unsigned long) page_address(sg_page(sg)),
(sg->length + PAGE_SIZE-1) & PAGE_MASK);
}
}
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index 375b4db..1666087 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -144,7 +144,7 @@ static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus
spin_lock_irqsave(&iounit->lock, flags);
while (sz != 0) {
--sz;
- sg->dvma_address = iounit_get_area(iounit, (unsigned long)page_address(sg->page) + sg->offset, sg->length);
+ sg->dvma_address = iounit_get_area(iounit, sg_virt(sg), sg->length);
sg->dvma_length = sg->length;
sg = sg_next(sg);
}
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 283656d..4b93427 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -238,7 +238,7 @@ static void iommu_get_scsi_sgl_noflush(struct scatterlist *sg, int sz, struct sb
while (sz != 0) {
--sz;
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
- sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
+ sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset;
sg->dvma_length = (__u32) sg->length;
sg = sg_next(sg);
}
@@ -252,7 +252,7 @@ static void iommu_get_scsi_sgl_gflush(struct scatterlist *sg, int sz, struct sbu
while (sz != 0) {
--sz;
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
- sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
+ sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset;
sg->dvma_length = (__u32) sg->length;
sg = sg_next(sg);
}
@@ -273,7 +273,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu
* XXX Is this a good assumption?
* XXX What if someone else unmaps it here and races us?
*/
- if ((page = (unsigned long) page_address(sg->page)) != 0) {
+ if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
for (i = 0; i < n; i++) {
if (page != oldpage) { /* Already flushed? */
flush_page_for_dma(page);
@@ -283,7 +283,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu
}
}

- sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
+ sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset;
sg->dvma_length = (__u32) sg->length;
sg = sg_next(sg);
}
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index ee6708f..a2cc141 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1228,7 +1228,7 @@ static void sun4c_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *
{
while (sz != 0) {
--sz;
- sg->dvma_address = (__u32)sun4c_lockarea(page_address(sg->page) + sg->offset, sg->length);
+ sg->dvma_address = (__u32)sun4c_lockarea(sg_virt(sg), sg->length);
sg->dvma_length = sg->length;
sg = sg_next(sg);
}
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 29af777..070a484 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -472,8 +472,7 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
spin_unlock_irqrestore(&iommu->lock, flags);
}

-#define SG_ENT_PHYS_ADDRESS(SG) \
- (__pa(page_address((SG)->page)) + (SG)->offset)
+#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG))))

static void fill_sg(iopte_t *iopte, struct scatterlist *sg,
int nused, int nelems,
@@ -565,9 +564,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
/* Fast path single entry scatterlists. */
if (nelems == 1) {
sglist->dma_address =
- dma_4u_map_single(dev,
- (page_address(sglist->page) +
- sglist->offset),
+ dma_4u_map_single(dev, sg_virt(sglist),
sglist->length, direction);
if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
return 0;
diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c
index d7ca900..78e8277 100644
--- a/arch/sparc64/kernel/iommu_common.c
+++ b/arch/sparc64/kernel/iommu_common.c
@@ -73,7 +73,7 @@ static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg,

daddr = dma_sg->dma_address;
sglen = sg->length;
- sgaddr = (unsigned long) (page_address(sg->page) + sg->offset);
+ sgaddr = (unsigned long) sg_virt(sg);
while (dlen > 0) {
unsigned long paddr;

@@ -123,7 +123,7 @@ static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg,
sg = sg_next(sg);
if (--nents <= 0)
break;
- sgaddr = (unsigned long) (page_address(sg->page) + sg->offset);
+ sgaddr = (unsigned long) sg_virt(sg);
sglen = sg->length;
}
if (dlen < 0) {
@@ -191,7 +191,7 @@ void verify_sglist(struct scatterlist *sglist, int nents, iopte_t *iopte, int np
printk("sg(%d): page_addr(%p) off(%x) length(%x) "
"dma_address[%016x] dma_length[%016x]\n",
i,
- page_address(sg->page), sg->offset,
+ page_address(sg_page(sg)), sg->offset,
sg->length,
sg->dma_address, sg->dma_length);
}
@@ -207,15 +207,14 @@ unsigned long prepare_sg(struct scatterlist *sg, int nents)
unsigned long prev;
u32 dent_addr, dent_len;

- prev = (unsigned long) (page_address(sg->page) + sg->offset);
+ prev = (unsigned long) sg_virt(sg);
prev += (unsigned long) (dent_len = sg->length);
- dent_addr = (u32) ((unsigned long)(page_address(sg->page) + sg->offset)
- & (IO_PAGE_SIZE - 1UL));
+ dent_addr = (u32) ((unsigned long)(sg_virt(sg)) & (IO_PAGE_SIZE - 1UL));
while (--nents) {
unsigned long addr;

sg = sg_next(sg);
- addr = (unsigned long) (page_address(sg->page) + sg->offset);
+ addr = (unsigned long) sg_virt(sg);
if (! VCONTIG(prev, addr)) {
dma_sg->dma_address = dent_addr;
dma_sg->dma_length = dent_len;
diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c
index 85a2be0..c8313cb 100644
--- a/arch/sparc64/kernel/ldc.c
+++ b/arch/sparc64/kernel/ldc.c
@@ -2057,7 +2057,7 @@ static void fill_cookies(struct cookie_state *sp, unsigned long pa,

static int sg_count_one(struct scatterlist *sg)
{
- unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT;
+ unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
long len = sg->length;

if ((sg->offset | len) & (8UL - 1))
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index fe46ace..8c4875b 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -365,8 +365,7 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
spin_unlock_irqrestore(&iommu->lock, flags);
}

-#define SG_ENT_PHYS_ADDRESS(SG) \
- (__pa(page_address((SG)->page)) + (SG)->offset)
+#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG))))

static long fill_sg(long entry, struct device *dev,
struct scatterlist *sg,
@@ -477,9 +476,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
/* Fast path single entry scatterlists. */
if (nelems == 1) {
sglist->dma_address =
- dma_4v_map_single(dev,
- (page_address(sglist->page) +
- sglist->offset),
+ dma_4v_map_single(dev, sg_virt(sglist),
sglist->length, direction);
if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
return 0;
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5098f58..1a20fe3 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -411,8 +411,10 @@ static int calgary_nontranslate_map_sg(struct device* dev,
int i;

for_each_sg(sg, s, nelems, i) {
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+ struct page *p = sg_page(s);
+
+ BUG_ON(!p);
+ s->dma_address = virt_to_bus(sg_virt(s));
s->dma_length = s->length;
}
return nelems;
@@ -432,9 +434,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
return calgary_nontranslate_map_sg(dev, sg, nelems, direction);

for_each_sg(sg, s, nelems, i) {
- BUG_ON(!s->page);
+ BUG_ON(!sg_page(s));

- vaddr = (unsigned long)page_address(s->page) + s->offset;
+ vaddr = (unsigned long) sg_virt(s);
npages = num_dma_pages(vaddr, s->length);

entry = iommu_range_alloc(tbl, npages);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 5cdfab6..c56e9ee 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -302,7 +302,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
#endif

for_each_sg(sg, s, nents, i) {
- unsigned long addr = page_to_phys(s->page) + s->offset;
+ unsigned long addr = sg_phys(s);
if (nonforced_iommu(dev, addr, s->length)) {
addr = dma_map_area(dev, addr, s->length, dir);
if (addr == bad_dma_address) {
@@ -397,7 +397,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
start_sg = sgmap = sg;
ps = NULL; /* shut up gcc */
for_each_sg(sg, s, nents, i) {
- dma_addr_t addr = page_to_phys(s->page) + s->offset;
+ dma_addr_t addr = sg_phys(s);
s->dma_address = addr;
BUG_ON(s->length == 0);

diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c
index e85d436..faf70bd 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu_64.c
@@ -62,8 +62,8 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int i;

for_each_sg(sg, s, nents, i) {
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+ BUG_ON(!sg_page(s));
+ s->dma_address = virt_to_bus(sg_virt(s));
if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
return 0;
s->dma_length = s->length;
--
1.5.3.GIT

2007-10-22 18:15:48

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 04/10] [SG] Update drivers to use sg helpers

Signed-off-by: Jens Axboe <[email protected]>
---
arch/um/drivers/ubd_kern.c | 2 +-
drivers/ata/libata-core.c | 10 ++++----
drivers/ata/libata-scsi.c | 2 +-
drivers/block/DAC960.c | 2 +
drivers/block/cciss.c | 4 +-
drivers/block/cpqarray.c | 3 +-
drivers/block/cryptoloop.c | 12 ++++++---
drivers/block/sunvdc.c | 1 +
drivers/block/ub.c | 11 +++++----
drivers/block/viodasd.c | 2 +
drivers/ide/cris/ide-cris.c | 4 +-
drivers/ide/ide-probe.c | 4 ++-
drivers/ide/ide-taskfile.c | 2 +-
drivers/ide/mips/au1xxx-ide.c | 6 +---
drivers/ieee1394/dma.c | 2 +-
drivers/ieee1394/sbp2.c | 2 +-
drivers/infiniband/core/umem.c | 11 ++++++---
drivers/infiniband/hw/ipath/ipath_dma.c | 4 +-
drivers/infiniband/hw/ipath/ipath_mr.c | 2 +-
drivers/infiniband/hw/mthca/mthca_memfree.c | 24 ++++++++++++-------
drivers/infiniband/ulp/iser/iser_memory.c | 8 +++---
drivers/md/dm-crypt.c | 21 +++++++++--------
drivers/media/common/saa7146_core.c | 3 +-
drivers/media/video/ivtv/ivtv-udma.c | 4 +-
drivers/media/video/videobuf-dma-sg.c | 8 ++++--
drivers/mmc/card/queue.c | 15 ++++++------
drivers/mmc/host/at91_mci.c | 8 +++---
drivers/mmc/host/au1xmmc.c | 11 +++------
drivers/mmc/host/imxmmc.c | 2 +-
drivers/mmc/host/mmc_spi.c | 8 +++---
drivers/mmc/host/omap.c | 4 +-
drivers/mmc/host/sdhci.c | 2 +-
drivers/mmc/host/tifm_sd.c | 8 +++---
drivers/mmc/host/wbsd.c | 6 ++--
drivers/net/mlx4/icm.c | 14 +++++++----
drivers/net/ppp_mppe.c | 6 +---
drivers/scsi/3w-9xxx.c | 4 +-
drivers/scsi/3w-xxxx.c | 2 +-
drivers/scsi/NCR5380.c | 6 +---
drivers/scsi/NCR53c406a.c | 6 +---
drivers/scsi/aacraid/aachba.c | 2 +-
drivers/scsi/aha152x.c | 2 +-
drivers/scsi/aha1542.c | 8 ++----
drivers/scsi/arcmsr/arcmsr_hba.c | 4 +-
drivers/scsi/fd_mcs.c | 6 ++--
drivers/scsi/fdomain.c | 7 ++---
drivers/scsi/gdth.c | 4 +-
drivers/scsi/ibmmca.c | 2 +-
drivers/scsi/ide-scsi.c | 12 +++++-----
drivers/scsi/imm.c | 8 +-----
drivers/scsi/in2000.c | 4 +-
drivers/scsi/ipr.c | 19 ++++++++++------
drivers/scsi/ips.c | 6 ++--
drivers/scsi/iscsi_tcp.c | 15 ++++++------
drivers/scsi/megaraid.c | 8 ++----
drivers/scsi/megaraid/megaraid_mbox.c | 12 +++------
drivers/scsi/osst.c | 32 ++++++++++++++------------
drivers/scsi/pcmcia/nsp_cs.h | 2 +-
drivers/scsi/pcmcia/sym53c500_cs.c | 6 +---
drivers/scsi/ppa.c | 7 +----
drivers/scsi/qlogicfas408.c | 2 +-
drivers/scsi/scsi_debug.c | 4 +-
drivers/scsi/scsi_lib.c | 13 ++++++++--
drivers/scsi/seagate.c | 8 +++---
drivers/scsi/sg.c | 30 ++++++++++++------------
drivers/scsi/st.c | 8 +++---
drivers/scsi/sym53c416.c | 2 +-
drivers/scsi/tmscsim.c | 5 +---
drivers/scsi/ultrastor.c | 2 +-
drivers/scsi/wd7000.c | 2 +-
drivers/usb/core/message.c | 6 +---
drivers/usb/image/microtek.c | 5 +--
drivers/usb/misc/usbtest.c | 4 +-
drivers/usb/storage/protocol.c | 2 +-
74 files changed, 262 insertions(+), 253 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 25b248a..3a8cd3d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1115,7 +1115,7 @@ static void do_ubd_request(struct request_queue *q)
}
prepare_request(req, io_req,
(unsigned long long) req->sector << 9,
- sg->offset, sg->length, sg->page);
+ sg->offset, sg->length, sg_page(sg));

last_sectors = sg->length >> 9;
n = os_write_file(thread_fd, &io_req,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 629eadb..69092bc 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4296,7 +4296,7 @@ void ata_sg_clean(struct ata_queued_cmd *qc)
sg_last(sg, qc->orig_n_elem)->length += qc->pad_len;
if (pad_buf) {
struct scatterlist *psg = &qc->pad_sgent;
- void *addr = kmap_atomic(psg->page, KM_IRQ0);
+ void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
memcpy(addr + psg->offset, pad_buf, qc->pad_len);
kunmap_atomic(addr, KM_IRQ0);
}
@@ -4686,11 +4686,11 @@ static int ata_sg_setup(struct ata_queued_cmd *qc)
* data in this function or read data in ata_sg_clean.
*/
offset = lsg->offset + lsg->length - qc->pad_len;
- psg->page = nth_page(lsg->page, offset >> PAGE_SHIFT);
+ sg_set_page(psg, nth_page(sg_page(lsg), offset >> PAGE_SHIFT));
psg->offset = offset_in_page(offset);

if (qc->tf.flags & ATA_TFLAG_WRITE) {
- void *addr = kmap_atomic(psg->page, KM_IRQ0);
+ void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
memcpy(pad_buf, addr + psg->offset, qc->pad_len);
kunmap_atomic(addr, KM_IRQ0);
}
@@ -4836,7 +4836,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
if (qc->curbytes == qc->nbytes - qc->sect_size)
ap->hsm_task_state = HSM_ST_LAST;

- page = qc->cursg->page;
+ page = sg_page(qc->cursg);
offset = qc->cursg->offset + qc->cursg_ofs;

/* get the current page and offset */
@@ -4988,7 +4988,7 @@ next_sg:

sg = qc->cursg;

- page = sg->page;
+ page = sg_page(sg);
offset = sg->offset + qc->cursg_ofs;

/* get the current page and offset */
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 9fbb39c..5b758b9 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1544,7 +1544,7 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out)
struct scatterlist *sg = scsi_sglist(cmd);

if (sg) {
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
buflen = sg->length;
} else {
buf = NULL;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 84d6aa5..5350542 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -345,6 +345,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
Command->V1.ScatterGatherList =
(DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU;
Command->V1.ScatterGatherListDMA = ScatterGatherDMA;
+ sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit);
} else {
Command->cmd_sglist = Command->V2.ScatterList;
Command->V2.ScatterGatherList =
@@ -353,6 +354,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
Command->V2.RequestSense =
(DAC960_SCSI_RequestSense_T *)RequestSenseCPU;
Command->V2.RequestSenseDMA = RequestSenseDMA;
+ sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit);
}
}
return true;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 7c2cfde..5a6fe17 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2610,7 +2610,7 @@ static void do_cciss_request(struct request_queue *q)
(int)creq->nr_sectors);
#endif /* CCISS_DEBUG */

- memset(tmp_sg, 0, sizeof(tmp_sg));
+ sg_init_table(tmp_sg, MAXSGENTRIES);
seg = blk_rq_map_sg(q, creq, tmp_sg);

/* get the DMA records for the setup */
@@ -2621,7 +2621,7 @@ static void do_cciss_request(struct request_queue *q)

for (i = 0; i < seg; i++) {
c->SG[i].Len = tmp_sg[i].length;
- temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page,
+ temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
tmp_sg[i].offset,
tmp_sg[i].length, dir);
c->SG[i].Addr.lower = temp64.val32.lower;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 568603d..efab27f 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -918,6 +918,7 @@ queue_next:
DBGPX(
printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors);
);
+ sg_init_table(tmp_sg, SG_MAX);
seg = blk_rq_map_sg(q, creq, tmp_sg);

/* Now do all the DMA Mappings */
@@ -929,7 +930,7 @@ DBGPX(
{
c->req.sg[i].size = tmp_sg[i].length;
c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev,
- tmp_sg[i].page,
+ sg_page(&tmp_sg[i]),
tmp_sg[i].offset,
tmp_sg[i].length, dir);
}
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 4053503..1b58b01 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -26,6 +26,7 @@
#include <linux/crypto.h>
#include <linux/blkdev.h>
#include <linux/loop.h>
+#include <linux/scatterlist.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>

@@ -119,14 +120,17 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
.tfm = tfm,
.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
};
- struct scatterlist sg_out = { NULL, };
- struct scatterlist sg_in = { NULL, };
+ struct scatterlist sg_out;
+ struct scatterlist sg_in;

encdec_cbc_t encdecfunc;
struct page *in_page, *out_page;
unsigned in_offs, out_offs;
int err;

+ sg_init_table(&sg_out, 1);
+ sg_init_table(&sg_in, 1);
+
if (cmd == READ) {
in_page = raw_page;
in_offs = raw_off;
@@ -146,11 +150,11 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
u32 iv[4] = { 0, };
iv[0] = cpu_to_le32(IV & 0xffffffff);

- sg_in.page = in_page;
+ sg_set_page(&sg_in, in_page);
sg_in.offset = in_offs;
sg_in.length = sz;

- sg_out.page = out_page;
+ sg_set_page(&sg_out, out_page);
sg_out.offset = out_offs;
sg_out.length = sz;

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 317a790..7276f7d 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -388,6 +388,7 @@ static int __send_request(struct request *req)
op = VD_OP_BWRITE;
}

+ sg_init_table(sg, port->ring_cookies);
nsg = blk_rq_map_sg(req->q, req, sg);

len = 0;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index c57dd2b..14143f2 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -25,6 +25,7 @@
#include <linux/usb_usual.h>
#include <linux/blkdev.h>
#include <linux/timer.h>
+#include <linux/scatterlist.h>
#include <scsi/scsi.h>

#define DRV_NAME "ub"
@@ -656,6 +657,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
if ((cmd = ub_get_cmd(lun)) == NULL)
return -1;
memset(cmd, 0, sizeof(struct ub_scsi_cmd));
+ sg_init_table(cmd->sgv, UB_MAX_REQ_SG);

blkdev_dequeue_request(rq);

@@ -1309,9 +1311,8 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
else
pipe = sc->send_bulk_pipe;
sc->last_pipe = pipe;
- usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe,
- page_address(sg->page) + sg->offset, sg->length,
- ub_urb_complete, sc);
+ usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, sg_virt(sg),
+ sg->length, ub_urb_complete, sc);
sc->work_urb.actual_length = 0;
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
@@ -1427,7 +1428,7 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
scmd->state = UB_CMDST_INIT;
scmd->nsg = 1;
sg = &scmd->sgv[0];
- sg->page = virt_to_page(sc->top_sense);
+ sg_set_page(sg, virt_to_page(sc->top_sense));
sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
sg->length = UB_SENSE_SIZE;
scmd->len = UB_SENSE_SIZE;
@@ -1863,7 +1864,7 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
cmd->state = UB_CMDST_INIT;
cmd->nsg = 1;
sg = &cmd->sgv[0];
- sg->page = virt_to_page(p);
+ sg_set_page(sg, virt_to_page(p));
sg->offset = (unsigned long)p & (PAGE_SIZE-1);
sg->length = 8;
cmd->len = 8;
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index e824b67..ab5d404 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -41,6 +41,7 @@
#include <linux/dma-mapping.h>
#include <linux/completion.h>
#include <linux/device.h>
+#include <linux/scatterlist.h>

#include <asm/uaccess.h>
#include <asm/vio.h>
@@ -270,6 +271,7 @@ static int send_request(struct request *req)
d = req->rq_disk->private_data;

/* Now build the scatter-gather list */
+ sg_init_table(sg, VIOMAXBLOCKDMA);
nsg = blk_rq_map_sg(req->q, req, sg);
nsg = dma_map_sg(d->dev, sg, nsg, direction);

diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index ff20377..e196aef 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -935,11 +935,11 @@ static int cris_ide_build_dmatable (ide_drive_t *drive)
* than two possibly non-adjacent physical 4kB pages.
*/
/* group sequential buffers into one large buffer */
- addr = page_to_phys(sg->page) + sg->offset;
+ addr = sg_phys(sg);
size = sg_dma_len(sg);
while (--i) {
sg = sg_next(sg);
- if ((addr + size) != page_to_phys(sg->page) + sg->offset)
+ if ((addr + size) != sg_phys(sg))
break;
size += sg_dma_len(sg);
}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index d5146c5..ec55a17 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1317,12 +1317,14 @@ static int hwif_init(ide_hwif_t *hwif)
if (!hwif->sg_max_nents)
hwif->sg_max_nents = PRD_ENTRIES;

- hwif->sg_table = kzalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
+ hwif->sg_table = kmalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
GFP_KERNEL);
if (!hwif->sg_table) {
printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name);
goto out;
}
+
+ sg_init_table(hwif->sg_table, hwif->sg_max_nents);

if (init_irq(hwif) == 0)
goto done;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 73ef6bf..d066546 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -261,7 +261,7 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write)
hwif->cursg = sg;
}

- page = cursg->page;
+ page = sg_page(cursg);
offset = cursg->offset + hwif->cursg_ofs * SECTOR_SIZE;

/* get the current page and offset */
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 1de5856..a4ce3ba 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -276,8 +276,7 @@ static int auide_build_dmatable(ide_drive_t *drive)

if (iswrite) {
if(!put_source_flags(ahwif->tx_chan,
- (void*)(page_address(sg->page)
- + sg->offset),
+ (void*) sg_virt(sg),
tc, flags)) {
printk(KERN_ERR "%s failed %d\n",
__FUNCTION__, __LINE__);
@@ -285,8 +284,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
} else
{
if(!put_dest_flags(ahwif->rx_chan,
- (void*)(page_address(sg->page)
- + sg->offset),
+ (void*) sg_virt(sg),
tc, flags)) {
printk(KERN_ERR "%s failed %d\n",
__FUNCTION__, __LINE__);
diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c
index 45d6055..25e113b 100644
--- a/drivers/ieee1394/dma.c
+++ b/drivers/ieee1394/dma.c
@@ -111,7 +111,7 @@ int dma_region_alloc(struct dma_region *dma, unsigned long n_bytes,
unsigned long va =
(unsigned long)dma->kvirt + (i << PAGE_SHIFT);

- dma->sglist[i].page = vmalloc_to_page((void *)va);
+ sg_set_page(&dma->sglist[i], vmalloc_to_page((void *)va));
dma->sglist[i].length = PAGE_SIZE;
}

diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 1b353b9..d5dfe11 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -1466,7 +1466,7 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb,
cmd->dma_size = sgpnt[0].length;
cmd->dma_type = CMD_DMA_PAGE;
cmd->cmd_dma = dma_map_page(hi->host->device.parent,
- sgpnt[0].page, sgpnt[0].offset,
+ sg_page(&sgpnt[0]), sgpnt[0].offset,
cmd->dma_size, cmd->dma_dir);

orb->data_descriptor_lo = cmd->cmd_dma;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 2f54e29..14159ff 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,9 +55,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
ib_dma_unmap_sg(dev, chunk->page_list,
chunk->nents, DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->nents; ++i) {
+ struct page *page = sg_page(&chunk->page_list[i]);
+
if (umem->writable && dirty)
- set_page_dirty_lock(chunk->page_list[i].page);
- put_page(chunk->page_list[i].page);
+ set_page_dirty_lock(page);
+ put_page(page);
}

kfree(chunk);
@@ -164,11 +166,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
}

chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
for (i = 0; i < chunk->nents; ++i) {
if (vma_list &&
!is_vm_hugetlb_page(vma_list[i + off]))
umem->hugetlb = 0;
- chunk->page_list[i].page = page_list[i + off];
+ sg_set_page(&chunk->page_list[i], page_list[i + off]);
chunk->page_list[i].offset = 0;
chunk->page_list[i].length = PAGE_SIZE;
}
@@ -179,7 +182,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
DMA_BIDIRECTIONAL);
if (chunk->nmap <= 0) {
for (i = 0; i < chunk->nents; ++i)
- put_page(chunk->page_list[i].page);
+ put_page(sg_page(&chunk->page_list[i]));
kfree(chunk);

ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 22709a4..e90a0ea 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -108,7 +108,7 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
BUG_ON(!valid_dma_direction(direction));

for_each_sg(sgl, sg, nents, i) {
- addr = (u64) page_address(sg->page);
+ addr = (u64) page_address(sg_page(sg));
/* TODO: handle highmem pages */
if (!addr) {
ret = 0;
@@ -127,7 +127,7 @@ static void ipath_unmap_sg(struct ib_device *dev,

static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
{
- u64 addr = (u64) page_address(sg->page);
+ u64 addr = (u64) page_address(sg_page(sg));

if (addr)
addr += sg->offset;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index e442470..db4ba92 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -225,7 +225,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
for (i = 0; i < chunk->nents; i++) {
void *vaddr;

- vaddr = page_address(chunk->page_list[i].page);
+ vaddr = page_address(sg_page(&chunk->page_list[i]));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index e61f3e6..007b381 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -71,7 +71,7 @@ static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *
PCI_DMA_BIDIRECTIONAL);

for (i = 0; i < chunk->npages; ++i)
- __free_pages(chunk->mem[i].page,
+ __free_pages(sg_page(&chunk->mem[i]),
get_order(chunk->mem[i].length));
}

@@ -81,7 +81,7 @@ static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chun

for (i = 0; i < chunk->npages; ++i) {
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
- lowmem_page_address(chunk->mem[i].page),
+ lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
}
@@ -107,10 +107,13 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)

static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
- mem->page = alloc_pages(gfp_mask, order);
- if (!mem->page)
+ struct page *page;
+
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
return -ENOMEM;

+ sg_set_page(mem, page);
mem->length = PAGE_SIZE << order;
mem->offset = 0;
return 0;
@@ -157,6 +160,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
if (!chunk)
goto fail;

+ sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
@@ -304,7 +308,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_h
* so if we found the page, dma_handle has already
* been assigned to. */
if (chunk->mem[i].length > offset) {
- page = chunk->mem[i].page;
+ page = sg_page(&chunk->mem[i]);
goto out;
}
offset -= chunk->mem[i].length;
@@ -445,6 +449,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
+ struct page *pages[1];
int ret = 0;
u8 status;
int i;
@@ -472,16 +477,17 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
}

ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
- &db_tab->page[i].mem.page, NULL);
+ pages, NULL);
if (ret < 0)
goto out;

+ sg_set_page(&db_tab->page[i].mem, pages[0]);
db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE;
db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;

ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) {
- put_page(db_tab->page[i].mem.page);
+ put_page(pages[0]);
goto out;
}

@@ -491,7 +497,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
ret = -EINVAL;
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(db_tab->page[i].mem.page);
+ put_page(sg_page(&db_tab->page[i].mem));
goto out;
}

@@ -557,7 +563,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(db_tab->page[i].mem.page);
+ put_page(sg_page(&db_tab->page[i].mem));
}
}

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index f3529b6..d687980 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -131,7 +131,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,

p = mem;
for_each_sg(sgl, sg, data->size, i) {
- from = kmap_atomic(sg->page, KM_USER0);
+ from = kmap_atomic(sg_page(sg), KM_USER0);
memcpy(p,
from + sg->offset,
sg->length);
@@ -191,7 +191,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,

p = mem;
for_each_sg(sgl, sg, sg_size, i) {
- to = kmap_atomic(sg->page, KM_SOFTIRQ0);
+ to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
memcpy(to + sg->offset,
p,
sg->length);
@@ -300,7 +300,7 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
for_each_sg(sgl, sg, data->dma_nents, i) {
/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
"offset: %ld sz: %ld\n", i,
- (unsigned long)page_to_phys(sg->page),
+ (unsigned long)sg_phys(sg),
(unsigned long)sg->offset,
(unsigned long)sg->length); */
end_addr = ib_sg_dma_address(ibdev, sg) +
@@ -336,7 +336,7 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
- sg->page, sg->offset,
+ sg_page(sg), sg->offset,
sg->length, ib_sg_dma_len(ibdev, sg));
}

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0eb5416..ac54f69 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -348,16 +348,17 @@ static int crypt_convert(struct crypt_config *cc,
ctx->idx_out < ctx->bio_out->bi_vcnt) {
struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
- struct scatterlist sg_in = {
- .page = bv_in->bv_page,
- .offset = bv_in->bv_offset + ctx->offset_in,
- .length = 1 << SECTOR_SHIFT
- };
- struct scatterlist sg_out = {
- .page = bv_out->bv_page,
- .offset = bv_out->bv_offset + ctx->offset_out,
- .length = 1 << SECTOR_SHIFT
- };
+ struct scatterlist sg_in, sg_out;
+
+ sg_init_table(&sg_in, 1);
+ sg_set_page(&sg_in, bv_in->bv_page);
+ sg_in.offset = bv_in->bv_offset + ctx->offset_in;
+ sg_in.length = 1 << SECTOR_SHIFT;
+
+ sg_init_table(&sg_out, 1);
+ sg_set_page(&sg_out, bv_out->bv_page);
+ sg_out.offset = bv_out->bv_offset + ctx->offset_out;
+ sg_out.length = 1 << SECTOR_SHIFT;

ctx->offset_in += sg_in.length;
if (ctx->offset_in >= bv_in->bv_len) {
diff --git a/drivers/media/common/saa7146_core.c b/drivers/media/common/saa7146_core.c
index 365a221..2b1f8b4 100644
--- a/drivers/media/common/saa7146_core.c
+++ b/drivers/media/common/saa7146_core.c
@@ -112,12 +112,13 @@ static struct scatterlist* vmalloc_to_sg(unsigned char *virt, int nr_pages)
sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL);
if (NULL == sglist)
return NULL;
+ sg_init_table(sglist, nr_pages);
for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
pg = vmalloc_to_page(virt);
if (NULL == pg)
goto err;
BUG_ON(PageHighMem(pg));
- sglist[i].page = pg;
+ sg_set_page(&sglist[i], pg);
sglist[i].length = PAGE_SIZE;
}
return sglist;
diff --git a/drivers/media/video/ivtv/ivtv-udma.c b/drivers/media/video/ivtv/ivtv-udma.c
index c4626d1..912b424 100644
--- a/drivers/media/video/ivtv/ivtv-udma.c
+++ b/drivers/media/video/ivtv/ivtv-udma.c
@@ -63,10 +63,10 @@ int ivtv_udma_fill_sg_list (struct ivtv_user_dma *dma, struct ivtv_dma_page_info
memcpy(page_address(dma->bouncemap[map_offset]) + offset, src, len);
kunmap_atomic(src, KM_BOUNCE_READ);
local_irq_restore(flags);
- dma->SGlist[map_offset].page = dma->bouncemap[map_offset];
+ sg_set_page(&dma->SGlist[map_offset], dma->bouncemap[map_offset]);
}
else {
- dma->SGlist[map_offset].page = dma->map[map_offset];
+ sg_set_page(&dma->SGlist[map_offset], dma->map[map_offset]);
}
offset = 0;
map_offset++;
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 3eb6123..0a18286 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -60,12 +60,13 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages)
sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL);
if (NULL == sglist)
return NULL;
+ sg_init_table(sglist, nr_pages);
for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
pg = vmalloc_to_page(virt);
if (NULL == pg)
goto err;
BUG_ON(PageHighMem(pg));
- sglist[i].page = pg;
+ sg_set_page(&sglist[i], pg);
sglist[i].length = PAGE_SIZE;
}
return sglist;
@@ -86,13 +87,14 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
sglist = kcalloc(nr_pages, sizeof(*sglist), GFP_KERNEL);
if (NULL == sglist)
return NULL;
+ sg_init_table(sglist, nr_pages);

if (NULL == pages[0])
goto nopage;
if (PageHighMem(pages[0]))
/* DMA to highmem pages might not work */
goto highmem;
- sglist[0].page = pages[0];
+ sg_set_page(&sglist[0], pages[0]);
sglist[0].offset = offset;
sglist[0].length = PAGE_SIZE - offset;
for (i = 1; i < nr_pages; i++) {
@@ -100,7 +102,7 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
goto nopage;
if (PageHighMem(pages[i]))
goto highmem;
- sglist[i].page = pages[i];
+ sg_set_page(&sglist[i], pages[i]);
sglist[i].length = PAGE_SIZE;
}
return sglist;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index a5d0354..9203a0b 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/scatterlist.h>

#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
@@ -153,19 +154,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
blk_queue_max_segment_size(mq->queue, bouncesz);

- mq->sg = kzalloc(sizeof(struct scatterlist),
+ mq->sg = kmalloc(sizeof(struct scatterlist),
GFP_KERNEL);
if (!mq->sg) {
ret = -ENOMEM;
goto cleanup_queue;
}
+ sg_init_table(mq->sg, 1);

- mq->bounce_sg = kzalloc(sizeof(struct scatterlist) *
+ mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
bouncesz / 512, GFP_KERNEL);
if (!mq->bounce_sg) {
ret = -ENOMEM;
goto cleanup_queue;
}
+ sg_init_table(mq->bounce_sg, bouncesz / 512);
}
}
#endif
@@ -302,12 +305,12 @@ static void copy_sg(struct scatterlist *dst, unsigned int dst_len,
BUG_ON(dst_len == 0);

if (dst_size == 0) {
- dst_buf = page_address(dst->page) + dst->offset;
+ dst_buf = sg_virt(dst);
dst_size = dst->length;
}

if (src_size == 0) {
- src_buf = page_address(src->page) + src->offset;
+ src_buf = sg_virt(src);
src_size = src->length;
}

@@ -353,9 +356,7 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
return 1;
}

- mq->sg[0].page = virt_to_page(mq->bounce_buf);
- mq->sg[0].offset = offset_in_page(mq->bounce_buf);
- mq->sg[0].length = 0;
+ sg_init_one(mq->sg, mq->bounce_buf, 0);

while (sg_len) {
mq->sg[0].length += mq->bounce_sg[sg_len - 1].length;
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 7a452c2..b1edcef 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -149,7 +149,7 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data

sg = &data->sg[i];

- sgbuffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset;
+ sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
amount = min(size, sg->length);
size -= amount;

@@ -226,7 +226,7 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host)
sg = &data->sg[host->transfer_index++];
pr_debug("sg = %p\n", sg);

- sg->dma_address = dma_map_page(NULL, sg->page, sg->offset, sg->length, DMA_FROM_DEVICE);
+ sg->dma_address = dma_map_page(NULL, sg_page(sg), sg->offset, sg->length, DMA_FROM_DEVICE);

pr_debug("dma address = %08X, length = %d\n", sg->dma_address, sg->length);

@@ -283,7 +283,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
int index;

/* Swap the contents of the buffer */
- buffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
pr_debug("buffer = %p, length = %d\n", buffer, sg->length);

for (index = 0; index < (sg->length / 4); index++)
@@ -292,7 +292,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
}

- flush_dcache_page(sg->page);
+ flush_dcache_page(sg_page(sg));
}

/* Is there another transfer to trigger? */
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 92c4d0d..bcbb6d2 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -340,7 +340,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host)

/* This is the pointer to the data buffer */
sg = &data->sg[host->pio.index];
- sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset;
+ sg_ptr = sg_virt(sg) + host->pio.offset;

/* This is the space left inside the buffer */
sg_len = data->sg[host->pio.index].length - host->pio.offset;
@@ -400,7 +400,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)

if (host->pio.index < host->dma.len) {
sg = &data->sg[host->pio.index];
- sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset;
+ sg_ptr = sg_virt(sg) + host->pio.offset;

/* This is the space left inside the buffer */
sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset;
@@ -613,14 +613,11 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)

if (host->flags & HOST_F_XMIT){
ret = au1xxx_dbdma_put_source_flags(channel,
- (void *) (page_address(sg->page) +
- sg->offset),
- len, flags);
+ (void *) sg_virt(sg), len, flags);
}
else {
ret = au1xxx_dbdma_put_dest_flags(channel,
- (void *) (page_address(sg->page) +
- sg->offset),
+ (void *) sg_virt(sg),
len, flags);
}

diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c
index 6ebc41e..fc72e1f 100644
--- a/drivers/mmc/host/imxmmc.c
+++ b/drivers/mmc/host/imxmmc.c
@@ -262,7 +262,7 @@ static void imxmci_setup_data(struct imxmci_host *host, struct mmc_data *data)
}

/* Convert back to virtual address */
- host->data_ptr = (u16*)(page_address(data->sg->page) + data->sg->offset);
+ host->data_ptr = (u16*)sg_virt(data->sg);
host->data_cnt = 0;

clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events);
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 7ae18ea..12c2d80 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -813,7 +813,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
&& dir == DMA_FROM_DEVICE)
dir = DMA_BIDIRECTIONAL;

- dma_addr = dma_map_page(dma_dev, sg->page, 0,
+ dma_addr = dma_map_page(dma_dev, sg_page(sg), 0,
PAGE_SIZE, dir);
if (direction == DMA_TO_DEVICE)
t->tx_dma = dma_addr + sg->offset;
@@ -822,7 +822,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
}

/* allow pio too; we don't allow highmem */
- kmap_addr = kmap(sg->page);
+ kmap_addr = kmap(sg_page(sg));
if (direction == DMA_TO_DEVICE)
t->tx_buf = kmap_addr + sg->offset;
else
@@ -855,8 +855,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,

/* discard mappings */
if (direction == DMA_FROM_DEVICE)
- flush_kernel_dcache_page(sg->page);
- kunmap(sg->page);
+ flush_kernel_dcache_page(sg_page(sg));
+ kunmap(sg_page(sg));
if (dma_dev)
dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir);

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 60a67df..971e18b 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -24,10 +24,10 @@
#include <linux/mmc/host.h>
#include <linux/mmc/card.h>
#include <linux/clk.h>
+#include <linux/scatterlist.h>

#include <asm/io.h>
#include <asm/irq.h>
-#include <asm/scatterlist.h>
#include <asm/mach-types.h>

#include <asm/arch/board.h>
@@ -383,7 +383,7 @@ mmc_omap_sg_to_buf(struct mmc_omap_host *host)

sg = host->data->sg + host->sg_idx;
host->buffer_bytes_left = sg->length;
- host->buffer = page_address(sg->page) + sg->offset;
+ host->buffer = sg_virt(sg);
if (host->buffer_bytes_left > host->total_bytes_left)
host->buffer_bytes_left = host->total_bytes_left;
}
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b397121..0db837e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -231,7 +231,7 @@ static void sdhci_deactivate_led(struct sdhci_host *host)

static inline char* sdhci_sg_to_buffer(struct sdhci_host* host)
{
- return page_address(host->cur_sg->page) + host->cur_sg->offset;
+ return sg_virt(host->cur_sg);
}

static inline int sdhci_next_sg(struct sdhci_host* host)
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 9b90479..c11a3d2 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -192,7 +192,7 @@ static void tifm_sd_transfer_data(struct tifm_sd *host)
}
off = sg[host->sg_pos].offset + host->block_pos;

- pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT);
+ pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT);
p_off = offset_in_page(off);
p_cnt = PAGE_SIZE - p_off;
p_cnt = min(p_cnt, cnt);
@@ -241,18 +241,18 @@ static void tifm_sd_bounce_block(struct tifm_sd *host, struct mmc_data *r_data)
}
off = sg[host->sg_pos].offset + host->block_pos;

- pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT);
+ pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT);
p_off = offset_in_page(off);
p_cnt = PAGE_SIZE - p_off;
p_cnt = min(p_cnt, cnt);
p_cnt = min(p_cnt, t_size);

if (r_data->flags & MMC_DATA_WRITE)
- tifm_sd_copy_page(host->bounce_buf.page,
+ tifm_sd_copy_page(sg_page(&host->bounce_buf),
r_data->blksz - t_size,
pg, p_off, p_cnt);
else if (r_data->flags & MMC_DATA_READ)
- tifm_sd_copy_page(pg, p_off, host->bounce_buf.page,
+ tifm_sd_copy_page(pg, p_off, sg_page(&host->bounce_buf),
r_data->blksz - t_size, p_cnt);

t_size -= p_cnt;
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index 80db11c..fa4c8c5 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -269,7 +269,7 @@ static inline int wbsd_next_sg(struct wbsd_host *host)

static inline char *wbsd_sg_to_buffer(struct wbsd_host *host)
{
- return page_address(host->cur_sg->page) + host->cur_sg->offset;
+ return sg_virt(host->cur_sg);
}

static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
@@ -283,7 +283,7 @@ static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
len = data->sg_len;

for (i = 0; i < len; i++) {
- sgbuf = page_address(sg[i].page) + sg[i].offset;
+ sgbuf = sg_virt(&sg[i]);
memcpy(dmabuf, sgbuf, sg[i].length);
dmabuf += sg[i].length;
}
@@ -300,7 +300,7 @@ static inline void wbsd_dma_to_sg(struct wbsd_host *host, struct mmc_data *data)
len = data->sg_len;

for (i = 0; i < len; i++) {
- sgbuf = page_address(sg[i].page) + sg[i].offset;
+ sgbuf = sg_virt(&sg[i]);
memcpy(sgbuf, dmabuf, sg[i].length);
dmabuf += sg[i].length;
}
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index 4b3c109..887633b 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -60,7 +60,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
PCI_DMA_BIDIRECTIONAL);

for (i = 0; i < chunk->npages; ++i)
- __free_pages(chunk->mem[i].page,
+ __free_pages(sg_page(&chunk->mem[i]),
get_order(chunk->mem[i].length));
}

@@ -70,7 +70,7 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *

for (i = 0; i < chunk->npages; ++i)
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
- lowmem_page_address(chunk->mem[i].page),
+ lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}

@@ -95,10 +95,13 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)

static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
- mem->page = alloc_pages(gfp_mask, order);
- if (!mem->page)
+ struct page *page;
+
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
return -ENOMEM;

+ sg_set_page(mem, page);
mem->length = PAGE_SIZE << order;
mem->offset = 0;
return 0;
@@ -145,6 +148,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
if (!chunk)
goto fail;

+ sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
@@ -334,7 +338,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
* been assigned to.
*/
if (chunk->mem[i].length > offset) {
- page = chunk->mem[i].page;
+ page = sg_page(&chunk->mem[i]);
goto out;
}
offset -= chunk->mem[i].length;
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index c0b6d19..bcb0885 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -55,7 +55,7 @@
#include <linux/mm.h>
#include <linux/ppp_defs.h>
#include <linux/ppp-comp.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>

#include "ppp_mppe.h"

@@ -68,9 +68,7 @@ MODULE_VERSION("1.0.2");
static unsigned int
setup_sg(struct scatterlist *sg, const void *address, unsigned int length)
{
- sg[0].page = virt_to_page(address);
- sg[0].offset = offset_in_page(address);
- sg[0].length = length;
+ sg_init_one(sg, address, length);
return length;
}

diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index fb14014..afb262b 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1840,7 +1840,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
(scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) {
if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) {
struct scatterlist *sg = scsi_sglist(srb);
- char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ char *buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length);
kunmap_atomic(buf - sg->offset, KM_IRQ0);
}
@@ -1919,7 +1919,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re
char *buf;
unsigned long flags = 0;
local_irq_save(flags);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length);
kunmap_atomic(buf - sg->offset, KM_IRQ0);
local_irq_restore(flags);
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index a64153b..59716eb 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1469,7 +1469,7 @@ static void tw_transfer_internal(TW_Device_Extension *tw_dev, int request_id,
struct scatterlist *sg = scsi_sglist(cmd);

local_irq_save(flags);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
transfer_len = min(sg->length, len);

memcpy(buf, data, transfer_len);
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index 988f0bc..2597209 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -298,8 +298,7 @@ static __inline__ void initialize_SCp(Scsi_Cmnd * cmd)
if (cmd->use_sg) {
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg - 1;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
} else {
cmd->SCp.buffer = NULL;
@@ -2143,8 +2142,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
++cmd->SCp.buffer;
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
dprintk(NDEBUG_INFORMATION, ("scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual));
}
/*
diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c
index 3168a17..137d065 100644
--- a/drivers/scsi/NCR53c406a.c
+++ b/drivers/scsi/NCR53c406a.c
@@ -875,8 +875,7 @@ static void NCR53c406a_intr(void *dev_id)
outb(TRANSFER_INFO | DMA_OP, CMD_REG);
#if USE_PIO
scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
- NCR53c406a_pio_write(page_address(sg->page) + sg->offset,
- sg->length);
+ NCR53c406a_pio_write(sg_virt(sg), sg->length);
}
REG0;
#endif /* USE_PIO */
@@ -897,8 +896,7 @@ static void NCR53c406a_intr(void *dev_id)
outb(TRANSFER_INFO | DMA_OP, CMD_REG);
#if USE_PIO
scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
- NCR53c406a_pio_read(page_address(sg->page) + sg->offset,
- sg->length);
+ NCR53c406a_pio_read(sg_virt(sg), sg->length);
}
REG0;
#endif /* USE_PIO */
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 80e448d..a77ab8d 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -356,7 +356,7 @@ static void aac_internal_transfer(struct scsi_cmnd *scsicmd, void *data, unsigne
int transfer_len;
struct scatterlist *sg = scsi_sglist(scsicmd);

- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
transfer_len = min(sg->length, len + offset);

transfer_len -= offset;
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index a58c265..ea8c699 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -613,7 +613,7 @@ struct aha152x_scdata {
#define SCNEXT(SCpnt) SCDATA(SCpnt)->next
#define SCSEM(SCpnt) SCDATA(SCpnt)->done

-#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset))
+#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer)))

/* state handling */
static void seldi_run(struct Scsi_Host *shpnt);
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 961a188..bbcc2c5 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -49,7 +49,7 @@
#include "aha1542.h"

#define SCSI_BUF_PA(address) isa_virt_to_bus(address)
-#define SCSI_SG_PA(sgent) (isa_page_to_bus((sgent)->page) + (sgent)->offset)
+#define SCSI_SG_PA(sgent) (isa_page_to_bus(sg_page((sgent))) + (sgent)->offset)

static void BAD_DMA(void *address, unsigned int length)
{
@@ -66,8 +66,7 @@ static void BAD_SG_DMA(Scsi_Cmnd * SCpnt,
int badseg)
{
printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n",
- badseg, nseg,
- page_address(sgp->page) + sgp->offset,
+ badseg, nseg, sg_virt(sgp),
(unsigned long long)SCSI_SG_PA(sgp),
sgp->length);

@@ -712,8 +711,7 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i);
scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) {
printk(KERN_CRIT "%d: %p %d\n", i,
- (page_address(sg->page) +
- sg->offset), sg->length);
+ sg_virt(sg), sg->length);
};
printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr);
ptr = (unsigned char *) &cptr[i];
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index f817775..f7a2528 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -1343,7 +1343,7 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
/* 4 bytes: Areca io control code */

sg = scsi_sglist(cmd);
- buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
if (scsi_sg_count(cmd) > 1) {
retvalue = ARCMSR_MESSAGE_FAIL;
goto message_out;
@@ -1593,7 +1593,7 @@ static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb,
strncpy(&inqdata[32], "R001", 4); /* Product Revision */

sg = scsi_sglist(cmd);
- buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;

memcpy(buffer, inqdata, sizeof(inqdata));
sg = scsi_sglist(cmd);
diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c
index 668569e..8335b60 100644
--- a/drivers/scsi/fd_mcs.c
+++ b/drivers/scsi/fd_mcs.c
@@ -973,7 +973,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id)
if (current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
} else
break;
@@ -1006,7 +1006,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id)
if (!current_SC->SCp.this_residual && current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
}
}
@@ -1109,7 +1109,7 @@ static int fd_mcs_queue(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))

if (current_SC->use_sg) {
current_SC->SCp.buffer = (struct scatterlist *) current_SC->request_buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
current_SC->SCp.buffers_residual = current_SC->use_sg - 1;
} else {
diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c
index 5d282e6..2cd6b49 100644
--- a/drivers/scsi/fdomain.c
+++ b/drivers/scsi/fdomain.c
@@ -1321,7 +1321,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
if (current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
} else
break;
@@ -1354,7 +1354,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
&& current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
}
}
@@ -1439,8 +1439,7 @@ static int fdomain_16x0_queue(struct scsi_cmnd *SCpnt,

if (scsi_sg_count(current_SC)) {
current_SC->SCp.buffer = scsi_sglist(current_SC);
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page)
- + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1;
} else {
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 3ac080e..ac6c57a 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -2374,13 +2374,13 @@ static void gdth_copy_internal_data(gdth_ha_str *ha, Scsi_Cmnd *scp,
if (cpsum+cpnow > cpcount)
cpnow = cpcount - cpsum;
cpsum += cpnow;
- if (!sl->page) {
+ if (!sg_page(sl)) {
printk("GDT-HA %d: invalid sc/gt element in gdth_copy_internal_data()\n",
ha->hanum);
return;
}
local_irq_save(flags);
- address = kmap_atomic(sl->page, KM_BIO_SRC_IRQ) + sl->offset;
+ address = kmap_atomic(sg_page(sl), KM_BIO_SRC_IRQ) + sl->offset;
if (to_buffer)
memcpy(buffer, address, cpnow);
else
diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c
index 714e627..db004a4 100644
--- a/drivers/scsi/ibmmca.c
+++ b/drivers/scsi/ibmmca.c
@@ -1828,7 +1828,7 @@ static int ibmmca_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
BUG_ON(scsi_sg_count(cmd) > 16);

scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
- ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset);
+ ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg_page(sg)) + sg->offset);
ld(shpnt)[ldn].sge[i].byte_length = sg->length;
}
scb->enable |= IM_POINTER_TO_LIST;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 252d180..8d0244c 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -175,18 +175,18 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne

while (bcount) {
count = min(pc->sg->length - pc->b_count, bcount);
- if (PageHighMem(pc->sg->page)) {
+ if (PageHighMem(sg_page(pc->sg))) {
unsigned long flags;

local_irq_save(flags);
- buf = kmap_atomic(pc->sg->page, KM_IRQ0) +
+ buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
pc->sg->offset;
drive->hwif->atapi_input_bytes(drive,
buf + pc->b_count, count);
kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
local_irq_restore(flags);
} else {
- buf = page_address(pc->sg->page) + pc->sg->offset;
+ buf = sg_virt(pc->sg);
drive->hwif->atapi_input_bytes(drive,
buf + pc->b_count, count);
}
@@ -212,18 +212,18 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign

while (bcount) {
count = min(pc->sg->length - pc->b_count, bcount);
- if (PageHighMem(pc->sg->page)) {
+ if (PageHighMem(sg_page(pc->sg))) {
unsigned long flags;

local_irq_save(flags);
- buf = kmap_atomic(pc->sg->page, KM_IRQ0) +
+ buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
pc->sg->offset;
drive->hwif->atapi_output_bytes(drive,
buf + pc->b_count, count);
kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
local_irq_restore(flags);
} else {
- buf = page_address(pc->sg->page) + pc->sg->offset;
+ buf = sg_virt(pc->sg);
drive->hwif->atapi_output_bytes(drive,
buf + pc->b_count, count);
}
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 74cdc1f..a3d0c6b 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -705,9 +705,7 @@ static int imm_completion(struct scsi_cmnd *cmd)
cmd->SCp.buffer++;
cmd->SCp.this_residual =
cmd->SCp.buffer->length;
- cmd->SCp.ptr =
- page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);

/*
* Make sure that we transfer even number of bytes
@@ -844,9 +842,7 @@ static int imm_engine(imm_struct *dev, struct scsi_cmnd *cmd)
cmd->SCp.buffer =
(struct scatterlist *) cmd->request_buffer;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr =
- page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
} else {
/* else fill the only available buffer */
cmd->SCp.buffer = NULL;
diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c
index ab7cbf3..c8b452f 100644
--- a/drivers/scsi/in2000.c
+++ b/drivers/scsi/in2000.c
@@ -372,7 +372,7 @@ static int in2000_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
if (cmd->use_sg) {
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg - 1;
- cmd->SCp.ptr = (char *) page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
} else {
cmd->SCp.buffer = NULL;
@@ -764,7 +764,7 @@ static void transfer_bytes(Scsi_Cmnd * cmd, int data_in_dir)
++cmd->SCp.buffer;
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
}

/* Set up hardware registers */
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index c316a0b..439b97a 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -2872,6 +2872,7 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len)
}

scatterlist = sglist->scatterlist;
+ sg_init_table(scatterlist, num_elem);

sglist->order = order;
sglist->num_sg = num_elem;
@@ -2884,12 +2885,12 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len)

/* Free up what we already allocated */
for (j = i - 1; j >= 0; j--)
- __free_pages(scatterlist[j].page, order);
+ __free_pages(sg_page(&scatterlist[j]), order);
kfree(sglist);
return NULL;
}

- scatterlist[i].page = page;
+ sg_set_page(&scatterlist[i], page);
}

return sglist;
@@ -2910,7 +2911,7 @@ static void ipr_free_ucode_buffer(struct ipr_sglist *sglist)
int i;

for (i = 0; i < sglist->num_sg; i++)
- __free_pages(sglist->scatterlist[i].page, sglist->order);
+ __free_pages(sg_page(&sglist->scatterlist[i]), sglist->order);

kfree(sglist);
}
@@ -2940,9 +2941,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist,
scatterlist = sglist->scatterlist;

for (i = 0; i < (len / bsize_elem); i++, buffer += bsize_elem) {
- kaddr = kmap(scatterlist[i].page);
+ struct page *page = sg_page(&scatterlist[i]);
+
+ kaddr = kmap(page);
memcpy(kaddr, buffer, bsize_elem);
- kunmap(scatterlist[i].page);
+ kunmap(page);

scatterlist[i].length = bsize_elem;

@@ -2953,9 +2956,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist,
}

if (len % bsize_elem) {
- kaddr = kmap(scatterlist[i].page);
+ struct page *page = sg_page(&scatterlist[i]);
+
+ kaddr = kmap(page);
memcpy(kaddr, buffer, len % bsize_elem);
- kunmap(scatterlist[i].page);
+ kunmap(page);

scatterlist[i].length = len % bsize_elem;
}
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index edaac27..5c5a9b2 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -1515,7 +1515,7 @@ static int ips_is_passthru(struct scsi_cmnd *SC)
/* kmap_atomic() ensures addressability of the user buffer.*/
/* local_irq_save() protects the KM_IRQ0 address slot. */
local_irq_save(flags);
- buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
if (buffer && buffer[0] == 'C' && buffer[1] == 'O' &&
buffer[2] == 'P' && buffer[3] == 'P') {
kunmap_atomic(buffer - sg->offset, KM_IRQ0);
@@ -3523,7 +3523,7 @@ ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count)
/* kmap_atomic() ensures addressability of the data buffer.*/
/* local_irq_save() protects the KM_IRQ0 address slot. */
local_irq_save(flags);
- buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset;
+ buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset;
memcpy(buffer, &cdata[xfer_cnt], min_cnt);
kunmap_atomic(buffer - sg[i].offset, KM_IRQ0);
local_irq_restore(flags);
@@ -3556,7 +3556,7 @@ ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count)
/* kmap_atomic() ensures addressability of the data buffer.*/
/* local_irq_save() protects the KM_IRQ0 address slot. */
local_irq_save(flags);
- buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset;
+ buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset;
memcpy(&cdata[xfer_cnt], buffer, min_cnt);
kunmap_atomic(buffer - sg[i].offset, KM_IRQ0);
local_irq_restore(flags);
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index a21455d..6ce4109 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -70,9 +70,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
static inline void
iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
{
- ibuf->sg.page = virt_to_page(vbuf);
- ibuf->sg.offset = offset_in_page(vbuf);
- ibuf->sg.length = size;
+ sg_init_one(&ibuf->sg, vbuf, size);
ibuf->sent = 0;
ibuf->use_sendmsg = 1;
}
@@ -80,13 +78,14 @@ iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
static inline void
iscsi_buf_init_sg(struct iscsi_buf *ibuf, struct scatterlist *sg)
{
- ibuf->sg.page = sg->page;
+ sg_init_table(&ibuf->sg, 1);
+ sg_set_page(&ibuf->sg, sg_page(sg));
ibuf->sg.offset = sg->offset;
ibuf->sg.length = sg->length;
/*
* Fastpath: sg element fits into single page
*/
- if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg->page))
+ if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg_page(sg)))
ibuf->use_sendmsg = 0;
else
ibuf->use_sendmsg = 1;
@@ -716,7 +715,7 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) {
char *dest;

- dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
+ dest = kmap_atomic(sg_page(&sg[i]), KM_SOFTIRQ0);
rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset,
sg[i].length, offset);
kunmap_atomic(dest, KM_SOFTIRQ0);
@@ -1103,9 +1102,9 @@ iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags)
* slab case.
*/
if (buf->use_sendmsg)
- res = sock_no_sendpage(sk, buf->sg.page, offset, size, flags);
+ res = sock_no_sendpage(sk, sg_page(&buf->sg), offset, size, flags);
else
- res = tcp_conn->sendpage(sk, buf->sg.page, offset, size, flags);
+ res = tcp_conn->sendpage(sk, sg_page(&buf->sg), offset, size, flags);

if (res >= 0) {
conn->txdata_octets += res;
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 10d1aff..66c6520 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -658,7 +658,7 @@ mega_build_cmd(adapter_t *adapter, Scsi_Cmnd *cmd, int *busy)
struct scatterlist *sg;

sg = scsi_sglist(cmd);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;

memset(buf, 0, cmd->cmnd[4]);
kunmap_atomic(buf - sg->offset, KM_IRQ0);
@@ -1542,10 +1542,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status)
if( cmd->cmnd[0] == INQUIRY && !islogical ) {

sgl = scsi_sglist(cmd);
- if( sgl->page ) {
- c = *(unsigned char *)
- page_address((&sgl[0])->page) +
- (&sgl[0])->offset;
+ if( sg_page(sgl) ) {
+ c = *(unsigned char *) sg_virt(&sgl[0]);
} else {
printk(KERN_WARNING
"megaraid: invalid sg.\n");
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 7877920..c892310 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -1584,10 +1584,8 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy)
caddr_t vaddr;

sgl = scsi_sglist(scp);
- if (sgl->page) {
- vaddr = (caddr_t)
- (page_address((&sgl[0])->page)
- + (&sgl[0])->offset);
+ if (sg_page(sgl)) {
+ vaddr = (caddr_t) sg_virt(&sgl[0]);

memset(vaddr, 0, scp->cmnd[4]);
}
@@ -2328,10 +2326,8 @@ megaraid_mbox_dpc(unsigned long devp)
&& IS_RAID_CH(raid_dev, scb->dev_channel)) {

sgl = scsi_sglist(scp);
- if (sgl->page) {
- c = *(unsigned char *)
- (page_address((&sgl[0])->page) +
- (&sgl[0])->offset);
+ if (sg_page(sgl)) {
+ c = *(unsigned char *) sg_virt(&sgl[0]);
} else {
con_log(CL_ANN, (KERN_WARNING
"megaraid mailbox: invalid sg:%d\n",
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 331b789..1c5c4b6 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -542,7 +542,7 @@ static int osst_verify_frame(struct osst_tape * STp, int frame_seq_number, int q
if (STp->raw) {
if (STp->buffer->syscall_result) {
for (i=0; i < STp->buffer->sg_segs; i++)
- memset(page_address(STp->buffer->sg[i].page),
+ memset(page_address(sg_page(&STp->buffer->sg[i])),
0, STp->buffer->sg[i].length);
strcpy(STp->buffer->b_data, "READ ERROR ON FRAME");
} else
@@ -4437,7 +4437,7 @@ static int os_scsi_tape_open(struct inode * inode, struct file * filp)
for (i = 0, b_size = 0;
(i < STp->buffer->sg_segs) && ((b_size + STp->buffer->sg[i].length) <= OS_DATA_SIZE);
b_size += STp->buffer->sg[i++].length);
- STp->buffer->aux = (os_aux_t *) (page_address(STp->buffer->sg[i].page) + OS_DATA_SIZE - b_size);
+ STp->buffer->aux = (os_aux_t *) (page_address(sg_page(&STp->buffer->sg[i])) + OS_DATA_SIZE - b_size);
#if DEBUG
printk(OSST_DEB_MSG "%s:D: b_data points to %p in segment 0 at %p\n", name,
STp->buffer->b_data, page_address(STp->buffer->sg[0].page));
@@ -5252,25 +5252,26 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
/* Try to allocate the first segment up to OS_DATA_SIZE and the others
big enough to reach the goal (code assumes no segments in place) */
for (b_size = OS_DATA_SIZE, order = OSST_FIRST_ORDER; b_size >= PAGE_SIZE; order--, b_size /= 2) {
- STbuffer->sg[0].page = alloc_pages(priority, order);
+ struct page *page = alloc_pages(priority, order);
+
STbuffer->sg[0].offset = 0;
- if (STbuffer->sg[0].page != NULL) {
+ if (page != NULL) {
+ sg_set_page(&STbuffer->sg[0], page);
STbuffer->sg[0].length = b_size;
- STbuffer->b_data = page_address(STbuffer->sg[0].page);
+ STbuffer->b_data = page_address(page);
break;
}
}
- if (STbuffer->sg[0].page == NULL) {
+ if (sg_page(&STbuffer->sg[0]) == NULL) {
printk(KERN_NOTICE "osst :I: Can't allocate tape buffer main segment.\n");
return 0;
}
/* Got initial segment of 'bsize,order', continue with same size if possible, except for AUX */
for (segs=STbuffer->sg_segs=1, got=b_size;
segs < max_segs && got < OS_FRAME_SIZE; ) {
- STbuffer->sg[segs].page =
- alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order);
+ struct page *page = alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order);
STbuffer->sg[segs].offset = 0;
- if (STbuffer->sg[segs].page == NULL) {
+ if (page == NULL) {
if (OS_FRAME_SIZE - got <= (max_segs - segs) * b_size / 2 && order) {
b_size /= 2; /* Large enough for the rest of the buffers */
order--;
@@ -5284,6 +5285,7 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
normalize_buffer(STbuffer);
return 0;
}
+ sg_set_page(&STbuffer->sg[segs], page);
STbuffer->sg[segs].length = (OS_FRAME_SIZE - got <= PAGE_SIZE / 2) ? (OS_FRAME_SIZE - got) : b_size;
got += STbuffer->sg[segs].length;
STbuffer->buffer_size = got;
@@ -5316,7 +5318,7 @@ static void normalize_buffer(struct osst_buffer *STbuffer)
b_size < STbuffer->sg[i].length;
b_size *= 2, order++);

- __free_pages(STbuffer->sg[i].page, order);
+ __free_pages(sg_page(&STbuffer->sg[i]), order);
STbuffer->buffer_size -= STbuffer->sg[i].length;
}
#if DEBUG
@@ -5344,7 +5346,7 @@ static int append_to_buffer(const char __user *ubp, struct osst_buffer *st_bp, i
for ( ; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length - offset < do_count ?
st_bp->sg[i].length - offset : do_count;
- res = copy_from_user(page_address(st_bp->sg[i].page) + offset, ubp, cnt);
+ res = copy_from_user(page_address(sg_page(&st_bp->sg[i])) + offset, ubp, cnt);
if (res)
return (-EFAULT);
do_count -= cnt;
@@ -5377,7 +5379,7 @@ static int from_buffer(struct osst_buffer *st_bp, char __user *ubp, int do_count
for ( ; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length - offset < do_count ?
st_bp->sg[i].length - offset : do_count;
- res = copy_to_user(ubp, page_address(st_bp->sg[i].page) + offset, cnt);
+ res = copy_to_user(ubp, page_address(sg_page(&st_bp->sg[i])) + offset, cnt);
if (res)
return (-EFAULT);
do_count -= cnt;
@@ -5410,7 +5412,7 @@ static int osst_zero_buffer_tail(struct osst_buffer *st_bp)
i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length - offset < do_count ?
st_bp->sg[i].length - offset : do_count ;
- memset(page_address(st_bp->sg[i].page) + offset, 0, cnt);
+ memset(page_address(sg_page(&st_bp->sg[i])) + offset, 0, cnt);
do_count -= cnt;
offset = 0;
}
@@ -5430,7 +5432,7 @@ static int osst_copy_to_buffer(struct osst_buffer *st_bp, unsigned char *ptr)
for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length < do_count ?
st_bp->sg[i].length : do_count ;
- memcpy(page_address(st_bp->sg[i].page), ptr, cnt);
+ memcpy(page_address(sg_page(&st_bp->sg[i])), ptr, cnt);
do_count -= cnt;
ptr += cnt;
}
@@ -5451,7 +5453,7 @@ static int osst_copy_from_buffer(struct osst_buffer *st_bp, unsigned char *ptr)
for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length < do_count ?
st_bp->sg[i].length : do_count ;
- memcpy(ptr, page_address(st_bp->sg[i].page), cnt);
+ memcpy(ptr, page_address(sg_page(&st_bp->sg[i])), cnt);
do_count -= cnt;
ptr += cnt;
}
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index 9839755..7db28cd 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -393,7 +393,7 @@ enum _burst_mode {
#define MSG_EXT_SDTR 0x01

/* scatter-gather table */
-# define BUFFER_ADDR ((char *)((unsigned int)(SCpnt->SCp.buffer->page) + SCpnt->SCp.buffer->offset))
+# define BUFFER_ADDR ((char *)((sg_virt(SCpnt->SCp.buffer))))

#endif /*__nsp_cs__*/
/* end */
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index 190e2a7..969b938 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -443,8 +443,7 @@ SYM53C500_intr(int irq, void *dev_id)

scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
SYM53C500_pio_write(fast_pio, port_base,
- page_address(sg->page) + sg->offset,
- sg->length);
+ sg_virt(sg), sg->length);
}
REG0(port_base);
}
@@ -463,8 +462,7 @@ SYM53C500_intr(int irq, void *dev_id)

scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
SYM53C500_pio_read(fast_pio, port_base,
- page_address(sg->page) + sg->offset,
- sg->length);
+ sg_virt(sg), sg->length);
}
REG0(port_base);
}
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index 67b6d76..67ee51a 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -608,9 +608,7 @@ static int ppa_completion(struct scsi_cmnd *cmd)
cmd->SCp.buffer++;
cmd->SCp.this_residual =
cmd->SCp.buffer->length;
- cmd->SCp.ptr =
- page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
}
}
/* Now check to see if the drive is ready to comunicate */
@@ -756,8 +754,7 @@ static int ppa_engine(ppa_struct *dev, struct scsi_cmnd *cmd)
/* if many buffers are available, start filling the first */
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
} else {
/* else fill the only available buffer */
cmd->SCp.buffer = NULL;
diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c
index 2bfbf26..de7b3bc 100644
--- a/drivers/scsi/qlogicfas408.c
+++ b/drivers/scsi/qlogicfas408.c
@@ -317,7 +317,7 @@ static unsigned int ql_pcmd(struct scsi_cmnd *cmd)
return ((priv->qabort == 1 ?
DID_ABORT : DID_RESET) << 16);
}
- buf = page_address(sg->page) + sg->offset;
+ buf = sg_virt(sg);
if (ql_pdma(priv, phase, buf, sg->length))
break;
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 72ee4c9..46cae5a 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -625,7 +625,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
scsi_for_each_sg(scp, sg, scp->use_sg, k) {
if (active) {
kaddr = (unsigned char *)
- kmap_atomic(sg->page, KM_USER0);
+ kmap_atomic(sg_page(sg), KM_USER0);
if (NULL == kaddr)
return (DID_ERROR << 16);
kaddr_off = (unsigned char *)kaddr + sg->offset;
@@ -672,7 +672,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
sg = scsi_sglist(scp);
req_len = fin = 0;
for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) {
- kaddr = (unsigned char *)kmap_atomic(sg->page, KM_USER0);
+ kaddr = (unsigned char *)kmap_atomic(sg_page(sg), KM_USER0);
if (NULL == kaddr)
return -1;
kaddr_off = (unsigned char *)kaddr + sg->offset;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aac8a02..61fdaf0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -295,7 +295,7 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
int i, err, nr_vecs = 0;

for_each_sg(sgl, sg, nsegs, i) {
- page = sg->page;
+ page = sg_page(sg);
off = sg->offset;
len = sg->length;
data_len += len;
@@ -764,7 +764,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
if (unlikely(!sgl))
goto enomem;

- memset(sgl, 0, sizeof(*sgl) * sgp->size);
+ sg_init_table(sgl, sgp->size);

/*
* first loop through, set initial index and return value
@@ -781,6 +781,13 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);

/*
+ * if we have nothing left, mark the last segment as
+ * end-of-list
+ */
+ if (!left)
+ sg_mark_end(sgl, this);
+
+ /*
* don't allow subsequent mempool allocs to sleep, it would
* violate the mempool principle.
*/
@@ -2353,7 +2360,7 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
*offset = *offset - len_complete + sg->offset;

/* Assumption: contiguous pages can be accessed as "page + i" */
- page = nth_page(sg->page, (*offset >> PAGE_SHIFT));
+ page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT));
*offset &= ~PAGE_MASK;

/* Bytes in this sg-entry from *offset to the end of the page */
diff --git a/drivers/scsi/seagate.c b/drivers/scsi/seagate.c
index ce80fa9..b113244 100644
--- a/drivers/scsi/seagate.c
+++ b/drivers/scsi/seagate.c
@@ -999,14 +999,14 @@ connect_loop:
for (i = 0; i < nobuffs; ++i)
printk("scsi%d : buffer %d address = %p length = %d\n",
hostno, i,
- page_address(buffer[i].page) + buffer[i].offset,
+ sg_virt(&buffer[i]),
buffer[i].length);
}
#endif

buffer = (struct scatterlist *) SCint->request_buffer;
len = buffer->length;
- data = page_address(buffer->page) + buffer->offset;
+ data = sg_virt(buffer);
} else {
DPRINTK (DEBUG_SG, "scsi%d : scatter gather not requested.\n", hostno);
buffer = NULL;
@@ -1239,7 +1239,7 @@ connect_loop:
--nobuffs;
++buffer;
len = buffer->length;
- data = page_address(buffer->page) + buffer->offset;
+ data = sg_virt(buffer);
DPRINTK (DEBUG_SG,
"scsi%d : next scatter-gather buffer len = %d address = %08x\n",
hostno, len, data);
@@ -1396,7 +1396,7 @@ connect_loop:
--nobuffs;
++buffer;
len = buffer->length;
- data = page_address(buffer->page) + buffer->offset;
+ data = sg_virt(buffer);
DPRINTK (DEBUG_SG, "scsi%d : next scatter-gather buffer len = %d address = %08x\n", hostno, len, data);
}
break;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 7238b2d..cc19710 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1169,7 +1169,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
len = vma->vm_end - sa;
len = (len < sg->length) ? len : sg->length;
if (offset < len) {
- page = virt_to_page(page_address(sg->page) + offset);
+ page = virt_to_page(page_address(sg_page(sg)) + offset);
get_page(page); /* increment page count */
break;
}
@@ -1717,13 +1717,13 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
goto out_unlock; */
}

- sgl[0].page = pages[0];
+ sg_set_page(sgl, pages[0]);
sgl[0].offset = uaddr & ~PAGE_MASK;
if (nr_pages > 1) {
sgl[0].length = PAGE_SIZE - sgl[0].offset;
count -= sgl[0].length;
for (i=1; i < nr_pages ; i++) {
- sgl[i].page = pages[i];
+ sg_set_page(&sgl[i], pages[i]);
sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
count -= PAGE_SIZE;
}
@@ -1754,7 +1754,7 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
int i;

for (i=0; i < nr_pages; i++) {
- struct page *page = sgl[i].page;
+ struct page *page = sg_page(&sgl[i]);

if (dirtied)
SetPageDirty(page);
@@ -1854,7 +1854,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
scatter_elem_sz_prev = ret_sz;
}
}
- sg->page = p;
+ sg_set_page(sg, p);
sg->length = (ret_sz > num) ? num : ret_sz;

SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
@@ -1907,14 +1907,14 @@ sg_write_xfer(Sg_request * srp)
onum = 1;

ksglen = sg->length;
- p = page_address(sg->page);
+ p = page_address(sg_page(sg));
for (j = 0, k = 0; j < onum; ++j) {
res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
if (res)
return res;

for (; p; sg = sg_next(sg), ksglen = sg->length,
- p = page_address(sg->page)) {
+ p = page_address(sg_page(sg))) {
if (usglen <= 0)
break;
if (ksglen > usglen) {
@@ -1991,12 +1991,12 @@ sg_remove_scat(Sg_scatter_hold * schp)
} else {
int k;

- for (k = 0; (k < schp->k_use_sg) && sg->page;
+ for (k = 0; (k < schp->k_use_sg) && sg_page(sg);
++k, sg = sg_next(sg)) {
SCSI_LOG_TIMEOUT(5, printk(
"sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
- k, sg->page, sg->length));
- sg_page_free(sg->page, sg->length);
+ k, sg_page(sg), sg->length));
+ sg_page_free(sg_page(sg), sg->length);
}
}
kfree(schp->buffer);
@@ -2038,7 +2038,7 @@ sg_read_xfer(Sg_request * srp)
} else
onum = 1;

- p = page_address(sg->page);
+ p = page_address(sg_page(sg));
ksglen = sg->length;
for (j = 0, k = 0; j < onum; ++j) {
res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
@@ -2046,7 +2046,7 @@ sg_read_xfer(Sg_request * srp)
return res;

for (; p; sg = sg_next(sg), ksglen = sg->length,
- p = page_address(sg->page)) {
+ p = page_address(sg_page(sg))) {
if (usglen <= 0)
break;
if (ksglen > usglen) {
@@ -2092,15 +2092,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
if ((!outp) || (num_read_xfer <= 0))
return 0;

- for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) {
+ for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) {
num = sg->length;
if (num > num_read_xfer) {
- if (__copy_to_user(outp, page_address(sg->page),
+ if (__copy_to_user(outp, page_address(sg_page(sg)),
num_read_xfer))
return -EFAULT;
break;
} else {
- if (__copy_to_user(outp, page_address(sg->page),
+ if (__copy_to_user(outp, page_address(sg_page(sg)),
num))
return -EFAULT;
num_read_xfer -= num;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 73c44cb..ce69b9e 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3797,7 +3797,7 @@ static void buf_to_sg(struct st_buffer *STbp, unsigned int length)
sg = &(STbp->sg[0]);
frp = STbp->frp;
for (i=count=0; count < length; i++) {
- sg[i].page = frp[i].page;
+ sg_set_page(&sg[i], frp[i].page);
if (length - count > frp[i].length)
sg[i].length = frp[i].length;
else
@@ -4446,14 +4446,14 @@ static int sgl_map_user_pages(struct scatterlist *sgl, const unsigned int max_pa
}

/* Populate the scatter/gather list */
- sgl[0].page = pages[0];
+ sg_set_page(&sgl[0], pages[0]);
sgl[0].offset = uaddr & ~PAGE_MASK;
if (nr_pages > 1) {
sgl[0].length = PAGE_SIZE - sgl[0].offset;
count -= sgl[0].length;
for (i=1; i < nr_pages ; i++) {
+ sg_set_page(&sgl[i], pages[i]);;
sgl[i].offset = 0;
- sgl[i].page = pages[i];
sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
count -= PAGE_SIZE;
}
@@ -4483,7 +4483,7 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
int i;

for (i=0; i < nr_pages; i++) {
- struct page *page = sgl[i].page;
+ struct page *page = sg_page(&sgl[i]);

if (dirtied)
SetPageDirty(page);
diff --git a/drivers/scsi/sym53c416.c b/drivers/scsi/sym53c416.c
index 8befab7..90cee94 100644
--- a/drivers/scsi/sym53c416.c
+++ b/drivers/scsi/sym53c416.c
@@ -196,7 +196,7 @@ static unsigned int sym53c416_base_3[2] = {0,0};

#define MAXHOSTS 4

-#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset))
+#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer)))

enum phases
{
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index 5c72ca3..4419304 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -430,10 +430,7 @@ static __inline__ void dc390_Going_remove (struct dc390_dcb* pDCB, struct dc390_

static struct scatterlist* dc390_sg_build_single(struct scatterlist *sg, void *addr, unsigned int length)
{
- memset(sg, 0, sizeof(struct scatterlist));
- sg->page = virt_to_page(addr);
- sg->length = length;
- sg->offset = (unsigned long)addr & ~PAGE_MASK;
+ sg_init_one(sg, addr, length);
return sg;
}

diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index ea72bbe..6d1f0ed 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -681,7 +681,7 @@ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)

max = scsi_sg_count(SCpnt);
scsi_for_each_sg(SCpnt, sg, max, i) {
- mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset;
+ mscp->sglist[i].address = isa_page_to_bus(sg_page(sg)) + sg->offset;
mscp->sglist[i].num_bytes = sg->length;
transfer_length += sg->length;
}
diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c
index 255c611..03cd44f 100644
--- a/drivers/scsi/wd7000.c
+++ b/drivers/scsi/wd7000.c
@@ -1123,7 +1123,7 @@ static int wd7000_queuecommand(struct scsi_cmnd *SCpnt,
any2scsi(scb->maxlen, nseg * sizeof(Sgb));

scsi_for_each_sg(SCpnt, sg, nseg, i) {
- any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset);
+ any2scsi(sgb[i].ptr, isa_page_to_bus(sg_page(sg)) + sg->offset);
any2scsi(sgb[i].len, sg->length);
}
} else {
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 8dd5a6a..90d64a8 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -437,13 +437,11 @@ int usb_sg_init (
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_IOMMU)
io->urbs[i]->transfer_buffer = NULL;
#else
- io->urbs[i]->transfer_buffer =
- page_address(sg[i].page) + sg[i].offset;
+ io->urbs[i]->transfer_buffer = sg_virt(&sg[i]);
#endif
} else {
/* hc may use _only_ transfer_buffer */
- io->urbs [i]->transfer_buffer =
- page_address (sg [i].page) + sg [i].offset;
+ io->urbs [i]->transfer_buffer = sg_virt(&sg[i]);
len = sg [i].length;
}

diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index e7d982a..91e999c 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -519,8 +519,7 @@ static void mts_do_sg (struct urb* transfer)
context->fragment++;
mts_int_submit_urb(transfer,
context->data_pipe,
- page_address(sg[context->fragment].page) +
- sg[context->fragment].offset,
+ sg_virt(&sg[context->fragment]),
sg[context->fragment].length,
context->fragment + 1 == scsi_sg_count(context->srb) ?
mts_data_done : mts_do_sg);
@@ -557,7 +556,7 @@ mts_build_transfer_context(struct scsi_cmnd *srb, struct mts_desc* desc)
return;
} else {
sg = scsi_sglist(srb);
- desc->context.data = page_address(sg[0].page) + sg[0].offset;
+ desc->context.data = sg_virt(&sg[0]);
desc->context.data_length = sg[0].length;
}

diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index e901d31..ea31621 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -360,9 +360,9 @@ static void free_sglist (struct scatterlist *sg, int nents)
if (!sg)
return;
for (i = 0; i < nents; i++) {
- if (!sg [i].page)
+ if (!sg_page(&sg[i]))
continue;
- kfree (page_address (sg [i].page) + sg [i].offset);
+ kfree (sg_virt(&sg[i]));
}
kfree (sg);
}
diff --git a/drivers/usb/storage/protocol.c b/drivers/usb/storage/protocol.c
index cc8f7c5..889622b 100644
--- a/drivers/usb/storage/protocol.c
+++ b/drivers/usb/storage/protocol.c
@@ -195,7 +195,7 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
* the *offset and *index values for the next loop. */
cnt = 0;
while (cnt < buflen) {
- struct page *page = sg->page +
+ struct page *page = sg_page(sg) +
((sg->offset + *offset) >> PAGE_SHIFT);
unsigned int poff =
(sg->offset + *offset) & (PAGE_SIZE-1);
--
1.5.3.GIT

2007-10-22 19:39:53

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, 22 Oct 2007, Jens Axboe wrote:
> Change the page member of the scatterlist structure to be an unsigned
> long, and encode more stuff in the lower bits:
>
> - Bits 0 and 1 zero: this is a normal sg entry. Next sg entry is located
> at sg + 1.
> - Bit 0 set: this is a chain entry, the next real entry is at ->page_link
> with the two low bits masked off.
> - Bit 1 set: this is the final entry in the sg list. sg_next() will return
> NULL when passed such an entry.

Better safe than sorry...

Is it possible that a chain entry pointer has bit 1 set on architectures
(e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
not 4?

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2007-10-22 19:50:51

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Mon, 22 Oct 2007, Geert Uytterhoeven wrote:
>
> Better safe than sorry...
>
> Is it possible that a chain entry pointer has bit 1 set on architectures
> (e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
> not 4?

Better make sure that such alignment never happens... But no, I don't
think it will, since these things would generally always have to be
allocated with an allocator, and the *allocator* won't return 2-byte
aligned data structures.

Linus

2007-10-22 19:52:52

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, Oct 22 2007, Linus Torvalds wrote:
>
>
> On Mon, 22 Oct 2007, Geert Uytterhoeven wrote:
> >
> > Better safe than sorry...
> >
> > Is it possible that a chain entry pointer has bit 1 set on architectures
> > (e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
> > not 4?
>
> Better make sure that such alignment never happens... But no, I don't
> think it will, since these things would generally always have to be
> allocated with an allocator, and the *allocator* won't return 2-byte
> aligned data structures.

How about stack allocations?

--
Jens Axboe

2007-10-22 20:13:43

by Alan

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, 22 Oct 2007 12:49:40 -0700 (PDT)
Linus Torvalds <[email protected]> wrote:

>
>
> On Mon, 22 Oct 2007, Geert Uytterhoeven wrote:
> >
> > Better safe than sorry...
> >
> > Is it possible that a chain entry pointer has bit 1 set on architectures
> > (e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
> > not 4?
>
> Better make sure that such alignment never happens... But no, I don't
> think it will, since these things would generally always have to be
> allocated with an allocator, and the *allocator* won't return 2-byte
> aligned data structures.

No - but a structure which has other objects in it before the object
being written out may well be 2 byte aligned on M68K and some of the
other externally 16bit platforms - ditto local dynamic objects.

Why can't we just make the list one item longer than the entry count and
stick a NULL on the end of it like normal people ? Then you need one bit
which ought to be safe for everyone (and if the bit is a macro any CPU
warped enough to have byte alignment is surely going to have top bits
spare...)

Alan

2007-10-22 20:40:08

by Matt Mackall

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, Oct 22, 2007 at 09:16:17PM +0100, Alan Cox wrote:
> On Mon, 22 Oct 2007 12:49:40 -0700 (PDT)
> Linus Torvalds <[email protected]> wrote:
>
> >
> >
> > On Mon, 22 Oct 2007, Geert Uytterhoeven wrote:
> > >
> > > Better safe than sorry...
> > >
> > > Is it possible that a chain entry pointer has bit 1 set on architectures
> > > (e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
> > > not 4?
> >
> > Better make sure that such alignment never happens... But no, I don't
> > think it will, since these things would generally always have to be
> > allocated with an allocator, and the *allocator* won't return 2-byte
> > aligned data structures.
>
> No - but a structure which has other objects in it before the object
> being written out may well be 2 byte aligned on M68K and some of the
> other externally 16bit platforms - ditto local dynamic objects.

Also, the current version of SLOB will return objects aligned at 2 bytes if the
architecture allows it.

> Why can't we just make the list one item longer than the entry count and
> stick a NULL on the end of it like normal people ? Then you need one bit
> which ought to be safe for everyone (and if the bit is a macro any CPU
> warped enough to have byte alignment is surely going to have top bits
> spare...)

I'm guessing the extra entry makes slab-like allocators unhappy.

--
Mathematics is the supreme nostalgia of our time.

2007-10-22 20:45:36

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Mon, 22 Oct 2007, Alan Cox wrote:
>
> Why can't we just make the list one item longer than the entry count and
> stick a NULL on the end of it like normal people ? Then you need one bit
> which ought to be safe for everyone (and if the bit is a macro any CPU
> warped enough to have byte alignment is surely going to have top bits
> spare...)

Well, quite frankly, equally easy is to just add a

__attribute__((aligned(4)))

or whatever the gcc syntax for that is today.. That guarantees that gcc
lays things out properly.

Linus

2007-10-22 21:11:08

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 08/10] [SG] Update arch/ to use sg helpers

It looks like it could be nice to define and use a helper for
page_address(sg_page(sg)) (although only 11 call sites could use it
after this patch)

#define sg_pgaddr(sg) page_address(sg_page(sg))

Note that mips dma_{un,}map_sg checked for page_address(sg->page) != 0
before calling __dma_sync(addr + sg->offset, sg->length, direction)
and you changed it to addr = (unsigned long) sg_virt(sg) which
takes sg->offset into account. That said I'm not sure if the original
code was correct for the (page_address(sg->page) == 0 && sg->offset != 0)
case...


On Oct. 22, 2007, 20:11 +0200, Jens Axboe <[email protected]> wrote:
<snip>

> diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
> index 98b5e5b..b0b034c 100644
> --- a/arch/mips/mm/dma-default.c
> +++ b/arch/mips/mm/dma-default.c
> @@ -165,12 +165,11 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> for (i = 0; i < nents; i++, sg++) {
> unsigned long addr;
>
> - addr = (unsigned long) page_address(sg->page);
> + addr = (unsigned long) sg_virt(sg);
> if (!plat_device_is_coherent(dev) && addr)
> - __dma_sync(addr + sg->offset, sg->length, direction);
> + __dma_sync(addr, sg->length, direction);
> sg->dma_address = plat_map_dma_mem(dev,
> - (void *)(addr + sg->offset),
> - sg->length);
> + (void *)addr, sg->length);
> }
>
> return nents;
> @@ -223,10 +222,9 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
> for (i = 0; i < nhwentries; i++, sg++) {
> if (!plat_device_is_coherent(dev) &&
> direction != DMA_TO_DEVICE) {
> - addr = (unsigned long) page_address(sg->page);
> + addr = (unsigned long) sg_virt(sg);
> if (addr)
> - __dma_sync(addr + sg->offset, sg->length,
> - direction);
> + __dma_sync(addr, sg->length, direction);
> }
> plat_unmap_dma_mem(sg->dma_address);
> }

<snip>

> diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
> index 5098f58..1a20fe3 100644
> --- a/arch/x86/kernel/pci-calgary_64.c
> +++ b/arch/x86/kernel/pci-calgary_64.c
> @@ -411,8 +411,10 @@ static int calgary_nontranslate_map_sg(struct device* dev,
> int i;
>
> for_each_sg(sg, s, nelems, i) {
> - BUG_ON(!s->page);
> - s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
> + struct page *p = sg_page(s);
> +
> + BUG_ON(!p);

why not just BUG_ON(!sg_page(s))?

> + s->dma_address = virt_to_bus(sg_virt(s));
> s->dma_length = s->length;
> }
> return nelems;

2007-10-22 21:17:04

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Oct. 22, 2007, 22:16 +0200, Alan Cox <[email protected]> wrote:
> On Mon, 22 Oct 2007 12:49:40 -0700 (PDT)
> Linus Torvalds <[email protected]> wrote:
>
>>
>> On Mon, 22 Oct 2007, Geert Uytterhoeven wrote:
>>> Better safe than sorry...
>>>
>>> Is it possible that a chain entry pointer has bit 1 set on architectures
>>> (e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
>>> not 4?
>> Better make sure that such alignment never happens... But no, I don't
>> think it will, since these things would generally always have to be
>> allocated with an allocator, and the *allocator* won't return 2-byte
>> aligned data structures.
>
> No - but a structure which has other objects in it before the object
> being written out may well be 2 byte aligned on M68K and some of the
> other externally 16bit platforms - ditto local dynamic objects.
>
> Why can't we just make the list one item longer than the entry count and
> stick a NULL on the end of it like normal people ? Then you need one bit
> which ought to be safe for everyone (and if the bit is a macro any CPU
> warped enough to have byte alignment is surely going to have top bits
> spare...)

Alternatively, I proposed to check for end of list in sg_next
by calling it with the next iterator value and number of list elements.
We tried that patch here and it seems like a reasonable alternative.
If folks are interested, I can send the full patch for review.

>
> Alan
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2007-10-22 21:21:48

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

Alan Cox wrote:
> Why can't we just make the list one item longer than the entry count and
> stick a NULL on the end of it like normal people ?

Certainly seems safer than the current "let's run off the end of the
list if anything bad happens" setup... And I do not think allocating
n+1 scatterlist entries will have much of a negative impact.

Jeff


2007-10-22 21:42:03

by Alan

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, 22 Oct 2007 13:44:43 -0700 (PDT)
Linus Torvalds <[email protected]> wrote:

>
>
> On Mon, 22 Oct 2007, Alan Cox wrote:
> >
> > Why can't we just make the list one item longer than the entry count and
> > stick a NULL on the end of it like normal people ? Then you need one bit
> > which ought to be safe for everyone (and if the bit is a macro any CPU
> > warped enough to have byte alignment is surely going to have top bits
> > spare...)
>
> Well, quite frankly, equally easy is to just add a
>
> __attribute__((aligned(4)))
>
> or whatever the gcc syntax for that is today.. That guarantees that gcc
> lays things out properly.

For structures, not array elements or stack objects. Does gcc now get
aligned correct as an attribute on a stack object ?

Still doesn't answer the rather more important question - why not just
stick a NULL on the end instead of all the nutty hacks ?

2007-10-22 21:48:20

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Mon, 22 Oct 2007, Alan Cox wrote:

> For structures, not array elements or stack objects. Does gcc now get
> aligned correct as an attribute on a stack object ?

I think m68k stack layout still guarantees 4-byte-alignment, no?

> Still doesn't answer the rather more important question - why not just
> stick a NULL on the end instead of all the nutty hacks ?

You still do need one bit for the discontiguous case, so it's not like you
can avoid the hacks anyway (unless you just blow up the structure
entirely and make it a separate member). So once you have that
bit+pointer, using a separate NULL entry isn't exactly prettier.

Especially as we actually want to see the difference between
"end-of-allocation" and "not yet filled in", so you shouldn't use NULL
anyway, you should probably use something like "all-ones".

Linus

2007-10-22 21:48:32

by Matt Mackall

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, Oct 22, 2007 at 05:21:30PM -0400, Jeff Garzik wrote:
> Alan Cox wrote:
> >Why can't we just make the list one item longer than the entry count and
> >stick a NULL on the end of it like normal people ?
>
> Certainly seems safer than the current "let's run off the end of the
> list if anything bad happens" setup... And I do not think allocating
> n+1 scatterlist entries will have much of a negative impact.

It'll mean m-1 scatterlists fit on a slab.

--
Mathematics is the supreme nostalgia of our time.

2007-10-22 22:50:29

by Alan

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, 22 Oct 2007 16:47:07 -0500
Matt Mackall <[email protected]> wrote:

> On Mon, Oct 22, 2007 at 05:21:30PM -0400, Jeff Garzik wrote:
> > Alan Cox wrote:
> > >Why can't we just make the list one item longer than the entry count and
> > >stick a NULL on the end of it like normal people ?
> >
> > Certainly seems safer than the current "let's run off the end of the
> > list if anything bad happens" setup... And I do not think allocating
> > n+1 scatterlist entries will have much of a negative impact.
>
> It'll mean m-1 scatterlists fit on a slab.

Is that really a credible space issue ?

2007-10-22 23:47:30

by Matt Mackall

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, Oct 22, 2007 at 11:52:51PM +0100, Alan Cox wrote:
> On Mon, 22 Oct 2007 16:47:07 -0500
> Matt Mackall <[email protected]> wrote:
>
> > On Mon, Oct 22, 2007 at 05:21:30PM -0400, Jeff Garzik wrote:
> > > Alan Cox wrote:
> > > >Why can't we just make the list one item longer than the entry count and
> > > >stick a NULL on the end of it like normal people ?
> > >
> > > Certainly seems safer than the current "let's run off the end of the
> > > list if anything bad happens" setup... And I do not think allocating
> > > n+1 scatterlist entries will have much of a negative impact.
> >
> > It'll mean m-1 scatterlists fit on a slab.
>
> Is that really a credible space issue ?

Yes. Especially if m is 2 or 1. A scatterlist on 64-bit x86 looks like
it takes 32 bytes, which means 128 elements fit on a page. One more
spills - ouch!

But maybe chaining means this doesn't matter any more. Maybe we can
even pick a nice moderate sg size and reduce the number of mempools we
need for these things.

--
Mathematics is the supreme nostalgia of our time.

2007-10-23 00:07:30

by David Miller

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

From: Linus Torvalds <[email protected]>
Date: Mon, 22 Oct 2007 14:47:38 -0700 (PDT)

> On Mon, 22 Oct 2007, Alan Cox wrote:
>
> > Still doesn't answer the rather more important question - why not just
> > stick a NULL on the end instead of all the nutty hacks ?
>
> You still do need one bit for the discontiguous case, so it's not like you
> can avoid the hacks anyway (unless you just blow up the structure
> entirely) and make it a separate member). So once you have that
> bit+pointer, using a separate NULL entry isn't exactly prettier.
>
> Especially as we actally want to see the difference between
> "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
> anyway, you should probably use something like "all-ones".

Indeed that's the crux of the matter, we need to express a trinary
state "end of scatterlist, next entry is linear, next entry is
indirect" plus a pointer for the indirect case.

Generally, Jens was doing a good job cooking up the patch that
implemented this fully and I took care of making sure tricky
ports like sparc64 built cleanly etc.

He went away for a few days, but when he gets back we should seriously
work on integrating his work.

I fully recognize Alan's m68k on-stack alignment concern. The on-stack
cases are troublesome in other ways as well, and I think therefore the
way to move forward is to convert those to some kind of dynamic scheme.
Usually such code is working in a locked context on some object
(crypto instance, for example) and thus the scatterlist chunk can
be embedded into that object for ensured alignment.

2007-10-23 00:11:40

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

Matt Mackall wrote:
> On Mon, Oct 22, 2007 at 11:52:51PM +0100, Alan Cox wrote:
>> On Mon, 22 Oct 2007 16:47:07 -0500
>> Matt Mackall <[email protected]> wrote:
>>
>>> On Mon, Oct 22, 2007 at 05:21:30PM -0400, Jeff Garzik wrote:
>>>> Alan Cox wrote:
>>>>> Why can't we just make the list one item longer than the entry count and
>>>>> stick a NULL on the end of it like normal people ?
>>>> Certainly seems safer than the current "let's run off the end of the
>>>> list if anything bad happens" setup... And I do not think allocating
>>>> n+1 scatterlist entries will have much of a negative impact.
>>> It'll mean m-1 scatterlists fit on a slab.
>> Is that really a credible space issue ?
>
> Yes. Especially if m is 2 or 1. A scatterlist on 64-bit x86 looks like
> it takes 32 bytes, which means 128 elements fit on a page. One more
> spills - ouch!

...and it's trivial to reduce that number to 127 without noticeable
effect, really.

Jeff



2007-10-23 04:02:51

by Olof Johansson

[permalink] [raw]
Subject: powerpc: Fix fallout from sg_page() changes

Fix fallout from 18dabf473e15850c0dbc8ff13ac1e2806d542c15:

In file included from include/linux/dma-mapping.h:52,
from drivers/base/dma-mapping.c:10:
include/asm/dma-mapping.h: In function 'dma_map_sg':
include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:289: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:290: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h: In function 'dma_sync_sg_for_cpu':
include/asm/dma-mapping.h:331: error: 'struct scatterlist' has no member named 'page'

drivers/scsi/ps3rom.c: In function 'fetch_to_dev_buffer':
drivers/scsi/ps3rom.c:150: error: 'struct scatterlist' has no member named 'page'



Signed-off-by: Olof Johansson <[email protected]>

diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index 65be95d..ff52013 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -285,9 +285,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
BUG_ON(direction == DMA_NONE);

for_each_sg(sgl, sg, nents, i) {
- BUG_ON(!sg->page);
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
- sg->dma_address = page_to_bus(sg->page) + sg->offset;
+ BUG_ON(!sg_page(sg));
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+ sg->dma_address = page_to_bus(sg_page(sg)) + sg->offset;
}

return nents;
@@ -328,7 +328,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev,
BUG_ON(direction == DMA_NONE);

for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}

static inline void dma_sync_sg_for_device(struct device *dev,
@@ -341,7 +341,7 @@ static inline void dma_sync_sg_for_device(struct device *dev,
BUG_ON(direction == DMA_NONE);

for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}

static inline int dma_mapping_error(dma_addr_t dma_addr)
diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 03f19b8..17b4a7c 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf)

req_len = fin = 0;
scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
- kaddr = kmap_atomic(sg_page(sgpnt->page), KM_IRQ0);
+ kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
len = sgpnt->length;
if ((req_len + len) > buflen) {
len = buflen - req_len;

2007-10-23 04:24:52

by Olof Johansson

[permalink] [raw]
Subject: IB/ehca: Fix sg_page() fallout

More fallout from sg_page changes, found with powerpc allyesconfig:

drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'


Signed-off-by: Olof Johansson <[email protected]>

diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738..a3037f3 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -1776,7 +1776,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(chunk->page_list[i].page)
+ pgaddr = page_to_pfn(sg_page(chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = phys_to_abs(pgaddr +
(pginfo->next_hwpage *
@@ -1832,7 +1832,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
{
int t;
for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ u64 pgaddr = page_to_pfn(sg_page(page_list[t])) << PAGE_SHIFT;
ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
*(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1867,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+ pgaddr = ( page_to_pfn(sg_page(chunk->page_list[i]))
<< PAGE_SHIFT );
*kpage = phys_to_abs(pgaddr);
if ( !(*kpage) ) {

2007-10-23 05:05:25

by Jens Axboe

[permalink] [raw]
Subject: Re: IB/ehca: Fix sg_page() fallout

On Mon, Oct 22 2007, Olof Johansson wrote:
> More fallout from sg_page changes, found with powerpc allyesconfig:
>
> drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
> drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
> drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
> drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
> drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
> drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'

Thanks a lot Olof, applied both fixups!

--
Jens Axboe

2007-10-23 05:13:27

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH 02/10] [SG] Update block layer to use sg helpers

On Mon, Oct 22, 2007 at 08:10:56PM +0200, Jens Axboe wrote:
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> block/ll_rw_blk.c | 8 ++++++--
> 1 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> index 8025d64..61c2e39 100644
> --- a/block/ll_rw_blk.c
> +++ b/block/ll_rw_blk.c
> @@ -1354,8 +1354,9 @@ new_segment:
> else
> sg = sg_next(sg);
>
> - memset(sg, 0, sizeof(*sg));
> - sg->page = bvec->bv_page;
> + sg_dma_len(sg) = 0;
> + sg_dma_address(sg) = 0;
> + sg_set_page(sg, bvec->bv_page);
> sg->length = nbytes;
> sg->offset = bvec->bv_offset;
> nsegs++;
> @@ -1363,6 +1364,9 @@ new_segment:
> bvprv = bvec;
> } /* segments in rq */
>
> + if (sg)
> + __sg_mark_end(sg);
> +
> return nsegs;
> }

Hmm.... this breaks s390:

CC block/ll_rw_blk.o
block/ll_rw_blk.c: In function 'blk_rq_map_sg':
block/ll_rw_blk.c:1357: error: implicit declaration of function 'sg_dma_len'
block/ll_rw_blk.c:1357: error: lvalue required as left operand of assignment
block/ll_rw_blk.c:1358: error: implicit declaration of function 'sg_dma_address'
block/ll_rw_blk.c:1358: error: lvalue required as left operand of assignment
make[1]: *** [block/ll_rw_blk.o] Error 1

Missing macros and no appropriate members in struct scatterlist since we
don't have DMA. How to fix?

2007-10-23 05:16:51

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 02/10] [SG] Update block layer to use sg helpers

On Tue, Oct 23 2007, Heiko Carstens wrote:
> On Mon, Oct 22, 2007 at 08:10:56PM +0200, Jens Axboe wrote:
> > Signed-off-by: Jens Axboe <[email protected]>
> > ---
> > block/ll_rw_blk.c | 8 ++++++--
> > 1 files changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
> > index 8025d64..61c2e39 100644
> > --- a/block/ll_rw_blk.c
> > +++ b/block/ll_rw_blk.c
> > @@ -1354,8 +1354,9 @@ new_segment:
> > else
> > sg = sg_next(sg);
> >
> > - memset(sg, 0, sizeof(*sg));
> > - sg->page = bvec->bv_page;
> > + sg_dma_len(sg) = 0;
> > + sg_dma_address(sg) = 0;
> > + sg_set_page(sg, bvec->bv_page);
> > sg->length = nbytes;
> > sg->offset = bvec->bv_offset;
> > nsegs++;
> > @@ -1363,6 +1364,9 @@ new_segment:
> > bvprv = bvec;
> > } /* segments in rq */
> >
> > + if (sg)
> > + __sg_mark_end(sg);
> > +
> > return nsegs;
> > }
>
> Hmm.... this breaks s390:
>
> CC block/ll_rw_blk.o
> block/ll_rw_blk.c: In function 'blk_rq_map_sg':
> block/ll_rw_blk.c:1357: error: implicit declaration of function 'sg_dma_len'
> block/ll_rw_blk.c:1357: error: lvalue required as left operand of assignment
> block/ll_rw_blk.c:1358: error: implicit declaration of function 'sg_dma_address'
> block/ll_rw_blk.c:1358: error: lvalue required as left operand of assignment
> make[1]: *** [block/ll_rw_blk.o] Error 1
>
> Missing macros and no appropriate members in struct scatterlist since we
> don't have DMA. How to fix?

Just remove the two lines here, they are actually not required.

--
Jens Axboe

2007-10-23 05:42:33

by Heiko Carstens

[permalink] [raw]
Subject: [PATCH] fix ll_rw_blk.c build on s390

From: Heiko Carstens <[email protected]>

CC block/ll_rw_blk.o
block/ll_rw_blk.c: In function 'blk_rq_map_sg':
block/ll_rw_blk.c:1357: error: implicit declaration of function 'sg_dma_len'
block/ll_rw_blk.c:1357: error: lvalue required as left operand of assignment
block/ll_rw_blk.c:1358: error: implicit declaration of function 'sg_dma_address'
block/ll_rw_blk.c:1358: error: lvalue required as left operand of assignment
make[1]: *** [block/ll_rw_blk.o] Error 1
make: *** [block] Error 2

Cc: Jens Axboe <[email protected]>
Signed-off-by: Heiko Carstens <[email protected]>
---
block/ll_rw_blk.c | 2 --
1 file changed, 2 deletions(-)

Index: linux-2.6/block/ll_rw_blk.c
===================================================================
--- linux-2.6.orig/block/ll_rw_blk.c
+++ linux-2.6/block/ll_rw_blk.c
@@ -1354,8 +1354,6 @@ new_segment:
else
sg = sg_next(sg);

- sg_dma_len(sg) = 0;
- sg_dma_address(sg) = 0;
sg_set_page(sg, bvec->bv_page);
sg->length = nbytes;
sg->offset = bvec->bv_offset;

2007-10-23 05:44:52

by Heiko Carstens

[permalink] [raw]
Subject: [PATCH] net: fix xfrm build - missing scatterlist.h include

From: Heiko Carstens <[email protected]>

net/xfrm/xfrm_algo.c: In function 'skb_icv_walk':
net/xfrm/xfrm_algo.c:555: error: implicit declaration of function 'sg_set_page'
make[2]: *** [net/xfrm/xfrm_algo.o] Error 1

Cc: David Miller <[email protected]>
Cc: Jens Axboe <[email protected]>
Signed-off-by: Heiko Carstens <[email protected]>
---
net/xfrm/xfrm_algo.c | 1 +
1 file changed, 1 insertion(+)

Index: linux-2.6/net/xfrm/xfrm_algo.c
===================================================================
--- linux-2.6.orig/net/xfrm/xfrm_algo.c
+++ linux-2.6/net/xfrm/xfrm_algo.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
#include <linux/crypto.h>
+#include <linux/scatterlist.h>
#include <net/xfrm.h>
#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
#include <net/ah.h>

2007-10-23 05:47:44

by Olof Johansson

[permalink] [raw]
Subject: Re: IB/ehca: Fix sg_page() fallout


More fallout from sg_page changes:

drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'


Signed-off-by: Olof Johansson <[email protected]>


---

On Tue, Oct 23, 2007 at 07:05:12AM +0200, Jens Axboe wrote:
> On Mon, Oct 22 2007, Olof Johansson wrote:
> > More fallout from sg_page changes, found with powerpc allyesconfig:
> >
> > drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
> > drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
> > drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
> > drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
> > drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
> > drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'
>
> Thanks a lot Olof, applied both fixups!

I messed up the second fix. :( please replace with this.


-Olof


diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738..ead7230 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -1776,7 +1776,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(chunk->page_list[i].page)
+ pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = phys_to_abs(pgaddr +
(pginfo->next_hwpage *
@@ -1832,7 +1832,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
{
int t;
for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
*(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1867,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+ pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT );
*kpage = phys_to_abs(pgaddr);
if ( !(*kpage) ) {

2007-10-23 06:29:40

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH 04/10] [SG] Update drivers to use sg helpers

On Mon, Oct 22, 2007 at 08:10:58PM +0200, Jens Axboe wrote:
> Signed-off-by: Jens Axboe <[email protected]>
> ---

You forgot s390's zfcp driver. But unfortunately the trivial fix below
doesn't work. No more I/O possible. Swen and/or Christof could you
provide a correct fix for this please? Thanks!

---
drivers/s390/scsi/zfcp_def.h | 4 ++--
drivers/s390/scsi/zfcp_erp.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)

Index: linux-2.6/drivers/s390/scsi/zfcp_def.h
===================================================================
--- linux-2.6.orig/drivers/s390/scsi/zfcp_def.h
+++ linux-2.6/drivers/s390/scsi/zfcp_def.h
@@ -63,7 +63,7 @@
static inline void *
zfcp_sg_to_address(struct scatterlist *list)
{
- return (void *) (page_address(list->page) + list->offset);
+ return (void *) (page_address(sg_page(list) + list->offset));
}

/**
@@ -74,7 +74,7 @@ zfcp_sg_to_address(struct scatterlist *l
static inline void
zfcp_address_to_sg(void *address, struct scatterlist *list)
{
- list->page = virt_to_page(address);
+ sg_set_page(list, virt_to_page(address));
list->offset = ((unsigned long) address) & (PAGE_SIZE - 1);
}

Index: linux-2.6/drivers/s390/scsi/zfcp_erp.c
===================================================================
--- linux-2.6.orig/drivers/s390/scsi/zfcp_erp.c
+++ linux-2.6/drivers/s390/scsi/zfcp_erp.c
@@ -363,7 +363,7 @@ zfcp_erp_adisc(struct zfcp_port *port)
retval = -ENOMEM;
freemem:
if (address != NULL)
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
if (send_els != NULL) {
kfree(send_els->req);
kfree(send_els->resp);
@@ -437,7 +437,7 @@ zfcp_erp_adisc_handler(unsigned long dat

out:
zfcp_port_put(port);
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
kfree(send_els->req);
kfree(send_els->resp);
kfree(send_els);

2007-10-23 07:14:08

by Jens Axboe

[permalink] [raw]
Subject: Re: IB/ehca: Fix sg_page() fallout

On Tue, Oct 23 2007, Olof Johansson wrote:
>
> More fallout from sg_page changes:
>
> drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
> drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
> drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
> drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
> drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
> drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'
>
>
> Signed-off-by: Olof Johansson <[email protected]>
>
>
> ---
>
> On Tue, Oct 23, 2007 at 07:05:12AM +0200, Jens Axboe wrote:
> > On Mon, Oct 22 2007, Olof Johansson wrote:
> > > More fallout from sg_page changes, found with powerpc allyesconfig:
> > >
> > > drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
> > > drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
> > > drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
> > > drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
> > > drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
> > > drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'
> >
> > Thanks a lot Olof, applied both fixups!
>
> I messed up the second fix. :( please replace with this.

No problem, applied.

--
Jens Axboe

2007-10-23 07:14:59

by Jens Axboe

[permalink] [raw]
Subject: Re: powerpc: Fix fallout from sg_page() changes

On Mon, Oct 22 2007, Olof Johansson wrote:
> Fix fallout from 18dabf473e15850c0dbc8ff13ac1e2806d542c15:
>
> In file included from include/linux/dma-mapping.h:52,
> from drivers/base/dma-mapping.c:10:
> include/asm/dma-mapping.h: In function 'dma_map_sg':
> include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
> include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
> include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
> include/asm/dma-mapping.h:289: error: 'struct scatterlist' has no member named 'page'
> include/asm/dma-mapping.h:290: error: 'struct scatterlist' has no member named 'page'
> include/asm/dma-mapping.h: In function 'dma_sync_sg_for_cpu':
> include/asm/dma-mapping.h:331: error: 'struct scatterlist' has no member named 'page'
>
> drivers/scsi/ps3rom.c: In function 'fetch_to_dev_buffer':
> drivers/scsi/ps3rom.c:150: error: 'struct scatterlist' has no member named 'page'

Applied.

--
Jens Axboe

2007-10-23 07:15:42

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 04/10] [SG] Update drivers to use sg helpers

On Tue, Oct 23 2007, Heiko Carstens wrote:
> On Mon, Oct 22, 2007 at 08:10:58PM +0200, Jens Axboe wrote:
> > Signed-off-by: Jens Axboe <[email protected]>
> > ---
>
> You forgot s390's zfcp driver. But unfortunately the trivial fix below
> doesn't work. No more I/O possible. Swen and/or Christof could you
> provide a correct fix for this please? Thanks!
>
> ---
> drivers/s390/scsi/zfcp_def.h | 4 ++--
> drivers/s390/scsi/zfcp_erp.c | 4 ++--
> 2 files changed, 4 insertions(+), 4 deletions(-)
>
> Index: linux-2.6/drivers/s390/scsi/zfcp_def.h
> ===================================================================
> --- linux-2.6.orig/drivers/s390/scsi/zfcp_def.h
> +++ linux-2.6/drivers/s390/scsi/zfcp_def.h
> @@ -63,7 +63,7 @@
> static inline void *
> zfcp_sg_to_address(struct scatterlist *list)
> {
> - return (void *) (page_address(list->page) + list->offset);
> + return (void *) (page_address(sg_page(list) + list->offset));
> }

return sg_virt(list); would be better.

I'll fix up the driver, no worries.

--
Jens Axboe

2007-10-23 07:17:21

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH 04/10] [SG] Update drivers to use sg helpers

On Tue, Oct 23, 2007 at 09:14:07AM +0200, Jens Axboe wrote:
> On Tue, Oct 23 2007, Heiko Carstens wrote:
> > On Mon, Oct 22, 2007 at 08:10:58PM +0200, Jens Axboe wrote:
> > > Signed-off-by: Jens Axboe <[email protected]>
> > > ---
> >
> > You forgot s390's zfcp driver. But unfortunately the trivial fix below
> > doesn't work. No more I/O possible. Swen and/or Christof could you
> > provide a correct fix for this please? Thanks!
> >
> > ---
> > drivers/s390/scsi/zfcp_def.h | 4 ++--
> > drivers/s390/scsi/zfcp_erp.c | 4 ++--
> > 2 files changed, 4 insertions(+), 4 deletions(-)
> >
> > Index: linux-2.6/drivers/s390/scsi/zfcp_def.h
> > ===================================================================
> > --- linux-2.6.orig/drivers/s390/scsi/zfcp_def.h
> > +++ linux-2.6/drivers/s390/scsi/zfcp_def.h
> > @@ -63,7 +63,7 @@
> > static inline void *
> > zfcp_sg_to_address(struct scatterlist *list)
> > {
> > - return (void *) (page_address(list->page) + list->offset);
> > + return (void *) (page_address(sg_page(list) + list->offset));
> > }
>
> return sg_virt(list); would be better.
>
> I'll fix up the driver, no worries.

ok, thanks!

2007-10-23 07:19:03

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, 22 Oct 2007, Linus Torvalds wrote:
> On Mon, 22 Oct 2007, Alan Cox wrote:
> > For structures, not array elements or stack objects. Does gcc now get
> > aligned correct as an attribute on a stack object ?
>
> I think m68k stack layout still guarantees 4-byte-alignment, no?

The stack pointer must be even (i.e. 2 byte-alignment).
But it looks like current gcc always allocates multiples of 4 bytes on
the stack, probably for performance reasons.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2007-10-23 07:27:59

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 08/10] [SG] Update arch/ to use sg helpers

On Mon, Oct 22 2007, Benny Halevy wrote:
> It looks like it could be nice to define and use a helper for
> page_address(sg_page(sg)) (although 11 call sites could use it
> after this patch)
>
> #define sg_pgaddr(sg) page_address(sg_page(sg))
>
> Note that mips sg_{un,}map_sg checked for page_address(sg->page) != 0
> before calling __dma_sync(addr + sg->offset, sg->length, direction)
> and you changed it to addr = (unsigned long) sg_virt(sg) which
> takes sg->offset into account. That said I'm not sure if the original
> code was correct for the (page_address(sg->page) == 0 && sg->offset != 0)
> case...

I initially thought that may have been a bug, but most of the other arch
iommu code handles it in the same way. I don't think it's a bug, since
they want to clear full page ranges anyway. I should not have changed
this code to take the offset into account - I don't think it's an issue,
but it should have been left as-is. Will do that.

> > diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
> > index 5098f58..1a20fe3 100644
> > --- a/arch/x86/kernel/pci-calgary_64.c
> > +++ b/arch/x86/kernel/pci-calgary_64.c
> > @@ -411,8 +411,10 @@ static int calgary_nontranslate_map_sg(struct device* dev,
> > int i;
> >
> > for_each_sg(sg, s, nelems, i) {
> > - BUG_ON(!s->page);
> > - s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
> > + struct page *p = sg_page(s);
> > +
> > + BUG_ON(!p);
>
> why not just BUG_ON(!sg_page(s))?
>
> > + s->dma_address = virt_to_bus(sg_virt(s));
> > s->dma_length = s->length;
> > }
> > return nelems;

I think that is because of a two-stage conversion: the page variable
addition predates the sg_virt() helper.

--
Jens Axboe

2007-10-23 07:30:27

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH] net: fix xfrm build - missing scatterlist.h include

On Tue, Oct 23 2007, Heiko Carstens wrote:
> From: Heiko Carstens <[email protected]>
>
> net/xfrm/xfrm_algo.c: In function 'skb_icv_walk':
> net/xfrm/xfrm_algo.c:555: error: implicit declaration of function 'sg_set_page'
> make[2]: *** [net/xfrm/xfrm_algo.o] Error 1

Thanks, arch fallout... Applied.

--
Jens Axboe

2007-10-23 09:30:35

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, Oct 22 2007 at 23:47 +0200, Linus Torvalds <[email protected]> wrote:
>
> On Mon, 22 Oct 2007, Alan Cox wrote:
>
>> For structures, not array elements or stack objects. Does gcc now get
>> aligned correct as an attribute on a stack object ?
>
> I think m68k stack layout still guarantees 4-byte-alignment, no?
>
>> Still doesn't answer the rather more important question - why not just
>> stick a NULL on the end instead of all the nutty hacks ?
>
> You still do need one bit for the discontiguous case, so it's not like you
> can avoid the hacks anyway (unless you just blow up the structure
> entirely) and make it a separate member). So once you have that
> bit+pointer, using a separate NULL entry isn't exactly prettier.
>
> Especially as we actally want to see the difference between
> "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
> anyway, you should probably use something like "all-ones".
>
> Linus
> -

Everyone is so hysterical about this sg-chaining problem, and such massive
patches are being produced, that when a simple non-intrusive solution is
proposed it is totally ignored because everyone thinks, "I cannot be that
stupid". Well, Einstein said: "Simplicity is the ultimate sophistication".
So no one needs to feel bad.

I'm talking about Benny's proposal from a while back.
(I'm including it below, because I can't be bothered with the
stupid archives' broken search.)

What Benny was proposing is that the scatterlist pointer
might not have the complete information about sizes and
allocations, but the surrounding code always does. So why
not just pass this information to the decision maker -
sg_next() - so it can make the right choice, before a suicide.
So sg_next() becomes:

static inline struct scatterlist *sg_next(struct scatterlist *sg,
int next, int nr)
{
return next < nr ? sg_next_unsafe(sg) : NULL;
}

Where sg_next_unsafe(sg) is what the original sg_next() used to be.

and a user code like for_each_sg() becomes:

/*
* Loop over each sg element, following the pointer to a new list if necessary
*/
#define for_each_sg(sglist, sg, nr, __i) \
for (__i = 0, sg = (sglist); sg; sg = sg_next(sg, ++__i, nr))


In his patch he shows examples of other uses of sg_next; they all fit.

The sg_next usage is new and appears in only a few places, unlike
sg->page, which is used all over the kernel.

I know he has a patch for the complete kernel (I know, I helped a bit),
and it is a fraction of the size of all the patches that were submitted
after that. And it does not have all the problems that we still have now
with slob allocators, stack, and such.

OK, just my $0.02
Boaz Harrosh

-----
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 2dc7464..3a27e03 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -30,7 +30,7 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
((struct scatterlist *) ((unsigned long) (sg)->page & ~0x01))

/**
- * sg_next - return the next scatterlist entry in a list
+ * sg_next_unsafe - return the next scatterlist entry in a list
* @sg: The current sg entry
*
* Usually the next entry will be @sg@ + 1, but if this sg element is part
@@ -41,7 +41,7 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
* the current entry, this function will NOT return NULL for an end-of-list.
*
*/
-static inline struct scatterlist *sg_next(struct scatterlist *sg)
+static inline struct scatterlist *sg_next_unsafe(struct scatterlist *sg)
{
sg++;

@@ -51,11 +51,27 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg)
return sg;
}

+/**
+ * sg_next - return the next scatterlist entry in a list
+ * @sg: The current sg entry
+ * @next: Index of next sg entry
+ * @nr: Number of sg entries in the list
+ *
+ * Returns NULL once @next reaches @nr, i.e. when the current entry is the
+ * last one in the list, so the result can be used to terminate a loop.
+ *
+ */
+static inline struct scatterlist *sg_next(struct scatterlist *sg,
+ int next, int nr)
+{
+ return next < nr ? sg_next_unsafe(sg) : NULL;
+}
+
/*
* Loop over each sg element, following the pointer to a new list if necessary
*/
#define for_each_sg(sglist, sg, nr, __i) \
- for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+ for (__i = 0, sg = (sglist); sg; sg = sg_next(sg, ++__i, nr))

/**
* sg_last - return the last scatterlist entry in a list
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 7238b2d..57cc1dd 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1165,7 +1165,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
sg = rsv_schp->buffer;
sa = vma->vm_start;
for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
- ++k, sg = sg_next(sg)) {
+ sg = sg_next(sg, ++k, rsv_schp->k_use_sg)) {
len = vma->vm_end - sa;
len = (len < sg->length) ? len : sg->length;
if (offset < len) {
@@ -1209,7 +1209,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
sa = vma->vm_start;
sg = rsv_schp->buffer;
for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
- ++k, sg = sg_next(sg)) {
+ sg = sg_next(sg, ++k, rsv_schp->k_use_sg)) {
len = vma->vm_end - sa;
len = (len < sg->length) ? len : sg->length;
sa += len;
@@ -1840,7 +1840,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
}
for (k = 0, sg = schp->buffer, rem_sz = blk_size;
(rem_sz > 0) && (k < mx_sc_elems);
- ++k, rem_sz -= ret_sz, sg = sg_next(sg)) {
+ rem_sz -= ret_sz, sg = sg_next(sg, ++k, mx_sc_elems)) {

num = (rem_sz > scatter_elem_sz_prev) ?
scatter_elem_sz_prev : rem_sz;
@@ -1913,7 +1913,7 @@ sg_write_xfer(Sg_request * srp)
if (res)
return res;

- for (; p; sg = sg_next(sg), ksglen = sg->length,
+ for (; p; sg = sg_next_unsafe(sg), ksglen = sg->length,
p = page_address(sg->page)) {
if (usglen <= 0)
break;
@@ -1991,8 +1991,8 @@ sg_remove_scat(Sg_scatter_hold * schp)
} else {
int k;

- for (k = 0; (k < schp->k_use_sg) && sg->page;
- ++k, sg = sg_next(sg)) {
+ for (k = 0; sg && sg->page;
+ sg = sg_next(sg, ++k, schp->k_use_sg)) {
SCSI_LOG_TIMEOUT(5, printk(
"sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
k, sg->page, sg->length));
@@ -2045,7 +2045,7 @@ sg_read_xfer(Sg_request * srp)
if (res)
return res;

- for (; p; sg = sg_next(sg), ksglen = sg->length,
+ for (; p; sg = sg_next_unsafe(sg), ksglen = sg->length,
p = page_address(sg->page)) {
if (usglen <= 0)
break;
@@ -2092,7 +2092,7 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
if ((!outp) || (num_read_xfer <= 0))
return 0;

- for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) {
+ for (k = 0; sg && sg->page; sg = sg_next(sg, ++k, schp->k_use_sg)) {
num = sg->length;
if (num > num_read_xfer) {
if (__copy_to_user(outp, page_address(sg->page),
@@ -2142,7 +2142,7 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size)
SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size));
rem = size;

- for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) {
+ for (k = 0; sg; sg = sg_next(sg, ++k, rsv_schp->k_use_sg)) {
num = sg->length;
if (rem <= num) {
sfp->save_scat_len = num;

2007-10-23 09:43:19

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Boaz Harrosh wrote:
> On Mon, Oct 22 2007 at 23:47 +0200, Linus Torvalds <[email protected]> wrote:
> >
> > On Mon, 22 Oct 2007, Alan Cox wrote:
> >
> >> For structures, not array elements or stack objects. Does gcc now get
> >> aligned correct as an attribute on a stack object ?
> >
> > I think m68k stack layout still guarantees 4-byte-alignment, no?
> >
> >> Still doesn't answer the rather more important question - why not just
> >> stick a NULL on the end instead of all the nutty hacks ?
> >
> > You still do need one bit for the discontiguous case, so it's not like you
> > can avoid the hacks anyway (unless you just blow up the structure
> > entirely) and make it a separate member). So once you have that
> > bit+pointer, using a separate NULL entry isn't exactly prettier.
> >
> > Especially as we actally want to see the difference between
> > "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
> > anyway, you should probably use something like "all-ones".
> >
> > Linus
> > -
>
> Every one is so hysterical about this sg-chaining problem. And massive
> patches produced, that when a simple none intrusive solution is proposed
> it is totally ignored because every one thinks, "I can not be that stupid".
> Well Einstein said: "Simplicity is the ultimate sophistication". So no one
> need to feel bad.

It's all about the end goal - having maintainable and resilient code.
And I think the sg code will be better once we get past the next day or
so, and it'll be more robust. That is what matters to me, not the
simplicity of the patch itself.

--
Jens Axboe

2007-10-23 09:51:46

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007 at 11:41 +0200, Jens Axboe <[email protected]> wrote:
> On Tue, Oct 23 2007, Boaz Harrosh wrote:
>> On Mon, Oct 22 2007 at 23:47 +0200, Linus Torvalds <[email protected]> wrote:
>>> On Mon, 22 Oct 2007, Alan Cox wrote:
>>>
>>>> For structures, not array elements or stack objects. Does gcc now get
>>>> aligned correct as an attribute on a stack object ?
>>> I think m68k stack layout still guarantees 4-byte-alignment, no?
>>>
>>>> Still doesn't answer the rather more important question - why not just
>>>> stick a NULL on the end instead of all the nutty hacks ?
>>> You still do need one bit for the discontiguous case, so it's not like you
>>> can avoid the hacks anyway (unless you just blow up the structure
>>> entirely) and make it a separate member). So once you have that
>>> bit+pointer, using a separate NULL entry isn't exactly prettier.
>>>
>>> Especially as we actally want to see the difference between
>>> "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
>>> anyway, you should probably use something like "all-ones".
>>>
>>> Linus
>>> -
>> Every one is so hysterical about this sg-chaining problem. And massive
>> patches produced, that when a simple none intrusive solution is proposed
>> it is totally ignored because every one thinks, "I can not be that stupid".
>> Well Einstein said: "Simplicity is the ultimate sophistication". So no one
>> need to feel bad.
>
> It's all about the end goal - having maintainable and resilient code.
> And I think the sg code will be better once we get past the next day or
> so, and it'll be more robust. That is what matters to me, not the
> simplicity of the patch itself.
>

But that is exactly what his patch is: much more robust, because you do not
rely on sglist content but on outside information that you already have.
Have you had a hard look at his solution? It just simply falls into place.
Please try it out for yourself. I did, and it works.

Boaz


2007-10-23 09:56:45

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Boaz Harrosh wrote:
> On Tue, Oct 23 2007 at 11:41 +0200, Jens Axboe <[email protected]> wrote:
> > On Tue, Oct 23 2007, Boaz Harrosh wrote:
> >> On Mon, Oct 22 2007 at 23:47 +0200, Linus Torvalds <[email protected]> wrote:
> >>> On Mon, 22 Oct 2007, Alan Cox wrote:
> >>>
> >>>> For structures, not array elements or stack objects. Does gcc now get
> >>>> aligned correct as an attribute on a stack object ?
> >>> I think m68k stack layout still guarantees 4-byte-alignment, no?
> >>>
> >>>> Still doesn't answer the rather more important question - why not just
> >>>> stick a NULL on the end instead of all the nutty hacks ?
> >>> You still do need one bit for the discontiguous case, so it's not like you
> >>> can avoid the hacks anyway (unless you just blow up the structure
> >>> entirely) and make it a separate member). So once you have that
> >>> bit+pointer, using a separate NULL entry isn't exactly prettier.
> >>>
> >>> Especially as we actally want to see the difference between
> >>> "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
> >>> anyway, you should probably use something like "all-ones".
> >>>
> >>> Linus
> >>> -
> >> Every one is so hysterical about this sg-chaining problem. And massive
> >> patches produced, that when a simple none intrusive solution is proposed
> >> it is totally ignored because every one thinks, "I can not be that stupid".
> >> Well Einstein said: "Simplicity is the ultimate sophistication". So no one
> >> need to feel bad.
> >
> > It's all about the end goal - having maintainable and resilient code.
> > And I think the sg code will be better once we get past the next day or
> > so, and it'll be more robust. That is what matters to me, not the
> > simplicity of the patch itself.
> >
>
> But that is exactly what his patch is. Much more robust. Because you do not
> relay on sglist content but on outside information, that you already have.
> Have you had an hard look at his solution? It just simply falls into place.
> Please try it out for yourself. I did, and it works.

Sure, I looked at it, it's not exactly rocket science, I do understand
what it achieves. I don't think the patch is bad as such, I'm merely
trying to state that I think the end code AND interface will be much
nicer with the current direction that the sg helpers are moving.

It does rely on outside context, because you need to pass in the sglist
number. In my opinion, this patch would be a bandaid for the original
chain code until we got around to fixing the PAGEALLOC crash. Which we
did, it's now merged. The patch doesn't make the code cleaner, it makes
it uglier. It'll work, but that still doesn't mean I have to agree it's
a nice design.

--
Jens Axboe

2007-10-23 10:24:33

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007 at 11:55 +0200, Jens Axboe <[email protected]> wrote:
> On Tue, Oct 23 2007, Boaz Harrosh wrote:
>> On Tue, Oct 23 2007 at 11:41 +0200, Jens Axboe <[email protected]> wrote:
>>> On Tue, Oct 23 2007, Boaz Harrosh wrote:
>>>> On Mon, Oct 22 2007 at 23:47 +0200, Linus Torvalds <[email protected]> wrote:
>>>>> On Mon, 22 Oct 2007, Alan Cox wrote:
>>>>>
>>>>>> For structures, not array elements or stack objects. Does gcc now get
>>>>>> aligned correct as an attribute on a stack object ?
>>>>> I think m68k stack layout still guarantees 4-byte-alignment, no?
>>>>>
>>>>>> Still doesn't answer the rather more important question - why not just
>>>>>> stick a NULL on the end instead of all the nutty hacks ?
>>>>> You still do need one bit for the discontiguous case, so it's not like you
>>>>> can avoid the hacks anyway (unless you just blow up the structure
>>>>> entirely) and make it a separate member). So once you have that
>>>>> bit+pointer, using a separate NULL entry isn't exactly prettier.
>>>>>
>>>>> Especially as we actally want to see the difference between
>>>>> "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
>>>>> anyway, you should probably use something like "all-ones".
>>>>>
>>>>> Linus
>>>>> -
>>>> Every one is so hysterical about this sg-chaining problem. And massive
>>>> patches produced, that when a simple none intrusive solution is proposed
>>>> it is totally ignored because every one thinks, "I can not be that stupid".
>>>> Well Einstein said: "Simplicity is the ultimate sophistication". So no one
>>>> need to feel bad.
>>> It's all about the end goal - having maintainable and resilient code.
>>> And I think the sg code will be better once we get past the next day or
>>> so, and it'll be more robust. That is what matters to me, not the
>>> simplicity of the patch itself.
>>>
>> But that is exactly what his patch is. Much more robust. Because you do not
>> relay on sglist content but on outside information, that you already have.
>> Have you had an hard look at his solution? It just simply falls into place.
>> Please try it out for yourself. I did, and it works.
>
> Sure, I looked at it, it's not exactly rocket science, I do understand
> what it achieves. I don't think the patch is bad as such, I'm merely
> trying to state that I think the end code AND interface will be much
> nicer with the current direction that the sg helpers are moving.
>
> It does rely on outside context, because you need to pass in the sglist
> number. In my opinion, this patch would be a bandaid for the original
> chain code until we got around to fixing the PAGEALLOC crash. Which we
> did, it's now merged. The patch doesn't make the code cleaner, it makes
> it uglier. It'll work, but that still doesn't mean I have to agree it's
> a nice design.
>
A nice design is to have a struct like BIO that holds a pointer to the
array of scatterlists, size, ..., and next and prev pointers to the adjacent
chunks. Then have all kernel code that now accepts scatterlist* and size
accept a pointer to such a structure. And all is clear and defined.

But since we do not do that, and every single API in the kernel that
receives a scatterlist pointer also receives an sg_count parameter,
then I do not see what is so hacky about giving that sg_count parameter
to the one that needs it the most: sg_next().

OK, I guess this is all a matter of taste, so there is no point arguing
about it any more. I can see your view, and the work has been done, so
I guess there is no point going back. If it all works then it's for the
best.

Thanks, Jens, for doing all this. The performance gain is substantial
and we will all enjoy it.

Boaz


2007-10-23 10:30:38

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Boaz Harrosh wrote:
> On Tue, Oct 23 2007 at 11:55 +0200, Jens Axboe <[email protected]> wrote:
> > On Tue, Oct 23 2007, Boaz Harrosh wrote:
> >> On Tue, Oct 23 2007 at 11:41 +0200, Jens Axboe <[email protected]> wrote:
> >>> On Tue, Oct 23 2007, Boaz Harrosh wrote:
> >>>> On Mon, Oct 22 2007 at 23:47 +0200, Linus Torvalds <[email protected]> wrote:
> >>>>> On Mon, 22 Oct 2007, Alan Cox wrote:
> >>>>>
> >>>>>> For structures, not array elements or stack objects. Does gcc now get
> >>>>>> aligned correct as an attribute on a stack object ?
> >>>>> I think m68k stack layout still guarantees 4-byte-alignment, no?
> >>>>>
> >>>>>> Still doesn't answer the rather more important question - why not just
> >>>>>> stick a NULL on the end instead of all the nutty hacks ?
> >>>>> You still do need one bit for the discontiguous case, so it's not like you
> >>>>> can avoid the hacks anyway (unless you just blow up the structure
> >>>>> entirely) and make it a separate member). So once you have that
> >>>>> bit+pointer, using a separate NULL entry isn't exactly prettier.
> >>>>>
> >>>>> Especially as we actally want to see the difference between
> >>>>> "end-of-allocation" and "not yet filled in", so you shouldn't use NULL
> >>>>> anyway, you should probably use something like "all-ones".
> >>>>>
> >>>>> Linus
> >>>>> -
> >>>> Every one is so hysterical about this sg-chaining problem. And massive
> >>>> patches produced, that when a simple none intrusive solution is proposed
> >>>> it is totally ignored because every one thinks, "I can not be that stupid".
> >>>> Well Einstein said: "Simplicity is the ultimate sophistication". So no one
> >>>> need to feel bad.
> >>> It's all about the end goal - having maintainable and resilient code.
> >>> And I think the sg code will be better once we get past the next day or
> >>> so, and it'll be more robust. That is what matters to me, not the
> >>> simplicity of the patch itself.
> >>>
> >> But that is exactly what his patch is. Much more robust. Because you do not
> >> relay on sglist content but on outside information, that you already have.
> >> Have you had an hard look at his solution? It just simply falls into place.
> >> Please try it out for yourself. I did, and it works.
> >
> > Sure, I looked at it, it's not exactly rocket science, I do understand
> > what it achieves. I don't think the patch is bad as such, I'm merely
> > trying to state that I think the end code AND interface will be much
> > nicer with the current direction that the sg helpers are moving.
> >
> > It does rely on outside context, because you need to pass in the sglist
> > number. In my opinion, this patch would be a bandaid for the original
> > chain code until we got around to fixing the PAGEALLOC crash. Which we
> > did, it's now merged. The patch doesn't make the code cleaner, it makes
> > it uglier. It'll work, but that still doesn't mean I have to agree it's
> > a nice design.
> >
> A nice design is to have an struct like BIO. That holds a pointer to the
> array of scatterlists, size, ..., and a next and prev pointers to the next
> chunks. Than have all kernel code that now accepts scatterlist* and size
> accept a pointer to such structure. And all is clear and defined.
>
> But since we do not do that, and every single API in the kernel that
> receives a scatterlist pointer also receives an sg_count parameter,
> than I do not see what is so hacky about giving that sg_count parameter
> to the one that needs it the most. sg_next();

Not all paths need to know the exact number though, and with the changes
you could legitimately pass in just the header and iteration would work
fine.

> OK I guess this is all a matter of taste so there is no point arguing
> about it any more. I can see your view, and the work has been done so
> I guess there is no point going back. If it all works than it's for the
> best.

Yes, agreed, debating taste is usually not very interesting as we won't
get far ;-)

> Thanks Jens for doing all this, The performance gain is substantial
> and we will all enjoy it.

My pleasure, I just wish it could have been a little less painful. But
in a day or two, it should all be behind us and we can move forward with
making good use of it.

--
Jens Axboe

2007-10-23 10:34:21

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout


* Jens Axboe <[email protected]> wrote:

> It's all about the end goal - having maintainable and resilient code.
> And I think the sg code will be better once we get past the next day
> or so, and it'll be more robust. That is what matters to me, not the
> simplicity of the patch itself.

Linus' latest tree, which has your SG-list enhancements included,
certainly works fine here and does not have the problems of the first
iteration.

Ingo

2007-10-23 10:44:50

by Christian Borntraeger

[permalink] [raw]
Subject: Re: [PATCH 06/10] [SG] Update net/ to use sg helpers

Fix sctp compile

sctp fails to compile with
net/sctp/sm_make_chunk.c: In function 'sctp_pack_cookie':
net/sctp/sm_make_chunk.c:1516: error: implicit declaration of function 'sg_init_table'
net/sctp/sm_make_chunk.c:1517: error: implicit declaration of function 'sg_set_page'

use the proper include file.

SCTP maintainers Vlad Yasevich and Sridhar Samudrala are CCed.

Signed-off-by: Christian Borntraeger <[email protected]>
---
net/sctp/sm_make_chunk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6/net/sctp/sm_make_chunk.c
===================================================================
--- linux-2.6.orig/net/sctp/sm_make_chunk.c
+++ linux-2.6/net/sctp/sm_make_chunk.c
@@ -56,7 +56,7 @@
#include <linux/ipv6.h>
#include <linux/net.h>
#include <linux/inet.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <net/sock.h>

2007-10-23 10:47:31

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 06/10] [SG] Update net/ to use sg helpers

On Tue, Oct 23 2007, Christian Borntraeger wrote:
> Fix sctp compile
>
> sctp fails to compile with
> net/sctp/sm_make_chunk.c: In function 'sctp_pack_cookie':
> net/sctp/sm_make_chunk.c:1516: error: implicit declaration of function 'sg_init_table'
> net/sctp/sm_make_chunk.c:1517: error: implicit declaration of function 'sg_set_page'
>
> use the proper include file.
>
> SCTP maintainers Vlad Yasevich and Sridhar Samudrala are CCed.
>
> Signed-off-by: Christian Borntraeger <[email protected]>

Looks good, thanks!

--
Jens Axboe

2007-10-23 10:57:51

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Ingo Molnar wrote:
>
> * Jens Axboe <[email protected]> wrote:
>
> > It's all about the end goal - having maintainable and resilient code.
> > And I think the sg code will be better once we get past the next day
> > or so, and it'll be more robust. That is what matters to me, not the
> > simplicity of the patch itself.
>
> Linus' latest tree, which has your SG-list enhancements included,
> certainly works fine here and does not have the problems of the first
> iteration.

That's good to hear :-)

I have a series of pending patches where I've collected fallout patches
from people and some from myself here:

http://git.kernel.dk/?p=linux-2.6-block.git;a=shortlog;h=sg

or pullable from

git://git.kernel.dk/linux-2.6-block.git sg

As far as I can tell, all archs should now compile and work. I've tested
(compile tested) alpha/arm/ia64/m68k/mips/sh/sparc this morning and
fixed whatever showed up. It's all just a missing linux/scatterlist.h
include or ->page usage.

--
Jens Axboe

2007-10-23 11:28:45

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout


* Jens Axboe <[email protected]> wrote:

> > Linus' latest tree, which has your SG-list enhancements included,
> > certainly works fine here and does not have the problems of the
> > first iteration.
>
> That's good to hear :-)
>
> I have a series of pending patches where I've collected fallout
> patches from people and some from myself here:
>
> http://git.kernel.dk/?p=linux-2.6-block.git;a=shortlog;h=sg
>
> or pullable from
>
> git://git.kernel.dk/linux-2.6-block.git sg

i've attached your fixes as a diff against linus-latest below - for
those who'd like to have it in patch form.

Ingo

diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index ee07dce..2d00a08 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -7,6 +7,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
+#include <linux/scatterlist.h>
#include <linux/log2.h>

#include <asm/io.h>
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 9d371e4..52fc6a8 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -29,6 +29,7 @@
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/list.h>
+#include <linux/scatterlist.h>

#include <asm/cacheflush.h>

diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index b0b034c..b1b4052 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/scatterlist.h>

#include <asm/cache.h>
#include <asm/io.h>
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 41f8e32..9448d4e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/scatterlist.h>

#include <asm/cacheflush.h>
#include <asm/dma.h> /* for DMA_CHUNK_SIZE */
diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c
index 78e8277..b70324e 100644
--- a/arch/sparc64/kernel/iommu_common.c
+++ b/arch/sparc64/kernel/iommu_common.c
@@ -233,6 +233,11 @@ unsigned long prepare_sg(struct scatterlist *sg, int nents)
dma_sg->dma_address = dent_addr;
dma_sg->dma_length = dent_len;

+ if (dma_sg != sg) {
+ dma_sg = next_sg(dma_sg);
+ dma_sg->dma_length = 0;
+ }
+
return ((unsigned long) dent_addr +
(unsigned long) dent_len +
(IO_PAGE_SIZE - 1UL)) >> IO_PAGE_SHIFT;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 61c2e39..de5ba47 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1351,11 +1351,21 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
new_segment:
if (!sg)
sg = sglist;
- else
+ else {
+ /*
+ * If the driver previously mapped a shorter
+ * list, we could see a termination bit
+ * prematurely unless it fully inits the sg
+ * table on each mapping. We KNOW that there
+ * must be more entries here or the driver
+ * would be buggy, so force clear the
+ * termination bit to avoid doing a full
+ * sg_init_table() in drivers for each command.
+ */
+ sg->page_link &= ~0x02;
sg = sg_next(sg);
+ }

- sg_dma_len(sg) = 0;
- sg_dma_address(sg) = 0;
sg_set_page(sg, bvec->bv_page);
sg->length = nbytes;
sg->offset = bvec->bv_offset;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index ec55a17..6a6f2e0 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -47,6 +47,7 @@
#include <linux/spinlock.h>
#include <linux/kmod.h>
#include <linux/pci.h>
+#include <linux/scatterlist.h>

#include <asm/byteorder.h>
#include <asm/irq.h>
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738..ead7230 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -1776,7 +1776,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(chunk->page_list[i].page)
+ pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = phys_to_abs(pgaddr +
(pginfo->next_hwpage *
@@ -1832,7 +1832,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
{
int t;
for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
*(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1867,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+ pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT );
*kpage = phys_to_abs(pgaddr);
if ( !(*kpage) ) {
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b3c4dbf..7c60cbd 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -42,6 +42,7 @@
#include <linux/reboot.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/scatterlist.h>

#include <asm/byteorder.h>
#include <asm/cache.h> /* for L1_CACHE_BYTES */
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index e5c3239..e527a0e 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -28,6 +28,7 @@
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
+#include <linux/scatterlist.h>

#include <asm/byteorder.h>
#include <asm/io.h>
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index b3d7031..0c4ab3b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1962,7 +1962,7 @@ static void intel_free_coherent(struct device *hwdev, size_t size,
free_pages((unsigned long)vaddr, order);
}

-#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, int dir)
{
@@ -2010,7 +2010,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
struct scatterlist *sg;

for_each_sg(sglist, sg, nelems, i) {
- BUG_ON(!sg->page);
+ BUG_ON(!sg_page(sg));
sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
sg->dma_length = sg->length;
}
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 7507067..fd5d0c1 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -559,6 +559,7 @@ zfcp_sg_list_alloc(struct zfcp_sg_list *sg_list, size_t size)
retval = -ENOMEM;
goto out;
}
+ sg_init_table(sg_list->sg, sg_list->count);

for (i = 0, sg = sg_list->sg; i < sg_list->count; i++, sg++) {
sg->length = min(size, PAGE_SIZE);
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 57cac70..326e7ee 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -63,7 +63,7 @@
static inline void *
zfcp_sg_to_address(struct scatterlist *list)
{
- return (void *) (page_address(list->page) + list->offset);
+ return sg_virt(list);
}

/**
@@ -74,7 +74,7 @@ zfcp_sg_to_address(struct scatterlist *list)
static inline void
zfcp_address_to_sg(void *address, struct scatterlist *list)
{
- list->page = virt_to_page(address);
+ sg_set_page(list, virt_to_page(address));
list->offset = ((unsigned long) address) & (PAGE_SIZE - 1);
}

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index a6475a2..9438d0b 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -308,13 +308,15 @@ zfcp_erp_adisc(struct zfcp_port *port)
if (send_els == NULL)
goto nomem;

- send_els->req = kzalloc(sizeof(struct scatterlist), GFP_ATOMIC);
+ send_els->req = kmalloc(sizeof(struct scatterlist), GFP_ATOMIC);
if (send_els->req == NULL)
goto nomem;
+ sg_init_table(send_els->req, 1);

- send_els->resp = kzalloc(sizeof(struct scatterlist), GFP_ATOMIC);
+ send_els->resp = kmalloc(sizeof(struct scatterlist), GFP_ATOMIC);
if (send_els->resp == NULL)
goto nomem;
+ sg_init_table(send_els->resp, 1);

address = (void *) get_zeroed_page(GFP_ATOMIC);
if (address == NULL)
@@ -363,7 +365,7 @@ zfcp_erp_adisc(struct zfcp_port *port)
retval = -ENOMEM;
freemem:
if (address != NULL)
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
if (send_els != NULL) {
kfree(send_els->req);
kfree(send_els->resp);
@@ -437,7 +439,7 @@ zfcp_erp_adisc_handler(unsigned long data)

out:
zfcp_port_put(port);
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
kfree(send_els->req);
kfree(send_els->resp);
kfree(send_els);
diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 03f19b8..17b4a7c 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf)

req_len = fin = 0;
scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
- kaddr = kmap_atomic(sg_page(sgpnt->page), KM_IRQ0);
+ kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
len = sgpnt->length;
if ((req_len + len) > buflen) {
len = buflen - req_len;
diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h
index 1eb8aac..e99406a 100644
--- a/include/asm-arm/dma-mapping.h
+++ b/include/asm-arm/dma-mapping.h
@@ -5,7 +5,7 @@

#include <linux/mm.h> /* need struct page */

-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>

/*
* DMA-consistent mapping functions. These allocate/free a region of
@@ -274,8 +274,8 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
for (i = 0; i < nents; i++, sg++) {
char *virt;

- sg->dma_address = page_to_dma(dev, sg->page) + sg->offset;
- virt = page_address(sg->page) + sg->offset;
+ sg->dma_address = page_to_dma(dev, sg_page(sg)) + sg->offset;
+ virt = sg_virt(sg);

if (!arch_is_coherent())
dma_cache_maint(virt, sg->length, dir);
@@ -371,7 +371,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
int i;

for (i = 0; i < nents; i++, sg++) {
- char *virt = page_address(sg->page) + sg->offset;
+ char *virt = sg_virt(sg);
if (!arch_is_coherent())
dma_cache_maint(virt, sg->length, dir);
}
@@ -384,7 +384,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
int i;

for (i = 0; i < nents; i++, sg++) {
- char *virt = page_address(sg->page) + sg->offset;
+ char *virt = sg_virt(sg);
if (!arch_is_coherent())
dma_cache_maint(virt, sg->length, dir);
}
diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h
index 81e3426..a713163 100644
--- a/include/asm-avr32/dma-mapping.h
+++ b/include/asm-avr32/dma-mapping.h
@@ -217,8 +217,8 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
for (i = 0; i < nents; i++) {
char *virt;

- sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
- virt = page_address(sg[i].page) + sg[i].offset;
+ sg[i].dma_address = page_to_bus(sg_page(&sg[i])) + sg[i].offset;
+ virt = sg_virt(&sg[i]);
dma_cache_sync(dev, virt, sg[i].length, direction);
}

@@ -327,8 +327,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int i;

for (i = 0; i < nents; i++) {
- dma_cache_sync(dev, page_address(sg[i].page) + sg[i].offset,
- sg[i].length, direction);
+ dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, direction);
}
}

diff --git a/include/asm-blackfin/scatterlist.h b/include/asm-blackfin/scatterlist.h
index 32128d5..04f4487 100644
--- a/include/asm-blackfin/scatterlist.h
+++ b/include/asm-blackfin/scatterlist.h
@@ -20,7 +20,6 @@ struct scatterlist {
* returns, or alternatively stop on the first sg_dma_len(sg) which
* is 0.
*/
-#define sg_address(sg) (page_address((sg)->page) + (sg)->offset)
#define sg_dma_address(sg) ((sg)->dma_address)
#define sg_dma_len(sg) ((sg)->length)

diff --git a/include/asm-frv/scatterlist.h b/include/asm-frv/scatterlist.h
index f7da007..99ba76e 100644
--- a/include/asm-frv/scatterlist.h
+++ b/include/asm-frv/scatterlist.h
@@ -4,19 +4,19 @@
#include <asm/types.h>

/*
- * Drivers must set either ->address or (preferred) ->page and ->offset
+ * Drivers must set either ->address or (preferred) page and ->offset
* to indicate where data must be transferred to/from.
*
- * Using ->page is recommended since it handles highmem data as well as
+ * Using page is recommended since it handles highmem data as well as
* low mem. ->address is restricted to data which has a virtual mapping, and
- * it will go away in the future. Updating to ->page can be automated very
+ * it will go away in the future. Updating to page can be automated very
* easily -- something like
*
* sg->address = some_ptr;
*
* can be rewritten as
*
- * sg->page = virt_to_page(some_ptr);
+ * sg_set_page(sg, virt_to_page(some_ptr));
* sg->offset = (unsigned long) some_ptr & ~PAGE_MASK;
*
* and that's it. There's no excuse for not highmem enabling YOUR driver. /jens
diff --git a/include/asm-m68knommu/scatterlist.h b/include/asm-m68knommu/scatterlist.h
index 1094284..afc4788 100644
--- a/include/asm-m68knommu/scatterlist.h
+++ b/include/asm-m68knommu/scatterlist.h
@@ -14,7 +14,6 @@ struct scatterlist {
unsigned int length;
};

-#define sg_address(sg) (page_address((sg)->page) + (sg)->offset)
#define sg_dma_address(sg) ((sg)->dma_address)
#define sg_dma_len(sg) ((sg)->length)

diff --git a/include/asm-parisc/scatterlist.h b/include/asm-parisc/scatterlist.h
index cd3cfdf..62269b3 100644
--- a/include/asm-parisc/scatterlist.h
+++ b/include/asm-parisc/scatterlist.h
@@ -18,7 +18,7 @@ struct scatterlist {
__u32 iova_length; /* bytes mapped */
};

-#define sg_virt_addr(sg) ((unsigned long)(page_address(sg->page) + sg->offset))
+#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
#define sg_dma_address(sg) ((sg)->iova)
#define sg_dma_len(sg) ((sg)->iova_length)

diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index 65be95d..ff52013 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -285,9 +285,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
BUG_ON(direction == DMA_NONE);

for_each_sg(sgl, sg, nents, i) {
- BUG_ON(!sg->page);
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
- sg->dma_address = page_to_bus(sg->page) + sg->offset;
+ BUG_ON(!sg_page(sg));
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+ sg->dma_address = page_to_bus(sg_page(sg)) + sg->offset;
}

return nents;
@@ -328,7 +328,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev,
BUG_ON(direction == DMA_NONE);

for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}

static inline void dma_sync_sg_for_device(struct device *dev,
@@ -341,7 +341,7 @@ static inline void dma_sync_sg_for_device(struct device *dev,
BUG_ON(direction == DMA_NONE);

for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}

static inline int dma_mapping_error(dma_addr_t dma_addr)
diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h
index 84fefda..fcea067 100644
--- a/include/asm-sh/dma-mapping.h
+++ b/include/asm-sh/dma-mapping.h
@@ -2,7 +2,7 @@
#define __ASM_SH_DMA_MAPPING_H

#include <linux/mm.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <asm/cacheflush.h>
#include <asm/io.h>

@@ -85,10 +85,9 @@ static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,

for (i = 0; i < nents; i++) {
#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT)
- dma_cache_sync(dev, page_address(sg[i].page) + sg[i].offset,
- sg[i].length, dir);
+ dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir);
#endif
- sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+ sg[i].dma_address = sg_phys(&sg[i]);
}

return nents;
@@ -138,10 +137,9 @@ static inline void dma_sync_sg(struct device *dev, struct scatterlist *sg,

for (i = 0; i < nelems; i++) {
#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT)
- dma_cache_sync(dev, page_address(sg[i].page) + sg[i].offset,
- sg[i].length, dir);
+ dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir);
#endif
- sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+ sg[i].dma_address = sg_phys(&sg[i]);
}
}

diff --git a/include/asm-sh64/dma-mapping.h b/include/asm-sh64/dma-mapping.h
index e661857..1438b76 100644
--- a/include/asm-sh64/dma-mapping.h
+++ b/include/asm-sh64/dma-mapping.h
@@ -2,7 +2,7 @@
#define __ASM_SH_DMA_MAPPING_H

#include <linux/mm.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <asm/io.h>

struct pci_dev;
@@ -71,10 +71,9 @@ static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,

for (i = 0; i < nents; i++) {
#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT)
- dma_cache_sync(dev, page_address(sg[i].page) + sg[i].offset,
- sg[i].length, dir);
+ dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir);
#endif
- sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+ sg[i].dma_address = sg_phys(&sg[i]);
}

return nents;
@@ -124,10 +123,9 @@ static inline void dma_sync_sg(struct device *dev, struct scatterlist *sg,

for (i = 0; i < nelems; i++) {
#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT)
- dma_cache_sync(dev, page_address(sg[i].page) + sg[i].offset,
- sg[i].length, dir);
+ dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir);
#endif
- sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+ sg[i].dma_address = sg_phys(&sg[i]);
}
}

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d5a9785..658476c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -56,7 +56,7 @@
#include <linux/ipv6.h>
#include <linux/net.h>
#include <linux/inet.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <net/sock.h>

diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index fb2220a..313d4be 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
#include <linux/crypto.h>
+#include <linux/scatterlist.h>
#include <net/xfrm.h>
#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
#include <net/ah.h>

2007-10-23 14:32:53

by John Stoffel

[permalink] [raw]
Subject: Re: [PATCH 02/10] [SG] Update block layer to use sg helpers

>>>>> "Jens" == Jens Axboe <[email protected]> writes:

Jens> Signed-off-by: Jens Axboe <[email protected]>
Jens> ---
Jens> block/ll_rw_blk.c | 8 ++++++--
Jens> 1 files changed, 6 insertions(+), 2 deletions(-)

Jens> diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
Jens> index 8025d64..61c2e39 100644
Jens> --- a/block/ll_rw_blk.c
Jens> +++ b/block/ll_rw_blk.c
Jens> @@ -1354,8 +1354,9 @@ new_segment:
Jens> else
Jens> sg = sg_next(sg);

Jens> - memset(sg, 0, sizeof(*sg));
Jens> - sg->page = bvec->bv_page;
Jens> + sg_dma_len(sg) = 0;
Jens> + sg_dma_address(sg) = 0;

Why don't you call these something like sg_set_dma_len() and
sg_set_dma_address() instead, to make it clear these set the values
and don't read them?

Jens> + sg_set_page(sg, bvec->bv_page);

Esp since you have the sg_set_page() right below it.


Jens> sg->length = nbytes;
Jens> sg->offset = bvec->bv_offset;
Jens> nsegs++;
Jens> @@ -1363,6 +1364,9 @@ new_segment:
Jens> bvprv = bvec;
Jens> } /* segments in rq */

Jens> + if (sg)
Jens> + __sg_mark_end(sg);
Jens> +
Jens> return nsegs;
Jens> }

Jens> --
Jens> 1.5.3.GIT

Jens> -
Jens> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Jens> the body of a message to [email protected]
Jens> More majordomo info at http://vger.kernel.org/majordomo-info.html
Jens> Please read the FAQ at http://www.tux.org/lkml/


Jens> !DSPAM:471ce899205391598813817!

2007-10-23 14:49:23

by Arnd Bergmann

[permalink] [raw]
Subject: [PATCH][SG] fix typo in ps3rom.c

This was accidentally introduced by yesterday's change to the SG handling.

Signed-off-by: Arnd Bergmann <[email protected]>

---

On Monday 22 October 2007, Jens Axboe wrote:
>
> I split the patch up into a few pieces, so it can be applied safely.
> It builds with allyesconfig on i386 and x86-64, and it's been booted
> and tested on both those archs and ppc64 as well.

Not sure if this has been reported already, but I needed the trivial fix
to build the ps3rom driver.

diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 03f19b8..17b4a7c 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf)

req_len = fin = 0;
scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
- kaddr = kmap_atomic(sg_page(sgpnt->page), KM_IRQ0);
+ kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
len = sgpnt->length;
if ((req_len + len) > buflen) {
len = buflen - req_len;

2007-10-23 15:26:27

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Tue, 23 Oct 2007, Boaz Harrosh wrote:
>
> A nice design is to have a struct like BIO. That holds a pointer to the
> array of scatterlists, size, ..., and a next and prev pointers to the next
> chunks. Then have all kernel code that now accepts scatterlist* and size
> accept a pointer to such a structure. And all is clear and defined.

Yes, that would be one clean situation.

> But since we do not do that, and every single API in the kernel that
> receives a scatterlist pointer also receives an sg_count parameter,
> then I do not see what is so hacky about giving that sg_count parameter
> to the one that needs it the most. sg_next();

Well, I'd personally actually prefer to *not* have the count be passed
down explicitly, because it's just too error prone. So I'd much rather see
the count implicit in the list: whether it's in an explicit header
structure (that is the *only* thing passed down) or whether it's embedded
in the list itself is not important.

Since the list itself has to have the "next pointer" for chaining, and
thus already has "embedded information" in it, it actually does make sense
in my opinion to just embed the end-of-list information too. And the end
result right now is pretty simple, with "sg_next()" being really simple to
use, and there being no way to screw things up by getting the count and
the sg pointer out of sync.

My biggest complaint right now is that a lot of users of the sg *filling*
functions were mindlessly converted, so we have code like

cryptoloop.c: sg_set_page(&sg_in, in_page);
cryptoloop.c: sg_in.offset = in_offs;
cryptoloop.c: sg_in.length = sz;

which is just really stupid, and we should have a function for that. But
worse is code like this:

ub.c: sg_set_page(sg, virt_to_page(sc->top_sense));
ub.c: sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
ub.c: sg->length = UB_SENSE_SIZE;

which again was converted "line by line" and we actually *do* have a
function to do the above three lines as

sg_set_buf(sg, sc->top_sense, UB_SENSE_SIZE);

where that *single* line is just tons shorter but more importantly, more
readable, than the mess that is a brute-force conversion.

So I think the SG stuff looks ok now, but I think we have a lot of "fix up
the rough edges" to go!

(The above is not the only case. Just grep for "sg_set_page", and you'll
see several examples of this kind of hard-to-read code. Basically, I don't
think it's ever a good idea to initialize the SG entries one by one, and
even when we have a hard page/offset/size thing, we should not set them
one by one, and we should probably extend sg_set_page() to always take
offset and length too, since setting one without the other two is never
really sensible!)

Linus

2007-10-23 17:10:40

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Mon, Oct 22 2007 at 20:11 +0200, Jens Axboe <[email protected]> wrote:
> Change the page member of the scatterlist structure to be an unsigned
> long, and encode more stuff in the lower bits:
>
> - Bits 0 and 1 zero: this is a normal sg entry. Next sg entry is located
> at sg + 1.
> - Bit 0 set: this is a chain entry, the next real entry is at ->page_link
> with the two low bits masked off.
> - Bit 1 set: this is the final entry in the sg list. sg_next() will return
> NULL when passed such an entry.
>
> It's thus important that sg table users use the proper accessors to get
> and set the page member.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
<snip>
>
> /**
> * sg_set_page - Set sg entry to point at given page
> @@ -20,11 +37,20 @@
> **/
> static inline void sg_set_page(struct scatterlist *sg, struct page *page)
> {
> - sg->page = page;
> + unsigned long page_link = sg->page_link & 0x3;
> +
You might want to put a BUG_ON(page & 0x3); Make sure
you're not losing information. (The m68k problem)

> + sg->page_link = page_link | (unsigned long) page;
> }
>
> -#define sg_page(sg) ((sg)->page)
> +#define sg_page(sg) ((struct page *) ((sg)->page_link & ~0x3))
>
<snip>

Boaz

2007-10-23 18:33:40

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Boaz Harrosh wrote:
> On Mon, Oct 22 2007 at 20:11 +0200, Jens Axboe <[email protected]> wrote:
> > Change the page member of the scatterlist structure to be an unsigned
> > long, and encode more stuff in the lower bits:
> >
> > - Bits 0 and 1 zero: this is a normal sg entry. Next sg entry is located
> > at sg + 1.
> > - Bit 0 set: this is a chain entry, the next real entry is at ->page_link
> > with the two low bits masked off.
> > - Bit 1 set: this is the final entry in the sg list. sg_next() will return
> > NULL when passed such an entry.
> >
> > It's thus important that sg table users use the proper accessors to get
> > and set the page member.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
> > ---
> <snip>
> >
> > /**
> > * sg_set_page - Set sg entry to point at given page
> > @@ -20,11 +37,20 @@
> > **/
> > static inline void sg_set_page(struct scatterlist *sg, struct page *page)
> > {
> > - sg->page = page;
> > + unsigned long page_link = sg->page_link & 0x3;
> > +
> You might want to put a BUG_ON(page & 0x3); Make sure
> you're not loosing information. (The m68k problem)

That's a really good idea, thanks Boaz! I'll add that.

--
Jens Axboe

2007-10-23 19:23:20

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, 23 Oct 2007, Ingo Molnar wrote:
> * Jens Axboe <[email protected]> wrote:
>
> > > Linus' latest tree, which has your SG-list enhancements included,
> > > certainly works fine here and does not have the problems of the
> > > first iteration.
> >
> > That's good to hear :-)
> >
> > I have a series of pending patches where I've collected fallout
> > patches from people and some from myself here:
> >
> > http://git.kernel.dk/?p=linux-2.6-block.git;a=shortlog;h=sg
> >
> > or pullable from
> >
> > > git://git.kernel.dk/linux-2.6-block.git sg
>
> i've attached your fixes as a diff against linus-latest below - for
> those who'd like to have it in patch form.

The below are still needed for m68k
---

m68k: sg fallout

Signed-off-by: Geert Uytterhoeven <[email protected]>
---
arch/m68k/kernel/dma.c | 2 +-
drivers/scsi/atari_NCR5380.c | 5 ++---
drivers/scsi/sun3x_esp.c | 4 ++--
net/ieee80211/ieee80211_crypt_tkip.c | 2 +-
net/ieee80211/ieee80211_crypt_wep.c | 2 +-
net/mac80211/wep.c | 2 +-
6 files changed, 8 insertions(+), 9 deletions(-)

--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -9,10 +9,10 @@
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/scatterlist.h>
#include <linux/vmalloc.h>

#include <asm/pgalloc.h>
-#include <asm/scatterlist.h>

void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t flag)
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c
@@ -477,10 +477,9 @@ static void merge_contiguous_buffers(Scs

for (endaddr = virt_to_phys(cmd->SCp.ptr + cmd->SCp.this_residual - 1) + 1;
cmd->SCp.buffers_residual &&
- virt_to_phys(page_address(cmd->SCp.buffer[1].page) +
- cmd->SCp.buffer[1].offset) == endaddr;) {
+ virt_to_phys(sg_virt(&cmd->SCp.buffer[1])) == endaddr;) {
MER_PRINTK("VTOP(%p) == %08lx -> merging\n",
- page_address(cmd->SCp.buffer[1].page), endaddr);
+ page_address(sg_page(&cmd->SCp.buffer[1])), endaddr);
#if (NDEBUG & NDEBUG_MERGING)
++cnt;
#endif
--- a/drivers/scsi/sun3x_esp.c
+++ b/drivers/scsi/sun3x_esp.c
@@ -332,8 +332,8 @@ static void dma_mmu_get_scsi_sgl (struct
struct scatterlist *sg = sp->SCp.buffer;

while (sz >= 0) {
- sg[sz].dma_address = dvma_map((unsigned long)page_address(sg[sz].page) +
- sg[sz].offset, sg[sz].length);
+ sg[sz].dma_address = dvma_map((unsigned long)sg_virt(&sg[sz]),
+ sg[sz].length);
sz--;
}
sp->SCp.ptr=(char *)((unsigned long)sp->SCp.buffer->dma_address);
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -24,7 +24,7 @@
#include <net/ieee80211.h>

#include <linux/crypto.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <linux/crc32.h>

MODULE_AUTHOR("Jouni Malinen");
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -21,7 +21,7 @@
#include <net/ieee80211.h>

#include <linux/crypto.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <linux/crc32.h>

MODULE_AUTHOR("Jouni Malinen");
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -16,7 +16,7 @@
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/mm.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>

#include <net/mac80211.h>
#include "ieee80211_i.h"

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2007-10-23 19:56:52

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

Jens Axboe <[email protected]> writes:

>> You might want to put a BUG_ON(page & 0x3); Make sure
>> you're not loosing information. (The m68k problem)
>
> That's a really good idea, thanks Boaz! I'll add that.

It would be even better if you replaced all the magic numbers with defines
or better accessors.

-Andi

2007-10-23 20:20:30

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Andi Kleen wrote:
> Jens Axboe <[email protected]> writes:
>
> >> You might want to put a BUG_ON(page & 0x3); Make sure
> >> you're not loosing information. (The m68k problem)
> >
> > That's a really good idea, thanks Boaz! I'll add that.
>
> It would be even better if you replaced all the magic numbers with defines
> or better accessors.

All? There are two numbers, and all are confined to scatterlist.h
privately. Except the one in blk_rq_map_sg(), which was done on purpose
since I don't want to export that knowledge to others. So we definitely
don't want accessors, I can name the two bit values but don't see much
point in doing it.

--
Jens Axboe

2007-10-23 20:57:23

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23, 2007 at 10:20:17PM +0200, Jens Axboe wrote:
> On Tue, Oct 23 2007, Andi Kleen wrote:
> > Jens Axboe <[email protected]> writes:
> >
> > >> You might want to put a BUG_ON(page & 0x3); Make sure
> > >> you're not loosing information. (The m68k problem)
> > >
> > > That's a really good idea, thanks Boaz! I'll add that.
> >
> > It would be even better if you replaced all the magic numbers with defines
> > or better accessors.
>
> All? There are two numbers, and all are confined to scatterlist.h
> privately. Except the one in blk_rq_map_sg(), which was done on purpose
> since I don't want to export that knowledge to others. So we definitely
> don't want accessors, I can name the two bit values but don't see much
> point in doing it.

Maybe no point for you, but it would be helpful for any poor soul
who has to read/debug/change the code later.

Even if they're limited right now that doesn't mean it'll stay
that way anyways.

-Andi

2007-10-23 21:44:55

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Andi Kleen wrote:
> On Tue, Oct 23, 2007 at 10:20:17PM +0200, Jens Axboe wrote:
> > On Tue, Oct 23 2007, Andi Kleen wrote:
> > > Jens Axboe <[email protected]> writes:
> > >
> > > >> You might want to put a BUG_ON(page & 0x3); Make sure
> > > >> you're not loosing information. (The m68k problem)
> > > >
> > > > That's a really good idea, thanks Boaz! I'll add that.
> > >
> > > It would be even better if you replaced all the magic numbers with defines
> > > or better accessors.
> >
> > All? There are two numbers, and all are confined to scatterlist.h
> > privately. Except the one in blk_rq_map_sg(), which was done on purpose
> > since I don't want to export that knowledge to others. So we definitely
> > don't want accessors, I can name the two bit values but don't see much
> > point in doing it.
>
> Maybe no point for you, but it would be helpful for any poor soul
> who has to read/debug/change the code later.

I understand, but if you look in the include file that uses the magic
numbers, then there's a big comment block in the beginning describing
the usage.

> Even if they're limited right now that doesn't mean it'll stay
> that way anyways.

Since it's reusing lower page bits, we can't go beyond 2 bits anyway. So
I'll be surprised if they expand :-)

--
Jens Axboe

2007-10-23 21:46:44

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Geert Uytterhoeven wrote:
> On Tue, 23 Oct 2007, Ingo Molnar wrote:
> > * Jens Axboe <[email protected]> wrote:
> >
> > > > Linus' latest tree, which has your SG-list enhancements included,
> > > > certainly works fine here and does not have the problems of the
> > > > first iteration.
> > >
> > > That's good to hear :-)
> > >
> > > I have a series of pending patches where I've collected fallout
> > > patches from people and some from myself here:
> > >
> > > http://git.kernel.dk/?p=linux-2.6-block.git;a=shortlog;h=sg
> > >
> > > or pullable from
> > >
> > > > git://git.kernel.dk/linux-2.6-block.git sg
> >
> > i've attached your fixes as a diff against linus-latest below - for
> > those who'd like to have it in patch form.
>
> The below are still needed for m68k

OK, added. Thanks!

--
Jens Axboe

2007-10-24 06:57:19

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Geert Uytterhoeven wrote:
> On Tue, 23 Oct 2007, Ingo Molnar wrote:
> > * Jens Axboe <[email protected]> wrote:
> >
> > > > Linus' latest tree, which has your SG-list enhancements included,
> > > > certainly works fine here and does not have the problems of the
> > > > first iteration.
> > >
> > > That's good to hear :-)
> > >
> > > I have a series of pending patches where I've collected fallout
> > > patches from people and some from myself here:
> > >
> > > http://git.kernel.dk/?p=linux-2.6-block.git;a=shortlog;h=sg
> > >
> > > or pullable from
> > >
> > > > git://git.kernel.dk/linux-2.6-block.git sg
> >
> > i've attached your fixes as a diff against linus-latest below - for
> > those who'd like to have it in patch form.
>
> The below are still needed for m68k

The wep.c was already applied, I applied the rest of them. Thanks!

--
Jens Axboe

2007-10-24 08:06:09

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Tue, Oct 23 2007, Linus Torvalds wrote:
> My biggest complaint right now is that a lot of users of the sg *filling*
> functions were mindlessly converted, so we have code like
>
> cryptoloop.c: sg_set_page(&sg_in, in_page);
> cryptoloop.c: sg_in.offset = in_offs;
> cryptoloop.c: sg_in.length = sz;
>
> which is just really stupid, and we should have a function for that. But
> worse is code like this:
>
> ub.c: sg_set_page(sg, virt_to_page(sc->top_sense));
> ub.c: sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
> ub.c: sg->length = UB_SENSE_SIZE;
>
> which again was converted "line by line" and we actually *do* have a
> function to do the above three lines as
>
> sg_set_buf(sg, sc->top_sense, UB_SENSE_SIZE);
>
> where that *single* line is just tons shorter but more importantly, more
> readable, than the mess that is a brute-force conversion.

I modified sg_set_page() to take a length and offset argument, and
converted these silly sg_set_page(.., virt_to_page(foo)) to just use
sg_set_buf() instead:

30 files changed, 93 insertions(+), 152 deletions(-)

and it's definitely a win. Added to the pending bits...

--
Jens Axboe

2007-10-24 09:04:04

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Wed, 24 Oct 2007, Jens Axboe wrote:
> On Tue, Oct 23 2007, Linus Torvalds wrote:
> > My biggest complaint right now is that a lot of users of the sg *filling*
> > functions were mindlessly converted, so we have code like
> >
> > cryptoloop.c: sg_set_page(&sg_in, in_page);
> > cryptoloop.c: sg_in.offset = in_offs;
> > cryptoloop.c: sg_in.length = sz;
> >
> > which is just really stupid, and we should have a function for that. But
> > worse is code like this:
> >
> > ub.c: sg_set_page(sg, virt_to_page(sc->top_sense));
> > ub.c: sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
> > ub.c: sg->length = UB_SENSE_SIZE;
> >
> > which again was converted "line by line" and we actually *do* have a
> > function to do the above three lines as
> >
> > sg_set_buf(sg, sc->top_sense, UB_SENSE_SIZE);
> >
> > where that *single* line is just tons shorter but more importantly, more
> > readable, than the mess that is a brute-force conversion.
>
> I modified sg_set_page() to take a length and offset argument, and

As it no longer sets the page only, perhaps it's a good idea to rename
sg_set_page() to sg_set()?

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2007-10-24 09:13:13

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Wed, Oct 24 2007, Geert Uytterhoeven wrote:
> On Wed, 24 Oct 2007, Jens Axboe wrote:
> > On Tue, Oct 23 2007, Linus Torvalds wrote:
> > > My biggest complaint right now is that a lot of users of the sg *filling*
> > > functions were mindlessly converted, so we have code like
> > >
> > > cryptoloop.c: sg_set_page(&sg_in, in_page);
> > > cryptoloop.c: sg_in.offset = in_offs;
> > > cryptoloop.c: sg_in.length = sz;
> > >
> > > which is just really stupid, and we should have a function for that. But
> > > worse is code like this:
> > >
> > > ub.c: sg_set_page(sg, virt_to_page(sc->top_sense));
> > > ub.c: sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
> > > ub.c: sg->length = UB_SENSE_SIZE;
> > >
> > > which again was converted "line by line" and we actually *do* have a
> > > function to do the above three lines as
> > >
> > > sg_set_buf(sg, sc->top_sense, UB_SENSE_SIZE);
> > >
> > > where that *single* line is just tons shorter but more importantly, more
> > > readable, than the mess that is a brute-force conversion.
> >
> > I modified sg_set_page() to take a length and offset argument, and
>
> As it no longer sets the page only, perhaps it's a good idea to rename
> sg_set_page() to sg_set()?

sg_set_buf() also sets length and offset, sg_set_page() is just a mirror
of that. So I'd prefer to keep the naming.

--
Jens Axboe

2007-10-24 13:36:12

by Olivier Galibert

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Wed, Oct 24, 2007 at 11:12:42AM +0200, Jens Axboe wrote:
> sg_set_buf() also sets length and offset, sg_set_page() is just a mirror
> of that. So I'd prefer to keep the naming.

Hmmm, sg_set_phys/sg_set_virt to be more symmetrical to
sg_phys/sg_virt?

OG.

2007-10-24 13:38:21

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Wed, Oct 24 2007, Olivier Galibert wrote:
> On Wed, Oct 24, 2007 at 11:12:42AM +0200, Jens Axboe wrote:
> > sg_set_buf() also sets length and offset, sg_set_page() is just a mirror
> > of that. So I'd prefer to keep the naming.
>
> Hmmm, sg_set_phys/sg_set_virt to be more symmetrical to
> sg_phys/sg_virt?

(please don't drop cc lists)

That doesn't make any sense. Both sg_set_buf() and sg_set_page() set the
same thing in the sg entry, the input is just different. It has nothing
to do with setting the physical value, for instance.

--
Jens Axboe

2007-10-24 13:45:53

by Olivier Galibert

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Wed, Oct 24, 2007 at 03:38:04PM +0200, Jens Axboe wrote:
> (please don't drop cc lists)

Sorry. Reactions of people to Cc vary...


> That doesn't make any sense. Both sg_set_buf() and sg_set_page() set the
> same thing in the sg entry, the input is just different. It has nothing
> to do with setting the physical value, for instance.

Ok. I misunderstood the sg_virt/sg_phys difference I guess. No
problem.

OG.

2007-10-24 15:18:51

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Wed, 24 Oct 2007, Jens Axboe wrote:
> >
> > As it no longer sets the page only, perhaps it's a good idea to rename
> > sg_set_page() to sg_set()?
>
> sg_set_buf() also sets length and offset, sg_set_page() is just a mirror
> of that. So I'd prefer to keep the naming.

I agree. And it's not like you can get it wrong, since if you only give
the "page" argument, the preprocessor will complain loudly.

I think "sg_set_x()" is now rather logical - we fill in the SG entry
(entirely), and the only question is _how_ we do it (which is what "x"
says - using a page, or a kernel buffer).

Linus

2007-10-25 08:40:05

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Wednesday 24 October 2007 01:22:55 Linus Torvalds wrote:
> On Tue, 23 Oct 2007, Boaz Harrosh wrote:
> > But since we do not do that, and every single API in the kernel that
> > receives a scatterlist pointer also receives an sg_count parameter,
> > > then I do not see what is so hacky about giving that sg_count parameter
> > to the one that needs it the most. sg_next();
>
> Well, I'd personally actually prefer to *not* have the count be passed
> down explicitly, because it's just too error prone.

Well, the duplication is bad, but walking lists to find the length is
inefficient so you pass around the length as well.

What irritates me more is that scatterlists aren't quite generically useful.
The virtio code wants to join a scatterlist created by blk_rq_map_sg() with
two others, yet it won't work because sg_chain() doesn't remove the end
marker from the first entry.

If this patch weren't already included, I'd be strongly arguing for the bio
idea: I find the chained sg code tricksy and ugly (sorry Jens).

To be constructive, how's this (BTW, why @arg@, I thought it was simply
"@arg"?)

===
Make sg_chain() a little more generic

Allow chaining when the first chain already has an end marker, and
change it to a slightly clearer semantic (the number of used entries
in the array, not that number plus one).

Signed-off-by: Rusty Russell <[email protected]>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 61fdaf0..24bbc92 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -778,7 +778,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
* ended up doing another loop.
*/
if (prev)
- sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+ sg_chain(prev, SCSI_MAX_SG_SEGMENTS-1, sgl);

/*
* if we have nothing left, mark the last segment as
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index df7ddce..c1ef145 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -147,12 +147,11 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl,
/**
* sg_chain - Chain two sglists together
* @prv: First scatterlist
- * @prv_nents: Number of entries in prv
+ * @prv_nents: Number of entries used in prv
* @sgl: Second scatterlist
*
* Description:
- * Links @prv@ and @sgl@ together, to form a longer scatterlist.
- *
+ * @prv[@prv_nents] is used as a link to join @prv to @sgl.
**/
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
@@ -160,7 +159,9 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
#ifndef ARCH_HAS_SG_CHAIN
BUG();
#endif
- prv[prv_nents - 1].page_link = (unsigned long) sgl | 0x01;
+ if (prv_nents > 0)
+ prv[prv_nents - 1].page_link &= ~0x02UL;
+ prv[prv_nents].page_link = (unsigned long) sgl | 0x01;
}

/**

2007-10-25 09:13:33

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Thu, Oct 25 2007, Rusty Russell wrote:
> On Wednesday 24 October 2007 01:22:55 Linus Torvalds wrote:
> > On Tue, 23 Oct 2007, Boaz Harrosh wrote:
> > > But since we do not do that, and every single API in the kernel that
> > > receives a scatterlist pointer also receives an sg_count parameter,
> > > then I do not see what is so hacky about giving that sg_count parameter
> > > to the one that needs it the most. sg_next();
> >
> > Well, I'd personally actually prefer to *not* have the count be passed
> > down explicitly, because it's just too error prone.
>
> Well, the duplication is bad, but walking lists to find the length is
> inefficient so you pass around the length as well.
>
> What irritates me more is that scatterlists aren't quite generically useful.
> The virtio code wants to join a scatterlist created by blk_rq_map_sg() with
> two others, yet it won't work because sg_chain() doesn't remove the end
> marker from the first entry.

That's a minor nit for your special purpose, we/you can change that.

> If this patch weren't already included, I'd be strongly arguing for the bio
> idea: I find the chained sg code tricksy and ugly (sorry Jens).

What is the bio idea? A bio works in essentially the same way, the only
difference is having a specific next pointer. It's still just a linked
lists of arbitrarily sized sg tables (the bio_vec arrays).

>
> To be constructive, how's this (BTW, why @arg@, I thought it was simply
> "@arg"?)
>
> ===
> Make sg_chain() a little more generic
>
> Allow chaining when the first chain already has an end marker, and
> change it to a slightly clearer semantic (the number of used entries
> in the array, not that number plus one).
>
> Signed-off-by: Rusty Russell <[email protected]>
>
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 61fdaf0..24bbc92 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -778,7 +778,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
> * ended up doing another loop.
> */
> if (prev)
> - sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
> + sg_chain(prev, SCSI_MAX_SG_SEGMENTS-1, sgl);

What's this bit for?

> diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
> index df7ddce..c1ef145 100644
> --- a/include/linux/scatterlist.h
> +++ b/include/linux/scatterlist.h
> @@ -147,12 +147,11 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl,
> /**
> * sg_chain - Chain two sglists together
> * @prv: First scatterlist
> - * @prv_nents: Number of entries in prv
> + * @prv_nents: Number of entries used in prv
> * @sgl: Second scatterlist
> *
> * Description:
> - * Links @prv@ and @sgl@ together, to form a longer scatterlist.
> - *
> + * @prv[@prv_nents] is used as a link to join @prv to @sgl.
> **/
> static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
> struct scatterlist *sgl)
> @@ -160,7 +159,9 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
> #ifndef ARCH_HAS_SG_CHAIN
> BUG();
> #endif
> - prv[prv_nents - 1].page_link = (unsigned long) sgl | 0x01;
> + if (prv_nents > 0)
> + prv[prv_nents - 1].page_link &= ~0x02UL;
> + prv[prv_nents].page_link = (unsigned long) sgl | 0x01;
> }

We definitely should clear any other markers, that makes sense.

--
Jens Axboe

2007-10-25 11:54:44

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Thursday 25 October 2007 19:11:40 Jens Axboe wrote:
> On Thu, Oct 25 2007, Rusty Russell wrote:
> > What irritates me more is that scatterlists aren't quite generically
> > useful. The virtio code wants to join a scatterlist created by
> > blk_rq_map_sg() with two others, yet it won't work because sg_chain()
> > doesn't remove the end marker from the first entry.
>
> That's a minor nit for your special purpose, we/you can change that.

Well currently sg_chain() only joins "incomplete" (ie. unterminated) sg
chains. That works great for you, but it feels more like a special purpose
to me.

> > If this patch weren't already included, I'd be strongly arguing for the
> > bio idea: I find the chained sg code tricksy and ugly (sorry Jens).
>
> What is the bio idea? A bio works in essentially the same way, the only
> difference is having a specific next pointer. It's still just a linked
> lists of arbitrarily sized sg tables (the bio_vec arrays).

It was suggested by analogy earlier in this thread, to use a two-level
structure.

In this case I would have first renamed struct scatterlist to struct
scatterelem. Then struct scatterlist looks like:

struct scatterlist {
unsigned int num;
struct scatterelem elems[0];
};

We'd want a nice macro to declare them for the stack case:

#define DEFINE_SCATTERLIST(name, num) \
struct { \
struct scatterlist sg; \
struct scatterelem elems[num]; \
} name

Now we've tied the number and array together, we can introduce:

struct sg_multilist
{
unsigned int num_scatterlists;
struct scatterlist *sg_array[0];
};

And, of course, a common way to represent a one-sglist array:

#define DEFINE_SG_MULTI(name, num) \
struct { \
struct sg_multilist ml; \
struct scatterlist *sg_array; \
struct scatterlist sg; \
struct scatterelem elems[num]; \
} name = { .ml = { 1 }, .sg_array = &name.sg }

Now simply replace all the places which expect a "struct scatterlist"
with "struct sg_multilist" and we're done.

Using dangling structures is not as neat as using pointers, but it's very
efficient.

> > @@ -778,7 +778,7 @@ struct scatterlist *scsi_alloc_sgtable(struct
> > scsi_cmnd *cmd, gfp_t gfp_mask) * ended up doing another loop.
> > */
> > if (prev)
> > - sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
> > + sg_chain(prev, SCSI_MAX_SG_SEGMENTS-1, sgl);
>
> What's this bit for?

I changed the sg_chain() function not to take one off the argument. It made
more sense when I wrote the virtblk code (here it's natural, since the num
elements used + 1 == size of array).

> > - prv[prv_nents - 1].page_link = (unsigned long) sgl | 0x01;
> > + if (prv_nents > 0)
> > + prv[prv_nents - 1].page_link &= ~0x02UL;
> > + prv[prv_nents].page_link = (unsigned long) sgl | 0x01;
> > }
> We definitely should clear any other markers, that makes sense.

Agreed, and it was the use of "prv_nents - 2" in that code which made me think
the arg should be "num used" not "one past the num used".

Cheers,
Rusty.

2007-10-25 15:42:59

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Thu, 25 Oct 2007, Rusty Russell wrote:
> On Wednesday 24 October 2007 01:22:55 Linus Torvalds wrote:
> >
> > Well, I'd personally actually prefer to *not* have the count be passed
> > down explicitly, because it's just too error prone.
>
> Well, the duplication is bad, but walking lists to find the length is
> inefficient so you pass around the length as well.

Nobody should *ever* walk the list to find the length. Does anybody really
do that? Yes, we pass the thing down, but do people *need* it?

[ Side note: some of the users of that length currently would seem to be
buggy in the presence of continuation entries, and seem to assume that
the "list" is just a contiguous array. In fact, that's almost the only
valid use for the "count" thing, since any other use _has_ to walk it
entry by entry anyway, no? ]

The thing is, nobody should care. You walk the list to fill things in, or
to write it out to some HW-specific DMA table, you should never care about
the length. However, you *do* care about the "where does it end" part: to
be able to detect overflows (which should never happen, but from a
debugging standpoint it needs to be detectable rather than just silently
use or corrupt memory).

But if people really want/need the length, then we damn well should have a
"header" thing, not two independent "list + length" parameters.

Linus

2007-10-25 16:03:45

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Oct. 25, 2007, 17:40 +0200, Linus Torvalds <[email protected]> wrote:
>
> On Thu, 25 Oct 2007, Rusty Russell wrote:
>> On Wednesday 24 October 2007 01:22:55 Linus Torvalds wrote:
>>> Well, I'd personally actually prefer to *not* have the count be passed
>>> down explicitly, because it's just too error prone.
>> Well, the duplication is bad, but walking lists to find the length is
>> inefficient so you pass around the length as well.
>
> Nobody should *ever* walk the list to find the length. Does anybody really
> do that? Yes, we pass the thing down, but do people *need* it?
>
> [ Side note: some of the users of that length currently would seem to be
> buggy in the presence of continuation entries, and seem to assume that
> the "list" is just a contiguous array. In fact, that's almost the only
> valid use for the "count" thing, since any other use _has_ to walk it
> entry by entry anyway, no? ]
>
> The thing is, nobody should care. You walk the list to fill things in, or
> to write it out to some HW-specific DMA table, you should never care about
> the length. However, you *do* care about the "where does it end" part: to
> be able to detect overflows (which should never happen, but from a
> debugging standpoint it needs to be detectable rather than just silently
> use or corrupt memory).
>
> But if people really want/need the length, then we damn well should have a
> "header" thing, not two independent "list + length" parameters.

There are a number of indicators that need to be kept in sync, depending
on the usage. The number of entries is set when it is allocated and is
currently needed to free it up (note that in the sgtable "sketch" James
proposed we saved the sg pool index in the sgtable header and used it to
free each chunk to the right pool). The number of entries is then used in
many places to scan the list, however, after the sg list is dma mapped, the
dma mapping may be shorter than the original sg list when multiple pages are
coalesced and there we need to defer to use the dma_length (plus the number
of entries) to determine the end of the list.

IMO I think that the byte count can be used authoritatively to scan
the contents of the sg list either before or after dma mapping
while the number of entries is relevant to walking the list in a context free
manner (i.e. to go over all the entries that were allocated)

>
> Linus
> -

2007-10-26 00:03:14

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Thursday 25 October 2007 21:54:44 Rusty Russell wrote:
> On Thursday 25 October 2007 19:11:40 Jens Axboe wrote:
> > On Thu, Oct 25 2007, Rusty Russell wrote:
> > > What irritates me more is that scatterlists aren't quite generically
> > > useful. The virtio code wants to join a scatterlist created by
> > > blk_rq_map_sg() with two others, yet it won't work because sg_chain()
> > > doesn't remove the end marker from the first entry.
> >
> > That's a minor nit for your special purpose, we/you can change that.
>
> Well currently sg_chain() only joins "incomplete" (ie. unterminated) sg
> chains. That works great for you, but it feels more like a special purpose
> to me.
>
> > > If this patch weren't already included, I'd be strongly arguing for the
> > > bio idea: I find the chained sg code tricksy and ugly (sorry Jens).
> >
> > What is the bio idea? A bio works in essentially the same way, the only
> > difference is having a specific next pointer. It's still just a linked
> > lists of arbitrarily sized sg tables (the bio_vec arrays).
>
> It was suggested by analogy earlier in this thread, to use a two-level
> structure.
>
> In this case I would have first renamed struct scatterlist to struct
> scatterelem. Then struct scatterlist looks like:
>
> struct scatterlist {
> unsigned int num;
> struct scatterelem elems[0];
> };

To correct my own thoughts, it'd be better to just put a "struct list_head
list;" in there for chaining. That's more along standard kernel lines, and
neatly handles the single-scatterlist case.

Cheers,
Rusty.

2007-10-26 05:02:18

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

Linus Torvalds writes:

> Nobody should *ever* walk the list to find the length. Does anybody really
> do that? Yes, we pass the thing down, but do people *need* it?

Yes, I need it for devices that use the macintosh DBDMA
(descriptor-based DMA) hardware. The DBDMA hardware reads an array of
descriptors from system RAM, so I need to allocate an array and fill
it in with DBDMA command blocks (and then dma-map it and point the
device at it).

> [ Side note: some of the users of that length currently would seem to be
> buggy in the presence of continuation entries, and seem to assume that
> the "list" is just a contiguous array. In fact, that's almost the only
> valid use for the "count" thing, since any other use _has_ to walk it
> entry by entry anyway, no? ]

Maybe the drivers for devices that use DBDMA are now buggy. Certainly
filling in the array of DBDMA command blocks involves walking the
list, but it would be extremely useful to know how much to allocate
before we start filling them in. So we at least need an upper bound
on the number of "real" entries, even if we don't have the exact
number.

Paul.

2007-10-26 14:54:21

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout



On Fri, 26 Oct 2007, Paul Mackerras wrote:
>
> Linus Torvalds writes:
>
> > Nobody should *ever* walk the list to find the length. Does anybody really
> > do that? Yes, we pass the thing down, but do people *need* it?
>
> Yes, I need it for devices that use the macintosh DBDMA
> (descriptor-based DMA) hardware. The DBDMA hardware reads an array of
> descriptors from system RAM, so I need to allocate an array and fill
> it in with DBDMA command blocks (and then dma-map it and point the
> device at it).

Yes, for allocation purposes you'd need the size ahead of time, agreed.
Otherwise you have to walk the list twice.

> Maybe the drivers for devices that use DBDMA are now buggy. Certainly
> filling in the array of DBDMA command blocks involves walking the
> list, but it would be extremely useful to know how much to allocate
> before we start filling them in. So we at least need an upper bound
> on the number of "real" entries, even if we don't have the exact
> number.

Hmm. Depending on where you do this, and if this is some block-layer
specific driver/code (rather than necessarily a generic SG thing), you do
have the req->nr_phys_segments thing which should be that for you (ie the
SG list may have _fewer_ requests in it in case some of those entries got
squashed together due to being contiguous).

But yeah, I don't think it would be wrong at all to have a

struct scatterlist_head {
unsigned int entries;
unsigned int flags; /* ? */
struct scatterlist *sg;
};

which would be passed down at higher levels.

Linus

2007-10-26 17:28:52

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 09/10] Change table chaining layout

On Fri, Oct 26 2007, Linus Torvalds wrote:
>
>
> On Fri, 26 Oct 2007, Paul Mackerras wrote:
> >
> > Linus Torvalds writes:
> >
> > > Nobody should *ever* walk the list to find the length. Does anybody really
> > > do that? Yes, we pass the thing down, but do people *need* it?
> >
> > Yes, I need it for devices that use the macintosh DBDMA
> > (descriptor-based DMA) hardware. The DBDMA hardware reads an array of
> > descriptors from system RAM, so I need to allocate an array and fill
> > it in with DBDMA command blocks (and then dma-map it and point the
> > device at it).
>
> Yes, for allocation purposes you'd need the size ahead of time, agreed.
> Otherwise you have to walk the list twice.

Do you really allocate a fresh table for every command, or just a max
sized one at init?

> > Maybe the drivers for devices that use DBDMA are now buggy. Certainly
> > filling in the array of DBDMA command blocks involves walking the
> > list, but it would be extremely useful to know how much to allocate
> > before we start filling them in. So we at least need an upper bound
> > on the number of "real" entries, even if we don't have the exact
> > number.
>
> Hmm. Depending on where you do this, and if this is some block-layer
> specific driver/code (rather than necessarily a generic SG thing), you do
> have the req->nr_phys_segments thing which should be that for you (ie the
> SG list may have _fewer_ requests in it in case some of those entries got
> squashed together due to being contiguous).
>
> But yeah, I don't think it would be wrong at all to have a
>
> struct scatterlist_head {
> unsigned int entries;
> unsigned int flags; /* ? */
> struct scatterlist *sg;
> };
>
> which would be passed down at higher levels.

That'd be fine with me as well, but I really don't think that a lot of
people really do need the sg count when you can just loop over the table
until it returns NULL.

--
Jens Axboe

2007-11-05 06:11:40

by Rusty Russell

[permalink] [raw]
Subject: [RFC PATCH 1/2] sg_ring instead of scatterlist chaining

Hi all,

This patch implements a header for a linked list of scatterlist
arrays, rather than using an extra entry and low pointer bits to chain them
together. I've tested that it's sane for virtio (which uses struct
scatterlist).

Features:
1) Neatens code by including length in structure.
2) Avoids end ambiguity by including maximum length too.
3) Works fine with old "sg is an array" interfaces.
4) Kinda icky for stack declaration, so hence a helper is created.
5) Lacks magic.

I reverted (most of?) the scatterlist chaining changes to create these
patches, so it won't apply to your kernels. The reversion patch isn't
interesting, so I haven't posted it.

Thanks,
Rusty.

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 4efbd9c..ce7e581 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -5,6 +5,51 @@
#include <linux/mm.h>
#include <linux/string.h>

+/**
+ * struct sg_ring - a ring of scatterlists
+ * @list: the list_head chaining them together
+ * @num: the number of valid sg entries
+ * @max: the maximum number of sg entries (size of the sg array).
+ * @sg: the array of scatterlist entries.
+ *
+ * This provides a convenient encapsulation of one or more scatter gather
+ * arrays. */
+struct sg_ring
+{
+ struct list_head list;
+ unsigned int num, max;
+ struct scatterlist sg[0];
+};
+
+/* This helper declares an sg ring on the stack or in a struct. */
+#define DECLARE_SG(name, max) \
+ struct { \
+ struct sg_ring ring; \
+ struct scatterlist sg[max]; \
+ } name
+
+/**
+ * sg_ring_init - initialize a scatterlist ring.
+ * @sg: the sg_ring.
+ * @max: the size of the trailing sg array.
+ *
+ * After initialization sg is alone in the ring. */
+static inline void sg_ring_init(struct sg_ring *sg, unsigned int max)
+{
+ INIT_LIST_HEAD(&sg->list);
+ sg->max = max;
+}
+
+/**
+ * sg_ring_next - next array in a scatterlist ring.
+ * @sg: the sg_ring.
+ *
+ * Returns the sg_ring that follows @sg in the ring (@sg itself if it is alone). */
+static inline struct sg_ring *sg_ring_next(struct sg_ring *sg)
+{
+ return list_first_entry(&sg->list, struct sg_ring, list);
+}
+
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
@@ -20,4 +65,20 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
sg_set_buf(sg, buf, buflen);
}

+/**
+ * sg_init_single - initialize a one-element scatterlist ring.
+ * @sg: the sg_ring.
+ * @buf: the pointer to the buffer.
+ * @buflen: the length of the buffer.
+ *
+ * Does sg_ring_init and also sets up first (and only) sg element. */
+static inline void sg_init_single(struct sg_ring *sg,
+ const void *buf,
+ unsigned int buflen)
+{
+ sg_ring_init(sg, 1);
+ sg->num = 1;
+ sg_init_one(&sg->sg[0], buf, buflen);
+}
+
#endif /* _LINUX_SCATTERLIST_H */

2007-11-05 06:15:37

by Rusty Russell

[permalink] [raw]
Subject: [RFC PATCH 2/2] sg_ring instead of scatterlist chaining in virtio

Example using virtio.

The interface actually improves because we don't need to hand two lengths to
add_buf (it needs an input and output sg[], so we used to hand one pointer
and a counter of how many were in and how many out, now we can neatly hand
two separate sgs).

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index a901eee..6a9b54d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -23,7 +23,9 @@ struct virtio_blk
mempool_t *pool;

/* Scatterlist: can be too big for stack. */
- struct scatterlist sg[3+MAX_PHYS_SEGMENTS];
+ DECLARE_SG(out, 1);
+ DECLARE_SG(in, 1);
+ DECLARE_SG(sg, MAX_PHYS_SEGMENTS);
};

struct virtblk_req
@@ -69,8 +71,8 @@ static bool blk_done(struct virtqueue *vq)
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
struct request *req)
{
- unsigned long num, out, in;
struct virtblk_req *vbr;
+ struct sg_ring *in;

vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
if (!vbr)
@@ -94,23 +96,24 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
if (blk_barrier_rq(vbr->req))
vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;

- /* We have to zero this, otherwise blk_rq_map_sg gets upset. */
- memset(vblk->sg, 0, sizeof(vblk->sg));
- sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
- num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
- sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr));
+ sg_init_single(&vblk->out.ring, &vbr->out_hdr, sizeof(vbr->out_hdr));
+ sg_ring_init(&vblk->sg.ring, ARRAY_SIZE(vblk->sg.sg));
+ vblk->sg.ring.num = blk_rq_map_sg(q, vbr->req, vblk->sg.sg);
+ sg_init_single(&vblk->in.ring, &vbr->in_hdr, sizeof(vbr->in_hdr));

if (rq_data_dir(vbr->req) == WRITE) {
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
- out = 1 + num;
- in = 1;
+ /* Chain write request onto output buffers. */
+ list_add_tail(&vblk->sg.ring.list, &vblk->out.ring.list);
+ in = &vblk->in.ring;
} else {
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
- out = 1;
- in = 1 + num;
+ /* Chain input (status) buffer at end of read buffers. */
+ list_add_tail(&vblk->in.ring.list, &vblk->sg.ring.list);
+ in = &vblk->sg.ring;
}

- if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
+ if (vblk->vq->vq_ops->add_buf(vblk->vq, &vblk->out.ring, in, vbr)) {
mempool_free(vbr, vblk->pool);
return false;
}
@@ -127,7 +130,7 @@ static void do_virtblk_request(struct request_queue *q)

while ((req = elv_next_request(q)) != NULL) {
vblk = req->rq_disk->private_data;
- BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg));
+ BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg.sg));

/* If this request fails, stop queue and wait for something to
finish to restart it. */
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 100e8a2..e4d90c7 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -54,15 +54,15 @@ static struct hv_ops virtio_cons;
* immediately (lguest's Launcher does). */
static int put_chars(u32 vtermno, const char *buf, int count)
{
- struct scatterlist sg[1];
+ DECLARE_SG(sg, 1);
unsigned int len;

/* This is a convenient routine to initialize a single-elem sg list */
- sg_init_one(sg, buf, count);
+ sg_init_single(&sg.ring, buf, count);

/* add_buf wants a token to identify this buffer: we hand it any
* non-NULL pointer, since there's only ever one buffer. */
- if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
+ if (out_vq->vq_ops->add_buf(out_vq, &sg.ring, NULL, (void *)1) == 0) {
/* Tell Host to go! */
out_vq->vq_ops->kick(out_vq);
/* Chill out until it's done with the buffer. */
@@ -78,11 +78,12 @@ static int put_chars(u32 vtermno, const char *buf, int count)
* queue. */
static void add_inbuf(void)
{
- struct scatterlist sg[1];
- sg_init_one(sg, inbuf, PAGE_SIZE);
+ DECLARE_SG(sg, 1);
+
+ sg_init_single(&sg.ring, inbuf, PAGE_SIZE);

/* We should always be able to add one buffer to an empty queue. */
- if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
+ if (in_vq->vq_ops->add_buf(in_vq, NULL, &sg.ring, inbuf) != 0)
BUG();
in_vq->vq_ops->kick(in_vq);
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e396c9d..2698592 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -143,20 +143,21 @@ drop:
static void try_fill_recv(struct virtnet_info *vi)
{
struct sk_buff *skb;
- struct scatterlist sg[1+MAX_SKB_FRAGS];
- int num, err;
+ DECLARE_SG(sg, 1+MAX_SKB_FRAGS);
+ int err;

+ sg_ring_init(&sg.ring, 1+MAX_SKB_FRAGS);
for (;;) {
skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
if (unlikely(!skb))
break;

skb_put(skb, MAX_PACKET_LEN);
- vnet_hdr_to_sg(sg, skb);
- num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
+ vnet_hdr_to_sg(sg.sg, skb);
+ sg.ring.num = skb_to_sgvec(skb, sg.sg+1, 0, skb->len) + 1;
skb_queue_head(&vi->recv, skb);

- err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
+ err = vi->rvq->vq_ops->add_buf(vi->rvq, NULL, &sg.ring, skb);
if (err) {
skb_unlink(skb, &vi->recv);
kfree_skb(skb);
@@ -225,14 +226,15 @@ static void free_old_xmit_skbs(struct virtnet_info *vi)
static int start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
- int num, err;
- struct scatterlist sg[1+MAX_SKB_FRAGS];
+ int err;
+ DECLARE_SG(sg, 1+MAX_SKB_FRAGS);
struct virtio_net_hdr *hdr;
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
DECLARE_MAC_BUF(mac);

pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));

+ sg_ring_init(&sg.ring, 1+MAX_SKB_FRAGS);
free_old_xmit_skbs(vi);

/* Encode metadata header at front. */
@@ -263,10 +265,10 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
hdr->gso_size = 0;
}

- vnet_hdr_to_sg(sg, skb);
- num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
+ vnet_hdr_to_sg(sg.sg, skb);
+ sg.ring.num = skb_to_sgvec(skb, sg.sg+1, 0, skb->len) + 1;
__skb_queue_head(&vi->send, skb);
- err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
+ err = vi->svq->vq_ops->add_buf(vi->svq, &sg.ring, NULL, skb);
if (err) {
pr_debug("%s: virtio not prepared to send\n", dev->name);
skb_unlink(skb, &vi->send);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ae53570..d73cd18 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -69,48 +69,68 @@ struct vring_virtqueue

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

+static int add_desc(struct vring_virtqueue *vq, unsigned int i,
+ struct scatterlist *sg, unsigned int flags)
+{
+ if (vq->num_free == 0)
+ return -ENOSPC;
+
+ vq->vring.desc[i].flags = VRING_DESC_F_NEXT | flags;
+ vq->vring.desc[i].addr = (page_to_pfn(sg->page)<<PAGE_SHIFT)
+ + sg->offset;
+ vq->vring.desc[i].len = sg->length;
+ vq->num_free--;
+ return vq->vring.desc[i].next;
+}
+
static int vring_add_buf(struct virtqueue *_vq,
- struct scatterlist sg[],
- unsigned int out,
- unsigned int in,
+ struct sg_ring *out,
+ struct sg_ring *in,
void *data)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- unsigned int i, avail, head, uninitialized_var(prev);
+ unsigned int j, avail, head, free, uninitialized_var(prev);
+ int i;
+ struct sg_ring empty_sg, *start;

BUG_ON(data == NULL);
- BUG_ON(out + in > vq->vring.num);
- BUG_ON(out + in == 0);
+
+ sg_ring_init(&empty_sg, 0);
+ empty_sg.num = 0;
+ if (!out)
+ out = &empty_sg;
+ if (!in)
+ in = &empty_sg;
+
+ BUG_ON(in->num == 0 && out->num == 0);

START_USE(vq);

- if (vq->num_free < out + in) {
- pr_debug("Can't add buf len %i - avail = %i\n",
- out + in, vq->num_free);
- END_USE(vq);
- return -ENOSPC;
- }
+ i = head = vq->free_head;
+ free = vq->num_free;
+
+ /* Lay out the output buffers first. */
+ start = out;
+ do {
+ for (j = 0; j < out->num; j++) {
+ prev = i;
+ i = add_desc(vq, i, &out->sg[j], 0);
+ if (unlikely(i < 0))
+ goto full;
+ }
+ } while ((out = sg_ring_next(out)) != start);
+
+ /* Lay out the input buffers next. */
+ start = in;
+ do {
+ for (j = 0; j < in->num; j++) {
+ prev = i;
+ i = add_desc(vq, i, &in->sg[j], VRING_DESC_F_WRITE);
+ if (unlikely(i < 0))
+ goto full;
+ }
+ } while ((in = sg_ring_next(in)) != start);

- /* We're about to use some buffers from the free list. */
- vq->num_free -= out + in;
-
- head = vq->free_head;
- for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
- vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
- vq->vring.desc[i].addr = (page_to_pfn(sg->page)<<PAGE_SHIFT)
- + sg->offset;
- vq->vring.desc[i].len = sg->length;
- prev = i;
- sg++;
- }
- for (; in; i = vq->vring.desc[i].next, in--) {
- vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
- vq->vring.desc[i].addr = (page_to_pfn(sg->page)<<PAGE_SHIFT)
- + sg->offset;
- vq->vring.desc[i].len = sg->length;
- prev = i;
- sg++;
- }
/* Last one doesn't continue. */
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;

@@ -128,6 +148,12 @@ static int vring_add_buf(struct virtqueue *_vq,
pr_debug("Added buffer head %i to %p\n", head, vq);
END_USE(vq);
return 0;
+
+full:
+ pr_debug("Buffer needed more than %u on %p\n", free, vq);
+ vq->num_free = free;
+ END_USE(vq);
+ return i;
}

static void vring_kick(struct virtqueue *_vq)
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 14e1379..cee525f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -29,9 +29,8 @@ struct virtqueue
* virtqueue_ops - operations for virtqueue abstraction layer
* @add_buf: expose buffer to other end
* vq: the struct virtqueue we're talking about.
- * sg: the description of the buffer(s).
- * out_num: the number of sg readable by other side
- * in_num: the number of sg which are writable (after readable ones)
+ * out: the scatter gather elements readable by other side (can be NULL)
+ * in: the scatter gather elements which are writable (can be NULL)
* data: the token identifying the buffer.
* Returns 0 or an error.
* @kick: update after add_buf
@@ -56,9 +55,8 @@ struct virtqueue
*/
struct virtqueue_ops {
int (*add_buf)(struct virtqueue *vq,
- struct scatterlist sg[],
- unsigned int out_num,
- unsigned int in_num,
+ struct sg_ring *out,
+ struct sg_ring *in,
void *data);

void (*kick)(struct virtqueue *vq);

2007-11-05 16:46:31

by Randy Dunlap

[permalink] [raw]
Subject: Re: [RFC PATCH 1/2] sg_ring instead of scatterlist chaining

On Mon, 5 Nov 2007 17:11:55 +1100 Rusty Russell wrote:

> Hi all,
>
> This patch implements a header for a linked list of scatterlist
> arrays, rather than using an extra entry and low pointer bits to chain them
> together. I've tested that it's sane for virtio (which uses struct
> scatterlist).
>
> Features:
> 1) Neatens code by including length in structure.
> 2) Avoids end ambiguity by including maximum length too.
> 3) Works fine with old "sg is an array" interfaces.
> 4) Kinda icky for stack declaration, so hence a helper is created.
> 5) Lacks magic.
>
> I reverted (most of?) the scatterlist chaining changes to create these
> patches, so it won't apply to your kernels. The reversion patch isn't
> interesting, so I haven't posted it.
>
> Thanks,
> Rusty.
>
> diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
> index 4efbd9c..ce7e581 100644
> --- a/include/linux/scatterlist.h
> +++ b/include/linux/scatterlist.h
> @@ -5,6 +5,51 @@
> #include <linux/mm.h>
> #include <linux/string.h>
>
> +/**
> + * struct sg_ring - a ring of scatterlists
> + * @list: the list_head chaining them together
> + * @num: the number of valid sg entries
> + * @max: the maximum number of sg entries (size of the sg array).
> + * @sg: the array of scatterlist entries.
> + *
> + * This provides a convenient encapsulation of one or more scatter gather
> + * arrays. */

Hi Rusty,

I don't know where these patches are going, but please put the
trailing */ in all of these kernel-doc descriptions (nice ones :)
on a separate line. Thanks.

> +struct sg_ring
> +{
> + struct list_head list;
> + unsigned int num, max;
> + struct scatterlist sg[0];
> +};
> +
> +/* This helper declares an sg ring on the stack or in a struct. */
> +#define DECLARE_SG(name, max) \
> + struct { \
> + struct sg_ring ring; \
> + struct scatterlist sg[max]; \
> + } name
> +
> +/**
> + * sg_ring_init - initialize a scatterlist ring.
> + * @sg: the sg_ring.
> + * @max: the size of the trailing sg array.
> + *
> + * After initialization sg is alone in the ring. */
> +static inline void sg_ring_init(struct sg_ring *sg, unsigned int max)
> +{
> + INIT_LIST_HEAD(&sg->list);
> + sg->max = max;
> +}
> +
> +/**
> + * sg_ring_next - next array in a scatterlist ring.
> + * @sg: the sg_ring.
> + *
> + * Returns the sg_ring that follows @sg in the ring (@sg itself if it is alone). */
> +static inline struct sg_ring *sg_ring_next(struct sg_ring *sg)
> +{
> + return list_first_entry(&sg->list, struct sg_ring, list);
> +}
> +
> static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
> unsigned int buflen)
> {
> @@ -20,4 +65,20 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
> sg_set_buf(sg, buf, buflen);
> }
>
> +/**
> + * sg_init_single - initialize a one-element scatterlist ring.
> + * @sg: the sg_ring.
> + * @buf: the pointer to the buffer.
> + * @buflen: the length of the buffer.
> + *
> + * Does sg_ring_init and also sets up first (and only) sg element. */
> +static inline void sg_init_single(struct sg_ring *sg,
> + const void *buf,
> + unsigned int buflen)
> +{
> + sg_ring_init(sg, 1);
> + sg->num = 1;
> + sg_init_one(&sg->sg[0], buf, buflen);
> +}
> +
> #endif /* _LINUX_SCATTERLIST_H */

---
~Randy