2007-06-20 00:13:37

by Duane Griffin

[permalink] [raw]
Subject: [patch 1/2] HFS+: Refactor ASCII to unicode conversion routine for later reuse

Refactor existing HFS+ ASCII to unicode string conversion routine to
split out character conversion functionality. This will be reused by
the custom dentry hash and comparison routines. This approach avoids
unnecessary memory allocation compared to using the string conversion
routine directly in the new functions.

Signed-off-by: Duane Griffin <[email protected]>
---

--- linux-2.6.21.orig/fs/hfsplus/unicode.c
+++ linux-2.6.21/fs/hfsplus/unicode.c
@@ -239,58 +239,97 @@ out:
return res;
}

-int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len)
+/*
+ * Convert one or more ASCII characters into a single unicode character.
+ * Returns the number of ASCII characters corresponding to the unicode char.
+ */
+static
+int asc2uc(struct super_block *sb, const char *astr, int len, wchar_t *uc)
{
- struct nls_table *nls = HFSPLUS_SB(sb).nls;
- int size, off, decompose;
- wchar_t c;
- u16 outlen = 0;
+ int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
+ if (size <= 0) {
+ *uc = '?';
+ size = 1;
+ }
+ switch (*uc) {
+ case 0x2400:
+ *uc = 0;
+ break;
+ case ':':
+ *uc = '/';
+ break;
+ }
+ return size;
+}
+
+struct decomposed_uc {
+ int size;
+ wchar_t str[3];
+};
+
+/* Decomposes a single unicode character. */
+static void decompose_uc(wchar_t uc, struct decomposed_uc *duc)
+{
+ int off, ii;

- decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
+ duc->size = 1;
+ duc->str[0] = uc;
+
+ off = hfsplus_decompose_table[(uc >> 12) & 0xf];
+ if (off == 0 || off == 0xffff)
+ return;
+
+ off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
+ if (!off)
+ return;
+
+ off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
+ if (!off)
+ return;
+
+ off = hfsplus_decompose_table[off + (uc & 0xf)];
+
+ duc->size = off & 3;
+ off /= 4;
+ for (ii = 0; ii < duc->size; ++ii)
+ duc->str[ii] = hfsplus_decompose_table[off++];
+}
+
+/*
+ * Convert a ASCII character(s) to a unicode character.
+ * The unicode character will be decomposed, if required.
+ * Returns the number of ASCII characters converted.
+ */
+static int asc2ucd(struct super_block *sb,
+ const char *astr, int len, struct decomposed_uc *duc)
+{
+ int size = asc2uc(sb, astr, len, duc->str);
+ bool decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
+
+ if (decompose && *duc->str >= 0xc0)
+ decompose_uc(*duc->str, duc);
+ else
+ duc->size = 1;
+
+ return size;
+}
+
+int hfsplus_asc2uni(struct super_block *sb,
+ struct hfsplus_unistr *ustr, const char *astr, int len)
+{
+ u16 outlen = 0;
+ struct decomposed_uc duc;

while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
- size = nls->char2uni(astr, len, &c);
- if (size <= 0) {
- c = '?';
- size = 1;
- }
+ int ii;
+ int size = asc2ucd(sb, astr, len, &duc);
+ if (outlen + duc.size > HFSPLUS_MAX_STRLEN)
+ break;
+
astr += size;
len -= size;
- switch (c) {
- case 0x2400:
- c = 0;
- break;
- case ':':
- c = '/';
- break;
- }
- if (c >= 0xc0 && decompose) {
- off = hfsplus_decompose_table[(c >> 12) & 0xf];
- if (!off)
- goto done;
- if (off == 0xffff) {
- goto done;
- }
- off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)];
- if (!off)
- goto done;
- off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)];
- if (!off)
- goto done;
- off = hfsplus_decompose_table[off + (c & 0xf)];
- size = off & 3;
- if (!size)
- goto done;
- off /= 4;
- if (outlen + size > HFSPLUS_MAX_STRLEN)
- break;
- do {
- ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]);
- } while (--size > 0);
- continue;
- }
- done:
- ustr->unicode[outlen++] = cpu_to_be16(c);
+ for (ii = 0; ii < duc.size; ++ii)
+ ustr->unicode[outlen++] = cpu_to_be16(duc.str[ii]);
}
ustr->length = cpu_to_be16(outlen);
if (len > 0)

--
"I never could learn to drink that blood and call it wine" - Bob Dylan


2007-06-25 12:11:49

by Roman Zippel

[permalink] [raw]
Subject: Re: [patch 1/2] HFS+: Refactor ASCII to unicode conversion routine for later reuse

Hi,

On Wed, 20 Jun 2007, Duane Griffin wrote:

> Refactor existing HFS+ ASCII to unicode string conversion routine to
> split out character conversion functionality. This will be reused by
> the custom dentry hash and comparison routines. This approach avoids
> unnecessary memory allocation compared to using the string conversion
> routine directly in the new functions.

I like the idea of this, but not that it generates larger code, so I
reformatted it a little to get rid of the decomposed_uc struct which
required an unnecessary data copy, so now the it even generates slightly
smaller code. :)

bye, Roman


From: Duane Griffin <[email protected]>

Refactor existing HFS+ ASCII to unicode string conversion routine to
split out character conversion functionality. This will be reused by
the custom dentry hash and comparison routines. This approach avoids
unnecessary memory allocation compared to using the string conversion
routine directly in the new functions.

Signed-off-by: Duane Griffin <[email protected]>
Signed-off-by: Roman Zippel <[email protected]>
---
fs/hfsplus/unicode.c | 105 +++++++++++++++++++++++++++++----------------------
1 file changed, 60 insertions(+), 45 deletions(-)

Index: linux-2.6/fs/hfsplus/unicode.c
===================================================================
--- linux-2.6.orig/fs/hfsplus/unicode.c
+++ linux-2.6/fs/hfsplus/unicode.c
@@ -239,58 +239,73 @@ out:
return res;
}

-int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len)
+/*
+ * Convert one or more ASCII characters into a single unicode character.
+ * Returns the number of ASCII characters corresponding to the unicode char.
+ */
+static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
+ wchar_t *uc)
{
- struct nls_table *nls = HFSPLUS_SB(sb).nls;
- int size, off, decompose;
+ int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
+ if (size <= 0) {
+ *uc = '?';
+ size = 1;
+ }
+ switch (*uc) {
+ case 0x2400:
+ *uc = 0;
+ break;
+ case ':':
+ *uc = '/';
+ break;
+ }
+ return size;
+}
+
+/* Decomposes a single unicode character. */
+static inline u16 *decompose_unichar(wchar_t uc, int *size)
+{
+ int off;
+
+ off = hfsplus_decompose_table[(uc >> 12) & 0xf];
+ if (off == 0 || off == 0xffff)
+ return NULL;
+
+ off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
+ if (!off)
+ return NULL;
+
+ off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
+ if (!off)
+ return NULL;
+
+ off = hfsplus_decompose_table[off + (uc & 0xf)];
+ *size = off & 3;
+ return hfsplus_decompose_table + (off / 4);
+}
+
+int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
+ const char *astr, int len)
+{
+ int size, dsize, decompose;
+ u16 *dstr, outlen = 0;
wchar_t c;
- u16 outlen = 0;

decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
-
while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
- size = nls->char2uni(astr, len, &c);
- if (size <= 0) {
- c = '?';
- size = 1;
- }
- astr += size;
- len -= size;
- switch (c) {
- case 0x2400:
- c = 0;
- break;
- case ':':
- c = '/';
- break;
- }
- if (c >= 0xc0 && decompose) {
- off = hfsplus_decompose_table[(c >> 12) & 0xf];
- if (!off)
- goto done;
- if (off == 0xffff) {
- goto done;
- }
- off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)];
- if (!off)
- goto done;
- off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)];
- if (!off)
- goto done;
- off = hfsplus_decompose_table[off + (c & 0xf)];
- size = off & 3;
- if (!size)
- goto done;
- off /= 4;
- if (outlen + size > HFSPLUS_MAX_STRLEN)
+ size = asc2unichar(sb, astr, len, &c);
+
+ if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+ if (outlen + dsize > HFSPLUS_MAX_STRLEN)
break;
do {
- ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]);
- } while (--size > 0);
- continue;
- }
- done:
- ustr->unicode[outlen++] = cpu_to_be16(c);
+ ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
+ } while (--dsize > 0);
+ } else
+ ustr->unicode[outlen++] = cpu_to_be16(c);
+
+ astr += size;
+ len -= size;
}
ustr->length = cpu_to_be16(outlen);
if (len > 0)

2007-06-26 10:03:19

by Duane Griffin

[permalink] [raw]
Subject: Re: [patch 1/2] HFS+: Refactor ASCII to unicode conversion routine for later reuse

On 25/06/07, Roman Zippel <[email protected]> wrote:
>
> I like the idea of this, but not that it generates larger code, so I
> reformatted it a little to get rid of the decomposed_uc struct which
> required an unnecessary data copy, so now the it even generates slightly
> smaller code. :)

Ah, nice one! Returning the pointer into the table is a much nicer way
of doing things.

> + if (decompose && (dstr = decompose_unichar(c, &dsize))) {
> + if (outlen + dsize > HFSPLUS_MAX_STRLEN)
> break;
> do {
> - ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]);
> - } while (--size > 0);
> - continue;
> - }
> - done:
> - ustr->unicode[outlen++] = cpu_to_be16(c);
> + ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
> + } while (--dsize > 0);

Andrew's comments about the loop in the second patch apply here too, I
think. The original code did have a check for this condition, so I
guess it is a potential problem. How about this (on top of your
version of the patches):

Index: linux-2.6.21.5/fs/hfsplus/unicode.c
===================================================================
--- linux-2.6.21.5.orig/fs/hfsplus/unicode.c
+++ linux-2.6.21.5/fs/hfsplus/unicode.c
@@ -280,7 +280,9 @@ static inline u16 *decompose_unichar(wch
return NULL;

off = hfsplus_decompose_table[off + (uc & 0xf)];
- *size = off & 3;
+ if ((*size = off & 3) == 0)
+ return NULL;
+
return hfsplus_decompose_table + (off / 4);
}

I'm using gmail to send this, so it will mess up the tabs I'm afraid :(

> bye, Roman

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan