2013-04-11 08:47:26

by Tomasz Stanislawski

Subject: [RFC] security: smack: add hash table for smack for quick label searching

Hi everyone,
I am a developer working on optimization of the TIZEN system.
Recently, I've discovered a performance issue in the SMACK subsystem.
I used the PERF tool to find performance bottlenecks.

The test scenario was simple. Run multiple applications and
see what the system does using the following command:

perf record -a -g

Next, see the results with the command:

perf report -s symbol -g graph,0.5

Among the many lines, the following ones are especially interesting:

5.96% [k] smk_find_entry
|
|--5.06%-- smk_access
| |
| --4.99%-- smk_curacc
| |
| |--3.79%-- smack_ptrace_access_check
| | security_ptrace_access_check
| | __ptrace_may_access
| | ptrace_may_access
| | |
| | --3.78%-- mm_access
| | mm_for_maps
| | m_start
| | seq_read
| | vfs_read
| | sys_read
| | ret_fast_syscall
| | |
| | --3.19%-- (nil)
| |
| --0.71%-- smack_inode_permission
| security_inode_permission
| inode_permission
|
--0.89%-- smack_to_secid
smack_socket_getpeersec_dgram
security_socket_getpeersec_dgram
|
--0.54%-- unix_stream_sendmsg

4.63% [k] strcmp
|
|--2.16%-- smk_find_entry
| |
| --1.92%-- smk_access
| |
| --1.85%-- smk_curacc
| |
| --1.20%-- smack_ptrace_access_check
| security_ptrace_access_check
| __ptrace_may_access
| ptrace_may_access
| mm_access
| mm_for_maps
| m_start
| seq_read
| vfs_read
| sys_read
| ret_fast_syscall
| |
| --0.99%-- (nil)
|
--2.14%-- smk_access
|
--2.11%-- smk_curacc
|
--1.75%-- smack_ptrace_access_check
security_ptrace_access_check
__ptrace_may_access
ptrace_may_access
|
--1.73%-- mm_access
mm_for_maps
m_start
seq_read
vfs_read
sys_read
ret_fast_syscall
|
--1.40%-- (nil)

To sum up, the result indicates that the CPU spends circa 8% (2.16% + 5.96%)
of its cycles searching for a SMACK label in the smk_find_entry function.
The function iterates through smack_known_list to find an entry.
Further analysis showed that the size of the list can reach as many as 600 entries.
I measured that it takes circa 200 tries to find an entry on average.
The value was computed as the total number of iterations in smk_find_entry's
loop divided by the number of times smk_find_entry was called within a
10-second time window.

IMO, this is a serious performance issue which scales badly with
the complexity of the system.

I implemented a patch that makes use of a hash table to speed up searching
for SMACK labels. The patch is rebased onto the latest v3.9-rc6 kernel.
The code is thread-safe (I hope) because it shares the RCU mechanism
and locks with smack_known_list.

There is still some room for improvement, such as:
a) using struct hlist_head instead of struct list_head to reduce
the memory size of the hash table (see the rough sketch below),

OR

b) using smack_known::list instead of introducing smack_known::htab_list
and modifying all smack_known_list-related code to iterate over
the hash table.
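
For illustration, improvement (a) could look roughly like the sketch below.
This is untested and not part of the posted patch; it assumes struct
smack_known carries a "struct hlist_node htab_node" in place of the
htab_list field, and uses the three-argument hlist_for_each_entry_rcu()
form available in v3.9:

struct hlist_head smack_known_htab[SMACK_KNOWN_SIZE];

void __smk_insert_entry(struct smack_known *skp)
{
        u32 hash = strhash(skp->smk_known);

        /* hlist_head is a single pointer, halving the table size */
        hlist_add_head_rcu(&skp->htab_node,
                           &smack_known_htab[hash & (SMACK_KNOWN_SIZE - 1)]);
        list_add_rcu(&skp->list, &smack_known_list);
}

struct smack_known *smk_find_entry(const char *string)
{
        u32 hash = strhash(string);
        struct smack_known *skp;

        hlist_for_each_entry_rcu(skp,
                        &smack_known_htab[hash & (SMACK_KNOWN_SIZE - 1)],
                        htab_node) {
                if (strcmp(skp->smk_known, string) == 0)
                        return skp;
        }
        return NULL;
}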

I decided to postpone the mentioned improvements for the sake of simplicity
of this RFC. After applying the patch, the smk_find_entry overhead was
reduced to a mere 0.05% of CPU cycles.

I hope you find the measurement and the patch useful.
All comments are welcome.

Regards,
Tomasz Stanislawski


Tomasz Stanislawski (1):
security: smack: add hash table for smack for quick label searching

security/smack/smack.h | 5 +++++
security/smack/smack_access.c | 33 +++++++++++++++++++++++++++++++--
security/smack/smack_lsm.c | 21 +++++++++++++++------
3 files changed, 51 insertions(+), 8 deletions(-)

--
1.7.9.5


2013-04-11 08:47:40

by Tomasz Stanislawski

Subject: [RFC] security: smack: add hash table for smack for quick label searching

This patch adds a hash table to speed up searching for a Smack label by its name.

For a typical idle TIZEN system, the CPU wastes circa 5-10% of its cycles
in the smk_find_entry function. This patch adds a hash table that should
speed up searches by a factor of up to 500, at the cost of an additional
4 KiB of memory for the hash table.

Signed-off-by: Tomasz Stanislawski <[email protected]>
---
security/smack/smack.h | 5 +++++
security/smack/smack_access.c | 33 +++++++++++++++++++++++++++++++--
security/smack/smack_lsm.c | 21 +++++++++++++++------
3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/security/smack/smack.h b/security/smack/smack.h
index 99b3612..8df667e 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -118,6 +118,7 @@ struct smk_netlbladdr {
*/
struct smack_known {
struct list_head list;
+ struct list_head htab_list;
char *smk_known;
u32 smk_secid;
struct netlbl_lsm_secattr smk_netlabel; /* on wire labels */
@@ -215,6 +216,7 @@ char *smk_parse_smack(const char *string, int len);
int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int);
char *smk_import(const char *, int);
struct smack_known *smk_import_entry(const char *, int);
+void __smk_insert_entry(struct smack_known *skp);
struct smack_known *smk_find_entry(const char *);
u32 smack_to_secid(const char *);

@@ -238,6 +240,9 @@ extern struct mutex smack_known_lock;
extern struct list_head smack_known_list;
extern struct list_head smk_netlbladdr_list;

+#define SMACK_KNOWN_SIZE 512
+extern struct list_head smack_known_htab[SMACK_KNOWN_SIZE];
+
extern struct security_operations smack_ops;

/*
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index db14689..0490c0d 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -48,6 +48,8 @@ struct smack_known smack_known_web = {

LIST_HEAD(smack_known_list);

+struct list_head smack_known_htab[SMACK_KNOWN_SIZE];
+
/*
* The initial value needs to be bigger than any of the
* known values above.
@@ -322,6 +324,30 @@ void smack_log(char *subject_label, char *object_label, int request,

DEFINE_MUTEX(smack_known_lock);

+/* simple Dan Bernstein string hashing function */
+static u32 strhash(const char *s)
+{
+ u32 hash = 5381;
+ for (; *s; ++s)
+ hash = hash * 33 + *s;
+ return hash;
+}
+
+/**
+ * smk_insert_entry - insert a smack label into a hash map,
+ *
+ * this function must be called under smack_known_lock
+ */
+void __smk_insert_entry(struct smack_known *skp)
+{
+ u32 hash = strhash(skp->smk_known);
+ struct list_head *head = &smack_known_htab[
+ hash & (SMACK_KNOWN_SIZE - 1)];
+
+ list_add_rcu(&skp->htab_list, head);
+ list_add_rcu(&skp->list, &smack_known_list);
+}
+
/**
* smk_find_entry - find a label on the list, return the list entry
* @string: a text string that might be a Smack label
@@ -331,9 +357,12 @@ DEFINE_MUTEX(smack_known_lock);
*/
struct smack_known *smk_find_entry(const char *string)
{
+ u32 hash = strhash(string);
+ struct list_head *head = &smack_known_htab[
+ hash & (SMACK_KNOWN_SIZE - 1)];
struct smack_known *skp;

- list_for_each_entry_rcu(skp, &smack_known_list, list) {
+ list_for_each_entry_rcu(skp, head, htab_list) {
if (strcmp(skp->smk_known, string) == 0)
return skp;
}
@@ -470,7 +499,7 @@ struct smack_known *smk_import_entry(const char *string, int len)
* Make sure that the entry is actually
* filled before putting it on the list.
*/
- list_add_rcu(&skp->list, &smack_known_list);
+ __smk_insert_entry(skp);
goto unlockout;
}
/*
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index fa64740..38f5884 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -3535,6 +3535,8 @@ struct security_operations smack_ops = {

static __init void init_smack_known_list(void)
{
+ int i;
+
/*
* Initialize rule list locks
*/
@@ -3553,15 +3555,22 @@ static __init void init_smack_known_list(void)
INIT_LIST_HEAD(&smack_known_floor.smk_rules);
INIT_LIST_HEAD(&smack_known_invalid.smk_rules);
INIT_LIST_HEAD(&smack_known_web.smk_rules);
+
+ /*
+ * Initialize a hash map for a quick search of SMACK labels
+ */
+ for (i = 0; i < SMACK_KNOWN_SIZE; ++i)
+ INIT_LIST_HEAD(&smack_known_htab[i]);
+
/*
* Create the known labels list
*/
- list_add(&smack_known_huh.list, &smack_known_list);
- list_add(&smack_known_hat.list, &smack_known_list);
- list_add(&smack_known_star.list, &smack_known_list);
- list_add(&smack_known_floor.list, &smack_known_list);
- list_add(&smack_known_invalid.list, &smack_known_list);
- list_add(&smack_known_web.list, &smack_known_list);
+ __smk_insert_entry(&smack_known_huh);
+ __smk_insert_entry(&smack_known_hat);
+ __smk_insert_entry(&smack_known_star);
+ __smk_insert_entry(&smack_known_floor);
+ __smk_insert_entry(&smack_known_invalid);
+ __smk_insert_entry(&smack_known_web);
}

/**
--
1.7.9.5

2013-04-11 18:04:57

by Casey Schaufler

Subject: Re: [RFC] security: smack: add hash table for smack for quick label searching

On 4/11/2013 1:46 AM, Tomasz Stanislawski wrote:
> Hi everyone,
> I am a developer working on optimization of the TIZEN system.
> Recently, I've discovered a performance issue in the SMACK subsystem.
> I used the PERF tool to find performance bottlenecks.
>
> The test scenario was simple. Run multiple applications and
> see what the system does using the following command:
>
> perf record -a -g
>
> Next, see the results with the command:
>
> perf report -s symbol -g graph,0.5
>
> Among the many lines, the following ones are especially interesting:
>
> 5.96% [k] smk_find_entry
> |
> |--5.06%-- smk_access
> | |
> | --4.99%-- smk_curacc
> | |
> | |--3.79%-- smack_ptrace_access_check
> | | security_ptrace_access_check
> | | __ptrace_may_access
> | | ptrace_may_access
> | | |
> | | --3.78%-- mm_access
> | | mm_for_maps
> | | m_start
> | | seq_read
> | | vfs_read
> | | sys_read
> | | ret_fast_syscall
> | | |
> | | --3.19%-- (nil)
> | |
> | --0.71%-- smack_inode_permission
> | security_inode_permission
> | inode_permission
> |
> --0.89%-- smack_to_secid
> smack_socket_getpeersec_dgram
> security_socket_getpeersec_dgram
> |
> --0.54%-- unix_stream_sendmsg
>
> 4.63% [k] strcmp
> |
> |--2.16%-- smk_find_entry
> | |
> | --1.92%-- smk_access
> | |
> | --1.85%-- smk_curacc
> | |
> | --1.20%-- smack_ptrace_access_check
> | security_ptrace_access_check
> | __ptrace_may_access
> | ptrace_may_access
> | mm_access
> | mm_for_maps
> | m_start
> | seq_read
> | vfs_read
> | sys_read
> | ret_fast_syscall
> | |
> | --0.99%-- (nil)
> |
> --2.14%-- smk_access
> |
> --2.11%-- smk_curacc
> |
> --1.75%-- smack_ptrace_access_check
> security_ptrace_access_check
> __ptrace_may_access
> ptrace_may_access
> |
> --1.73%-- mm_access
> mm_for_maps
> m_start
> seq_read
> vfs_read
> sys_read
> ret_fast_syscall
> |
> --1.40%-- (nil)
>
> To sum up, the result indicates that the CPU spends circa 8% (2.16% + 5.96%)
> of its cycles searching for a SMACK label in the smk_find_entry function.
> The function iterates through smack_known_list to find an entry.
> Further analysis showed that the size of the list can reach as many as 600 entries.
> I measured that it takes circa 200 tries to find an entry on average.
> The value was computed as the total number of iterations in smk_find_entry's
> loop divided by the number of times smk_find_entry was called within a
> 10-second time window.
>
> IMO, this is a serious performance issue which scales badly with
> the complexity of the system.
>
> I implemented a patch that makes use of a hash table to speed up searching
> for SMACK labels. The patch is rebased onto the latest v3.9-rc6 kernel.
> The code is thread-safe (I hope) because it shares the RCU mechanism
> and locks with smack_known_list.
>
> There is still some room for improvement, such as:
> a) using struct hlist_head instead of struct list_head to reduce
> the memory size of the hash table.
>
> OR
>
> b) using smack_known::list instead of introducing smack_known::htab_list
> and modifying all smack_known_list-related code to iterate over
> the hash table.
>
> I decided to postpone the mentioned improvements for the sake of simplicity
> of this RFC. After applying the patch, the smk_find_entry overhead was
> reduced to a mere 0.05% of CPU cycles.
>
> I hope you find the measurement and the patch useful.
> All comments are welcome.

NAK

There will be no hash tables in Smack.

The correct solution is simple.

In the task_smack structure there are two Smack label pointers,
smk_task and smk_forked. Replace these fields with pointers to
the smack_known structures that contain the Smack label pointers
used today. This will require trivial changes throughout the
Smack code to accommodate the type change and a few logical twists
around smk_import. It will eliminate the need for smk_lookup_entry.
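
Roughly speaking, the type change amounts to this (a sketch only, not a
complete implementation; the remaining fields stay as they are today):

struct task_smack {
        struct smack_known      *smk_task;      /* was: char *smk_task */
        struct smack_known      *smk_forked;    /* was: char *smk_forked */
        struct list_head        smk_rules;
        struct mutex            smk_rules_lock;
};

With that in place the access check hooks already hold the smack_known
entry for the current task and never need to look the label up by name.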


>
> Regards,
> Tomasz Stanislawski
>
>
> Tomasz Stanislawski (1):
> security: smack: add hash table for smack for quick label searching
>
> security/smack/smack.h | 5 +++++
> security/smack/smack_access.c | 33 +++++++++++++++++++++++++++++++--
> security/smack/smack_lsm.c | 21 +++++++++++++++------
> 3 files changed, 51 insertions(+), 8 deletions(-)
>

2013-04-12 15:12:16

by Łukasz Stelmach

Subject: Re: [RFC] security: smack: add hash table for smack for quick label searching

It was <2013-04-11 Thu 19:59>, when Casey Schaufler wrote:
> On 4/11/2013 1:46 AM, Tomasz Stanislawski wrote:
>> Hi everyone,
>> I am a developer working on optimization of the TIZEN system.
>> Recently, I've discovered a performance issue in the SMACK subsystem.
>> I used the PERF tool to find performance bottlenecks.
>>
>> The test scenario was simple. Run multiple applications and
>> see what the system does using the following command:
>>
>> perf record -a -g
>>
>> Next, see the results with the command:
>>
>> perf report -s symbol -g graph,0.5
>>
>> Among the many lines, the following ones are especially interesting:
>>
>> 5.96% [k] smk_find_entry
>> |
>> |--5.06%-- smk_access
>> | |
>> | --4.99%-- smk_curacc
>> | |
>> | |--3.79%-- smack_ptrace_access_check

[...]

>>
>> To sum up, the result indicates that the CPU spends circa 8% (2.16% + 5.96%)
>> of its cycles searching for a SMACK label in the smk_find_entry function.
>> The function iterates through smack_known_list to find an entry.
>> Further analysis showed that the size of the list can reach as many as 600 entries.
>> I measured that it takes circa 200 tries to find an entry on average.
>> The value was computed as the total number of iterations in smk_find_entry's
>> loop divided by the number of times smk_find_entry was called within a
>> 10-second time window.
>>
>> IMO, this is a serious performance issue which scales badly with
>> the complexity of the system.
>>
>> I implemented a patch that makes use of a hash table to speed up searching
>> for SMACK labels. The patch is rebased onto the latest v3.9-rc6 kernel.
>> The code is thread-safe (I hope) because it shares the RCU mechanism
>> and locks with smack_known_list.

[...]

>>
>> I hope you find the measurement and the patch useful.
>> All comments are welcome.
>
> NAK
>
> There will be no hash tables in Smack.
>
> The correct solution is simple.
>
> In the task_smack structure there are two Smack label pointers,
> smk_task and smk_forked. Replace these fields with pointers to
> the smack_known structures that contain the Smack label pointers
> used today. This will require trivial changes throughout the
> Smack code to accommodate the type change and a few logical twists
> around smk_import. It will eliminate the need for smk_lookup_entry.

Allow me to join the conversation with an interesting observation.
I don't know whether a hash table is a good solution to the problem Tomasz
has mentioned, but it definitely improves performance while loading rules.

Conditions:

--8<---------------cut here---------------start------------->8---
# number of subjects
sh-4.1# cat /etc/smack/accesses.d/* | cut -d\ -f1 | sort -u | wc -l
379
# number of rules
sh-4.1# cat /etc/smack/accesses.d/* | wc -l
23895
--8<---------------cut here---------------end--------------->8---

Without the patch

--8<---------------cut here---------------start------------->8---
sh-4.1# time smackctl apply

real 0m1.255s
user 0m0.115s
sys 0m0.895s
--8<---------------cut here---------------end--------------->8---

perf's output is:

--8<---------------cut here---------------start------------->8---
# ========
# captured on: Tue Jan 1 09:52:14 2013
# os release : 3.4.5
# arch : armv7l
# nrcpus online : 4
# nrcpus avail : 4
# cpudesc : ARMv7 Processor rev 0 (v7l)
# total memory : 1025456 kB
# cmdline : /perf record -a -g -- /usr/bin/smackctl apply
# event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 45, 46, 47, 48 }
# ========
#
# Events: 1K cycles
#
# Overhead Symbol
# ........ ..................................
#
47.49% [k] strcmp
|
|--41.27%-- smk_find_entry
| |
| |--28.87%-- smk_import_entry
| | smk_import
| | smk_fill_rule
| | smk_parse_long_rule
| | smk_write_rules_list.clone.3
| | smk_write_load2
| | vfs_write
| | sys_write
| | ret_fast_syscall
| |
| --12.41%-- smk_write_rules_list.clone.3
| smk_write_load2
| vfs_write
| sys_write
| ret_fast_syscall
|
|--4.41%-- smk_import_entry
| smk_import
| smk_fill_rule
| smk_parse_long_rule
| smk_write_rules_list.clone.3
| smk_write_load2
| vfs_write
| sys_write
| ret_fast_syscall
|
--1.80%-- smk_write_rules_list.clone.3
smk_write_load2
vfs_write
sys_write
ret_fast_syscall
19.21% [k] smk_find_entry
|
|--12.44%-- smk_import_entry
| smk_import
| smk_fill_rule
| smk_parse_long_rule
| smk_write_rules_list.clone.3
| smk_write_load2
| vfs_write
| sys_write
| ret_fast_syscall
|
--6.78%-- smk_write_rules_list.clone.3
smk_write_load2
vfs_write
sys_write
ret_fast_syscall

[ less than 10% per entry ]

--8<---------------cut here---------------end--------------->8---

With the patch

--8<---------------cut here---------------start------------->8---
sh-4.1# time smackctl apply

real 0m0.349s
user 0m0.095s
sys 0m0.250s
--8<---------------cut here---------------end--------------->8---

still quite long, but about 3.6 times faster.

--8<---------------cut here---------------start------------->8---
# ========
# captured on: Tue Jan 1 10:40:45 2013
# os release : 3.4.5
# arch : armv7l
# nrcpus online : 4
# nrcpus avail : 4
# cpudesc : ARMv7 Processor rev 0 (v7l)
# total memory : 1025544 kB
# cmdline : /perf record -a -g -- smackctl apply
# event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 1, 2, 3, 4 }
# ========
#
# Events: 606 cycles
#
# Overhead Symbol
# ........ .....................................
#
12.90% [k] smk_write_rules_list.isra.9
|
--- smk_write_load2
vfs_write
sys_write
ret_fast_syscall
7.86% [k] exynos_enter_idle
|
--- cpuidle_enter
cpuidle_enter_state
cpuidle_idle_call
cpu_idle
|
--7.69%-- secondary_start_kernel
0x4066df54
6.95% [k] v7_dma_inv_range
|
|--3.70%-- __dma_page_cpu_to_dev
| arm_dma_map_page
| arm_dma_map_sg
| dw_mci_pre_dma_transfer
| dw_mci_pre_req
| mmc_pre_req
| mmc_start_req
| mmc_blk_issue_rw_rq
| mmc_blk_issue_rq
| mmc_queue_thread
| kthread
| do_exit
|
--3.25%-- __dma_page_dev_to_cpu
arm_dma_unmap_page
arm_dma_unmap_sg
dw_mci_post_req
mmc_post_req
mmc_start_req
mmc_blk_issue_rw_rq
mmc_blk_issue_rq
mmc_queue_thread
kthread
do_exit
3.56% [k] _raw_spin_unlock_irq
|
|--1.40%-- mmc_blk_issue_rw_rq
| mmc_blk_issue_rq
| mmc_queue_thread
| kthread
| do_exit
|
--1.09%-- finish_task_switch
__schedule
|
--0.75%-- schedule


3.45% [.] 0x0000118c
2.94% [.] strncpy
2.67% [.] vfprintf
2.44% [k] strcmp
|
--- smk_find_entry
smk_import_entry
smk_import
smk_fill_rule
smk_parse_long_rule
smk_write_rules_list.isra.9
smk_write_load2
vfs_write
sys_write
ret_fast_syscall

[ less than 2.5% per entry ]
--8<---------------cut here---------------end--------------->8---

--
Łukasz Stelmach
Software wizard
Samsung Poland R&D Center

Al. Armii Ludowej 26, 00-609 Warszawa
http://www.rd.samsung.pl

2013-04-12 18:07:22

by Casey Schaufler

Subject: Re: [RFC] security: smack: add hash table for smack for quick label searching

On 4/12/2013 8:12 AM, Łukasz Stelmach wrote:
> It was <2013-04-11 Thu 19:59>, when Casey Schaufler wrote:
>> On 4/11/2013 1:46 AM, Tomasz Stanislawski wrote:
>>> Hi everyone,
>>> I am a developer working on optimization of the TIZEN system.
>>> Recently, I've discovered a performance issue in the SMACK subsystem.
>>> I used the PERF tool to find performance bottlenecks.
>>>
>>> The test scenario was simple. Run multiple applications and
>>> see what the system does using the following command:
>>>
>>> perf record -a -g
>>>
>>> Next, see the results with the command:
>>>
>>> perf report -s symbol -g graph,0.5
>>>
>>> Among the many lines, the following ones are especially interesting:
>>>
>>> 5.96% [k] smk_find_entry
>>> |
>>> |--5.06%-- smk_access
>>> | |
>>> | --4.99%-- smk_curacc
>>> | |
>>> | |--3.79%-- smack_ptrace_access_check
> [...]
>
>>> To sum up, the result indicates that the CPU spends circa 8% (2.16% + 5.96%)
>>> of its cycles searching for a SMACK label in the smk_find_entry function.
>>> The function iterates through smack_known_list to find an entry.
>>> Further analysis showed that the size of the list can reach as many as 600 entries.
>>> I measured that it takes circa 200 tries to find an entry on average.
>>> The value was computed as the total number of iterations in smk_find_entry's
>>> loop divided by the number of times smk_find_entry was called within a
>>> 10-second time window.
>>>
>>> IMO, this is a serious performance issue which scales badly with
>>> the complexity of the system.
>>>
>>> I implemented a patch that makes use of a hash table to speed up searching
>>> for SMACK labels. The patch is rebased onto the latest v3.9-rc6 kernel.
>>> The code is thread-safe (I hope) because it shares the RCU mechanism
>>> and locks with smack_known_list.
> [...]
>
>>> I hope you find the measurement and the patch useful.
>>> All comments are welcome.
>> NAK
>>
>> There will be no hash tables in Smack.
>>
>> The correct solution is simple.
>>
>> In the task_smack structure there are two Smack label pointers,
>> smk_task and smk_forked. Replace these fields with pointers to
>> the smack_known structures that contain the Smack label pointers
>> used today. This will require trivial changes throughout the
>> Smack code to accommodate the type change and a few logical twists
>> around smk_import. It will eliminate the need for smk_lookup_entry.
> Allow me to join the conversation with an interesting observation.
> I don't know whether a hash table is a good solution to the problem Tomasz
> has mentioned, but it definitely improves performance while loading rules.

Loading rules is a rare event and an uninteresting case.

> Conditions:
>
> --8<---------------cut here---------------start------------->8---
> # number of subjects
> sh-4.1# cat /etc/smack/accesses.d/* | cut -d\ -f1 | sort -u | wc -l
> 379
> # number of rules
> sh-4.1# cat /etc/smack/accesses.d/* | wc -l
> 23895
> --8<---------------cut here---------------end--------------->8---
>
> Without the patch
>
> --8<---------------cut here---------------start------------->8---
> sh-4.1# time smackctl apply
>
> real 0m1.255s
> user 0m0.115s
> sys 0m0.895s
> --8<---------------cut here---------------end--------------->8---
>
> perf's output is:
>
> --8<---------------cut here---------------start------------->8---
> # ========
> # captured on: Tue Jan 1 09:52:14 2013
> # os release : 3.4.5
> # arch : armv7l
> # nrcpus online : 4
> # nrcpus avail : 4
> # cpudesc : ARMv7 Processor rev 0 (v7l)
> # total memory : 1025456 kB
> # cmdline : /perf record -a -g -- /usr/bin/smackctl apply
> # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 45, 46, 47, 48 }
> # ========
> #
> # Events: 1K cycles
> #
> # Overhead Symbol
> # ........ ..................................
> #
> 47.49% [k] strcmp
> |
> |--41.27%-- smk_find_entry
> | |
> | |--28.87%-- smk_import_entry
> | | smk_import
> | | smk_fill_rule
> | | smk_parse_long_rule
> | | smk_write_rules_list.clone.3
> | | smk_write_load2
> | | vfs_write
> | | sys_write
> | | ret_fast_syscall
> | |
> | --12.41%-- smk_write_rules_list.clone.3
> | smk_write_load2
> | vfs_write
> | sys_write
> | ret_fast_syscall
> |
> |--4.41%-- smk_import_entry
> | smk_import
> | smk_fill_rule
> | smk_parse_long_rule
> | smk_write_rules_list.clone.3
> | smk_write_load2
> | vfs_write
> | sys_write
> | ret_fast_syscall
> |
> --1.80%-- smk_write_rules_list.clone.3
> smk_write_load2
> vfs_write
> sys_write
> ret_fast_syscall
> 19.21% [k] smk_find_entry
> |
> |--12.44%-- smk_import_entry
> | smk_import
> | smk_fill_rule
> | smk_parse_long_rule
> | smk_write_rules_list.clone.3
> | smk_write_load2
> | vfs_write
> | sys_write
> | ret_fast_syscall
> |
> --6.78%-- smk_write_rules_list.clone.3
> smk_write_load2
> vfs_write
> sys_write
> ret_fast_syscall
>
> [ less than 10% per entry ]
>
> --8<---------------cut here---------------end--------------->8---
>
> With the patch
>
> --8<---------------cut here---------------start------------->8---
> sh-4.1# time smackctl apply
>
> real 0m0.349s
> user 0m0.095s
> sys 0m0.250s
> --8<---------------cut here---------------end--------------->8---
>
> still quite long, but about 3.6 times faster.
>
> --8<---------------cut here---------------start------------->8---
> # ========
> # captured on: Tue Jan 1 10:40:45 2013
> # os release : 3.4.5
> # arch : armv7l
> # nrcpus online : 4
> # nrcpus avail : 4
> # cpudesc : ARMv7 Processor rev 0 (v7l)
> # total memory : 1025544 kB
> # cmdline : /perf record -a -g -- smackctl apply
> # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 1, 2, 3, 4 }
> # ========
> #
> # Events: 606 cycles
> #
> # Overhead Symbol
> # ........ .....................................
> #
> 12.90% [k] smk_write_rules_list.isra.9
> |
> --- smk_write_load2
> vfs_write
> sys_write
> ret_fast_syscall
> 7.86% [k] exynos_enter_idle
> |
> --- cpuidle_enter
> cpuidle_enter_state
> cpuidle_idle_call
> cpu_idle
> |
> --7.69%-- secondary_start_kernel
> 0x4066df54
> 6.95% [k] v7_dma_inv_range
> |
> |--3.70%-- __dma_page_cpu_to_dev
> | arm_dma_map_page
> | arm_dma_map_sg
> | dw_mci_pre_dma_transfer
> | dw_mci_pre_req
> | mmc_pre_req
> | mmc_start_req
> | mmc_blk_issue_rw_rq
> | mmc_blk_issue_rq
> | mmc_queue_thread
> | kthread
> | do_exit
> |
> --3.25%-- __dma_page_dev_to_cpu
> arm_dma_unmap_page
> arm_dma_unmap_sg
> dw_mci_post_req
> mmc_post_req
> mmc_start_req
> mmc_blk_issue_rw_rq
> mmc_blk_issue_rq
> mmc_queue_thread
> kthread
> do_exit
> 3.56% [k] _raw_spin_unlock_irq
> |
> |--1.40%-- mmc_blk_issue_rw_rq
> | mmc_blk_issue_rq
> | mmc_queue_thread
> | kthread
> | do_exit
> |
> --1.09%-- finish_task_switch
> __schedule
> |
> --0.75%-- schedule
>
>
> 3.45% [.] 0x0000118c
> 2.94% [.] strncpy
> 2.67% [.] vfprintf
> 2.44% [k] strcmp
> |
> --- smk_find_entry
> smk_import_entry
> smk_import
> smk_fill_rule
> smk_parse_long_rule
> smk_write_rules_list.isra.9
> smk_write_load2
> vfs_write
> sys_write
> ret_fast_syscall
>
> [ less than 2.5% per entry ]
> --8<---------------cut here---------------end--------------->8---
>

2013-06-08 20:32:04

by Casey Schaufler

Subject: Re: [RFC] security: smack: add hash table for smack for quick label searching

On 4/11/2013 1:46 AM, Tomasz Stanislawski wrote:
> This patch adds a hash table to speed up searching for a Smack label by its name.
>
> For a typical idle TIZEN system, the CPU wastes circa 5-10% of its cycles
> in the smk_find_entry function. This patch adds a hash table that should
> speed up searches by a factor of up to 500, at the cost of an additional
> 4 KiB of memory for the hash table.
>
> Signed-off-by: Tomasz Stanislawski <[email protected]>

I had originally NAKed this patch because there were other,
more important changes that needed to be made. Those are in
now, so it's time to revisit this change. I have many comments
throughout. The patch needs to be rebased on the current
smack-next smack-for-3.11 branch.

> ---
> security/smack/smack.h | 5 +++++
> security/smack/smack_access.c | 33 +++++++++++++++++++++++++++++++--
> security/smack/smack_lsm.c | 21 +++++++++++++++------
> 3 files changed, 51 insertions(+), 8 deletions(-)
>
> diff --git a/security/smack/smack.h b/security/smack/smack.h
> index 99b3612..8df667e 100644
> --- a/security/smack/smack.h
> +++ b/security/smack/smack.h
> @@ -118,6 +118,7 @@ struct smk_netlbladdr {
> */
> struct smack_known {
> struct list_head list;
> + struct list_head htab_list;

I would prefer "smk_hashed".

> char *smk_known;
> u32 smk_secid;
> struct netlbl_lsm_secattr smk_netlabel; /* on wire labels */
> @@ -215,6 +216,7 @@ char *smk_parse_smack(const char *string, int len);
> int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int);
> char *smk_import(const char *, int);
> struct smack_known *smk_import_entry(const char *, int);
> +void __smk_insert_entry(struct smack_known *skp);

Use "smk_insert_entry" instead of "__smk_insert_entry".
This is a really small function. How about a static inline?
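
Something along these lines in smack.h, perhaps (only a sketch; smk_hash()
stands for whatever the hashing helper ends up being called and would have
to be visible to the header, and the other names are the ones I suggest
below):

static inline void smk_insert_entry(struct smack_known *skp)
{
        unsigned int slot = smk_hash(skp->smk_known) & (SMACK_HASH_SLOTS - 1);

        list_add_rcu(&skp->smk_hashed, &smack_known_hash[slot]);
        list_add_rcu(&skp->list, &smack_known_list);
}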

> struct smack_known *smk_find_entry(const char *);
> u32 smack_to_secid(const char *);
>
> @@ -238,6 +240,9 @@ extern struct mutex smack_known_lock;
> extern struct list_head smack_known_list;
> extern struct list_head smk_netlbladdr_list;
>
> +#define SMACK_KNOWN_SIZE 512

I don't think the name is descriptive. SMACK_HASH_SLOTS would be better.

This is *way* too big. Show me performance numbers for 4, 8, ..., 256.

> +extern struct list_head smack_known_htab[SMACK_KNOWN_SIZE];

I'd prefer "smack_known_hash". I like real words where they fit.

> +
> extern struct security_operations smack_ops;
>
> /*
> diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
> index db14689..0490c0d 100644
> --- a/security/smack/smack_access.c
> +++ b/security/smack/smack_access.c
> @@ -48,6 +48,8 @@ struct smack_known smack_known_web = {
>
> LIST_HEAD(smack_known_list);
>
> +struct list_head smack_known_htab[SMACK_KNOWN_SIZE];
> +
> /*
> * The initial value needs to be bigger than any of the
> * known values above.
> @@ -322,6 +324,30 @@ void smack_log(char *subject_label, char *object_label, int request,
>
> DEFINE_MUTEX(smack_known_lock);
>
> +/* simple Dan Bernstein string hashing function */

Please use the usual function description comment format.
Please explain in that comment where the numbers 33 and 5381
come from. If there is any chance they might get changed in
the future #define them. Heck, #define them anyway.

> +static u32 strhash(const char *s)

Please give this more consistent name. Perhaps smk_list_hash().

> +{
> + u32 hash = 5381;
> + for (; *s; ++s)
> + hash = hash * 33 + *s;
> + return hash;
> +}
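
Putting the two comments above together, roughly something like this,
perhaps (a sketch; the function and macro names are only suggestions):

/* Dan Bernstein's djb2 string hash: hash = hash * 33 + c, seeded with 5381.
 * The constants come from djb2 itself and have no other meaning. */
#define SMK_HASH_SEED   5381
#define SMK_HASH_MULT   33

/**
 * smk_list_hash - compute the djb2 hash of a Smack label
 * @s: the label string
 */
static u32 smk_list_hash(const char *s)
{
        u32 hash = SMK_HASH_SEED;

        for (; *s != '\0'; s++)
                hash = hash * SMK_HASH_MULT + *s;
        return hash;
}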
> +
> +/**
> + * smk_insert_entry - insert a smack label into a hash map,
> + *
> + * this function must be called under smack_known_lock
> + */
> +void __smk_insert_entry(struct smack_known *skp)
> +{
> + u32 hash = strhash(skp->smk_known);
> + struct list_head *head = &smack_known_htab[
> + hash & (SMACK_KNOWN_SIZE - 1)];

Split this. Initializations in the declaration line should not
be done where they clutter the code.
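
I.e., something like this (sketch, keeping the names from the patch as
posted):

void __smk_insert_entry(struct smack_known *skp)
{
        struct list_head *head;
        u32 hash;

        hash = strhash(skp->smk_known);
        head = &smack_known_htab[hash & (SMACK_KNOWN_SIZE - 1)];

        list_add_rcu(&skp->htab_list, head);
        list_add_rcu(&skp->list, &smack_known_list);
}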

> +
> + list_add_rcu(&skp->htab_list, head);
> + list_add_rcu(&skp->list, &smack_known_list);
> +}
> +
> /**
> * smk_find_entry - find a label on the list, return the list entry
> * @string: a text string that might be a Smack label
> @@ -331,9 +357,12 @@ DEFINE_MUTEX(smack_known_lock);
> */
> struct smack_known *smk_find_entry(const char *string)
> {
> + u32 hash = strhash(string);
> + struct list_head *head = &smack_known_htab[
> + hash & (SMACK_KNOWN_SIZE - 1)];
> struct smack_known *skp;
>
> - list_for_each_entry_rcu(skp, &smack_known_list, list) {
> + list_for_each_entry_rcu(skp, head, htab_list) {
> if (strcmp(skp->smk_known, string) == 0)
> return skp;
> }
> @@ -470,7 +499,7 @@ struct smack_known *smk_import_entry(const char *string, int len)
> * Make sure that the entry is actually
> * filled before putting it on the list.
> */
> - list_add_rcu(&skp->list, &smack_known_list);
> + __smk_insert_entry(skp);
> goto unlockout;
> }
> /*
> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index fa64740..38f5884 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -3535,6 +3535,8 @@ struct security_operations smack_ops = {
>
> static __init void init_smack_known_list(void)
> {
> + int i;
> +
> /*
> * Initialize rule list locks
> */
> @@ -3553,15 +3555,22 @@ static __init void init_smack_known_list(void)
> INIT_LIST_HEAD(&smack_known_floor.smk_rules);
> INIT_LIST_HEAD(&smack_known_invalid.smk_rules);
> INIT_LIST_HEAD(&smack_known_web.smk_rules);
> +
> + /*
> + * Initialize a hash map for a quick search of SMACK labels
> + */
> + for (i = 0; i < SMACK_KNOWN_SIZE; ++i)
> + INIT_LIST_HEAD(&smack_known_htab[i]);
> +
> /*
> * Create the known labels list
> */
> - list_add(&smack_known_huh.list, &smack_known_list);
> - list_add(&smack_known_hat.list, &smack_known_list);
> - list_add(&smack_known_star.list, &smack_known_list);
> - list_add(&smack_known_floor.list, &smack_known_list);
> - list_add(&smack_known_invalid.list, &smack_known_list);
> - list_add(&smack_known_web.list, &smack_known_list);
> + __smk_insert_entry(&smack_known_huh);
> + __smk_insert_entry(&smack_known_hat);
> + __smk_insert_entry(&smack_known_star);
> + __smk_insert_entry(&smack_known_floor);
> + __smk_insert_entry(&smack_known_invalid);
> + __smk_insert_entry(&smack_known_web);
> }
>
> /**