This file controls file related operations.
Signed-off-by: Kentaro Takeda <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
Signed-off-by: Toshiharu Harada <[email protected]>
---
security/tomoyo/file.c | 1149 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1149 insertions(+)
--- /dev/null
+++ linux-next/security/tomoyo/file.c
@@ -0,0 +1,1149 @@
+/*
+ * security/tomoyo/file.c
+ *
+ * Implementation of the Domain-Based Mandatory Access Control.
+ *
+ * Copyright (C) 2005-2008 NTT DATA CORPORATION
+ *
+ * Version: 2.2.0-pre 2008/10/01
+ *
+ */
+
+#include "common.h"
+#include "tomoyo.h"
+#include "realpath.h"
+#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+
+/* Structure for "allow_read" keyword. */
+struct globally_readable_file_entry {
+ struct list1_head list;
+ const struct path_info *filename;
+ bool is_deleted;
+};
+
+/* Structure for "file_pattern" keyword. */
+struct pattern_entry {
+ struct list1_head list;
+ const struct path_info *pattern;
+ bool is_deleted;
+};
+
+/* Structure for "deny_rewrite" keyword. */
+struct no_rewrite_entry {
+ struct list1_head list;
+ const struct path_info *pattern;
+ bool is_deleted;
+};
+
+/* Keyword array for single path operations. */
+static const char *sp_keyword[MAX_SINGLE_PATH_OPERATION] = {
+ [TMY_TYPE_READ_WRITE_ACL] = "read/write",
+ [TMY_TYPE_EXECUTE_ACL] = "execute",
+ [TMY_TYPE_READ_ACL] = "read",
+ [TMY_TYPE_WRITE_ACL] = "write",
+ [TMY_TYPE_CREATE_ACL] = "create",
+ [TMY_TYPE_UNLINK_ACL] = "unlink",
+ [TMY_TYPE_MKDIR_ACL] = "mkdir",
+ [TMY_TYPE_RMDIR_ACL] = "rmdir",
+ [TMY_TYPE_MKFIFO_ACL] = "mkfifo",
+ [TMY_TYPE_MKSOCK_ACL] = "mksock",
+ [TMY_TYPE_MKBLOCK_ACL] = "mkblock",
+ [TMY_TYPE_MKCHAR_ACL] = "mkchar",
+ [TMY_TYPE_TRUNCATE_ACL] = "truncate",
+ [TMY_TYPE_SYMLINK_ACL] = "symlink",
+ [TMY_TYPE_REWRITE_ACL] = "rewrite",
+};
+
+/* Keyword array for double path operations. */
+static const char *dp_keyword[MAX_DOUBLE_PATH_OPERATION] = {
+ [TMY_TYPE_LINK_ACL] = "link",
+ [TMY_TYPE_RENAME_ACL] = "rename",
+};
+
+/**
+ * tmy_sp2keyword - Get the name of single path operation.
+ *
+ * @operation: Type of operation.
+ *
+ * Returns the name of single path operation.
+ */
+const char *tmy_sp2keyword(const u8 operation)
+{
+ return (operation < MAX_SINGLE_PATH_OPERATION)
+ ? sp_keyword[operation] : NULL;
+}
+
+/**
+ * tmy_dp2keyword - Get the name of double path operation.
+ *
+ * @operation: Type of operation.
+ *
+ * Returns the name of double path operation.
+ */
+const char *tmy_dp2keyword(const u8 operation)
+{
+ return (operation < MAX_DOUBLE_PATH_OPERATION)
+ ? dp_keyword[operation] : NULL;
+}
+
+/**
+ * strendswith - Check whether the token ends with the given token.
+ *
+ * @name: The token to check.
+ * @tail: The token to find.
+ *
+ * Returns true if @name ends with @tail, false otherwise.
+ */
+static bool strendswith(const char *name, const char *tail)
+{
+ int len;
+ if (!name || !tail)
+ return false;
+ len = strlen(name) - strlen(tail);
+ return len >= 0 && !strcmp(name + len, tail);
+}
+
+/**
+ * tmy_get_path - Get realpath.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @mnt: Pointer to "struct vfsmount".
+ *
+ * Returns pointer to "struct path_info" on success, NULL otherwise.
+ */
+static struct path_info *tmy_get_path(struct dentry *dentry,
+ struct vfsmount *mnt)
+{
+ int error;
+ struct path_info_with_data *buf = tmy_alloc(sizeof(*buf));
+ if (!buf)
+ return NULL;
+ /* Preserve one byte for appending "/". */
+ error = tmy_realpath_from_dentry2(dentry, mnt, buf->body,
+ sizeof(buf->body) - 2);
+ if (!error) {
+ buf->head.name = buf->body;
+ tmy_fill_path_info(&buf->head);
+ return &buf->head;
+ }
+ tmy_free(buf);
+ return NULL;
+}
+
+static int update_double_path_acl(const u8 type, const char *filename1,
+ const char *filename2,
+ struct domain_info * const domain,
+ const bool is_delete);
+static int update_single_path_acl(const u8 type, const char *filename,
+ struct domain_info * const domain,
+ const bool is_delete);
+
+/* The list for "struct globally_readable_file_entry". */
+static LIST1_HEAD(globally_readable_list);
+
+/**
+ * update_globally_readable_entry - Update "struct globally_readable_file_entry" list.
+ *
+ * @filename: Filename unconditionally permitted to open() for reading.
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int update_globally_readable_entry(const char *filename,
+ const bool is_delete)
+{
+ struct globally_readable_file_entry *new_entry;
+ struct globally_readable_file_entry *ptr;
+ static DEFINE_MUTEX(lock);
+ const struct path_info *saved_filename;
+ int error = -ENOMEM;
+ if (!tmy_is_correct_path(filename, 1, 0, -1, __func__))
+ return -EINVAL;
+ saved_filename = tmy_save_name(filename);
+ if (!saved_filename)
+ return -ENOMEM;
+ mutex_lock(&lock);
+ list1_for_each_entry(ptr, &globally_readable_list, list) {
+ if (ptr->filename != saved_filename)
+ continue;
+ ptr->is_deleted = is_delete;
+ error = 0;
+ goto out;
+ }
+ if (is_delete) {
+ error = -ENOENT;
+ goto out;
+ }
+ new_entry = tmy_alloc_element(sizeof(*new_entry));
+ if (!new_entry)
+ goto out;
+ new_entry->filename = saved_filename;
+ list1_add_tail_mb(&new_entry->list, &globally_readable_list);
+ error = 0;
+out:
+ mutex_unlock(&lock);
+ tmy_update_counter(TMY_UPDATES_COUNTER_EXCEPTION_POLICY);
+ return error;
+}
+
+/**
+ * is_globally_readable_file - Check if the file is unconditionnaly permitted to be open()ed for reading.
+ *
+ * @filename: The filename to check.
+ *
+ * Returns true if any domain can open @filename for reading, false otherwise.
+ */
+static bool is_globally_readable_file(const struct path_info *filename)
+{
+ struct globally_readable_file_entry *ptr;
+ list1_for_each_entry(ptr, &globally_readable_list, list) {
+ if (!ptr->is_deleted &&
+ tmy_path_matches_pattern(filename, ptr->filename))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * tmy_write_globally_readable_policy - Write "struct globally_readable_file_entry" list.
+ *
+ * @data: String to parse.
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_write_globally_readable_policy(char *data, const bool is_delete)
+{
+ return update_globally_readable_entry(data, is_delete);
+}
+
+/**
+ * tmy_read_globally_readable_policy - Read "struct globally_readable_file_entry" list.
+ *
+ * @head: Pointer to "struct tmy_io_buffer".
+ *
+ * Returns true on success, false otherwise.
+ */
+bool tmy_read_globally_readable_policy(struct tmy_io_buffer *head)
+{
+ struct list1_head *pos;
+ list1_for_each_cookie(pos, head->read_var2, &globally_readable_list) {
+ struct globally_readable_file_entry *ptr;
+ ptr = list1_entry(pos, struct globally_readable_file_entry,
+ list);
+ if (ptr->is_deleted)
+ continue;
+ if (!tmy_io_printf(head, KEYWORD_ALLOW_READ "%s\n",
+ ptr->filename->name))
+ goto out;
+ }
+ return true;
+out:
+ return false;
+}
+
+/* The list for "struct pattern_entry". */
+static LIST1_HEAD(pattern_list);
+
+/**
+ * update_file_pattern_entry - Update "struct pattern_entry" list.
+ *
+ * @pattern: Pathname pattern.
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int update_file_pattern_entry(const char *pattern, const bool is_delete)
+{
+ struct pattern_entry *new_entry;
+ struct pattern_entry *ptr;
+ static DEFINE_MUTEX(lock);
+ const struct path_info *saved_pattern;
+ int error = -ENOMEM;
+ if (!tmy_is_correct_path(pattern, 0, 1, 0, __func__))
+ return -EINVAL;
+ saved_pattern = tmy_save_name(pattern);
+ if (!saved_pattern)
+ return -ENOMEM;
+ mutex_lock(&lock);
+ list1_for_each_entry(ptr, &pattern_list, list) {
+ if (saved_pattern != ptr->pattern)
+ continue;
+ ptr->is_deleted = is_delete;
+ error = 0;
+ goto out;
+ }
+ if (is_delete) {
+ error = -ENOENT;
+ goto out;
+ }
+ new_entry = tmy_alloc_element(sizeof(*new_entry));
+ if (!new_entry)
+ goto out;
+ new_entry->pattern = saved_pattern;
+ list1_add_tail_mb(&new_entry->list, &pattern_list);
+ error = 0;
+out:
+ mutex_unlock(&lock);
+ tmy_update_counter(TMY_UPDATES_COUNTER_EXCEPTION_POLICY);
+ return error;
+}
+
+/**
+ * get_file_pattern - Get patterned pathname.
+ *
+ * @filename: The filename to find patterned pathname.
+ *
+ * Returns pointer to pathname pattern if matched, @filename otherwise.
+ */
+static const struct path_info *
+get_file_pattern(const struct path_info *filename)
+{
+ struct pattern_entry *ptr;
+ const struct path_info *pattern = NULL;
+ list1_for_each_entry(ptr, &pattern_list, list) {
+ if (ptr->is_deleted)
+ continue;
+ if (!tmy_path_matches_pattern(filename, ptr->pattern))
+ continue;
+ pattern = ptr->pattern;
+ if (strendswith(pattern->name, "/\\*")) {
+ /* Do nothing. Try to find the better match. */
+ } else {
+ /* This would be the better match. Use this. */
+ break;
+ }
+ }
+ if (pattern)
+ filename = pattern;
+ return filename;
+}
+
+/**
+ * tmy_write_pattern_policy - Write "struct pattern_entry" list.
+ *
+ * @data: String to parse.
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_write_pattern_policy(char *data, const bool is_delete)
+{
+ return update_file_pattern_entry(data, is_delete);
+}
+
+/**
+ * tmy_read_file_pattern - Read "struct pattern_entry" list.
+ *
+ * @head: Pointer to "struct tmy_io_buffer".
+ *
+ * Returns true on success, false otherwise.
+ */
+bool tmy_read_file_pattern(struct tmy_io_buffer *head)
+{
+ struct list1_head *pos;
+ list1_for_each_cookie(pos, head->read_var2, &pattern_list) {
+ struct pattern_entry *ptr;
+ ptr = list1_entry(pos, struct pattern_entry, list);
+ if (ptr->is_deleted)
+ continue;
+ if (!tmy_io_printf(head, KEYWORD_FILE_PATTERN "%s\n",
+ ptr->pattern->name))
+ goto out;
+ }
+ return true;
+out:
+ return false;
+}
+
+/* The list for "struct no_rewrite_entry". */
+static LIST1_HEAD(no_rewrite_list);
+
+/**
+ * update_no_rewrite_entry - Update "struct no_rewrite_entry" list.
+ *
+ * @pattern: Pathname pattern that are not rewritable by default.
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int update_no_rewrite_entry(const char *pattern, const bool is_delete)
+{
+ struct no_rewrite_entry *new_entry, *ptr;
+ static DEFINE_MUTEX(lock);
+ const struct path_info *saved_pattern;
+ int error = -ENOMEM;
+ if (!tmy_is_correct_path(pattern, 0, 0, 0, __func__))
+ return -EINVAL;
+ saved_pattern = tmy_save_name(pattern);
+ if (!saved_pattern)
+ return -ENOMEM;
+ mutex_lock(&lock);
+ list1_for_each_entry(ptr, &no_rewrite_list, list) {
+ if (ptr->pattern != saved_pattern)
+ continue;
+ ptr->is_deleted = is_delete;
+ error = 0;
+ goto out;
+ }
+ if (is_delete) {
+ error = -ENOENT;
+ goto out;
+ }
+ new_entry = tmy_alloc_element(sizeof(*new_entry));
+ if (!new_entry)
+ goto out;
+ new_entry->pattern = saved_pattern;
+ list1_add_tail_mb(&new_entry->list, &no_rewrite_list);
+ error = 0;
+out:
+ mutex_unlock(&lock);
+ tmy_update_counter(TMY_UPDATES_COUNTER_EXCEPTION_POLICY);
+ return error;
+}
+
+/**
+ * is_no_rewrite_file - Check if the given pathname is not permitted to be rewrited.
+ *
+ * @filename: Filename to check.
+ *
+ * Returns true if @filename is specified by "deny_rewrite" directive,
+ * false otherwise.
+ */
+static bool is_no_rewrite_file(const struct path_info *filename)
+{
+ struct no_rewrite_entry *ptr;
+ list1_for_each_entry(ptr, &no_rewrite_list, list) {
+ if (ptr->is_deleted)
+ continue;
+ if (!tmy_path_matches_pattern(filename, ptr->pattern))
+ continue;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * tmy_write_no_rewrite_policy - Write "struct no_rewrite_entry" list.
+ *
+ * @data: String to parse.
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_write_no_rewrite_policy(char *data, const bool is_delete)
+{
+ return update_no_rewrite_entry(data, is_delete);
+}
+
+/**
+ * tmy_read_no_rewrite_policy - Read "struct no_rewrite_entry" list.
+ *
+ * @head: Pointer to "struct tmy_io_buffer".
+ *
+ * Returns true on success, false otherwise.
+ */
+bool tmy_read_no_rewrite_policy(struct tmy_io_buffer *head)
+{
+ struct list1_head *pos;
+ list1_for_each_cookie(pos, head->read_var2, &no_rewrite_list) {
+ struct no_rewrite_entry *ptr;
+ ptr = list1_entry(pos, struct no_rewrite_entry, list);
+ if (ptr->is_deleted)
+ continue;
+ if (!tmy_io_printf(head, KEYWORD_DENY_REWRITE "%s\n",
+ ptr->pattern->name))
+ goto out;
+ }
+ return true;
+out:
+ return false;
+}
+
+/**
+ * update_file_acl - Update file's read/write/execute ACL.
+ *
+ * @filename: Filename.
+ * @perm: Permission (between 1 to 7).
+ * @domain: Pointer to "struct domain_info".
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * This is legacy support interface for older policy syntax.
+ * Current policy syntax uses "allow_read/write" instead of "6",
+ * "allow_read" instead of "4", "allow_write" instead of "2",
+ * "allow_execute" instead of "1".
+ */
+static int update_file_acl(const char *filename, u8 perm,
+ struct domain_info * const domain,
+ const bool is_delete)
+{
+ if (perm > 7 || !perm) {
+ printk(KERN_DEBUG "%s: Invalid permission '%d %s'\n",
+ __func__, perm, filename);
+ return -EINVAL;
+ }
+ if (filename[0] != '@' && strendswith(filename, "/"))
+ /*
+ * Only 'allow_mkdir' and 'allow_rmdir' are valid for
+ * directory permissions.
+ */
+ return 0;
+ if (perm & 4)
+ update_single_path_acl(TMY_TYPE_READ_ACL, filename, domain,
+ is_delete);
+ if (perm & 2)
+ update_single_path_acl(TMY_TYPE_WRITE_ACL, filename, domain,
+ is_delete);
+ if (perm & 1)
+ update_single_path_acl(TMY_TYPE_EXECUTE_ACL, filename, domain,
+ is_delete);
+ return 0;
+}
+
+/**
+ * check_single_path_acl2 - Check permission for single path operation.
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @filename: Filename to check.
+ * @perm: Permission.
+ * @may_use_pattern: True if patterned ACL is permitted.
+ *
+ * Returns 0 on success, -EPERM otherwise.
+ */
+static int check_single_path_acl2(const struct domain_info *domain,
+ const struct path_info *filename,
+ const u16 perm, const bool may_use_pattern)
+{
+ struct acl_info *ptr;
+ list1_for_each_entry(ptr, &domain->acl_info_list, list) {
+ struct single_path_acl_record *acl;
+ if (tmy_acl_type2(ptr) != TYPE_SINGLE_PATH_ACL)
+ continue;
+ acl = container_of(ptr, struct single_path_acl_record, head);
+ if (!(acl->perm & perm))
+ continue;
+ if (may_use_pattern || !acl->filename->is_patterned) {
+ if (!tmy_path_matches_pattern(filename,
+ acl->filename))
+ continue;
+ } else {
+ continue;
+ }
+ return 0;
+ }
+ return -EPERM;
+}
+
+/**
+ * check_file_acl - Check permission for opening files.
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @filename: Filename to check.
+ * @operation: Mode ("read" or "write" or "read/write" or "execute").
+ *
+ * Returns 0 on success, -EPERM otherwise.
+ */
+static int check_file_acl(const struct domain_info *domain,
+ const struct path_info *filename, const u8 operation)
+{
+ u16 perm = 0;
+ if (!tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE))
+ return 0;
+ if (operation == 6)
+ perm = 1 << TMY_TYPE_READ_WRITE_ACL;
+ else if (operation == 4)
+ perm = 1 << TMY_TYPE_READ_ACL;
+ else if (operation == 2)
+ perm = 1 << TMY_TYPE_WRITE_ACL;
+ else if (operation == 1)
+ perm = 1 << TMY_TYPE_EXECUTE_ACL;
+ else
+ BUG();
+ return check_single_path_acl2(domain, filename, perm, operation != 1);
+}
+
+/**
+ * check_file_perm2 - Check permission for opening files.
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @filename: Filename to check.
+ * @perm: Mode ("read" or "write" or "read/write" or "execute").
+ * @operation: Operation name passed used for verbose mode.
+ * @mode: Access control mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int check_file_perm2(struct domain_info * const domain,
+ const struct path_info *filename, const u8 perm,
+ const char *operation, const u8 mode)
+{
+ const bool is_enforce = (mode == 3);
+ const char *msg = "<unknown>";
+ int error = 0;
+ if (!filename)
+ return 0;
+ error = check_file_acl(domain, filename, perm);
+ if (error && perm == 4 &&
+ (domain->flags & DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ) == 0 &&
+ is_globally_readable_file(filename))
+ error = 0;
+ if (perm == 6)
+ msg = tmy_sp2keyword(TMY_TYPE_READ_WRITE_ACL);
+ else if (perm == 4)
+ msg = tmy_sp2keyword(TMY_TYPE_READ_ACL);
+ else if (perm == 2)
+ msg = tmy_sp2keyword(TMY_TYPE_WRITE_ACL);
+ else if (perm == 1)
+ msg = tmy_sp2keyword(TMY_TYPE_EXECUTE_ACL);
+ else
+ BUG();
+ if (!error)
+ return 0;
+ if (tmy_verbose_mode(domain))
+ printk(KERN_WARNING "TOMOYO-%s: Access '%s(%s) %s' denied "
+ "for %s\n", tmy_get_msg(is_enforce), msg, operation,
+ filename->name, tmy_get_last_name(domain));
+ if (is_enforce)
+ return error;
+ if (mode == 1 && tmy_check_domain_quota(domain)) {
+ /* Don't use patterns for execute permission. */
+ const struct path_info *patterned_file = (perm != 1) ?
+ get_file_pattern(filename) : filename;
+ update_file_acl(patterned_file->name, perm,
+ domain, false);
+ }
+ return 0;
+}
+
+/**
+ * tmy_write_file_policy - Update file related list.
+ *
+ * @data: String to parse.
+ * @domain: Pointer to "struct domain_info".
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_write_file_policy(char *data, struct domain_info *domain,
+ const bool is_delete)
+{
+ char *filename = strchr(data, ' ');
+ char *filename2;
+ unsigned int perm;
+ u8 type;
+ if (!filename)
+ return -EINVAL;
+ *filename++ = '\0';
+ if (sscanf(data, "%u", &perm) == 1)
+ return update_file_acl(filename, (u8) perm, domain, is_delete);
+ if (strncmp(data, "allow_", 6))
+ goto out;
+ data += 6;
+ for (type = 0; type < MAX_SINGLE_PATH_OPERATION; type++) {
+ if (strcmp(data, sp_keyword[type]))
+ continue;
+ return update_single_path_acl(type, filename,
+ domain, is_delete);
+ }
+ filename2 = strchr(filename, ' ');
+ if (!filename2)
+ goto out;
+ *filename2++ = '\0';
+ for (type = 0; type < MAX_DOUBLE_PATH_OPERATION; type++) {
+ if (strcmp(data, dp_keyword[type]))
+ continue;
+ return update_double_path_acl(type, filename, filename2, domain,
+ is_delete);
+ }
+out:
+ return -EINVAL;
+}
+
+/**
+ * update_single_path_acl - Update "struct single_path_acl_record" list.
+ *
+ * @type: Type of operation.
+ * @filename: Filename.
+ * @domain: Pointer to "struct domain_info".
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int update_single_path_acl(const u8 type, const char *filename,
+ struct domain_info * const domain,
+ const bool is_delete)
+{
+ static const u16 rw_mask =
+ (1 << TMY_TYPE_READ_ACL) | (1 << TMY_TYPE_WRITE_ACL);
+ const struct path_info *saved_filename;
+ struct acl_info *ptr;
+ struct single_path_acl_record *acl;
+ int error = -ENOMEM;
+ const u16 perm = 1 << type;
+ if (!domain)
+ return -EINVAL;
+ if (!tmy_is_correct_path(filename, 0, 0, 0, __func__))
+ return -EINVAL;
+ saved_filename = tmy_save_name(filename);
+ if (!saved_filename)
+ return -ENOMEM;
+ mutex_lock(&domain_acl_lock);
+ if (is_delete)
+ goto delete;
+ list1_for_each_entry(ptr, &domain->acl_info_list, list) {
+ if (tmy_acl_type1(ptr) != TYPE_SINGLE_PATH_ACL)
+ continue;
+ acl = container_of(ptr, struct single_path_acl_record, head);
+ if (acl->filename != saved_filename)
+ continue;
+ /* Special case. Clear all bits if marked as deleted. */
+ if (ptr->type & ACL_DELETED)
+ acl->perm = 0;
+ acl->perm |= perm;
+ if ((acl->perm & rw_mask) == rw_mask)
+ acl->perm |= 1 << TMY_TYPE_READ_WRITE_ACL;
+ else if (acl->perm & (1 << TMY_TYPE_READ_WRITE_ACL))
+ acl->perm |= rw_mask;
+ error = tmy_add_domain_acl(NULL, ptr);
+ goto out;
+ }
+ /* Not found. Append it to the tail. */
+ acl = tmy_alloc_acl_element(TYPE_SINGLE_PATH_ACL);
+ if (!acl)
+ goto out;
+ acl->perm = perm;
+ acl->filename = saved_filename;
+ error = tmy_add_domain_acl(domain, &acl->head);
+ goto out;
+delete:
+ error = -ENOENT;
+ list1_for_each_entry(ptr, &domain->acl_info_list, list) {
+ if (tmy_acl_type2(ptr) != TYPE_SINGLE_PATH_ACL)
+ continue;
+ acl = container_of(ptr, struct single_path_acl_record, head);
+ if (acl->filename != saved_filename)
+ continue;
+ acl->perm &= ~perm;
+ if ((acl->perm & rw_mask) != rw_mask)
+ acl->perm &= ~(1 << TMY_TYPE_READ_WRITE_ACL);
+ else if (!(acl->perm & (1 << TMY_TYPE_READ_WRITE_ACL)))
+ acl->perm &= ~rw_mask;
+ error = tmy_del_domain_acl(acl->perm ? NULL : ptr);
+ break;
+ }
+out:
+ mutex_unlock(&domain_acl_lock);
+ return error;
+}
+
+/**
+ * update_double_path_acl - Update "struct double_path_acl_record" list.
+ *
+ * @type: Type of operation.
+ * @filename1: First filename.
+ * @filename2: Second filename.
+ * @domain: Pointer to "struct domain_info".
+ * @is_delete: True if it is a delete request.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int update_double_path_acl(const u8 type, const char *filename1,
+ const char *filename2,
+ struct domain_info * const domain,
+ const bool is_delete)
+{
+ const struct path_info *saved_filename1;
+ const struct path_info *saved_filename2;
+ struct acl_info *ptr;
+ struct double_path_acl_record *acl;
+ int error = -ENOMEM;
+ const u8 perm = 1 << type;
+ if (!domain)
+ return -EINVAL;
+ if (!tmy_is_correct_path(filename1, 0, 0, 0, __func__) ||
+ !tmy_is_correct_path(filename2, 0, 0, 0, __func__))
+ return -EINVAL;
+ saved_filename1 = tmy_save_name(filename1);
+ saved_filename2 = tmy_save_name(filename2);
+ if (!saved_filename1 || !saved_filename2)
+ return -ENOMEM;
+ mutex_lock(&domain_acl_lock);
+ if (is_delete)
+ goto delete;
+ list1_for_each_entry(ptr, &domain->acl_info_list, list) {
+ if (tmy_acl_type1(ptr) != TYPE_DOUBLE_PATH_ACL)
+ continue;
+ acl = container_of(ptr, struct double_path_acl_record, head);
+ if (acl->filename1 != saved_filename1 ||
+ acl->filename2 != saved_filename2)
+ continue;
+ /* Special case. Clear all bits if marked as deleted. */
+ if (ptr->type & ACL_DELETED)
+ acl->perm = 0;
+ acl->perm |= perm;
+ error = tmy_add_domain_acl(NULL, ptr);
+ goto out;
+ }
+ /* Not found. Append it to the tail. */
+ acl = tmy_alloc_acl_element(TYPE_DOUBLE_PATH_ACL);
+ if (!acl)
+ goto out;
+ acl->perm = perm;
+ acl->filename1 = saved_filename1;
+ acl->filename2 = saved_filename2;
+ error = tmy_add_domain_acl(domain, &acl->head);
+ goto out;
+delete:
+ error = -ENOENT;
+ list1_for_each_entry(ptr, &domain->acl_info_list, list) {
+ if (tmy_acl_type2(ptr) != TYPE_DOUBLE_PATH_ACL)
+ continue;
+ acl = container_of(ptr, struct double_path_acl_record, head);
+ if (acl->filename1 != saved_filename1 ||
+ acl->filename2 != saved_filename2)
+ continue;
+ acl->perm &= ~perm;
+ error = tmy_del_domain_acl(acl->perm ? NULL : ptr);
+ break;
+ }
+out:
+ mutex_unlock(&domain_acl_lock);
+ return error;
+}
+
+/**
+ * check_single_path_acl - Check permission for single path operation.
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @type: Type of operation.
+ * @filename: Filename to check.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int check_single_path_acl(struct domain_info *domain, const u8 type,
+ const struct path_info *filename)
+{
+ if (!tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE))
+ return 0;
+ return check_single_path_acl2(domain, filename, 1 << type, 1);
+}
+
+/**
+ * check_double_path_acl - Check permission for double path operation.
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @type: Type of operation.
+ * @filename1: First filename to check.
+ * @filename2: Second filename to check.
+ *
+ * Returns 0 on success, -EPERM otherwise.
+ */
+static int check_double_path_acl(const struct domain_info *domain,
+ const u8 type,
+ const struct path_info *filename1,
+ const struct path_info *filename2)
+{
+ struct acl_info *ptr;
+ const u8 perm = 1 << type;
+ if (!tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE))
+ return 0;
+ list1_for_each_entry(ptr, &domain->acl_info_list, list) {
+ struct double_path_acl_record *acl;
+ if (tmy_acl_type2(ptr) != TYPE_DOUBLE_PATH_ACL)
+ continue;
+ acl = container_of(ptr, struct double_path_acl_record, head);
+ if (!(acl->perm & perm))
+ continue;
+ if (!tmy_path_matches_pattern(filename1,
+ acl->filename1))
+ continue;
+ if (!tmy_path_matches_pattern(filename2,
+ acl->filename2))
+ continue;
+ return 0;
+ }
+ return -EPERM;
+}
+
+/**
+ * check_single_path_permission2 - Check permission for single path operation.
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @operation: Type of operation.
+ * @filename: Filename to check.
+ * @mode: Access control mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int check_single_path_permission2(struct domain_info * const domain,
+ u8 operation,
+ const struct path_info *filename,
+ const u8 mode)
+{
+ const char *msg;
+ int error;
+ const bool is_enforce = (mode == 3);
+ if (!mode)
+ return 0;
+next:
+ error = check_single_path_acl(domain, operation, filename);
+ msg = tmy_sp2keyword(operation);
+ if (!error)
+ goto ok;
+ if (tmy_verbose_mode(domain))
+ printk(KERN_WARNING "TOMOYO-%s: Access '%s %s' denied for %s\n",
+ tmy_get_msg(is_enforce), msg, filename->name,
+ tmy_get_last_name(domain));
+ if (mode == 1 && tmy_check_domain_quota(domain))
+ update_single_path_acl(operation,
+ get_file_pattern(filename)->name,
+ domain, false);
+ if (!is_enforce)
+ error = 0;
+ok:
+ /*
+ * Since "allow_truncate" doesn't imply "allow_rewrite" permission,
+ * we need to check "allow_rewrite" permission if the filename is
+ * specified by "deny_rewrite" keyword.
+ */
+ if (!error && operation == TMY_TYPE_TRUNCATE_ACL &&
+ is_no_rewrite_file(filename)) {
+ operation = TMY_TYPE_REWRITE_ACL;
+ goto next;
+ }
+ return error;
+}
+
+/**
+ * tmy_check_file_perm - Check permission for sysctl()'s "read" and "write".
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @filename: Filename to check.
+ * @perm: Mode ("read" or "write" or "read/write").
+ * @operation: Always "sysctl".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_check_file_perm(struct domain_info *domain, const char *filename,
+ const u8 perm, const char *operation)
+{
+ struct path_info name;
+ const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
+ if (!mode)
+ return 0;
+ name.name = filename;
+ tmy_fill_path_info(&name);
+ return check_file_perm2(domain, &name, perm, operation, mode);
+}
+
+/**
+ * tmy_check_exec_perm - Check permission for "execute".
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @filename: Check permission for "execute".
+ * @tmp: Buffer for temporal use.
+ *
+ * Returns 0 on success, negativevalue otherwise.
+ */
+int tmy_check_exec_perm(struct domain_info *domain,
+ const struct path_info *filename,
+ struct tmy_page_buffer *tmp)
+{
+ const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
+ if (!mode)
+ return 0;
+ return check_file_perm2(domain, filename, 1, "do_execve", mode);
+}
+
+/**
+ * tmy_check_open_permission - Check permission for "read" and "write".
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @dentry: Pointer to "struct dentry".
+ * @mnt: Pointer to "struct vfsmount".
+ * @flag: Flags for open().
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_check_open_permission(struct domain_info *domain,
+ struct dentry *dentry, struct vfsmount *mnt,
+ const int flag)
+{
+ const u8 acc_mode = ACC_MODE(flag);
+ int error = -ENOMEM;
+ struct path_info *buf;
+ const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
+ const bool is_enforce = (mode == 3);
+ if (!mode || !mnt)
+ return 0;
+ if (acc_mode == 0)
+ return 0;
+ if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
+ /*
+ * I don't check directories here because mkdir() and rmdir()
+ * don't call me.
+ */
+ return 0;
+ buf = tmy_get_path(dentry, mnt);
+ if (!buf)
+ goto out;
+ error = 0;
+ /*
+ * If the filename is specified by "deny_rewrite" keyword,
+ * we need to check "allow_rewrite" permission when the filename is not
+ * opened for append mode or the filename is truncated at open time.
+ */
+ if ((acc_mode & MAY_WRITE) &&
+ ((flag & O_TRUNC) || !(flag & O_APPEND)) &&
+ (is_no_rewrite_file(buf))) {
+ error = check_single_path_permission2(domain,
+ TMY_TYPE_REWRITE_ACL,
+ buf, mode);
+ }
+ if (!error)
+ error = check_file_perm2(domain, buf, acc_mode, "open", mode);
+ if (!error && (flag & O_TRUNC))
+ error = check_single_path_permission2(domain,
+ TMY_TYPE_TRUNCATE_ACL,
+ buf, mode);
+out:
+ tmy_free(buf);
+ if (!is_enforce)
+ error = 0;
+ return error;
+}
+
+/**
+ * tmy_check_1path_perm - Check permission for "create", "unlink", "mkdir", "rmdir", "mkfifo", "mksock", "mkblock", "mkchar", "truncate" and "symlink".
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @operation: Type of operation.
+ * @dentry: Pointer to "struct dentry".
+ * @mnt: Pointer to "struct vfsmount".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_check_1path_perm(struct domain_info *domain, const u8 operation,
+ struct dentry *dentry, struct vfsmount *mnt)
+{
+ int error = -ENOMEM;
+ struct path_info *buf;
+ const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
+ const bool is_enforce = (mode == 3);
+ if (!mode || !mnt)
+ return 0;
+ buf = tmy_get_path(dentry, mnt);
+ if (!buf)
+ goto out;
+ switch (operation) {
+ case TMY_TYPE_MKDIR_ACL:
+ case TMY_TYPE_RMDIR_ACL:
+ if (!buf->is_dir) {
+ /* tmy_get_path() preserves space for appending "/." */
+ strcat((char *) buf->name, "/");
+ tmy_fill_path_info(buf);
+ }
+ }
+ error = check_single_path_permission2(domain, operation, buf, mode);
+out:
+ tmy_free(buf);
+ if (!is_enforce)
+ error = 0;
+ return error;
+}
+
+/**
+ * tmy_check_rewrite_permission - Check permission for "rewrite".
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @filp: Pointer to "struct file".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_check_rewrite_permission(struct domain_info *domain, struct file *filp)
+{
+ int error = -ENOMEM;
+ const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
+ const bool is_enforce = (mode == 3);
+ struct path_info *buf;
+ if (!mode || !filp->f_path.mnt)
+ return 0;
+ buf = tmy_get_path(filp->f_path.dentry, filp->f_path.mnt);
+ if (!buf)
+ goto out;
+ if (!is_no_rewrite_file(buf)) {
+ error = 0;
+ goto out;
+ }
+ error = check_single_path_permission2(domain, TMY_TYPE_REWRITE_ACL,
+ buf, mode);
+out:
+ tmy_free(buf);
+ if (!is_enforce)
+ error = 0;
+ return error;
+}
+
+/**
+ * tmy_check_2path_perm - Check permission for "rename" and "link".
+ *
+ * @domain: Pointer to "struct domain_info".
+ * @operation: Type of operation.
+ * @dentry1: Pointer to "struct dentry".
+ * @mnt1: Pointer to "struct vfsmount".
+ * @dentry2: Pointer to "struct dentry".
+ * @mnt2: Pointer to "struct vfsmount".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+int tmy_check_2path_perm(struct domain_info * const domain, const u8 operation,
+ struct dentry *dentry1, struct vfsmount *mnt1,
+ struct dentry *dentry2, struct vfsmount *mnt2)
+{
+ int error = -ENOMEM;
+ struct path_info *buf1, *buf2;
+ const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
+ const bool is_enforce = (mode == 3);
+ const char *msg;
+ if (!mode || !mnt1 || !mnt2)
+ return 0;
+ buf1 = tmy_get_path(dentry1, mnt1);
+ buf2 = tmy_get_path(dentry2, mnt2);
+ if (!buf1 || !buf2)
+ goto out;
+ if (operation == TMY_TYPE_RENAME_ACL) {
+ /* TYPE_LINK_ACL can't reach here for directory. */
+ if (dentry1->d_inode && S_ISDIR(dentry1->d_inode->i_mode)) {
+ /* tmy_get_path() preserves space for appending "/." */
+ if (!buf1->is_dir) {
+ strcat((char *) buf1->name, "/");
+ tmy_fill_path_info(buf1);
+ }
+ if (!buf2->is_dir) {
+ strcat((char *) buf2->name, "/");
+ tmy_fill_path_info(buf2);
+ }
+ }
+ }
+ error = check_double_path_acl(domain, operation, buf1, buf2);
+ msg = tmy_dp2keyword(operation);
+ if (!error)
+ goto out;
+ if (tmy_verbose_mode(domain))
+ printk(KERN_WARNING "TOMOYO-%s: Access '%s %s %s' "
+ "denied for %s\n", tmy_get_msg(is_enforce),
+ msg, buf1->name, buf2->name, tmy_get_last_name(domain));
+ if (mode == 1 && tmy_check_domain_quota(domain))
+ update_double_path_acl(operation,
+ get_file_pattern(buf1)->name,
+ get_file_pattern(buf2)->name,
+ domain, false);
+out:
+ tmy_free(buf1);
+ tmy_free(buf2);
+ if (!is_enforce)
+ error = 0;
+ return error;
+}
--
Quoting Kentaro Takeda ([email protected]):
I am not happy with the locking.
In a previous patch you mark funtions with 'begin/end critical section'.
Please instead put a comment on top listing precisely which locks
the fn expects to be held.
As for protecting your own data, please
1. explain at the var declaration what lock protects it
2. define the lock next to the list
For instance, I assume the intent below is for pattern_list to be
protected by the static 'lock' mutex defined in
update_file_pattern_entry. But get_file_pattern() also walks the
list without any locking.
It looks like you only ever append to the list, with a memory barrier,
so *maybe* it's safe, but your rationale should be spelled out here.
Ideally you would use something like rcu that everyone can easily
recognize. If you insist on introducing your own list walking and
locking primitives, then please
1. introduce them separately
2. add them into list.h (not tomoyo/common.h!)
3. and get the right folks to ack it. I'd feel better if Paul
McKenney (cc:d) looked and ok'd it, for instance. (Paul,
the relevant helpers are defined in a separate patch though)
You could even protect everything with a big dumb mutex for now and
optimize that in later patches. That way stuff like this doesn't muck
up the review of the main TOMOYO patches.
> +/* The list for "struct pattern_entry". */
> +static LIST1_HEAD(pattern_list);
> +
> +/**
> + * update_file_pattern_entry - Update "struct pattern_entry" list.
> + *
> + * @pattern: Pathname pattern.
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int update_file_pattern_entry(const char *pattern, const bool is_delete)
> +{
> + struct pattern_entry *new_entry;
> + struct pattern_entry *ptr;
> + static DEFINE_MUTEX(lock);
> + const struct path_info *saved_pattern;
> + int error = -ENOMEM;
> + if (!tmy_is_correct_path(pattern, 0, 1, 0, __func__))
> + return -EINVAL;
> + saved_pattern = tmy_save_name(pattern);
> + if (!saved_pattern)
> + return -ENOMEM;
> + mutex_lock(&lock);
> + list1_for_each_entry(ptr, &pattern_list, list) {
> + if (saved_pattern != ptr->pattern)
> + continue;
> + ptr->is_deleted = is_delete;
> + error = 0;
> + goto out;
> + }
> + if (is_delete) {
> + error = -ENOENT;
> + goto out;
> + }
> + new_entry = tmy_alloc_element(sizeof(*new_entry));
> + if (!new_entry)
> + goto out;
> + new_entry->pattern = saved_pattern;
> + list1_add_tail_mb(&new_entry->list, &pattern_list);
> + error = 0;
> +out:
> + mutex_unlock(&lock);
> + tmy_update_counter(TMY_UPDATES_COUNTER_EXCEPTION_POLICY);
> + return error;
> +}
> +
> +/**
> + * get_file_pattern - Get patterned pathname.
> + *
> + * @filename: The filename to find patterned pathname.
> + *
> + * Returns pointer to pathname pattern if matched, @filename otherwise.
> + */
> +static const struct path_info *
> +get_file_pattern(const struct path_info *filename)
> +{
> + struct pattern_entry *ptr;
> + const struct path_info *pattern = NULL;
> + list1_for_each_entry(ptr, &pattern_list, list) {
> + if (ptr->is_deleted)
> + continue;
> + if (!tmy_path_matches_pattern(filename, ptr->pattern))
> + continue;
> + pattern = ptr->pattern;
> + if (strendswith(pattern->name, "/\\*")) {
> + /* Do nothing. Try to find the better match. */
> + } else {
> + /* This would be the better match. Use this. */
> + break;
> + }
> + }
> + if (pattern)
> + filename = pattern;
> + return filename;
> +}
> +
> +/**
> + * tmy_write_pattern_policy - Write "struct pattern_entry" list.
> + *
> + * @data: String to parse.
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_write_pattern_policy(char *data, const bool is_delete)
> +{
> + return update_file_pattern_entry(data, is_delete);
> +}
> +
> +/**
> + * tmy_read_file_pattern - Read "struct pattern_entry" list.
> + *
> + * @head: Pointer to "struct tmy_io_buffer".
> + *
> + * Returns true on success, false otherwise.
> + */
> +bool tmy_read_file_pattern(struct tmy_io_buffer *head)
> +{
> + struct list1_head *pos;
> + list1_for_each_cookie(pos, head->read_var2, &pattern_list) {
> + struct pattern_entry *ptr;
> + ptr = list1_entry(pos, struct pattern_entry, list);
> + if (ptr->is_deleted)
> + continue;
> + if (!tmy_io_printf(head, KEYWORD_FILE_PATTERN "%s\n",
> + ptr->pattern->name))
> + goto out;
> + }
> + return true;
> +out:
> + return false;
> +}
> +
> +/* The list for "struct no_rewrite_entry". */
> +static LIST1_HEAD(no_rewrite_list);
> +
> +/**
> + * update_no_rewrite_entry - Update "struct no_rewrite_entry" list.
> + *
> + * @pattern: Pathname pattern that are not rewritable by default.
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int update_no_rewrite_entry(const char *pattern, const bool is_delete)
> +{
> + struct no_rewrite_entry *new_entry, *ptr;
> + static DEFINE_MUTEX(lock);
> + const struct path_info *saved_pattern;
> + int error = -ENOMEM;
> + if (!tmy_is_correct_path(pattern, 0, 0, 0, __func__))
> + return -EINVAL;
> + saved_pattern = tmy_save_name(pattern);
> + if (!saved_pattern)
> + return -ENOMEM;
> + mutex_lock(&lock);
> + list1_for_each_entry(ptr, &no_rewrite_list, list) {
> + if (ptr->pattern != saved_pattern)
> + continue;
> + ptr->is_deleted = is_delete;
> + error = 0;
> + goto out;
> + }
> + if (is_delete) {
> + error = -ENOENT;
> + goto out;
> + }
> + new_entry = tmy_alloc_element(sizeof(*new_entry));
> + if (!new_entry)
> + goto out;
> + new_entry->pattern = saved_pattern;
> + list1_add_tail_mb(&new_entry->list, &no_rewrite_list);
> + error = 0;
> +out:
> + mutex_unlock(&lock);
> + tmy_update_counter(TMY_UPDATES_COUNTER_EXCEPTION_POLICY);
> + return error;
> +}
> +
> +/**
> + * is_no_rewrite_file - Check if the given pathname is not permitted to be rewrited.
> + *
> + * @filename: Filename to check.
> + *
> + * Returns true if @filename is specified by "deny_rewrite" directive,
> + * false otherwise.
> + */
> +static bool is_no_rewrite_file(const struct path_info *filename)
> +{
> + struct no_rewrite_entry *ptr;
> + list1_for_each_entry(ptr, &no_rewrite_list, list) {
> + if (ptr->is_deleted)
> + continue;
> + if (!tmy_path_matches_pattern(filename, ptr->pattern))
> + continue;
> + return true;
> + }
> + return false;
> +}
> +
> +/**
> + * tmy_write_no_rewrite_policy - Write "struct no_rewrite_entry" list.
> + *
> + * @data: String to parse.
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_write_no_rewrite_policy(char *data, const bool is_delete)
> +{
> + return update_no_rewrite_entry(data, is_delete);
> +}
> +
> +/**
> + * tmy_read_no_rewrite_policy - Read "struct no_rewrite_entry" list.
> + *
> + * @head: Pointer to "struct tmy_io_buffer".
> + *
> + * Returns true on success, false otherwise.
> + */
> +bool tmy_read_no_rewrite_policy(struct tmy_io_buffer *head)
> +{
> + struct list1_head *pos;
> + list1_for_each_cookie(pos, head->read_var2, &no_rewrite_list) {
> + struct no_rewrite_entry *ptr;
> + ptr = list1_entry(pos, struct no_rewrite_entry, list);
> + if (ptr->is_deleted)
> + continue;
> + if (!tmy_io_printf(head, KEYWORD_DENY_REWRITE "%s\n",
> + ptr->pattern->name))
> + goto out;
> + }
> + return true;
> +out:
> + return false;
> +}
> +
> +/**
> + * update_file_acl - Update file's read/write/execute ACL.
> + *
> + * @filename: Filename.
> + * @perm: Permission (between 1 to 7).
> + * @domain: Pointer to "struct domain_info".
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + *
> + * This is legacy support interface for older policy syntax.
> + * Current policy syntax uses "allow_read/write" instead of "6",
> + * "allow_read" instead of "4", "allow_write" instead of "2",
> + * "allow_execute" instead of "1".
> + */
> +static int update_file_acl(const char *filename, u8 perm,
> + struct domain_info * const domain,
> + const bool is_delete)
> +{
> + if (perm > 7 || !perm) {
> + printk(KERN_DEBUG "%s: Invalid permission '%d %s'\n",
> + __func__, perm, filename);
> + return -EINVAL;
> + }
> + if (filename[0] != '@' && strendswith(filename, "/"))
> + /*
> + * Only 'allow_mkdir' and 'allow_rmdir' are valid for
> + * directory permissions.
> + */
> + return 0;
> + if (perm & 4)
> + update_single_path_acl(TMY_TYPE_READ_ACL, filename, domain,
> + is_delete);
> + if (perm & 2)
> + update_single_path_acl(TMY_TYPE_WRITE_ACL, filename, domain,
> + is_delete);
> + if (perm & 1)
> + update_single_path_acl(TMY_TYPE_EXECUTE_ACL, filename, domain,
> + is_delete);
> + return 0;
> +}
> +
> +/**
> + * check_single_path_acl2 - Check permission for single path operation.
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @filename: Filename to check.
> + * @perm: Permission.
> + * @may_use_pattern: True if patterned ACL is permitted.
> + *
> + * Returns 0 on success, -EPERM otherwise.
> + */
> +static int check_single_path_acl2(const struct domain_info *domain,
> + const struct path_info *filename,
> + const u16 perm, const bool may_use_pattern)
> +{
> + struct acl_info *ptr;
> + list1_for_each_entry(ptr, &domain->acl_info_list, list) {
> + struct single_path_acl_record *acl;
> + if (tmy_acl_type2(ptr) != TYPE_SINGLE_PATH_ACL)
> + continue;
> + acl = container_of(ptr, struct single_path_acl_record, head);
> + if (!(acl->perm & perm))
> + continue;
> + if (may_use_pattern || !acl->filename->is_patterned) {
> + if (!tmy_path_matches_pattern(filename,
> + acl->filename))
> + continue;
> + } else {
> + continue;
> + }
> + return 0;
> + }
> + return -EPERM;
> +}
> +
> +/**
> + * check_file_acl - Check permission for opening files.
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @filename: Filename to check.
> + * @operation: Mode ("read" or "write" or "read/write" or "execute").
> + *
> + * Returns 0 on success, -EPERM otherwise.
> + */
> +static int check_file_acl(const struct domain_info *domain,
> + const struct path_info *filename, const u8 operation)
> +{
> + u16 perm = 0;
> + if (!tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE))
> + return 0;
> + if (operation == 6)
> + perm = 1 << TMY_TYPE_READ_WRITE_ACL;
> + else if (operation == 4)
> + perm = 1 << TMY_TYPE_READ_ACL;
> + else if (operation == 2)
> + perm = 1 << TMY_TYPE_WRITE_ACL;
> + else if (operation == 1)
> + perm = 1 << TMY_TYPE_EXECUTE_ACL;
> + else
> + BUG();
> + return check_single_path_acl2(domain, filename, perm, operation != 1);
> +}
> +
> +/**
> + * check_file_perm2 - Check permission for opening files.
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @filename: Filename to check.
> + * @perm: Mode ("read" or "write" or "read/write" or "execute").
> + * @operation: Operation name passed used for verbose mode.
> + * @mode: Access control mode.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int check_file_perm2(struct domain_info * const domain,
> + const struct path_info *filename, const u8 perm,
> + const char *operation, const u8 mode)
> +{
> + const bool is_enforce = (mode == 3);
> + const char *msg = "<unknown>";
> + int error = 0;
> + if (!filename)
> + return 0;
> + error = check_file_acl(domain, filename, perm);
> + if (error && perm == 4 &&
> + (domain->flags & DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ) == 0 &&
> + is_globally_readable_file(filename))
> + error = 0;
> + if (perm == 6)
> + msg = tmy_sp2keyword(TMY_TYPE_READ_WRITE_ACL);
> + else if (perm == 4)
> + msg = tmy_sp2keyword(TMY_TYPE_READ_ACL);
> + else if (perm == 2)
> + msg = tmy_sp2keyword(TMY_TYPE_WRITE_ACL);
> + else if (perm == 1)
> + msg = tmy_sp2keyword(TMY_TYPE_EXECUTE_ACL);
> + else
> + BUG();
> + if (!error)
> + return 0;
> + if (tmy_verbose_mode(domain))
> + printk(KERN_WARNING "TOMOYO-%s: Access '%s(%s) %s' denied "
> + "for %s\n", tmy_get_msg(is_enforce), msg, operation,
> + filename->name, tmy_get_last_name(domain));
> + if (is_enforce)
> + return error;
> + if (mode == 1 && tmy_check_domain_quota(domain)) {
> + /* Don't use patterns for execute permission. */
> + const struct path_info *patterned_file = (perm != 1) ?
> + get_file_pattern(filename) : filename;
> + update_file_acl(patterned_file->name, perm,
> + domain, false);
> + }
> + return 0;
> +}
> +
> +/**
> + * tmy_write_file_policy - Update file related list.
> + *
> + * @data: String to parse.
> + * @domain: Pointer to "struct domain_info".
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_write_file_policy(char *data, struct domain_info *domain,
> + const bool is_delete)
> +{
> + char *filename = strchr(data, ' ');
> + char *filename2;
> + unsigned int perm;
> + u8 type;
> + if (!filename)
> + return -EINVAL;
> + *filename++ = '\0';
> + if (sscanf(data, "%u", &perm) == 1)
> + return update_file_acl(filename, (u8) perm, domain, is_delete);
> + if (strncmp(data, "allow_", 6))
> + goto out;
> + data += 6;
> + for (type = 0; type < MAX_SINGLE_PATH_OPERATION; type++) {
> + if (strcmp(data, sp_keyword[type]))
> + continue;
> + return update_single_path_acl(type, filename,
> + domain, is_delete);
> + }
> + filename2 = strchr(filename, ' ');
> + if (!filename2)
> + goto out;
> + *filename2++ = '\0';
> + for (type = 0; type < MAX_DOUBLE_PATH_OPERATION; type++) {
> + if (strcmp(data, dp_keyword[type]))
> + continue;
> + return update_double_path_acl(type, filename, filename2, domain,
> + is_delete);
> + }
> +out:
> + return -EINVAL;
> +}
> +
> +/**
> + * update_single_path_acl - Update "struct single_path_acl_record" list.
> + *
> + * @type: Type of operation.
> + * @filename: Filename.
> + * @domain: Pointer to "struct domain_info".
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int update_single_path_acl(const u8 type, const char *filename,
> + struct domain_info * const domain,
> + const bool is_delete)
> +{
> + static const u16 rw_mask =
> + (1 << TMY_TYPE_READ_ACL) | (1 << TMY_TYPE_WRITE_ACL);
> + const struct path_info *saved_filename;
> + struct acl_info *ptr;
> + struct single_path_acl_record *acl;
> + int error = -ENOMEM;
> + const u16 perm = 1 << type;
> + if (!domain)
> + return -EINVAL;
> + if (!tmy_is_correct_path(filename, 0, 0, 0, __func__))
> + return -EINVAL;
> + saved_filename = tmy_save_name(filename);
> + if (!saved_filename)
> + return -ENOMEM;
> + mutex_lock(&domain_acl_lock);
> + if (is_delete)
> + goto delete;
> + list1_for_each_entry(ptr, &domain->acl_info_list, list) {
> + if (tmy_acl_type1(ptr) != TYPE_SINGLE_PATH_ACL)
> + continue;
> + acl = container_of(ptr, struct single_path_acl_record, head);
> + if (acl->filename != saved_filename)
> + continue;
> + /* Special case. Clear all bits if marked as deleted. */
> + if (ptr->type & ACL_DELETED)
> + acl->perm = 0;
> + acl->perm |= perm;
> + if ((acl->perm & rw_mask) == rw_mask)
> + acl->perm |= 1 << TMY_TYPE_READ_WRITE_ACL;
> + else if (acl->perm & (1 << TMY_TYPE_READ_WRITE_ACL))
> + acl->perm |= rw_mask;
> + error = tmy_add_domain_acl(NULL, ptr);
> + goto out;
> + }
> + /* Not found. Append it to the tail. */
> + acl = tmy_alloc_acl_element(TYPE_SINGLE_PATH_ACL);
> + if (!acl)
> + goto out;
> + acl->perm = perm;
> + acl->filename = saved_filename;
> + error = tmy_add_domain_acl(domain, &acl->head);
> + goto out;
> +delete:
> + error = -ENOENT;
> + list1_for_each_entry(ptr, &domain->acl_info_list, list) {
> + if (tmy_acl_type2(ptr) != TYPE_SINGLE_PATH_ACL)
> + continue;
> + acl = container_of(ptr, struct single_path_acl_record, head);
> + if (acl->filename != saved_filename)
> + continue;
> + acl->perm &= ~perm;
> + if ((acl->perm & rw_mask) != rw_mask)
> + acl->perm &= ~(1 << TMY_TYPE_READ_WRITE_ACL);
> + else if (!(acl->perm & (1 << TMY_TYPE_READ_WRITE_ACL)))
> + acl->perm &= ~rw_mask;
> + error = tmy_del_domain_acl(acl->perm ? NULL : ptr);
> + break;
> + }
> +out:
> + mutex_unlock(&domain_acl_lock);
> + return error;
> +}
> +
> +/**
> + * update_double_path_acl - Update "struct double_path_acl_record" list.
> + *
> + * @type: Type of operation.
> + * @filename1: First filename.
> + * @filename2: Second filename.
> + * @domain: Pointer to "struct domain_info".
> + * @is_delete: True if it is a delete request.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int update_double_path_acl(const u8 type, const char *filename1,
> + const char *filename2,
> + struct domain_info * const domain,
> + const bool is_delete)
> +{
> + const struct path_info *saved_filename1;
> + const struct path_info *saved_filename2;
> + struct acl_info *ptr;
> + struct double_path_acl_record *acl;
> + int error = -ENOMEM;
> + const u8 perm = 1 << type;
> + if (!domain)
> + return -EINVAL;
> + if (!tmy_is_correct_path(filename1, 0, 0, 0, __func__) ||
> + !tmy_is_correct_path(filename2, 0, 0, 0, __func__))
> + return -EINVAL;
> + saved_filename1 = tmy_save_name(filename1);
> + saved_filename2 = tmy_save_name(filename2);
> + if (!saved_filename1 || !saved_filename2)
> + return -ENOMEM;
> + mutex_lock(&domain_acl_lock);
> + if (is_delete)
> + goto delete;
> + list1_for_each_entry(ptr, &domain->acl_info_list, list) {
> + if (tmy_acl_type1(ptr) != TYPE_DOUBLE_PATH_ACL)
> + continue;
> + acl = container_of(ptr, struct double_path_acl_record, head);
> + if (acl->filename1 != saved_filename1 ||
> + acl->filename2 != saved_filename2)
> + continue;
> + /* Special case. Clear all bits if marked as deleted. */
> + if (ptr->type & ACL_DELETED)
> + acl->perm = 0;
> + acl->perm |= perm;
> + error = tmy_add_domain_acl(NULL, ptr);
> + goto out;
> + }
> + /* Not found. Append it to the tail. */
> + acl = tmy_alloc_acl_element(TYPE_DOUBLE_PATH_ACL);
> + if (!acl)
> + goto out;
> + acl->perm = perm;
> + acl->filename1 = saved_filename1;
> + acl->filename2 = saved_filename2;
> + error = tmy_add_domain_acl(domain, &acl->head);
> + goto out;
> +delete:
> + error = -ENOENT;
> + list1_for_each_entry(ptr, &domain->acl_info_list, list) {
> + if (tmy_acl_type2(ptr) != TYPE_DOUBLE_PATH_ACL)
> + continue;
> + acl = container_of(ptr, struct double_path_acl_record, head);
> + if (acl->filename1 != saved_filename1 ||
> + acl->filename2 != saved_filename2)
> + continue;
> + acl->perm &= ~perm;
> + error = tmy_del_domain_acl(acl->perm ? NULL : ptr);
> + break;
> + }
> +out:
> + mutex_unlock(&domain_acl_lock);
> + return error;
> +}
> +
> +/**
> + * check_single_path_acl - Check permission for single path operation.
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @type: Type of operation.
> + * @filename: Filename to check.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int check_single_path_acl(struct domain_info *domain, const u8 type,
> + const struct path_info *filename)
> +{
> + if (!tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE))
> + return 0;
> + return check_single_path_acl2(domain, filename, 1 << type, 1);
> +}
> +
> +/**
> + * check_double_path_acl - Check permission for double path operation.
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @type: Type of operation.
> + * @filename1: First filename to check.
> + * @filename2: Second filename to check.
> + *
> + * Returns 0 on success, -EPERM otherwise.
> + */
> +static int check_double_path_acl(const struct domain_info *domain,
> + const u8 type,
> + const struct path_info *filename1,
> + const struct path_info *filename2)
> +{
> + struct acl_info *ptr;
> + const u8 perm = 1 << type;
> + if (!tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE))
> + return 0;
> + list1_for_each_entry(ptr, &domain->acl_info_list, list) {
> + struct double_path_acl_record *acl;
> + if (tmy_acl_type2(ptr) != TYPE_DOUBLE_PATH_ACL)
> + continue;
> + acl = container_of(ptr, struct double_path_acl_record, head);
> + if (!(acl->perm & perm))
> + continue;
> + if (!tmy_path_matches_pattern(filename1,
> + acl->filename1))
> + continue;
> + if (!tmy_path_matches_pattern(filename2,
> + acl->filename2))
> + continue;
> + return 0;
> + }
> + return -EPERM;
> +}
> +
> +/**
> + * check_single_path_permission2 - Check permission for single path operation.
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @operation: Type of operation.
> + * @filename: Filename to check.
> + * @mode: Access control mode.
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +static int check_single_path_permission2(struct domain_info * const domain,
> + u8 operation,
> + const struct path_info *filename,
> + const u8 mode)
> +{
> + const char *msg;
> + int error;
> + const bool is_enforce = (mode == 3);
> + if (!mode)
> + return 0;
> +next:
> + error = check_single_path_acl(domain, operation, filename);
> + msg = tmy_sp2keyword(operation);
> + if (!error)
> + goto ok;
> + if (tmy_verbose_mode(domain))
> + printk(KERN_WARNING "TOMOYO-%s: Access '%s %s' denied for %s\n",
> + tmy_get_msg(is_enforce), msg, filename->name,
> + tmy_get_last_name(domain));
> + if (mode == 1 && tmy_check_domain_quota(domain))
> + update_single_path_acl(operation,
> + get_file_pattern(filename)->name,
> + domain, false);
> + if (!is_enforce)
> + error = 0;
> +ok:
> + /*
> + * Since "allow_truncate" doesn't imply "allow_rewrite" permission,
> + * we need to check "allow_rewrite" permission if the filename is
> + * specified by "deny_rewrite" keyword.
> + */
> + if (!error && operation == TMY_TYPE_TRUNCATE_ACL &&
> + is_no_rewrite_file(filename)) {
> + operation = TMY_TYPE_REWRITE_ACL;
> + goto next;
> + }
> + return error;
> +}
> +
> +/**
> + * tmy_check_file_perm - Check permission for sysctl()'s "read" and "write".
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @filename: Filename to check.
> + * @perm: Mode ("read" or "write" or "read/write").
> + * @operation: Always "sysctl".
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_check_file_perm(struct domain_info *domain, const char *filename,
> + const u8 perm, const char *operation)
> +{
> + struct path_info name;
> + const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
> + if (!mode)
> + return 0;
> + name.name = filename;
> + tmy_fill_path_info(&name);
> + return check_file_perm2(domain, &name, perm, operation, mode);
> +}
> +
> +/**
> + * tmy_check_exec_perm - Check permission for "execute".
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @filename: Check permission for "execute".
> + * @tmp: Buffer for temporal use.
> + *
> + * Returns 0 on success, negativevalue otherwise.
> + */
> +int tmy_check_exec_perm(struct domain_info *domain,
> + const struct path_info *filename,
> + struct tmy_page_buffer *tmp)
> +{
> + const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
> + if (!mode)
> + return 0;
> + return check_file_perm2(domain, filename, 1, "do_execve", mode);
> +}
> +
> +/**
> + * tmy_check_open_permission - Check permission for "read" and "write".
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @dentry: Pointer to "struct dentry".
> + * @mnt: Pointer to "struct vfsmount".
> + * @flag: Flags for open().
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_check_open_permission(struct domain_info *domain,
> + struct dentry *dentry, struct vfsmount *mnt,
> + const int flag)
> +{
> + const u8 acc_mode = ACC_MODE(flag);
> + int error = -ENOMEM;
> + struct path_info *buf;
> + const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
> + const bool is_enforce = (mode == 3);
> + if (!mode || !mnt)
> + return 0;
> + if (acc_mode == 0)
> + return 0;
> + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
> + /*
> + * I don't check directories here because mkdir() and rmdir()
> + * don't call me.
> + */
> + return 0;
> + buf = tmy_get_path(dentry, mnt);
> + if (!buf)
> + goto out;
> + error = 0;
> + /*
> + * If the filename is specified by "deny_rewrite" keyword,
> + * we need to check "allow_rewrite" permission when the filename is not
> + * opened for append mode or the filename is truncated at open time.
> + */
> + if ((acc_mode & MAY_WRITE) &&
> + ((flag & O_TRUNC) || !(flag & O_APPEND)) &&
> + (is_no_rewrite_file(buf))) {
> + error = check_single_path_permission2(domain,
> + TMY_TYPE_REWRITE_ACL,
> + buf, mode);
> + }
> + if (!error)
> + error = check_file_perm2(domain, buf, acc_mode, "open", mode);
> + if (!error && (flag & O_TRUNC))
> + error = check_single_path_permission2(domain,
> + TMY_TYPE_TRUNCATE_ACL,
> + buf, mode);
> +out:
> + tmy_free(buf);
> + if (!is_enforce)
> + error = 0;
> + return error;
> +}
> +
> +/**
> + * tmy_check_1path_perm - Check permission for "create", "unlink", "mkdir", "rmdir", "mkfifo", "mksock", "mkblock", "mkchar", "truncate" and "symlink".
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @operation: Type of operation.
> + * @dentry: Pointer to "struct dentry".
> + * @mnt: Pointer to "struct vfsmount".
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_check_1path_perm(struct domain_info *domain, const u8 operation,
> + struct dentry *dentry, struct vfsmount *mnt)
> +{
> + int error = -ENOMEM;
> + struct path_info *buf;
> + const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
> + const bool is_enforce = (mode == 3);
> + if (!mode || !mnt)
> + return 0;
> + buf = tmy_get_path(dentry, mnt);
> + if (!buf)
> + goto out;
> + switch (operation) {
> + case TMY_TYPE_MKDIR_ACL:
> + case TMY_TYPE_RMDIR_ACL:
> + if (!buf->is_dir) {
> + /* tmy_get_path() preserves space for appending "/." */
> + strcat((char *) buf->name, "/");
> + tmy_fill_path_info(buf);
> + }
> + }
> + error = check_single_path_permission2(domain, operation, buf, mode);
> +out:
> + tmy_free(buf);
> + if (!is_enforce)
> + error = 0;
> + return error;
> +}
> +
> +/**
> + * tmy_check_rewrite_permission - Check permission for "rewrite".
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @filp: Pointer to "struct file".
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_check_rewrite_permission(struct domain_info *domain, struct file *filp)
> +{
> + int error = -ENOMEM;
> + const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
> + const bool is_enforce = (mode == 3);
> + struct path_info *buf;
> + if (!mode || !filp->f_path.mnt)
> + return 0;
> + buf = tmy_get_path(filp->f_path.dentry, filp->f_path.mnt);
> + if (!buf)
> + goto out;
> + if (!is_no_rewrite_file(buf)) {
> + error = 0;
> + goto out;
> + }
> + error = check_single_path_permission2(domain, TMY_TYPE_REWRITE_ACL,
> + buf, mode);
> +out:
> + tmy_free(buf);
> + if (!is_enforce)
> + error = 0;
> + return error;
> +}
> +
> +/**
> + * tmy_check_2path_perm - Check permission for "rename" and "link".
> + *
> + * @domain: Pointer to "struct domain_info".
> + * @operation: Type of operation.
> + * @dentry1: Pointer to "struct dentry".
> + * @mnt1: Pointer to "struct vfsmount".
> + * @dentry2: Pointer to "struct dentry".
> + * @mnt2: Pointer to "struct vfsmount".
> + *
> + * Returns 0 on success, negative value otherwise.
> + */
> +int tmy_check_2path_perm(struct domain_info * const domain, const u8 operation,
> + struct dentry *dentry1, struct vfsmount *mnt1,
> + struct dentry *dentry2, struct vfsmount *mnt2)
> +{
> + int error = -ENOMEM;
> + struct path_info *buf1, *buf2;
> + const u8 mode = tmy_check_flags(domain, TMY_TOMOYO_MAC_FOR_FILE);
> + const bool is_enforce = (mode == 3);
> + const char *msg;
> + if (!mode || !mnt1 || !mnt2)
> + return 0;
> + buf1 = tmy_get_path(dentry1, mnt1);
> + buf2 = tmy_get_path(dentry2, mnt2);
> + if (!buf1 || !buf2)
> + goto out;
> + if (operation == TMY_TYPE_RENAME_ACL) {
> + /* TYPE_LINK_ACL can't reach here for directory. */
> + if (dentry1->d_inode && S_ISDIR(dentry1->d_inode->i_mode)) {
> + /* tmy_get_path() preserves space for appending "/." */
> + if (!buf1->is_dir) {
> + strcat((char *) buf1->name, "/");
> + tmy_fill_path_info(buf1);
> + }
> + if (!buf2->is_dir) {
> + strcat((char *) buf2->name, "/");
> + tmy_fill_path_info(buf2);
> + }
> + }
> + }
> + error = check_double_path_acl(domain, operation, buf1, buf2);
> + msg = tmy_dp2keyword(operation);
> + if (!error)
> + goto out;
> + if (tmy_verbose_mode(domain))
> + printk(KERN_WARNING "TOMOYO-%s: Access '%s %s %s' "
> + "denied for %s\n", tmy_get_msg(is_enforce),
> + msg, buf1->name, buf2->name, tmy_get_last_name(domain));
> + if (mode == 1 && tmy_check_domain_quota(domain))
> + update_double_path_acl(operation,
> + get_file_pattern(buf1)->name,
> + get_file_pattern(buf2)->name,
> + domain, false);
> +out:
> + tmy_free(buf1);
> + tmy_free(buf2);
> + if (!is_enforce)
> + error = 0;
> + return error;
> +}
>
> --
Hello.
Serge E. Hallyn wrote:
> In a previous patch you mark funtions with 'begin/end critical section'.
> Please instead put a comment on top listing precisely which locks
> the fn expects to be held.
>
> As for protecting your own data, please
> 1. explain at the var declaration what lock protects it
> 2. define the lock next to the list
OK. I added comments and simplified dependencies.
http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi/trunk/2.2.x/tomoyo-lsm/patches/?root=tomoyo
Anything else we can do before reposting as #11?
Summary of locking is listed below.
(1) tmy_real_domain() requires the caller to hold tasklist_lock spinlock.
(2) list1_add_tail_mb() requires the caller to hold a lock for protecting the
list.
(3) All other functions can manage necessary locking using local locks declared
inside each functions, for read operation requires no locks and write
operation is aggregated into single function.
> For instance, I assume the intent below is for pattern_list to be
> protected by the static 'lock' mutex defined in
> update_file_pattern_entry. But get_file_pattern() also walks the
> list without any locking.
>
> It looks like you only ever append to the list, with a memory barrier,
> so *maybe* it's safe, but your rationale should be spelled out here.
It *is* safe. Below is the introduce-write-once-read-many-linked-list.patch
taken from above URL. Paul, please review the below patch.
Regards.
--------------------
Subject: Singly linked list implementation.
Signed-off-by: Tetsuo Handa <[email protected]>
---
include/linux/list.h | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 95 insertions(+)
--- linux-next.orig/include/linux/list.h
+++ linux-next/include/linux/list.h
@@ -692,4 +692,99 @@ static inline void hlist_move_list(struc
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = n)
+/*
+ * Singly linked list implementation.
+ *
+ * This list supports only two operations.
+ * (1) Append an entry to the tail of the list.
+ * (2) Read all entries starting from the head of the list.
+ *
+ * This list is designed for holding "write once, read many" entries.
+ * This list requires no locks for read operation.
+ * This list doesn't support "remove an entry from the list" operation.
+ * This list penalize "mb()" for write operation but penalize nothing for read
+ * operation.
+ */
+
+/* To keep the reader read lock free, this list doesn't have "prev" field. */
+struct list1_head {
+ struct list1_head *next;
+};
+
+#define LIST1_HEAD_INIT(name) { &(name) }
+#define LIST1_HEAD(name) struct list1_head name = LIST1_HEAD_INIT(name)
+
+static inline void INIT_LIST1_HEAD(struct list1_head *list)
+{
+ list->next = list;
+}
+
+/**
+ * list1_entry - get the struct for this entry
+ * @ptr: the &struct list1_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list1_struct within the struct.
+ */
+#define list1_entry(ptr, type, member) container_of(ptr, type, member)
+
+/**
+ * list1_for_each - iterate over a list
+ * @pos: the &struct list1_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list1_for_each(pos, head) \
+ for (pos = (head)->next; prefetch(pos->next), pos != (head); \
+ pos = pos->next)
+
+/**
+ * list1_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list1_struct within the struct.
+ */
+#define list1_for_each_entry(pos, head, member) \
+ for (pos = list1_entry((head)->next, typeof(*pos), member); \
+ prefetch(pos->member.next), &pos->member != (head); \
+ pos = list1_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list1_for_each_cookie - iterate over a list with cookie.
+ * @pos: the &struct list1_head to use as a loop cursor.
+ * @cookie: the &struct list1_head to use as a cookie.
+ * @head: the head for your list.
+ *
+ * Same with list_for_each except that this primitive uses cookie
+ * so that we can continue iteration.
+ * Since list elements are never removed, we don't need to get a lock
+ * or a reference count.
+ */
+#define list1_for_each_cookie(pos, cookie, head) \
+ for (({ if (!cookie) \
+ cookie = head; }), pos = (cookie)->next; \
+ prefetch(pos->next), pos != (head) || ((cookie) = NULL); \
+ (cookie) = pos, pos = pos->next)
+
+/**
+ * list_add_tail_mb - add a new entry with memory barrier.
+ * @new: new entry to be added.
+ * @head: list head to add it before.
+ *
+ * Same with list_add_tail_rcu() except that this primitive uses mb()
+ * so that we can traverse forwards using list1_for_each() and
+ * list1_for_each_cookie() without any locks.
+ *
+ * Caller must hold a lock for protecting @head.
+ */
+static inline void list1_add_tail_mb(struct list1_head *new,
+ struct list1_head *head)
+{
+ struct list1_head *pos = head;
+
+ new->next = head;
+ mb(); /* Avoid out-of-order execution. */
+ while (pos->next != head)
+ pos = pos->next;
+ pos->next = new;
+}
+
#endif
On Sun, Oct 12, 2008 at 09:09:40AM +0900, Tetsuo Handa wrote:
> Hello.
>
> Serge E. Hallyn wrote:
> > In a previous patch you mark funtions with 'begin/end critical section'.
> > Please instead put a comment on top listing precisely which locks
> > the fn expects to be held.
> >
> > As for protecting your own data, please
> > 1. explain at the var declaration what lock protects it
> > 2. define the lock next to the list
>
> OK. I added comments and simplified dependencies.
> http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi/trunk/2.2.x/tomoyo-lsm/patches/?root=tomoyo
> Anything else we can do before reposting as #11?
>
> Summary of locking is listed below.
>
> (1) tmy_real_domain() requires the caller to hold tasklist_lock spinlock.
> (2) list1_add_tail_mb() requires the caller to hold a lock for protecting the
> list.
> (3) All other functions can manage necessary locking using local locks declared
> inside each functions, for read operation requires no locks and write
> operation is aggregated into single function.
>
> > For instance, I assume the intent below is for pattern_list to be
> > protected by the static 'lock' mutex defined in
> > update_file_pattern_entry. But get_file_pattern() also walks the
> > list without any locking.
> >
> > It looks like you only ever append to the list, with a memory barrier,
> > so *maybe* it's safe, but your rationale should be spelled out here.
>
> It *is* safe. Below is the introduce-write-once-read-many-linked-list.patch
> taken from above URL. Paul, please review the below patch.
The memory barrier on the element-addition side is OK, though could
use rcu_assign_pointer(). In any case, it should be changed to smp_
form, as it is not needed in UP builds.
A few comments below -- some rcu_dereference()s are needed.
The general idea looks sound, at least as long as the lists remain
short. Otherwise, the list scan in list1_add_tail_mb() will take
too long.
Thanx, Paul
> Regards.
> --------------------
> Subject: Singly linked list implementation.
>
> Signed-off-by: Tetsuo Handa <[email protected]>
> ---
> include/linux/list.h | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 95 insertions(+)
>
> --- linux-next.orig/include/linux/list.h
> +++ linux-next/include/linux/list.h
> @@ -692,4 +692,99 @@ static inline void hlist_move_list(struc
> ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
> pos = n)
>
> +/*
> + * Singly linked list implementation.
> + *
> + * This list supports only two operations.
> + * (1) Append an entry to the tail of the list.
> + * (2) Read all entries starting from the head of the list.
> + *
> + * This list is designed for holding "write once, read many" entries.
> + * This list requires no locks for read operation.
> + * This list doesn't support "remove an entry from the list" operation.
> + * This list penalize "mb()" for write operation but penalize nothing for read
> + * operation.
> + */
> +
> +/* To keep the reader read lock free, this list doesn't have "prev" field. */
> +struct list1_head {
> + struct list1_head *next;
> +};
> +
> +#define LIST1_HEAD_INIT(name) { &(name) }
> +#define LIST1_HEAD(name) struct list1_head name = LIST1_HEAD_INIT(name)
> +
> +static inline void INIT_LIST1_HEAD(struct list1_head *list)
> +{
> + list->next = list;
> +}
Hmmm... This list is circular.
> +/**
> + * list1_entry - get the struct for this entry
> + * @ptr: the &struct list1_head pointer.
> + * @type: the type of the struct this is embedded in.
> + * @member: the name of the list1_struct within the struct.
> + */
> +#define list1_entry(ptr, type, member) container_of(ptr, type, member)
This is identical to list_entry(). Why have both?
> +/**
> + * list1_for_each - iterate over a list
> + * @pos: the &struct list1_head to use as a loop cursor.
> + * @head: the head for your list.
> + */
> +#define list1_for_each(pos, head) \
> + for (pos = (head)->next; prefetch(pos->next), pos != (head); \
> + pos = pos->next)
Unless there is a strong need for list1_for_each(), I would leave it
out. Error prone.
If you do retain it, don't you need rcu_dereference() as follows?
+#define list1_for_each(pos, head) \
+ for (pos = rcu_dereference((head)->next); prefetch(pos->next), pos != (head); \
+ pos = rcu_dereference(pos->next))
> +/**
> + * list1_for_each_entry - iterate over list of given type
> + * @pos: the type * to use as a loop cursor.
> + * @head: the head for your list.
> + * @member: the name of the list1_struct within the struct.
> + */
> +#define list1_for_each_entry(pos, head, member) \
> + for (pos = list1_entry((head)->next, typeof(*pos), member); \
> + prefetch(pos->member.next), &pos->member != (head); \
> + pos = list1_entry(pos->member.next, typeof(*pos), member))
Need rcu_dereference() here as well. Otherwise, compiler optimizations
and DEC Alpha can cause failures.
> +/**
> + * list1_for_each_cookie - iterate over a list with cookie.
> + * @pos: the &struct list1_head to use as a loop cursor.
> + * @cookie: the &struct list1_head to use as a cookie.
And cookie must initially be NULL.
> + * @head: the head for your list.
> + *
> + * Same with list_for_each except that this primitive uses cookie
> + * so that we can continue iteration.
> + * Since list elements are never removed, we don't need to get a lock
> + * or a reference count.
> + */
> +#define list1_for_each_cookie(pos, cookie, head) \
> + for (({ if (!cookie) \
> + cookie = head; }), pos = (cookie)->next; \
> + prefetch(pos->next), pos != (head) || ((cookie) = NULL); \
> + (cookie) = pos, pos = pos->next)
Need rcu_dereference() here as well:
+#define list1_for_each_cookie(pos, cookie, head) \
+ for (({ if (!cookie) \
+ cookie = head; }), pos = rcu_dereference((cookie)->next); \
+ prefetch(pos->next), pos != (head) || ((cookie) = NULL); \
+ (cookie) = pos, pos = rcu_dereference(pos->next))
> +/**
> + * list_add_tail_mb - add a new entry with memory barrier.
> + * @new: new entry to be added.
> + * @head: list head to add it before.
> + *
> + * Same with list_add_tail_rcu() except that this primitive uses mb()
> + * so that we can traverse forwards using list1_for_each() and
> + * list1_for_each_cookie() without any locks.
> + *
> + * Caller must hold a lock for protecting @head.
> + */
> +static inline void list1_add_tail_mb(struct list1_head *new,
> + struct list1_head *head)
> +{
> + struct list1_head *pos = head;
> +
> + new->next = head;
> + mb(); /* Avoid out-of-order execution. */
smp_wmb() should be sufficient. You could also instead use
rcu_assign_pointer() on the assignment to pos->next below.
> + while (pos->next != head)
> + pos = pos->next;
> + pos->next = new;
> +}
Hope the lists are never too long... ;-)
Another approach would be to maintain an explicit tail pointer, as
is done in the RCU callback lists in kernel/rcuclassic.c.
Either way, I suggest simply list1_add_tail() -- the "mb" is
implementation. The key point is that you can add to the list
even when there are concurrent readers.
> #endif
Quoting Tetsuo Handa ([email protected]):
> Hello.
>
> Serge E. Hallyn wrote:
> > In a previous patch you mark funtions with 'begin/end critical section'.
> > Please instead put a comment on top listing precisely which locks
> > the fn expects to be held.
> >
> > As for protecting your own data, please
> > 1. explain at the var declaration what lock protects it
> > 2. define the lock next to the list
>
> OK. I added comments and simplified dependencies.
> http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi/trunk/2.2.x/tomoyo-lsm/patches/?root=tomoyo
Cool, thanks.
This, in general, is part of changing your mindset - from that of being
a maintainer of out-of-tree code, to being a part of the core community.
My dcache comment further down is along the same lines.
> Anything else we can do before reposting as #11?
Well I'd like to sit down one day and make sure that your _clean()s in
patch 1 cover all the error cases and there are no leaks.
The pathname walking code doesn't seem to be in any way tomoyo-specific,
so it really ought to be in fs/dcache.c where the relevant maintainers
will see, scrutinize, and update it when necessary. I realize that
means we make it look like we encourage others to use the functions
which we don't want either. But having them in tomoyo-specific code
isn't nice either.
And I haven't really looked at your patches 6-8 yet, and am not sure
when I'll get time.
Anyway I think we're well to the point where the patches should be
tossed into a tree and tested (once you address Paul's feedback).
Actually, one thing which is missing from this patchset is a MAINTAINERS
entry. What I'd particularly be interested in is a mailing list entry
(with a public readable archive), so we can see that there is in fact a
community using this.
-serge
Paul E. McKenney wrote:
> A few comments below -- some rcu_dereference()s are needed.
This list doesn't use RCU since it is for Write-Once-Read-Many
situation (i.e. no-update and no-delete). TOMOYO Linux uses this list
for storing policy elements. Most of elements are allocated when the
kernel is loaded, and they are referred during lifetime of the kernel.
Since read_lock is not needed when referring this list, code of
TOMOYO keeps its simplicity. If TOMOYO used RCU or reader/writer lock,
the code would be a jumble of read_lock and it would be almost
impossible to maintain and review the code... X-p This is the reason
why TOMOYO uses this WORM list.
Though size of policy increases with learning mode, the same
pathnames once learned will be reused. So memory usage of TOMOYO
doesn't increase infinitely; if still worried, we can set memory
quota.
> The general idea looks sound, at least as long as the lists remain
> short. Otherwise, the list scan in list1_add_tail_mb() will take
> too long.
Typically less than 100. The length of list won't matter since the
frequency of append is very low.
Paul, would you review this list from the perspective of WORM list?
Regards,
On Thu, Oct 16, 2008 at 01:05:34PM +0900, Kentaro Takeda wrote:
> Paul E. McKenney wrote:
> > A few comments below -- some rcu_dereference()s are needed.
>
> This list doesn't use RCU since it is for Write-Once-Read-Many
> situation (i.e. no-update and no-delete). TOMOYO Linux uses this list
> for storing policy elements. Most of elements are allocated when the
> kernel is loaded, and they are referred during lifetime of the kernel.
>
> Since read_lock is not needed when referring this list, code of
> TOMOYO keeps its simplicity. If TOMOYO used RCU or reader/writer lock,
> the code would be a jumble of read_lock and it would be almost
> impossible to maintain and review the code... X-p This is the reason
> why TOMOYO uses this WORM list.
>
> Though size of policy increases with learning mode, the same
> pathnames once learned will be reused. So memory usage of TOMOYO
> doesn't increase infinitely; if still worried, we can set memory
> quota.
>
> > The general idea looks sound, at least as long as the lists remain
> > short. Otherwise, the list scan in list1_add_tail_mb() will take
> > too long.
>
> Typically less than 100. The length of list won't matter since the
> frequency of append is very low.
That sounds small enough to me. If it becomes a problem some time in
the future, it will be easy for you to add a tail pointer or some such.
> Paul, would you review this list from the perspective of WORM list?
Hmmm... I thought I was reviewing it from that perspective. You never
delete anything from the list, so you don't need RCU.
But fair enough. How about the following?
#define worm_dereference() rcu_dereference()
#define worm_assign_pointer() rcu_assign_pointer()
Then put worm_dereference() where I told you to put rcu_dereference()
and also put worm_assign_pointer() where I suggested using
rcu_assign_pointer().
You might not be using RCU, but keep in mind that RCU is primarily about
-removing- items from a data structure. When you -insert- items into
a data structure that is being concurrently read, as you are doing, you
must solve all the same problems that RCU has to solve for list insertion.
Please see http://lkml.org/lkml/2008/2/2/255 for examples showing why
this is needed. This posting does indeed use RCU terminology, but it is
focused only on insertion, and therefore exactly describes the situation
that you are in.
I am sure that you will get back to me with any questions or objections
that you might have after looking this over. ;-)
Thanx, Paul
Quoting from http://lkml.org/lkml/2008/2/2/255
> Similarly, the smp_read_barrier_depends() is only for initialization
> of something that is about to enter the list. As with the smp_wmb()
> primitive, smp_read_barrier_depends() also is not to protect against
> freeing. Instead, it is rcu_read_lock() and rcu_read_unlock() that
> protect against freeing.
We don't need to use rcu_read_lock() and rcu_read_unlock() because
we don't free elements in a list. I see.
However, to ensure the reader gets up-to-date value, we need to use
smp_read_barrier_depends() (which is expanded to "mb()" for SMP on
Alpha, "read_barrier_depends()" for SMP on H8300, "((void)0)" for SMP
on M68K-nommu, "((void)0)" for M68K, "do { } while (0)" otherwise)
whenever the reader fetches an element in a list.
Paul E. McKenney wrote:
> But fair enough. How about the following?
>
> #define worm_dereference() rcu_dereference()
> #define worm_assign_pointer() rcu_assign_pointer()
>
So, I understood that the rcu_dereference() and rcu_assign_pointer()
are not only for RCU. They are needed to ensure the reader gets
up-to-date value. Then, their names should be var_dereference() and
var_assign_pointer() or something, shouldn't they? The "rcu_" prefix
and comments on rcu_dereference in include/linux/rcupdate.h sound for
me that they are used for variables protected by RCU locking
mechanism only...
You are suggesting to explicitly call rcu_assign_pointer() (which
will call smp_wmb()) and rcu_dereference() (which will call
smp_read_barrier_depends()). But I think that the various cache
invalidations driven by the workload will call rcu_assign_pointer()
and rcu_dereference() sooner or later. So, if the reader can tolerate
reading non-up-to-date value (in fact, TOMOYO can), isn't there a
choice to omit rcu_assign_pointer() and rcu_dereference() (which will
cost "mb()" for SMP on Alpha)?
Regards,
On Fri, Oct 17, 2008 at 05:32:43PM +0900, Kentaro Takeda wrote:
> Quoting from http://lkml.org/lkml/2008/2/2/255
> > Similarly, the smp_read_barrier_depends() is only for initialization
> > of something that is about to enter the list. As with the smp_wmb()
> > primitive, smp_read_barrier_depends() also is not to protect against
> > freeing. Instead, it is rcu_read_lock() and rcu_read_unlock() that
> > protect against freeing.
>
> We don't need to use rcu_read_lock() and rcu_read_unlock() because
> we don't free elements in a list. I see.
Agreed!
> However, to ensure the reader gets up-to-date value, we need to use
> smp_read_barrier_depends() (which is expanded to "mb()" for SMP on
> Alpha, "read_barrier_depends()" for SMP on H8300, "((void)0)" for SMP
> on M68K-nommu, "((void)0)" for M68K, "do { } while (0)" otherwise)
> whenever the reader fetches an element in a list.
Yep. You will also need the ACCESS_ONCE() on the pointer fetch in order
to suppress aggressive compiler optimizations. The rcu_dereference()
primitive packages them up nicely.
> Paul E. McKenney wrote:
> > But fair enough. How about the following?
> >
> > #define worm_dereference() rcu_dereference()
> > #define worm_assign_pointer() rcu_assign_pointer()
> >
> So, I understood that the rcu_dereference() and rcu_assign_pointer()
> are not only for RCU. They are needed to ensure the reader gets
> up-to-date value. Then, their names should be var_dereference() and
> var_assign_pointer() or something, shouldn't they? The "rcu_" prefix
> and comments on rcu_dereference in include/linux/rcupdate.h sound for
> me that they are used for variables protected by RCU locking
> mechanism only...
Well, there are 200+ uses of rcu_dereference() for RCU, so it would
99.5%+ accurate to retain the "rcu_" prefix. ;-)
Once we have several non-RCU uses, we can probably do a much better
job of coming up with a good name for the underlying independent-of-RCU
primitive. So we should stick with rcu_dereference() as the name of the
underlying primitive for now, and re-evaluate the naming in a year or
after another five non-RCU uses of rcu_dereference() appear, whichever
comes later. (My current guess for names are "pointer_subscribe()"
for rcu_dereference() and "pointer_publish()" for rcu_assign_pointer(),
but who knows?)
Fair enough?
> You are suggesting to explicitly call rcu_assign_pointer() (which
> will call smp_wmb()) and rcu_dereference() (which will call
> smp_read_barrier_depends()). But I think that the various cache
> invalidations driven by the workload will call rcu_assign_pointer()
> and rcu_dereference() sooner or later. So, if the reader can tolerate
> reading non-up-to-date value (in fact, TOMOYO can), isn't there a
> choice to omit rcu_assign_pointer() and rcu_dereference() (which will
> cost "mb()" for SMP on Alpha)?
TOMOYO can tolerate reading the complete garbage that would appear if
the pointer was assigned before the pointed-to fields are initialized?
I must confess that I am having a hard time believing that. Please
explain how this works.
Thanx, Paul
Hello.
Paul E. McKenney wrote:
> On Fri, Oct 17, 2008 at 05:32:43PM +0900, Kentaro Takeda wrote:
> > However, to ensure the reader gets up-to-date value, we need to use
> > smp_read_barrier_depends() (which is expanded to "mb()" for SMP on
> > Alpha, "read_barrier_depends()" for SMP on H8300, "((void)0)" for SMP
> > on M68K-nommu, "((void)0)" for M68K, "do { } while (0)" otherwise)
> > whenever the reader fetches an element in a list.
>
> Yep. You will also need the ACCESS_ONCE() on the pointer fetch in order
> to suppress aggressive compiler optimizations. The rcu_dereference()
> primitive packages them up nicely.
So, I should use rcu_dereference() rather than smp_read_barrier_depends().
I see.
> > > But fair enough. How about the following?
> > >
> > > #define worm_dereference() rcu_dereference()
> > > #define worm_assign_pointer() rcu_assign_pointer()
> > >
> > So, I understood that the rcu_dereference() and rcu_assign_pointer()
> > are not only for RCU. They are needed to ensure the reader gets
> > up-to-date value. Then, their names should be var_dereference() and
> > var_assign_pointer() or something, shouldn't they? The "rcu_" prefix
> > and comments on rcu_dereference in include/linux/rcupdate.h sound for
> > me that they are used for variables protected by RCU locking
> > mechanism only...
>
> Well, there are 200+ uses of rcu_dereference() for RCU, so it would
> 99.5%+ accurate to retain the "rcu_" prefix. ;-)
>
> Once we have several non-RCU uses, we can probably do a much better
> job of coming up with a good name for the underlying independent-of-RCU
> primitive. So we should stick with rcu_dereference() as the name of the
> underlying primitive for now, and re-evaluate the naming in a year or
> after another five non-RCU uses of rcu_dereference() appear, whichever
> comes later. (My current guess for names are "pointer_subscribe()"
> for rcu_dereference() and "pointer_publish()" for rcu_assign_pointer(),
> but who knows?)
>
> Fair enough?
Agreed.
> TOMOYO can tolerate reading the complete garbage that would appear if
> the pointer was assigned before the pointed-to fields are initialized?
> I must confess that I am having a hard time believing that. Please
> explain how this works.
Maybe I'm misunderstanding what "mb()" can do.
I inserted "mb()" between the initialization of the pointed-to field and
the assignment of the pointer like
static inline void list1_add_tail_mb(struct list1_head *new,
struct list1_head *head)
{
struct list1_head *pos = head;
new->next = head;
mb(); /* Avoid out-of-order execution. */
while (pos->next != head)
pos = pos->next;
pos->next = new;
}
to ensure that 'new->next' comes to point to 'head' before 'pos->next' comes to
point to 'new', for arch/ia64/include/asm/system.h says:
/*
* Macros to force memory ordering. In these descriptions, "previous"
* and "subsequent" refer to program order; "visible" means that all
* architecturally visible effects of a memory access have occurred
* (at a minimum, this means the memory has been read or written).
*
* wmb(): Guarantees that all preceding stores to memory-
* like regions are visible before any subsequent
* stores and that all following stores will be
* visible only after all previous stores.
* rmb(): Like wmb(), but for reads.
* mb(): wmb()/rmb() combo, i.e., all previous memory
* accesses are visible before all subsequent
* accesses and vice versa. This is also known as
* a "fence."
*
* Note: "mb()" and its variants cannot be used as a fence to order
* accesses to memory mapped I/O registers. For that, mf.a needs to
* be used. However, we don't want to always use mf.a because (a)
* it's (presumably) much slower than mf and (b) mf.a is supported for
* sequential memory pages only.
*/
But http://www.rdrop.com/users/paulmck/scalability/paper/ordering.2007.09.19a.pdf says:
On these systems, there are three orderings that must be accounted for:
1. Program order:
the order of the program's object code as seen by the CPU
2. Execution order:
The execution order can differ from program order due to both compiler and
CPU implementation optimizations.
3. Perceived order:
And I noticed that the comment in arch/ia64/include/asm/system.h uses
"program order", not "execution order".
Therefore, I have to also care "execution order".
I assumed that the "program order" == "execution order" and I thought:
The reader will read the appended element (i.e. 'new->next') only if
the reader can reach (i.e. 'pos->next' == 'new') the appended element.
The reader won't read the appended element (i.e. 'new->next') as long as
the reader can't reach (i.e. 'pos->next' == 'head') the appended element.
However, since "execution order" != "program order", you are mentioning that
I have to call smp_read_barrier_depends() which can control "execution order".
Am I right?
Regards.
On Sat, Oct 18, 2008 at 11:04:39PM +0900, Tetsuo Handa wrote:
> Hello.
>
> Paul E. McKenney wrote:
> > On Fri, Oct 17, 2008 at 05:32:43PM +0900, Kentaro Takeda wrote:
> > > However, to ensure the reader gets up-to-date value, we need to use
> > > smp_read_barrier_depends() (which is expanded to "mb()" for SMP on
> > > Alpha, "read_barrier_depends()" for SMP on H8300, "((void)0)" for SMP
> > > on M68K-nommu, "((void)0)" for M68K, "do { } while (0)" otherwise)
> > > whenever the reader fetches an element in a list.
> >
> > Yep. You will also need the ACCESS_ONCE() on the pointer fetch in order
> > to suppress aggressive compiler optimizations. The rcu_dereference()
> > primitive packages them up nicely.
>
> So, I should use rcu_dereference() rather than smp_read_barrier_depends().
> I see.
Yep!
> > > > But fair enough. How about the following?
> > > >
> > > > #define worm_dereference() rcu_dereference()
> > > > #define worm_assign_pointer() rcu_assign_pointer()
> > > >
> > > So, I understood that the rcu_dereference() and rcu_assign_pointer()
> > > are not only for RCU. They are needed to ensure the reader gets
> > > up-to-date value. Then, their names should be var_dereference() and
> > > var_assign_pointer() or something, shouldn't they? The "rcu_" prefix
> > > and comments on rcu_dereference in include/linux/rcupdate.h sound for
> > > me that they are used for variables protected by RCU locking
> > > mechanism only...
> >
> > Well, there are 200+ uses of rcu_dereference() for RCU, so it would
> > 99.5%+ accurate to retain the "rcu_" prefix. ;-)
> >
> > Once we have several non-RCU uses, we can probably do a much better
> > job of coming up with a good name for the underlying independent-of-RCU
> > primitive. So we should stick with rcu_dereference() as the name of the
> > underlying primitive for now, and re-evaluate the naming in a year or
> > after another five non-RCU uses of rcu_dereference() appear, whichever
> > comes later. (My current guess for names are "pointer_subscribe()"
> > for rcu_dereference() and "pointer_publish()" for rcu_assign_pointer(),
> > but who knows?)
> >
> > Fair enough?
>
> Agreed.
>
> > TOMOYO can tolerate reading the complete garbage that would appear if
> > the pointer was assigned before the pointed-to fields are initialized?
> > I must confess that I am having a hard time believing that. Please
> > explain how this works.
>
> Maybe I'm misunderstanding what "mb()" can do.
>
> I inserted "mb()" between the initialization of the pointed-to field and
> the assignment of the pointer like
>
> static inline void list1_add_tail_mb(struct list1_head *new,
> struct list1_head *head)
> {
> struct list1_head *pos = head;
>
> new->next = head;
> mb(); /* Avoid out-of-order execution. */
> while (pos->next != head)
> pos = pos->next;
> pos->next = new;
> }
>
> to ensure that 'new->next' comes to point to 'head' before 'pos->next' comes to
> point to 'new', for arch/ia64/include/asm/system.h says:
>
> /*
> * Macros to force memory ordering. In these descriptions, "previous"
> * and "subsequent" refer to program order; "visible" means that all
> * architecturally visible effects of a memory access have occurred
> * (at a minimum, this means the memory has been read or written).
> *
> * wmb(): Guarantees that all preceding stores to memory-
> * like regions are visible before any subsequent
> * stores and that all following stores will be
> * visible only after all previous stores.
> * rmb(): Like wmb(), but for reads.
> * mb(): wmb()/rmb() combo, i.e., all previous memory
> * accesses are visible before all subsequent
> * accesses and vice versa. This is also known as
> * a "fence."
> *
> * Note: "mb()" and its variants cannot be used as a fence to order
> * accesses to memory mapped I/O registers. For that, mf.a needs to
> * be used. However, we don't want to always use mf.a because (a)
> * it's (presumably) much slower than mf and (b) mf.a is supported for
> * sequential memory pages only.
> */
The problem is that while wmb() and mb() do in fact order writes, they
cannot order the other task's reads. So if you do the following with
all variables initially zero:
a = 1;
smp_wmb(); /* use this instead of wmb() unless doing device I/O. */
b = 1;
That will indeed make the assignment to "a" happen before the
assignment to "b". But if the reader does the following:
tmp_b = b;
tmp_a = a;
There is nothing preventing these two reads from being reordered.
The reader might well see tmp_a==0&&tmp_b==1, which is what the
smp_wmb() was trying to prevent.
> But http://www.rdrop.com/users/paulmck/scalability/paper/ordering.2007.09.19a.pdf says:
>
> On these systems, there are three orderings that must be accounted for:
> 1. Program order:
> the order of the program's object code as seen by the CPU
> 2. Execution order:
> The execution order can differ from program order due to both compiler and
> CPU implementation optimizations.
> 3. Perceived order:
>
> And I noticed that the comment in arch/ia64/include/asm/system.h uses
> "program order", not "execution order".
> Therefore, I have to also care "execution order".
>
> I assumed that the "program order" == "execution order" and I thought:
>
> The reader will read the appended element (i.e. 'new->next') only if
> the reader can reach (i.e. 'pos->next' == 'new') the appended element.
> The reader won't read the appended element (i.e. 'new->next') as long as
> the reader can't reach (i.e. 'pos->next' == 'head') the appended element.
This assumes something called "dependency ordering". Dependency
ordering is intuitive, but unfortunately is not respected by DEC Alpha
or by some aggressive value-speculation compiler optimizations.
> However, since "execution order" != "program order", you are mentioning that
> I have to call smp_read_barrier_depends() which can control "execution order".
> Am I right?
Partly.
See the section labelled "Alpha" on Page 4 of:
http://www.rdrop.com/users/paulmck/scalability/paper/ordering.2007.09.19a.pdf
for an explanation, though http://lkml.org/lkml/2008/2/2/255
does a much better job of explaining it.
Thanx, Paul
Hello.
Paul E. McKenney wrote:
> > Maybe I'm misunderstanding what "mb()" can do.
>
> The problem is that while wmb() and mb() do in fact order writes, they
> cannot order the other task's reads.
>
I expected that "mb()" can order the other task's reads.
Now, I understood that there is no room for optimizing the reader process
by omitting smp_read_barrier_depends() on read side.
OK, let's return to http://lkml.org/lkml/2008/10/14/406 .
Below is the updated version of list1 operations.
As I now use rcu_assign_pointer() and rcu_dereference() which depend on
include/linux/rcupdate.h , I separated the code from include/linux/list.h .
Did I update correctly?
---
Subject: Singly linked list implementation.
Signed-off-by: Tetsuo Handa <[email protected]>
---
include/linux/list1.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)
--- /dev/null
+++ linux-next/include/linux/list1.h
@@ -0,0 +1,81 @@
+#ifndef _LINUX_LIST1_H
+#define _LINUX_LIST1_H
+
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+
+/*
+ * Singly linked list implementation.
+ *
+ * This list supports only two operations.
+ * (1) Append an entry to the tail of the list.
+ * (2) Read all entries starting from the head of the list.
+ *
+ * This list is designed for holding "write once, read many" entries.
+ * This list requires no locks for read operation.
+ * This list doesn't support "remove an entry from the list" operation.
+ */
+
+/* To reduce memory usage, this list doesn't use "->prev" pointer. */
+struct list1_head {
+ struct list1_head *next;
+};
+
+#define LIST1_HEAD_INIT(name) { &(name) }
+#define LIST1_HEAD(name) struct list1_head name = LIST1_HEAD_INIT(name)
+
+static inline void INIT_LIST1_HEAD(struct list1_head *list)
+{
+ list->next = list;
+}
+
+/* Reuse list_entry because it doesn't use "->prev" pointer. */
+#define list1_entry list_entry
+
+/* Reuse list_for_each_rcu because it doesn't use "->prev" pointer. */
+#define list1_for_each list_for_each_rcu
+/* Reuse list_for_each_entry_rcu because it doesn't use "->prev" pointer. */
+#define list1_for_each_entry list_for_each_entry_rcu
+
+/**
+ * list1_for_each_cookie - iterate over a list with cookie.
+ * @pos: the &struct list1_head to use as a loop cursor.
+ * @cookie: the &struct list1_head to use as a cookie.
+ * @head: the head for your list.
+ *
+ * Same with list_for_each_rcu() except that this primitive uses @cookie
+ * so that we can continue iteration.
+ * @cookie must be NULL when iteration starts, and @cookie will become
+ * NULL when iteration finishes.
+ *
+ * Since list elements are never removed, we don't need to get a lock
+ * or a reference count.
+ */
+#define list1_for_each_cookie(pos, cookie, head) \
+ for (({ if (!cookie) \
+ cookie = head; }), \
+ pos = rcu_dereference((cookie)->next); \
+ prefetch(pos->next), pos != (head) || ((cookie) = NULL); \
+ (cookie) = pos, pos = rcu_dereference(pos->next))
+
+/**
+ * list1_add_tail - add a new entry to list1 list.
+ * @new: new entry to be added.
+ * @head: list head to add it before.
+ *
+ * Same with list_add_tail_rcu() without "->prev" pointer.
+ *
+ * Caller must hold a lock for protecting @head.
+ */
+static inline void list1_add_tail(struct list1_head *new,
+ struct list1_head *head)
+{
+ struct list1_head *prev = head;
+
+ new->next = head;
+ while (prev->next != head)
+ prev = prev->next;
+ rcu_assign_pointer(prev->next, new);
+}
+
+#endif
---
By the way, quoting from ordering.2007.09.19a.pdf :
| One could place an smp_rmb() primitive between the pointer fetch and
| dereference. However, this imposes unneeded overhead on systems (such as
| i386, IA64, PPC, and SPARC) that respect data dependencies on the read side.
| A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel
| to eliminate overhead on these systems.
In 2.4 kernels, to support Alpha architecture, people use smp_rmb() which
imposes unneeded overhead on non Alpha architecture.
In 2.6 kernels, to support Alpha architecture, people use
smp_read_barrier_depends() which does not impose unneeded overhead on
non Alpha architecture.
That's nice.
| Alpha is the only CPU where smp_read_barrier_depends() is an smp_mb() rather
| than a no-op.
I found
#define smp_read_barrier_depends() read_barrier_depends()
in arch/h8300/include/asm/system.h but couldn't find the definition of
read_barrier_depends() within that file.
I hope read_barrier_depends() is defined as a no-op by some other header files.
Regards.
On Sun, Oct 19, 2008 at 10:10:23PM +0900, Tetsuo Handa wrote:
> Hello.
>
> Paul E. McKenney wrote:
> > > Maybe I'm misunderstanding what "mb()" can do.
> >
> > The problem is that while wmb() and mb() do in fact order writes, they
> > cannot order the other task's reads.
> >
> I expected that "mb()" can order the other task's reads.
So did I, a long time ago. It took an Alpha architect more than
an hour face-to-face to convince me otherwise. ;-)
> Now, I understood that there is no room for optimizing the reader process
> by omitting smp_read_barrier_depends() on read side.
>
> OK, let's return to http://lkml.org/lkml/2008/10/14/406 .
> Below is the updated version of list1 operations.
> As I now use rcu_assign_pointer() and rcu_dereference() which depend on
> include/linux/rcupdate.h , I separated the code from include/linux/list.h .
> Did I update correctly?
>
> ---
> Subject: Singly linked list implementation.
Looks good to me!
Reviewed-by: Paul E. McKenney <[email protected]>
> Signed-off-by: Tetsuo Handa <[email protected]>
> ---
> include/linux/list1.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 81 insertions(+)
>
> --- /dev/null
> +++ linux-next/include/linux/list1.h
> @@ -0,0 +1,81 @@
> +#ifndef _LINUX_LIST1_H
> +#define _LINUX_LIST1_H
> +
> +#include <linux/list.h>
> +#include <linux/rcupdate.h>
> +
> +/*
> + * Singly linked list implementation.
> + *
> + * This list supports only two operations.
> + * (1) Append an entry to the tail of the list.
> + * (2) Read all entries starting from the head of the list.
> + *
> + * This list is designed for holding "write once, read many" entries.
> + * This list requires no locks for read operation.
> + * This list doesn't support "remove an entry from the list" operation.
> + */
> +
> +/* To reduce memory usage, this list doesn't use "->prev" pointer. */
> +struct list1_head {
> + struct list1_head *next;
> +};
> +
> +#define LIST1_HEAD_INIT(name) { &(name) }
> +#define LIST1_HEAD(name) struct list1_head name = LIST1_HEAD_INIT(name)
> +
> +static inline void INIT_LIST1_HEAD(struct list1_head *list)
> +{
> + list->next = list;
> +}
> +
> +/* Reuse list_entry because it doesn't use "->prev" pointer. */
> +#define list1_entry list_entry
> +
> +/* Reuse list_for_each_rcu because it doesn't use "->prev" pointer. */
> +#define list1_for_each list_for_each_rcu
> +/* Reuse list_for_each_entry_rcu because it doesn't use "->prev" pointer. */
> +#define list1_for_each_entry list_for_each_entry_rcu
> +
> +/**
> + * list1_for_each_cookie - iterate over a list with cookie.
> + * @pos: the &struct list1_head to use as a loop cursor.
> + * @cookie: the &struct list1_head to use as a cookie.
> + * @head: the head for your list.
> + *
> + * Same with list_for_each_rcu() except that this primitive uses @cookie
> + * so that we can continue iteration.
> + * @cookie must be NULL when iteration starts, and @cookie will become
> + * NULL when iteration finishes.
> + *
> + * Since list elements are never removed, we don't need to get a lock
> + * or a reference count.
> + */
> +#define list1_for_each_cookie(pos, cookie, head) \
> + for (({ if (!cookie) \
> + cookie = head; }), \
> + pos = rcu_dereference((cookie)->next); \
> + prefetch(pos->next), pos != (head) || ((cookie) = NULL); \
> + (cookie) = pos, pos = rcu_dereference(pos->next))
> +
> +/**
> + * list1_add_tail - add a new entry to list1 list.
> + * @new: new entry to be added.
> + * @head: list head to add it before.
> + *
> + * Same with list_add_tail_rcu() without "->prev" pointer.
> + *
> + * Caller must hold a lock for protecting @head.
> + */
> +static inline void list1_add_tail(struct list1_head *new,
> + struct list1_head *head)
> +{
> + struct list1_head *prev = head;
> +
> + new->next = head;
> + while (prev->next != head)
> + prev = prev->next;
> + rcu_assign_pointer(prev->next, new);
> +}
> +
> +#endif
> ---
>
> By the way, quoting from ordering.2007.09.19a.pdf :
>
> | One could place an smp_rmb() primitive between the pointer fetch and
> | dereference. However, this imposes unneeded overhead on systems (such as
> | i386, IA64, PPC, and SPARC) that respect data dependencies on the read side.
> | A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel
> | to eliminate overhead on these systems.
>
> In 2.4 kernels, to support Alpha architecture, people use smp_rmb() which
> imposes unneeded overhead on non Alpha architecture.
> In 2.6 kernels, to support Alpha architecture, people use
> smp_read_barrier_depends() which does not impose unneeded overhead on
> non Alpha architecture.
> That's nice.
>
> | Alpha is the only CPU where smp_read_barrier_depends() is an smp_mb() rather
> | than a no-op.
>
> I found
>
> #define smp_read_barrier_depends() read_barrier_depends()
>
> in arch/h8300/include/asm/system.h but couldn't find the definition of
> read_barrier_depends() within that file.
> I hope read_barrier_depends() is defined as a no-op by some other header files.
>
> Regards.