LinuxLists.cc - [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

[permalink] [raw]

Subject: [PATCH 3/4] Auditing infrastructure

This is used throughout the code. Included for completeness, undergoing
separate review on linux-audit.

---
include/linux/audit.h | 38 +++++++++++++
kernel/auditfilter.c | 2
kernel/auditsc.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 176 insertions(+)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3c7a358..8faa4e0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -122,6 +122,9 @@
#define AUDIT_MAC_UNLBL_STCADD 1416 /* NetLabel: add a static label */
#define AUDIT_MAC_UNLBL_STCDEL 1417 /* NetLabel: del a static label */

+#define AUDIT_CRYPTO_STORAGE_KEY 1600 /* Key storage key configured */
+#define AUDIT_CRYPTO_USERSPACE_OP 1601 /* User-space crypto operation */
+
#define AUDIT_FIRST_KERN_ANOM_MSG 1700
#define AUDIT_LAST_KERN_ANOM_MSG 1799
#define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */
@@ -207,6 +210,7 @@
#define AUDIT_OBJ_TYPE 21
#define AUDIT_OBJ_LEV_LOW 22
#define AUDIT_OBJ_LEV_HIGH 23
+#define AUDIT_CRYPTO_OP 24

/* These are ONLY useful when checking
* at syscall exit time (AUDIT_AT_EXIT). */
@@ -314,6 +318,20 @@ enum {
#define AUDIT_PERM_READ 4
#define AUDIT_PERM_ATTR 8

+#define AUDIT_CRYPTO_OP_CONTEXT_NEW 1
+#define AUDIT_CRYPTO_OP_CONTEXT_DEL 2
+#define AUDIT_CRYPTO_OP_SESSION_INIT 3
+#define AUDIT_CRYPTO_OP_SESSION_OP 4
+#define AUDIT_CRYPTO_OP_SESSION_FINAL 5
+#define AUDIT_CRYPTO_OP_KEY_IMPORT 6
+#define AUDIT_CRYPTO_OP_KEY_EXPORT 7
+#define AUDIT_CRYPTO_OP_KEY_WRAP 8
+#define AUDIT_CRYPTO_OP_KEY_UNWRAP 9
+#define AUDIT_CRYPTO_OP_KEY_GEN 10
+#define AUDIT_CRYPTO_OP_KEY_DERIVE 11
+#define AUDIT_CRYPTO_OP_KEY_ZEROIZE 12
+#define AUDIT_CRYPTO_OP_KEY_GET_INFO 13
+
struct audit_status {
__u32 mask; /* Bit mask for valid entries */
__u32 enabled; /* 1 = enabled, 0 = disabled */
@@ -479,6 +497,10 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
const struct cred *new,
const struct cred *old);
extern void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
+extern int __audit_log_crypto_op(int op, int context, int session,
+ const char *operation, const char *algorithm,
+ int key1, void *key1_id, size_t key1_id_size,
+ int key2, void *key2_id, size_t key2_id_size);

static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{
@@ -532,6 +554,21 @@ static inline void audit_log_capset(pid_t pid, const struct cred *new,
__audit_log_capset(pid, new, old);
}

+static inline int audit_log_crypto_op(int op, int context, int session,
+ const char *operation,
+ const char *algorithm, int key1,
+ void *key1_id, size_t key1_id_size,
+ int key2, void *key2_id,
+ size_t key2_id_size)
+{
+ if (unlikely(!audit_dummy_context()))
+ return __audit_log_crypto_op(op, context, session, operation,
+ algorithm, key1, key1_id,
+ key1_id_size, key2, key2_id,
+ key2_id_size);
+ return 0;
+}
+
extern int audit_n_rules;
extern int audit_signals;
#else
@@ -565,6 +602,7 @@ extern int audit_signals;
#define audit_mq_getsetattr(d,s) ((void)0)
#define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
#define audit_log_capset(pid, ncr, ocr) ((void)0)
+#define audit_log_crypto_op(op, context, session, key1, key1_id, key1_id_size, key2, key2_id, key2_id_size) (0)
#define audit_ptrace(t) ((void)0)
#define audit_n_rules 0
#define audit_signals 0
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a706040..a25a587 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -363,6 +363,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
case AUDIT_DEVMINOR:
case AUDIT_EXIT:
case AUDIT_SUCCESS:
+ case AUDIT_CRYPTO_OP:
/* bit ops are only useful on syscall args */
if (f->op == Audit_bitmask || f->op == Audit_bittest)
goto exit_free;
@@ -457,6 +458,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_ARG1:
case AUDIT_ARG2:
case AUDIT_ARG3:
+ case AUDIT_CRYPTO_OP:
break;
case AUDIT_ARCH:
entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fc0f928..47c1cc4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -50,6 +50,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mount.h>
+#include <linux/ncr.h>
#include <linux/socket.h>
#include <linux/mqueue.h>
#include <linux/audit.h>
@@ -157,6 +158,21 @@ struct audit_aux_data_capset {
struct audit_cap_data cap;
};

+struct audit_crypto_op {
+ struct list_head list;
+ int op;
+ int context;
+ int session;
+ const char *operation;
+ const char *algorithm;
+ int key1;
+ unsigned char key1_id[MAX_KEY_ID_SIZE];
+ size_t key1_id_size;
+ int key2;
+ unsigned char key2_id[MAX_KEY_ID_SIZE];
+ size_t key2_id_size;
+};
+
struct audit_tree_refs {
struct audit_tree_refs *next;
struct audit_chunk *c[31];
@@ -181,6 +197,7 @@ struct audit_context {
struct audit_context *previous; /* For nested syscalls */
struct audit_aux_data *aux;
struct audit_aux_data *aux_pids;
+ struct list_head crypto;
struct sockaddr_storage *sockaddr;
size_t sockaddr_len;
/* Save things to print about task_struct */
@@ -632,6 +649,18 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_FILETYPE:
result = audit_match_filetype(ctx, f->val);
break;
+ case AUDIT_CRYPTO_OP:
+ if (ctx) {
+ struct audit_crypto_op *ax;
+
+ list_for_each_entry(ax, &ctx->crypto, list) {
+ result = audit_comparator(ax->op, f->op,
+ f->val);
+ if (result)
+ break;
+ }
+ }
+ break;
}

if (!result) {
@@ -827,6 +856,7 @@ static inline void audit_free_names(struct audit_context *context)
static inline void audit_free_aux(struct audit_context *context)
{
struct audit_aux_data *aux;
+ struct audit_crypto_op *crypto, *tmp;

while ((aux = context->aux)) {
context->aux = aux->next;
@@ -836,6 +866,10 @@ static inline void audit_free_aux(struct audit_context *context)
context->aux_pids = aux->next;
kfree(aux);
}
+ list_for_each_entry_safe(crypto, tmp, &context->crypto, list) {
+ list_del(&crypto->list);
+ kfree(crypto);
+ }
}

static inline void audit_zero_context(struct audit_context *context,
@@ -853,6 +887,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
if (!(context = kmalloc(sizeof(*context), GFP_KERNEL)))
return NULL;
audit_zero_context(context, state);
+ INIT_LIST_HEAD(&context->crypto);
INIT_LIST_HEAD(&context->killed_trees);
return context;
}
@@ -1316,6 +1351,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
int i, call_panic = 0;
struct audit_buffer *ab;
struct audit_aux_data *aux;
+ struct audit_crypto_op *crypto;
const char *tty;

/* tsk == current */
@@ -1442,6 +1478,58 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
call_panic = 1;
}

+ list_for_each_entry(crypto, &context->crypto, list) {
+ static const char *const ops[] = {
+ [AUDIT_CRYPTO_OP_CONTEXT_NEW] = "context_new",
+ [AUDIT_CRYPTO_OP_CONTEXT_DEL] = "context_del",
+ [AUDIT_CRYPTO_OP_SESSION_INIT] = "session_init",
+ [AUDIT_CRYPTO_OP_SESSION_OP] = "session_op",
+ [AUDIT_CRYPTO_OP_SESSION_FINAL] = "session_final",
+ [AUDIT_CRYPTO_OP_KEY_IMPORT] = "key_import",
+ [AUDIT_CRYPTO_OP_KEY_EXPORT] = "key_export",
+ [AUDIT_CRYPTO_OP_KEY_WRAP] = "key_wrap",
+ [AUDIT_CRYPTO_OP_KEY_UNWRAP] = "key_unwrap",
+ [AUDIT_CRYPTO_OP_KEY_GEN] = "key_gen",
+ [AUDIT_CRYPTO_OP_KEY_DERIVE] = "key_derive",
+ [AUDIT_CRYPTO_OP_KEY_ZEROIZE] = "key_zeroize",
+ [AUDIT_CRYPTO_OP_KEY_GET_INFO] = "key_get_info",
+ };
+
+ ab = audit_log_start(context, GFP_KERNEL,
+ AUDIT_CRYPTO_USERSPACE_OP);
+ if (!ab)
+ continue;
+ if (crypto->op < ARRAY_SIZE(ops) && ops[crypto->op] != NULL)
+ audit_log_format(ab, "crypto_op=%s", ops[crypto->op]);
+ else
+ audit_log_format(ab, "crypto_op=%d", crypto->op);
+ audit_log_format(ab, " ctx=%d", crypto->context);
+ if (crypto->session != -1)
+ audit_log_format(ab, " session=%d", crypto->session);
+ if (crypto->operation != NULL)
+ audit_log_format(ab, " operation=%s",
+ crypto->operation);
+ if (crypto->algorithm != NULL)
+ audit_log_format(ab, " algo=%s", crypto->algorithm);
+ if (crypto->key1 != -1) {
+ audit_log_format(ab, " key1=%d", crypto->key1);
+ if (crypto->key1_id_size > 0) {
+ audit_log_format(ab, " key1_id=");
+ audit_log_n_untrustedstring(ab, crypto->key1_id,
+ crypto->key1_id_size);
+ }
+ }
+ if (crypto->key2 != -1) {
+ audit_log_format(ab, " key2=%d", crypto->key2);
+ if (crypto->key2_id_size > 0) {
+ audit_log_format(ab, " key2_id=");
+ audit_log_n_untrustedstring(ab, crypto->key2_id,
+ crypto->key2_id_size);
+ }
+ }
+ audit_log_end(ab);
+ }
+
if (context->target_pid &&
audit_log_pid_context(context, context->target_pid,
context->target_auid, context->target_uid,
@@ -2486,6 +2574,54 @@ void __audit_log_capset(pid_t pid,
}

/**
+ * __audit_log_crypto_op - store information about an user-space crypto op
+ * @op: AUDIT_CRYPTO_OP_*
+ * @context: user-space context ID
+ * @session: session ID within @context, or -1
+ * @operation: more detailed operation description, or NULL
+ * @algorithm: algorithm (crypto API transform) name, or NULL
+ * @key1: ID of key 1 within @context, or -1
+ * @key1_id: user-space ID of key 1 set from user-space if @key1 != -1
+ * @key1_id_size: Size of @key1_id
+ * @key2: ID of key 2 within @context, or -1
+ * @key2_id: user-space ID of key 2 set from user-space if @key2 != -1
+ * @key2_id_size: Size of @key2_id
+ */
+int __audit_log_crypto_op(int op, int context, int session,
+ const char *operation, const char *algorithm,
+ int key1, void *key1_id, size_t key1_id_size,
+ int key2, void *key2_id, size_t key2_id_size)
+{
+ struct audit_crypto_op *ax;
+ struct audit_context *ctx = current->audit_context;
+
+ ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+ if (!ax)
+ return -ENOMEM;
+
+ ax->op = op;
+ ax->context = context;
+ ax->session = session;
+ ax->operation = operation;
+ ax->algorithm = algorithm;
+ ax->key1 = key1;
+ if (key1 != -1) {
+ ax->key1_id_size = min(key1_id_size, sizeof(ax->key1_id));
+ memcpy(ax->key1_id, key1_id, ax->key1_id_size);
+ } else
+ ax->key1_id_size = 0;
+ ax->key2 = key2;
+ if (key2 != -1) {
+ ax->key2_id_size = min(key2_id_size, sizeof(ax->key2_id));
+ memcpy(ax->key2_id, key2_id, ax->key2_id_size);
+ } else
+ ax->key2_id_size = 0;
+ list_add_tail(&ax->list, &ctx->crypto);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__audit_log_crypto_op);
+
+/**
* audit_core_dumps - record information about processes that end abnormally
* @signr: signal value
*

2010-08-05 20:18:20

[permalink] [raw]

Subject: [PATCH 1/4] User-space API definition

This patch introduces the new user-space API, <ncr.h>.

Quick overview:

* open("/dev/crypto") to get a FD, which acts as a namespace for key and
session identifiers.

* ioctl(NCRIO_KEY_INIT) to allocate a key object; then generate the key
material inside the kernel, load a plaintext key, unwrap a key, or derive a
key. Similarly the key material can be copied out of the kernel or wrapped.

* ioctl(NCRIO_SESSION_INIT) to allocate a crypto session (to encrypt, decrypt,
hash, sign, or verify signature), the ioctl(NCRIO_SESSION_UPDATE) to act
on chunks of data. Deallocate the session, and optionally retrieve session
results (e.g. hash or signature), using ioctl(NCRIO_SESSION_FINAL).

There is also NCRIO_SESSION_ONCE for an one-shot crypto operation using
a single user->kernel context switch.

Full documentation of the interface follows.

NAME
/dev/crypto - kernel cryptographic module interface

SYNOPSIS
#include <ncr.h>
int fd = open("/dev/crypto", O_RDWR);
int res = ioctl(fd, NCRIO..., &data);

DESCRIPTION
The /dev/crypto device file provides an ioctl(2) interface to the ker-
nel-space crypto implementation.

Each open(2) of the /dev/crypto file establishes a separate namespace
within which crypto operations work. The namespace can be shared
across threads and processes by sharing the open file description.
Last close of the open file description automatically destroys all
objects allocated within the namespace.

All ioctl(2)s have the same form: The user sets up a data structure
with input data, and passes a pointer to the data structure as the
third parameter to ioctl(2). On success, output data is available in
the same structure.

The following operations are defined:

NCRIO_KEY_INIT
Allocate a kernel-space key object. The parameter is not used
on input (key attributes are set later, when the key material is
initialized). On success, an integer descriptor for the key
object (valid within the current /dev/crypto namespace) is
stored in the provided area.

There is a per-process and per-user (not per-namespace) limit on
the number key objects that can be allocated.

NCRIO_KEY_DEINIT
Deallocate a kernel-space key object. The parameter specifies
the integer descriptor of the key object. After all other oper-
ations using this key object (if any) terminate, the key mate-
rial will be cleared and the object will be freed. Note that
this may happen both before this operation returns, and after it
returns, depending on other references to this key object.

NCRIO_KEY_GENERATE
Clear existing key material in the specified key object, and
generate new key material.

The parameter points to struct ncr_key_generate_st, which speci-
fies:

desc The key descriptor

params.algorithm
The crypto algorithm with which the key will be used

params.keyflags
Key flags, a combination of NCR_KEY_FLAG_EXPORTABLE (the
key material can be exported in plaintext to user space)
and NCR_KEY_FLAG_WRAPPABLE (the key material can be
wrapped and the result made available to user space).

params.params
Algorithm-specific key parameters: For symmetric keys,
key length in bits. For RSA keys, the public exponent
and modulus length in bits. For DSA keys, p and q length
in bits. For DH keys, the prime and group generator.

Currently only symmetric keys can be generated using this opera-
tion.

In addition to generating the key material, the "persistent" key
ID is reset to a random value.

NCRIO_KEY_GENERATE_PAIR
Similar to NCRIO_KEY_GENERATE, except that a pair of public/pri-
vate keys is generated.

The parameter points to struct ncr_key_generate_st as specified
above, with the additional member desc2 used to specify the key
descriptor for the public key.

The NCR_KEY_FLAG_EXPORTABLE and NCR_KEY_FLAG_WRAPPABLE flags are
automatically set on the public key.

NCRIO_KEY_DERIVE
Derive a new key using one key and additional data.

The parameter points to struct ncr_key_derivation_params_st,
which specifies:

derive The derivation algorithm. Currently only NCR_DERIVE_DH
is supported.

newkey The descriptor of the resulting key

keyflags
Flags to use for the resulting key

key The source key descriptor

params Key type-specific parameters. For NCR_DERIVE_DH,
params.params.dh.pub and params.params.dh.pub_size spec-
ify the peer's public key.

NCRIO_KEY_EXPORT

Export key material in the specified key object to user space.
Only keys with the NCR_KEY_FLAG_EXPORTABLE flag can be exported
using this operation.

The parameter points to struct ncr_key_data_st, which specifies:

key The key descriptor

idata Destination buffer

idata_size
Buffer size

Symmetric keys are written directly into the destination buffer.
Public and private keys are formatted using ASN.1, except for DH
public keys, which are written a raw binary number.

On success, the idata_size member is set to the size of the
exported key.

NCRIO_KEY_IMPORT
Clear existing key material in the specified key object, and
import key material from user space.

The parameter points to struct ncr_key_data_st, which specifies:

key The key descriptor

idata Source data

idata_size
Source data size

key_id New "persistent" key ID.

key_id_size
Size of data in key_id.

type Key type, one of NCR_KEY_TYPE_SECRET, NCR_KEY_TYPE_PUBLIC
and NCR_KEY_TYPE_PRIVATE.

algorithm
The crypto algorithm with which the key will be used

flags Key flags

The data format is the same as in the NCRIO_KEY_EXPORT opera-
tion.

NCRIO_KEY_GET_INFO
Get metadata of an existing key.

The parameter points to struct ncr_key_info_st, which specifies
key, the key descriptor. On success, the following members are
set:

flags Key flags

type Key type

algorithm
Key algorithm

NCRIO_KEY_WRAP
Wrap one key using another, and write the result to user space.
Only keys with the NCR_KEY_FLAG_WRAPPABLE flag can be wrapped
using this operation.

The parameter points to struct ncr_key_wrap_st, which specifies:

algorithm
The wrapping algorithm to use, one of
NCR_WALG_AES_RFC3394 and NCR_WALG_AES_RFC5649.

keytowrap
The descriptor of the key to wrap

key The descriptor of the key used for wrapping

params Key type-specific parameters. For the currently sup-
ported wrapping algorithms, params.params.cipher.iv and
params.params.cipher.iv_size specify the IV.

io Destination buffer

io_size
Size of the destination buffer

Currently only secret keys can be wrapped, using one of the
above-mentioned AES-based algorithms.

On success, the io_size member is set to the size of the wrapped
key.

NCRIO_KEY_UNWRAP
Unwrap user-space data into a kernel-space key using another
key.

The parameter points to struct ncr_key_wrap_st, which specifies:

algorithm
The wrapping algorithm to use.

keytowrap
The descriptor of the target key object

key The descriptor of the key used for wrapping

params Key type-specific parameters. For the currently sup-
ported wrapping algorithms, params.params.cipher.iv and
params.params.cipher.iv_size specify the IV.

io Pointer to the wrapped key

io_size
Size of the wrapped key

The unwrapped key will have the NCR_KEY_FLAG_WRAPPABLE flag set,
and the NCR_KEY_FLAG_EXPORTABLE flag clear.

NCRIO_KEY_STORAGE_WRAP
Wrap a key object and associated metadata using the system-wide
storage master key, and write the result to user space.

Only keys with the NCR_KEY_FLAG_WRAPPABLE flag can be wrapped
using this operation.

The parameter points to struct ncr_key_storage_wrap_st, which
specifies:

keytowrap
The descriptor of the key to wrap

io Destination buffer

io_size
Size of the destination buffer

On success, the io_size member is set to the size of the wrapped
key.

Both symmetric and asymmetric keys can be wrapped using this
operation. The wrapped data includes the following information
in addition to the raw key material:

o Key type

o Key flags

o Key algorithm

o "Persistent" key ID.

NCRIO_KEY_STORAGE_UNWRAP
Unwrap key and associated metadata created using NCRIO_KEY_STOR-
AGE_WRAP, and restore the information into a specified key
object.

The parameter points to struct ncr_key_storage_wrap_st, which
specifies:

keytowrap
The target key descriptor

io Wrapped data

io_size
Size of the wrapped data

See NCRIO_KEY_STORAGE_WRAP above for the list of attributes that
will be restored.

NCRIO_SESSION_INIT
Allocate a session for performing crypto operations.

The parameter points to struct ncr_session_st, which specifies:

algorithm
The crypto algorithm to use.

key The key to use for the operation, if required.

params Parameters for the operation. For symmetric ciphers, the
IV. For RSA operations, the format, used hash algorithms
and PSS salt length. for DSA, the signature hash algo-
rithm.

op The operation to perform, one of NCR_OP_ENCRYPT,
NCR_OP_DECRYPT, NCR_OP_SIGN and NCR_OP_VERIFY. Use
NCR_OP_SIGN for computing an unkeyed hash as well as
keyed hashes and signatures.

On success, an integer descriptor for the created session (valid
within the current /dev/crypto namespace) is stored into the ses
member.

NCRIO_SESSION_UPDATE
Update an existing crypto session with new data (for operations,
such as hashing, for which data can be supplied in pieces), or
perform a single operation using the session context (for opera-
tions, such as public key encryption, that work on separate
units of data).

The parameter points to struct ncr_session_op_st, which speci-
fies:

ses The integer descriptor of the session.

type Type of the data references used for this operation,
either NCR_KEY_DATA or NCR_DIRECT_DATA.

data.udata.input, data.udata.input_size
If type == NCR_DIRECT_DATA, input data for the operation.

data.kdata.input
If type == NCR_KEY_DATA, integer key descriptor serving
as input for the operation. This can be currently used
only to compute or verify a signature or hash of a sym-
metric key: the keying material is directly used as input
data for the underlying hash.

data.udata.output, data,udata.output_size
If type == NCR_DIRECT_DATA, output buffer for the opera-
tion.

data.kdata.output, data,kdata.output_size
If type == NCR_KEY_DATA, output buffer for the operation.

For the NCR_OP_ENCRYPT and NCR_OP_DECRYPT operations using sym-
metric ciphers, the operation is performed on the input data,
resulting in an output data block of the same size; for opera-
tions using public-key cryptography, a single operation is per-
formed on the input data, resulting in output data. In both
cases, the relevant output_data member is set to the size of
valid output data on success.

For the NCR_OP_SIGN and NCR_OP_VERIFY operations, the input data
is supplied to the underlying hash function; no output data is
produced.

NCRIO_SESSION_FINAL
Finalize an existing crypto session and deallocate it.

The parameter points to struct ncr_session_op_st, as described
in the NCRIO_SESSION_UPDATE section above. If the parameter
specifies valid input data, it is processed as if using
NCRIO_SESSION_UPDATE; thus, the last update operation can be
performed together with the finalization in one step.

There is no specific finalization operation performed for
NCR_OP_ENCRYPT and NCR_OP_DECRYPT.

For the NCR_OP_SIGN operation, the signature is created and
written as output data.

For the NCR_OP_VERIFY operation, a signature specified as input
using the output data fields is verified; the result of this
operation (NCR_SUCCESS or NCR_VERIFICATION_FAILED) will be
stored into the err member. (Note that the ioctl(2) operation
will indicate success even if the signature verification fails,
as long all inputs were specified correctly.)

The session will be deallocated even if the NCRIO_SESSION_FINAL
operation reports an error, as long as valid session descriptor
was specified.

NCRIO_SESSION_ONCE
Perform an one-shot crypto operation, allocating a temporary
session, supplying a single instance of data, and finalizing the
session in one operation.

The parameter points to struct ncr_session_once_op_st, which
includes arguments for one NCRIO_SESSION_INIT and one NCRIO_SES-
SION_FINAL operation. The ses member for the NCRIO_SES-
SION_FINAL sub-operation is ignored, the sub-operation automati-
cally uses the temporary session.

NCRIO_MASTER_KEY_SET
Set the system-wide storage master key. Only a process with
EUID 0 and the CAP_SYS_ADMIN capability is allowed to perform
this operation. Once a master key is set, it can be changed
only by rebooting the system and setting a different key.

The parameter points to struct ncr_master_key_st, which speci-
fies:

key Pointer to the key material in user space.

key_size
Size of the key material in bytes.

Currently only an AES key with size 16, 24, or 32 bytes is acceptable.

CONFIGURATION
The NCRIO_KEY_STORAGE_WRAP and NCRIO_KEY_STORAGE_UNWRAP ioctl()s work
only after a storage master key is configured by the system administra-
tor. See NCRIO_MASTER_KEY_SET above.

---
Kbuild | 2
cryptodev.h | 64 ++++++++++++
ncr.h | 312 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 378 insertions(+)

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 756f831..f35589a 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -51,6 +51,7 @@ header-y += comstats.h
header-y += const.h
header-y += cgroupstats.h
header-y += cramfs_fs.h
+header-y += cryptodev.h
header-y += cycx_cfm.h
header-y += dcbnl.h
header-y += dlmconstants.h
@@ -116,6 +117,7 @@ header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
header-y += ncp_no.h
+header-y += ncr.h
header-y += neighbour.h
header-y += net_dropmon.h
header-y += net_tstamp.h
diff --git a/include/linux/cryptodev.h b/include/linux/cryptodev.h
new file mode 100644
index 0000000..97507d8
--- /dev/null
+++ b/include/linux/cryptodev.h
@@ -0,0 +1,64 @@
+/* This is a source compatible implementation with the original API of
+ * cryptodev by Angelos D. Keromytis, found at openbsd cryptodev.h.
+ * Placed under public domain */
+
+#ifndef _LINUX_CRYPTODEV_H
+#define _LINUX_CRYPTODEV_H
+
+/* API extensions for linux */
+#define CRYPTO_HMAC_MAX_KEY_LEN 512
+#define CRYPTO_CIPHER_MAX_KEY_LEN 64
+
+/* All the supported algorithms
+ */
+typedef enum {
+ CRYPTO_DES_CBC=1,
+ CRYPTO_3DES_CBC=2,
+ CRYPTO_BLF_CBC=3,
+ CRYPTO_CAST_CBC=4,
+ CRYPTO_SKIPJACK_CBC=5,
+ CRYPTO_MD5_HMAC=6,
+ CRYPTO_SHA1_HMAC=7,
+ CRYPTO_RIPEMD160_HMAC=8,
+ CRYPTO_MD5_KPDK=9,
+ CRYPTO_SHA1_KPDK=10,
+ CRYPTO_RIJNDAEL128_CBC=11,
+ CRYPTO_AES_CBC=CRYPTO_RIJNDAEL128_CBC,
+ CRYPTO_ARC4=12,
+ CRYPTO_MD5=13,
+ CRYPTO_SHA1=14,
+ CRYPTO_DEFLATE_COMP=15,
+ CRYPTO_NULL=16,
+ CRYPTO_LZS_COMP=17,
+ CRYPTO_SHA2_256_HMAC=18,
+ CRYPTO_SHA2_384_HMAC=19,
+ CRYPTO_SHA2_512_HMAC=20,
+ CRYPTO_AES_CTR=21,
+ CRYPTO_AES_XTS=22,
+
+ CRYPTO_CAMELLIA_CBC=101,
+ CRYPTO_RIPEMD160,
+ CRYPTO_SHA2_256,
+ CRYPTO_SHA2_384,
+ CRYPTO_SHA2_512,
+ CRYPTO_ALGORITHM_ALL, /* Keep updated - see below */
+} cryptodev_crypto_op_t;
+#define CRYPTO_ALGORITHM_MAX (CRYPTO_ALGORITHM_ALL - 1)
+
+/* Values for ciphers */
+#define DES_BLOCK_LEN 8
+#define DES3_BLOCK_LEN 8
+#define RIJNDAEL128_BLOCK_LEN 16
+#define AES_BLOCK_LEN RIJNDAEL128_BLOCK_LEN
+#define CAMELLIA_BLOCK_LEN
+#define BLOWFISH_BLOCK_LEN 8
+#define SKIPJACK_BLOCK_LEN 8
+#define CAST128_BLOCK_LEN 8
+
+/* the maximum of the above */
+#define EALG_MAX_BLOCK_LEN 16
+
+/* Values for hashes/MAC */
+#define AALG_MAX_RESULT_LEN 64
+
+#endif /* _LINUX_CRYPTODEV_H */
diff --git a/include/linux/ncr.h b/include/linux/ncr.h
new file mode 100644
index 0000000..06f34e2
--- /dev/null
+++ b/include/linux/ncr.h
@@ -0,0 +1,312 @@
+#ifndef _LINUX_NCR_H
+#define _LINUX_NCR_H
+
+#include <linux/types.h>
+
+#define NCR_CIPHER_MAX_BLOCK_LEN 32
+#define NCR_HASH_MAX_OUTPUT_SIZE 64
+
+typedef enum {
+ NCR_ALG_NONE,
+ NCR_ALG_NULL,
+ NCR_ALG_3DES_CBC,
+ NCR_ALG_AES_CBC,
+ NCR_ALG_CAMELLIA_CBC,
+ NCR_ALG_ARCFOUR,
+ NCR_ALG_AES_ECB,
+ NCR_ALG_CAMELLIA_ECB,
+ NCR_ALG_AES_CTR,
+ NCR_ALG_CAMELLIA_CTR,
+
+ NCR_ALG_SHA1=40,
+ NCR_ALG_MD5,
+ NCR_ALG_SHA2_224,
+ NCR_ALG_SHA2_256,
+ NCR_ALG_SHA2_384,
+ NCR_ALG_SHA2_512,
+
+ NCR_ALG_HMAC_SHA1=80,
+ NCR_ALG_HMAC_MD5,
+ NCR_ALG_HMAC_SHA2_224,
+ NCR_ALG_HMAC_SHA2_256,
+ NCR_ALG_HMAC_SHA2_384,
+ NCR_ALG_HMAC_SHA2_512,
+
+ NCR_ALG_RSA=140,
+ NCR_ALG_DSA,
+ NCR_ALG_DH, /* DH as in PKCS #3 */
+} ncr_algorithm_t;
+
+
+
+typedef enum {
+ NCR_WALG_AES_RFC3394, /* for secret keys only */
+ NCR_WALG_AES_RFC5649, /* can wrap arbitrary key */
+} ncr_wrap_algorithm_t;
+
+typedef enum {
+ NCR_KEY_TYPE_INVALID,
+ NCR_KEY_TYPE_SECRET=1,
+ NCR_KEY_TYPE_PUBLIC=2,
+ NCR_KEY_TYPE_PRIVATE=3,
+} ncr_key_type_t;
+
+/* Key handling
+ */
+
+typedef int ncr_key_t;
+
+#define NCR_KEY_INVALID ((ncr_key_t)-1)
+
+#define NCR_KEY_FLAG_EXPORTABLE 1
+#define NCR_KEY_FLAG_WRAPPABLE (1<<1)
+/* when generating a pair the flags correspond to private
+ * and public key usage is implicit. For example when private
+ * key can decrypt then public key can encrypt. If private key
+ * can sign then public key can verify.
+ */
+#define NCR_KEY_FLAG_DECRYPT (1<<2)
+#define NCR_KEY_FLAG_SIGN (1<<3)
+
+struct ncr_key_generate_params_st {
+ ncr_algorithm_t algorithm; /* just a cipher algorithm when
+ * generating secret keys
+ */
+
+ unsigned int keyflags;
+ union {
+ struct {
+ unsigned int bits;
+ } secret;
+ struct {
+ unsigned int bits;
+ unsigned long e; /* use zero for default */
+ } rsa;
+ struct {
+ /* For DSS standard allowed values
+ * are: p:1024 q: 160
+ * p:2048 q: 224
+ * p:2048 q: 256
+ * p:3072 q: 256
+ */
+ unsigned int p_bits;
+ unsigned int q_bits;
+ } dsa;
+ struct {
+ uint8_t __user *p; /* prime */
+ size_t p_size;
+ uint8_t __user *g; /* generator */
+ size_t g_size;
+ } dh;
+ } params;
+};
+
+/* used in generation
+ */
+struct ncr_key_generate_st {
+ ncr_key_t desc;
+ ncr_key_t desc2; /* public key when called with GENERATE_PAIR */
+ struct ncr_key_generate_params_st params;
+};
+
+typedef enum {
+ RSA_PKCS1_V1_5, /* both signatures and encryption */
+ RSA_PKCS1_OAEP, /* for encryption only */
+ RSA_PKCS1_PSS, /* for signatures only */
+} ncr_rsa_type_t;
+
+/* used in derivation/encryption
+ */
+struct ncr_key_params_st {
+ /* this structure always corresponds to a key. Hence the
+ * parameters of the union selected are based on the corresponding
+ * key */
+ union {
+ struct {
+ uint8_t iv[NCR_CIPHER_MAX_BLOCK_LEN];
+ size_t iv_size;
+ } cipher;
+ struct {
+ uint8_t __user *pub;
+ size_t pub_size;
+ } dh;
+ struct {
+ ncr_rsa_type_t type;
+ ncr_algorithm_t oaep_hash; /* for OAEP */
+ ncr_algorithm_t sign_hash; /* for signatures */
+ unsigned int pss_salt; /* PSS signatures */
+ } rsa;
+ struct {
+ ncr_algorithm_t sign_hash; /* for signatures */
+ } dsa;
+ } params;
+};
+
+typedef enum {
+ NCR_DERIVE_DH=1,
+} ncr_derive_t;
+
+struct ncr_key_derivation_params_st {
+ ncr_derive_t derive; /* the derivation algorithm */
+
+ ncr_key_t newkey;
+ unsigned int keyflags; /* for new key */
+
+ ncr_key_t key;
+ struct ncr_key_params_st params;
+};
+
+#define MAX_KEY_ID_SIZE 20
+
+struct ncr_key_info_st {
+ ncr_key_t key; /* input */
+
+ unsigned int flags;
+ ncr_key_type_t type;
+ ncr_algorithm_t algorithm; /* valid for public/private keys */
+
+ uint8_t key_id[MAX_KEY_ID_SIZE];
+ size_t key_id_size;
+};
+
+struct ncr_key_data_st {
+ ncr_key_t key;
+
+ void __user *idata;
+ size_t idata_size; /* rw in get */
+
+ /* in case of import this will be used as key id */
+ uint8_t key_id[MAX_KEY_ID_SIZE];
+ size_t key_id_size;
+ ncr_key_type_t type;
+ unsigned int flags;
+ ncr_algorithm_t algorithm; /* valid for public/private keys */
+};
+
+#define NCRIO_KEY_INIT _IOW ('c', 204, ncr_key_t)
+/* generate a secret key */
+#define NCRIO_KEY_GENERATE _IOR ('c', 205, struct ncr_key_generate_st)
+/* generate a public key pair */
+#define NCRIO_KEY_GENERATE_PAIR _IOR ('c', 206, struct ncr_key_generate_st)
+/* derive a new key from an old one */
+#define NCRIO_KEY_DERIVE _IOR ('c', 207, struct ncr_key_derivation_params_st)
+/* return information on a key */
+#define NCRIO_KEY_GET_INFO _IOWR('c', 208, struct ncr_key_info_st)
+/* export a secret key */
+#define NCRIO_KEY_EXPORT _IOWR('c', 209, struct ncr_key_data_st)
+/* import a secret key */
+#define NCRIO_KEY_IMPORT _IOWR('c', 210, struct ncr_key_data_st)
+
+#define NCRIO_KEY_DEINIT _IOR ('c', 215, ncr_key_t)
+
+/* Key wrap ioctls
+ */
+struct ncr_key_wrap_st {
+ ncr_wrap_algorithm_t algorithm;
+ ncr_key_t keytowrap;
+
+ ncr_key_t key;
+ struct ncr_key_params_st params;
+
+ void __user * io; /* encrypted keytowrap */
+ size_t io_size; /* this will be updated by the actual size on wrap */
+};
+
+#define NCRIO_KEY_WRAP _IOWR ('c', 250, struct ncr_key_wrap_st)
+#define NCRIO_KEY_UNWRAP _IOR ('c', 251, struct ncr_key_wrap_st)
+
+/* Internal ops */
+struct ncr_master_key_st {
+ uint8_t __user * key;
+ uint16_t key_size;
+};
+
+#define NCRIO_MASTER_KEY_SET _IOR ('c', 260, struct ncr_master_key_st)
+
+/* These are similar to key_wrap and unwrap except that will store some extra
+ * fields to be able to recover a key */
+struct ncr_key_storage_wrap_st {
+ ncr_key_t keytowrap;
+
+ void __user * io; /* encrypted keytowrap */
+ size_t io_size; /* this will be updated by the actual size on wrap */
+};
+
+#define NCRIO_KEY_STORAGE_WRAP _IOWR ('c', 261, struct ncr_key_storage_wrap_st)
+#define NCRIO_KEY_STORAGE_UNWRAP _IOR ('c', 262, struct ncr_key_storage_wrap_st)
+
+/* Crypto Operations ioctls
+ */
+
+typedef enum {
+ NCR_OP_ENCRYPT=1,
+ NCR_OP_DECRYPT,
+ NCR_OP_SIGN,
+ NCR_OP_VERIFY,
+} ncr_crypto_op_t;
+
+typedef int ncr_session_t;
+#define NCR_SESSION_INVALID ((ncr_session_t)-1)
+
+/* input of CIOCGSESSION */
+struct ncr_session_st {
+ /* input */
+ ncr_algorithm_t algorithm;
+
+ ncr_key_t key;
+ struct ncr_key_params_st params;
+ ncr_crypto_op_t op;
+
+ /* output */
+ ncr_session_t ses; /* session identifier */
+};
+
+typedef enum {
+ NCR_SUCCESS = 0,
+ NCR_ERROR_GENERIC = -1,
+ NCR_VERIFICATION_FAILED = -2,
+} ncr_error_t;
+
+typedef enum {
+ NCR_KEY_DATA,
+ NCR_DIRECT_DATA,
+} ncr_data_type_t;
+
+struct ncr_session_op_st {
+ /* input */
+ ncr_session_t ses;
+
+ union {
+ struct {
+ ncr_key_t input;
+ void __user * output; /* when verifying signature this is
+ * the place of the signature.
+ */
+ size_t output_size;
+ } kdata; /* NCR_KEY_DATA */
+ struct {
+ void __user * input;
+ size_t input_size;
+ void __user * output;
+ size_t output_size;
+ } udata; /* NCR_DIRECT_DATA */
+ } data;
+ ncr_data_type_t type;
+
+ /* output of verification */
+ ncr_error_t err;
+};
+
+struct ncr_session_once_op_st {
+ struct ncr_session_st init;
+ struct ncr_session_op_st op;
+};
+
+#define NCRIO_SESSION_INIT _IOR ('c', 300, struct ncr_session_st)
+#define NCRIO_SESSION_UPDATE _IOWR ('c', 301, struct ncr_session_op_st)
+#define NCRIO_SESSION_FINAL _IOWR ('c', 302, struct ncr_session_op_st)
+
+/* everything in one call */
+#define NCRIO_SESSION_ONCE _IOWR ('c', 303, struct ncr_session_once_op_st)
+
+#endif

2010-08-06 00:25:48

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

2010-08-07 00:54:28

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Thu, Aug 05, 2010 at 10:17:53PM +0200, Miloslav Trmač wrote:
> > Hello,
> > following is a patchset providing an user-space interface to the
> kernel crypto
> > API. It is based on the older, BSD-compatible, implementation, but
> the
> > user-space interface is different.
> >
>
> I only see patch 1/4 and 3/4. Where are 2/4 and 4/4?
Too large for the mailing list I'm afraid. I have uploaded them at http://people.redhat.com/mitr/cryptodev-ncr/ , you should also have received a personal copy yesterday.
Mirek

2010-08-09 14:39:47

by Herbert Xu

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Thu, Aug 05, 2010 at 10:17:53PM +0200, Miloslav Trmač wrote:
> Hello,
> following is a patchset providing an user-space interface to the kernel crypto
> API. It is based on the older, BSD-compatible, implementation, but the
> user-space interface is different.
>
> These are the major differences compared to the BSD-like interface:
>
> * The API supports key storage and management inside the kernel.
> An application can thus ask the kernel to generate a key; the key is
> then referenced via an integer identifier, and the application can be
> prevented from accessing the raw key data. Such a key can, if so configured,
> still be wrapped for key transport to the recipient of the message, and
> unwrapped by the recipient.
>
> The kernel key storage does not span system reboots, but applications can
> also wrap the keys for persistent storage, receiving an encrypted blob that
> does not reveal the raw key data, but can be later loaded back into the
> kernel.
>
> * More algorithms and mechanisms are supported by the API, including public key
> algorithms (RSA/DSA encryption and signing, D-H key derivation, key wrapping).

Thanks for the patches.

Unfortunately it fails to satisfy the requirement of supporting
all our existing kernel crypto interfaces, such as AEAD, as well
as being flexible enough in adding new interfaces such as xor.

So we need to address these issues before this can be integrated
into Linux.

Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2010-08-10 00:01:02

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Herbert Xu" <[email protected]> wrote:

> On Thu, Aug 05, 2010 at 10:17:53PM +0200, Miloslav Trmač wrote:
> > Hello,
> > following is a patchset providing an user-space interface to the kernel crypto
> > API. It is based on the older, BSD-compatible, implementation, but the
> > user-space interface is different.
>
> Thanks for the patches.
>
> Unfortunately it fails to satisfy the requirement of supporting
> all our existing kernel crypto interfaces, such as AEAD, as well
> as being flexible enough in adding new interfaces such as xor.
Thanks for the review.

I think I can add AEAD (and compression/RNG, if requested) easily enough, I'll send an updated patch set.

(Talking specifically about xor, xor does not seem to be cryptographic operation that should be exposed as such to userspace - and AFAICS it is not integrated in the crypto API algorithm mechanism in the kernel either.)

Is the proposed interface acceptable in the general approach (enums for algorithms/operations, unions for parameters, session init/update/finalize)? With respect to flexibility, do you have specific suggestions or areas of concern?
Mirek

2010-08-10 12:50:41

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Mon, Aug 09, 2010 at 08:00:55PM -0400, Miloslav Trmac wrote:
> ----- "Herbert Xu" <[email protected]> wrote:
>
> > On Thu, Aug 05, 2010 at 10:17:53PM +0200, Miloslav Trmač wrote:
> > > Hello,
> > > following is a patchset providing an user-space interface to the kernel crypto
> > > API. It is based on the older, BSD-compatible, implementation, but the
> > > user-space interface is different.
> >
> > Thanks for the patches.
> >
> > Unfortunately it fails to satisfy the requirement of supporting
> > all our existing kernel crypto interfaces, such as AEAD, as well
> > as being flexible enough in adding new interfaces such as xor.
> Thanks for the review.
>
> I think I can add AEAD (and compression/RNG, if requested) easily enough, I'll send an updated patch set.
>
> (Talking specifically about xor, xor does not seem to be cryptographic operation that should be exposed as such to userspace - and AFAICS it is not integrated in the crypto API algorithm mechanism in the kernel either.)
>
> Is the proposed interface acceptable in the general approach (enums for algorithms/operations, unions for parameters, session init/update/finalize)? With respect to flexibility, do you have specific suggestions or areas of concern?

I know we spoke about this previously, but since you're asking publically, I'll
make my argument here so that we can have the debate in public as well. I'm ok
wih the general enumeration of operations in user space (I honestly can't see
how you would do it otherwise). What worries me though is the use ioctl for
these operations and the flexibility that it denies you in future updates.

Specifically, my concerns are twofold:

1) struct format. By passing down a structure as your doing through an ioctl
call, theres no way to extend/modify that structure easily for future use. For
instance the integration of aead might I think requires a blocking factor to be
sepcified, and entry for which you have no available field in your crypt_ops
structure. If you extend the structure in a later release of this code, you
create a potential incompatibility with user space because you are no longer
guaranteed that the size of the crypt_op structure is the same, and you need to
be prepared for a range of sizes to get passed down, at which point it becomes
difficult to differentiate between older code thats properly formatted, and
newer code thats legitimately broken. You might could add a version field to
mitigate that, but it gets ugly pretty quickly.

2) Use of pointers. Thats just a pain. You have the compat ioctl stuff in
place to handle the 32/64 bit conversions, but it would be really nice if you
could avoid having to maintain that extra code path.

Thats why I had suggested the use of a netlink protocol to manage this kind of
interface. There are other issue to manage there, but they're really not that
big a deal, comparatively speaking, and that interface allows for the easy
specification of arbitrary length data in a fashion that doesn't cause user
space breakage when additional algorithms/apis are added.

Thats just my opinion, I'm sure others have differing views. I'd be interested
to hear what others think.

Regards
Neil

>

2010-08-10 13:24:50

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tuesday, August 10, 2010 08:46:28 am Neil Horman wrote:
> Specifically, my concerns are twofold:
>
> 1) struct format. By passing down a structure as your doing through an
> ioctl call, theres no way to extend/modify that structure easily for
> future use. For instance the integration of aead might I think requires a
> blocking factor to be sepcified, and entry for which you have no available
> field in your crypt_ops structure. If you extend the structure in a later
> release of this code, you create a potential incompatibility with user
> space because you are no longer guaranteed that the size of the crypt_op
> structure is the same, and you need to be prepared for a range of sizes to
> get passed down, at which point it becomes difficult to differentiate
> between older code thats properly formatted, and newer code thats
> legitimately broken. You might could add a version field to mitigate
> that, but it gets ugly pretty quickly.

Yeah, this does call out for versioning. But the ioctls are just for crypto
parameter setup. Hopefully, that doesn't change too much since its just to
select an algorithm, possibly ask for a key to be wrapped and loaded, or ask
for a key to be created and exported. After setup, you just start using the
device.

> Thats why I had suggested the use of a netlink protocol to manage this kind
> of interface. There are other issue to manage there, but they're really
> not that big a deal, comparatively speaking, and that interface allows for
> the easy specification of arbitrary length data in a fashion that doesn't
> cause user space breakage when additional algorithms/apis are added.

The problem with the netlink approach is that auditing is not as good because
netlink is an async protocol. The kernel can only use the credentials that
ride in the skb with the command since there is no guarantee the process has
not changed credentials by the time you get the packet. Adding more
credentials to the netlink headers is also not good. As it is now, the
auditing is synchronous with the syscall and we can get at more information
and reliably create the audit records called out by the protection profiles or
FIPS-140 level 2.

-Steve

2010-08-10 14:42:27

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> On Tuesday, August 10, 2010 08:46:28 am Neil Horman wrote:
> > Specifically, my concerns are twofold:
> >
> > 1) struct format. By passing down a structure as your doing through an
> > ioctl call, theres no way to extend/modify that structure easily for
> > future use. For instance the integration of aead might I think requires a
> > blocking factor to be sepcified, and entry for which you have no available
> > field in your crypt_ops structure. If you extend the structure in a later
> > release of this code, you create a potential incompatibility with user
> > space because you are no longer guaranteed that the size of the crypt_op
> > structure is the same, and you need to be prepared for a range of sizes to
> > get passed down, at which point it becomes difficult to differentiate
> > between older code thats properly formatted, and newer code thats
> > legitimately broken. You might could add a version field to mitigate
> > that, but it gets ugly pretty quickly.
>
> Yeah, this does call out for versioning. But the ioctls are just for crypto
> parameter setup. Hopefully, that doesn't change too much since its just to
> select an algorithm, possibly ask for a key to be wrapped and loaded, or ask
> for a key to be created and exported. After setup, you just start using the
> device.
>
>
> > Thats why I had suggested the use of a netlink protocol to manage this kind
> > of interface. There are other issue to manage there, but they're really
> > not that big a deal, comparatively speaking, and that interface allows for
> > the easy specification of arbitrary length data in a fashion that doesn't
> > cause user space breakage when additional algorithms/apis are added.
>
> The problem with the netlink approach is that auditing is not as good because
> netlink is an async protocol. The kernel can only use the credentials that
> ride in the skb with the command since there is no guarantee the process has
> not changed credentials by the time you get the packet. Adding more
> credentials to the netlink headers is also not good. As it is now, the
> auditing is synchronous with the syscall and we can get at more information
> and reliably create the audit records called out by the protection profiles or
> FIPS-140 level 2.
>
> -Steve

I think thats pretty easy to serialize though. All you need to do is enforce a
rule in the kernel that dictates any creditial changes to a given context must
be serialized behind all previously submitted crypto operations. It might make
life a bit tougher on the audit code, but netlink contains pid/sequence data in
all messages so that audit can correlate requests and responses easily.

Neil

2010-08-10 14:47:24

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > Thats why I had suggested the use of a netlink protocol to manage this kind
> > > of interface. There are other issue to manage there, but they're really
> > > not that big a deal, comparatively speaking, and that interface allows for
> > > the easy specification of arbitrary length data in a fashion that doesn't
> > > cause user space breakage when additional algorithms/apis are added.
> >
> > The problem with the netlink approach is that auditing is not as good because
> > netlink is an async protocol. The kernel can only use the credentials that
> > ride in the skb with the command since there is no guarantee the process has
> > not changed credentials by the time you get the packet. Adding more
> > credentials to the netlink headers is also not good. As it is now, the
> > auditing is synchronous with the syscall and we can get at more information
> > and reliably create the audit records called out by the protection
> profiles or
> > FIPS-140 level 2.
> >
> > -Steve
>
> I think thats pretty easy to serialize though. All you need to do is enforce a
> rule in the kernel that dictates any creditial changes to a given context must
> be serialized behind all previously submitted crypto operations.
That would be a bit unusual from the layering/semantics aspect - why should credential changes care about crypto operations when they don't care about any other operations? - and it would require pretty widespread changes throughout the kernel core.
Mirek

2010-08-10 14:55:28

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
>
> ----- "Neil Horman" <[email protected]> wrote:
> > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > Thats why I had suggested the use of a netlink protocol to manage this kind
> > > > of interface. There are other issue to manage there, but they're really
> > > > not that big a deal, comparatively speaking, and that interface allows for
> > > > the easy specification of arbitrary length data in a fashion that doesn't
> > > > cause user space breakage when additional algorithms/apis are added.
> > >
> > > The problem with the netlink approach is that auditing is not as good because
> > > netlink is an async protocol. The kernel can only use the credentials that
> > > ride in the skb with the command since there is no guarantee the process has
> > > not changed credentials by the time you get the packet. Adding more
> > > credentials to the netlink headers is also not good. As it is now, the
> > > auditing is synchronous with the syscall and we can get at more information
> > > and reliably create the audit records called out by the protection
> > profiles or
> > > FIPS-140 level 2.
> > >
> > > -Steve
> >
> > I think thats pretty easy to serialize though. All you need to do is enforce a
> > rule in the kernel that dictates any creditial changes to a given context must
> > be serialized behind all previously submitted crypto operations.
> That would be a bit unusual from the layering/semantics aspect - why should credential changes care about crypto operations when they don't care about any other operations? - and it would require pretty widespread changes throughout the kernel core.
> Mirek

I'm sorry, I thought steve was referring to credentials in the sense of changing
keys/etc while crypto operations were in flight. If you're referring to users
credentials in terms of permissions to use a device or service, then I think its
all moot anyway. As you say why should credential changes care about crypto ops
when they don't care about other ops. If you have permissions to use a
device/service, changing those permissions doesnt really change the fact that
you're in the process of using that service. We could do some modicum of
credential check when requests are submitted and deny service based on that
check, but anything already submitted was done when you had permission to do so
and should be allowed to complete.

Neil

2010-08-10 15:36:27

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Mon, Aug 09, 2010 at 08:00:55PM -0400, Miloslav Trmac wrote:
> > Is the proposed interface acceptable in the general approach (enums
> for algorithms/operations, unions for parameters, session
> init/update/finalize)? With respect to flexibility, do you have
> specific suggestions or areas of concern?
>
> I know we spoke about this previously, but since you're asking publically, I'll
> make my argument here so that we can have the debate in public as well. I'm ok
> wih the general enumeration of operations in user space (I honestly can't see
> how you would do it otherwise). What worries me though is the use ioctl for
> these operations and the flexibility that it denies you in future updates.
> Specifically, my concerns are twofold:
>
> 1) struct format. By passing down a structure as your doing through an ioctl
> call, theres no way to extend/modify that structure easily for future use. For
> instance the integration of aead might I think requires a blocking factor to be
> sepcified, and entry for which you have no available field in your crypt_ops
> structure. If you extend the structure in a later release of this code, you
> create a potential incompatibility with user space because you are no longer
> guaranteed that the size of the crypt_op structure is the same, and you need to
> be prepared for a range of sizes to get passed down, at which point it becomes
> difficult to differentiate between older code thats properly formatted, and
> newer code thats legitimately broken. You might could add a version field to
> mitigate that, but it gets ugly pretty quickly.

I think it would be useful to separate thinking about the data format and about the transmission mechanism. ioctl() can quite well be used to carry "netlink-like" packets - blobs of data with specified length and flexible internal structure - and on the other hand, netlink could be (and often is) used to carry fixed structs instead of variable-length packets.

So, any advantages netlink packets have in this respect can be provided using the ioctl() interface as well. Besides, adding new ioctl commands provides a quite natural versioning mechanism (of course, it would be better to design the data format right the first time, but the option to easily extend the interface is available). Versioned data structure that "gets ugly pretty quickly" is exactly what one can get with the netlink packets, I think :) - and it is not that bad actually.

Using a different design of the structures, perhaps appending a flexible array of "attributes" at the end of each operation structure, is certainly something to consider, if you think this is the way to go; the added flexibility is undisputable, at the cost of some encoding/decoding overhead.

As for the transmission mechanism, netlink seems to me to be one of the least desirable options: as described above, it does not provide any inherent advantages to ioctl, and it has significantly higher overhead (sendmsg()+recvmsg(), handling netlink ACKs, matching requests and replies in multi-threaded programs), and it makes auditing difficult.

> 2) Use of pointers. Thats just a pain. You have the compat ioctl stuff in
> place to handle the 32/64 bit conversions, but it would be really nice if you
> could avoid having to maintain that extra code path.
Pointers are pretty much unavoidable, to allow zero-copy references to input/output data. If that means maintaining 32-bit compat paths (as opposed to codifying say uint128_t for pointers in fixed-size structures), then so be it: the 32-bit paths will simply have to be maintained. Once we have the 32-bit compat paths, using pointers for other unformatted, variable-sized data (e.g. IVs, multiple-precision integers) seems to be easier than using variable-size data structures or explicit pointer arithmetic to compute data locations within the data packets.
Mirek

2010-08-10 15:40:14

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
> > ----- "Neil Horman" <[email protected]> wrote:
> > > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > The problem with the netlink approach is that auditing is not as good because
> > > > netlink is an async protocol. The kernel can only use the credentials that
> > > > ride in the skb with the command since there is no guarantee the process has
> > > > not changed credentials by the time you get the packet. Adding more
> > > > credentials to the netlink headers is also not good. As it is now, the
> > > > auditing is synchronous with the syscall and we can get at more information
> > > > and reliably create the audit records called out by the protection profiles or
> > > > FIPS-140 level 2.
> > > >
> > > > -Steve
> > >
> > > I think thats pretty easy to serialize though. All you need to do is enforce a
> > > rule in the kernel that dictates any creditial changes to a given context must
> > > be serialized behind all previously submitted crypto operations.
> > That would be a bit unusual from the layering/semantics aspect - why
> should credential changes care about crypto operations when they don't
> care about any other operations? - and it would require pretty
> widespread changes throughout the kernel core.
> > Mirek
>
>
> I'm sorry, I thought steve was referring to credentials in the sense of changing
> keys/etc while crypto operations were in flight.
The audited values are mainly process/thread attributes: pid, ppid, {,e,fs}[ug]id, session id, and the like. Most of these are attached to the netlink packet, and performing a lookup by PID is at packet handling time is unreliable - as far as I understand the netlink receive routine does not have to run in the same process context, so the PID might not even refer to the same process.
Mirek

2010-08-10 16:21:54

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 11:36:16AM -0400, Miloslav Trmac wrote:
>
> ----- "Neil Horman" <[email protected]> wrote:
> > On Mon, Aug 09, 2010 at 08:00:55PM -0400, Miloslav Trmac wrote:
> > > Is the proposed interface acceptable in the general approach (enums
> > for algorithms/operations, unions for parameters, session
> > init/update/finalize)? With respect to flexibility, do you have
> > specific suggestions or areas of concern?
> >
> > I know we spoke about this previously, but since you're asking publically, I'll
> > make my argument here so that we can have the debate in public as well. I'm ok
> > wih the general enumeration of operations in user space (I honestly can't see
> > how you would do it otherwise). What worries me though is the use ioctl for
> > these operations and the flexibility that it denies you in future updates.
> > Specifically, my concerns are twofold:
> >
> > 1) struct format. By passing down a structure as your doing through an ioctl
> > call, theres no way to extend/modify that structure easily for future use. For
> > instance the integration of aead might I think requires a blocking factor to be
> > sepcified, and entry for which you have no available field in your crypt_ops
> > structure. If you extend the structure in a later release of this code, you
> > create a potential incompatibility with user space because you are no longer
> > guaranteed that the size of the crypt_op structure is the same, and you need to
> > be prepared for a range of sizes to get passed down, at which point it becomes
> > difficult to differentiate between older code thats properly formatted, and
> > newer code thats legitimately broken. You might could add a version field to
> > mitigate that, but it gets ugly pretty quickly.
>
> I think it would be useful to separate thinking about the data format and about the transmission mechanism. ioctl() can quite well be used to carry "netlink-like" packets - blobs of data with specified length and flexible internal structure - and on the other hand, netlink could be (and often is) used to carry fixed structs instead of variable-length packets.

Yes, both mechanism can be used to carry either fixed or variable length
payloads. The difference is ioctl has no built in mechanism to do variable
length data safely. To do variable length data you need to setup a bunch of
pointers to extra data that you have to copy separately. Even then, you're
fixing the number of pointers that you have in your base structure. To add or
remove any would break your communication protocol to user space. This is
exactly the point I made above.

>
> So, any advantages netlink packets have in this respect can be provided using the ioctl() interface as well. Besides, adding new ioctl commands provides a quite natural versioning mechanism (of course, it would be better to design the data format right the first time, but the option to easily extend the interface is available). Versioned data structure that "gets ugly pretty quickly" is exactly what one can get with the netlink packets, I think :) - and it is not that bad actually.
No they can't. I just explained again why it can't above. At least not without
additional metadata and compatibility code built into the struct that you pass
in the ioctl.

Add commands is irrelevant. Its equally easy to do so in an enumeration
provided in a struct as it is to do in a netlink message. Theres no advantage
in either case there.

And there is no need for versioning in the netlink packet, because the data
types are all inlined, typed and attached to length values (at least when done
correctly, see then nl_attr structure and associated macros). You don't have
that with your ioctl. You could add it for certain, but at that point you're
re-inventing the wheel.

>
> Using a different design of the structures, perhaps appending a flexible array of "attributes" at the end of each operation structure, is certainly something to consider, if you think this is the way to go; the added flexibility is undisputable, at the cost of some encoding/decoding overhead.

Thats exactly what netlink already has built in. Each netlink message consistes
of a nlmsghdr structure which defines the source/dest process/etc. Thats
followed by a variable number of nlattr attributes, each of which consists of a
type, length, and array of data bytes. We have kernel macros built to
encode/decode these attribtues already. The work is already done (see the
nlmsg_parse function in the kernel).

>
> As for the transmission mechanism, netlink seems to me to be one of the least desirable options: as described above, it does not provide any inherent advantages to ioctl, and it has significantly higher overhead (sendmsg()+recvmsg(), handling netlink ACKs, matching requests and replies in multi-threaded programs), and it makes auditing difficult.

You're incorrect. I've explained several of the advantiges above and in my previous
email, you're just not seeing them. I will grant you some additional overhead
in the use of of two system calls rather than one per operation in the nominal
case, but there is no reason that can't be mitigated by the bundling of multiple
operations into a single netlink packet. There is no reason that multiple
netlink messages can't be sent with a single sendmsg call, and their responses
received with a single recvmsg (see the NLMSG_NEXT macro).

Likewise, matching requests and responses in a multi-threaded program is also an
already solved issue in multiple ways. The use of multiple sockets, in a 1 per
thread fashion is the most obvious. Theres also countless approaches in which a
thread can reassemble responses to registered requests in such a way that the
user space portion of an application sees these calls as being synchronous. Its
really not that hard.

>
> > 2) Use of pointers. Thats just a pain. You have the compat ioctl stuff in
> > place to handle the 32/64 bit conversions, but it would be really nice if you
> > could avoid having to maintain that extra code path.
> Pointers are pretty much unavoidable, to allow zero-copy references to input/output data. If that means maintaining 32-bit compat paths (as opposed to codifying say uint128_t for pointers in fixed-size structures), then so be it: the 32-bit paths will simply have to be maintained. Once we have the 32-bit compat paths, using pointers for other unformatted, variable-sized data (e.g. IVs, multiple-precision integers) seems to be easier than using variable-size data structures or explicit pointer arithmetic to compute data locations within the data packets.

No, they're not unavoidable. Thats the point I made in my last email. If you
use netlink, you inline all your data into a single byte array, with the proper
headers on it. no use of additional pointers needed at all. One buffer in, one
buffer out. Same number of copies that using an ioctl requires.

> Mirek
>

2010-08-10 16:45:07

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 11:40:00AM -0400, Miloslav Trmac wrote:
> ----- "Neil Horman" <[email protected]> wrote:
> > On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
> > > ----- "Neil Horman" <[email protected]> wrote:
> > > > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > > The problem with the netlink approach is that auditing is not as good because
> > > > > netlink is an async protocol. The kernel can only use the credentials that
> > > > > ride in the skb with the command since there is no guarantee the process has
> > > > > not changed credentials by the time you get the packet. Adding more
> > > > > credentials to the netlink headers is also not good. As it is now, the
> > > > > auditing is synchronous with the syscall and we can get at more information
> > > > > and reliably create the audit records called out by the protection profiles or
> > > > > FIPS-140 level 2.
> > > > >
> > > > > -Steve
> > > >
> > > > I think thats pretty easy to serialize though. All you need to do is enforce a
> > > > rule in the kernel that dictates any creditial changes to a given context must
> > > > be serialized behind all previously submitted crypto operations.
> > > That would be a bit unusual from the layering/semantics aspect - why
> > should credential changes care about crypto operations when they don't
> > care about any other operations? - and it would require pretty
> > widespread changes throughout the kernel core.
> > > Mirek
> >
> >
> > I'm sorry, I thought steve was referring to credentials in the sense of changing
> > keys/etc while crypto operations were in flight.
> The audited values are mainly process/thread attributes: pid, ppid, {,e,fs}[ug]id, session id, and the like. Most of these are attached to the netlink packet, and performing a lookup by PID is at packet handling time is unreliable - as far as I understand the netlink receive routine does not have to run in the same process context, so the PID might not even refer to the same process.
> Mirek

I'm not so sure I follow. how can you receive messages on a socket in response
to requests that were sent from a different socket. In the netlink multicast
and broadcast case, sure, but theres no need to use those. I suppose you could
exit a process, start another process in which the pid gets reused, open up a
subsequent socket and perhaps confuse audit that way, but you're not going to
get responses to the requests that the previous process sent in that case. And
in the event that happens, Audit should log a close event on the fd inquestion
between the operations.

Theres also the child process case, in which a child process might read
responses from requests sent by a parent, and vice versa, since fork inherits
file descriptors, but thats an issue inherent in any mechanism you use,
including the character interface, so I'm not sure what the problem is there.

2010-08-10 16:58:00

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:

> On Tue, Aug 10, 2010 at 11:40:00AM -0400, Miloslav Trmac wrote:
> > ----- "Neil Horman" <[email protected]> wrote:
> > > On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
> > > > ----- "Neil Horman" <[email protected]> wrote:
> > > > > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > > > The problem with the netlink approach is that auditing is not as good because
> > > > > > netlink is an async protocol. The kernel can only use the credentials that
> > > > > > ride in the skb with the command since there is no guarantee the process has
> > > > > > not changed credentials by the time you get the packet. Adding more
> > > > > > credentials to the netlink headers is also not good. As it is now, the
> > > > > > auditing is synchronous with the syscall and we can get at more information
> > > > > > and reliably create the audit records called out by the protection profiles or
> > > > > > FIPS-140 level 2.
> > > > > >
> > > > > > -Steve
> > > > >
> > > > > I think thats pretty easy to serialize though. All you need to do is enforce a
> > > > > rule in the kernel that dictates any creditial changes to a given context must
> > > > > be serialized behind all previously submitted crypto operations.
> > > > That would be a bit unusual from the layering/semantics aspect - why
> > > should credential changes care about crypto operations when they don't
> > > care about any other operations? - and it would require pretty
> > > widespread changes throughout the kernel core.
> > > > Mirek
> > >
> > >
> > > I'm sorry, I thought steve was referring to credentials in the sense of changing
> > > keys/etc while crypto operations were in flight.
> > The audited values are mainly process/thread attributes: pid, ppid,
> {,e,fs}[ug]id, session id, and the like. Most of these are attached
> to the netlink packet, and performing a lookup by PID is at packet
> handling time is unreliable - as far as I understand the netlink
> receive routine does not have to run in the same process context, so
> the PID might not even refer to the same process.
>
> I'm not so sure I follow. how can you receive messages on a socket in response
> to requests that were sent from a different socket. In the netlink multicast
> and broadcast case, sure, but theres no need to use those. I suppose you could
> exit a process, start another process in which the pid gets reused, open up a
> subsequent socket and perhaps confuse audit that way, but you're not going to
> get responses to the requests that the previous process sent in that case.
I don't even need to open a subsequent socket - as son as the process ID is reused, the audit message is incorrect, which is not really acceptable in itself.

> And
> in the event that happens, Audit should log a close event on the fd inquestion
> between the operations.
audit only logs explicitly requested operations, so an administrator that asks for crypto events does not automatically get any close events. Besides, the audit record should be correct in the first place, instead of giving the admin a puzzle to decipher.

> Theres also the child process case, in which a child process might read
> responses from requests sent by a parent, and vice versa, since fork inherits
> file descriptors, but thats an issue inherent in any mechanism you use,
> including the character interface, so I'm not sure what the problem is
> there.
Actually that's not a problem with ioctl() because the response is written back _before_ ioctl() returns, so it is always provided to the original calling thread.

With the current design the parent and child share the key/session namespace after fork(), but operation results are always reliably and automatically provided to the thread that invoked the operation, without any user-space collaboration.
Mirek

2010-08-10 18:02:03

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 12:57:43PM -0400, Miloslav Trmac wrote:
>
> ----- "Neil Horman" <[email protected]> wrote:
>
> > On Tue, Aug 10, 2010 at 11:40:00AM -0400, Miloslav Trmac wrote:
> > > ----- "Neil Horman" <[email protected]> wrote:
> > > > On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
> > > > > ----- "Neil Horman" <[email protected]> wrote:
> > > > > > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > > > > The problem with the netlink approach is that auditing is not as good because
> > > > > > > netlink is an async protocol. The kernel can only use the credentials that
> > > > > > > ride in the skb with the command since there is no guarantee the process has
> > > > > > > not changed credentials by the time you get the packet. Adding more
> > > > > > > credentials to the netlink headers is also not good. As it is now, the
> > > > > > > auditing is synchronous with the syscall and we can get at more information
> > > > > > > and reliably create the audit records called out by the protection profiles or
> > > > > > > FIPS-140 level 2.
> > > > > > >
> > > > > > > -Steve
> > > > > >
> > > > > > I think thats pretty easy to serialize though. All you need to do is enforce a
> > > > > > rule in the kernel that dictates any creditial changes to a given context must
> > > > > > be serialized behind all previously submitted crypto operations.
> > > > > That would be a bit unusual from the layering/semantics aspect - why
> > > > should credential changes care about crypto operations when they don't
> > > > care about any other operations? - and it would require pretty
> > > > widespread changes throughout the kernel core.
> > > > > Mirek
> > > >
> > > >
> > > > I'm sorry, I thought steve was referring to credentials in the sense of changing
> > > > keys/etc while crypto operations were in flight.
> > > The audited values are mainly process/thread attributes: pid, ppid,
> > {,e,fs}[ug]id, session id, and the like. Most of these are attached
> > to the netlink packet, and performing a lookup by PID is at packet
> > handling time is unreliable - as far as I understand the netlink
> > receive routine does not have to run in the same process context, so
> > the PID might not even refer to the same process.
> >
> > I'm not so sure I follow. how can you receive messages on a socket in response
> > to requests that were sent from a different socket. In the netlink multicast
> > and broadcast case, sure, but theres no need to use those. I suppose you could
> > exit a process, start another process in which the pid gets reused, open up a
> > subsequent socket and perhaps confuse audit that way, but you're not going to
> > get responses to the requests that the previous process sent in that case.
> I don't even need to open a subsequent socket - as son as the process ID is reused, the audit message is incorrect, which is not really acceptable in itself.
>
But, you do, thats my point. If a process exits, and another process starts up
that happens to reuse the same pid, it can't just call recvmsg on the socket
descriptor that the last process used for netlink messages and expect to get any
data. That socket descriptor won't be connected to the netlink service (or
anything) anymore, and you'll get an error from the kernel.

> > And
> > in the event that happens, Audit should log a close event on the fd inquestion
> > between the operations.
> audit only logs explicitly requested operations, so an administrator that asks for crypto events does not automatically get any close events. Besides, the audit record should be correct in the first place, instead of giving the admin a puzzle to decipher.
I still don't see whats incorrect here. If two processes wind up reusing a
process id, thats audits problem to figure out, nothing elses. What exactly is
the problem that you see involving netlink and audit here? Compare whatever
problem you see a crypto netlink protocol having in regards to audit to another
netlink protocol (say rtnetlink), and explain to me why the latter doesn't have
that issue as well.

>
> > Theres also the child process case, in which a child process might read
> > responses from requests sent by a parent, and vice versa, since fork inherits
> > file descriptors, but thats an issue inherent in any mechanism you use,
> > including the character interface, so I'm not sure what the problem is
> > there.
> Actually that's not a problem with ioctl() because the response is written back _before_ ioctl() returns, so it is always provided to the original calling thread.
>
> With the current design the parent and child share the key/session namespace after fork(), but operation results are always reliably and automatically provided to the thread that invoked the operation, without any user-space collaboration.
Is that _really_ that important? That the kernel return data in the same call
that its made? I don't believe for a minute that FIPS has any madate on that.
I believe that it might be easier for audit to provide records on single calls,
rather than having to correlate multiple calls, but its not like audit doesn't
have to have some modicum of ability to handle that already, since it audits
sockets

Neil

> Mirek
>

2010-08-10 18:16:32

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tuesday, August 10, 2010 01:57:40 pm Neil Horman wrote:
> > > I'm not so sure I follow. how can you receive messages on a socket in
> > > response to requests that were sent from a different socket. In the
> > > netlink multicast and broadcast case, sure, but theres no need to use
> > > those. I suppose you could exit a process, start another process in
> > > which the pid gets reused, open up a subsequent socket and perhaps
> > > confuse audit that way, but you're not going to get responses to the
> > > requests that the previous process sent in that case.
> >
> > I don't even need to open a subsequent socket - as son as the process ID
> > is reused, the audit message is incorrect, which is not really
> > acceptable in itself.
> >
> >
>
> But, you do, thats my point. If a process exits, and another process
> starts up that happens to reuse the same pid, it can't just call recvmsg
> on the socket descriptor that the last process used for netlink messages
> and expect to get any data. That socket descriptor won't be connected to
> the netlink service (or anything) anymore, and you'll get an error from
> the kernel.

You are looking at it from the wrong end. Think of it from audit's perspective
about how do you guarantee that the audit trail is correct? This has been
discussed on linux-audit mail list before and the conclusion is you have very
limited information to work with. By being synchronous the syscall, we get
everything in the syscall record from the processes audit context.

The audit logs require non-repudiation. It cannot be racy or stitch together
possibly wrong events. Audit logs can and do wind up in court and we do not
want problems with any part of the system.

> > > And in the event that happens, Audit should log a close event on the fd
> > > inquestion between the operations.
> >
> > audit only logs explicitly requested operations, so an administrator that
> > asks for crypto events does not automatically get any close
> > events. Besides, the audit record should be correct in the first place,
> > instead of giving the admin a puzzle to decipher.
>
> I still don't see whats incorrect here. If two processes wind up reusing a
> process id, thats audits problem to figure out, nothing elses

True, but that is the point of this exercise - meeting common criteria and
FIPS. They both have rules about what the audit logs should present and the
assuarnce that the information is correct and not racy.

> . What exactly is the problem that you see involving netlink and audit
> here? Compare whatever problem you see a crypto netlink protocol having
> in regards to audit to another netlink protocol (say rtnetlink), and
> explain to me why the latter doesn't have that issue as well.

That one is not security sensitive. Nowhere in any protection profile does it
say to audit that.

-Steve

2010-08-10 18:20:13

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 12:57:43PM -0400, Miloslav Trmac wrote:
> >
> > ----- "Neil Horman" <[email protected]> wrote:
> >
> > > On Tue, Aug 10, 2010 at 11:40:00AM -0400, Miloslav Trmac wrote:
> > > > ----- "Neil Horman" <[email protected]> wrote:
> > > > > On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
> > > > > > ----- "Neil Horman" <[email protected]> wrote:
> > > > > > > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > > > > > The problem with the netlink approach is that auditing is not as good because
> > > > > > > > netlink is an async protocol. The kernel can only use the credentials that
> > > > > > > > ride in the skb with the command since there is no guarantee the process has
> > > > > > > > not changed credentials by the time you get the packet. Adding more
> > > > > > > > credentials to the netlink headers is also not good. As it is now, the
> > > > > > > > auditing is synchronous with the syscall and we can get at more information
> > > > > > > > and reliably create the audit records called out by the protection profiles or
> > > > > > > > FIPS-140 level 2.
> > > > > > > >
> > > > > > > > -Steve
> > > > > > >
> > > > > > > I think thats pretty easy to serialize though. All you need to do is enforce a
> > > > > > > rule in the kernel that dictates any creditial changes to a given context must
> > > > > > > be serialized behind all previously submitted crypto operations.
> > > > > > That would be a bit unusual from the layering/semantics aspect - why
> > > > > should credential changes care about crypto operations when they don't
> > > > > care about any other operations? - and it would require pretty
> > > > > widespread changes throughout the kernel core.
> > > > > > Mirek
> > > > >
> > > > >
> > > > > I'm sorry, I thought steve was referring to credentials in the sense of changing
> > > > > keys/etc while crypto operations were in flight.
> > > > The audited values are mainly process/thread attributes: pid, ppid,
> > > {,e,fs}[ug]id, session id, and the like. Most of these are attached
> > > to the netlink packet, and performing a lookup by PID is at packet
> > > handling time is unreliable - as far as I understand the netlink
> > > receive routine does not have to run in the same process context, so
> > > the PID might not even refer to the same process.
> > >
> > > I'm not so sure I follow. how can you receive messages on a socket in response
> > > to requests that were sent from a different socket. In the netlink multicast
> > > and broadcast case, sure, but theres no need to use those. I suppose you could
> > > exit a process, start another process in which the pid gets reused, open up a
> > > subsequent socket and perhaps confuse audit that way, but you're not going to
> > > get responses to the requests that the previous process sent in that case.
> > I don't even need to open a subsequent socket - as son as the
> process ID is reused, the audit message is incorrect, which is not
> really acceptable in itself.
> >
> But, you do, thats my point. If a process exits, and another process starts up
> that happens to reuse the same pid, it can't just call recvmsg on the socket
> descriptor that the last process used for netlink messages and expect to get any
> data.
It _doesn't matter_ that I don't receive a response. I have caused an operation in the kernel and the requested audit record is incorrect. The audit subsystem needs to work even if - especially if - the userspace process is malicious. The process might have wanted to simply cause a misleading audit record; or the operation (such as importing a key from supplied data) might not really need the response in order to achieve its effect.

> > > And
> > > in the event that happens, Audit should log a close event on the
> fd inquestion
> > > between the operations.
> > audit only logs explicitly requested operations, so an administrator
> that asks for crypto events does not automatically get any close
> events. Besides, the audit record should be correct in the first
> place, instead of giving the admin a puzzle to decipher.
> I still don't see whats incorrect here. If two processes wind up reusing a
> process id, thats audits problem to figure out, nothing elses.
If an operation attributed to user A is recorded by the kernel as being performed by user B, that is not an user problem.

> What exactly is
> the problem that you see involving netlink and audit here? Compare whatever
> problem you see a crypto netlink protocol having in regards to audit to another
> netlink protocol (say rtnetlink), and explain to me why the latter doesn't have
> that issue as well.
AFAIK most netlink users in the kernel are not audited, and those that are typically only allow operations by system administrators. And I haven't claimed that the other users are correct :) Notably audit itself does not record as much information about the invoking process as we'd like because it is not available.

> > > Theres also the child process case, in which a child process might read
> > > responses from requests sent by a parent, and vice versa, since fork inherits
> > > file descriptors, but thats an issue inherent in any mechanism you use,
> > > including the character interface, so I'm not sure what the problem is
> > > there.
> > Actually that's not a problem with ioctl() because the response is
> written back _before_ ioctl() returns, so it is always provided to the
> original calling thread.
> >
> > With the current design the parent and child share the key/session
> namespace after fork(), but operation results are always reliably and
> automatically provided to the thread that invoked the operation,
> without any user-space collaboration.

> Is that _really_ that important? That the kernel return data in the same call
> that its made?
Not for audit; but yes, it is important.

1) performance: this is not ip(8), where no one cares if the operation runs 10ms or 1000ms. Crypto is at least 2 operations per SSL record (which can be as little as 1 byte of application data), and users will care about the speed. Batching more operations into a single request is impractical because it would require very extensive changes in users of the crypto libraries, and the generic crypto interfaces such as PKCS#11 don't natively support batching so it might not even be possible to express this in some libraries.

2) simplicity and reliability: you are downplaying the overhead and synchronization necessary (potentially among multiple processes!) to simply receive a response, but it is still enormous compared to the single syscall case. Even worse, netlink(7) says "netlink is not a reliable protocol. ... but may drop messages". Would you accept such a mechanism to transfer "write data to file" operations? "Compress data using AES" is much more similar to "write data to file" than to "change this aspect of kernel routing configuration" - it is an application-level service, not a way to communicate long-term parameters to a pretty independent subsystem residing in the kernel.
Mirek

2010-08-10 18:21:09

by Loc Ho

[permalink] [raw]

Subject: RE: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

Hi Miloslav,

I had read or glance over the patch from " http://people.redhat.com/mitr/cryptodev-ncr/0002". We have post a version of the CryptoDev over a year ago. Unfortunately, we did not got a chance to pick up again. In that process, Herbert Xu provides a number of comments. You can search the mailing list for that. Here are my comment based on experience with hardware crypto:

1. This CrytoDev (user space) interface needs to support multiple operations at once
2. This CrytoDev interface needs to support async
3. This CryotDev interface needs to support large data operation such as an entire file
4. Zero crypto (already see this in your version)
5. Avoid a lot of little kmalloc for various small structures (This once is from Herbert Xu.)
6. This interface needs to work with OpenSSL which allow OpenSwan to work

Reason for item above:
Item #1. Multiple operations - This needs to take advance of the hardware offload. If you only submit one operation at a time, the latency of the software/hardware will be a problem. By allow submitting multiple operations, you fill up the hardware buffer and keep in running. Otherwise, it just be idle a majority of the time and the difference between SW vs HW is nill.

Item #2. Asnc - You can argue that you can open multiple /dev/crypto session and submit them. But this does not work for the same session and for HW base crypto. Having an async interface has the benefit to the user space application developer as they can use the same async programming interface as with other I/O operations.

Item #3. Large file support - Most hardware algorithms can't support this as the operation cannot be continue. Not sure how to handle this.

Item #4. Right now you are pining the entire buffer. For small buffer, this may not make sense. We have not got a chance to see if what is the limit for this.

Item #5. Herbert Xu mentioned that we should avoid having a lot of small kmalloc when possible.

Item #6. You should give OpenSSL a patach and see how it works out. Although, OpenSSL does not take advantage of batch submission. Again, to really take advantage of the HW, you really need to support batch operation.

For all other comments, search for previous /dev/cryptodev submission and you will find a bunch of argue on this "user space crypto API".

-Loc

CONFIDENTIALITY NOTICE: This e-mail message, including any attachments,
is for the sole use of the intended recipient(s) and contains information?
that is confidential and proprietary to AppliedMicro Corporation or its subsidiaries.
It is to be used solely for the purpose of furthering the parties' business relationship.
All unauthorized review, use, disclosure or distribution is prohibited.
If you are not the intended recipient, please contact the sender by reply e-mail
and destroy all copies of the original message.

2010-08-10 18:34:17

by Herbert Xu

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 02:19:59PM -0400, Miloslav Trmac wrote:
>
> 2) simplicity and reliability: you are downplaying the overhead and synchronization necessary (potentially among multiple processes!) to simply receive a response, but it is still enormous compared to the single syscall case. Even worse, netlink(7) says "netlink is not a reliable protocol. ... but may drop messages". Would you accept such a mechanism to transfer "write data to file" operations? "Compress data using AES" is much more similar to "write data to file" than to "change this aspect of kernel routing configuration" - it is an application-level service, not a way to communicate long-term parameters to a pretty independent subsystem residing in the kernel.

That just shows you have no idea how netlink works. The reliability
comment is in the context of congestion control, and does not apply
in this case.

Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2010-08-10 18:34:53

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

Hello,
----- "Loc Ho" <[email protected]> wrote:
> I had read or glance over the patch from "
> http://people.redhat.com/mitr/cryptodev-ncr/0002". We have post a
> version of the CryptoDev over a year ago. Unfortunately, we did not
> got a chance to pick up again. In that process, Herbert Xu provides a
> number of comments. You can search the mailing list for that. Here are
> my comment based on experience with hardware crypto:
Thanks for the comments.

> 1. This CrytoDev (user space) interface needs to support multiple
> operations at once

I think this would be naturally solved by providing the async interface.

> Item #2. Asnc - You can argue that you can open multiple /dev/crypto
> session and submit them. But this does not work for the same session
> and for HW base crypto. Having an async interface has the benefit to
> the user space application developer as they can use the same async
> programming interface as with other I/O operations.

Right, that would make sense - but it can be added later (by providing an "async op" operation, poll() handler, and "get next result" operation). I'd like to get the existing functionality acceptable first.

> Item #3. Large file support - Most hardware algorithms can't support
> this as the operation cannot be continue. Not sure how to handle
> this.
The proposed interface does not have any inherent input/output size limits.

> Item #4. Right now you are pining the entire buffer. For small buffer,
> this may not make sense. We have not got a chance to see if what is
> the limit for this.
Good point; this kind of performance optimization can be added later as well, noted for future work.

> Item #5. Herbert Xu mentioned that we should avoid having a lot of
> small kmalloc when possible.
Looking at the session code, which is most critical, I see a few opportunities to improve this; noted.

> Item #6. You should give OpenSSL a patach and see how it works out.
> Although, OpenSSL does not take advantage of batch submission. Again,
> to really take advantage of the HW, you really need to support batch
> operation.
OpenSSL support is planned, but not ready yet.

Thanks again,
Mirek

2010-08-10 18:37:00

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Herbert Xu" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 02:19:59PM -0400, Miloslav Trmac wrote:
> >
> > 2) simplicity and reliability: you are downplaying the overhead and
> synchronization necessary (potentially among multiple processes!) to
> simply receive a response, but it is still enormous compared to the
> single syscall case. Even worse, netlink(7) says "netlink is not a
> reliable protocol. ... but may drop messages". Would you accept such
> a mechanism to transfer "write data to file" operations? "Compress
> data using AES" is much more similar to "write data to file" than to
> "change this aspect of kernel routing configuration" - it is an
> application-level service, not a way to communicate long-term
> parameters to a pretty independent subsystem residing in the kernel.
>
> That just shows you have no idea how netlink works.
I'm learning as fast as I can by reading all available documentation :) Nevertheless I believe the point stands even without the reliability problem.
Mirek

2010-08-10 18:39:42

by Loc Ho

[permalink] [raw]

Subject: RE: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

> 1. This CrytoDev (user space) interface needs to support multiple
> operations at once

I think this would be naturally solved by providing the async interface.
[Loc Ho]
Async only support a single operation at a time. HW are quite fast. The ability to submit multiple payload for a single OS call improve in performance. The software overhead with the submission alone can be more than a single encrypt/decrypt/hashing operation.

-Loc

CONFIDENTIALITY NOTICE: This e-mail message, including any attachments,
is for the sole use of the intended recipient(s) and contains information?
that is confidential and proprietary to AppliedMicro Corporation or its subsidiaries.
It is to be used solely for the purpose of furthering the parties' business relationship.
All unauthorized review, use, disclosure or distribution is prohibited.
If you are not the intended recipient, please contact the sender by reply e-mail
and destroy all copies of the original message.

2010-08-10 18:50:03

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 02:14:24PM -0400, Steve Grubb wrote:
> On Tuesday, August 10, 2010 01:57:40 pm Neil Horman wrote:
> > > > I'm not so sure I follow. how can you receive messages on a socket in
> > > > response to requests that were sent from a different socket. In the
> > > > netlink multicast and broadcast case, sure, but theres no need to use
> > > > those. I suppose you could exit a process, start another process in
> > > > which the pid gets reused, open up a subsequent socket and perhaps
> > > > confuse audit that way, but you're not going to get responses to the
> > > > requests that the previous process sent in that case.
> > >
> > > I don't even need to open a subsequent socket - as son as the process ID
> > > is reused, the audit message is incorrect, which is not really
> > > acceptable in itself.
> > >
> > >
> >
> > But, you do, thats my point. If a process exits, and another process
> > starts up that happens to reuse the same pid, it can't just call recvmsg
> > on the socket descriptor that the last process used for netlink messages
> > and expect to get any data. That socket descriptor won't be connected to
> > the netlink service (or anything) anymore, and you'll get an error from
> > the kernel.
>
> You are looking at it from the wrong end. Think of it from audit's perspective
> about how do you guarantee that the audit trail is correct? This has been
> discussed on linux-audit mail list before and the conclusion is you have very
> limited information to work with. By being synchronous the syscall, we get
> everything in the syscall record from the processes audit context.
>

What information do you need in the audit record that you might loose accross
two syscalls? It sounds from previous emails that, generally speaking, you're
worried that you want the task struct that current points to in the recvmsg call
be guaranteeed to be the same as the task struct that current points to in the
sendmsg call (i.e. no children (re)using the same socket descriptor, etc). Can
this be handled by using the fact that netlink is actually syncronous under the
covers? i.e. when you send a message to a netlink service, there is no reason
that all the relevant crypto ops in the request can't be completed in the
context of that call, as long as all your crypto operations are themselves
synchronous. By the time you are done with the sendmsg call, you can know if
your entire crypto op is successfull. The only thing that isn't complete is the
retrieval of the completed operations data from the kernel. Is that enough to
make an audit log entry in the same way that an ioctl would?

> The audit logs require non-repudiation. It cannot be racy or stitch together
> possibly wrong events. Audit logs can and do wind up in court and we do not
> want problems with any part of the system.
>
> > > > And in the event that happens, Audit should log a close event on the fd
> > > > inquestion between the operations.
> > >
> > > audit only logs explicitly requested operations, so an administrator that
> > > asks for crypto events does not automatically get any close
> > > events. Besides, the audit record should be correct in the first place,
> > > instead of giving the admin a puzzle to decipher.
> >
> > I still don't see whats incorrect here. If two processes wind up reusing a
> > process id, thats audits problem to figure out, nothing elses
>
> True, but that is the point of this exercise - meeting common criteria and
> FIPS. They both have rules about what the audit logs should present and the
> assuarnce that the information is correct and not racy.
>

Can you ennumerate here what FIPS and Common Criteria mandate be presented in
the audit logs?

Neil

> > . What exactly is the problem that you see involving netlink and audit
> > here? Compare whatever problem you see a crypto netlink protocol having
> > in regards to audit to another netlink protocol (say rtnetlink), and
> > explain to me why the latter doesn't have that issue as well.
>
> That one is not security sensitive. Nowhere in any protection profile does it
> say to audit that.
>
> -Steve

2010-08-10 18:58:26

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 11:36:16AM -0400, Miloslav Trmac wrote:
> > I think it would be useful to separate thinking about the data
> format and about the transmission mechanism. ioctl() can quite well
> be used to carry "netlink-like" packets - blobs of data with specified
> length and flexible internal structure - and on the other hand,
> netlink could be (and often is) used to carry fixed structs instead of
> variable-length packets.
>
> Yes, both mechanism can be used to carry either fixed or variable length
> payloads. The difference is ioctl has no built in mechanism to do variable
> length data safely. To do variable length data you need to setup a bunch of
> pointers to extra data that you have to copy separately.
No, I can do exactly what netlink does:

struct operation {
// fixed params;
size_t size_of_attrs;
char attrs[]; // struct nlattr-tagged data
};

at the cost of one additional copy_from_user() (which netlink users often pay as well because using struct iovec is so convenient). Or perhaps even better, I can make sure the userspace application won't mess up the pointer arithmetic and get rid of all of the macros:

struct operation {
// fixed params;
size_t num_attrs;
struct { struct nlattr header; union { ... } data } attrs[];
}

at the same cost (and sacrificing string and variable-length parameters - but as argued below, pointers are still an option).

> Even then, you're
> fixing the number of pointers that you have in your base structure. To add or
> remove any would break your communication protocol to user space. This is
> exactly the point I made above.
The existence of a base structure does not inherently limit the number of extensions.

> And there is no need for versioning in the netlink packet, because the data
> types are all inlined, typed and attached to length values (at least when done
> correctly, see then nl_attr structure and associated macros). You don't have
> that with your ioctl. You could add it for certain, but at that point you're
> re-inventing the wheel.
Right, the nl_attr structure is quite nice, and I didn't know about it. Still...

> > As for the transmission mechanism, netlink seems to me to be one of
> the least desirable options: as described above, it does not provide
> any inherent advantages to ioctl, and it has significantly higher
> overhead (sendmsg()+recvmsg(), handling netlink ACKs, matching
> requests and replies in multi-threaded programs), and it makes
> auditing difficult.
>
> You're incorrect. I've explained several of the advantiges above and in my previous
> email, you're just not seeing them. I will grant you some additional overhead
> in the use of of two system calls rather than one per operation in the nominal
> case, but there is no reason that can't be mitigated by the bundling of multiple
> operations into a single netlink packet.
None of the existing crypto libraries provide such interfaces, and restructuring applications to take advantage of them would often be difficult - just restructuring the NSS _internals_ to support this would be difficult. Porting applications to a Linux-specific interface would make them less portable; besides, we have tried porting applications to a different library before (http://fedoraproject.org/wiki/FedoraCryptoConsolidation ), and based on that experience, any such new interface would have minimal, if any, impact.

> Likewise, matching requests and responses in a multi-threaded program is also an
> already solved issue in multiple ways. The use of multiple sockets, in a 1 per
> thread fashion is the most obvious.
That would give each thread a separate namespace of keys/sessions, rather strange and a problem to fit into existing applications.

> Theres also countless approaches in which a
> thread can reassemble responses to registered requests in such a way that the
> user space portion of an application sees these calls as being synchronous. Its
> really not that hard.
The overhead is still unnecessary.

> > > 2) Use of pointers. Thats just a pain. You have the compat ioctl stuff in
> > > place to handle the 32/64 bit conversions, but it would be really nice if you
> > > could avoid having to maintain that extra code path.
> > Pointers are pretty much unavoidable, to allow zero-copy references
> to input/output data. If that means maintaining 32-bit compat paths
> (as opposed to codifying say uint128_t for pointers in fixed-size
> structures), then so be it: the 32-bit paths will simply have to be
> maintained. Once we have the 32-bit compat paths, using pointers for
> other unformatted, variable-sized data (e.g. IVs, multiple-precision
> integers) seems to be easier than using variable-size data structures
> or explicit pointer arithmetic to compute data locations within the
> data packets.
>
> No, they're not unavoidable. Thats the point I made in my last email. If you
> use netlink, you inline all your data into a single byte array, with the proper
> headers on it. no use of additional pointers needed at all. One buffer in, one
> buffer out.
Right, one copy of input data in sendmsg() from userspace to a skbuf, one copy of output data from a skbuf into userspace. The submitted code already implements zero-copy for data: the pointers are used to form scatter-gather lists submitted directly to the crypto code. I don't think I can do that with the linear skbuf I get from af_netlink.c.

Overall, I'd much rather do the programming necessary to clone the nl_attr code, than suffer the system call overhead and audit problems, if those are the only options.
Mirek

2010-08-10 19:10:33

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tuesday, August 10, 2010 02:45:44 pm Neil Horman wrote:
> On Tue, Aug 10, 2010 at 02:14:24PM -0400, Steve Grubb wrote:
> > On Tuesday, August 10, 2010 01:57:40 pm Neil Horman wrote:
> > > > > I'm not so sure I follow. how can you receive messages on a socket
> > > > > in response to requests that were sent from a different socket.
> > > > > In the netlink multicast and broadcast case, sure, but theres no
> > > > > need to use those. I suppose you could exit a process, start
> > > > > another process in which the pid gets reused, open up a subsequent
> > > > > socket and perhaps confuse audit that way, but you're not going to
> > > > > get responses to the requests that the previous process sent in
> > > > > that case.
> > > >
> > > > I don't even need to open a subsequent socket - as son as the process
> > > > ID is reused, the audit message is incorrect, which is not really
> > > > acceptable in itself.
> > >
> > > But, you do, thats my point. If a process exits, and another process
> > > starts up that happens to reuse the same pid, it can't just call
> > > recvmsg on the socket descriptor that the last process used for
> > > netlink messages and expect to get any data. That socket descriptor
> > > won't be connected to the netlink service (or anything) anymore, and
> > > you'll get an error from the kernel.
> >
> > You are looking at it from the wrong end. Think of it from audit's
> > perspective about how do you guarantee that the audit trail is correct?
> > This has been discussed on linux-audit mail list before and the
> > conclusion is you have very limited information to work with. By being
> > synchronous the syscall, we get everything in the syscall record from
> > the processes audit context.
>
> What information do you need in the audit record that you might loose
> accross two syscalls?

This is easier to show that explain. With Mirek's current patch:

type=SYSCALL msg=audit(1281013374.713:11671): arch=c000003e syscall=2
success=yes exit=3 a0=400b67 a1=2 a2=0 a3=7fff4daa1200 items=1 ppid=1352
pid=1375 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
tty=tty6 ses=3 comm="ncr-setkey" exe="/home/mitr/cryptodev-
linux/userspace/ncr-setkey" subj=unconfined_u:unconfined_r:unconfined_t:s0-
s0:c0.c1023 key=(null)
type=CRYPTO_USERSPACE_OP msg=audit(1281013374.713:11671):
crypto_op=context_new ctx=0
type=CWD msg=audit(1281013374.713:11671): cwd="/root"
type=PATH msg=audit(1281013374.713:11671): item=0 name="/dev/crypto"
inode=12498 dev=00:05 mode=020660 ouid=0 ogid=0 rdev=0a:3a
obj=system_u:object_r:device_t:s0
type=CRYPTO_STORAGE_KEY msg=audit(1281013374.715:11672): key_size=16

The netlink aproach, we only get the credentials that ride with the netlink
packet

http://lxr.linux.no/#linux+v2.6.35/include/linux/netlink.h#L159

There really is no comparison between what can be recorded synchronously vs
async.

> It sounds from previous emails that, generally speaking, you're worried that
> you want the task struct that current points to in the recvmsg call be
> guaranteeed to be the same as the task struct that current points to in the
> sendmsg call (i.e. no children (re)using the same socket descriptor, etc).

Not really, we are _hoping_ that the pid in the netlink credentials is the
same pid that sent the packet in the first place. From the time we reference
the pid out of the skb until we go hunting and locate the pid in the list of
tasks, it could have died and been replaced with another task with different
credentials. We can't take that risk.

> Can this be handled by using the fact that netlink is actually syncronous
> under the covers?

But its not or all the credentials would not be riding in the skb.

> Can you ennumerate here what FIPS and Common Criteria mandate be presented
> in the audit logs?

Who did what to whom at what time and what was the outcome. In the case of
configuration changes we need the new and old values. However, we need extra
information to make the selective audit work right.

-Steve

2010-08-10 19:14:44

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 02:19:59PM -0400, Miloslav Trmac wrote:
> ----- "Neil Horman" <[email protected]> wrote:
> > On Tue, Aug 10, 2010 at 12:57:43PM -0400, Miloslav Trmac wrote:
> > >
> > > ----- "Neil Horman" <[email protected]> wrote:
> > >
> > > > On Tue, Aug 10, 2010 at 11:40:00AM -0400, Miloslav Trmac wrote:
> > > > > ----- "Neil Horman" <[email protected]> wrote:
> > > > > > On Tue, Aug 10, 2010 at 10:47:14AM -0400, Miloslav Trmac wrote:
> > > > > > > ----- "Neil Horman" <[email protected]> wrote:
> > > > > > > > On Tue, Aug 10, 2010 at 09:24:31AM -0400, Steve Grubb wrote:
> > > > > > > > > The problem with the netlink approach is that auditing is not as good because
> > > > > > > > > netlink is an async protocol. The kernel can only use the credentials that
> > > > > > > > > ride in the skb with the command since there is no guarantee the process has
> > > > > > > > > not changed credentials by the time you get the packet. Adding more
> > > > > > > > > credentials to the netlink headers is also not good. As it is now, the
> > > > > > > > > auditing is synchronous with the syscall and we can get at more information
> > > > > > > > > and reliably create the audit records called out by the protection profiles or
> > > > > > > > > FIPS-140 level 2.
> > > > > > > > >
> > > > > > > > > -Steve
> > > > > > > >
> > > > > > > > I think thats pretty easy to serialize though. All you need to do is enforce a
> > > > > > > > rule in the kernel that dictates any creditial changes to a given context must
> > > > > > > > be serialized behind all previously submitted crypto operations.
> > > > > > > That would be a bit unusual from the layering/semantics aspect - why
> > > > > > should credential changes care about crypto operations when they don't
> > > > > > care about any other operations? - and it would require pretty
> > > > > > widespread changes throughout the kernel core.
> > > > > > > Mirek
> > > > > >
> > > > > >
> > > > > > I'm sorry, I thought steve was referring to credentials in the sense of changing
> > > > > > keys/etc while crypto operations were in flight.
> > > > > The audited values are mainly process/thread attributes: pid, ppid,
> > > > {,e,fs}[ug]id, session id, and the like. Most of these are attached
> > > > to the netlink packet, and performing a lookup by PID is at packet
> > > > handling time is unreliable - as far as I understand the netlink
> > > > receive routine does not have to run in the same process context, so
> > > > the PID might not even refer to the same process.
> > > >
> > > > I'm not so sure I follow. how can you receive messages on a socket in response
> > > > to requests that were sent from a different socket. In the netlink multicast
> > > > and broadcast case, sure, but theres no need to use those. I suppose you could
> > > > exit a process, start another process in which the pid gets reused, open up a
> > > > subsequent socket and perhaps confuse audit that way, but you're not going to
> > > > get responses to the requests that the previous process sent in that case.
> > > I don't even need to open a subsequent socket - as son as the
> > process ID is reused, the audit message is incorrect, which is not
> > really acceptable in itself.
> > >
> > But, you do, thats my point. If a process exits, and another process starts up
> > that happens to reuse the same pid, it can't just call recvmsg on the socket
> > descriptor that the last process used for netlink messages and expect to get any
> > data.
> It _doesn't matter_ that I don't receive a response. I have caused an operation in the kernel and the requested audit record is incorrect. The audit subsystem needs to work even if - especially if - the userspace process is malicious. The process might have wanted to simply cause a misleading audit record; or the operation (such as importing a key from supplied data) might not really need the response in order to achieve its effect.
>

Why is it incorrect? What would audit have recorded in the above case? That a
process attempted to recieve data on a descriptor that it didn't have open?
That seems correct to me.

> > > > And
> > > > in the event that happens, Audit should log a close event on the
> > fd inquestion
> > > > between the operations.
> > > audit only logs explicitly requested operations, so an administrator
> > that asks for crypto events does not automatically get any close
> > events. Besides, the audit record should be correct in the first
> > place, instead of giving the admin a puzzle to decipher.
> > I still don't see whats incorrect here. If two processes wind up reusing a
> > process id, thats audits problem to figure out, nothing elses.
> If an operation attributed to user A is recorded by the kernel as being performed by user B, that is not an user problem.
>
You're right, its also not a netlink problem, a cryptodev problem, or an ioctl
problem. Its an audit problem.

> > What exactly is
> > the problem that you see involving netlink and audit here? Compare whatever
> > problem you see a crypto netlink protocol having in regards to audit to another
> > netlink protocol (say rtnetlink), and explain to me why the latter doesn't have
> > that issue as well.
> AFAIK most netlink users in the kernel are not audited, and those that are typically only allow operations by system administrators. And I haven't claimed that the other users are correct :) Notably audit itself does not record as much information about the invoking process as we'd like because it is not available.
>

Ok, thats fair enough, if netlink users aren't audited, I can understand that.
But just the same, that seems like a shortcomming in audit. Perhaps what we
need is an ennumeration of additional constraints that netlink protocols need to
follow if they are to be audited (i.e. have to hold a reference to the task
on each sendmsg, and reduce the refcount for each skb dequeued from the
recvqueue or some such, so as to ensure that pid reuse doesn't take place
accross netlink sockets).

> > > > Theres also the child process case, in which a child process might read
> > > > responses from requests sent by a parent, and vice versa, since fork inherits
> > > > file descriptors, but thats an issue inherent in any mechanism you use,
> > > > including the character interface, so I'm not sure what the problem is
> > > > there.
> > > Actually that's not a problem with ioctl() because the response is
> > written back _before_ ioctl() returns, so it is always provided to the
> > original calling thread.
> > >
> > > With the current design the parent and child share the key/session
> > namespace after fork(), but operation results are always reliably and
> > automatically provided to the thread that invoked the operation,
> > without any user-space collaboration.
>
> > Is that _really_ that important? That the kernel return data in the same call
> > that its made?
> Not for audit; but yes, it is important.
>
I don't follow you here. If its not important for audit to have synchrnous
calls, why is it important at all?

> 1) performance: this is not ip(8), where no one cares if the operation runs 10ms or 1000ms. Crypto is at least 2 operations per SSL record (which can be as little as 1 byte of application data), and users will care about the speed. Batching more operations into a single request is impractical because it would require very extensive changes in users of the crypto libraries, and the generic crypto interfaces such as PKCS#11 don't natively support batching so it might not even be possible to express this in some libraries.
>

Ok, so is the assertion here that your kernel interface is restricted by other user space
libraries? i.e. the operation has to be syncronous because anymore than 1
syscall per operation has too much impact on performance? I ask because the
recvmmsg (2 m's) will give you the exact same performance profile as ioctl. You
can let the kernel do all the batching with multiple calls to sendmsg, and get
all the responses at once with a single additional call to recvmmsg.

> 2) simplicity and reliability: you are downplaying the overhead and synchronization necessary (potentially among multiple processes!) to simply receive a response, but it is still enormous compared to the single syscall case. Even worse, netlink(7) says "netlink is not a reliable protocol. ... but may drop messages". Would you accept such a mechanism to transfer "write data to file" operations? "Compress data using AES" is much more similar to "write data to file" than to "change this aspect of kernel routing configuration" - it is an application-level service, not a way to communicate long-term parameters to a pretty independent subsystem residing in the kernel.

As Herbert noted, you don't seem to understand how netlink works. You don't
have to loose any data in a netlink socket.

Neil

> Mirek

2010-08-10 19:18:57

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Miloslav Trmac" <[email protected]> wrote:
> ----- "Neil Horman" <[email protected]> wrote:
> > Likewise, matching requests and responses in a multi-threaded program is also an
> > already solved issue in multiple ways. The use of multiple sockets, in a 1 per
> > thread fashion is the most obvious.
> That would give each thread a separate namespace of keys/sessions,
> rather strange and a problem to fit into existing applications.
Hm, that's actually not necessarily true, depending on the specifics, but it does bring up the more general problem of "crypto contexts" and their lifetimes.

The proposed patch treats each open("/dev/crypto") as a new context within which keys and sessions are allocated. Only processes having this FD can access the keys and namespaces, and transferring the FD also transfers access to the crypto data. On last close() the data is automatically freed.

With netlink we could associate the data with the caller's netlink address, but we don't get any notification that the caller has exited. The other option is to have only one, per-process, context, which again runs into the problem that netlink message handling is, at least semantically, separate from the calling process.
Mirek

2010-08-10 19:22:25

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 03:10:12PM -0400, Steve Grubb wrote:
> On Tuesday, August 10, 2010 02:45:44 pm Neil Horman wrote:
> > On Tue, Aug 10, 2010 at 02:14:24PM -0400, Steve Grubb wrote:
> > > On Tuesday, August 10, 2010 01:57:40 pm Neil Horman wrote:
> > > > > > I'm not so sure I follow. how can you receive messages on a socket
> > > > > > in response to requests that were sent from a different socket.
> > > > > > In the netlink multicast and broadcast case, sure, but theres no
> > > > > > need to use those. I suppose you could exit a process, start
> > > > > > another process in which the pid gets reused, open up a subsequent
> > > > > > socket and perhaps confuse audit that way, but you're not going to
> > > > > > get responses to the requests that the previous process sent in
> > > > > > that case.
> > > > >
> > > > > I don't even need to open a subsequent socket - as son as the process
> > > > > ID is reused, the audit message is incorrect, which is not really
> > > > > acceptable in itself.
> > > >
> > > > But, you do, thats my point. If a process exits, and another process
> > > > starts up that happens to reuse the same pid, it can't just call
> > > > recvmsg on the socket descriptor that the last process used for
> > > > netlink messages and expect to get any data. That socket descriptor
> > > > won't be connected to the netlink service (or anything) anymore, and
> > > > you'll get an error from the kernel.
> > >
> > > You are looking at it from the wrong end. Think of it from audit's
> > > perspective about how do you guarantee that the audit trail is correct?
> > > This has been discussed on linux-audit mail list before and the
> > > conclusion is you have very limited information to work with. By being
> > > synchronous the syscall, we get everything in the syscall record from
> > > the processes audit context.
> >
> > What information do you need in the audit record that you might loose
> > accross two syscalls?
>
> This is easier to show that explain. With Mirek's current patch:
>
> type=SYSCALL msg=audit(1281013374.713:11671): arch=c000003e syscall=2
> success=yes exit=3 a0=400b67 a1=2 a2=0 a3=7fff4daa1200 items=1 ppid=1352
> pid=1375 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
> tty=tty6 ses=3 comm="ncr-setkey" exe="/home/mitr/cryptodev-
> linux/userspace/ncr-setkey" subj=unconfined_u:unconfined_r:unconfined_t:s0-
> s0:c0.c1023 key=(null)
> type=CRYPTO_USERSPACE_OP msg=audit(1281013374.713:11671):
> crypto_op=context_new ctx=0
> type=CWD msg=audit(1281013374.713:11671): cwd="/root"
> type=PATH msg=audit(1281013374.713:11671): item=0 name="/dev/crypto"
> inode=12498 dev=00:05 mode=020660 ouid=0 ogid=0 rdev=0a:3a
> obj=system_u:object_r:device_t:s0
> type=CRYPTO_STORAGE_KEY msg=audit(1281013374.715:11672): key_size=16
>
>
> The netlink aproach, we only get the credentials that ride with the netlink
> packet
>
> http://lxr.linux.no/#linux+v2.6.35/include/linux/netlink.h#L159
>
> There really is no comparison between what can be recorded synchronously vs
> async.
>

Ok, so the $64 dollar question then: Do FIPS or Common Criteria require that you
log more than whats in the netlink packet?

>
> > It sounds from previous emails that, generally speaking, you're worried that
> > you want the task struct that current points to in the recvmsg call be
> > guaranteeed to be the same as the task struct that current points to in the
> > sendmsg call (i.e. no children (re)using the same socket descriptor, etc).
>
> Not really, we are _hoping_ that the pid in the netlink credentials is the
> same pid that sent the packet in the first place. From the time we reference
> the pid out of the skb until we go hunting and locate the pid in the list of
> tasks, it could have died and been replaced with another task with different
> credentials. We can't take that risk.
>
>
> > Can this be handled by using the fact that netlink is actually syncronous
> > under the covers?
>
> But its not or all the credentials would not be riding in the skb.
>
Not true. It not guaranteed to, as you can do anything you want when you
receive a netlink msg in the kernel. But thats not to say that you can't
enforce synchronization, and in so doing obtain more information.

>
> > Can you ennumerate here what FIPS and Common Criteria mandate be presented
> > in the audit logs?
>
> Who did what to whom at what time and what was the outcome. In the case of
> configuration changes we need the new and old values. However, we need extra
> information to make the selective audit work right.
>
Somehow I doubt that FIPS mandates that audit messages include "who did what to
whoom and what the result was" :). Seriously, what do FIPS and common criteria
require in an audit message? I assume this is a partial list:
* sending process id
* requested operation
* session id
* user id
* group id
* operation result

I see lots of other items in the above log message, but I'm not sure what else
is required. Can you elaborate?

Neil

> -Steve

2010-08-10 19:37:34

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 02:19:59PM -0400, Miloslav Trmac wrote:
> > ----- "Neil Horman" <[email protected]> wrote:
> > It _doesn't matter_ that I don't receive a response. I have caused
> an operation in the kernel and the requested audit record is
> incorrect. The audit subsystem needs to work even if - especially if
> - the userspace process is malicious. The process might have wanted
> to simply cause a misleading audit record; or the operation (such as
> importing a key from supplied data) might not really need the response
> in order to achieve its effect.
>
> Why is it incorrect? What would audit have recorded in the above case? That a
> process attempted to recieve data on a descriptor that it didn't have open?
Again, the receive side _doesn't matter_. At least by the formal semantics of netlink (not relying on the AFAIK undocumented synchronous implementation), there's a risk that the audit record about the _sender_ is incorrect (athough the inability to audit the identity of the recipient of the data might be a problem as well, I'm not sure).

> > > > > And
> > > > > in the event that happens, Audit should log a close event on the
> > > > > fd inquestion
> > > > > between the operations.
> > > > audit only logs explicitly requested operations, so an administrator
> > > that asks for crypto events does not automatically get any close
> > > events. Besides, the audit record should be correct in the first
> > > place, instead of giving the admin a puzzle to decipher.
> > > I still don't see whats incorrect here. If two processes wind up reusing a
> > > process id, thats audits problem to figure out, nothing elses.
> > If an operation attributed to user A is recorded by the kernel as
> being performed by user B, that is not an user problem.
> >
> You're right, its also not a netlink problem, a cryptodev problem, or an ioctl
> problem. Its an audit problem.
The user doesn't particularly care which subsystem maintainer is supposed to fix what :) The reality seems to be that audit and netlink don't play well together.

> > > > > Theres also the child process case, in which a child process might read
> > > > > responses from requests sent by a parent, and vice versa, since fork inherits
> > > > > file descriptors, but thats an issue inherent in any mechanism you use,
> > > > > including the character interface, so I'm not sure what the problem is
> > > > > there.
> > > > Actually that's not a problem with ioctl() because the response is
> > > written back _before_ ioctl() returns, so it is always provided to the
> > > original calling thread.
> > > >
> > > > With the current design the parent and child share the key/session
> > > namespace after fork(), but operation results are always reliably and
> > > automatically provided to the thread that invoked the operation,
> > > without any user-space collaboration.
> >
> > > Is that _really_ that important? That the kernel return data in the same call
> > > that its made?
> > Not for audit; but yes, it is important.
> >
> I don't follow you here. If its not important for audit to have synchrnous
> calls, why is it important at all?
There are two separate reasons to avoid netlink:

- Audit wants the operation to run in the process context of the caller. This is not directly related to the question if there is one or >=2 syscalls.

- Operation invocation method. Here one syscall is important for the reasons I have (performance, simplicity/reliability). Making audit work well with netlink does not override these reasons in my opinion.

> > 1) performance: this is not ip(8), where no one cares if the
> operation runs 10ms or 1000ms. Crypto is at least 2 operations per
> SSL record (which can be as little as 1 byte of application data), and
> users will care about the speed. Batching more operations into a
> single request is impractical because it would require very extensive
> changes in users of the crypto libraries, and the generic crypto
> interfaces such as PKCS#11 don't natively support batching so it might
> not even be possible to express this in some libraries.
> >
>
> Ok, so is the assertion here that your kernel interface is restricted by other user space
> libraries?
The kernel interface is not "restricted" by other user-space libraries as such, but the only practical way for most user-space applications to use the kernel interface (within the next 5 years) is to modify existing user space libraries to use the kernel interface as a backend (there's just not enough manpower to port the whole world, and quite a few upstream maintainers are understandably reluctant to port their code to a Linux-specific interface). In that sense, only the features that are _currently_ provided by user-space libraries matter when discussing performance and other impact on users.

> i.e. the operation has to be syncronous because anymore than 1
> syscall per operation has too much impact on performance?
We need to assume that this implementation will be benchmarked against pure user-space implementations (same as measuring SELinux impact compared to kernels that don't use it). In that sense doubling the number of syscalls is not at all great.

> I ask
> because the
> recvmmsg (2 m's) will give you the exact same performance profile as ioctl. You
> can let the kernel do all the batching with multiple calls to sendmsg, and get
> all the responses at once with a single additional call to recvmmsg.
I'm afraid that doesn't help because the userspace libraries will only request one operation at a time.
Mirek

2010-08-10 19:41:27

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 02:58:01PM -0400, Miloslav Trmac wrote:
> ----- "Neil Horman" <[email protected]> wrote:
> > On Tue, Aug 10, 2010 at 11:36:16AM -0400, Miloslav Trmac wrote:
> > > I think it would be useful to separate thinking about the data
> > format and about the transmission mechanism. ioctl() can quite well
> > be used to carry "netlink-like" packets - blobs of data with specified
> > length and flexible internal structure - and on the other hand,
> > netlink could be (and often is) used to carry fixed structs instead of
> > variable-length packets.
> >
> > Yes, both mechanism can be used to carry either fixed or variable length
> > payloads. The difference is ioctl has no built in mechanism to do variable
> > length data safely. To do variable length data you need to setup a bunch of
> > pointers to extra data that you have to copy separately.
> No, I can do exactly what netlink does:
>
> struct operation {
> // fixed params;
> size_t size_of_attrs;
> char attrs[]; // struct nlattr-tagged data
> };
>
> at the cost of one additional copy_from_user() (which netlink users often pay as well because using struct iovec is so convenient). Or perhaps even better, I can make sure the userspace application won't mess up the pointer arithmetic and get rid of all of the macros:
>
> struct operation {
> // fixed params;
> size_t num_attrs;
> struct { struct nlattr header; union { ... } data } attrs[];
> }
>
> at the same cost (and sacrificing string and variable-length parameters - but as argued below, pointers are still an option).
>
> > Even then, you're
> > fixing the number of pointers that you have in your base structure. To add or
> > remove any would break your communication protocol to user space. This is
> > exactly the point I made above.
> The existence of a base structure does not inherently limit the number of extensions.
>
> > And there is no need for versioning in the netlink packet, because the data
> > types are all inlined, typed and attached to length values (at least when done
> > correctly, see then nl_attr structure and associated macros). You don't have
> > that with your ioctl. You could add it for certain, but at that point you're
> > re-inventing the wheel.
> Right, the nl_attr structure is quite nice, and I didn't know about it. Still...
>
>
> > > As for the transmission mechanism, netlink seems to me to be one of
> > the least desirable options: as described above, it does not provide
> > any inherent advantages to ioctl, and it has significantly higher
> > overhead (sendmsg()+recvmsg(), handling netlink ACKs, matching
> > requests and replies in multi-threaded programs), and it makes
> > auditing difficult.
> >
> > You're incorrect. I've explained several of the advantiges above and in my previous
> > email, you're just not seeing them. I will grant you some additional overhead
> > in the use of of two system calls rather than one per operation in the nominal
> > case, but there is no reason that can't be mitigated by the bundling of multiple
> > operations into a single netlink packet.
> None of the existing crypto libraries provide such interfaces, and restructuring applications to take advantage of them would often be difficult - just restructuring the NSS _internals_ to support this would be difficult. Porting applications to a Linux-specific interface would make them less portable; besides, we have tried porting applications to a different library before (http://fedoraproject.org/wiki/FedoraCryptoConsolidation ), and based on that experience, any such new interface would have minimal, if any, impact.
>
> > Likewise, matching requests and responses in a multi-threaded program is also an
> > already solved issue in multiple ways. The use of multiple sockets, in a 1 per
> > thread fashion is the most obvious.
> That would give each thread a separate namespace of keys/sessions, rather strange and a problem to fit into existing applications.
>
> > Theres also countless approaches in which a
> > thread can reassemble responses to registered requests in such a way that the
> > user space portion of an application sees these calls as being synchronous. Its
> > really not that hard.
> The overhead is still unnecessary.
>
> > > > 2) Use of pointers. Thats just a pain. You have the compat ioctl stuff in
> > > > place to handle the 32/64 bit conversions, but it would be really nice if you
> > > > could avoid having to maintain that extra code path.
> > > Pointers are pretty much unavoidable, to allow zero-copy references
> > to input/output data. If that means maintaining 32-bit compat paths
> > (as opposed to codifying say uint128_t for pointers in fixed-size
> > structures), then so be it: the 32-bit paths will simply have to be
> > maintained. Once we have the 32-bit compat paths, using pointers for
> > other unformatted, variable-sized data (e.g. IVs, multiple-precision
> > integers) seems to be easier than using variable-size data structures
> > or explicit pointer arithmetic to compute data locations within the
> > data packets.
> >
> > No, they're not unavoidable. Thats the point I made in my last email. If you
> > use netlink, you inline all your data into a single byte array, with the proper
> > headers on it. no use of additional pointers needed at all. One buffer in, one
> > buffer out.
> Right, one copy of input data in sendmsg() from userspace to a skbuf, one copy of output data from a skbuf into userspace. The submitted code already implements zero-copy for data: the pointers are used to form scatter-gather lists submitted directly to the crypto code. I don't think I can do that with the linear skbuf I get from af_netlink.c.
>
>
> Overall, I'd much rather do the programming necessary to clone the nl_attr code, than suffer the system call overhead and audit problems, if those are the only options.
> Mirek

Ok, well, I suppose we're just not going to agree on this. I don't know how
else to argue my case, you seem to be bent on re-inventing the wheel instead of
using what we have. Good luck.

Neil

2010-08-10 19:44:51

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> On Tue, Aug 10, 2010 at 03:10:12PM -0400, Steve Grubb wrote:
> > > Can you ennumerate here what FIPS and Common Criteria mandate be presented
> > > in the audit logs?
> >
> > Who did what to whom at what time and what was the outcome. In the case of
> > configuration changes we need the new and old values. However, we need extra
> > information to make the selective audit work right.
> >
> Somehow I doubt that FIPS mandates that audit messages include "who did what to
> whoom and what the result was" :).
Actually, that's about right for CC :)

> The TSF shall record within each audit record at least the following
> information:
> a) Date and time of the event, type of event, subject identity (if
> applicable), and the outcome (success or failure) of the event;

and, for specific operations, e.g.:
> Minimal level: Success and failure, and the type of cryptographic operation
> Basic level: Any applicable cryptographic mode(s) of operation, subject
> attributes and object attributes

Now what exactly is "subject/object identity" and "subject/object attributes" is the important question that's defined elsewhere, and I don't know enough about these aspects.
Mirek

2010-08-10 20:09:13

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tuesday, August 10, 2010 03:17:57 pm Neil Horman wrote:
> > There really is no comparison between what can be recorded synchronously
> > vs async.
>
> Ok, so the $64 dollar question then: Do FIPS or Common Criteria require
> that you log more than whats in the netlink packet?

The TSF shall be able to include or exclude auditable events from the set
of audited events based on the following attributes:
a) object identity,
b) user identity,
c) host identity,
d) event type,
e) success of auditable security events, and
f) failure of auditable security events.

We need the ability to selectively audit based on uid for example. The netlink
credentials doesn't have that. In many cases its the same as the loginuid, but
its not in the skb.

There is also a table of additional requirements. Take the case of logging in.
It says:

Origin of the attempt (e.g., terminal identifier, source IP address).

So, when someone changes their password and a hash function is called, we need
the origin information.

> > > It sounds from previous emails that, generally speaking, you're worried
> > > that you want the task struct that current points to in the recvmsg
> > > call be guaranteeed to be the same as the task struct that current
> > > points to in the sendmsg call (i.e. no children (re)using the same
> > > socket descriptor, etc).
> >
> > Not really, we are _hoping_ that the pid in the netlink credentials is
> > the same pid that sent the packet in the first place. From the time we
> > reference the pid out of the skb until we go hunting and locate the pid
> > in the list of tasks, it could have died and been replaced with another
> > task with different credentials. We can't take that risk.
> >
> > > Can this be handled by using the fact that netlink is actually
> > > syncronous under the covers?
> >
> > But its not or all the credentials would not be riding in the skb.
>
> Not true. It not guaranteed to, as you can do anything you want when you
> receive a netlink msg in the kernel. But thats not to say that you can't
> enforce synchronization, and in so doing obtain more information.

Racy.

> > > Can you ennumerate here what FIPS and Common Criteria mandate be
> > > presented in the audit logs?
> >
> > Who did what to whom at what time and what was the outcome. In the case
> > of configuration changes we need the new and old values. However, we
> > need extra information to make the selective audit work right.
>
> Somehow I doubt that FIPS mandates that audit messages include "who did
> what to whoom and what the result was" :). Seriously, what do FIPS and
> common criteria require in an audit message? I assume this is a partial
> list:

The TSF shall record within each audit record at least the following
information:

a) Date and time of the event, type of event, subject identity (if
applicable), and the outcome (success or failure) of the event; and
additional data as required. (Usually the object)

There are other implicit requirement such as selective audit that causes more
information to be needed as well as the additional requirements clause.

-Steve

2010-08-11 02:06:21

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

----- "Neil Horman" <[email protected]> wrote:
> Ok, well, I suppose we're just not going to agree on this. I don't know how
> else to argue my case, you seem to be bent on re-inventing the wheel instead of
> using what we have. Good luck.
Well, I basically spent yesterday learning about netlink and looking how it can or can not be adapted. I still see drawbacks that are not balanced by any benefits that are exclusive to netlink.

As a very unscientific benchmark, I modified a simple example program to add a simple non-blocking getmsg(..., MSG_PEEK) call on a netlink socket after each encryption operation; this is only a lower bound on the overhead because no copying of data between userspace and the skbuffer takes place and zero copy is still available. With cbc(aes), encrypting 256 bytes per operation, the throughput dropped from 156 MB/s to 124 MB/s (-20%); even with 32 KB per operation there is still a decrease from 131 to 127 MB/s (-2.7%).

If I have to do a little more programming work to get a permanent 20% performance boost, why not?

How about this: The existing ioctl (1-syscall interface) remains, using a very minimal fixed header (probably just a handle and perhaps algorithm ID) and using the netlink struct nlattr for all other attributes (both input and output, although I don't expect many output attribute).

- This gives us exactly the same flexibility benefits as using netlink directly.
- It uses the 1-syscall, higher performance, interface.
- The crypto operations are run in process context, making it possible to implement zero-copy operations and reliable auditing.
- The existing netlink parsing routines (both in the kernel and in libnl) can be used; formatting routines will have to be rewritten, but that's the easier part.

This kind of partial netlink reuse already has a precedent in the kernel, see zlib_compress_setup().

Would everyone be happy with such an approach?
Mirek

2010-08-11 06:50:28

by Linus Walleij

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

Hi Miloslav,

first, thanks a lot for working on the userspace API, we have long missed this
API and I've asked some times in the past about the status and proposals have
been stalled some times, so it is really fun to see that something is happening!

We recognize that since Redhat is providing hardware for governments, this
work will likely continue until the desired mechanism is available in a form
that can be integrated and shipped, which gives great confidence that it is
going to happen this time.

At ST-Ericsson we have considered to provide a user API to the kernel crypto
facilities as well, we have some rough initial ideas on how to go about this.
It has some similarities to what you/Redhat has presented, but we have some
further goals:

1. We'd like the API have to be general enough to not change with new
algorithms. Having to recompile user space programs becasuse of a minor
change is messing things up for us. This may be solved by a
functionality which presents the available crypto resources in the
user API. (As opposed to using enums for the algorithms.)

c.f how the ALSA mixer presents a lot of things to userspace without
using any enums at all in /dev/snd/controlC0 for card 0. For example
in include/linux/soundcard.h you find the different control knobs
enumerated with strings so as to avoid explicit enums.

We'd try to avoid as many enums as possible, and really only define
the necessary information nodes so that userspace can look for
strings like "aes-plain" instead, which is the same that dmcrypt
is using, and there are already descriptions of available algorithms
in /proc/crypto (from crypto/proc.c) using this format.

2. To avoid security hazards the API would benefit from being programmed
with at least some secure programming concepts in mid. Input argument
checking separate from algorithm separate from output argument checking,
and erasing of information from stacks and buffers. More or less
the advice you will find in places like:
http://www.dwheeler.com/secure-programs/

(Evidently we and others will help out reviewing and patching up
proposed code in this aspect, also since you're working on
government business I believe security audits will be mandatory?)

For internal keys, a function for compare of HMAC function results
could improve security considerably.

3. A general interface for stream ciphers would be nice. The only differences
are that they do not operate on blocks, but bits, and that they
always require
an IV. Arguably this can be added later if the aspect if just considered when
devising the interface. The recent discussion in this thread
regarding netlink
points in a direction where streams are a natural part of the concept I
believe.

There are some submission-related comments as well:

- The API description in the commit log of patch 1 should be a file in
Documentation/crypto/usespace-api.txt or so instead, this is of general
interest.

- The big patch 0002 to crypto/userspace is including the Makefile
changes for patch 0004 making it non-bisectable. Also it is very large,
is it possible to break it down a little? Many files are prefixed "ncr-*"
and I don't see why - can this prefix simply be removed?

I hope the patch 0004 with software fallbacks is not strictly required
by the userspace API so these two can be considered separately?
Else can you describe how the dependecies go.. surely it must be
possible to patch in the software fallbacks in libtom* separately?

- The big patch containing the entire libtomcrypt should be destined to
drivers/staging or similar at this point (OK it is not a driver, should have
been lib/staging if such a thing existed). The sheer size of it makes it very
hard to review, and all attempts at breaking it in smaller pieces would
be much appreciated. For example is it possible to have one patch per
algorithm?

Also: isn't this creating a fork of the library? Not that it matters much as
Linux is finding good use of it.

Since it is a fork, it should be adopted to the Linux source hiearchy, and
since here every algorithm is directly in crypto/ please remove all the
libtomcrypt subdir and put all directly into crypto/ for simplicity (subdirs
below like hashes, headers etc are nice and should be preserved though).

libtomath seems to be duplicating a lot of in-kernel stuff already found
inside the kernel, and needs to be merged with care. Arguably parts of
this can be cleaned-up later but it'd be nice to make some initial attempts
at unifying the math infrastructure.

Overall, thanks for working on this.

Yours,
Linus Walleij (et al)

2010-08-11 11:31:31

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

Hello,
thanks your review and all comments.

----- "Linus Walleij" <[email protected]> wrote:
> For internal keys, a function for compare of HMAC function results
> could improve security considerably.
I'm afraid I don't understand what this refers to. Can you give me an example? (You already can use OP_VERIFY with a HMAC, provide it the key, input data and incoming HMAC value, and receive a pass/fail indication.)

> 3. A general interface for stream ciphers would be nice. The only differences
> are that they do not operate on blocks, but bits, and that they always require
> an IV.
I think this is already supported: the IV is specified in "session init" operation, subsequent "session update" operations use a single crypto_ablkcipher that implicitly carries the IV through the chain of "update"s. Is that not sufficient?

> - The big patch 0002 to crypto/userspace is including the Makefile
> changes for patch 0004 making it non-bisectable. Also it is very large,
> is it possible to break it down a little?
It must be :) I was lazy when posting it for initial review.

> Many files are prefixed "ncr-*"
> and I don't see why - can this prefix simply be removed?
I suppose so - originally the module included two separate interfaces, "ncr-*" corresponds to the ncr.h inteface.

> I hope the patch 0004 with software fallbacks is not strictly required
> by the userspace API so these two can be considered separately?
Those are not fallbacks - AFAIK there is currently no RSA/DSA implementation in the kernel. It can be separated from the userspace API at the cost of loss of functionality.

> Else can you describe how the dependecies go.. surely it must be
> possible to patch in the software fallbacks in libtom* separately?
Right now ncr-dh.c and ncr-pk.c depend on libtomcrypt, and libtomcrypt depends on some utilities in patch 2/4. At least the second dependency can be eliminated.

> - The big patch containing the entire libtomcrypt should be destined to
> drivers/staging or similar at this point (OK it is not a driver, should have
> been lib/staging if such a thing existed). The sheer size of it makes it very
> hard to review, and all attempts at breaking it in smaller pieces would
> be much appreciated.
As noted in the patch, we are considering replacing this with a libgcrypt-based implementation; this was included mainly for functional completeness.

Thanks again for the comments,
Mirek

2010-08-11 11:50:39

[permalink] [raw]

Subject: Re: [PATCH 0/4] RFC: "New" /dev/crypto user-space interface

On Tue, Aug 10, 2010 at 10:06:05PM -0400, Miloslav Trmac wrote:
> ----- "Neil Horman" <[email protected]> wrote:
> > Ok, well, I suppose we're just not going to agree on this. I don't know how
> > else to argue my case, you seem to be bent on re-inventing the wheel instead of
> > using what we have. Good luck.
> Well, I basically spent yesterday learning about netlink and looking how it can or can not be adapted. I still see drawbacks that are not balanced by any benefits that are exclusive to netlink.
>
> As a very unscientific benchmark, I modified a simple example program to add a simple non-blocking getmsg(..., MSG_PEEK) call on a netlink socket after each encryption operation; this is only a lower bound on the overhead because no copying of data between userspace and the skbuffer takes place and zero copy is still available. With cbc(aes), encrypting 256 bytes per operation, the throughput dropped from 156 MB/s to 124 MB/s (-20%); even with 32 KB per operation there is still a decrease from 131 to 127 MB/s (-2.7%).
>
> If I have to do a little more programming work to get a permanent 20% performance boost, why not?
>
Because your test is misleading. By my read, all you did was add an extra
syscall to the work your already doing. The best case result in such a
test is equivalent performance if the additional syscall takes near-zero time.
The test fails to take into account the change in programming model that you can
use in the kernel when you make the operation asynchronous. What happens to
your test if you change the cipher your using to an asynchronous form
(ablkcipher or ahash)? When you do that, you no longer need to stall the send
routine while the crypto operation completes.

I'm not saying that 2 syscalls will be faster than 1, but its not the slam dunk
you're indicating here.

>
> How about this: The existing ioctl (1-syscall interface) remains, using a very minimal fixed header (probably just a handle and perhaps algorithm ID) and using the netlink struct nlattr for all other attributes (both input and output, although I don't expect many output attribute).
>
> - This gives us exactly the same flexibility benefits as using netlink directly.
> - It uses the 1-syscall, higher performance, interface.
> - The crypto operations are run in process context, making it possible to implement zero-copy operations and reliable auditing.
> - The existing netlink parsing routines (both in the kernel and in libnl) can be used; formatting routines will have to be rewritten, but that's the easier part.
>
This would be better, but it really just seems like you're re-inventing the
wheel at this point. As noted above, I think your performance comparison fails
to account for advantages that can be leveraged in an asynchronous model. The
zero-copy argument is misleading, as both a single syscall and a multiple
syscall are not zero copy, a copy_from_user and copy_to_user is required in
both cases. About the only clear advantage that I see to using a single syscall
inteface is the additional audit information that you are afforded. And I'm
still not sure if the additional audit information is required by FIPS or Common
Criteria.

Also, now that I'm poking about in it, how do you intend to support the async
interfaces? aead/ablkcipher/ahash all require callbacks to be set when the
crypto operation completes. I assume that, in the kernel your cryptodev code,
if it used the 1 syscall interface would setup a lock, and block until the
operation was complete? If thats the case, and the actual crypto operation were
handled by an alternate task (see the cryptd module), wouldn't you be loosing
soe modicum of audit information as well, just as you would using the netlink
interface?
Neil

> This kind of partial netlink reuse already has a precedent in the kernel, see zlib_compress_setup().
>
> Would everyone be happy with such an approach?
> Mirek
>

2010-08-11 12:09:20