From 762989cc1e80e00e8b076ca4dc6ac17b41137d33 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 2 Jun 2016 10:15:47 +0900 Subject: [PATCH] Native data and metadata encryption for zfs --- usr/src/cmd/mdb/intel/amd64/libzpool/Makefile | 3 +- usr/src/cmd/mdb/intel/amd64/zfs/Makefile | 3 +- usr/src/cmd/mdb/intel/ia32/libzpool/Makefile | 3 +- usr/src/cmd/mdb/intel/ia32/zfs/Makefile | 3 +- usr/src/cmd/mdb/sparc/v7/libzpool/Makefile | 3 +- usr/src/cmd/mdb/sparc/v9/libzpool/Makefile | 3 +- usr/src/cmd/zdb/Makefile.com | 3 + usr/src/cmd/zdb/zdb.c | 77 +- usr/src/cmd/zdb/zdb_il.c | 11 +- usr/src/cmd/zfs/zfs_main.c | 284 ++- usr/src/cmd/zinject/translate.c | 8 +- usr/src/cmd/zoneadm/Makefile | 2 + usr/src/cmd/zpool/zpool_main.c | 64 +- usr/src/cmd/zstreamdump/zstreamdump.c | 46 +- usr/src/cmd/ztest/ztest.c | 55 +- usr/src/common/zfs/zfeature_common.c | 10 + usr/src/common/zfs/zfeature_common.h | 1 + usr/src/common/zfs/zfs_deleg.c | 2 + usr/src/common/zfs/zfs_deleg.h | 2 + usr/src/common/zfs/zfs_prop.c | 83 +- usr/src/lib/libuutil/common/libuutil.h | 4 +- usr/src/lib/libzfs/Makefile.com | 9 +- usr/src/lib/libzfs/common/libzfs.h | 22 +- usr/src/lib/libzfs/common/libzfs_changelist.c | 6 +- usr/src/lib/libzfs/common/libzfs_crypto.c | 1596 ++++++++++++ usr/src/lib/libzfs/common/libzfs_dataset.c | 148 +- usr/src/lib/libzfs/common/libzfs_mount.c | 50 + usr/src/lib/libzfs/common/libzfs_pool.c | 28 +- usr/src/lib/libzfs/common/libzfs_sendrecv.c | 165 +- usr/src/lib/libzfs/common/libzfs_util.c | 2 + usr/src/lib/libzfs/common/mapfile-vers | 5 + usr/src/lib/libzfs_core/common/libzfs_core.c | 121 +- usr/src/lib/libzfs_core/common/libzfs_core.h | 18 +- usr/src/lib/libzfs_core/common/mapfile-vers | 3 + usr/src/lib/libzpool/Makefile.com | 6 + usr/src/lib/libzpool/common/kernel.c | 89 + usr/src/man/man1m/zfs.1m | 284 ++- usr/src/man/man1m/zpool.1m | 39 +- usr/src/man/man5/zpool-features.5 | 22 + usr/src/pkg/manifests/system-test-zfstest.mf | 117 + usr/src/test/zfs-tests/runfiles/delphix.run | 1 + usr/src/test/zfs-tests/runfiles/omnios.run | 18 +- .../cli_root/zfs_change-key/Makefile | 21 + .../cli_root/zfs_change-key/cleanup.ksh | 30 + .../cli_root/zfs_change-key/setup.ksh | 32 + .../zfs_change-key/zfs_change-key.ksh | 62 + .../zfs_change-key/zfs_change-key_format.ksh | 84 + .../zfs_change-key/zfs_change-key_inherit.ksh | 91 + .../zfs_change-key/zfs_change-key_load.ksh | 58 + .../zfs_change-key_location.ksh | 65 + .../zfs_change-key_pbkdf2iters.ksh | 75 + .../zfs_clone/zfs_clone_encrypted.ksh | 80 + .../zfs_create/zfs_create_014_pos.ksh | 121 + .../zfs_create/zfs_create_crypt_combos.ksh | 98 + .../zfs_create/zfs_create_encrypted.ksh | 134 + .../functional/cli_root/zfs_load-key/Makefile | 21 + .../cli_root/zfs_load-key/cleanup.ksh | 30 + .../cli_root/zfs_load-key/setup.ksh | 32 + .../cli_root/zfs_load-key/zfs_load-key.cfg | 26 + .../cli_root/zfs_load-key/zfs_load-key.ksh | 85 + .../zfs_load-key/zfs_load-key_all.ksh | 77 + .../zfs_load-key/zfs_load-key_common.kshlib | 61 + .../zfs_load-key/zfs_load-key_file.ksh | 58 + .../zfs_load-key/zfs_load-key_location.ksh | 73 + .../zfs_load-key/zfs_load-key_noop.ksh | 54 + .../zfs_load-key/zfs_load-key_recursive.ksh | 66 + .../zfs_mount/zfs_mount_encrypted.ksh | 70 + .../zfs_receive/zfs_receive_encrypted_neg.ksh | 73 + .../zfs_receive/zfs_receive_encrypted_pos.ksh | 84 + .../zfs_rename/zfs_rename_encrypted_child.ksh | 69 + .../zfs_rename/zfs_rename_to_encrypted.ksh | 51 + .../zfs_send/zfs_send_encrypted_neg.ksh | 70 + .../zfs_send/zfs_send_encrypted_pos.ksh | 66 + 
.../cli_root/zfs_set/zfs_set_keylocation.ksh | 93 + .../cli_root/zfs_unload-key/Makefile | 21 + .../cli_root/zfs_unload-key/cleanup.ksh | 30 + .../cli_root/zfs_unload-key/setup.ksh | 32 + .../zfs_unload-key/zfs_unload-key.ksh | 68 + .../zfs_unload-key/zfs_unload-key_all.ksh | 76 + .../zfs_unload-key_recursive.ksh | 72 + .../zpool_create/zpool_create_024_pos.ksh | 111 + .../zpool_create_crypt_combos.ksh | 89 + .../zpool_create/zpool_create_encrypted.ksh | 89 + .../cli_root/zpool_get/zpool_get.cfg | 2 +- .../zpool_import/zpool_import_encrypted.ksh | 59 + .../zpool_import_encrypted_load.ksh | 59 + usr/src/uts/common/Makefile.files | 2 + usr/src/uts/common/fs/zfs/arc.c | 1458 +++++++++-- usr/src/uts/common/fs/zfs/dbuf.c | 134 +- usr/src/uts/common/fs/zfs/ddt.c | 21 +- usr/src/uts/common/fs/zfs/dmu.c | 291 ++- usr/src/uts/common/fs/zfs/dmu_objset.c | 162 +- usr/src/uts/common/fs/zfs/dmu_send.c | 714 ++++-- usr/src/uts/common/fs/zfs/dmu_traverse.c | 27 +- usr/src/uts/common/fs/zfs/dnode.c | 10 +- usr/src/uts/common/fs/zfs/dnode_sync.c | 8 +- usr/src/uts/common/fs/zfs/dsl_crypt.c | 2266 +++++++++++++++++ usr/src/uts/common/fs/zfs/dsl_dataset.c | 99 +- usr/src/uts/common/fs/zfs/dsl_destroy.c | 14 +- usr/src/uts/common/fs/zfs/dsl_dir.c | 32 +- usr/src/uts/common/fs/zfs/dsl_pool.c | 19 +- usr/src/uts/common/fs/zfs/dsl_scan.c | 19 +- usr/src/uts/common/fs/zfs/spa.c | 93 +- usr/src/uts/common/fs/zfs/spa_history.c | 7 +- usr/src/uts/common/fs/zfs/sys/arc.h | 61 +- usr/src/uts/common/fs/zfs/sys/dbuf.h | 1 + usr/src/uts/common/fs/zfs/sys/ddt.h | 12 +- usr/src/uts/common/fs/zfs/sys/dmu.h | 73 +- usr/src/uts/common/fs/zfs/sys/dmu_objset.h | 12 +- usr/src/uts/common/fs/zfs/sys/dmu_send.h | 8 +- usr/src/uts/common/fs/zfs/sys/dmu_traverse.h | 9 + usr/src/uts/common/fs/zfs/sys/dnode.h | 24 +- usr/src/uts/common/fs/zfs/sys/dsl_crypt.h | 191 ++ usr/src/uts/common/fs/zfs/sys/dsl_dataset.h | 27 +- usr/src/uts/common/fs/zfs/sys/dsl_deleg.h | 2 + usr/src/uts/common/fs/zfs/sys/dsl_dir.h | 3 + usr/src/uts/common/fs/zfs/sys/dsl_pool.h | 4 +- usr/src/uts/common/fs/zfs/sys/spa.h | 126 +- usr/src/uts/common/fs/zfs/sys/spa_impl.h | 2 + usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h | 55 +- usr/src/uts/common/fs/zfs/sys/zil.h | 4 +- usr/src/uts/common/fs/zfs/sys/zio.h | 48 +- usr/src/uts/common/fs/zfs/sys/zio_checksum.h | 2 +- usr/src/uts/common/fs/zfs/sys/zio_crypt.h | 163 ++ usr/src/uts/common/fs/zfs/sys/zio_impl.h | 51 +- usr/src/uts/common/fs/zfs/zfeature.c | 4 +- usr/src/uts/common/fs/zfs/zfs_ioctl.c | 264 +- usr/src/uts/common/fs/zfs/zfs_vfsops.c | 9 +- usr/src/uts/common/fs/zfs/zil.c | 69 +- usr/src/uts/common/fs/zfs/zio.c | 224 +- usr/src/uts/common/fs/zfs/zio_checksum.c | 72 +- usr/src/uts/common/fs/zfs/zio_crypt.c | 1481 +++++++++++ usr/src/uts/common/fs/zfs/zvol.c | 24 +- usr/src/uts/common/sys/fs/zfs.h | 42 + usr/src/uts/common/sys/mount.h | 6 + 135 files changed, 13710 insertions(+), 976 deletions(-) create mode 100644 usr/src/lib/libzfs/common/libzfs_crypto.c create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh create mode 100755 
usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_neg.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_pos.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_neg.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_pos.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh create mode 100755 
usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh create mode 100644 usr/src/uts/common/fs/zfs/dsl_crypt.c create mode 100644 usr/src/uts/common/fs/zfs/sys/dsl_crypt.h create mode 100644 usr/src/uts/common/fs/zfs/sys/zio_crypt.h create mode 100644 usr/src/uts/common/fs/zfs/zio_crypt.c diff --git a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile index 8947c877c511..62c467c646a7 100644 --- a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile @@ -39,7 +39,8 @@ MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ - -I../../../../../uts/common/fs/zfs + -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile index bae4d7f502fe..5290f97b7264 100644 --- a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile @@ -35,7 +35,8 @@ include ../../Makefile.amd64 include ../../../Makefile.module include ../../../common/modules/zfs/Makefile.zfs -CPPFLAGS += -I../../../../../uts/common/fs/zfs +CPPFLAGS += -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile index bd86114ad721..d37b96b0d4d5 100644 --- a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile @@ -38,7 +38,8 @@ MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ - -I../../../../../uts/common/fs/zfs + -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/ia32/zfs/Makefile b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile index 17ab63f959c1..2852860e58fb 100644 --- a/usr/src/cmd/mdb/intel/ia32/zfs/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile @@ -34,7 +34,8 @@ include ../../Makefile.ia32 include ../../../Makefile.module include ../../../common/modules/zfs/Makefile.zfs -CPPFLAGS += -I../../../../../uts/common/fs/zfs +CPPFLAGS += -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile index 8d90aaca0ff7..36ee6528e520 100644 --- a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile +++ b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile @@ -38,7 +38,8 @@ MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ - -I../../../../../uts/common/fs/zfs + -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile index ab253f6363e9..ce4e8d0bfc20 100644 --- a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile +++ 
b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile @@ -39,7 +39,8 @@ MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ - -I../../../../../uts/common/fs/zfs + -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com index ae18f2872d44..8a4a76b27751 100644 --- a/usr/src/cmd/zdb/Makefile.com +++ b/usr/src/cmd/zdb/Makefile.com @@ -57,6 +57,9 @@ LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 LINTFLAGS += -erroff=E_STATIC_UNUSED LINTFLAGS64 += -erroff=E_STATIC_UNUSED +LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN + .KEEP_STATE: all: $(PROG) diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 0137e6f4481e..6c9d1e801a0a 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #undef verify #include @@ -1598,7 +1599,7 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) uint64_t version = 0; VERIFY3P(sa_os, ==, NULL); - err = dmu_objset_own(path, type, B_TRUE, tag, osp); + err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp); if (err != 0) { (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, strerror(err)); @@ -1617,7 +1618,7 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) if (err != 0) { (void) fprintf(stderr, "sa_setup failed: %s\n", strerror(err)); - dmu_objset_disown(*osp, tag); + dmu_objset_disown(*osp, B_FALSE, tag); *osp = NULL; } } @@ -1632,7 +1633,7 @@ close_objset(objset_t *os, void *tag) VERIFY3P(os, ==, sa_os); if (os->os_sa != NULL) sa_tear_down(os); - dmu_objset_disown(os, tag); + dmu_objset_disown(os, B_FALSE, tag); sa_attr_table = NULL; sa_os = NULL; } @@ -1785,6 +1786,7 @@ dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) { } + static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { dump_none, /* unallocated */ dump_zap, /* object directory */ @@ -1849,6 +1851,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) dmu_buf_t *db = NULL; dmu_object_info_t doi; dnode_t *dn; + boolean_t dnode_held = B_FALSE; void *bonus = NULL; size_t bsize = 0; char iblk[32], dblk[32], lsize[32], asize[32], fill[32]; @@ -1865,16 +1868,33 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) if (object == 0) { dn = DMU_META_DNODE(os); + dmu_object_info_from_dnode(dn, &doi); } else { - error = dmu_bonus_hold(os, object, FTAG, &db); + /* + * Encrypted datasets will have sensitive bonus buffers + * encrypted. Therefore we cannot hold the bonus buffer and + * must hold the dnode itself instead. 
+ */ + error = dmu_object_info(os, object, &doi); if (error) - fatal("dmu_bonus_hold(%llu) failed, errno %u", - object, error); - bonus = db->db_data; - bsize = db->db_size; - dn = DB_DNODE((dmu_buf_impl_t *)db); + fatal("dmu_object_info() failed, errno %u", error); + + if (os->os_encrypted && + DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { + error = dnode_hold(os, object, FTAG, &dn); + if (error) + fatal("dnode_hold() failed, errno %u", error); + dnode_held = B_TRUE; + } else { + error = dmu_bonus_hold(os, object, FTAG, &db); + if (error) + fatal("dmu_bonus_hold(%llu) failed, errno %u", + object, error); + bonus = db->db_data; + bsize = db->db_size; + dn = DB_DNODE((dmu_buf_impl_t *)db); + } } - dmu_object_info_from_dnode(dn, &doi); zdb_nicenum(doi.doi_metadata_block_size, iblk); zdb_nicenum(doi.doi_data_block_size, dblk); @@ -1918,9 +1938,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) (void) printf("\tdnode maxblkid: %llu\n", (longlong_t)dn->dn_phys->dn_maxblkid); - object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, - bonus, bsize); - object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); + if (!dnode_held) { + object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, + object, bonus, bsize); + } else { + (void) printf("\t\t(bonus encrypted)\n"); + } + + if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) { + object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, + NULL, 0); + } else { + (void) printf("\t\t(object encrypted)\n"); + } + *print_header = 1; } @@ -1962,6 +1993,8 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) if (db != NULL) dmu_buf_rele(db, FTAG); + if (dnode_held) + dnode_rele(dn, FTAG); } static char *objset_types[DMU_OST_NUMTYPES] = { @@ -2265,7 +2298,7 @@ dump_path(char *ds, char *path) if (err != 0) { (void) fprintf(stderr, "can't lookup root znode: %s\n", strerror(err)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (EINVAL); } @@ -2376,9 +2409,11 @@ dump_one_dir(const char *dsname, void *arg) int error; objset_t *os; - error = open_objset(dsname, DMU_OST_ANY, FTAG, &os); - if (error != 0) + error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, B_FALSE, FTAG, &os); + if (error != 0) { + (void) printf("Could not open %s, error %d\n", dsname, error); return (0); + } for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (!dmu_objset_ds(os)->ds_feature_inuse[f]) @@ -2833,7 +2868,8 @@ dump_block_stats(spa_t *spa) zdb_cb_t zcb = { 0 }; zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; - int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; + int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; boolean_t leaks = B_FALSE; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", @@ -3132,8 +3168,8 @@ dump_simulated_ddt(spa_t *spa) spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - zdb_ddt_add_cb, &t); + (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); spa_config_exit(spa, SCL_CONFIG, FTAG); @@ -3960,7 +3996,8 @@ main(int argc, char **argv) } } } else { - error = open_objset(target, DMU_OST_ANY, FTAG, &os); + error = dmu_objset_own(target, DMU_OST_ANY, + B_TRUE, B_FALSE, FTAG, &os); } } nvlist_free(policy); diff --git a/usr/src/cmd/zdb/zdb_il.c b/usr/src/cmd/zdb/zdb_il.c index bc02b1b6709f..242064c3ac3f 100644 --- 
a/usr/src/cmd/zdb/zdb_il.c +++ b/usr/src/cmd/zdb/zdb_il.c @@ -306,8 +306,13 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg) (u_longlong_t)lr->lrc_txg, (u_longlong_t)lr->lrc_seq); - if (txtype && verbose >= 3) - zil_rec_info[txtype].zri_print(zilog, txtype, lr); + if (txtype && verbose >= 3) { + if (!zilog->zl_os->os_encrypted) { + zil_rec_info[txtype].zri_print(zilog, txtype, lr); + } else { + (void) printf("%s(encrypted)\n", prefix); + } + } zil_rec_info[txtype].zri_count++; zil_rec_info[0].zri_count++; @@ -394,7 +399,7 @@ dump_intent_log(zilog_t *zilog) if (verbose >= 2) { (void) printf("\n"); (void) zil_parse(zilog, print_log_block, print_log_record, NULL, - zh->zh_claim_txg); + zh->zh_claim_txg, B_FALSE); print_log_stats(verbose); } } diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index e34a40a474c5..4b0d874933d3 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -106,6 +106,9 @@ static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); static int zfs_do_bookmark(int argc, char **argv); +static int zfs_do_load_key(int argc, char **argv); +static int zfs_do_unload_key(int argc, char **argv); +static int zfs_do_change_key(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. @@ -153,6 +156,9 @@ typedef enum { HELP_RELEASE, HELP_DIFF, HELP_BOOKMARK, + HELP_LOAD_KEY, + HELP_UNLOAD_KEY, + HELP_CHANGE_KEY, } zfs_help_t; typedef struct zfs_command { @@ -206,6 +212,9 @@ static zfs_command_t command_table[] = { { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, + { "load-key", zfs_do_load_key, HELP_LOAD_KEY }, + { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY }, + { "change-key", zfs_do_change_key, HELP_CHANGE_KEY }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -247,7 +256,7 @@ get_usage(zfs_help_t idx) "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" - "\tmount [-vO] [-o opts] <-a | filesystem>\n")); + "\tmount [-lvO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: @@ -264,16 +273,16 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] " + return (gettext("\tsend [-DnPpRvLecr] [-[iI] snapshot] " "\n" - "\tsend [-Le] [-i snapshot|bookmark] " + "\tsend [-Lecr] [-i snapshot|bookmark] " "\n" "\tsend [-nvPe] -t \n")); case HELP_SET: return (gettext("\tset ... " " ...\n")); case HELP_SHARE: - return (gettext("\tshare <-a | filesystem>\n")); + return (gettext("\tshare [-l] <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot [-r] [-o property=value] ... 
" "@ ...\n")); @@ -324,6 +333,17 @@ get_usage(zfs_help_t idx) "[snapshot|filesystem]\n")); case HELP_BOOKMARK: return (gettext("\tbookmark \n")); + case HELP_LOAD_KEY: + return (gettext("\tload-key [-rn] [-L ] " + "<-a | filesystem|volume>\n")); + case HELP_UNLOAD_KEY: + return (gettext("\tunload-key [-r] " + "<-a | filesystem|volume>\n")); + case HELP_CHANGE_KEY: + return (gettext("\tchange-key [-l] [-o keyformat=]" + "\t [-o keylocation=] [-o pbkfd2iters=]" + "\t \n" + "\tchange-key -i [-l] \n")); } abort(); @@ -845,7 +865,7 @@ zfs_do_create(int argc, char **argv) (void) snprintf(msg, sizeof (msg), gettext("cannot create '%s'"), argv[0]); if (props && (real_props = zfs_valid_proplist(g_zfs, type, - props, 0, NULL, zpool_handle, msg)) == NULL) { + props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) { zpool_close(zpool_handle); goto error; } @@ -3721,7 +3741,7 @@ zfs_do_send(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:c", long_options, + while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:cr", long_options, NULL)) != -1) { switch (c) { case 'i': @@ -3769,6 +3789,9 @@ zfs_do_send(int argc, char **argv) case 'c': flags.compress = B_TRUE; break; + case 'r': + flags.raw = B_TRUE; + break; case ':': /* * If a parameter was not passed, optopt contains the @@ -3837,6 +3860,12 @@ zfs_do_send(int argc, char **argv) } } + if (flags.raw && flags.compress) { + (void) fprintf(stderr, + gettext("raw and compress flags are mutually exclusive\n")); + return (1); + } + if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" @@ -3876,6 +3905,8 @@ zfs_do_send(int argc, char **argv) lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags.compress) lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if (flags.raw) + lzc_flags |= LZC_SEND_FLAG_RAW; if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { @@ -4111,6 +4142,8 @@ zfs_do_receive(int argc, char **argv) #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE @@ -4131,6 +4164,8 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, + { ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY }, + { ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, @@ -4698,6 +4733,12 @@ deleg_perm_comment(zfs_deleg_note_t note) case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; + case ZFS_DELEG_NOTE_LOAD_KEY: + str = gettext("Allows loading or unloading an encryption key"); + break; + case ZFS_DELEG_NOTE_CHANGE_KEY: + str = gettext("Allows changing or adding an encryption key"); + break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); @@ -5961,7 +6002,7 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, } if (!zfs_is_mounted(zhp, NULL) && - zfs_mount(zhp, NULL, 0) != 0) + zfs_mount(zhp, NULL, flags) != 0) return (1); if (protocol == NULL) { @@ -6068,7 +6109,7 @@ share_mount(int op, int argc, char **argv) int flags = 0; /* check options */ - while ((c = getopt(argc, argv, op == OP_MOUNT ? 
":avo:O" : "a")) + while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:O" : "al")) != -1) { switch (c) { case 'a': @@ -6077,6 +6118,9 @@ share_mount(int op, int argc, char **argv) case 'v': verbose = B_TRUE; break; + case 'l': + flags |= MS_CRYPT; + break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " @@ -6979,6 +7023,230 @@ zfs_do_bookmark(int argc, char **argv) return (-1); } +typedef struct loadkey_cbdata { + boolean_t cb_loadkey; + boolean_t cb_recursive; + boolean_t cb_noop; + char *cb_keylocation; + uint64_t cb_numfailed; + uint64_t cb_numattempted; +} loadkey_cbdata_t; + +static int +load_key_callback(zfs_handle_t *zhp, void *data) +{ + int ret; + boolean_t is_encroot; + loadkey_cbdata_t *cb = data; + uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* + * If we are doing a recursive load-key, we want to skip loading + * keys for non-encryption roots and datasets whose keys are already + * in the desired end-state. + */ + if (cb->cb_recursive) { + ret = zfs_crypto_is_encryption_root(zhp, &is_encroot); + if (ret != 0) + return (ret); + if (!is_encroot) + return (0); + + if ((cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_AVAILABLE) || + (!cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_UNAVAILABLE)) + return (0); + } + + cb->cb_numattempted++; + + if (cb->cb_loadkey) + ret = zfs_crypto_load_key(zhp, cb->cb_noop, cb->cb_keylocation); + else + ret = zfs_crypto_unload_key(zhp); + + if (ret != 0) { + cb->cb_numfailed++; + return (ret); + } + + return (0); +} + +static int +load_unload_keys(int argc, char **argv, boolean_t loadkey) +{ + int c, ret = 0, flags = 0; + boolean_t do_all = B_FALSE; + loadkey_cbdata_t cb = { 0 }; + + cb.cb_loadkey = loadkey; + + while ((c = getopt(argc, argv, "anrL:")) != -1) { + /* noop and alternate keylocations only apply to zfs load-key */ + if (loadkey) { + switch (c) { + case 'n': + cb.cb_noop = B_TRUE; + continue; + case 'L': + cb.cb_keylocation = optarg; + continue; + default: + break; + } + } + + switch (c) { + case 'a': + do_all = B_TRUE; + cb.cb_recursive = B_TRUE; + break; + case 'r': + flags |= ZFS_ITER_RECURSE; + cb.cb_recursive = B_TRUE; + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (!do_all && argc == 0) { + (void) fprintf(stderr, + gettext("Missing dataset argument or -a option\n")); + usage(B_FALSE); + } + + if (do_all && argc != 0) { + (void) fprintf(stderr, + gettext("Cannot specify dataset with -a option\n")); + usage(B_FALSE); + } + + if (cb.cb_recursive && cb.cb_keylocation != NULL && + strcmp(cb.cb_keylocation, "prompt") != 0) { + (void) fprintf(stderr, gettext("alternate keylocation may only " + "be 'prompt' with -r or -a\n")); + usage(B_FALSE); + } + + ret = zfs_for_each(argc, argv, flags, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL, 0, + load_key_callback, &cb); + + if (cb.cb_noop || (cb.cb_recursive && cb.cb_numattempted != 0)) { + (void) printf(gettext("%llu / %llu key(s) successfully %s\n"), + (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), + (u_longlong_t)cb.cb_numattempted, + loadkey ? (cb.cb_noop ? 
"verified" : "loaded") : + "unloaded"); + } + + if (cb.cb_numfailed != 0) + ret = -1; + + return (ret); +} + +static int +zfs_do_load_key(int argc, char **argv) +{ + return (load_unload_keys(argc, argv, B_TRUE)); +} + + +static int +zfs_do_unload_key(int argc, char **argv) +{ + return (load_unload_keys(argc, argv, B_FALSE)); +} + +static int +zfs_do_change_key(int argc, char **argv) +{ + int c, ret; + uint64_t keystatus; + boolean_t loadkey = B_FALSE, inheritkey = B_FALSE; + zfs_handle_t *zhp = NULL; + nvlist_t *props = fnvlist_alloc(); + + while ((c = getopt(argc, argv, "lio:")) != -1) { + switch (c) { + case 'l': + loadkey = B_TRUE; + break; + case 'i': + inheritkey = B_TRUE; + break; + case 'o': + if (parseprop(props, optarg) != 0) { + nvlist_free(props); + return (1); + } + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + if (inheritkey && !nvlist_empty(props)) { + (void) fprintf(stderr, + gettext("Properties not allowed for inheriting\n")); + usage(B_FALSE); + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("Missing dataset argument\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("Too many arguments\n")); + usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[argc - 1], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + usage(B_FALSE); + + if (loadkey) { + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus != ZFS_KEYSTATUS_AVAILABLE) { + ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (ret != 0) + goto error; + } + + /* refresh the properties so the new keystatus is visable */ + zfs_refresh_properties(zhp); + } + + ret = zfs_crypto_rewrap(zhp, props, inheritkey); + if (ret != 0) + goto error; + + nvlist_free(props); + zfs_close(zhp); + return (0); + +error: + if (props != NULL) + nvlist_free(props); + if (zhp != NULL) + zfs_close(zhp); + return (-1); +} + int main(int argc, char **argv) { diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c index 53a38e1ea853..090f2448b06e 100644 --- a/usr/src/cmd/zinject/translate.c +++ b/usr/src/cmd/zinject/translate.c @@ -175,7 +175,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, */ sync(); - err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); + err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os); if (err != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); @@ -185,7 +185,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, record->zi_objset = dmu_objset_id(os); record->zi_object = statbuf->st_ino; - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -261,7 +261,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range, * size. 
*/ if ((err = dmu_objset_own(dataset, DMU_OST_ANY, - B_TRUE, FTAG, &os)) != 0) { + B_TRUE, B_FALSE, FTAG, &os)) != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); goto out; @@ -323,7 +323,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range, dnode_rele(dn, FTAG); } if (os) - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (ret); } diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile index 2b01078aec17..23584bbb5bfd 100644 --- a/usr/src/cmd/zoneadm/Makefile +++ b/usr/src/cmd/zoneadm/Makefile @@ -38,6 +38,8 @@ POFILES= $(OBJS:%.o=%.po) LDLIBS += -lzonecfg -lsocket -lgen -lpool -lzfs -luuid -lnvpair -lbrand -ldladm -lsecdb +INCS += -I../../common/zfs + CERRWARN += -_gcc=-Wno-uninitialized .KEEP_STATE: diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 82b9672a44cf..ceb3cbc95770 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -221,12 +221,13 @@ get_usage(zpool_help_t idx) return (gettext("\thistory [-il] [] ...\n")); case HELP_IMPORT: return (gettext("\timport [-d dir] [-D]\n" - "\timport [-d dir | -c cachefile] [-F [-n]] \n" + "\timport [-d dir | -c cachefile] [-F [-n]] [-l] " + "\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]]\n" "\t [newpool]\n")); case HELP_IOSTAT: @@ -263,7 +264,7 @@ get_usage(zpool_help_t idx) case HELP_SET: return (gettext("\tset \n")); case HELP_SPLIT: - return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n" + return (gettext("\tsplit [-nl] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " "[ ...]\n")); case HELP_REGUID: @@ -1830,6 +1831,7 @@ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, nvlist_t *props, int flags) { + int ret = 0; zpool_handle_t *zhp; char *name; uint64_t state; @@ -1890,6 +1892,16 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); + /* + * Loading keys is best effort. We don't want to return if it fails + * but we do want to give the error to the caller. + */ + if (flags & ZFS_IMPORT_LOAD_KEYS) { + ret = zfs_crypto_attempt_load_keys(g_zfs, name); + if (ret != 0) + ret = 1; + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { @@ -1898,14 +1910,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, } zpool_close(zhp); - return (0); + return (ret); } /* * zpool import [-d dir] [-D] - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile] [-f] -a - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile] [-f] [-n] [-F] [newpool] * * -c Read pool information from a cachefile instead of searching @@ -1940,6 +1952,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, * * -a Import all pools found. * + * -l Load encryption keys while importing. + * * -o Set property=value and/or temporary mount options (without '='). 
* * The import command scans for pools to import, and import pools based on pool @@ -1976,7 +1990,7 @@ zpool_do_import(int argc, char **argv) char *endptr; /* check options */ - while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX")) != -1) { + while ((c = getopt(argc, argv, ":aCc:d:DEfFlmnNo:rR:T:VX")) != -1) { switch (c) { case 'a': do_all = B_TRUE; @@ -2006,6 +2020,9 @@ zpool_do_import(int argc, char **argv) case 'F': do_rewind = B_TRUE; break; + case 'l': + flags |= ZFS_IMPORT_LOAD_KEYS; + break; case 'm': flags |= ZFS_IMPORT_MISSING_LOG; break; @@ -2074,6 +2091,17 @@ zpool_do_import(int argc, char **argv) usage(B_FALSE); } + if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) { + (void) fprintf(stderr, gettext("-l is incompatible with -N\n")); + usage(B_FALSE); + } + + if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) { + (void) fprintf(stderr, gettext("-l is only meaningful during " + "an import\n")); + usage(B_FALSE); + } + if ((dryrun || xtreme_rewind) && !do_rewind) { (void) fprintf(stderr, gettext("-n or -X only meaningful with -F\n")); @@ -3359,6 +3387,7 @@ zpool_do_detach(int argc, char **argv) * it were to be split. * -o Set property=value, or set mount options. * -R Mount the split-off pool under an alternate root. + * -l Load encryption keys while importing. * * Splits the named pool and gives it the new pool name. Devices to be split * off may be listed, provided that no more than one device is specified @@ -3376,6 +3405,7 @@ zpool_do_split(int argc, char **argv) char *mntopts = NULL; splitflags_t flags; int c, ret = 0; + boolean_t loadkeys = B_FALSE; zpool_handle_t *zhp; nvlist_t *config, *props = NULL; @@ -3383,7 +3413,7 @@ zpool_do_split(int argc, char **argv) flags.import = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":R:no:")) != -1) { + while ((c = getopt(argc, argv, ":R:lno:")) != -1) { switch (c) { case 'R': flags.import = B_TRUE; @@ -3394,6 +3424,9 @@ zpool_do_split(int argc, char **argv) usage(B_FALSE); } break; + case 'l': + loadkeys = B_TRUE; + break; case 'n': flags.dryrun = B_TRUE; break; @@ -3429,6 +3462,12 @@ zpool_do_split(int argc, char **argv) usage(B_FALSE); } + if (!flags.import && loadkeys) { + (void) fprintf(stderr, gettext("loading keys is only " + "valid when importing the pool\n")); + usage(B_FALSE); + } + argc -= optind; argv += optind; @@ -3473,6 +3512,13 @@ zpool_do_split(int argc, char **argv) */ if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) return (1); + + if (loadkeys) { + ret = zfs_crypto_attempt_load_keys(g_zfs, newpool); + if (ret != 0) + ret = 1; + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { ret = 1; diff --git a/usr/src/cmd/zstreamdump/zstreamdump.c b/usr/src/cmd/zstreamdump/zstreamdump.c index 17adbecd7953..566e6141704a 100644 --- a/usr/src/cmd/zstreamdump/zstreamdump.c +++ b/usr/src/cmd/zstreamdump/zstreamdump.c @@ -202,6 +202,7 @@ main(int argc, char *argv[]) char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; uint64_t total_records = 0; + uint64_t payload_size; dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -213,6 +214,7 @@ main(int argc, char *argv[]) struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; struct drr_checksum *drrc = 
&thedrr.drr_u.drr_checksum; char c; boolean_t verbose = B_FALSE; @@ -412,24 +414,29 @@ main(int argc, char *argv[]) drro->drr_blksz = BSWAP_32(drro->drr_blksz); drro->drr_bonuslen = BSWAP_32(drro->drr_bonuslen); + drro->drr_raw_bonuslen = + BSWAP_32(drro->drr_raw_bonuslen); drro->drr_toguid = BSWAP_64(drro->drr_toguid); } + + payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); + if (verbose) { (void) printf("OBJECT object = %llu type = %u " - "bonustype = %u blksz = %u bonuslen = %u\n", + "bonustype = %u blksz = %u bonuslen = %u " + "raw_bonuslen = %u flags = %u\n", (u_longlong_t)drro->drr_object, drro->drr_type, drro->drr_bonustype, drro->drr_blksz, - drro->drr_bonuslen); + drro->drr_bonuslen, + drro->drr_raw_bonuslen, + drro->drr_flags); } if (drro->drr_bonuslen > 0) { - (void) ssread(buf, - P2ROUNDUP(drro->drr_bonuslen, 8), &zc); - if (dump) { - print_block(buf, - P2ROUNDUP(drro->drr_bonuslen, 8)); - } + (void) ssread(buf, payload_size, &zc); + if (dump) + print_block(buf, payload_size); } break; @@ -463,7 +470,7 @@ main(int argc, char *argv[]) BSWAP_64(drrw->drr_compressed_size); } - uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); + payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); /* * If this is verbose and/or dump output, @@ -472,7 +479,8 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE object = %llu type = %u " "checksum type = %u compression type = %u\n" - " offset = %llu logical_size = %llu " + " flags = %u offset = %llu " + "logical_size = %llu " "compressed_size = %llu " "payload_size = %llu " "props = %llx\n", @@ -480,6 +488,7 @@ main(int argc, char *argv[]) drrw->drr_type, drrw->drr_checksumtype, drrw->drr_compressiontype, + drrw->drr_flags, (u_longlong_t)drrw->drr_offset, (u_longlong_t)drrw->drr_logical_size, (u_longlong_t)drrw->drr_compressed_size, @@ -555,6 +564,7 @@ main(int argc, char *argv[]) if (do_byteswap) { drrs->drr_object = BSWAP_64(drrs->drr_object); drrs->drr_length = BSWAP_64(drrs->drr_length); + drrs->drr_type = BSWAP_32(drrs->drr_type); } if (verbose) { (void) printf("SPILL block for object = %llu " @@ -598,6 +608,22 @@ main(int argc, char *argv[]) (void) ssread(buf, P2ROUNDUP(drrwe->drr_psize, 8), &zc); break; + case DRR_OBJECT_RANGE: + if (do_byteswap) { + drror->drr_firstobj = + BSWAP_64(drror->drr_firstobj); + drror->drr_numslots = + BSWAP_64(drror->drr_numslots); + drror->drr_toguid = BSWAP_64(drror->drr_toguid); + } + if (verbose) { + (void) printf("OBJECT_RANGE firstobj = %llu " + "numslots = %llu flags = %u\n", + (u_longlong_t)drror->drr_firstobj, + (u_longlong_t)drror->drr_numslots, + drror->drr_flags); + } + break; } if (drr->drr_type != DRR_BEGIN && very_verbose) { (void) printf(" checksum = %llx/%llx/%llx/%llx\n", diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 16f79b52efc3..e57a0d80741b 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -2361,7 +2361,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL)); + spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); /* @@ -2369,7 +2369,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); + spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); /* @@ -2378,7 +2378,8 @@ 
ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ (void) rw_rdlock(&ztest_name_lock); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); + VERIFY3U(EEXIST, ==, + spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); @@ -2436,7 +2437,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY0(spa_create(name, nvroot, props, NULL)); + VERIFY0(spa_create(name, nvroot, props, NULL, NULL)); fnvlist_free(nvroot); fnvlist_free(props); @@ -3187,7 +3188,7 @@ static int ztest_dataset_create(char *dsname) { uint64_t zilset = ztest_random(100); - int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, + int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, NULL, ztest_objset_create_cb, NULL); if (err || zilset < 80) @@ -3210,7 +3211,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) /* * Verify that the dataset contains a directory object. */ - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ @@ -3218,7 +3219,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); /* * Destroy the dataset. @@ -3291,11 +3292,12 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * (invoked from ztest_objset_destroy_cb()) should just throw it away. */ if (ztest_random(2) == 0 && - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { + dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, + B_TRUE, FTAG, &os) == 0) { ztest_zd_init(&zdtmp, NULL, os); zil_replay(os, &zdtmp, ztest_replay_vector); ztest_zd_fini(&zdtmp); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); } /* @@ -3309,7 +3311,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that the destroyed dataset is no longer in the namespace. */ - VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os)); /* @@ -3325,7 +3327,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", name, error); } - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, + FTAG, &os)); ztest_zd_init(&zdtmp, NULL, os); @@ -3349,7 +3352,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * Verify that we cannot create an existing dataset. */ VERIFY3U(EEXIST, ==, - dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); + dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL)); /* * Verify that we can hold an objset that is also owned. @@ -3361,10 +3364,10 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * Verify that we cannot own an objset that is already owned. 
*/ VERIFY3U(EBUSY, ==, - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); + dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os2)); zil_close(zilog); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); ztest_zd_fini(&zdtmp); (void) rw_unlock(&ztest_name_lock); @@ -3462,7 +3465,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); } - error = dmu_objset_clone(clone1name, snap1name); + error = dmu_objset_clone(clone1name, snap1name, NULL); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3489,7 +3492,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); } - error = dmu_objset_clone(clone2name, snap3name); + error = dmu_objset_clone(clone2name, snap3name, NULL); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3498,19 +3501,20 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } - error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); + error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE, + FTAG, &os); if (error) fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error == ENOSPC) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); ztest_record_enospc(FTAG); goto out; } if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); @@ -4714,7 +4718,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } - error = dmu_objset_clone(clonename, fullname); + error = dmu_objset_clone(clonename, fullname, NULL); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_clone"); @@ -5604,7 +5608,7 @@ ztest_dataset_open(int d) } ASSERT(error == 0 || error == EEXIST); - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, zd, &os)); (void) rw_unlock(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); @@ -5645,7 +5649,7 @@ ztest_dataset_close(int d) ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); - dmu_objset_disown(zd->zd_os, zd); + dmu_objset_disown(zd->zd_os, B_TRUE, zd); ztest_zd_fini(zd); } @@ -5696,12 +5700,12 @@ ztest_run(ztest_shared_t *zs) dmu_objset_stats_t dds; VERIFY0(dmu_objset_own(ztest_opts.zo_pool, - DMU_OST_ANY, B_TRUE, FTAG, &os)); + DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os)); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); zs->zs_guid = dds.dds_guid; - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; @@ -5995,7 +5999,8 @@ ztest_init(ztest_shared_t *zs) spa_feature_table[i].fi_uname); VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); + VERIFY3U(0, ==, + spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL)); nvlist_free(nvroot); nvlist_free(props); diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c index 2ca2120f4b38..c8adf5077f4e 100644 --- a/usr/src/common/zfs/zfeature_common.c +++ b/usr/src/common/zfs/zfeature_common.c @@ -275,4 +275,14 @@ 
zpool_feature_init(void) "org.illumos:edonr", "edonr", "Edon-R hash algorithm.", ZFEATURE_FLAG_PER_DATASET, edonr_deps); + + static const spa_feature_t encryption_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_ENCRYPTION, + "com.datto:encryption", "encryption", + "Support for dataset level encryption", + ZFEATURE_FLAG_PER_DATASET, encryption_deps); + } diff --git a/usr/src/common/zfs/zfeature_common.h b/usr/src/common/zfs/zfeature_common.h index 528ff42bb051..9fe415b47561 100644 --- a/usr/src/common/zfs/zfeature_common.h +++ b/usr/src/common/zfs/zfeature_common.h @@ -56,6 +56,7 @@ typedef enum spa_feature { SPA_FEATURE_SHA512, SPA_FEATURE_SKEIN, SPA_FEATURE_EDONR, + SPA_FEATURE_ENCRYPTION, SPA_FEATURES } spa_feature_t; diff --git a/usr/src/common/zfs/zfs_deleg.c b/usr/src/common/zfs/zfs_deleg.c index b66fac804fb3..c5fab0e413bb 100644 --- a/usr/src/common/zfs/zfs_deleg.c +++ b/usr/src/common/zfs/zfs_deleg.c @@ -65,6 +65,8 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_GROUPUSED}, {ZFS_DELEG_PERM_HOLD}, {ZFS_DELEG_PERM_RELEASE}, + {ZFS_DELEG_PERM_LOAD_KEY}, + {ZFS_DELEG_PERM_CHANGE_KEY}, {NULL} }; diff --git a/usr/src/common/zfs/zfs_deleg.h b/usr/src/common/zfs/zfs_deleg.h index 16133c59f33f..3d9ec0221387 100644 --- a/usr/src/common/zfs/zfs_deleg.h +++ b/usr/src/common/zfs/zfs_deleg.h @@ -67,6 +67,8 @@ typedef enum { ZFS_DELEG_NOTE_RELEASE, ZFS_DELEG_NOTE_DIFF, ZFS_DELEG_NOTE_BOOKMARK, + ZFS_DELEG_NOTE_LOAD_KEY, + ZFS_DELEG_NOTE_CHANGE_KEY, ZFS_DELEG_NOTE_NONE } zfs_deleg_note_t; diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c index be4d15a88a7c..45f823a95648 100644 --- a/usr/src/common/zfs/zfs_prop.c +++ b/usr/src/common/zfs/zfs_prop.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "zfs_prop.h" #include "zfs_deleg.h" @@ -115,6 +116,26 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t crypto_table[] = { + { "on", ZIO_CRYPT_ON }, + { "off", ZIO_CRYPT_OFF }, + { "aes-128-ccm", ZIO_CRYPT_AES_128_CCM }, + { "aes-192-ccm", ZIO_CRYPT_AES_192_CCM }, + { "aes-256-ccm", ZIO_CRYPT_AES_256_CCM }, + { "aes-128-gcm", ZIO_CRYPT_AES_128_GCM }, + { "aes-192-gcm", ZIO_CRYPT_AES_192_GCM }, + { "aes-256-gcm", ZIO_CRYPT_AES_256_GCM }, + { NULL } + }; + + static zprop_index_t keyformat_table[] = { + { "none", ZFS_KEYFORMAT_NONE }, + { "raw", ZFS_KEYFORMAT_RAW }, + { "hex", ZFS_KEYFORMAT_HEX }, + { "passphrase", ZFS_KEYFORMAT_PASSPHRASE }, + { NULL } + }; + static zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, @@ -183,6 +204,13 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t keystatus_table[] = { + { "none", ZFS_KEYSTATUS_NONE }, + { "unavailable", ZFS_KEYSTATUS_UNAVAILABLE }, + { "available", ZFS_KEYSTATUS_AVAILABLE }, + { NULL } + }; + static zprop_index_t logbias_table[] = { { "latency", ZFS_LOGBIAS_LATENCY }, { "throughput", ZFS_LOGBIAS_THROUGHPUT }, @@ -301,12 +329,16 @@ zfs_prop_init(void) PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table); - /* readonly index (boolean) properties */ + /* readonly index properties */ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", boolean_table); + zprop_register_index(ZFS_PROP_KEYSTATUS, "keystatus", + ZFS_KEYSTATUS_NONE, PROP_READONLY, 
ZFS_TYPE_DATASET, + "none | unavailable | available", + "KEYSTATUS", keystatus_table); /* set once index properties */ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, @@ -317,6 +349,14 @@ zfs_prop_init(void) ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table); + zprop_register_index(ZFS_PROP_KEYFORMAT, "keyformat", + ZFS_KEYFORMAT_NONE, PROP_ONETIME, ZFS_TYPE_DATASET, + "none | raw | hex | passphrase", "KEYFORMAT", keyformat_table); + zprop_register_index(ZFS_PROP_ENCRYPTION, "encryption", + ZIO_CRYPT_DEFAULT, PROP_ONETIME, ZFS_TYPE_DATASET, + "on | off | aes-128-ccm | aes-192-ccm | aes-256-ccm | " + "aes-128-gcm | aes-192-gcm | aes-256-gcm", "ENCRYPTION", + crypto_table); /* set once index (boolean) properties */ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, @@ -347,6 +387,9 @@ zfs_prop_init(void) "receive_resume_token", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "RESUMETOK"); + zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation", + "none", PROP_INHERIT, ZFS_TYPE_DATASET, "prompt | ", + "KEYLOCATION"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, @@ -390,6 +433,8 @@ zfs_prop_init(void) zprop_register_number(ZFS_PROP_SNAPSHOT_COUNT, "snapshot_count", UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "SSCOUNT"); + zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters", + 0, PROP_ONETIME, ZFS_TYPE_DATASET, "", "PBKDF2ITERS"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, @@ -441,6 +486,8 @@ zfs_prop_init(void) PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT"); zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP"); + zprop_register_hidden(ZFS_PROP_PBKDF2_SALT, "pbkdf2salt", + PROP_TYPE_NUMBER, PROP_ONETIME, ZFS_TYPE_DATASET, "SALT"); /* oddball properties */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, @@ -625,6 +672,40 @@ zfs_prop_inheritable(zfs_prop_t prop) zfs_prop_table[prop].pd_attr == PROP_ONETIME); } +/* + * Returns TRUE if property is one of the encryption properties that requires + * a loaded encryption key to modify. + */ +boolean_t +zfs_prop_encryption_key_param(zfs_prop_t prop) +{ + /* + * keylocation does not count as an encryption property. It can be + * changed at will without needing the master keys. + */ + return (prop == ZFS_PROP_PBKDF2_SALT || prop == ZFS_PROP_PBKDF2_ITERS || + prop == ZFS_PROP_KEYFORMAT); +} + +/* + * Helper function used by both kernelspace and userspace to check the + * keylocation property. If encrypted is set, the keylocation must be valid + * for an encrypted dataset. 
+ */ +boolean_t +zfs_prop_valid_keylocation(const char *str, boolean_t encrypted) +{ + if (strcmp("none", str) == 0) + return (!encrypted); + else if (strcmp("prompt", str) == 0) + return (B_TRUE); + else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) + return (B_TRUE); + + return (B_FALSE); +} + + #ifndef _KERNEL /* diff --git a/usr/src/lib/libuutil/common/libuutil.h b/usr/src/lib/libuutil/common/libuutil.h index ec1bf907c1ab..a6e11ff05441 100644 --- a/usr/src/lib/libuutil/common/libuutil.h +++ b/usr/src/lib/libuutil/common/libuutil.h @@ -245,7 +245,7 @@ void uu_list_pool_destroy(uu_list_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_list_node_init(a, &a->foo_list, pool); * ... * uu_list_node_fini(a, &a->foo_list, pool); @@ -348,7 +348,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_avl_node_init(a, &a->foo_avl, pool); * ... * uu_avl_node_fini(a, &a->foo_avl, pool); diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com index 31619b01c12f..caf64b8113aa 100644 --- a/usr/src/lib/libzfs/Makefile.com +++ b/usr/src/lib/libzfs/Makefile.com @@ -40,6 +40,7 @@ OBJS_SHARED= \ OBJS_COMMON= \ libzfs_changelist.o \ libzfs_config.o \ + libzfs_crypto.o \ libzfs_dataset.o \ libzfs_diff.o \ libzfs_fru.o \ @@ -70,7 +71,7 @@ INCS += -I../../libc/inc C99MODE= -xc99=%all C99LMODE= -Xc99=%all LDLIBS += -lc -lm -ldevid -lgen -lnvpair -luutil -lavl -lefi \ - -ladm -lidmap -ltsol -lmd -lumem -lzfs_core + -ladm -lidmap -ltsol -lcryptoutil -lpkcs11 -lmd -lumem -lzfs_core CPPFLAGS += $(INCS) -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG @@ -84,6 +85,12 @@ SRCS= $(OBJS_COMMON:%.o=$(SRCDIR)/%.c) \ $(OBJS_SHARED:%.o=$(SRC)/common/zfs/%.c) $(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC) +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. 
+LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + .KEEP_STATE: all: $(LIBS) diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 657ab3f2a2a9..021d838bf5cd 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -128,6 +128,7 @@ typedef enum zfs_error { EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ + EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_UNKNOWN } zfs_error_t; @@ -423,8 +424,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); -extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, - nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); +extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, + uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); @@ -454,6 +455,19 @@ extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); +/* + * zfs encryption management + */ +extern int zfs_crypto_is_encryption_root(zfs_handle_t *, boolean_t *); +extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *, + uint8_t **, uint_t *); +extern int zfs_crypto_clone(libzfs_handle_t *, zfs_handle_t *, char *, + nvlist_t *, uint8_t **, uint_t *); +extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *); +extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *); +extern int zfs_crypto_unload_key(zfs_handle_t *); +extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t); + typedef struct zprop_list { int pl_prop; char *pl_user_prop; @@ -603,6 +617,9 @@ typedef struct sendflags { /* compressed WRITE records are permitted */ boolean_t compress; + + /* raw WRITE records are permitted, mutually exclusive with compress */ + boolean_t raw; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); @@ -686,6 +703,7 @@ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); +extern int zfs_parent_name(zfs_handle_t *, char *, size_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c index af5cb35f9d92..99d226019f9b 100644 --- a/usr/src/lib/libzfs/common/libzfs_changelist.c +++ b/usr/src/lib/libzfs/common/libzfs_changelist.c @@ -225,6 +225,7 @@ changelist_postfix(prop_changelist_t *clp) boolean_t sharenfs; boolean_t sharesmb; boolean_t mounted; + boolean_t needs_key; /* * If we are in the global zone, but this dataset is exported @@ -253,9 +254,12 @@ changelist_postfix(prop_changelist_t *clp) shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0) && (strcmp(shareopts, "off") != 0)); + needs_key = (zfs_prop_get_int(cn->cn_handle, + ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE); + mounted = zfs_is_mounted(cn->cn_handle, NULL); - if (!mounted && (cn->cn_mounted || + if (!mounted && !needs_key && (cn->cn_mounted || ((sharenfs || sharesmb || 
clp->cl_waslegacy) && (zfs_prop_get_int(cn->cn_handle, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) { diff --git a/usr/src/lib/libzfs/common/libzfs_crypto.c b/usr/src/lib/libzfs/common/libzfs_crypto.c new file mode 100644 index 000000000000..f792117a16e1 --- /dev/null +++ b/usr/src/lib/libzfs/common/libzfs_crypto.c @@ -0,0 +1,1596 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef sun +#include +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include "libzfs_impl.h" +#include "zfeature_common.h" + +/* + * User keys are used to decrypt the master encryption keys of a dataset. This + * indirection allows a user to change their access key without having to + * re-encrypt the entire dataset. User keys can be provided in one of several + * ways. Raw keys are simply given to the kernel as is. Similarly, hex keys + * are converted to binary and passed into the kernel. Password-based keys are + * a bit more complicated. Passwords alone do not provide suitable entropy for + * encryption and may be too short or too long to be used. In order to derive + * a more appropriate key we use a PBKDF2 function. This function is designed + * to take a (relatively) long time to calculate in order to discourage + * attackers from guessing from a list of common passwords. PBKDF2 requires + * 2 additional parameters. The first is the number of iterations to run, which + * will ultimately decide how long it takes to derive the resulting key from + * the password. The second parameter is a salt that is randomly generated for + * each dataset. The salt is used to "tweak" PBKDF2 such that a group of + * attackers cannot reasonably generate a table of commonly known passwords to + * their output keys and expect it to work for all past and future PBKDF2 users. + * We store the salt as a hidden property of the dataset (although it is + * technically ok if the salt is known to the attacker).
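 + *
 + * As a rough illustration (dataset names and key file paths here are
 + * placeholders, examples only), these keyformats map onto CLI usage
 + * along these lines:
 + *
 + *	zfs create -o encryption=on -o keyformat=passphrase pool/home
 + *	zfs create -o encryption=aes-256-gcm -o keyformat=hex \
 + *	    -o keylocation=file:///path/to/key.hex pool/data
 + *	zfs create -o encryption=on -o keyformat=raw \
 + *	    -o keylocation=file:///path/to/key.raw pool/backup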
+ */ + +typedef enum key_locator { + KEY_LOCATOR_NONE, + KEY_LOCATOR_PROMPT, + KEY_LOCATOR_URI +} key_locator_t; + +#define MIN_PASSPHRASE_LEN 8 +#define MAX_PASSPHRASE_LEN 64 +#define MAX_KEY_PROMPT_ATTEMPTS 3 + +static int caught_interrupt; + +static zfs_keylocation_t +zfs_prop_parse_keylocation(const char *str) +{ + if (strcmp("prompt", str) == 0) + return (ZFS_KEYLOCATION_PROMPT); + else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) + return (ZFS_KEYLOCATION_URI); + + return (ZFS_KEYLOCATION_NONE); +} + +static int +hex_key_to_raw(char *hex, int hexlen, uint8_t *out) +{ + int ret, i; + unsigned int c; + + for (i = 0; i < hexlen; i += 2) { + if (!isxdigit(hex[i]) || !isxdigit(hex[i + 1])) { + ret = EINVAL; + goto error; + } + + ret = sscanf(&hex[i], "%02x", &c); + if (ret != 1) { + ret = EINVAL; + goto error; + } + + out[i / 2] = c; + } + + return (0); + +error: + return (ret); +} + + +static void +catch_signal(int sig) +{ + caught_interrupt = sig; +} + +static char * +get_format_prompt_string(zfs_keyformat_t format) +{ + switch (format) { + case ZFS_KEYFORMAT_RAW: + return ("raw key"); + case ZFS_KEYFORMAT_HEX: + return ("hex key"); + case ZFS_KEYFORMAT_PASSPHRASE: + return ("passphrase"); + default: + /* shouldn't happen */ + return (NULL); + } +} + +static int +get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat, + boolean_t again, boolean_t newkey, uint8_t **buf, size_t *len_out) +{ + int ret = 0, bytes; + size_t buflen = 0; + struct termios old_term, new_term; + struct sigaction act, osigint, osigtstp; + + *len_out = 0; + + if (isatty(fileno(fd))) { + /* + * handle SIGINT and ignore SIGSTP. This is necessary to + * restore the state of the terminal. + */ + caught_interrupt = 0; + act.sa_flags = 0; + (void) sigemptyset(&act.sa_mask); + act.sa_handler = catch_signal; + + (void) sigaction(SIGINT, &act, &osigint); + act.sa_handler = SIG_IGN; + (void) sigaction(SIGTSTP, &act, &osigtstp); + + /* prompt for the key */ + if (fsname != NULL) { + (void) printf("%s %s%s for '%s': ", + (again) ? "Re-enter" : "Enter", + (newkey) ? "new " : "", + get_format_prompt_string( + (zfs_keyformat_t)keyformat), + fsname); + } else { + (void) printf("%s %s%s: ", + (again) ? "Re-enter" : "Enter", + (newkey) ? "new " : "", + get_format_prompt_string( + (zfs_keyformat_t)keyformat)); + + } + (void) fflush(stdout); + + /* disable the terminal echo for key input */ + (void) tcgetattr(fileno(fd), &old_term); + + new_term = old_term; + new_term.c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL); + + ret = tcsetattr(fileno(fd), TCSAFLUSH, &new_term); + if (ret != 0) { + ret = errno; + errno = 0; + goto out; + } + } + + /* read the key material */ + if (keyformat != ZFS_KEYFORMAT_RAW) { + bytes = getline((char **)buf, &buflen, fd); + if (bytes < 0) { + ret = errno; + errno = 0; + goto out; + } + } else { + /* + * Raw keys may have newline characters in them and so can't + * use getline(). Here we attempt to read 33 bytes so that we + * can properly check the key length (the file should only have + * 32 bytes). 
+ */ + *buf = malloc((WRAPPING_KEY_LEN + 1) * sizeof (char)); + if (*buf == NULL) { + ret = ENOMEM; + goto out; + } + + bytes = fread(*buf, 1, WRAPPING_KEY_LEN + 1, fd); + if (bytes < 0) { + /* size errors are handled by the calling function */ + free(*buf); + *buf = NULL; + ret = errno; + errno = 0; + goto out; + } + } + + /* trim the ending newline if it exists */ + if ((*buf)[bytes - 1] == '\n') { + (*buf)[bytes - 1] = '\0'; + bytes--; + } + + *len_out = bytes; + +out: + if (isatty(fileno(fd))) { + /* reset the teminal */ + (void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term); + (void) sigaction(SIGINT, &osigint, NULL); + (void) sigaction(SIGTSTP, &osigtstp, NULL); + + /* if we caught a signal, re-throw it now */ + if (caught_interrupt != 0) { + (void) kill(getpid(), caught_interrupt); + } + + /* print the newline that was not echo'd */ + (void) printf("\n"); + } + + return (ret); + +} + +/* + * Attempts to fetch key material, no matter where it might live. The key + * material is allocated and returned in km_out. *can_retry_out will be set + * to B_TRUE if the user is providing the key material interactively, allowing + * for re-entry attempts. + */ +static int +get_key_material(libzfs_handle_t *hdl, boolean_t do_verify, boolean_t newkey, + zfs_keyformat_t keyformat, char *keylocation, const char *fsname, + uint8_t **km_out, size_t *kmlen_out, boolean_t *can_retry_out) +{ + int ret, i; + zfs_keylocation_t keyloc = ZFS_KEYLOCATION_NONE; + FILE *fd = NULL; + uint8_t *km = NULL, *km2 = NULL; + size_t kmlen, kmlen2; + boolean_t can_retry = B_FALSE; + + /* verify and parse the keylocation */ + keyloc = zfs_prop_parse_keylocation(keylocation); + + /* open the appropriate file descriptor */ + switch (keyloc) { + case ZFS_KEYLOCATION_PROMPT: + fd = stdin; + if (isatty(fileno(fd))) { + can_retry = B_TRUE; + + /* raw keys cannot be entered on the terminal */ + if (keyformat == ZFS_KEYFORMAT_RAW) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot enter raw keys on the terminal")); + goto error; + } + } + break; + case ZFS_KEYLOCATION_URI: + fd = fopen(&keylocation[7], "r"); + if (!fd) { + ret = errno; + errno = 0; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to open key material file")); + goto error; + } + break; + default: + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid keylocation.")); + goto error; + } + + /* fetch the key material into the buffer */ + ret = get_key_material_raw(fd, fsname, keyformat, B_FALSE, newkey, + &km, &kmlen); + if (ret != 0) + goto error; + + /* do basic validation of the key material */ + switch (keyformat) { + case ZFS_KEYFORMAT_RAW: + /* verify the key length is correct */ + if (kmlen < WRAPPING_KEY_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Raw key too short (expected %u)."), + WRAPPING_KEY_LEN); + goto error; + } + + if (kmlen > WRAPPING_KEY_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Raw key too long (expected %u)."), + WRAPPING_KEY_LEN); + goto error; + } + break; + case ZFS_KEYFORMAT_HEX: + /* verify the key length is correct */ + if (kmlen < WRAPPING_KEY_LEN * 2) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Hex key too short (expected %u)."), + WRAPPING_KEY_LEN * 2); + goto error; + } + + if (kmlen > WRAPPING_KEY_LEN * 2) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Hex key too long (expected %u)."), + WRAPPING_KEY_LEN * 2); + goto error; + } + + /* check for invalid hex digits */ + for (i = 0; i < WRAPPING_KEY_LEN * 2; i++) 
{ + if (!isxdigit((char)km[i])) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid hex character detected.")); + goto error; + } + } + break; + case ZFS_KEYFORMAT_PASSPHRASE: + /* verify the length is correct */ + if (kmlen > MAX_PASSPHRASE_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase too long (max 64).")); + goto error; + } + + if (kmlen < MIN_PASSPHRASE_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase too short (min 8).")); + goto error; + } + break; + default: + /* can't happen */ + break; + } + + if (do_verify && isatty(fileno(fd))) { + ret = get_key_material_raw(fd, fsname, keyformat, B_TRUE, + newkey, &km2, &kmlen2); + if (ret != 0) + goto error; + + if (kmlen2 != kmlen || + (memcmp((char *)km, (char *)km2, kmlen) != 0)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Provided keys do not match.")); + goto error; + } + } + + if (fd != stdin) + (void) fclose(fd); + + if (km2 != NULL) + free(km2); + + *km_out = km; + *kmlen_out = kmlen; + if (can_retry_out != NULL) + *can_retry_out = can_retry; + + return (0); + +error: + if (km != NULL) + free(km); + + if (km2 != NULL) + free(km2); + + if (fd != NULL && fd != stdin) + (void) fclose(fd); + + *km_out = NULL; + *kmlen_out = 0; + + if (can_retry_out != NULL) + *can_retry_out = can_retry; + + return (ret); +} + +/* This needs to be fixed to be compatible with other platforms */ + +static int +pbkdf2(uint8_t *passphrase, size_t passphraselen, uint8_t *salt, + size_t saltlen, uint64_t iterations, uint8_t *output, + size_t outputlen) +{ + int ret = 0; + CK_SESSION_HANDLE session; + char *tmpkeydata = NULL; + size_t tmpkeydatalen = 0; + CK_OBJECT_HANDLE obj; + + /* initialize output */ + (void) memset(output, 0, outputlen); + + ret = SUNW_C_GetMechSession(CKM_PKCS5_PBKD2, &session); + if (ret) { + (void) fprintf(stderr, "failed to connect to pkcs5: %s\r\n", + pkcs11_strerror(ret)); + return (ret); + } + + ret = pkcs11_PasswdToPBKD2Object(session, (char *)passphrase, + passphraselen, salt, saltlen, iterations, CKK_AES, outputlen, 0, + &obj); + + if (ret == CKR_OK) + ret = pkcs11_ObjectToKey(session, obj, (void **)&tmpkeydata, + &tmpkeydatalen, B_TRUE); + + (void) C_CloseSession(session); + if (ret) { + (void) fprintf(stderr, "unable to generate key: %s\r\n", + pkcs11_strerror(ret)); + return (ret); + } + + /* + * Because it allocates an area for the passphrase, we copy it out + * then zero the original + */ + (void) memcpy(output, tmpkeydata, tmpkeydatalen); + (void) memset(tmpkeydata, 0, tmpkeydatalen); + free(tmpkeydata); + + return (ret); +} + +/* ARGSUSED */ +static int +derive_key(libzfs_handle_t *hdl, zfs_keyformat_t format, uint64_t iters, + uint8_t *key_material, size_t key_material_len, uint64_t salt, + uint8_t **key_out) +{ + int ret; + uint8_t *key; + + *key_out = NULL; + + key = zfs_alloc(hdl, WRAPPING_KEY_LEN); + if (!key) + return (ENOMEM); + + switch (format) { + case ZFS_KEYFORMAT_RAW: + bcopy(key_material, key, WRAPPING_KEY_LEN); + break; + case ZFS_KEYFORMAT_HEX: + ret = hex_key_to_raw((char *)key_material, + WRAPPING_KEY_LEN * 2, key); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid hex key provided.")); + goto error; + } + break; + case ZFS_KEYFORMAT_PASSPHRASE: + salt = LE_64(salt); + ret = pbkdf2(key_material, strlen((char *)key_material), + ((uint8_t *)&salt), sizeof (uint64_t), iters, + key, WRAPPING_KEY_LEN); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate key 
from passphrase.")); + goto error; + } + break; + default: + ret = EINVAL; + goto error; + } + + *key_out = key; + return (0); + +error: + free(key); + + *key_out = NULL; + return (ret); +} + +static boolean_t +encryption_feature_is_enabled(zpool_handle_t *zph) +{ + nvlist_t *features; + uint64_t feat_refcount; + + /* check that features can be enabled */ + if (zpool_get_prop_int(zph, ZPOOL_PROP_VERSION, NULL) + < SPA_VERSION_FEATURES) + return (B_FALSE); + + /* check for crypto feature */ + features = zpool_get_features(zph); + if (!features || nvlist_lookup_uint64(features, + spa_feature_table[SPA_FEATURE_ENCRYPTION].fi_guid, + &feat_refcount) != 0) + return (B_FALSE); + + return (B_TRUE); +} + +static int +populate_create_encryption_params_nvlists(libzfs_handle_t *hdl, + zfs_handle_t *zhp, boolean_t newkey, zfs_keyformat_t keyformat, + char *keylocation, nvlist_t *props, uint8_t **wkeydata, uint_t *wkeylen) +{ + int ret; + uint64_t iters = 0, salt = 0; + uint8_t *key_material = NULL; + size_t key_material_len = 0; + uint8_t *key_data = NULL; + const char *fsname = (zhp) ? zfs_get_name(zhp) : NULL; + + /* get key material from keyformat and keylocation */ + ret = get_key_material(hdl, B_TRUE, newkey, keyformat, keylocation, + fsname, &key_material, &key_material_len, NULL); + if (ret != 0) + goto error; + + /* passphrase formats require a salt and pbkdf2 iters property */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) { +#ifdef sun + /* always generate a new salt */ + ret = pkcs11_get_random(&salt, sizeof (uint64_t)); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate salt.")); + goto error; + } +#else + random_init(); + + ret = random_get_bytes((uint8_t *)&salt, sizeof (uint64_t)); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate salt.")); + goto error; + } + + random_fini(); +#endif + + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to add salt to properties.")); + goto error; + } + + /* + * If not otherwise specified, use the default number of + * pbkdf2 iterations. If specified, we have already checked + * that the given value is greater than MIN_PBKDF2_ITERATIONS + * during zfs_valid_proplist(). 
+ */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + if (ret == ENOENT) { + iters = DEFAULT_PBKDF2_ITERATIONS; + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); + if (ret != 0) + goto error; + } else if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to get pbkdf2 iterations.")); + goto error; + } + } else { + /* check that pbkdf2iters was not specified by the user */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + if (ret == 0) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot specify pbkdf2iters with a non-passphrase " + "keyformat.")); + goto error; + } + } + + /* derive a key from the key material */ + ret = derive_key(hdl, (zfs_keyformat_t)keyformat, iters, key_material, + key_material_len, salt, &key_data); + if (ret != 0) + goto error; + + free(key_material); + + *wkeydata = key_data; + *wkeylen = WRAPPING_KEY_LEN; + return (0); + +error: + if (key_material != NULL) + free(key_material); + if (key_data != NULL) + free(key_data); + + *wkeydata = NULL; + *wkeylen = 0; + return (ret); +} + +static boolean_t +proplist_has_encryption_props(nvlist_t *props) +{ + int ret; + uint64_t intval; + char *strval; + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &intval); + if (ret == 0 && intval != ZIO_CRYPT_OFF) + return (B_TRUE); + + ret = nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &strval); + if (ret == 0 && strcmp(strval, "none") != 0) + return (B_TRUE); + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &intval); + if (ret == 0) + return (B_TRUE); + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &intval); + if (ret == 0) + return (B_TRUE); + + return (B_FALSE); +} + +int +zfs_crypto_is_encryption_root(zfs_handle_t *zhp, boolean_t *enc_root) +{ + int ret; + char prop_keylocation[MAXNAMELEN]; + char keylocation_src[MAXNAMELEN]; + zprop_source_t keylocation_srctype; + + /* if the dataset isn't encrypted, just return */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF) { + *enc_root = B_FALSE; + return (0); + } + + /* fetch the keylocation and its source */ + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, prop_keylocation, + sizeof (prop_keylocation), &keylocation_srctype, keylocation_src, + sizeof (keylocation_src), B_TRUE); + if (ret != 0) { + *enc_root = B_FALSE; + return (ret); + } + + /* check if the keylocation was inherited */ + if (keylocation_srctype == ZPROP_SRC_INHERITED) { + *enc_root = B_FALSE; + return (0); + } + + *enc_root = B_TRUE; + return (0); +} + +int +zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props, + nvlist_t *pool_props, uint8_t **wkeydata_out, uint_t *wkeylen_out) +{ + int ret; + char errbuf[1024]; + uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char *keylocation = NULL; + zfs_handle_t *pzhp = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + boolean_t local_crypt = B_TRUE; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Encryption create error")); + + /* lookup crypt from props */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); + if (ret != 0) + local_crypt = B_FALSE; + + /* lookup key location and format from props */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) 
nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + if (parent_name != NULL) { + /* get a reference to parent dataset */ + pzhp = make_dataset_handle(hdl, parent_name); + if (pzhp == NULL) { + ret = ENOENT; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + goto out; + } + + /* Lookup parent's crypt */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + + /* Params require the encryption feature */ + if (!encryption_feature_is_enabled(pzhp->zpool_hdl)) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + goto out; + } + + ret = 0; + goto out; + } + } else { + /* + * special case for root dataset where the encryption + * feature won't be on disk yet + */ + if (!nvlist_exists(pool_props, "feature@encryption")) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + goto out; + } + + ret = 0; + goto out; + } + + pcrypt = ZIO_CRYPT_OFF; + } + + /* Check for encryption being explicitly turned off */ + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid encryption value. Dataset must be encrypted.")); + goto out; + } + + /* Get the inherited encryption property if we don't have it locally */ + if (!local_crypt) + crypt = pcrypt; + + /* + * At this point crypt should be the actual encryption value. If + * encryption is off just verify that no encryption properties have + * been specified and return. + */ + if (crypt == ZIO_CRYPT_OFF) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption must be turned on to set encryption " + "properties.")); + goto out; + } + + ret = 0; + goto out; + } + + /* + * If we have a parent crypt it is valid to specify encryption alone. + * This will result in a child that is encrypted with the chosen + * encryption suite that will also inherit the parent's key. If + * the parent is not encrypted we need an encryption suite provided. + */ + if (pcrypt == ZIO_CRYPT_OFF && keylocation == NULL && + keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* + * Specifying a keylocation implies this will be a new encryption root. + * Check that a keyformat is also specified. + */ + if (keylocation != NULL && keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* default to prompt if no keylocation is specified */ + if (keyformat != ZFS_KEYFORMAT_NONE && keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), keylocation); + if (ret != 0) + goto out; + } + + /* + * If a local key format is provided, this dataset will be a new + * encryption root. Populate the encryption params.
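 + *
 + * For example (dataset names are placeholders only): under an encrypted
 + * parent, a plain create inherits the parent's key, while supplying a
 + * keyformat makes the new dataset its own encryption root:
 + *
 + *	zfs create pool/enc/child
 + *	zfs create -o keyformat=passphrase pool/enc/newroot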
+ */ + if (keylocation != NULL) { + ret = populate_create_encryption_params_nvlists(hdl, NULL, + B_FALSE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto out; + } + + if (pzhp != NULL) + zfs_close(pzhp); + + *wkeydata_out = wkeydata; + *wkeylen_out = wkeylen; + return (0); + +out: + if (pzhp != NULL) + zfs_close(pzhp); + if (wkeydata != NULL) + free(wkeydata); + + *wkeydata_out = NULL; + *wkeylen_out = 0; + return (ret); +} + +int +zfs_crypto_clone(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, + char *parent_name, nvlist_t *props, uint8_t **wkeydata_out, + uint_t *wkeylen_out) +{ + int ret; + char errbuf[1024]; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char *keylocation = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + zfs_handle_t *pzhp = NULL; + uint64_t crypt, pcrypt, ocrypt, okey_status; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Encryption clone error")); + + /* get a reference to parent dataset, should never be NULL */ + pzhp = make_dataset_handle(hdl, parent_name); + if (pzhp == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + return (ENOENT); + } + + /* Lookup parent's crypt */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + ocrypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION); + + /* lookup keylocation from props */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + /* encryption should not be set since it must match the origin */ + ret = nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), + &crypt); + if (ret == 0) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption may not be specified during cloning.")); + goto out; + } + + /* all children of encrypted parents must be encrypted */ + if (pcrypt != ZIO_CRYPT_OFF && ocrypt == ZIO_CRYPT_OFF) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot create unencrypted clone as child " + "of encrypted parent.")); + goto out; + } + + /* + * If this dataset won't be encrypted check to ensure no encryption + * params were set and return. + */ + if (ocrypt == ZIO_CRYPT_OFF) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption properties may not be set " + "for an unencrypted clone.")); + goto out; + } + + ret = 0; + goto out; + } + + /* + * Specifying a keylocation implies this will be a new encryption root. + * Check that a keyformat is also specified. + */ + if (keylocation != NULL && keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* default to prompt if no keylocation is specified */ + if (keyformat != ZFS_KEYFORMAT_NONE && keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), keylocation); + if (ret != 0) + goto out; + } + + /* + * By this point this dataset will be encrypted. 
The origin's + * wrapping key must be loaded + */ + okey_status = zfs_prop_get_int(origin_zhp, ZFS_PROP_KEYSTATUS); + if (okey_status != ZFS_KEYSTATUS_AVAILABLE) { + ret = EACCES; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Origin wrapping key must be loaded.")); + goto out; + } + + /* + * if the parent doesn't have a key format to inherit we need + * one provided for us + */ + if (pcrypt == ZIO_CRYPT_OFF && keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required.")); + goto out; + } + + /* prepare the key if needed */ + if (keylocation != NULL) { + ret = populate_create_encryption_params_nvlists(hdl, NULL, + B_FALSE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto out; + } + + zfs_close(pzhp); + + *wkeydata_out = wkeydata; + *wkeylen_out = wkeylen; + return (0); + +out: + if (pzhp != NULL) + zfs_close(pzhp); + if (wkeydata != NULL) + free(wkeydata); + + *wkeydata_out = NULL; + *wkeylen_out = 0; + return (ret); +} + +typedef struct loadkeys_cbdata { + uint64_t cb_numfailed; + uint64_t cb_numattempted; +} loadkey_cbdata_t; + +static int +load_keys_cb(zfs_handle_t *zhp, void *arg) +{ + int ret; + boolean_t is_encroot; + loadkey_cbdata_t *cb = arg; + uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* only attempt to load keys for encryption roots */ + ret = zfs_crypto_is_encryption_root(zhp, &is_encroot); + if (ret != 0 || !is_encroot) + goto out; + + /* don't attempt to load already loaded keys */ + if (keystatus == ZFS_KEYSTATUS_AVAILABLE) + goto out; + + /* Attempt to load the key. Record status in cb. */ + cb->cb_numattempted++; + + ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (ret) + cb->cb_numfailed++; + +out: + (void) zfs_iter_filesystems(zhp, load_keys_cb, cb); + zfs_close(zhp); + + /* always return 0, since this function is best effort */ + return (0); +} + +/* + * This function is best effort. It attempts to load all the keys for the given + * filesystem and all of its children. 
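 + *
 + * This backs the recursive key loading offered by the CLI, e.g. (an
 + * illustrative invocation; see the zfs(1M) changes in this patch for
 + * the exact syntax):
 + *
 + *	zfs load-key -r pool/enc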
+ */ +int +zfs_crypto_attempt_load_keys(libzfs_handle_t *hdl, char *fsname) +{ + int ret; + zfs_handle_t *zhp = NULL; + loadkey_cbdata_t cb = { 0 }; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) { + ret = ENOENT; + goto error; + } + + ret = load_keys_cb(zfs_handle_dup(zhp), &cb); + if (ret) + goto error; + + (void) printf(gettext("%llu / %llu keys successfully loaded\n"), + (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), + (u_longlong_t)cb.cb_numattempted); + + if (cb.cb_numfailed != 0) { + ret = -1; + goto error; + } + + zfs_close(zhp); + return (0); + +error: + if (zhp != NULL) + zfs_close(zhp); + return (ret); +} + +int +zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation) +{ + int ret, attempts = 0; + char errbuf[1024]; + uint64_t keystatus, iters = 0, salt = 0; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char prop_keylocation[MAXNAMELEN]; + char keylocation_src[MAXNAMELEN]; + char *keylocation = NULL; + uint8_t *key_material = NULL, *key_data = NULL; + size_t key_material_len; + zprop_source_t keylocation_srctype; + boolean_t can_retry = B_FALSE, correctible = B_FALSE; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key load error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* Fetch the keyformat. Check that the dataset is encrypted. */ + keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT); + if (keyformat == ZFS_KEYFORMAT_NONE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is not encrypted."), zfs_get_name(zhp)); + ret = EINVAL; + goto error; + } + + /* + * Fetch the key location. Check that we are working with an + * encryption root. + */ + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, prop_keylocation, + sizeof (prop_keylocation), &keylocation_srctype, keylocation_src, + sizeof (keylocation_src), B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get keylocation for '%s'."), zfs_get_name(zhp)); + goto error; + } else if (keylocation_srctype == ZPROP_SRC_INHERITED) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys must be loaded for encryption root of '%s' (%s)."), + zfs_get_name(zhp), keylocation_src); + ret = EINVAL; + goto error; + } + + /* + * if the caller has elected to override the keylocation property + * use that instead + */ + if (alt_keylocation != NULL) + keylocation = alt_keylocation; + else + keylocation = prop_keylocation; + + /* check that the key is unloaded unless this is a noop */ + if (!noop) { + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_AVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded for '%s'."), zfs_get_name(zhp)); + ret = EEXIST; + goto error; + } + } + + /* passphrase formats require a salt and pbkdf2_iters property */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) { + salt = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_SALT); + iters = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_ITERS); + } + +try_again: + /* fetching and deriving the key are correctible errors. 
set the flag */ + correctible = B_TRUE; + + /* get key material from key format and location */ + ret = get_key_material(zhp->zfs_hdl, B_FALSE, B_FALSE, keyformat, + keylocation, zfs_get_name(zhp), &key_material, &key_material_len, + &can_retry); + if (ret != 0) + goto error; + + /* derive a key from the key material */ + ret = derive_key(zhp->zfs_hdl, keyformat, iters, key_material, + key_material_len, salt, &key_data); + if (ret != 0) + goto error; + + correctible = B_FALSE; + + /* pass the wrapping key and noop flag to the ioctl */ + ret = lzc_load_key(zhp->zfs_name, noop, key_data, WRAPPING_KEY_LEN); + if (ret != 0) { + switch (ret) { + case EINVAL: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid parameters provided for %s."), + zfs_get_name(zhp)); + break; + case EEXIST: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded for '%s'."), zfs_get_name(zhp)); + break; + case EBUSY: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is busy."), zfs_get_name(zhp)); + break; + case EACCES: + correctible = B_TRUE; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Incorrect key provided for '%s'."), + zfs_get_name(zhp)); + break; + } + goto error; + } + + free(key_material); + free(key_data); + + return (0); + +error: + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + if (key_material != NULL) + free(key_material); + if (key_data != NULL) + free(key_data); + + /* + * Here we decide if it is ok to allow the user to retry entering their + * key. The can_retry flag will be set if the user is entering their + * key from an interactive prompt. The correctible flag will only be + * set if an error that occured could be corrected by retrying. Both + * flags are needed to allow the user to attempt key entry again + */ + if (can_retry && correctible && attempts <= MAX_KEY_PROMPT_ATTEMPTS) { + attempts++; + goto try_again; + } + + return (ret); +} + +int +zfs_crypto_unload_key(zfs_handle_t *zhp) +{ + int ret; + char errbuf[1024]; + char prop_keylocation[MAXNAMELEN]; + char keylocation_src[MAXNAMELEN]; + uint64_t keystatus, keyformat; + zprop_source_t keylocation_srctype; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key unload error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* Fetch the keyformat. Check that the dataset is encrypted. */ + keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT); + if (keyformat == ZFS_KEYFORMAT_NONE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is not encrypted."), zfs_get_name(zhp)); + ret = EINVAL; + goto error; + } + + /* + * Fetch the key location. Check that we are working with an + * encryption root. 
+ */ + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, prop_keylocation, + sizeof (prop_keylocation), &keylocation_srctype, keylocation_src, + sizeof (keylocation_src), B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get keylocation for '%s'."), zfs_get_name(zhp)); + goto error; + } else if (keylocation_srctype == ZPROP_SRC_INHERITED) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys must be unloaded for encryption root of '%s' (%s)."), + zfs_get_name(zhp), keylocation_src); + ret = EINVAL; + goto error; + } + + /* check that the key is loaded */ + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already unloaded for '%s'."), zfs_get_name(zhp)); + ret = ENOENT; + goto error; + } + + /* call the ioctl */ + ret = lzc_unload_key(zhp->zfs_name); + + if (ret != 0) { + switch (ret) { + case ENOENT: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already unloaded for '%s'."), + zfs_get_name(zhp)); + break; + case EBUSY: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is busy."), zfs_get_name(zhp)); + break; + } + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + } + + return (ret); + +error: + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + return (ret); +} + +static int +zfs_crypto_verify_rewrap_nvlist(zfs_handle_t *zhp, nvlist_t *props, + nvlist_t **props_out, char *errbuf) +{ + int ret; + nvpair_t *elem = NULL; + zfs_prop_t prop; + nvlist_t *new_props = NULL; + + new_props = fnvlist_alloc(); + + /* + * loop through all provided properties, we should only have + * keyformat, keylocation and pbkdf2iters. The actual validation of + * values is done by zfs_valid_proplist(). 
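 + *
 + * These are the same properties accepted on the command line by
 + * "zfs change-key", e.g. (an illustrative invocation; the dataset name
 + * and iteration count are placeholders):
 + *
 + *	zfs change-key -o keyformat=passphrase -o pbkdf2iters=1000000 pool/enc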
+ */ + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + const char *propname = nvpair_name(elem); + prop = zfs_name_to_prop(propname); + + switch (prop) { + case ZFS_PROP_PBKDF2_ITERS: + case ZFS_PROP_KEYFORMAT: + case ZFS_PROP_KEYLOCATION: + break; + default: + ret = EINVAL; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Only keyformat, keylocation and pbkdf2iters may " + "be set with this command.")); + goto error; + } + } + + new_props = zfs_valid_proplist(zhp->zfs_hdl, zhp->zfs_type, props, + zfs_prop_get_int(zhp, ZFS_PROP_ZONED), NULL, zhp->zpool_hdl, + B_TRUE, errbuf); + if (new_props == NULL) + goto error; + + *props_out = new_props; + return (0); + +error: + nvlist_free(new_props); + *props_out = NULL; + return (ret); +} + +int +zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey) +{ + int ret; + char errbuf[1024]; + boolean_t is_encroot; + nvlist_t *props = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + uint64_t crypt, pcrypt, keystatus, pkeystatus; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + zfs_handle_t *pzhp = NULL; + char *keylocation = NULL; + char prop_keylocation[MAXNAMELEN]; + char parent_name[ZFS_MAX_DATASET_NAME_LEN]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key change error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* get crypt from dataset */ + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + if (crypt == ZIO_CRYPT_OFF) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Dataset not encrypted.")); + ret = EINVAL; + goto error; + } + + /* + * if the user wants to use the inheritkey variant of this function + * we don't need to collect any crypto arguments + */ + if (!inheritkey) { + /* validate the provided properties */ + ret = zfs_crypto_verify_rewrap_nvlist(zhp, raw_props, &props, + errbuf); + if (ret != 0) + goto error; + + /* + * Load keyformat and keylocation from the nvlist. Fetch from + * the dataset properties if not specified.
+ */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + /* check whether zhp is an encryption root */ + ret = zfs_crypto_is_encryption_root(zhp, &is_encroot); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to find encryption root.")); + ret = EINVAL; + goto error; + } + + if (is_encroot) { + /* + * If this is already an encryption root, just keep + * any properties not set by the user + */ + if (keyformat == ZFS_KEYFORMAT_NONE) + keyformat = zfs_prop_get_int(zhp, + ZFS_PROP_KEYFORMAT); + + if (keylocation == NULL) { + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, + prop_keylocation, sizeof (prop_keylocation), + NULL, NULL, 0, B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, "Failed to " + "get existing keylocation " + "property.")); + goto error; + } + + keylocation = prop_keylocation; + } + } else { + /* need a new key for non-encryption roots */ + if (keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, "Keyformat required " + "for new encryption root.")); + goto error; + } + + /* default to prompt if no keylocation is specified */ + if (keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + keylocation); + if (ret != 0) + goto error; + } + } + + /* fetch the new wrapping key and associated properties */ + ret = populate_create_encryption_params_nvlists(zhp->zfs_hdl, + zhp, B_TRUE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto error; + } else { + /* check that zhp is an encryption root */ + ret = zfs_crypto_is_encryption_root(zhp, &is_encroot); + if (ret != 0 || !is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key inheriting can only be performed on " + "encryption roots.")); + ret = EINVAL; + goto error; + } + + /* get the parent's name */ + ret = zfs_parent_name(zhp, parent_name, sizeof (parent_name)); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Root dataset cannot inherit key.")); + ret = EINVAL; + goto error; + } + + /* get a handle to the parent */ + pzhp = make_dataset_handle(zhp->zfs_hdl, parent_name); + if (pzhp == NULL) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + ret = ENOENT; + goto error; + } + + /* parent must be encrypted */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + if (pcrypt == ZIO_CRYPT_OFF) { + zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Parent must be encrypted.")); + ret = EINVAL; + goto error; + } + + /* check that the parent's key is loaded */ + pkeystatus = zfs_prop_get_int(pzhp, ZFS_PROP_KEYSTATUS); + if (pkeystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Parent key must be loaded.")); + ret = EACCES; + goto error; + } + } + + /* check that the key is loaded */ + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key must be loaded.")); + ret = EACCES; + goto error; + } + + /* call the ioctl */ + ret = lzc_change_key(zhp->zfs_name, props, wkeydata, wkeylen); + if (ret != 0) { + switch (ret) { + case EINVAL: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid properties for key change.")); + break; + case EACCES: +
zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key is not currently loaded.")); + break; + } + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + } + + if (pzhp != NULL) + zfs_close(pzhp); + if (props != NULL) + nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); + + return (ret); + +error: + if (pzhp != NULL) + zfs_close(pzhp); + if (props != NULL) + nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); + + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + return (ret); +} diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index a01d29e8c1e4..27289a50ec9c 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "zfs_namecheck.h" @@ -940,7 +941,7 @@ zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, - const char *errbuf) + boolean_t key_params_ok, const char *errbuf) { nvpair_t *elem; uint64_t intval; @@ -1097,7 +1098,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } if (zfs_prop_readonly(prop) && - (!zfs_prop_setonce(prop) || zhp != NULL)) { + !(zfs_prop_setonce(prop) && zhp == NULL) && + !(zfs_prop_encryption_key_param(prop) && key_params_ok)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); @@ -1362,6 +1364,48 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, break; + case ZFS_PROP_KEYLOCATION: + if (!zfs_prop_valid_keylocation(strval, B_FALSE)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid keylocation")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (zhp != NULL) { + uint64_t crypt = + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + + if (crypt == ZIO_CRYPT_OFF && + strcmp(strval, "none") != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation must not be 'none' " + "for encrypted datasets")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } else if (crypt != ZIO_CRYPT_OFF && + strcmp(strval, "none") == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation must be 'none' " + "for unencrypted datasets")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + } + break; + + case ZFS_PROP_PBKDF2_ITERS: + if (intval < MIN_PBKDF2_ITERATIONS) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "minimum pbkdf2 iterations is %u"), + MIN_PBKDF2_ITERATIONS); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; @@ -1425,6 +1469,27 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, break; } } + + /* check encryption properties */ + if (zhp != NULL) { + int64_t crypt = zfs_prop_get_int(zhp, + ZFS_PROP_ENCRYPTION); + + switch (prop) { + case ZFS_PROP_COPIES: + if (crypt != ZIO_CRYPT_OFF && intval > 2) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encrypted datasets cannot have " + "3 copies")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + break; + default: + break; + } + } } /* @@ -1580,6 +1645,17 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, } break; + case EACCES: + if (prop == ZFS_PROP_KEYLOCATION) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keylocation may only be set if it is set locally. 
" + "This may be changed with 'zfs change-key.'")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + case EOVERFLOW: /* * This platform can't address a volume this big. @@ -1649,7 +1725,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props) if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, - errbuf)) == NULL) + B_FALSE, errbuf)) == NULL) goto error; /* @@ -3050,6 +3126,12 @@ parent_name(const char *path, char *buf, size_t buflen) return (0); } +int +zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen) +{ + return (parent_name(zfs_get_name(zhp), buf, buflen)); +} + /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. If accept_ancestor is true, then find the @@ -3268,7 +3350,10 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; char errbuf[1024]; + char parent[MAXNAMELEN]; uint64_t zoned; enum lzc_dataset_type ost; @@ -3313,7 +3398,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, zpool_handle_t *zpool_handle = zpool_open(hdl, pool_path); if (props && (props = zfs_valid_proplist(hdl, type, props, - zoned, NULL, zpool_handle, errbuf)) == 0) { + zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) { zpool_close(zpool_handle); return (-1); } @@ -3365,15 +3450,21 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, } } + (void) parent_name(path, parent, sizeof (parent)); + if (zfs_crypto_create(hdl, parent, props, NULL, &wkeydata, + &wkeylen) != 0) { + nvlist_free(props); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + } + /* create the dataset */ - ret = lzc_create(path, ost, props); + ret = lzc_create(path, ost, props, wkeydata, wkeylen); nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); /* check for failure */ if (ret != 0) { - char parent[ZFS_MAX_DATASET_NAME_LEN]; - (void) parent_name(path, parent, sizeof (parent)); - switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -3390,6 +3481,13 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption root's key is not loaded " + "or provided")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + #ifdef _ILP32 case EOVERFLOW: /* @@ -3554,6 +3652,8 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) char parent[ZFS_MAX_DATASET_NAME_LEN]; int ret; char errbuf[1024]; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; libzfs_handle_t *hdl = zhp->zfs_hdl; uint64_t zoned; @@ -3582,12 +3682,20 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) type = ZFS_TYPE_FILESYSTEM; } if ((props = zfs_valid_proplist(hdl, type, props, zoned, - zhp, zhp->zpool_hdl, errbuf)) == NULL) + zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL) return (-1); } - ret = lzc_clone(target, zhp->zfs_name, props); + if (zfs_crypto_clone(hdl, zhp, parent, props, &wkeydata, + &wkeylen) != 0) { + nvlist_free(props); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + } + + ret = lzc_clone(target, zhp->zfs_name, props, wkeydata, wkeylen); nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); if (ret 
!= 0) { switch (errno) { @@ -3612,6 +3720,12 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, errbuf)); + case EACCES: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "required encryption key not loaded or provided")); + return (zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, + errbuf)); + default: return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); @@ -3738,7 +3852,7 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) if (props != NULL && (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, zpool_hdl, errbuf)) == NULL) { + props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) { zpool_close(zpool_hdl); return (-1); } @@ -4105,6 +4219,18 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); + } else if (errno == EACCES) { + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == + ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot rename an unencrypted dataset to " + "be a descendant of an encrypted one")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot move encryption child outside of " + "its encryption root")); + } + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index ae6df7fa16e5..a2201a9a43f5 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -77,6 +77,7 @@ #include #include #include +#include #include @@ -331,6 +332,8 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX]; libzfs_handle_t *hdl = zhp->zfs_hdl; + uint64_t keystatus; + int rc; if (options == NULL) mntopts[0] = '\0'; @@ -346,6 +349,39 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) return (0); + /* + * If the filesystem is encrypted the key must be loaded in order to + * mount. If the key isn't loaded, the MS_CRYPT flag decides whether + * or not we attempt to load the keys. Note: we must call + * zfs_refresh_properties() here since some callers of this function + * (most notably zpool_enable_datasets()) may implicitly load our key + * by loading the parent's key first. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + zfs_refresh_properties(zhp); + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* + * If the key is unavailable and MS_CRYPT is set give the + * user a chance to enter the key. Otherwise just fail + * immediately.
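 + *
 + * Callers that want the mount itself to load the key can pass MS_CRYPT,
 + * e.g. (a minimal sketch; the dataset name is a placeholder and error
 + * handling is omitted):
 + *
 + *	zfs_handle_t *zhp = zfs_open(hdl, "pool/enc", ZFS_TYPE_FILESYSTEM);
 + *	if (zhp != NULL)
 + *		(void) zfs_mount(zhp, NULL, MS_CRYPT);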
+ */ + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + if (flags & MS_CRYPT) { + rc = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (rc != 0) + return (rc); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption key not loaded")); + return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + mountpoint)); + } + } + + } + /* Create the directory if it doesn't already exist */ if (lstat(mountpoint, &buf) != 0) { if (mkdirp(mountpoint, 0755) != 0) { @@ -1099,6 +1135,12 @@ mount_cb(zfs_handle_t *zhp, void *data) return (0); } + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_close(zhp); + return (0); + } + /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. @@ -1188,6 +1230,14 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) ret = 0; for (i = 0; i < cb.cb_used; i++) { + /* + * don't attempt to mount encrypted datasets with + * unloaded keys + */ + if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) + continue; + if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) ret = -1; else diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index f9a05aeb3977..946c4f890599 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -1102,6 +1102,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zfs_cmd_t zc = { 0 }; nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; + nvlist_t *hidden_args = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; char msg[1024]; int ret = -1; @@ -1132,17 +1135,34 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, strcmp(zonestr, "on") == 0); if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, - fsprops, zoned, NULL, NULL, msg)) == NULL) { + fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) { goto create_failed; } if (!zc_props && (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { goto create_failed; } + if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, + &wkeydata, &wkeylen) != 0) { + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, msg); + goto create_failed; + } if (nvlist_add_nvlist(zc_props, ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { goto create_failed; } + if (wkeydata != NULL) { + if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0) + goto create_failed; + + if (nvlist_add_uint8_array(hidden_args, "wkeydata", + wkeydata, wkeylen) != 0) + goto create_failed; + + if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS, + hidden_args) != 0) + goto create_failed; + } } if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) @@ -1155,6 +1175,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); + nvlist_free(hidden_args); + if (wkeydata != NULL) + free(wkeydata); switch (errno) { case EBUSY: @@ -1220,6 +1243,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); + nvlist_free(hidden_args); + if (wkeydata != NULL) + free(wkeydata); return (ret); } diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c index 4e89dc053d1d..37eada654cb9 100644 --- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c @@ -324,11 +324,9 @@ cksummer(void *arg) struct drr_object *drro 
= &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { (void) ssread(buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - ofp); + DRR_OBJECT_PAYLOAD_SIZE(drro), ofp); } - if (dump_record(drr, buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro), &stream_cksum, outfd) != 0) goto out; break; @@ -337,8 +335,8 @@ cksummer(void *arg) case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; - (void) ssread(buf, drrs->drr_length, ofp); - if (dump_record(drr, buf, drrs->drr_length, + (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp); + if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs), &stream_cksum, outfd) != 0) goto out; break; @@ -368,7 +366,7 @@ cksummer(void *arg) if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || - !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { + !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) { SHA256_CTX ctx; zio_cksum_t tmpsha256; @@ -384,7 +382,7 @@ cksummer(void *arg) drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; - drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; + drrw->drr_flags |= DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; @@ -413,8 +411,7 @@ cksummer(void *arg) wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; - wbr_drrr->drr_checksumflags = - drrw->drr_checksumtype; + wbr_drrr->drr_flags = drrw->drr_flags; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = @@ -453,6 +450,14 @@ cksummer(void *arg) break; } + case DRR_OBJECT_RANGE: + { + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) + goto out; + break; + } + default: (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); @@ -606,6 +611,7 @@ typedef struct send_data { const char *fsname; const char *fromsnap; const char *tosnap; + boolean_t raw; boolean_t recursive; boolean_t verbose; @@ -783,7 +789,8 @@ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; - nvlist_t *nvfs, *nv; + nvlist_t *nvfs = NULL, *nv = NULL; + char *strval; int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t fromsnap_txg_save = sd->fromsnap_txg; @@ -847,8 +854,48 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); + + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + /* + * If the root dataset of a properties send is encrypted but + * not an encryption root we need to adjust the properties so + * that it appears to be an encryption root. This ensures that + * all received datasets will have a valid keylocation. + */ + if (strcmp(zfs_get_name(zhp), sd->fsname) == 0 && + nvlist_lookup_string(nv, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &strval) != 0) { + char keylocation[MAXNAMELEN]; + + rv = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, + keylocation, sizeof (keylocation), NULL, + NULL, 0, B_FALSE); + if (rv != 0) { + rv = -1; + goto out; + } + + VERIFY(0 == nvlist_add_string(nv, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + keylocation)); + } + + /* + * Encrypted datasets can only be sent with properties if the + * raw flag is specified. Otherwise, the receiving side won't + * have a keyformat to use. 
+ */ + if (!sd->raw) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "cannot send %s@%s: encrypted dataset %s may not " + "be sent with properties without the raw flag\n"), + sd->fsname, sd->tosnap, zhp->zfs_name); + rv = -1; + goto out; + } + } + VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); - nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; @@ -864,7 +911,6 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); - nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) @@ -874,6 +920,8 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) sd->parent_fromsnap_guid = parent_fromsnap_guid_save; sd->fromsnap_txg = fromsnap_txg_save; sd->tosnap_txg = tosnap_txg_save; + nvlist_free(nv); + nvlist_free(nvfs); zfs_close(zhp); return (rv); @@ -881,7 +929,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, - const char *tosnap, boolean_t recursive, boolean_t verbose, + const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t verbose, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; @@ -897,6 +945,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; + sd.raw = raw; sd.verbose = verbose; if ((error = send_iterate_fs(zhp, &sd)) != 0) { @@ -928,7 +977,7 @@ typedef struct send_dump_data { uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; - boolean_t large_block, compress; + boolean_t large_block, compress, raw; int outfd; boolean_t err; nvlist_t *fss; @@ -970,6 +1019,11 @@ estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { @@ -1231,6 +1285,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) flags |= LZC_SEND_FLAG_EMBED_DATA; if (sdd->compress) flags |= LZC_SEND_FLAG_COMPRESS; + if (sdd->raw) + flags |= LZC_SEND_FLAG_RAW; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { @@ -1614,6 +1670,8 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags->compress || nvlist_exists(resume_nvl, "compressok")) lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if (flags->raw || nvlist_exists(resume_nvl, "rawok")) + lzc_flags |= LZC_SEND_FLAG_RAW; if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { @@ -1691,6 +1749,11 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, switch (error) { case 0: return (0); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case EXDEV: case ENOENT: case EDQUOT: @@ -1769,7 +1832,14 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } } - if (flags->dedup && !flags->dryrun) { + /* + * Start the dedup thread if this is a dedup stream. 
We do not bother + * doing this if this a raw send of an encrypted dataset with dedup off + * because normal encrypted blocks won't dedup. + */ + if (flags->dedup && !flags->dryrun && !(flags->raw && + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && + zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = pipe(pipefd)) != 0) { @@ -1808,10 +1878,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } + if (flags->raw) { + VERIFY(0 == nvlist_add_boolean(hdrnv, "raw")); + } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags->replicate, flags->verbose, - &fss, &fsavl); + fromsnap, tosnap, flags->replicate, flags->raw, + flags->verbose, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); @@ -1876,6 +1949,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; sdd.compress = flags->compress; + sdd.raw = flags->raw; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) @@ -2037,6 +2111,11 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, } return (zfs_error(hdl, EZFS_NOENT, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "target is busy; if a filesystem, " @@ -2423,7 +2502,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, - recursive, B_FALSE, &local_nv, &local_avl)) != 0) + recursive, B_TRUE, B_FALSE, &local_nv, &local_avl)) != 0) return (error); /* @@ -3047,6 +3126,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zprop_errflags_t prop_errflags; boolean_t recursive; char *snapname = NULL; + nvlist_t *props = NULL; begin_time = time(NULL); @@ -3059,7 +3139,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (stream_avl != NULL) { nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); - nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", @@ -3193,6 +3272,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & DMU_BACKUP_FEATURE_RESUMING; + boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RAW; stream_wantsnewfs = (drrb->drr_fromguid == NULL || (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; @@ -3328,6 +3409,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_close(zhp); } else { + zfs_handle_t *zhp; + /* * Destination filesystem does not exist. Therefore we better * be creating a new filesystem (either from a full backup, or @@ -3355,6 +3438,35 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); } + /* + * It is invalid to receive a properties stream that was + * unencrypted on the send side as a child of an encrypted + * parent. Technically there is nothing preventing this, but + * it would mean that the encryption=off property which is + * locally set on the send side would not be received correctly. 
+ * We can infer encryption=off if the stream is not raw and + * properties were included since the send side will only ever + * send the encryption property in a raw nvlist header. + */ + if (!raw && props != NULL) { + uint64_t crypt; + + zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_DATASET); + if (zhp == NULL) { + return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); + } + + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + zfs_close(zhp); + + if (crypt != ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent '%s' must not be encrypted to " + "receive unenecrypted property"), zc.zc_name); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + } + newfs = B_TRUE; } @@ -3459,7 +3571,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * get a strange "does not exist" error message. */ *cp = '\0'; - if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, + if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, B_TRUE, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); @@ -3495,6 +3607,11 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "inherited key must be loaded")); + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); + break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { @@ -3681,6 +3798,12 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } + if ((featureflags & DMU_BACKUP_FEATURE_RAW) && originsnap != NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "origin may not be specified for raw sends")); + return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); + } + if (strchr(drrb->drr_toname, '@') == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad snapshot name)")); diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index b8a16a08d454..2d155e447d46 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -230,6 +230,8 @@ libzfs_error_description(libzfs_handle_t *hdl) return (dgettext(TEXT_DOMAIN, "invalid diff data")); case EZFS_POOLREADONLY: return (dgettext(TEXT_DOMAIN, "pool is read-only")); + case EZFS_CRYPTOFAILED: + return (dgettext(TEXT_DOMAIN, "encryption failure")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers index 7fa722a532e9..d628ba3605a9 100644 --- a/usr/src/lib/libzfs/common/mapfile-vers +++ b/usr/src/lib/libzfs/common/mapfile-vers @@ -71,6 +71,11 @@ SYMBOL_VERSION SUNWprivate_1.1 { zfs_close; zfs_create; zfs_create_ancestors; + zfs_crypto_attempt_load_keys; + zfs_crypto_is_encryption_root; + zfs_crypto_load_key; + zfs_crypto_rewrap; + zfs_crypto_unload_key; zfs_dataset_exists; zfs_deleg_share_nfs; zfs_destroy; diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.c b/usr/src/lib/libzfs_core/common/libzfs_core.c index 71e4d5235153..99de19c5b47c 100644 --- a/usr/src/lib/libzfs_core/common/libzfs_core.c +++ b/usr/src/lib/libzfs_core/common/libzfs_core.c @@ -127,7 +127,7 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, { zfs_cmd_t zc = { 0 }; int error = 0; - char *packed; + char *packed = NULL; size_t size; ASSERT3S(g_refcount, >, 0); @@ -135,9 +135,11 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, (void) 
strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); - packed = fnvlist_pack(source, &size); - zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; - zc.zc_nvlist_src_size = size; + if (source != NULL) { + packed = fnvlist_pack(source, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } if (resultp != NULL) { *resultp = NULL; @@ -171,34 +173,58 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, } out: - fnvlist_pack_free(packed, size); + if (packed != NULL) + fnvlist_pack_free(packed, size); free((void *)(uintptr_t)zc.zc_nvlist_dst); return (error); } int -lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) +lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, + uint8_t *wkeydata, uint_t wkeylen) { int error; + nvlist_t *hidden_args = NULL; nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); if (props != NULL) fnvlist_add_nvlist(args, "props", props); + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(hidden_args); nvlist_free(args); return (error); } int -lzc_clone(const char *fsname, const char *origin, - nvlist_t *props) +lzc_clone(const char *fsname, const char *origin, nvlist_t *props, + uint8_t *wkeydata, uint_t wkeylen) { int error; + nvlist_t *hidden_args = NULL; nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); if (props != NULL) fnvlist_add_nvlist(args, "props", props); + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(hidden_args); nvlist_free(args); return (error); } @@ -498,6 +524,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd, fnvlist_add_boolean(args, "embedok"); if (flags & LZC_SEND_FLAG_COMPRESS) fnvlist_add_boolean(args, "compressok"); + if (flags & LZC_SEND_FLAG_RAW) + fnvlist_add_boolean(args, "rawok"); if (resumeobj != 0 || resumeoff != 0) { fnvlist_add_uint64(args, "resume_object", resumeobj); fnvlist_add_uint64(args, "resume_offset", resumeoff); @@ -568,7 +596,7 @@ recv_read(int fd, void *buf, int ilen) static int recv_impl(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, boolean_t resumable, int fd, + boolean_t force, boolean_t resumable, boolean_t raw, int fd, const dmu_replay_record_t *begin_record) { /* @@ -661,9 +689,9 @@ recv_impl(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) + boolean_t raw, boolean_t force, int fd) { - return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL)); + return (recv_impl(snapname, props, origin, force, B_FALSE, raw, fd, NULL)); } /* @@ -674,9 +702,9 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) + boolean_t force, boolean_t raw, int fd) { - return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL)); + return (recv_impl(snapname, props, origin, force, B_TRUE, raw, fd, NULL)); } /* @@ -692,12 +720,12 @@ lzc_receive_resumable(const char 
*snapname, nvlist_t *props, const char *origin, */ int lzc_receive_with_header(const char *snapname, nvlist_t *props, - const char *origin, boolean_t force, boolean_t resumable, int fd, - const dmu_replay_record_t *begin_record) + const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, + int fd, const dmu_replay_record_t *begin_record) { if (begin_record == NULL) return (EINVAL); - return (recv_impl(snapname, props, origin, force, resumable, fd, + return (recv_impl(snapname, props, origin, force, resumable, raw, fd, begin_record)); } @@ -820,3 +848,64 @@ lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) return (error); } + +/* + * Performs key management functions + * + * crypto_cmd should be a value from zfs_ioc_crypto_cmd_t. If the command + * specifies to load or change a wrapping key, the key should be specified in + * the hidden_args nvlist so that it is not logged + */ +int +lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, + uint_t wkeylen) +{ + int error; + nvlist_t *ioc_args; + nvlist_t *hidden_args; + + if (wkeydata == NULL) + return (EINVAL); + + ioc_args = fnvlist_alloc(); + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); + fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); + if (noop) + fnvlist_add_boolean(ioc_args, "noop"); + error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); + nvlist_free(hidden_args); + nvlist_free(ioc_args); + + return (error); +} + +int +lzc_unload_key(const char *fsname) +{ + return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); +} + +int +lzc_change_key(const char *fsname, nvlist_t *props, uint8_t *wkeydata, + uint_t wkeylen) +{ + int error; + nvlist_t *ioc_args = fnvlist_alloc(); + nvlist_t *hidden_args = NULL; + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + + if (props != NULL) + fnvlist_add_nvlist(ioc_args, "props", props); + + error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); + nvlist_free(hidden_args); + nvlist_free(ioc_args); + return (error); +} diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.h b/usr/src/lib/libzfs_core/common/libzfs_core.h index 094fa257e4c3..80e552e2988c 100644 --- a/usr/src/lib/libzfs_core/common/libzfs_core.h +++ b/usr/src/lib/libzfs_core/common/libzfs_core.h @@ -47,12 +47,16 @@ enum lzc_dataset_type { }; int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **); -int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *); -int lzc_clone(const char *, const char *, nvlist_t *); +int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *, + uint_t); +int lzc_clone(const char *, const char *, nvlist_t *, uint8_t *, uint_t); int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **); int lzc_bookmark(nvlist_t *, nvlist_t **); int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **); int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **); +int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t); +int lzc_unload_key(const char *); +int lzc_change_key(const char *, nvlist_t *, uint8_t *, uint_t); int lzc_snaprange_space(const char *, const char *, uint64_t *); @@ -63,7 +67,8 @@ int lzc_get_holds(const char *, nvlist_t **); enum lzc_send_flags { LZC_SEND_FLAG_EMBED_DATA = 1 << 0, LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1, - LZC_SEND_FLAG_COMPRESS = 1 << 2 + LZC_SEND_FLAG_COMPRESS = 1 << 2, + LZC_SEND_FLAG_RAW = 1 << 3, }; 
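
As a minimal usage sketch of the new key-management entry points (not part of the diff itself), a libzfs_core consumer might rotate a dataset's wrapping key roughly as follows. The dataset name, the helper name, and the 32-byte raw keys are hypothetical, and the sketch assumes the key is not yet loaded and the dataset is unmounted; the lzc_load_key(), lzc_change_key(), and lzc_unload_key() signatures follow the prototypes added above.

    /*
     * Hypothetical caller of the new libzfs_core key-management functions.
     * Both keys are assumed to be 32-byte raw wrapping keys, matching the
     * keyformat=raw rules described in the zfs(1M) changes further below.
     */
    #include <libzfs_core.h>
    #include <sys/types.h>

    static int
    rotate_raw_key(const char *fsname, uint8_t *oldkey, uint8_t *newkey)
    {
    	int err;

    	if ((err = libzfs_core_init()) != 0)
    		return (err);

    	/* Check the old key without loading it (noop), then load it. */
    	err = lzc_load_key(fsname, B_TRUE, oldkey, 32);
    	if (err == 0)
    		err = lzc_load_key(fsname, B_FALSE, oldkey, 32);

    	/* Rewrap with the new key; props == NULL leaves keyformat etc. alone. */
    	if (err == 0)
    		err = lzc_change_key(fsname, NULL, newkey, 32);

    	/* Drop the key again; requires the dataset to be unmounted. */
    	if (err == 0)
    		err = lzc_unload_key(fsname);

    	libzfs_core_fini();
    	return (err);
    }
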
int lzc_send(const char *, const char *, int, enum lzc_send_flags); @@ -73,11 +78,12 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *); struct dmu_replay_record; -int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); -int lzc_receive_resumable(const char *, nvlist_t *, const char *, +int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t, int); +int lzc_receive_resumable(const char *, nvlist_t *, const char *, + boolean_t, boolean_t, int); int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, - boolean_t, int, const struct dmu_replay_record *); + boolean_t, boolean_t, int, const struct dmu_replay_record *); boolean_t lzc_exists(const char *); diff --git a/usr/src/lib/libzfs_core/common/mapfile-vers b/usr/src/lib/libzfs_core/common/mapfile-vers index b699fcbf4054..20b01c9211c9 100644 --- a/usr/src/lib/libzfs_core/common/mapfile-vers +++ b/usr/src/lib/libzfs_core/common/mapfile-vers @@ -44,6 +44,9 @@ SYMBOL_VERSION ILLUMOS_0.1 { lzc_bookmark; lzc_clone; lzc_create; + lzc_load_key; + lzc_unload_key; + lzc_change_key; lzc_destroy_bookmarks; lzc_destroy_snaps; lzc_exists; diff --git a/usr/src/lib/libzpool/Makefile.com b/usr/src/lib/libzpool/Makefile.com index b016ffaa7054..93a41076eb97 100644 --- a/usr/src/lib/libzpool/Makefile.com +++ b/usr/src/lib/libzpool/Makefile.com @@ -75,6 +75,12 @@ CERRWARN += -_gcc=-Wno-empty-body CERRWARN += -_gcc=-Wno-unused-function CERRWARN += -_gcc=-Wno-unused-label +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. +LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + .KEEP_STATE: all: $(LIBS) diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index 2290164413b4..e39716108682 100644 --- a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -40,6 +40,11 @@ #include #include #include +#include +#include +#include +#include +#include /* * Emulation of kernel services in userland. 
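
To illustrate the new LZC_SEND_FLAG_RAW flag and the extra boolean in the receive entry points, here is another sketch that is not part of the patch. The snapshot names and staging file are made up, and the raw/force argument order follows the lzc_receive() definition in this change; a real consumer would more likely stream through a pipe between two processes rather than a temporary file.

    /*
     * Hypothetical raw backup/restore of an encrypted snapshot.  The stream
     * is staged in a regular file so the send and receive can run back to
     * back in one thread.
     */
    #include <libzfs_core.h>
    #include <sys/types.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <errno.h>

    static int
    raw_backup_restore(void)
    {
    	int err, fd;

    	if ((err = libzfs_core_init()) != 0)
    		return (err);

    	fd = open("/var/tmp/rawstream", O_RDWR | O_CREAT | O_TRUNC, 0600);
    	if (fd == -1) {
    		libzfs_core_fini();
    		return (errno);
    	}

    	/* Raw send: blocks leave the source still encrypted with its keys. */
    	err = lzc_send("tank/secure@backup", NULL, fd, LZC_SEND_FLAG_RAW);

    	if (err == 0) {
    		(void) lseek(fd, 0, SEEK_SET);
    		/* raw = B_TRUE, force = B_FALSE, per the updated prototype. */
    		err = lzc_receive("tank/restore@backup", NULL, NULL,
    		    B_TRUE, B_FALSE, fd);
    	}

    	(void) close(fd);
    	libzfs_core_fini();
    	return (err);
    }
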
@@ -1159,3 +1164,87 @@ geterror(struct buf *bp) } return (error); } + +int +crypto_create_ctx_template(crypto_mechanism_t *mech, + crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag) +{ + return (NULL); +} + +crypto_mech_type_t +crypto_mech2id(crypto_mech_name_t name) +{ + return (CRYPTO_MECH_INVALID); +} + +int +crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data, + crypto_key_t *key, crypto_ctx_template_t impl, + crypto_data_t *mac, crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, + crypto_key_t *key, crypto_ctx_template_t tmpl, + crypto_data_t *ciphertext, crypto_call_req_t *cr) +{ + return (0); +} + +/* This could probably be a weak reference */ +int +crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, + crypto_key_t *key, crypto_ctx_template_t tmpl, + crypto_data_t *ciphertext, crypto_call_req_t *cr) +{ + return (0); +} + + +int +crypto_digest_final(crypto_context_t context, crypto_data_t *digest, + crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_digest_update(crypto_context_t context, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp, + crypto_call_req_t *crq) +{ + return (0); +} + +void +crypto_destroy_ctx_template(crypto_ctx_template_t tmpl) +{ +} + +extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key, + crypto_ctx_template_t tmpl, crypto_context_t *ctxp, + crypto_call_req_t *cr) +{ + return (0); +} + +extern int crypto_mac_update(crypto_context_t ctx, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} + +extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m index 22f5b7c0aa98..12d00948ea54 100644 --- a/usr/src/man/man1m/zfs.1m +++ b/usr/src/man/man1m/zfs.1m @@ -147,7 +147,7 @@ .Cm mount .Nm .Cm mount -.Op Fl Ov +.Op Fl Olv .Op Fl o Ar options .Fl a | Ar filesystem .Nm @@ -265,6 +265,22 @@ .Cm diff .Op Fl FHt .Ar snapshot Ar snapshot Ns | Ns Ar filesystem +.Nm +.Cm load-key +.Op Fl rn +.Op Fl L Ar keylocation +.Op Fl a Ns | Ns Ar filesystem +.Nm +.Cm unload-key +.Op Fl r +.Op Fl a Ns | Ns Ar filesystem +.Nm +.Cm change-key +.Op Fl l +.Op Fl o Sy keylocation Ns = Ns Ar value +.Op Fl o Sy keyformat Ns = Ns Ar value +.Op Fl o Sy pbkdf2iters Ns = Ns Ar value +.Ar filesystem .Sh DESCRIPTION The .Nm @@ -539,6 +555,16 @@ The total number of filesystems and volumes that exist under this location in the dataset tree. This value is only available when a .Sy filesystem_limit has been set somewhere in the tree under which the dataset resides. +.It Sy keystatus +Indicates if an encryption key is currently loaded into ZFS. +The possible values are +.Sy none , available , +and +.Sy unavaliable . +See +.Nm Cm load-key +and +.Nm Cm unload-key . .It Sy logicalreferenced The amount of space that is .Qq logically @@ -1004,8 +1030,9 @@ The .Sy zle compression algorithm compresses runs of zeros. .Pp -This property can also be referred to by its shortened column name -\fBcompress\fR. Changing this property affects only newly-written data. +This property can also be referred to by its shortened column name, +.Sy compress . +Changing this property affects only newly-written data. .It Sy copies Ns = Ns Sy 1 Ns | Ns Sy 2 Ns | Ns Sy 3 Controls the number of copies of data stored for this dataset. 
These copies are in addition to any redundancy provided by the pool, for example, mirroring or @@ -1022,6 +1049,79 @@ option. Controls whether device nodes can be opened on this file system. The default value is .Sy on . +.It Xo +.Sy encryption Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy aes-128-ccm Ns | Ns +.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns +.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm +.Xc +Controls the encryption cipher suite +.Pq block cipher, key length, and mode +used for this dataset. +Requires the encryption feature to be enabled on the pool. +Requires a +.Sy keyformat +to be set at dataset creation time. +.Pp +Selecting +.Sy encryption Ns = Ns Sy on +when creating a dataset indicates that the default encryption suite will be +selected, which is currently +.Sy aes-256-ccm . +In order to provide consistent data protection, encryption must be specified at +dataset creation time and it cannot be changed afterwards. +.Pp +For more details and caveats about encryption see the +.Sx Encryption +section. +.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase +Controls what format the user's encryption key will be provided as. +This property is only set when the dataset is encrypted. +.Pp +Raw keys and hex keys must be 32 bytes long +.Pq regardless of the chosen encryption suite +and must be randomly generated. +Passphrases must be between 8 and 64 bytes long and will be processed through +PBKDF2 before being used +.Po see the +.Sy pbkdf2iters +property +.Pc . +Even though the encryption suite cannot be changed after dataset creation, the +keyformat can be with +.Nm Cm change-key . +.It Sy keylocation Ns = Ns Sy prompt Ns | Ns Ar file:// +Controls where the user's encryption key will be loaded from by default for +commands such as +.Nm Cm load-key +and +.Nm Cm mount Fl l . +This property is only set when the dataset is encrypted. +.Pp +If +.Sy keylocation +is set locally, the dataset is an encryption root. +Encryption roots share their keys with all datasets that inherit this property. +This means that when a key is loaded for the encryption root, the keys for all +children that inherit the +.Sy keylocation +property are automatically loaded as well. +.Pp +Even though the encryption suite cannot be changed after dataset creation, the +keylocation can be with +.Nm Cm change-key . +If the dataset is an encryption root this property may also be changed with +.Nm Cm set . +If +.Sy prompt +is selected ZFS will ask for the key at the command prompt when it is required +to access the encrypted data +.Po see +.Nm Cm load-key +.Pc . +This setting will also allow the key to be passed in via STDIN, but users should +be careful not to place keys which should be kept secret on the command line. +If a file URI is selected, the key will be loaded from the specified absolute +file path. .It Sy exec Ns = Ns Sy on Ns | Ns Sy off Controls whether processes can be executed from within this file system. The default value is @@ -1068,6 +1168,24 @@ file system is umounted and remounted. See for more information on .Sy nbmand mounts. +.It Sy pbkdf2iters Ns = Ns Ar iterations +Controls the number of PBKDF2 iterations that a +.Sy passphrase +encryption key should be run through when processing it into an encryption key. +This property is only defined when encryption is enabled and a +.Sy passphrase +key is selected. +The goal of PBKDF2 is to significantly increase the computational difficulty +needed to brute force a user's passphrase. 
+This is accomplished by forcing the attacker to run each passphrase through a +computationally expensive hashing function many times before they arrive at the +resulting key. +A user who actually knows the passphrase will only have to pay this cost once. +As CPUs become better at processing, this number should be raised to ensure that +a brute force attack is still not possible. +The current default is 350000 and the minimum is 100000. +This property may be changed with +.Nm Cm change-key . .It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata Controls what is cached in the primary cache .Pq ARC . @@ -1651,6 +1769,47 @@ installed or upgraded, use the and .Xr dumpadm 1M commands. +.Ss "Encryption" +Enabling the +.Sy encryption +feature allows the user to create natively encrypted filesystems and volumes. +ZFS will encrypt all user data including file and zvol data, file attributes, +ACLs, permission bits, directory listings, FUID mappings, and userused/groupused +data. +ZFS +will not encrypt metadata related to the pool structure, including dataset +names, dataset hierarchy, file size, file holes, and dedup tables. +Key rotation is managed internally by the ZFS kernel module and changing the +user's key does not require re-encrypting the entire dataset. +Datasets can be scrubbed, resilvered, renamed, and deleted without the +encryption keys being loaded +.Po see the +.Sy keylocation +property and +.Nm Cm load-key +subcommand for more info +.Pc . +.Pp +Encryption changes the behavior of a few ZFS operations. +Encryption is applied after compression so compression ratios are preserved. +Normally checksums in ZFS are 256 bits long, but for encrypted data the checksum +is 128 bits of the user-chosen checksum and 128 bits of MAC from the encryption +suite, which provides additional protection against maliciously altered data. +Deduplication is still possible with encryption enabled but for security, +datasets will only dedup against themselves, their snapshots, and their clones. +.Pp +There are a few limitations on encrypted datasets. +Encrypted data cannot be embedded via the +.Sy embedded_data +feature. +Encrypted datasets may not have +.Sy copies Ns = Ns Sy 3 +since the implementation stores some encryption metadata where the third copy +would normally be. +Since compression is applied before encryption datasets may be vulnerable to a +CRIME-like attack if applications accessing the data allow for it. +Deduplication with encryption will leak information about which blocks are +equivalent in a dataset and will incur an extra CPU cost per block written. .Sh SUBCOMMANDS All subcommands that modify state are logged persistently to the pool in their original form. @@ -2357,7 +2516,7 @@ Displays all ZFS file systems currently mounted. .It Xo .Nm .Cm mount -.Op Fl Ov +.Op Fl Olv .Op Fl o Ar options .Fl a | Ar filesystem .Xc @@ -2370,6 +2529,16 @@ for more information. .It Fl a Mount all available ZFS file systems. Invoked automatically as part of the boot process. +.It Fl l +Load keys for encrypted filesystems as they are being mounted. +This is equivalent to executing +.Nm Cm load-key +on each encryption root before mounting it. +Note that if a filesystem has a +.Sy keylocation +of +.Sy prompt +this will cause the terminal to interactively block after asking for the key. .It Ar filesystem Mount the specified filesystem. 
.It Fl o Ar options @@ -2910,6 +3079,10 @@ destroy subcommand Must also have the 'mount' ability diff subcommand Allows lookup of paths within a dataset given an object number, and the ability to create snapshots necessary to 'zfs diff'. +load-key subcommand Allows loading and unloading of encryption key + (see 'zfs load-key' and 'zfs unload-key'). +change-key subcommand Allows changing an encryption key via + 'zfs change-key'. mount subcommand Allows mount/umount of ZFS datasets promote subcommand Must also have the 'mount' and 'promote' ability in the origin file system @@ -3146,6 +3319,109 @@ arrows. .It Fl t Display the path's inode change time as the first column of output. .El +.It Xo +.Nm +.Cm load-key +.Op Fl nr +.Op Fl L Ar keylocation +.Fl a Ns | Ns filesystem +.Xc +Use +.Ar keylocation +instead of the +.Sy keylocation +property. +This will not change the value of the property on the dataset. +Note that if used with either +.Fl r +or +.Fl a +.Ar keylocation +may only be given as +.Sy prompt . +.Bl -tag -width Ds +.It Fl a +Loads the keys for all encryption roots in all imported pools. +.It Fl n +Do a dry-run +.Cm load-key . +This will cause zfs to simply check that the provided key is correct. +This command may be run even if the key is already loaded. +.It Fl r +Recursively loads the keys for the specified filesystem and all descendent +encryption roots. +.El +.It Xo +.Nm +.Cm unload-key +.Op Fl r +.Fl a Ns | Ns Ar filesystem +.Xc +Unloads a key from ZFS, removing the ability to access the dataset and all of +its children that inherit the +.Sy encryption +property. +This requires that the dataset is not currently open or mounted. +When a key is unloaded the +.Sy keystatus +property will be set to +.Sy unavailable . +.Bl -tag -width Ds +.It Fl a +Unloads the keys for all encryption roots in all imported pools. +.It Fl r +Recursively unloads the keys for the specified filesystem and all descendent +encryption roots. +.El +.It Xo +.Nm +.Cm change-key +.Op Fl il +.Op Fl o Sy keylocation Ns = Ns Ar value +.Op Fl o Sy keyformat Ns = Ns Ar value +.Op Fl o Sy pbkdf2iters Ns = Ns Ar value +.Ar filesystem +.Xc +Allows a user to change the encryption key used to access a dataset. +This command requires that the existing key for the dataset is already loaded +into ZFS. +This command may also be used to change the +.Sy keylocation , keyformat , +and +.Sy pbkdf2iters +properties as needed. +If the dataset was previously inheriting the +.Sy keylocation +property when this command is run it will now be locally set, indicating that +this dataset must have its key loaded separately from the parent. +Alternatively, the +.Fl i +flag may be provided to cause an encryption root to inherit the parent's key +instead. +.Bl -tag -width Ds +.It Fl i +Indicates that ZFS should make +.Ar filesystem +inherit the key of its parent. +Note that this command can only be run on an encryption root that has an +encrypted parent. +.It Fl l +Ensures the key is loaded before attempting to change the key. +This is effectively equivalent to +.Qq Nm Cm load-key Ar filesystem ; Nm Cm change-key Ar filesystem . +.It Fl o Sy property Ns = Ns Ar value +Allows the user to set encryption key properties +.Pq +.Sy keyformat , keylocation , +and +.Sy pbkdf2iters +while changing the key. +This is the only way to alter +.Sy keyformat +and +.Sy pbkdf2iters +after the dataset has been created. 
+.El .El .Sh EXIT STATUS The diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m index 04cac897e9b2..5fe6b8efd002 100644 --- a/usr/src/man/man1m/zpool.1m +++ b/usr/src/man/man1m/zpool.1m @@ -80,7 +80,7 @@ .Nm .Cm import .Fl a -.Op Fl DfmN +.Op Fl DflmN .Op Fl F Op Fl n .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl o Ar mntopts @@ -144,7 +144,7 @@ .Ar pool .Nm .Cm split -.Op Fl n +.Op Fl ln .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Op Fl R Ar root .Ar pool newpool @@ -961,7 +961,7 @@ Lists destroyed pools only. .Nm .Cm import .Fl a -.Op Fl DfmN +.Op Fl DflmN .Op Fl F Op Fl n .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl o Ar mntopts @@ -1006,6 +1006,16 @@ importable state by discarding the last few transactions. Not all damaged pools can be recovered by using this option. If successful, the data from the discarded transactions is irretrievably lost. This option is ignored if the pool is importable or already imported. +.It Fl l +Indicates that this command will request encryption keys for all encrypted +datasets it attempts to mount as it is bringing the pool online. +Note that if any datasets have a +.Sy keylocation +of +.Sy prompt +this command will block waiting for the keys to be entered. +Without this flag encrypted datasets will be left unavailable until the keys are +loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. @@ -1093,6 +1103,17 @@ importable state by discarding the last few transactions. Not all damaged pools can be recovered by using this option. If successful, the data from the discarded transactions is irretrievably lost. This option is ignored if the pool is importable or already imported. +.It Fl l +Indicates that the zpool command will request encryption keys for all +encrypted datasets it attempts to mount as it is bringing the pool +online. This is equivalent to running +.Nm Cm mount +on each encrypted dataset immediately after the pool is imported. +If any datasets have a +.Sy prompt +keysource this command will block waiting for the key to be entered. +Otherwise, encrypted datasets will be left unavailable until the keys are +loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. @@ -1353,7 +1374,7 @@ values. .It Xo .Nm .Cm split -.Op Fl n +.Op Fl ln .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Op Fl R Ar root .Ar pool newpool @@ -1369,6 +1390,16 @@ must be mirrors. At the time of the split, will be a replica of .Ar pool . .Bl -tag -width Ds +.It Fl l +Indicates that this command will request encryption keys for all encrypted +datasets it attempts to mount as it is bringing the new pool online. +Note that if any datasets have a +.Sy keylocation +of +.Sy prompt +this command will block waiting for the keys to be entered. +Without this flag encrypted datasets will be left unavailable until the keys are +loaded. .It Fl n Do dry run, do not actually perform the split. Print out the expected configuration of diff --git a/usr/src/man/man5/zpool-features.5 b/usr/src/man/man5/zpool-features.5 index c2196ecc4e6e..ec7656538cf2 100644 --- a/usr/src/man/man5/zpool-features.5 +++ b/usr/src/man/man5/zpool-features.5 @@ -554,5 +554,27 @@ Booting off of pools using \fBedonr\fR is \fBNOT\fR supported -- any attempt to enable \fBedonr\fR on a root pool will fail with an error. +.RE +.sp +.ne 2 +.na +\fB\fBencryption\fR\fR +.ad +.RS 4n +.TS +l l . 
+GUID com.datto:encryption +READ\-ONLY COMPATIBLE no +DEPENDENCIES extensible_dataset +.TE + +This feature enables the creation and management of natively encrypted datasets. + +This feature becomes \fBactive\fR when an encrypted dataset is created +and will be returned to the \fBenabled\fR state when all datasets that +use this feature are destroyed. + +.RE + .SH "SEE ALSO" \fBzpool\fR(1M) diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 1010ad94ed06..b7af4c611d0d 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -42,12 +42,14 @@ dir path=opt/zfs-tests/tests/functional/clean_mirror dir path=opt/zfs-tests/tests/functional/cli_root dir path=opt/zfs-tests/tests/functional/cli_root/zdb dir path=opt/zfs-tests/tests/functional/cli_root/zfs +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_clone dir path=opt/zfs-tests/tests/functional/cli_root/zfs_copies dir path=opt/zfs-tests/tests/functional/cli_root/zfs_create dir path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy dir path=opt/zfs-tests/tests/functional/cli_root/zfs_get dir path=opt/zfs-tests/tests/functional/cli_root/zfs_inherit +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_mount dir path=opt/zfs-tests/tests/functional/cli_root/zfs_promote dir path=opt/zfs-tests/tests/functional/cli_root/zfs_property @@ -59,6 +61,7 @@ dir path=opt/zfs-tests/tests/functional/cli_root/zfs_send dir path=opt/zfs-tests/tests/functional/cli_root/zfs_set dir path=opt/zfs-tests/tests/functional/cli_root/zfs_share dir path=opt/zfs-tests/tests/functional/cli_root/zfs_snapshot +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unshare dir path=opt/zfs-tests/tests/functional/cli_root/zfs_upgrade @@ -419,6 +422,31 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_003_neg mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/setup \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg \ @@ -441,6 +469,9 @@ file 
path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.cfg \ @@ -510,9 +541,18 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_013_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos \ + mode=0555 file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib \ mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy/setup mode=0555 @@ -609,6 +649,32 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/setup mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg \ + mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib \ + mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.cfg \ @@ -642,6 +708,9 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_001_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/setup mode=0555 @@ -719,6 +788,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_neg \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_pos \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.cfg \ @@ 
-764,6 +839,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_013_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation/setup \ @@ -813,6 +894,12 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_neg \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_pos \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_001_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg \ @@ -875,6 +962,8 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib \ mode=0444 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share.cfg \ @@ -934,6 +1023,19 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount/setup mode=0555 @@ -1144,6 +1246,15 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_023_neg \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted \ + mode=0555 file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_001_pos \ mode=0555 @@ -1277,6 +1388,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_all_001_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load \ + mode=0555 file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_features_001_pos \ mode=0555 diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index a2f1b2c4b2ad..227508f66bc3 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ 
b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -234,6 +234,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_015_neg', 'zpool_create_016_pos', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', + 'zpool_create_024_pos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg'] diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index 3982dd6ba80e..6a41ba107e08 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -114,7 +114,7 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', - 'zfs_create_013_pos'] + 'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', @@ -132,11 +132,16 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_key] +tests = ['zfs_key_unload_pos', 'zfs_key_unload_neg', 'zfs_key_load_pos', + 'zfs_key_load_neg', 'zfs_key_change_pos', 'zfs_key_change_neg'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', - 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_all_001_pos'] + 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_all_001_pos', + 'zfs_mount_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', @@ -150,7 +155,8 @@ tests = ['zfs_written_property_001_pos'] tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', - 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos'] + 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', + 'zfs_receive_encrypted_pos', 'zfs_receive_encrypted_neg'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', @@ -168,7 +174,8 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', - 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos'] + 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', + 'zfs_send_encrypted_pos', 'zfs_send_encrypted_neg'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', @@ -231,7 +238,8 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_015_neg', 'zpool_create_016_pos', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 
'zpool_create_022_pos', 'zpool_create_023_neg', - 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', + 'zpool_create_encrypted', + 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg'] [/opt/zfs-tests/tests/functional/cli_root/zpool_destroy] diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile new file mode 100644 index 000000000000..5be730f3fac4 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_change-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh new file mode 100755 index 000000000000..79cd6e9f908e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh new file mode 100755 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh new file mode 100755 index 000000000000..9a6d036f3df8 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should change the key material. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to change the key +# 3. Unmount the dataset and unload its key +# 4. Attempt to load the old key +# 5. Verify the key is not loaded +# 6. Attempt to load the new key +# 7. Verify the key is loaded +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key' should change the key material" + +log_must eval "$ECHO $PASSPHRASE1 | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "$ECHO $PASSPHRASE2 | $ZFS change-key $TESTPOOL/$TESTFS1" + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_mustnot eval "$ECHO $PASSPHRASE1 | $ZFS load-key $TESTPOOL/$TESTFS1" +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_must eval "$ECHO $PASSPHRASE2 | $ZFS load-key $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key' changes the key material" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh new file mode 100755 index 000000000000..54621efd14f5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh @@ -0,0 +1,84 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the key format. +# +# STRATEGY: +# 1. Create an encryption dataset with a passphrase key format +# 2. Unmount the dataset +# 3. Verify the key format is passphrase +# 4. Change the key format to hex +# 5. Verify the key format is hex +# 6. Attempt to reload the dataset's key +# 7. Change the key format to raw +# 8. Verify the key format is raw +# 9. Attempt to reload the dataset's key +# + +verify_runnable "both" + +function verify_keyformat +{ + typeset ds=$1 + typeset format=$2 + typeset fmt=$(get_prop keyformat $ds) + + if [[ "$fmt" != "$format" ]]; then + log_fail "Expected keyformat $format, got $fmt" + fi + + return 0 +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the key format" + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must $ZFS unmount $TESTPOOL/$TESTFS1 + +log_must verify_keyformat $TESTPOOL/$TESTFS1 "passphrase" + +log_must eval "$ECHO $HEXKEY | $ZFS change-key -o keyformat=hex" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keyformat $TESTPOOL/$TESTFS1 "hex" + +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must eval "$ECHO $HEXKEY | $ZFS load-key $TESTPOOL/$TESTFS1" + +log_must eval "$ECHO $RAWKEY | $ZFS change-key -o keyformat=raw" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keyformat $TESTPOOL/$TESTFS1 "raw" + +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must eval "$ECHO $RAWKEY | $ZFS load-key $TESTPOOL/$TESTFS1" + +log_pass "'zfs change-key -o' changes the key format" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh new file mode 100755 index 000000000000..cb21e2778ada --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh @@ -0,0 +1,91 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -i' should cause a dataset to inherit its parent key +# +# STRATEGY: +# 1. Create a parent encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Attempt to inherit the parent key +# 4. Verify the key is inherited +# 5. Unmount the parent and unload its key +# 6. Verify the key is unavailable for parent and child +# 7. Load the parent key +# 8. Verify the key is available for parent and child +# 9. 
Attempt to mount the datasets +# + +verify_runnable "both" + +function verify_encryption_source +{ + typeset ds=$1 + typeset src=$2 + typeset cryptsrc=$($ZFS get -H -o source keylocation $ds) + + if [[ "$cryptsrc" != "$src" ]]; then + log_fail "Expected encryption source '$src', got '$cryptsrc'" + fi + + return 0 +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -i' should cause a dataset to inherit its" \ + "parent key" + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "$ECHO $PASSPHRASE1 | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1/child" + +log_must verify_encryption_source $TESTPOOL/$TESTFS1/child "local" + +log_must $ZFS change-key -i $TESTPOOL/$TESTFS1/child +log_must verify_encryption_source $TESTPOOL/$TESTFS1/child \ + "inherited from $TESTPOOL/$TESTFS1" + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS1" + +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must $ZFS mount $TESTPOOL/$TESTFS1 +log_must $ZFS mount $TESTPOOL/$TESTFS1/child + +log_pass "'zfs change-key -i' causes a dataset to inherit its parent key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh new file mode 100755 index 000000000000..d3edc91d45d7 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -l' should load a dataset's key to change it. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Unload dataset and unload its key +# 3. Attempt to change the key +# 4. Verify the dataset key is loaded +# 3. 
Attempt to change the key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -l' should load a dataset's key to change it" + +log_must eval "$ECHO $PASSPHRASE > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must $ZFS change-key -l $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 + +log_must $ZFS change-key -l $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -l' loads a dataset's key to change it" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh new file mode 100755 index 000000000000..91ed1d7f0ec6 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the keylocation. +# +# STRATEGY: +# 1. Create an encryption dataset with a file key location +# 2. Change the key location to 'prompt' +# 3. Verify the key location +# 4. Unmount the dataset and unload its key +# 5. Attempt to load the dataset's key +# 6. Attempt to change the key location to 'none' +# 7. 
Attempt to change the key location to an invalid value +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the keylocation" + +log_must eval "$ECHO $PASSPHRASE > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey" + +log_must eval "$ECHO $PASSPHRASE1 | $ZFS change-key -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt" + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must eval "$ECHO $PASSPHRASE1 | $ZFS load-key $TESTPOOL/$TESTFS1" + +log_mustnot $ZFS change-key -o keylocation=none $TESTPOOL/$TESTFS1 +log_mustnot $ZFS change-key -o keylocation=foobar $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -o' changes the keylocation" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh new file mode 100755 index 000000000000..dcc123375c64 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the pbkdf2 iterations. +# +# STRATEGY: +# 1. Create an encrypted dataset with a file key location and 200,000 +# pbkdf2 iterations +# 2. Unmount the dataset and verify the pbkdf2 iterations +# 3. Change the pbkdf2 iterations to 150,000 +# 4. Verify the new pbkdf2 iterations +# 5. Unload the dataset's key +# 6.
Attempt to load the dataset's key +# + +verify_runnable "both" + +function verify_pbkdf2iters +{ + typeset ds=$1 + typeset iterations=$2 + typeset iters=$(get_prop pbkdf2iters $ds) + + if [[ "$iters" != "$iterations" ]]; then + log_fail "Expected $iterations iterations, got $iters" + fi + + return 0 +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the pbkdf2 iterations" + +log_must eval "$ECHO $PASSPHRASE > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey -o pbkdf2iters=200000 \ + $TESTPOOL/$TESTFS1 + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "200000" + +log_must $ZFS change-key -o pbkdf2iters=150000 $TESTPOOL/$TESTFS1 +log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "150000" + +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must $ZFS load-key $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -o' changes the pbkdf2 iterations" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh new file mode 100755 index 000000000000..d8607a3dc1ff --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh @@ -0,0 +1,80 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs clone' should create encrypted clones of encrypted datasets +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a snapshot of the dataset +# 3. Attempt to clone the snapshot as an unencrypted dataset +# 4. Attempt to clone the snapshot as an encryption root without a new key +# 5. Attempt to clone the snapshot as an encryption root with a new key +# 6. Attempt to clone the snapshot as a encrypted child dataset +# 7. Unmount all datasets and unload their keys +# 8. Attempt to load each dataset's key +# 9. Verify each dataset's key is loaded +# 10. 
Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs clone' should create encrypted clones of encrypted datasets" + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must $ZFS snapshot $TESTPOOL/$TESTFS1@now + +log_mustnot $ZFS clone -o encryption=off $TESTPOOL/$TESTFS1@now \ + $TESTPOOL/$TESTFS2 +log_mustnot $ZFS clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2 +log_must eval "$ECHO $PASSPHRASE1 | $ZFS clone -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2" +log_must $ZFS clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS1/child + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unmount $TESTPOOL/$TESTFS2 +log_must $ZFS unload-key -a + +log_must eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS1" +log_must eval "$ECHO $PASSPHRASE1 | $ZFS load-key $TESTPOOL/$TESTFS2" + +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child +log_must key_available $TESTPOOL/$TESTFS2 + +log_must $ZFS mount $TESTPOOL/$TESTFS1 +log_must $ZFS mount $TESTPOOL/$TESTFS1/child +log_must $ZFS mount $TESTPOOL/$TESTFS2 + +log_pass "'zfs clone' creates encrypted clones of encrypted datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh new file mode 100644 index 000000000000..ec5a02147151 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh @@ -0,0 +1,121 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2016, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib + +# +# DESCRIPTION: +# 'zfs create' should be able to create an encrypted dataset with +# a valid encryption algorithm, keysource, and key. +# +# STRATEGY: +# 1. Create a filesystem for each encryption type +# 2. Create a filesystem for each keysource type +# 3. 
Verify that each filesystem has the correct properties set +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -f $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +set -A ENCRYPTION_ALGS "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYSOURCE_TYPES "keysource=raw,prompt" \ + "keysource=hex,prompt" \ + "keysource=passphrase,prompt" + +set -A KEYSOURCES "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zfs create' should properly create encrypted datasets" + +typeset -i i=0 +while (( $i < ${#ENCRYPTION_ALGS[*]} )); do + log_must eval 'echo ${KEYSOURCES[0]} | \ + $ZFS create -o ${ENCRYPTION_ALGS[$i]} -o ${KEYSOURCE_TYPES[0]} \ + $TESTPOOL/$TESTFS1' + + datasetexists $TESTPOOL/$TESTFS1 || \ + log_fail "zfs create -o ${ENCRYPTION_ALGS[$i]} \ + -o ${KEYSOURCE_TYPES[0]} $TESTPOOL/$TESTFS1 failed." + + propertycheck $TESTPOOL/$TESTFS1 ${ENCRYPTION_PROPS[i]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[i]}." + propertycheck $TESTPOOL/$TESTFS1 ${KEYSOURCE_TYPES[0]} || \ + log_fail "failed to set ${KEYSOURCE_TYPES[0]}." + + log_must $ZFS destroy -f $TESTPOOL/$TESTFS1 + (( i = i + 1 )) +done + +typeset -i j=0 +while (( $j < ${#KEYSOURCE_TYPES[*]} )); do + log_must eval 'echo ${KEYSOURCES[$j]} | \ + $ZFS create -o ${ENCRYPTION_ALGS[0]} -o ${KEYSOURCE_TYPES[$j]} \ + $TESTPOOL/$TESTFS1' + + datasetexists $TESTPOOL/$TESTFS1 || \ + log_fail "zfs create -o ${ENCRYPTION_ALGS[0]} \ + -o ${KEYSOURCE_TYPES[$j]} $TESTPOOL/$TESTFS1 failed." + + propertycheck $TESTPOOL/$TESTFS1 ${ENCRYPTION_PROPS[0]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[0]}." + propertycheck $TESTPOOL/$TESTFS1 ${KEYSOURCE_TYPES[j]} || \ + log_fail "failed to set ${KEYSOURCE_TYPES[j]}." + + log_must $ZFS destroy -f $TESTPOOL/$TESTFS1 + (( j = j + 1 )) +done + +log_pass "'zfs create' properly creates encrypted datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh new file mode 100755 index 000000000000..fc6040711408 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. 
$STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib + +# +# DESCRIPTION: +# 'zfs create' should create an encrypted dataset with a valid encryption +# algorithm, key format, key location, and key. +# +# STRATEGY: +# 1. Create a filesystem for each combination of encryption type and key format +# 2. Verify that each filesystem has the correct properties set +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -f $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +set -A ENCRYPTION_ALGS \ + "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYFORMATS "keyformat=raw" \ + "keyformat=hex" \ + "keyformat=passphrase" + +set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zfs create' should create encrypted datasets using all" \ + "combinations of supported properties" + +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + typeset -i j=0 + while (( j < ${#KEYFORMATS[*]} )); do + log_must eval "$ECHO ${USER_KEYS[j]} | $ZFS create" \ + "-o ${ENCRYPTION_ALGS[i]} -o ${KEYFORMATS[j]}" \ + "$TESTPOOL/$TESTFS1" + + datasetexists $TESTPOOL/$TESTFS1 || \ + log_fail "Failed to create dataset using" \ + "${ENCRYPTION_ALGS[i]} and ${KEYFORMATS[j]}" + + propertycheck $TESTPOOL/$TESTFS1 ${ENCRYPTION_PROPS[i]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[i]}" + propertycheck $TESTPOOL/$TESTFS1 ${KEYFORMATS[j]} || \ + log_fail "failed to set ${KEYFORMATS[j]}" + + log_must $ZFS destroy -f $TESTPOOL/$TESTFS1 + (( j = j + 1 )) + done + (( i = i + 1 )) +done + +log_pass "'zfs create' creates encrypted datasets using all combinations of" \ + "supported properties" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh new file mode 100755 index 000000000000..99d7b5b31701 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh @@ -0,0 +1,134 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# ZFS should create datasets only if they have a valid combination of +# encryption properties set. 
+# +# penc = parent encrypted +# enc = encryption +# loc = keylocation provided +# fmt = keyformat provided +# +# penc enc fmt loc valid notes +# ------------------------------------------- +# no unspec 0 0 yes inherit no encryption (not tested here) +# no unspec 0 1 no no crypt specified +# no unspec 1 0 no no crypt specified +# no unspec 1 1 no no crypt specified +# no off 0 0 yes explicit no encryption +# no off 0 1 no keylocation given, but crypt off +# no off 1 0 no keyformat given, but crypt off +# no off 1 1 no keyformat given, but crypt off +# no on 0 0 no no keyformat specified for new key +# no on 0 1 no no keyformat specified for new key +# no on 1 0 yes new encryption root +# no on 1 1 yes new encryption root +# yes unspec 0 0 yes inherit encryption +# yes unspec 0 1 no no keyformat specified +# yes unspec 1 0 yes new encryption root, crypt inherited +# yes unspec 1 1 yes new encryption root, crypt inherited +# yes off 0 0 no unencrypted child of encrypted parent +# yes off 0 1 no unencrypted child of encrypted parent +# yes off 1 0 no unencrypted child of encrypted parent +# yes off 1 1 no unencrypted child of encrypted parent +# yes on 0 0 yes inherited encryption, local crypt +# yes on 0 1 no no keyformat specified for new key +# yes on 1 0 yes new encryption root +# yes on 1 1 yes new encryption root +# +# STRATEGY: +# 1. Attempt to create a dataset using all combinations of encryption +# properties +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "ZFS should create datasets only if they have a valid" \ + "combination of encryption properties set." 
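+ +# As a rough illustration of the table above (a sketch of the checks that +# follow, using the same dataset names, not an additional test step): the +# "no penc / encryption=on / keyformat given / no keylocation" row maps to a +# command along the lines of +# echo $PASSPHRASE | zfs create -o encryption=on \ +# -o keyformat=passphrase $TESTPOOL/$TESTFS1/c3 +# which is expected to succeed and make c3 a new encryption root, while the +# "no penc / encryption=off / keyformat given / no keylocation" row maps to +# zfs create -o encryption=off -o keyformat=passphrase $TESTPOOL/$TESTFS1/c2 +# which is expected to fail because a key property is given with crypt off.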
+ +# Unencrypted parent +log_must $ZFS create $TESTPOOL/$TESTFS1 +log_mustnot $ZFS create -o keyformat=passphrase $TESTPOOL/$TESTFS1/c1 +log_mustnot $ZFS create -o keylocation=prompt $TESTPOOL/$TESTFS1/c1 +log_mustnot $ZFS create -o keyformat=passphrase -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c1 + +log_must $ZFS create -o encryption=off $TESTPOOL/$TESTFS1/c1 +log_mustnot $ZFS create -o encryption=off -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c2 +log_mustnot $ZFS create -o encryption=off -o keyformat=passphrase \ + $TESTPOOL/$TESTFS1/c2 +log_mustnot $ZFS create -o encryption=off -o keyformat=passphrase \ + -o keylocation=prompt $TESTPOOL/$TESTFS1/c2 + +log_mustnot $ZFS create -o encryption=on $TESTPOOL/$TESTFS1/c2 +log_mustnot $ZFS create -o encryption=on -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c2 +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1/c3" +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1/c4" + +# Encrypted parent +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2" + +log_must $ZFS create $TESTPOOL/$TESTFS2/c1 +log_mustnot $ZFS create -o keylocation=prompt $TESTPOOL/$TESTFS2/c2 +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS2/c3" +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o keyformat=passphrase" \ + "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4" + +log_mustnot $ZFS create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_mustnot $ZFS create -o encryption=off -o keylocation=prompt \ + $TESTPOOL/$TESTFS2/c5 +log_mustnot $ZFS create -o encryption=off -o keyformat=passphrase \ + $TESTPOOL/$TESTFS2/c5 +log_mustnot $ZFS create -o encryption=off -o keyformat=passphrase \ + -o keylocation=prompt $TESTPOOL/$TESTFS2/c5 + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "$TESTPOOL/$TESTFS2/c5" +log_mustnot $ZFS create -o encryption=on -o keylocation=prompt \ + $TESTPOOL/$TESTFS2/c6 +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6" +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7" + +log_pass "ZFS creates datasets only if they have a valid combination of" \ + "encryption properties set." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile new file mode 100644 index 000000000000..0751428a0d32 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. 
+# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_load-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh new file mode 100755 index 000000000000..79cd6e9f908e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh new file mode 100755 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg new file mode 100644 index 000000000000..90d9f63f1dba --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg @@ -0,0 +1,26 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +export PASSPHRASE="password" +export PASSPHRASE1="password1" +export PASSPHRASE2="password2" +export HEXKEY="000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F" +export HEXKEY1="201F1E1D1C1B1A191817161514131211100F0E0D0C0B0A090807060504030201" +export RAWKEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +export RAWKEY1="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh new file mode 100755 index 000000000000..cee33bf0bced --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh @@ -0,0 +1,85 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key' should only load a key for an unloaded encrypted dataset. +# +# STRATEGY: +# 1. Attempt to load the default dataset's key +# 2. Unmount the dataset +# 3. Attempt to load the default dataset's key +# 4. Create an encrypted dataset +# 5. Unmount the dataset and unload its key +# 6. Attempt to load the dataset's key +# 7. Verify the dataset's key is loaded +# 8. Attempt to load the dataset's key again +# 9. Create an encrypted pool +# 10. Unmount the pool and unload its key +# 11. Attempt to load the pool's key +# 12. Verify the pool's key is loaded +# 13. 
Attempt to load the pool's key again +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs load-key' should only load the key for an" \ + "unloaded encrypted dataset" + +log_mustnot eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS" + +log_must $ZFS unmount $TESTPOOL/$TESTFS +log_mustnot eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS" + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_mustnot eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS1" + +typeset DISK2="$($ECHO $DISKS | $AWK '{ print $2 }')" +log_must eval "$ECHO $PASSPHRASE | $ZPOOL create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $DISK2" + +log_must $ZFS unmount $TESTPOOL1 +log_must $ZFS unload-key $TESTPOOL1 + +log_must eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL1" +log_must key_available $TESTPOOL1 + +log_mustnot eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL1" + +log_pass "'zfs load-key' only loads the key for an unloaded encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh new file mode 100755 index 000000000000..409623702d41 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -a' should load keys for all datasets. +# +# STRATEGY: +# 1. Create an encrypted filesystem, encrypted zvol, and an encrypted pool +# 2. Unmount all datasets and unload their keys +# 3. Attempt to load all dataset keys +# 4. Verify each dataset has its key loaded +# 5. 
Attempt to mount the pool and filesystem +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/zvol && log_must $ZFS destroy $TESTPOOL/zvol + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs load-key -a' should load keys for all datasets" + +log_must eval "$ECHO $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must $ZFS create -V 64M -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol + +typeset DISK2="$($ECHO $DISKS | $AWK '{ print $2}')" +log_must $ZPOOL create -O encryption=on -O keyformat=passphrase \ + -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2 + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must $ZFS unload-key $TESTPOOL/zvol + +log_must $ZFS unmount $TESTPOOL1 +log_must $ZFS unload-key $TESTPOOL1 + +log_must $ZFS load-key -a + +log_must key_available $TESTPOOL1 +log_must key_available $TESTPOOL/zvol +log_must key_available $TESTPOOL/$TESTFS1 + +log_must $ZFS mount $TESTPOOL1 +log_must $ZFS mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs load-key -a' loads keys for all datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib new file mode 100644 index 000000000000..5d258634442f --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib @@ -0,0 +1,61 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg + +# Return 0 is a dataset key is available, 1 otherwise +# +# $1 - dataset +# +function key_available +{ + typeset ds=$1 + + datasetexists $ds || return 1 + + typeset val=$(get_prop keystatus $ds) + if [[ "$val" == "none" ]]; then + log_note "Dataset $ds is not encrypted" + elif [[ "$val" == "available" ]]; then + return 0 + fi + + return 1 +} + +function key_unavailable +{ + key_available $1 && return 1 + return 0 +} + +function verify_keylocation +{ + typeset ds=$1 + typeset location=$2 + typeset keyloc=$(get_prop keylocation $ds) + + if [[ "$keyloc" != "$location" ]]; then + log_fail "Expected keylocation $location, got $keyloc" + fi + + return 0 +} diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh new file mode 100755 index 000000000000..be72f4f71471 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key' should load a dataset's key from a file. +# +# STRATEGY: +# 1. Create an encrypted dataset with a key file +# 2. Unmount the dataset and unload the key +# 3. Attempt to load the dataset's key +# 4. Verify the key is loaded +# 5. Attempt to mount the dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key' should load a key from a file" + +log_must eval "$ECHO $PASSPHRASE > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must $ZFS load-key $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 +log_must $ZFS mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs load-key' loads a key from a file" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh new file mode 100755 index 000000000000..c2d33d739ddd --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh @@ -0,0 +1,73 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -L' should override keylocation with provided value. +# +# STRATEGY: +# 1. Create a key file +# 2. Copy the key file to another location +# 3. Create an encrypted dataset using the keyfile +# 4. Unmount the dataset and unload its key +# 5. Attempt to load the dataset specifying a keylocation of file +# 6. Verify the key is loaded +# 7. Verify the keylocation is the original key file +# 8. Unload the dataset's key +# 9. Attempt to load the dataset specifying a keylocation of prompt +# 10. Verify the key is loaded +# 11. Verify the keylocation is the original key file +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -L' should override keylocation with provided value" + +typeset key_location="/$TESTPOOL/pkey1" + +log_must eval "$ECHO $PASSPHRASE > $key_location" +log_must $CP $key_location /$TESTPOOL/pkey2 + +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$key_location $TESTPOOL/$TESTFS1 + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must $ZFS load-key -L file:///$TESTPOOL/pkey2 $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location" + +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must eval "$ECHO $PASSPHRASE | $ZFS load-key -L prompt $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location" + +log_pass "'zfs load-key -L' overrides keylocation with provided value" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh new file mode 100755 index 000000000000..a6caa1f09d58 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh @@ -0,0 +1,54 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -n' should load the key for an already loaded dataset. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to load the dataset's key +# 3. Verify the key is loaded +# 4. 
Attempt to load the dataset's key with an invalid key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -n' should load the key for a loaded dataset" + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" + +log_must eval "$ECHO $PASSPHRASE | $ZFS load-key -n $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_mustnot eval "$ECHO $PASSPHRASE1 | $ZFS load-key -n $TESTPOOL/$TESTFS1" + +log_pass "'zfs load-key -n' loads the key for a loaded dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh new file mode 100755 index 000000000000..d70914f5ccb9 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh @@ -0,0 +1,66 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -r' should recursively load keys. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Unmount all datasets and unload their keys +# 4. Attempt to load all dataset keys +# 5. Verify each dataset has its key loaded +# 6. Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -r' should recursively load keys" + +log_must eval "$ECHO $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must $ZFS create -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1/child +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_must $ZFS load-key -r $TESTPOOL +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must $ZFS mount $TESTPOOL/$TESTFS1 +log_must $ZFS mount $TESTPOOL/$TESTFS1/child + +log_pass "'zfs load-key -r' recursively loads keys" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh new file mode 100644 index 000000000000..57d14c62a8ee --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zfs mount -l' should accept a valid key as it mounts the filesystem. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Unmount and unload the dataset's key +# 3. Attempt to mount the dataset +# 4. Verify the key is loaded correctly +# + +verify_runnable "both" + +typeset CRYPTDS="cryptds" +typeset PASSKEY="abcdefgh" + +function cleanup +{ + datasetexists $TESTPOOL/$CRYPTDS && \ + log_must $ZFS destroy -f $TESTPOOL/$CRYPTDS +} + +log_onexit cleanup + +log_assert "'zfs mount -l' should properly load a valid wrapping key" + +log_must eval 'echo $PASSKEY | $ZFS create -o encryption=on \ + -o keyformat=passphrase $TESTPOOL/$CRYPTDS' + +log_must $ZFS unmount $TESTPOOL/$CRYPTDS +log_must $ZFS unload-key $TESTPOOL/$CRYPTDS + +log_must eval '$ECHO $PASSKEY | $ZFS mount -l $TESTPOOL/$CRYPTDS' +mounted $TESTPOOL/$CRYPTDS || \ + log_fail "Filesystem $TESTPOOL/$TESTFS is unmounted" + +log_pass "'zfs mount -l' properly loads a valid wrapping key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_neg.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_neg.ksh new file mode 100755 index 000000000000..0561152cf05e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_neg.ksh @@ -0,0 +1,73 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify 'zfs receive' fails when receiving to a dataset with unloaded keys. +# +# STRATEGY: +# 1. Create an unencrypted dataset and an encrypted dataset +# 2. Snapshot the unencrypted dataset +# 3. Unload the encrypted dataset's wrapping key +# 4. 
Verify that the receive operation fails without a loaded encryption key +# + +verify_runnable "both" + +function cleanup +{ + log_must $RM $streamfile + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 + log_must $ZFS destroy -r $TESTPOOL/$cryptds +} + +log_onexit cleanup + +log_assert "Verify 'zfs receive' fails when receiving to a dataset with \ + unloaded keys." + +typeset cryptds="crypt" +typeset passphrase="abcdefgh" +typeset streamfile=/var/tmp/streamfile.$$ + +log_must $ZFS create $TESTPOOL/$TESTFS1 +log_must $ZFS snapshot $TESTPOOL/$TESTFS1@snap +log_must eval "$ZFS send $TESTPOOL/$TESTFS1@snap > $streamfile" + +log_must eval "$ECHO $passphrase | \ + $ZFS create -o encryption=on -o keyformat=passphrase $TESTPOOL/$cryptds" +log_must $ZFS unmount $TESTPOOL/$cryptds +log_must $ZFS unload-key $TESTPOOL/$cryptds +log_mustnot eval "$ZFS recv $TESTPOOL/$cryptds/recv < $streamfile" + +log_pass "'zfs receive' fails when receiving to a dataset with unloaded keys." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_pos.ksh new file mode 100755 index 000000000000..bff36bb6ad00 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_encrypted_pos.ksh @@ -0,0 +1,84 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify 'zfs receive' works with encrypted datasets. +# +# STRATEGY: +# 1. Create an unencrypted dataset with some data +# 2. Snapshot the unencrypted dataset +# 3. Create an encrypted dataset +# 4. Verify that the send stream is receivable as an encrypted child dataset +# 5. Verify that the data that was sent matches the original data +# + +verify_runnable "both" + +function cleanup +{ + log_must $RM $streamfile + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 + log_must $ZFS destroy -r $TESTPOOL/$cryptds +} + +log_onexit cleanup + +log_assert "Verify 'zfs receive' works with encrypted datasets." 
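+ +# In outline, the flow exercised below is (a sketch of the commands used +# later in this file, not extra test steps): +# zfs send $TESTPOOL/$TESTFS1@snap > $streamfile +# echo $passphrase | zfs create -o encryption=on \ +# -o keyformat=passphrase $TESTPOOL/$cryptds +# zfs recv $TESTPOOL/$cryptds/recv < $streamfile +# The receive lands under an encrypted parent, so the received child is +# expected to come back encrypted, and the checksum of $testfile is compared +# between source and destination to confirm the data is intact.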
+ +typeset cryptds="crypt" +typeset passphrase="abcdefgh" +typeset testfile="testfile" +typeset streamfile=/var/tmp/streamfile.$$ + +log_must $ZFS create $TESTPOOL/$TESTFS1 +send_mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS1) || \ + log_fail "get_prop mountpoint $TESTPOOL/$TESTFS1" + +log_must $FILE_WRITE -o create -f $send_mntpnt/$testfile -b 4096 -c 5 -d 1 +log_must $ZFS snapshot $TESTPOOL/$TESTFS1@snap +log_must eval "$ZFS send $TESTPOOL/$TESTFS1@snap > $streamfile" + +log_must eval "$ECHO $passphrase | \ + $ZFS create -o encryption=on -o keyformat=passphrase $TESTPOOL/$cryptds" +log_must eval "$ZFS recv $TESTPOOL/$cryptds/recv < $streamfile" + +recv_mntpnt=$(get_prop mountpoint $TESTPOOL/$cryptds/recv) || \ + log_fail "get_prop mountpoint $TESTPOOL/$cryptds/recv" + +checksum1=$($SUM $send_mntpnt/$testfile | $AWK '{print $1}') +checksum2=$($SUM $recv_mntpnt/$testfile | $AWK '{print $1}') +[[ "$checksum1" != "$checksum2" ]] && \ + log_fail "Checksums differ ($checksum1 != $checksum2)" + +log_pass "'zfs receive' works with encrypted datasets." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh new file mode 100755 index 000000000000..3862f31728a5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh @@ -0,0 +1,69 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs rename' should not move an encrypted child dataset outside of its +# encryption root. +# +# STRATEGY: +# 1. Create two encryption roots and a child and grandchild of the first +# dataset +# 2. Attempt to rename the grandchild under an unencrypted parent +# 3. Attempt to rename the grandchild under a different encrypted parent +# 4. Attempt to rename the grandchild under the current parent +# 5. 
Attempt to rename the grandchild to a child +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS3 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS3 +} +log_onexit cleanup + +log_assert "'zfs rename' should not move an encrypted child outside of its" \ + "encryption root" + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" +log_must $ZFS create $TESTPOOL/$TESTFS2/child +log_must $ZFS create $TESTPOOL/$TESTFS2/child/grandchild +log_must eval "$ECHO $PASSPHRASE1 | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS3" + +log_mustnot $ZFS rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/grandchild +log_mustnot $ZFS rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/$TESTFS3/grandchild +log_must $ZFS rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/$TESTFS2/child/grandchild2 +log_must $ZFS rename $TESTPOOL/$TESTFS2/child/grandchild2 \ + $TESTPOOL/$TESTFS2/grandchild2 + +log_pass "'zfs rename' does not move an encrypted child outside of its" \ + "encryption root" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh new file mode 100755 index 000000000000..6a53dfd11443 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh @@ -0,0 +1,51 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs rename' should not rename an unencrypted dataset to a child +# of an encrypted dataset +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. 
Attempt to rename the default dataset to a child of the encrypted dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must $ZFS destroy $TESTPOOL/$TESTFS2
+}
+log_onexit cleanup
+
+log_assert "'zfs rename' should not rename an unencrypted dataset to a" \
+ "child of an encrypted dataset"
+
+log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2"
+log_mustnot $ZFS rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS
+
+log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \
+ "of an encrypted dataset"
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_neg.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_neg.ksh
new file mode 100755
index 000000000000..532ec56c69c9
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_neg.ksh
@@ -0,0 +1,70 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify 'zfs send' does not perform sends from encrypted datasets with
+# unloaded keys.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Snapshot the dataset
+# 3. Unload the dataset key
+# 4. Verify sending the stream fails
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$cryptds && \
+ log_must $ZFS destroy -r $TESTPOOL/$cryptds
+}
+
+log_onexit cleanup
+
+log_assert "Verify 'zfs send' cannot perform sends from encrypted datasets with \
+ unloaded keys."
+
+typeset cryptds="crypt"
+typeset passphrase="abcdefgh"
+typeset snap="$TESTPOOL/$cryptds@snap"
+
+log_must eval "$ECHO $passphrase | \
+ $ZFS create -o encryption=on -o keyformat=passphrase $TESTPOOL/$cryptds"
+log_must $ZFS snapshot $snap
+log_must $ZFS unmount $TESTPOOL/$cryptds
+log_must $ZFS unload-key $TESTPOOL/$cryptds
+log_mustnot eval "$ZFS send $snap > /dev/null"
+
+log_pass "'zfs send' cannot perform sends from encrypted datasets with unloaded keys."
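The send and key-handling tests in this patch all exercise the same user-visible
key lifecycle. As a rough illustration only (this sequence is not part of the
patch, and the pool/dataset names are placeholders), the commands they build on
look like this:

	# create an encryption root whose wrapping key is a passphrase read on stdin
	echo "abcdefgh" | zfs create -o encryption=on -o keyformat=passphrase tank/secret
	zfs snapshot tank/secret@snap

	# with the key unloaded, the dataset cannot be mounted or sent
	zfs unmount tank/secret
	zfs unload-key tank/secret
	zfs send tank/secret@snap > /dev/null      # expected to fail

	# reloading the key restores access, and plain sends work again
	echo "abcdefgh" | zfs load-key tank/secret
	zfs mount tank/secret
	zfs send tank/secret@snap > /tmp/stream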
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_pos.ksh new file mode 100755 index 000000000000..71862637f10d --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_pos.ksh @@ -0,0 +1,66 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify 'zfs send' can perform unencrypted sends from encrypted datasets. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the dataset +# 3. Verify sending the stream succeeds +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$cryptds && \ + log_must $ZFS destroy -r $TESTPOOL/$cryptds +} + +log_onexit cleanup + +log_assert "Verify 'zfs send' can perform unencrypted sends from \ + encrypted datasets." + +typeset cryptds="crypt" +typeset passphrase="abcdefgh" +typeset snap="$TESTPOOL/$cryptds@snap" + +log_must eval "$ECHO $passphrase | \ + $ZFS create -o encryption=on -o keyformat=passphrase $TESTPOOL/$cryptds" +log_must $ZFS snapshot $snap +log_must eval "$ZFS send $snap > /dev/null" + +log_pass "'zfs send' can perform unencrypted sends from encrypted datasets." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh new file mode 100755 index 000000000000..238d9b979267 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# Unencrypted datasets should only allow keylocation of 'none', encryption +# roots should only allow keylocation of 'prompt' and file URI, and encrypted +# child datasets should not be able to change their keylocation. +# +# STRATEGY: +# 1. 
Verify the key location of the default dataset is 'none'
+# 2. Attempt to change the key location of the default dataset
+# 3. Create an encrypted dataset using a key file
+# 4. Attempt to change the key location of the encrypted dataset to 'none',
+# an invalid location, its current location, and 'prompt'
+# 5. Attempt to reload the encrypted dataset key using the new key location
+# 6. Create an encrypted child dataset
+# 7. Verify the key location of the child dataset is the same as the parent's
+# 8. Attempt to change the key location of the child dataset
+# 9. Verify the key location of the child dataset has not changed
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must $ZFS destroy -r $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "Key location can only be 'prompt' or a file path for encryption" \
+ "roots, and 'none' for unencrypted volumes"
+
+log_must eval "$ECHO $PASSPHRASE > /$TESTPOOL/pkey"
+
+log_must verify_keylocation $TESTPOOL/$TESTFS "none"
+log_must $ZFS set keylocation=none $TESTPOOL/$TESTFS
+log_mustnot $ZFS set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS
+log_mustnot $ZFS set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS
+log_must verify_keylocation $TESTPOOL/$TESTFS "none"
+
+log_must $ZFS create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_mustnot $ZFS set keylocation=none $TESTPOOL/$TESTFS1
+log_mustnot $ZFS set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_must $ZFS set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey"
+
+log_must $ZFS set keylocation=prompt $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt"
+
+log_must $ZFS unmount $TESTPOOL/$TESTFS1
+log_must $ZFS unload-key $TESTPOOL/$TESTFS1
+
+log_must $RM /$TESTPOOL/pkey
+log_must eval "$ECHO $PASSPHRASE | $ZFS load-key $TESTPOOL/$TESTFS1"
+log_must $ZFS mount $TESTPOOL/$TESTFS1
+
+log_must $ZFS create $TESTPOOL/$TESTFS1/child
+log_must verify_keylocation $TESTPOOL/$TESTFS1/child "prompt"
+
+log_mustnot $ZFS set keylocation=none $TESTPOOL/$TESTFS1/child
+log_mustnot $ZFS set keylocation=prompt $TESTPOOL/$TESTFS1/child
+log_mustnot $ZFS set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
+log_mustnot $ZFS set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
+
+log_must verify_keylocation $TESTPOOL/$TESTFS1/child "prompt"
+
+log_pass "Key location can only be 'prompt' or a file path for encryption" \
+ "roots, and 'none' for unencrypted volumes"
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile
new file mode 100644
index 000000000000..8fe2bf42ca6e
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_unload-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh new file mode 100755 index 000000000000..79cd6e9f908e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh new file mode 100755 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh new file mode 100755 index 000000000000..fd92a67a5bb0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh @@ -0,0 +1,68 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key' should only unload the key of an unmounted dataset. +# +# STRATEGY: +# 1. Attempt to unload the default dataset's key +# 2. Unmount the dataset +# 3. Attempt to unload the default dataset's key +# 4. Create an encrypted dataset +# 5. Attempt to unload the dataset's key +# 6. Verify the key is loaded +# 7. Unmount the dataset +# 8. Attempt to unload the dataset's key +# 9. Verify the key is not loaded +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs unload-key' should unload the key for an unmounted" \ + "encrypted dataset" + +log_mustnot $ZFS unload-key $TESTPOOL/$TESTFS + +log_must $ZFS unmount $TESTPOOL/$TESTFS +log_mustnot $ZFS unload-key $TESTPOOL/$TESTFS + +log_must eval "$ECHO $PASSPHRASE | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_mustnot $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unload-key $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_mustnot $ZFS unload-key $TESTPOOL/$TESTFS1 + +log_pass "'zfs unload-key' unloads the key for an unmounted encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh new file mode 100755 index 000000000000..6fd65a7fe1e5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key -a' should unload keys for all datasets. +# +# STRATEGY: +# 1. Create an encrypted filesystem, encrypted child dataset, an encrypted +# zvol, and an encrypted pool +# 2. Unmount all datasets +# 3. Attempt to unload all dataset keys +# 4. Verify each dataset has its key unloaded +# 5. 
Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/zvol && log_must $ZFS destroy $TESTPOOL/zvol + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs unload-key -a' should unload keys for all datasets" + +log_must eval "$ECHO $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must $ZFS create $TESTPOOL/$TESTFS1/child + +log_must $ZFS create -V 64M -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol + +typeset DISK2="$($ECHO $DISKS | $AWK '{ print $2}')" +log_must $ZPOOL create -O encryption=on -O keyformat=passphrase \ + -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2 + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unmount $TESTPOOL1 + +log_must $ZFS unload-key -a + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/zvol +log_must key_unavailable $TESTPOOL1 + +log_mustnot $ZFS mount $TESTPOOL +log_mustnot $ZFS mount $TESTPOOL/zvol +log_mustnot $ZFS mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs unload-key -a' unloads keys for all datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh new file mode 100755 index 000000000000..fa9239cd68b5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh @@ -0,0 +1,72 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key -r' should recursively unload keys. +# +# STRATEGY: +# 1. Create a parent encrypted dataset +# 3. Create a sibling encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Unmount all datasets +# 4. Attempt to unload all dataset keys under parent +# 5. Verify parent and child have their keys unloaded +# 6. Verify sibling has its key loaded +# 7. 
Attempt to mount all datasets +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must $ZFS destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs unload-key -r' should recursively unload keys" + +log_must eval "$ECHO $PASSPHRASE > /$TESTPOOL/pkey" +log_must $ZFS create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must $ZFS create -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child +log_must eval "$ECHO $PASSPHRASE1 | $ZFS create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" + +log_must $ZFS unmount $TESTPOOL/$TESTFS1 +log_must $ZFS unmount $TESTPOOL/$TESTFS2 + +log_must $ZFS unload-key -r $TESTPOOL/$TESTFS1 + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must key_available $TESTPOOL/$TESTFS2 + +log_mustnot $ZFS mount $TESTPOOL/$TESTFS1 +log_mustnot $ZFS mount $TESTPOOL/$TESTFS1/child +log_must $ZFS mount $TESTPOOL/$TESTFS2 + +log_pass "'zfs unload-key -r' recursively unloads keys" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh new file mode 100755 index 000000000000..d2591f124fb5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh @@ -0,0 +1,111 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2016, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib + +# +# DESCRIPTION: +# Create an encrypted pool +# +# STRATEGY: +# 1. Create a pool for each encryption type and verify that it is properly set +# 2. 
Create a pool for each keysource type and verify that it is properly set +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_onexit cleanup + +set -A ENCRYPTION_ALGS "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYSOURCE_TYPES "keysource=raw,prompt" \ + "keysource=hex,prompt" \ + "keysource=passphrase,prompt" + +set -A KEYSOURCES "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zpool create' can create encrypted pools" + +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + log_must eval "$ECHO ${KEYSOURCES[0]} | $ZPOOL create \ + -O ${ENCRYPTION_ALGS[i]} -O ${KEYSOURCE_TYPES[0]} \ + $TESTPOOL $DISKS" + + propertycheck $TESTPOOL ${ENCRYPTION_PROPS[i]} || \ + log_fail "${ENCRYPTION_ALGS[i]} is failed to set." + + propertycheck $TESTPOOL ${KEYSOURCE_TYPES[0]} || \ + log_fail "${KEYSOURCE_TYPES[0]} is failed to set." + + log_must $ZPOOL destroy $TESTPOOL + (( i = i + 1 )) +done + +typeset -i j=0 +while (( j < ${#KEYSOURCE_TYPES[*]} )); do + log_must eval "$ECHO ${KEYSOURCES[j]} | $ZPOOL create \ + -O ${ENCRYPTION_ALGS[0]} -O ${KEYSOURCE_TYPES[j]} \ + $TESTPOOL $DISKS" + + propertycheck $TESTPOOL ${ENCRYPTION_PROPS[0]} || \ + log_fail "${ENCRYPTION_ALGS[0]} is failed to set." + + propertycheck $TESTPOOL ${KEYSOURCE_TYPES[j]} || \ + log_fail "${KEYSOURCE_TYPES[j]} is failed to set." + + log_must $ZPOOL destroy $TESTPOOL + (( j = j + 1 )) +done + +log_pass "Creating encrypted pools works as expected." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh new file mode 100755 index 000000000000..f43de45874b9 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh @@ -0,0 +1,89 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib + +# +# DESCRIPTION: +# 'zpool create' should create encrypted pools when using a valid encryption +# algorithm, key format, key location, and key. +# +# STRATEGY: +# 1. Create a pool for each combination of encryption type and key format +# 2. 
Verify that each filesystem has the correct properties set
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+log_onexit cleanup
+
+set -A ENCRYPTION_ALGS "encryption=on" \
+ "encryption=aes-128-ccm" \
+ "encryption=aes-192-ccm" \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-gcm" \
+ "encryption=aes-192-gcm" \
+ "encryption=aes-256-gcm"
+
+set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \
+ "encryption=aes-128-ccm" \
+ "encryption=aes-192-ccm" \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-gcm" \
+ "encryption=aes-192-gcm" \
+ "encryption=aes-256-gcm"
+
+set -A KEYFORMATS "keyformat=raw" \
+ "keyformat=hex" \
+ "keyformat=passphrase"
+
+set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+ "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+ "abcdefgh"
+
+log_assert "'zpool create' should create encrypted pools when using a valid" \
+ "encryption algorithm, key format, key location, and key."
+
+typeset -i i=0
+while (( i < ${#ENCRYPTION_ALGS[*]} )); do
+ typeset -i j=0
+ while (( j < ${#KEYFORMATS[*]} )); do
+ log_must eval "$ECHO ${USER_KEYS[j]} | $ZPOOL create" \
+ "-O ${ENCRYPTION_ALGS[i]} -O ${KEYFORMATS[j]}" \
+ "$TESTPOOL $DISKS"
+
+ propertycheck $TESTPOOL ${ENCRYPTION_PROPS[i]} || \
+ log_fail "failed to set ${ENCRYPTION_ALGS[i]}"
+ propertycheck $TESTPOOL ${KEYFORMATS[j]} || \
+ log_fail "failed to set ${KEYFORMATS[j]}"
+
+ log_must $ZPOOL destroy $TESTPOOL
+ (( j = j + 1 ))
+ done
+ (( i = i + 1 ))
+done
+
+log_pass "'zpool create' creates encrypted pools when using a valid" \
+ "encryption algorithm, key format, key location, and key."
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh
new file mode 100755
index 000000000000..1c79eba3a127
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh
@@ -0,0 +1,89 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+#. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool create' should create an encrypted dataset only if it has a valid
+# combination of encryption properties set.
+#
+# enc = encryption
+# loc = keylocation provided
+# fmt = keyformat provided
+#
+# U = unspecified
+# N = off
+# Y = on
+#
+# enc  fmt  loc   valid  notes
+# -------------------------------------------
+# U    0    1     no     no crypt specified
+# U    1    0     no     no crypt specified
+# U    1    1     no     no crypt specified
+# N    0    0     yes    explicit no encryption
+# N    0    1     no     keylocation given, but crypt off
+# N    1    0     no     keyformat given, but crypt off
+# N    1    1     no     keyformat given, but crypt off
+# Y    0    0     no     no keyformat specified for new key
+# Y    0    1     no     no keyformat specified for new key
+# Y    1    0     yes    new encryption root
+# Y    1    1     yes    new encryption root
+#
+# STRATEGY:
+# 1.
Attempt to create a dataset using all combinations of encryption +# properties +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +log_assert "'zpool create' should create an encrypted dataset only if it" \ + "has a valid combination of encryption properties set." + +log_mustnot $ZPOOL create -O keylocation=prompt $TESTPOOL $DISKS +log_mustnot $ZPOOL create -O keyformat=passphrase $TESTPOOL $DISKS +log_mustnot $ZPOOL create -O keyformat=passphrase -O keylocation=prompt $TESTPOOL $DISKS + +log_must $ZPOOL create -O encryption=off $TESTPOOL $DISKS +log_must $ZPOOL destroy $TESTPOOL + +log_mustnot $ZPOOL create -O encryption=off -O keylocation=prompt $TESTPOOL $DISKS +log_mustnot $ZPOOL create -O encryption=off -O keyformat=passphrase $TESTPOOL $DISKS +log_mustnot $ZPOOL create -O encryption=off -O keyformat=passphrase -O keylocation=prompt $TESTPOOL $DISKS + +log_mustnot $ZPOOL create -O encryption=on $TESTPOOL $DISKS +log_mustnot $ZPOOL create -O encryption=on -O keylocation=prompt $TESTPOOL $DISKS + +log_must eval "$ECHO $PASSPHRASE | $ZPOOL create -O encryption=on -O keyformat=passphrase $TESTPOOL $DISKS" +log_must $ZPOOL destroy $TESTPOOL + +log_must eval "$ECHO $PASSPHRASE | $ZPOOL create -O encryption=on -O keyformat=passphrase -O keylocation=prompt $TESTPOOL $DISKS" +log_must $ZPOOL destroy $TESTPOOL + +log_pass "'zpool create' creates an encrypted dataset only if it has a" \ + "valid combination of encryption properties set." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 47eede7dc1ad..907da8553ba0 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -35,4 +35,4 @@ typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version" "listsnapshots" "autoexpand" "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress" "feature@multi_vdev_crash_dump" "feature@spacemap_histogram" "feature@enabled_txg" "feature@hole_birth" - "feature@extensible_dataset" "feature@bookmarks") + "feature@extensible_dataset" "feature@bookmarks" "feature@encryption") diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh new file mode 100755 index 000000000000..28c7b2ae5068 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with an encrypted dataset without +# mounting it. +# +# STRATEGY: +# 1. 
Create an encrypted pool +# 2. Export the pool +# 3. Attempt to import the pool +# 4. Verify the pool exists and the key is not loaded +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL1 + log_must $RM $VDEV0 + log_must $MKFILE $FILE_SIZE $VDEV0 +} +log_onexit cleanup + +log_assert "'zpool import' should import a pool with an encrypted dataset" \ + "without mounting it" + +log_must eval "$ECHO $PASSPHRASE | $ZPOOL create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0" +log_must $ZPOOL export $TESTPOOL1 +log_must $ZPOOL import -d $DEVICE_DIR $TESTPOOL1 +log_must poolexists $TESTPOOL1 +log_must key_unavailable $TESTPOOL1 +log_must unmounted $TESTPOOL1 + +log_pass "'zpool import' imports a pool with an encrypted dataset without" \ + "mounting it" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh new file mode 100755 index 000000000000..f2d04ac64cf4 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool import -l' should import a pool with an encrypted dataset and load +# its key. +# +# STRATEGY: +# 1. Create an encrypted pool +# 2. Export the pool +# 3. Attempt to import the pool with the key +# 4. 
Verify the pool exists and the key is loaded +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL1 + log_must $RM $VDEV0 + log_must $MKFILE $FILE_SIZE $VDEV0 +} +log_onexit cleanup + +log_assert "'zpool import -l' should import a pool with an encrypted dataset" \ + "and load its key" + +log_must eval "$ECHO $PASSPHRASE | $ZPOOL create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0" +log_must $ZPOOL export $TESTPOOL1 +log_must eval "$ECHO $PASSPHRASE | $ZPOOL import -l -d $DEVICE_DIR $TESTPOOL1" +log_must poolexists $TESTPOOL1 +log_must key_available $TESTPOOL1 +log_must mounted $TESTPOOL1 + +log_pass "'zpool import -l' imports a pool with an encrypted dataset and" \ + "loads its key" diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 873e5755cc8a..ea537915a844 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1370,6 +1370,7 @@ ZFS_COMMON_OBJS += \ dnode_sync.o \ dsl_bookmark.o \ dsl_dir.o \ + dsl_crypt.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_destroy.o \ @@ -1425,6 +1426,7 @@ ZFS_COMMON_OBJS += \ zio.o \ zio_checksum.o \ zio_compress.o \ + zio_crypt.o \ zio_inject.o \ zle.o \ zrlock.o diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 328c332ae462..1ef5f81b0ab3 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -250,6 +250,21 @@ * ARC is disabled, then the L2ARC's block must be transformed to look * like the physical block in the main data pool before comparing the * checksum and determining its validity. + * + * The L1ARC has a slightly different system for storing encrypted data. + * Raw (encrypted + possibly compressed) data has a few subtle differences from + * data that is just compressed. The biggest difference is that it is not + * possible to decrypt encrypted data (or visa versa) if the keys aren't loaded. + * The other difference is that encryption cannot be treated as a suggestion. + * If a caller would prefer compressed data, but they actually wind up with + * uncompressed data the worst thing that could happen is there might be a + * performance hit. If the caller requests encrypted data, however, we must be + * sure they actually get it or else secret information could be leaked. Raw + * data is stored in hdr->b_crypt_hdr.b_rdata. An encrypted header, therefore, + * may have both an encrypted version and a decrypted version of its data at + * once. When a caller needs a raw arc_buf_t, it is allocated and the data is + * copied out of this header. To avoid complications with b_pdata, raw buffers + * cannot be shared. 
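+ *
+ * In short, an encrypted header may carry up to two copies of its block:
+ * the raw ciphertext in b_crypt_hdr.b_rabd and a decrypted (though possibly
+ * still compressed) copy in b_l1hdr.b_pabd. arc_buf_fill() chooses which
+ * copy a caller gets based on its ARC_FILL_* flags, and arc_untransform()
+ * is used to turn a raw buffer back into readable data once the dataset's
+ * key has been loaded into the keystore.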
*/ #include @@ -266,6 +281,7 @@ #include #include #include +#include #ifdef _KERNEL #include #include @@ -806,8 +822,9 @@ typedef struct arc_callback arc_callback_t; struct arc_callback { void *acb_private; - arc_done_func_t *acb_done; + arc_read_done_func_t *acb_done; arc_buf_t *acb_buf; + boolean_t acb_encrypted; boolean_t acb_compressed; zio_t *acb_zio_dummy; arc_callback_t *acb_next; @@ -816,12 +833,12 @@ struct arc_callback { typedef struct arc_write_callback arc_write_callback_t; struct arc_write_callback { - void *awcb_private; - arc_done_func_t *awcb_ready; - arc_done_func_t *awcb_children_ready; - arc_done_func_t *awcb_physdone; - arc_done_func_t *awcb_done; - arc_buf_t *awcb_buf; + void *awcb_private; + arc_write_done_func_t *awcb_ready; + arc_write_done_func_t *awcb_children_ready; + arc_write_done_func_t *awcb_physdone; + arc_write_done_func_t *awcb_done; + arc_buf_t *awcb_buf; }; /* @@ -887,6 +904,36 @@ typedef struct l1arc_buf_hdr { abd_t *b_pabd; } l1arc_buf_hdr_t; +/* + * Encrypted blocks will need to be stored encrypted on the L2ARC + * disk as they appear in the main pool. In order for this to work we + * need to pass around the encryption parameters so they can be used + * to write data to the L2ARC. This struct is only defined in the + * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED + * flag set. + */ +typedef struct arc_buf_hdr_crypt { + abd_t *b_rabd; /* raw encrypted data */ + dmu_object_type_t b_ot; /* object type */ + uint32_t b_ebufcnt; /* number or encryped buffers */ + + /* dsobj for looking up encryption key for l2arc encryption */ + uint64_t b_dsobj; /* for looking up key */ + + /* encryption parameters */ + uint8_t b_salt[ZIO_DATA_SALT_LEN]; + uint8_t b_iv[ZIO_DATA_IV_LEN]; + + /* + * Technically this could be removed since we will always be able to + * get the mac from the bp when we need it. However, it is inconvenient + * for callers of arc code to have to pass a bp in all the time. This + * also allows us to assert that L2ARC data is properly encrypted to + * match the data in the main storage pool. + */ + uint8_t b_mac[ZIO_DATA_MAC_LEN]; +} arc_buf_hdr_crypt_t; + typedef struct l2arc_dev l2arc_dev_t; typedef struct l2arc_buf_hdr { @@ -937,6 +984,11 @@ struct arc_buf_hdr { l2arc_buf_hdr_t b_l2hdr; /* L1ARC fields. Undefined when in l2arc_only state */ l1arc_buf_hdr_t b_l1hdr; + /* + * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED + * is set and the L1 header exists. 
+ */ + arc_buf_hdr_crypt_t b_crypt_hdr; }; #define GHOST_STATE(state) \ @@ -957,6 +1009,7 @@ struct arc_buf_hdr { #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING) #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED) #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD) +#define HDR_ENCRYPTED(hdr) ((hdr)->b_flags & ARC_FLAG_ENCRYPTED) #define HDR_SHARED_DATA(hdr) ((hdr)->b_flags & ARC_FLAG_SHARED_DATA) #define HDR_ISTYPE_METADATA(hdr) \ @@ -965,6 +1018,9 @@ struct arc_buf_hdr { #define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR) #define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR) +#define HDR_HAS_RABD(hdr) \ + (HDR_HAS_L1HDR(hdr) && HDR_ENCRYPTED(hdr) && \ + (hdr)->b_crypt_hdr.b_rabd != NULL) /* For storing compression mode in b_flags */ #define HDR_COMPRESS_OFFSET (highbit64(ARC_FLAG_COMPRESS_0) - 1) @@ -977,12 +1033,14 @@ struct arc_buf_hdr { #define ARC_BUF_LAST(buf) ((buf)->b_next == NULL) #define ARC_BUF_SHARED(buf) ((buf)->b_flags & ARC_BUF_FLAG_SHARED) #define ARC_BUF_COMPRESSED(buf) ((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED) +#define ARC_BUF_ENCRYPTED(buf) ((buf)->b_flags & ARC_BUF_FLAG_ENCRYPTED) /* * Other sizes */ -#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr)) #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr)) /* @@ -1095,13 +1153,20 @@ static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); +typedef enum arc_fill_flags { + ARC_FILL_LOCKED = 1 << 0, + ARC_FILL_COMPRESSED = 1 << 1, + ARC_FILL_ENCRYPTED = 1 << 2, + ARC_FILL_IN_PLACE = 1 << 3 +} arc_fill_flags_t; + static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); -static void arc_hdr_free_pabd(arc_buf_hdr_t *); -static void arc_hdr_alloc_pabd(arc_buf_hdr_t *); +static void arc_hdr_free_pabd(arc_buf_hdr_t *, boolean_t); +static void arc_hdr_alloc_pabd(arc_buf_hdr_t *, boolean_t); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(); static void arc_buf_watch(arc_buf_t *); @@ -1250,7 +1315,9 @@ buf_hash_remove(arc_buf_hdr_t *hdr) /* * Global data structures and functions for the buf kmem cache. 
*/ + static kmem_cache_t *hdr_full_cache; +static kmem_cache_t *hdr_full_crypt_cache; static kmem_cache_t *hdr_l2only_cache; static kmem_cache_t *buf_cache; @@ -1264,6 +1331,7 @@ buf_fini(void) for (i = 0; i < BUF_LOCKS; i++) mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); kmem_cache_destroy(hdr_full_cache); + kmem_cache_destroy(hdr_full_crypt_cache); kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(buf_cache); } @@ -1288,6 +1356,19 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) return (0); } +/* ARGSUSED */ +static int +hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag) +{ + arc_buf_hdr_t *hdr = vbuf; + + (void) hdr_full_cons(vbuf, unused, kmflag); + bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr)); + arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); + + return (0); +} + /* ARGSUSED */ static int hdr_l2only_cons(void *vbuf, void *unused, int kmflag) @@ -1331,6 +1412,16 @@ hdr_full_dest(void *vbuf, void *unused) arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); } +/* ARGSUSED */ +static void +hdr_full_crypt_dest(void *vbuf, void *unused) +{ + arc_buf_hdr_t *hdr = vbuf; + + hdr_full_dest(hdr, unused); + arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); +} + /* ARGSUSED */ static void hdr_l2only_dest(void *vbuf, void *unused) @@ -1394,6 +1485,9 @@ buf_init(void) hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0); + hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt", + HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest, + hdr_recl, NULL, NULL, 0); hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl, NULL, NULL, 0); @@ -1428,6 +1522,36 @@ arc_buf_lsize(arc_buf_t *buf) return (HDR_GET_LSIZE(buf->b_hdr)); } +/* + * This function will return B_TRUE if the buffer is encrypted in memory. + * This buffer can be decrypted by calling arc_untransform(). + */ +boolean_t +arc_is_encrypted(arc_buf_t *buf) +{ + return (ARC_BUF_ENCRYPTED(buf) != 0); +} + +void +arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(HDR_ENCRYPTED(hdr)); + + bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN); + bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN); + bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN); + *byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; +} + +/* + * Indicates how this buffer is compressed in memory. If it is not compressed + * the value will be ZIO_COMPRESS_OFF. It can be made normally readable with + * arc_untransform() as long as it is also unencrypted. + */ enum zio_compress arc_get_compression(arc_buf_t *buf) { @@ -1437,6 +1561,18 @@ arc_get_compression(arc_buf_t *buf) #define ARC_MINTIME (hz>>4) /* 62 ms */ +/* + * Return the compression algorithm used to store this data in the ARC. If ARC + * compression is enabled or this is an encrypted block, this will be the same + * as what's used to store it on-disk. Otherwise, this will be ZIO_COMPRESS_OFF. + */ +static inline enum zio_compress +arc_hdr_get_compress(arc_buf_hdr_t *hdr) +{ + return (HDR_COMPRESSION_ENABLED(hdr) ? 
+ HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF); +} + static inline boolean_t arc_buf_is_shared(arc_buf_t *buf) { @@ -1464,6 +1600,7 @@ static inline void arc_cksum_free(arc_buf_hdr_t *hdr) { ASSERT(HDR_HAS_L1HDR(hdr)); + mutex_enter(&hdr->b_l1hdr.b_freeze_lock); if (hdr->b_l1hdr.b_freeze_cksum != NULL) { kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t)); @@ -1520,55 +1657,17 @@ arc_cksum_verify(arc_buf_t *buf) mutex_exit(&hdr->b_l1hdr.b_freeze_lock); } +/* + * This function makes the assumption that data stored in the L2ARC + * will be transformed exactly as it is in the main pool. Because of + * this we can verify the checksum against the reading process's bp. + */ static boolean_t arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) { - enum zio_compress compress = BP_GET_COMPRESS(zio->io_bp); - boolean_t valid_cksum; - ASSERT(!BP_IS_EMBEDDED(zio->io_bp)); VERIFY3U(BP_GET_PSIZE(zio->io_bp), ==, HDR_GET_PSIZE(hdr)); - /* - * We rely on the blkptr's checksum to determine if the block - * is valid or not. When compressed arc is enabled, the l2arc - * writes the block to the l2arc just as it appears in the pool. - * This allows us to use the blkptr's checksum to validate the - * data that we just read off of the l2arc without having to store - * a separate checksum in the arc_buf_hdr_t. However, if compressed - * arc is disabled, then the data written to the l2arc is always - * uncompressed and won't match the block as it exists in the main - * pool. When this is the case, we must first compress it if it is - * compressed on the main pool before we can validate the checksum. - */ - if (!HDR_COMPRESSION_ENABLED(hdr) && compress != ZIO_COMPRESS_OFF) { - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); - uint64_t lsize = HDR_GET_LSIZE(hdr); - uint64_t csize; - - void *cbuf = zio_buf_alloc(HDR_GET_PSIZE(hdr)); - csize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); - - ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr)); - if (csize < HDR_GET_PSIZE(hdr)) { - /* - * Compressed blocks are always a multiple of the - * smallest ashift in the pool. Ideally, we would - * like to round up the csize to the next - * spa_min_ashift but that value may have changed - * since the block was last written. Instead, - * we rely on the fact that the hdr's psize - * was set to the psize of the block when it was - * last written. We set the csize to that value - * and zero out any part that should not contain - * data. - */ - bzero((char *)cbuf + csize, HDR_GET_PSIZE(hdr) - csize); - csize = HDR_GET_PSIZE(hdr); - } - zio_push_transform(zio, cbuf, csize, HDR_GET_PSIZE(hdr), NULL); - } - /* * Block pointers always store the checksum for the logical data. * If the block pointer has the gang bit set, then the checksum @@ -1582,11 +1681,9 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) * generated using the correct checksum algorithm and accounts for the * logical I/O size and not just a gang fragment. */ - valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp, + return (zio_checksum_error_impl(zio->io_spa, zio->io_bp, BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size, zio->io_offset, NULL) == 0); - zio_pop_transforms(zio); - return (valid_cksum); } /* @@ -1611,10 +1708,20 @@ arc_cksum_compute(arc_buf_t *buf) mutex_exit(&hdr->b_l1hdr.b_freeze_lock); return; } else if (ARC_BUF_COMPRESSED(buf)) { + /* + * Since the checksum doesn't apply to compressed buffers, we + * only keep a checksum if there are uncompressed buffers. + * Therefore there must be another buffer, which is + * uncompressed. 
Encrypted buffers will also have the + * compressed flag set. + */ + IMPLY(hdr->b_l1hdr.b_freeze_cksum != NULL, + hdr->b_l1hdr.b_bufcnt > 1); mutex_exit(&hdr->b_l1hdr.b_freeze_lock); return; } + ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(!ARC_BUF_COMPRESSED(buf)); hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP); @@ -1807,15 +1914,14 @@ arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp) */ if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) { arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF); ASSERT(!HDR_COMPRESSION_ENABLED(hdr)); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); } else { arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, cmp); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); ASSERT(HDR_COMPRESSION_ENABLED(hdr)); } + + HDR_SET_COMPRESS(hdr, cmp); + ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); } /* @@ -1855,6 +1961,170 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) return (copied); } +/* + * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. + */ +static uint64_t +arc_hdr_size(arc_buf_hdr_t *hdr) +{ + uint64_t size; + + if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && + HDR_GET_PSIZE(hdr) > 0) { + size = HDR_GET_PSIZE(hdr); + } else { + ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); + size = HDR_GET_LSIZE(hdr); + } + return (size); +} + +/* + * This function will take a header that only has raw encrypted data in + * b_crypt_hdr.b_rabd and decrypts it into a new buffer which is stored in + * b_l1hdr.b_pabd. If designated in the header, this function will also + * decompress the data. + */ +static int +arc_hdr_decrypt(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, spa_t *spa, + uint64_t dsobj) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + abd_t *cabd = NULL; + void *tmp = NULL; + boolean_t no_crypt = B_FALSE; + + if (hash_lock != NULL) + mutex_enter(hash_lock); + + /* + * Check that we only have an encrypted copy of the data. This was + * already checked in arc_buf_fill() as a quick check, but we do it + * again now under the hash_lock to make sure nothing has changed. + * If this isn't true there is no work to do so we can simply return. + */ + if (!HDR_HAS_RABD(hdr) || hdr->b_l1hdr.b_pabd != NULL) + goto out_unlock; + + arc_hdr_alloc_pabd(hdr, B_FALSE); + + /* + * We must be careful to use the passed-in dsobj value here and + * not the value in b_dsobj. b_dsobj is meant to be a best guess for + * the L2ARC, which has the luxury of being able to fail without real + * consequences (the data simply won't make it to the L2ARC). In + * reality, the dsobj stored in the header may belong to a dataset + * that has been unmounted or otherwise disowned, meaning the key + * won't be accessible via that dsobj anymore. + */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + ret = zio_do_crypt_abd(B_FALSE, &dck->dck_key, + hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_ot, + hdr->b_crypt_hdr.b_iv, hdr->b_crypt_hdr.b_mac, + HDR_GET_PSIZE(hdr), hdr->b_l1hdr.b_pabd, + hdr->b_crypt_hdr.b_rabd, &no_crypt); + if (ret != 0) + goto error; + + if (no_crypt) { + abd_copy(hdr->b_l1hdr.b_pabd, hdr->b_crypt_hdr.b_rabd, + HDR_GET_PSIZE(hdr)); + } + + /* + * If this header has disabled arc compression but the b_pabd is + * compressed after decrypting it, we need to decompress the newly + * decrypted data. 
+ */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + /* + * We want to make sure that we are correctly honoring the + * zfs_abd_scatter_enabled setting, so we allocate an abd here + * and then loan a buffer from it, rather than allocating a + * linear buffer and wrapping it in an abd later. + */ + cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); + + ret = zio_decompress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr), + HDR_GET_LSIZE(hdr)); + if (ret != 0) { + abd_return_buf(cabd, tmp, arc_hdr_size(hdr)); + goto error; + } + + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = cabd; + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + +out_unlock: + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (0); + +error: + arc_hdr_free_pabd(hdr, B_FALSE); + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + if (cabd != NULL) + arc_free_data_buf(hdr, cabd, arc_hdr_size(hdr), hdr); + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (ret); +} + +/* + * This function is used by the dbuf code to decrypt bonus buffers in place. + * The dbuf code itself doesn't have any locking for decrypting a shared dnode + * block, so we use the hash lock here to protect against concurrent writes. + */ +static int +arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + if (hdr->b_l1hdr.b_pabd == NULL) + return (SET_ERROR(EIO)); + + if (hash_lock != NULL) + mutex_enter(hash_lock); + + /* + * Check that the buffer has not already been decrypted. We checked + * this once outside the lock, which is safe because a buffer will + * never go from unencrypted to encrypted, but we do it again now + * that we have taken out the hash_lock. + */ + if (!ARC_BUF_ENCRYPTED(buf)) + goto out_unlock; + + zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data, + arc_buf_size(buf)); + buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; + buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; + hdr->b_crypt_hdr.b_ebufcnt -= 1; + +out_unlock: + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (0); +} + /* * Given a buf that has a data buffer attached to it, this function will * efficiently fill the buf with data of the specified compression setting from @@ -1869,15 +2139,66 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) * the correct-sized data buffer. */ static int -arc_buf_fill(arc_buf_t *buf, boolean_t compressed) +arc_buf_fill(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, arc_fill_flags_t flags) { + int error = 0; arc_buf_hdr_t *hdr = buf->b_hdr; - boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + boolean_t hdr_compressed = + (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); + boolean_t compressed = (flags & ARC_FILL_COMPRESSED) != 0; + boolean_t encrypted = (flags & ARC_FILL_ENCRYPTED) != 0; dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap; + kmutex_t *hash_lock = (flags & ARC_FILL_LOCKED) ? 
NULL : HDR_LOCK(hdr); ASSERT3P(buf->b_data, !=, NULL); - IMPLY(compressed, hdr_compressed); + IMPLY(compressed, hdr_compressed || ARC_BUF_ENCRYPTED(buf)); IMPLY(compressed, ARC_BUF_COMPRESSED(buf)); + IMPLY(encrypted, HDR_ENCRYPTED(hdr)); + IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf)); + IMPLY(encrypted, ARC_BUF_COMPRESSED(buf)); + IMPLY(encrypted, !ARC_BUF_SHARED(buf)); + + if (encrypted) { + ASSERT(HDR_HAS_RABD(hdr)); + abd_copy_to_buf(buf->b_data, hdr->b_crypt_hdr.b_rabd, + HDR_GET_PSIZE(hdr)); + goto byteswap; + } else if (HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd == NULL) { + /* + * If we only have the encrypted version of the data, but the + * unencrypted version was requested we take this opportunity + * to store the decrypted version in the header for future use. + */ + error = arc_hdr_decrypt(hdr, hash_lock, spa, dsobj); + if (error != 0) + return (error); + } + + /* + * There is a special case here for dnode blocks which are + * decrypting their bonus buffers. These blocks may request to + * be decrypted in-place. This is necessary because there may + * be many dnodes pointing into this buffer and there is + * currently no method to synchronize replacing the backing + * b_data buffer and updating all of the pointers. Here we use + * the hash lock to ensure there are no races. If the need + * arises for other types to be decrypted in-place, they must + * add handling here as well. + */ + if ((flags & ARC_FILL_IN_PLACE) != 0) { + ASSERT(!hdr_compressed); + ASSERT(!compressed); + ASSERT(!encrypted); + + if (HDR_ENCRYPTED(hdr) && ARC_BUF_ENCRYPTED(buf)) { + ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); + error = arc_buf_untransform_in_place(buf, hash_lock); + } + + /* Compute the hdr's checksum if necessary */ + arc_cksum_compute(buf); + return (error); + } if (hdr_compressed == compressed) { if (!arc_buf_is_shared(buf)) { @@ -1932,7 +2253,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL); return (0); } else { - int error = zio_decompress_data(HDR_GET_COMPRESS(hdr), + error = zio_decompress_data(HDR_GET_COMPRESS(hdr), hdr->b_l1hdr.b_pabd, buf->b_data, HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); @@ -1943,13 +2264,14 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) if (error != 0) { zfs_dbgmsg( "hdr %p, compress %d, psize %d, lsize %d", - hdr, HDR_GET_COMPRESS(hdr), + hdr, arc_hdr_get_compress(hdr), HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); return (SET_ERROR(EIO)); } } } +byteswap: /* Byteswap the buf's data if necessary */ if (bswap != DMU_BSWAP_NUMFUNCS) { ASSERT(!HDR_SHARED_DATA(hdr)); @@ -1963,28 +2285,17 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) return (0); } -int -arc_decompress(arc_buf_t *buf) -{ - return (arc_buf_fill(buf, B_FALSE)); -} - /* * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. + * If this function is being called to decrypt an encrypted buffer, the key + * must be loaded and a mapping must be made available in the keystore via + * spa_keystore_create_mapping() or one of its callers. */ -static uint64_t -arc_hdr_size(arc_buf_hdr_t *hdr) +int +arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, boolean_t in_place) { - uint64_t size; - - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && - HDR_GET_PSIZE(hdr) > 0) { - size = HDR_GET_PSIZE(hdr); - } else { - ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); - size = HDR_GET_LSIZE(hdr); - } - return (size); + return (arc_buf_fill(buf, spa, dsobj, + (in_place) ? 
ARC_FILL_IN_PLACE : 0)); } /* @@ -2003,6 +2314,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); (void) refcount_add_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; @@ -2013,6 +2325,10 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) (void) refcount_add_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } + if (HDR_HAS_RABD(hdr)) { + (void) refcount_add_many(&state->arcs_esize[type], + HDR_GET_PSIZE(hdr), hdr); + } for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { if (arc_buf_is_shared(buf)) @@ -2038,6 +2354,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); (void) refcount_remove_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; @@ -2048,6 +2365,10 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) (void) refcount_remove_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } + if (HDR_HAS_RABD(hdr)) { + (void) refcount_remove_many(&state->arcs_esize[type], + HDR_GET_PSIZE(hdr), hdr); + } for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { if (arc_buf_is_shared(buf)) @@ -2141,7 +2462,9 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, old_state = hdr->b_l1hdr.b_state; refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt); bufcnt = hdr->b_l1hdr.b_bufcnt; - update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL); + + update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); } else { old_state = arc_l2c_only; refcnt = 0; @@ -2212,6 +2535,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, (void) refcount_add_many(&new_state->arcs_size, HDR_GET_LSIZE(hdr), hdr); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); } else { uint32_t buffers = 0; @@ -2243,8 +2567,11 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (hdr->b_l1hdr.b_pabd != NULL) { (void) refcount_add_many(&new_state->arcs_size, arc_hdr_size(hdr), hdr); - } else { - ASSERT(GHOST_STATE(old_state)); + } + + if (HDR_HAS_RABD(hdr)) { + (void) refcount_add_many(&new_state->arcs_size, + HDR_GET_PSIZE(hdr), hdr); } } } @@ -2254,6 +2581,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (GHOST_STATE(old_state)) { ASSERT0(bufcnt); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); /* * When moving a header off of a ghost state, @@ -2293,9 +2621,20 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, buf); } ASSERT3U(bufcnt, ==, buffers); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - (void) refcount_remove_many( - &old_state->arcs_size, arc_hdr_size(hdr), hdr); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + + if (hdr->b_l1hdr.b_pabd != NULL) { + (void) refcount_remove_many( + &old_state->arcs_size, arc_hdr_size(hdr), + hdr); + } + + if (HDR_HAS_RABD(hdr)) { + (void) refcount_remove_many( + &old_state->arcs_size, HDR_GET_PSIZE(hdr), + hdr); + } } } @@ -2382,12 +2721,13 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) { /* * The criteria for sharing a hdr's data are: - * 1. the hdr's compression matches the buf's compression - * 2. the hdr doesn't need to be byteswapped - * 3. the hdr isn't already being shared - * 4. 
the buf is either compressed or it is the last buf in the hdr list + * 1. the buffer is not encrypted + * 2. the hdr's compression matches the buf's compression + * 3. the hdr doesn't need to be byteswapped + * 4. the hdr isn't already being shared + * 5. the buf is either compressed or it is the last buf in the hdr list * - * Criterion #4 maintains the invariant that shared uncompressed + * Criterion #5 maintains the invariant that shared uncompressed * bufs must be the final buf in the hdr's b_buf list. Reading this, you * might ask, "if a compressed buf is allocated first, won't that be the * last thing in the list?", but in that case it's impossible to create @@ -2402,9 +2742,11 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * sharing if the new buf isn't the first to be added. */ ASSERT3P(buf->b_hdr, ==, hdr); - boolean_t hdr_compressed = HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF; + boolean_t hdr_compressed = arc_hdr_get_compress(hdr) != + ZIO_COMPRESS_OFF; boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0; - return (buf_compressed == hdr_compressed && + return (!ARC_BUF_ENCRYPTED(buf) && + buf_compressed == hdr_compressed && hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS && !HDR_SHARED_DATA(hdr) && (ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf))); @@ -2416,10 +2758,11 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * copy was made successfully, or an error code otherwise. */ static int -arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, - boolean_t fill, arc_buf_t **ret) +arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj, void *tag, + boolean_t encrypted, boolean_t compressed, boolean_t fill, arc_buf_t **ret) { arc_buf_t *buf; + arc_fill_flags_t flags = ARC_FILL_LOCKED; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); @@ -2427,6 +2770,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, hdr->b_type == ARC_BUFC_METADATA); ASSERT3P(ret, !=, NULL); ASSERT3P(*ret, ==, NULL); + IMPLY(encrypted, compressed); buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; @@ -2444,10 +2788,19 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, /* * Only honor requests for compressed bufs if the hdr is actually - * compressed. + * compressed. This must be overriden if the buffer is encrypted since + * encrypted buffers cannot be decompressed. */ - if (compressed && HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) + if (encrypted && HDR_ENCRYPTED(hdr)) { + buf->b_flags |= ARC_BUF_FLAG_COMPRESSED; + buf->b_flags |= ARC_BUF_FLAG_ENCRYPTED; + flags |= ARC_FILL_COMPRESSED | ARC_FILL_ENCRYPTED; + } else if (compressed && + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) { + ASSERT(!encrypted); buf->b_flags |= ARC_BUF_FLAG_COMPRESSED; + flags |= ARC_FILL_COMPRESSED; + } /* * If the hdr's data can be shared then we share the data buffer and @@ -2464,7 +2817,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, * need to be ABD-aware. */ boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) && - abd_is_linear(hdr->b_l1hdr.b_pabd); + hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd); /* Set up b_data and sharing */ if (can_share) { @@ -2480,13 +2833,15 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, hdr->b_l1hdr.b_buf = buf; hdr->b_l1hdr.b_bufcnt += 1; + if (encrypted) + hdr->b_crypt_hdr.b_ebufcnt += 1; /* * If the user wants the data from the hdr, we need to either copy or * decompress the data. 
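Conceptually, the flag word that arc_buf_alloc_impl() hands to arc_buf_fill() is a small bitmask built from the caller's request. The sketch below mirrors that composition with assumed names and bit values (the actual arc_fill_flags_t definition lives in the ARC headers, which are not shown here) and omits the header-state checks the real function performs before honoring a compressed or encrypted request:

/* Assumed bit values; the real arc_fill_flags_t is declared elsewhere. */
typedef enum sketch_fill_flags {
	SKETCH_FILL_LOCKED	= 1 << 0,	/* caller already holds the hash lock */
	SKETCH_FILL_COMPRESSED	= 1 << 1,	/* caller wants the compressed form */
	SKETCH_FILL_ENCRYPTED	= 1 << 2,	/* caller wants the raw encrypted form */
	SKETCH_FILL_IN_PLACE	= 1 << 3	/* decrypt into the existing b_data */
} sketch_fill_flags_t;

/*
 * An encrypted request implies a compressed request, because raw data
 * cannot be decompressed until it has been decrypted.
 */
static int
sketch_fill_flags(int want_encrypted, int want_compressed)
{
	int flags = SKETCH_FILL_LOCKED;

	if (want_encrypted)
		flags |= SKETCH_FILL_COMPRESSED | SKETCH_FILL_ENCRYPTED;
	else if (want_compressed)
		flags |= SKETCH_FILL_COMPRESSED;
	return (flags);
}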
*/ if (fill) { - return (arc_buf_fill(buf, ARC_BUF_COMPRESSED(buf) != 0)); + return (arc_buf_fill(buf, spa, dsobj, flags)); } return (0); @@ -2532,6 +2887,19 @@ arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, return (buf); } +arc_buf_t * +arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type) +{ + arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj, + byteorder, salt, iv, mac, ot, psize, lsize, compression_type); + + atomic_add_64(&arc_loaned_bytes, psize); + return (buf); +} + /* * Return a loaned arc buffer to the arc. @@ -2577,11 +2945,11 @@ l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type) } static void -arc_hdr_free_on_write(arc_buf_hdr_t *hdr) +arc_hdr_free_on_write(arc_buf_hdr_t *hdr, boolean_t free_rdata) { arc_state_t *state = hdr->b_l1hdr.b_state; arc_buf_contents_t type = arc_buf_type(hdr); - uint64_t size = arc_hdr_size(hdr); + uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr); /* protected by hash lock, if in the hash table */ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) { @@ -2599,7 +2967,11 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr) arc_space_return(size, ARC_SPACE_DATA); } - l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + if (free_rdata) { + l2arc_free_abd_on_write(hdr->b_crypt_hdr.b_rabd, size, type); + } else { + l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + } } /* @@ -2614,6 +2986,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) ASSERT(arc_can_share(hdr, buf)); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); /* @@ -2675,12 +3048,12 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) static arc_buf_t * arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf) { - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); - arc_buf_t **bufp = &hdr->b_l1hdr.b_buf; arc_buf_t *lastbuf = NULL; + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); + /* * Remove the buf from the hdr list and locate the last * remaining buffer on the list. @@ -2716,6 +3089,7 @@ static void arc_buf_destroy_impl(arc_buf_t *buf) { arc_buf_hdr_t *hdr = buf->b_hdr; + uint32_t ebufcnt = 0; /* * Free up the data associated with the buf but only if we're not @@ -2743,6 +3117,18 @@ arc_buf_destroy_impl(arc_buf_t *buf) ASSERT(hdr->b_l1hdr.b_bufcnt > 0); hdr->b_l1hdr.b_bufcnt -= 1; + + if (ARC_BUF_ENCRYPTED(buf)) + hdr->b_crypt_hdr.b_ebufcnt -= 1; + + /* + * if we have no more encrypted buffers and we've already + * gotten a copy of the decrypted data we can free b_rabd to + * save some space. + */ + if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) && + hdr->b_l1hdr.b_pabd != NULL) + arc_hdr_free_pabd(hdr, B_TRUE); } arc_buf_t *lastbuf = arc_buf_remove(hdr, buf); @@ -2757,16 +3143,17 @@ arc_buf_destroy_impl(arc_buf_t *buf) * There is an equivalent case for compressed bufs, but since * they aren't guaranteed to be the last buf in the list and * that is an exceedingly rare case, we just allow that space be - * wasted temporarily. + * wasted temporarily. We must also be careful not to share + * encrypted buffers, since they cannot be shared. 
*/ - if (lastbuf != NULL) { + if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) { /* Only one buf can be shared at once */ VERIFY(!arc_buf_is_shared(lastbuf)); /* hdr is uncompressed so can't have compressed buf */ VERIFY(!ARC_BUF_COMPRESSED(lastbuf)); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); /* * We must setup a new shared block between the @@ -2787,16 +3174,14 @@ arc_buf_destroy_impl(arc_buf_t *buf) */ ASSERT3P(lastbuf, !=, NULL); ASSERT(arc_buf_is_shared(lastbuf) || - HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); } - /* - * Free the checksum if we're removing the last uncompressed buf from - * this hdr. - */ - if (!arc_hdr_has_uncompressed_buf(hdr)) { + if (HDR_ENCRYPTED(hdr)) + ebufcnt = hdr->b_crypt_hdr.b_ebufcnt; + + if (hdr->b_l1hdr.b_bufcnt - ebufcnt == 0) arc_cksum_free(hdr); - } /* clean up the buf */ buf->b_hdr = NULL; @@ -2804,26 +3189,43 @@ arc_buf_destroy_impl(arc_buf_t *buf) } static void -arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr) +arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) { + uint64_t size; + ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(!HDR_SHARED_DATA(hdr)); + ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata); + IMPLY(alloc_rdata, HDR_ENCRYPTED(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); + if (alloc_rdata) { + size = HDR_GET_PSIZE(hdr); + ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); + hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr); + ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL); + } else { + size = arc_hdr_size(hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + } + + ARCSTAT_INCR(arcstat_compressed_size, size); ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); } static void -arc_hdr_free_pabd(arc_buf_hdr_t *hdr) +arc_hdr_free_pabd(arc_buf_hdr_t *hdr, boolean_t free_rdata) { + uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr); + ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); + IMPLY(free_rdata, HDR_HAS_RABD(hdr)); + /* * If the hdr is currently being written to the l2arc then @@ -2832,28 +3234,41 @@ arc_hdr_free_pabd(arc_buf_hdr_t *hdr) * writing it to the l2arc device. 
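The size used for this accounting differs between the two copies: the raw encrypted copy is always exactly the on-disk psize, while the plain copy is psize only while the ARC is holding the compressed form and lsize otherwise. A minimal sketch of that selection, with invented field names standing in for the HDR_GET_PSIZE()/HDR_GET_LSIZE() macros and the compression test:

#include <stddef.h>

/* Invented fields standing in for the real header macros. */
typedef struct sketch_sizes {
	size_t	ss_psize;		/* physical (on-disk) size */
	size_t	ss_lsize;		/* logical size */
	int	ss_compressed_arc;	/* ARC is holding the compressed form */
} sketch_sizes_t;

/* Like arc_hdr_size(): the plain copy is psize only while ARC keeps it compressed. */
static size_t
sketch_hdr_size(const sketch_sizes_t *s)
{
	return ((s->ss_compressed_arc && s->ss_psize > 0) ?
	    s->ss_psize : s->ss_lsize);
}

/* The raw encrypted copy always matches the on-disk psize exactly. */
static size_t
sketch_free_size(const sketch_sizes_t *s, int free_rdata)
{
	return (free_rdata ? s->ss_psize : sketch_hdr_size(s));
}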
*/ if (HDR_L2_WRITING(hdr)) { - arc_hdr_free_on_write(hdr); + arc_hdr_free_on_write(hdr, free_rdata); ARCSTAT_BUMP(arcstat_l2_free_on_write); + } else if (free_rdata) { + arc_free_data_abd(hdr, hdr->b_crypt_hdr.b_rabd, size, hdr); } else { arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, - arc_hdr_size(hdr), hdr); + size, hdr); } - hdr->b_l1hdr.b_pabd = NULL; - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr)); + if (free_rdata) { + hdr->b_crypt_hdr.b_rabd = NULL; + } else { + hdr->b_l1hdr.b_pabd = NULL; + } + + if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + + ARCSTAT_INCR(arcstat_compressed_size, -size); ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr)); } static arc_buf_hdr_t * arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, - enum zio_compress compression_type, arc_buf_contents_t type) + boolean_t encrypted, enum zio_compress compression_type, + arc_buf_contents_t type, boolean_t alloc_rdata) { arc_buf_hdr_t *hdr; VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA); - - hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); + if (encrypted) { + hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE); + } else { + hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); + } ASSERT(HDR_EMPTY(hdr)); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_thawed, ==, NULL); @@ -2864,6 +3279,8 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, hdr->b_flags = 0; arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR); arc_hdr_set_compress(hdr, compression_type); + if (encrypted) + arc_hdr_set_flags(hdr, ARC_FLAG_ENCRYPTED); hdr->b_l1hdr.b_state = arc_anon; hdr->b_l1hdr.b_arc_access = 0; @@ -2875,7 +3292,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, * the compressed or uncompressed data depending on the block * it references and compressed arc enablement. */ - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, alloc_rdata); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); return (hdr); @@ -2899,6 +3316,16 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) || (old == hdr_l2only_cache && new == hdr_full_cache)); + /* + * if the caller wanted a new full header and the header is to be + * encrypted we will actually allocate the header from the full crypt + * cache instead. The same applies to freeing from the old cache. 
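The crypt variant of the full header simply comes from a different kmem cache, so only encrypted blocks pay for the extra b_crypt_hdr fields. A small sketch of the substitution described in this comment, using an invented enum in place of the real kmem_cache_t pointers:

/* Invented enum standing in for the real kmem_cache_t pointers. */
typedef enum {
	SKETCH_CACHE_L2ONLY,		/* like hdr_l2only_cache: no L1 state */
	SKETCH_CACHE_FULL,		/* like hdr_full_cache: L1 state, no crypt fields */
	SKETCH_CACHE_FULL_CRYPT		/* like hdr_full_crypt_cache: L1 state + b_crypt_hdr */
} sketch_cache_t;

/*
 * Whenever a full (L1) header is wanted for an encrypted block, both the
 * allocation and the matching free are redirected to the crypt cache.
 */
static sketch_cache_t
sketch_pick_cache(sketch_cache_t requested, int hdr_encrypted)
{
	if (hdr_encrypted && requested == SKETCH_CACHE_FULL)
		return (SKETCH_CACHE_FULL_CRYPT);
	return (requested);
}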
+ */ + if (HDR_ENCRYPTED(hdr) && new == hdr_full_cache) + new = hdr_full_crypt_cache; + if (HDR_ENCRYPTED(hdr) && old == hdr_full_cache) + old = hdr_full_crypt_cache; + nhdr = kmem_cache_alloc(new, KM_PUSHPAGE); ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); @@ -2906,7 +3333,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) bcopy(hdr, nhdr, HDR_L2ONLY_SIZE); - if (new == hdr_full_cache) { + if (new == hdr_full_cache || new == hdr_full_crypt_cache) { arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR); /* * arc_access and arc_change_state need to be aware that a @@ -2917,6 +3344,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) /* Verify previous threads set to NULL before freeing */ ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); } else { ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(hdr->b_l1hdr.b_bufcnt); @@ -2939,6 +3367,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) */ VERIFY(!HDR_L2_WRITING(hdr)); VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); #ifdef ZFS_DEBUG if (hdr->b_l1hdr.b_thawed != NULL) { @@ -2987,6 +3416,117 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) return (nhdr); } +/* + * This function allows an L1 header to be reallocated as a crypt + * header and vice versa. If going to a crypt header, the new fields + * will be zeroed out. + */ +static arc_buf_hdr_t * +arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t encrypt) +{ + arc_buf_hdr_t *nhdr; + arc_buf_t *buf; + kmem_cache_t *ncache, *ocache; + + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT3U(!!HDR_ENCRYPTED(hdr), !=, encrypt); + ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); + ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); + + if (encrypt) { + ncache = hdr_full_crypt_cache; + ocache = hdr_full_cache; + } else { + ncache = hdr_full_cache; + ocache = hdr_full_crypt_cache; + } + + nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE); + bcopy(hdr, nhdr, HDR_L2ONLY_SIZE); + nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum; + nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt; + nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap; + nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state; + nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access; + nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb; + nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd; + nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf; +#ifdef ZFS_DEBUG + if (hdr->b_l1hdr.b_thawed != NULL) { + nhdr->b_l1hdr.b_thawed = hdr->b_l1hdr.b_thawed; + hdr->b_l1hdr.b_thawed = NULL; + } +#endif + + /* + * This refcount_add() exists only to ensure that the individual + * arc buffers always point to a header that is referenced, avoiding + * a small race condition that could trigger ASSERTs. + */ + (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG); + + for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { + mutex_enter(&buf->b_evict_lock); + buf->b_hdr = nhdr; + mutex_exit(&buf->b_evict_lock); + } + refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt); + (void) refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG); + + if (encrypt) { + arc_hdr_set_flags(nhdr, ARC_FLAG_ENCRYPTED); + } else { + arc_hdr_clear_flags(nhdr, ARC_FLAG_ENCRYPTED); + } + buf_discard_identity(hdr); + kmem_cache_free(ocache, hdr); + + return (nhdr); +} + +/* + * This function is used by the send / receive code to convert a newly + * allocated arc_buf_t to one that is suitable for a raw encrypted write. 
+ * Currently we only need support for L0 dnode buffers since other object + * types can simply allocated a raw buffer to begin with. Encrypted dnode + * blocks will always be uncompressed so we do not have to worry about + * compression type or psize. + */ +void +arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT3U(ot, ==, DMU_OT_DNODE); + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT0(HDR_ENCRYPTED(hdr)); + ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); + ASSERT0(ARC_BUF_COMPRESSED(buf)); + ASSERT0(ARC_BUF_ENCRYPTED(buf)); + + buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED); + hdr = arc_hdr_realloc_crypt(hdr, B_TRUE); + hdr->b_crypt_hdr.b_dsobj = dsobj; + hdr->b_crypt_hdr.b_ot = ot; + hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); + bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); + + /* free the non-raw header data */ + if (hdr->b_l1hdr.b_pabd != NULL) { + if (arc_buf_is_shared(buf)) { + arc_unshare_buf(hdr, buf); + } else { + arc_hdr_free_pabd(hdr, B_FALSE); + } + VERIFY3P(buf->b_data, !=, NULL); + } +} + /* * Allocate a new arc_buf_hdr_t and arc_buf_t and return the buf to the caller. * The buf is returned thawed since we expect the consumer to modify it. @@ -2995,11 +3535,12 @@ arc_buf_t * arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size) { arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size, - ZIO_COMPRESS_OFF, type); + B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE); ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); arc_buf_t *buf = NULL; - VERIFY0(arc_buf_alloc_impl(hdr, tag, B_FALSE, B_FALSE, &buf)); + VERIFY0(arc_buf_alloc_impl(hdr, spa, 0, tag, B_FALSE, + B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); return (buf); @@ -3015,15 +3556,16 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, { ASSERT3U(lsize, >, 0); ASSERT3U(lsize, >=, psize); - ASSERT(compression_type > ZIO_COMPRESS_OFF); - ASSERT(compression_type < ZIO_COMPRESS_FUNCTIONS); + ASSERT3U(compression_type, >, ZIO_COMPRESS_OFF); + ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS); arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, - compression_type, ARC_BUFC_DATA); + B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE); ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); arc_buf_t *buf = NULL; - VERIFY0(arc_buf_alloc_impl(hdr, tag, B_TRUE, B_FALSE, &buf)); + VERIFY0(arc_buf_alloc_impl(hdr, spa, 0, tag, B_FALSE, B_TRUE, + B_FALSE, &buf)); arc_buf_thaw(buf); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); @@ -3035,13 +3577,50 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, * buf and the hdr. */ ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); arc_share_buf(hdr, buf); } return (buf); } +arc_buf_t * +arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type) +{ + arc_buf_hdr_t *hdr; + arc_buf_t *buf; + arc_buf_contents_t type = DMU_OT_IS_METADATA(ot) ? 
+ ARC_BUFC_METADATA : ARC_BUFC_DATA; + + ASSERT3U(lsize, >, 0); + ASSERT3U(lsize, >=, psize); + ASSERT3U(compression_type, >=, ZIO_COMPRESS_OFF); + ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS); + + hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE, + compression_type, type, B_TRUE); + ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); + + hdr->b_crypt_hdr.b_dsobj = dsobj; + hdr->b_crypt_hdr.b_ot = ot; + hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); + bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); + + buf = NULL; + VERIFY0(arc_buf_alloc_impl(hdr, spa, dsobj, tag, B_TRUE, B_TRUE, + B_FALSE, &buf)); + arc_buf_thaw(buf); + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + + return (buf); +} + static void arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { @@ -3115,7 +3694,11 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) #endif if (hdr->b_l1hdr.b_pabd != NULL) { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); + } + + if (HDR_HAS_RABD(hdr)) { + arc_hdr_free_pabd(hdr, B_TRUE); } } @@ -3123,7 +3706,12 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) if (HDR_HAS_L1HDR(hdr)) { ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); - kmem_cache_free(hdr_full_cache, hdr); + + if (!HDR_ENCRYPTED(hdr)) { + kmem_cache_free(hdr_full_cache, hdr); + } else { + kmem_cache_free(hdr_full_crypt_cache, hdr); + } } else { kmem_cache_free(hdr_l2only_cache, hdr); } @@ -3157,7 +3745,7 @@ arc_buf_destroy(arc_buf_t *buf, void* tag) /* * Evict the arc_buf_hdr that is provided as a parameter. The resultant - * state of the header is dependent on it's state prior to entering this + * state of the header is dependent on its state prior to entering this * function. The following transitions are possible: * * - arc_mru -> arc_mru_ghost @@ -3198,8 +3786,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); if (HDR_HAS_L2HDR(hdr)) { + ASSERT(hdr->b_l1hdr.b_pabd == NULL); + ASSERT(!HDR_HAS_RABD(hdr)); /* * This buffer is cached on the 2nd Level ARC; * don't destroy the header. @@ -3266,7 +3855,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * This ensures that the accounting is updated correctly * in arc_free_data_impl(). */ - arc_hdr_free_pabd(hdr); + if (hdr->b_l1hdr.b_pabd != NULL) + arc_hdr_free_pabd(hdr, B_FALSE); + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); arc_change_state(evicted_state, hdr, hash_lock); ASSERT(HDR_IN_HASH_TABLE(hdr)); @@ -4194,7 +4787,7 @@ arc_reclaim_thread(void) /* * Adapt arc info given the number of bytes we are trying to add and - * the state that we are comming from. This function is only called + * the state that we are coming from. This function is only called * when we are adding new content to the cache. 
*/ static void @@ -4580,22 +5173,23 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) } } -/* a generic arc_done_func_t which you can use */ +/* a generic arc_read_done_func_t which you can use */ /* ARGSUSED */ void -arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) +arc_bcopy_func(zio_t *zio, int error, arc_buf_t *buf, void *arg) { - if (zio == NULL || zio->io_error == 0) + if (error == 0) bcopy(buf->b_data, arg, arc_buf_size(buf)); arc_buf_destroy(buf, arg); } -/* a generic arc_done_func_t */ +/* a generic arc_read_done_func_t */ +/* ARGSUSED */ void -arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) +arc_getbuf_func(zio_t *zio, int error, arc_buf_t *buf, void *arg) { arc_buf_t **bufp = arg; - if (zio && zio->io_error) { + if (error != 0) { arc_buf_destroy(buf, arg); *bufp = NULL; } else { @@ -4609,20 +5203,22 @@ arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp) { if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) { ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); + ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF); } else { if (HDR_COMPRESSION_ENABLED(hdr)) { - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, + ASSERT3U(arc_hdr_get_compress(hdr), ==, BP_GET_COMPRESS(bp)); } ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp)); ASSERT3U(HDR_GET_PSIZE(hdr), ==, BP_GET_PSIZE(bp)); + ASSERT3U(!!HDR_ENCRYPTED(hdr), ==, BP_IS_ENCRYPTED(bp)); } } static void arc_read_done(zio_t *zio) { + blkptr_t *bp = zio->io_bp; arc_buf_hdr_t *hdr = zio->io_private; kmutex_t *hash_lock = NULL; arc_callback_t *callback_list; @@ -4654,6 +5250,26 @@ arc_read_done(zio_t *zio) ASSERT3P(hash_lock, !=, NULL); } + if (BP_IS_ENCRYPTED(bp)) { + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); + hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; + zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv); + + if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { + void *tmpbuf; + + tmpbuf = abd_borrow_buf_copy(zio->io_abd, + sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmpbuf, + hdr->b_crypt_hdr.b_mac); + abd_return_buf(zio->io_abd, tmpbuf, + sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac); + } + } + if (no_zio_error) { /* byteswap if necessary */ if (BP_SHOULD_BYTESWAP(zio->io_bp)) { @@ -4699,8 +5315,18 @@ arc_read_done(zio_t *zio) /* This is a demand read since prefetches don't use callbacks */ callback_cnt++; - int error = arc_buf_alloc_impl(hdr, acb->acb_private, - acb->acb_compressed, no_zio_error, &acb->acb_buf); + int error = arc_buf_alloc_impl(hdr, zio->io_spa, + zio->io_bookmark.zb_objset, acb->acb_private, + acb->acb_encrypted, acb->acb_compressed, no_zio_error, + &acb->acb_buf); + + /* + * assert non-speculative zios didn't fail because an + * encryption key wasn't loaded + */ + ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || + error == 0 || error != ENOENT); + if (no_zio_error) { zio->io_error = error; } @@ -4709,8 +5335,7 @@ arc_read_done(zio_t *zio) arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); if (callback_cnt == 0) { ASSERT(HDR_PREFETCH(hdr)); - ASSERT0(hdr->b_l1hdr.b_bufcnt); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); } ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || @@ -4749,8 +5374,10 @@ arc_read_done(zio_t *zio) /* execute each callback and free its structure */ while ((acb = callback_list) != NULL) { - if (acb->acb_done) - acb->acb_done(zio, acb->acb_buf, acb->acb_private); + if (acb->acb_done) { + acb->acb_done(zio, zio->io_error, 
acb->acb_buf, + acb->acb_private); + } if (acb->acb_zio_dummy != NULL) { acb->acb_zio_dummy->io_error = zio->io_error; @@ -4784,15 +5411,18 @@ arc_read_done(zio_t *zio) * for readers of this block. */ int -arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, - void *private, zio_priority_t priority, int zio_flags, - arc_flags_t *arc_flags, const zbookmark_phys_t *zb) +arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, + arc_read_done_func_t *done, void *private, zio_priority_t priority, + int zio_flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = NULL; kmutex_t *hash_lock = NULL; zio_t *rzio; uint64_t guid = spa_load_guid(spa); - boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0; + boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW_COMPRESS) != 0; + boolean_t encrypted_read = BP_IS_ENCRYPTED(bp) && + (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; + int rc = 0; ASSERT(!BP_IS_EMBEDDED(bp) || BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); @@ -4806,7 +5436,16 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, hdr = buf_hash_find(guid, bp, &hash_lock); } - if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) { + /* + * Determine if we have an L1 cache hit or a cache miss. For simplicity + * we maintain encrypted data seperately from compressed / uncompressed + * data. If the user is requesting raw encrypted data and we don't have + * that in the header we will read from disk to guarantee that we can + * get it even if the encryption keys aren't loaded. + */ + if (hdr != NULL && HDR_HAS_L1HDR(hdr) && + (hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)) && + (!encrypted_read || !BP_IS_ENCRYPTED(bp) || HDR_HAS_RABD(hdr))) { arc_buf_t *buf = NULL; *arc_flags |= ARC_FLAG_CACHED; @@ -4893,8 +5532,12 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp)); /* Get a buf with the desired data in it. */ - VERIFY0(arc_buf_alloc_impl(hdr, private, - compressed_read, B_TRUE, &buf)); + rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset, + private, encrypted_read, compressed_read, B_TRUE, + &buf); + + ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || + rc == 0 || rc != ENOENT); } else if (*arc_flags & ARC_FLAG_PREFETCH && refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); @@ -4910,7 +5553,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, data, metadata, hits); if (done) - done(NULL, buf, private); + done(NULL, rc, buf, private); } else { uint64_t lsize = BP_GET_LSIZE(bp); uint64_t psize = BP_GET_PSIZE(bp); @@ -4919,13 +5562,15 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, uint64_t addr = 0; boolean_t devw = B_FALSE; uint64_t size; + void *hdr_abd; if (hdr == NULL) { /* this block is not in the cache */ arc_buf_hdr_t *exists = NULL; arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp); hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, - BP_GET_COMPRESS(bp), type); + BP_IS_ENCRYPTED(bp), BP_GET_COMPRESS(bp), type, + encrypted_read); if (!BP_IS_EMBEDDED(bp)) { hdr->b_dva = *BP_IDENTITY(bp); @@ -4941,25 +5586,42 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, } } else { /* - * This block is in the ghost cache. If it was L2-only - * (and thus didn't have an L1 hdr), we realloc the - * header to add an L1 hdr. + * This block is in the ghost cache or encrypted data + * was requested and we didn't have it. 
If it was + * L2-only (and thus didn't have an L1 hdr), + * we realloc the header to add an L1 hdr. */ if (!HDR_HAS_L1HDR(hdr)) { hdr = arc_hdr_realloc(hdr, hdr_l2only_cache, hdr_full_cache); } - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state)); - ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + + if (GHOST_STATE(hdr->b_l1hdr.b_state)) { + ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt)); + ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + } else if (HDR_IO_IN_PROGRESS(hdr)) { + /* + * If this header already had an IO in progress + * and we are performing another IO to fetch + * encrypted data we must wait until the first + * IO completes so as not to confuse + * arc_read_done(). This should be very rare + * and so the performance impact shouldn't + * matter. + */ + cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); + mutex_exit(hash_lock); + goto top; + } /* * This is a delicate dance that we play here. - * This hdr is in the ghost list so we access it - * to move it out of the ghost list before we + * This hdr might be in the ghost list so we access + * it to move it out of the ghost list before we * initiate the read. If it's a prefetch then * it won't have a callback so we'll remove the * reference that arc_buf_alloc_impl() created. We @@ -4967,19 +5629,22 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, * avoid hitting an assert in remove_reference(). */ arc_access(hdr, hash_lock); - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, encrypted_read); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - size = arc_hdr_size(hdr); - /* - * If compression is enabled on the hdr, then will do - * RAW I/O and will store the compressed data in the hdr's - * data block. Otherwise, the hdr's data block will contain - * the uncompressed data. - */ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { + if (encrypted_read) { + ASSERT(HDR_HAS_RABD(hdr)); + size = HDR_GET_PSIZE(hdr); + hdr_abd = hdr->b_crypt_hdr.b_rabd; zio_flags |= ZIO_FLAG_RAW; + } else { + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + size = arc_hdr_size(hdr); + hdr_abd = hdr->b_l1hdr.b_pabd; + + if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) { + zio_flags |= ZIO_FLAG_RAW_COMPRESS; + } } if (*arc_flags & ARC_FLAG_PREFETCH) @@ -4996,6 +5661,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, acb->acb_done = done; acb->acb_private = private; acb->acb_compressed = compressed_read; + acb->acb_encrypted = encrypted_read; ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); hdr->b_l1hdr.b_acb = acb; @@ -5069,10 +5735,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, * Issue a null zio if the underlying buffer * was squashed to zero size by compression. 
*/ - ASSERT3U(HDR_GET_COMPRESS(hdr), !=, + ASSERT3U(arc_hdr_get_compress(hdr), !=, ZIO_COMPRESS_EMPTY); rzio = zio_read_phys(pio, vd, addr, - size, hdr->b_l1hdr.b_pabd, + HDR_GET_PSIZE(hdr), hdr_abd, ZIO_CHECKSUM_OFF, l2arc_read_done, cb, priority, zio_flags | ZIO_FLAG_DONT_CACHE | @@ -5081,7 +5747,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, ZIO_FLAG_DONT_RETRY, B_FALSE); DTRACE_PROBE2(l2arc__read, vdev_t *, vd, zio_t *, rzio); - ARCSTAT_INCR(arcstat_l2_read_bytes, size); + ARCSTAT_INCR(arcstat_l2_read_bytes, + HDR_GET_PSIZE(hdr)); if (*arc_flags & ARC_FLAG_NOWAIT) { zio_nowait(rzio); @@ -5111,7 +5778,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, } } - rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size, + rzio = zio_read(pio, spa, bp, hdr_abd, size, arc_read_done, hdr, priority, zio_flags, zb); if (*arc_flags & ARC_FLAG_WAIT) @@ -5261,7 +5928,8 @@ arc_release(arc_buf_t *buf, void *tag) uint64_t spa = hdr->b_spa; uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t lsize = HDR_GET_LSIZE(hdr); - enum zio_compress compress = HDR_GET_COMPRESS(hdr); + boolean_t encrypted = HDR_ENCRYPTED(hdr); + enum zio_compress compress = arc_hdr_get_compress(hdr); arc_buf_contents_t type = arc_buf_type(hdr); VERIFY3U(hdr->b_type, ==, type); @@ -5286,6 +5954,7 @@ arc_release(arc_buf_t *buf, void *tag) * buffer, then we must stop sharing that block. */ if (arc_buf_is_shared(buf)) { + ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf); VERIFY(!arc_buf_is_shared(lastbuf)); /* @@ -5303,7 +5972,7 @@ arc_release(arc_buf_t *buf, void *tag) if (arc_can_share(hdr, lastbuf)) { arc_share_buf(hdr, lastbuf); } else { - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, psize); } @@ -5318,10 +5987,10 @@ arc_release(arc_buf_t *buf, void *tag) * if we have a compressed, shared buffer. */ ASSERT(arc_buf_is_shared(lastbuf) || - HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); ASSERT(!ARC_BUF_SHARED(buf)); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); ASSERT3P(state, !=, arc_l2c_only); (void) refcount_remove_many(&state->arcs_size, @@ -5334,6 +6003,9 @@ arc_release(arc_buf_t *buf, void *tag) } hdr->b_l1hdr.b_bufcnt -= 1; + if (ARC_BUF_ENCRYPTED(buf)) + hdr->b_crypt_hdr.b_ebufcnt -= 1; + arc_cksum_verify(buf); arc_buf_unwatch(buf); @@ -5343,7 +6015,8 @@ arc_release(arc_buf_t *buf, void *tag) * Allocate a new hdr. The new hdr will contain a b_pabd * buffer which will be freed in arc_write(). 
*/ - nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type); + nhdr = arc_hdr_alloc(spa, psize, lsize, encrypted, + compress, type, HDR_HAS_RABD(hdr)); ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(nhdr->b_l1hdr.b_bufcnt); ASSERT0(refcount_count(&nhdr->b_l1hdr.b_refcnt)); @@ -5352,12 +6025,14 @@ arc_release(arc_buf_t *buf, void *tag) nhdr->b_l1hdr.b_buf = buf; nhdr->b_l1hdr.b_bufcnt = 1; + if (ARC_BUF_ENCRYPTED(buf)) + nhdr->b_crypt_hdr.b_ebufcnt = 1; (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag); buf->b_hdr = nhdr; mutex_exit(&buf->b_evict_lock); (void) refcount_add_many(&arc_anon->arcs_size, - arc_buf_size(buf), buf); + HDR_GET_LSIZE(nhdr), buf); } else { mutex_exit(&buf->b_evict_lock); ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1); @@ -5422,11 +6097,15 @@ arc_write_ready(zio_t *zio) if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); } ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_SHARED_DATA(hdr)); ASSERT(!arc_buf_is_shared(buf)); @@ -5438,6 +6117,20 @@ arc_write_ready(zio_t *zio) arc_cksum_compute(buf); arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + if (BP_IS_ENCRYPTED(zio->io_bp) != !!HDR_ENCRYPTED(hdr)) + hdr = arc_hdr_realloc_crypt(hdr, BP_IS_ENCRYPTED(zio->io_bp)); + + if (HDR_ENCRYPTED(hdr)) { + /* ZIL blocks are written through zio_rewrite */ + ASSERT3U(BP_GET_TYPE(zio->io_bp), !=, DMU_OT_INTENT_LOG); + + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(zio->io_bp); + hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; + zio_crypt_decode_params_bp(zio->io_bp, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv); + zio_crypt_decode_mac_bp(zio->io_bp, hdr->b_crypt_hdr.b_mac); + } + enum zio_compress compress; if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) { compress = ZIO_COMPRESS_OFF; @@ -5450,8 +6143,10 @@ arc_write_ready(zio_t *zio) /* - * Fill the hdr with data. If the hdr is compressed, the data we want - * is available from the zio, otherwise we can take it from the buf. + * Fill the hdr with data. If the buffer is encrypted we have no choice + * but to copy the data into b_rabd. If the hdr is compressed, the data + * we want is available from the zio, otherwise we can take it from + * the buf. * * We might be able to share the buf's data with the hdr here. However, * doing so would cause the ARC to be full of linear ABDs if we write a @@ -5461,23 +6156,28 @@ arc_write_ready(zio_t *zio) * written. Therefore, if they're allowed then we allocate one and copy * the data into it; otherwise, we share the data directly if we can. */ - if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { - arc_hdr_alloc_pabd(hdr); - + if (ARC_BUF_ENCRYPTED(buf)) { + ASSERT(ARC_BUF_COMPRESSED(buf)); + arc_hdr_alloc_pabd(hdr, B_TRUE); + abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); + } else if (!arc_can_share(hdr, buf)) { /* * Ideally, we would always copy the io_abd into b_pabd, but the * user may have disabled compressed ARC, thus we must check the * hdr's compression setting rather than the io_bp's. 
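Stripped of the allocation and sharing mechanics, the choice of where the write data lands can be read as a small decision function. The sketch below condenses it, with can_share standing in for arc_can_share() together with the scatter-ABD policy check, and all other names invented for illustration:

/* Which fill action the write-ready path would take. */
typedef enum {
	SKETCH_COPY_ZIO_TO_RABD,	/* copy the zio's raw data into b_rabd */
	SKETCH_COPY_ZIO_TO_PABD,	/* copy the zio's compressed data into b_pabd */
	SKETCH_COPY_BUF_TO_PABD,	/* copy the caller's buf into b_pabd */
	SKETCH_SHARE_BUF		/* share the buf's memory with the hdr */
} sketch_write_fill_t;

/*
 * Condensed decision: raw encrypted bufs and encrypted headers are filled
 * from the zio into b_rabd; otherwise the data is shared when possible,
 * and copied into b_pabd from whichever side already holds the form the
 * header wants.
 */
static sketch_write_fill_t
sketch_write_fill(int buf_encrypted, int hdr_encrypted, int hdr_compressed,
    int buf_compressed, int can_share)
{
	if (buf_encrypted)
		return (SKETCH_COPY_ZIO_TO_RABD);
	if (can_share)
		return (SKETCH_SHARE_BUF);
	if (hdr_encrypted)
		return (SKETCH_COPY_ZIO_TO_RABD);
	if (hdr_compressed && !buf_compressed)
		return (SKETCH_COPY_ZIO_TO_PABD);
	return (SKETCH_COPY_BUF_TO_PABD);
}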
*/ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { - ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, - ZIO_COMPRESS_OFF); + if (HDR_ENCRYPTED(hdr)) { ASSERT3U(psize, >, 0); - + arc_hdr_alloc_pabd(hdr, B_TRUE); + abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); + } else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && + !ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(psize, >, 0); + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize); } else { ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr)); - + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, arc_buf_size(buf)); } @@ -5485,7 +6185,6 @@ arc_write_ready(zio_t *zio) ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd)); ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf)); ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); - arc_share_buf(hdr, buf); } @@ -5599,9 +6298,9 @@ arc_write_done(zio_t *zio) zio_t * arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, - boolean_t l2arc, const zio_prop_t *zp, arc_done_func_t *ready, - arc_done_func_t *children_ready, arc_done_func_t *physdone, - arc_done_func_t *done, void *private, zio_priority_t priority, + boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, + arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone, + arc_write_done_func_t *done, void *private, zio_priority_t priority, int zio_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = buf->b_hdr; @@ -5616,10 +6315,13 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); if (l2arc) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_OFF); - ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf)); + + if (ARC_BUF_ENCRYPTED(buf)) { + ASSERT(ARC_BUF_COMPRESSED(buf)); zio_flags |= ZIO_FLAG_RAW; + } else if (ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_OFF); + zio_flags |= ZIO_FLAG_RAW_COMPRESS; } callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; @@ -5643,11 +6345,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } VERIFY3P(buf->b_data, !=, NULL); - arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); + + arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); + ASSERT(!arc_buf_is_shared(buf)); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); @@ -5947,6 +6654,8 @@ arc_state_fini(void) multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]); multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]); } uint64_t @@ -6540,10 +7249,12 @@ l2arc_write_done(zio_t *zio) static void l2arc_read_done(zio_t *zio) { + int tfm_error = 0; + spa_t *spa = zio->io_spa; l2arc_read_callback_t *cb; arc_buf_hdr_t *hdr; kmutex_t *hash_lock; - boolean_t valid_cksum; + boolean_t valid_cksum, using_rdata, needs_tfm; ASSERT3P(zio->io_vd, !=, NULL); ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); @@ -6564,12 +7275,92 @@ l2arc_read_done(zio_t *zio) /* * Check this survived the L2ARC journey. 
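Because the L2ARC stores each block exactly as it exists on disk, a block that survives the journey may still need up to two transforms before arc_read_done() can consume it: decryption when the header is encrypted and the read did not target b_rabd, then decompression when the block is compressed on disk but compressed ARC is disabled for this header. A minimal sketch of that decision, with invented field names and the error propagation between the two steps omitted:

/* Invented summary of the state l2arc_read_done() examines. */
typedef struct sketch_l2_read {
	int	sl_valid_cksum;		/* block survived the L2ARC round trip */
	int	sl_read_into_rabd;	/* the read targeted the raw encrypted copy */
	int	sl_hdr_encrypted;	/* header describes an encrypted block */
	int	sl_compressed_on_disk;	/* blkptr records a compression type */
	int	sl_compressed_arc;	/* ARC keeps the compressed form */
} sketch_l2_read_t;

/* Which read-side transforms would still have to be applied. */
static void
sketch_l2_read_transforms(const sketch_l2_read_t *s, int *need_decrypt,
    int *need_decompress)
{
	int needs_tfm = s->sl_valid_cksum && !s->sl_read_into_rabd;

	*need_decrypt = needs_tfm && s->sl_hdr_encrypted;
	*need_decompress = needs_tfm && s->sl_compressed_on_disk &&
	    !s->sl_compressed_arc;
}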
*/ - ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd); + ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd || + (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd)); zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ + /* + * b_rabd should always match the data as it exists on disk if it is + * being used. Therefore if we are reading into b_rabd we do not + * attempt to transform the data. + */ valid_cksum = arc_cksum_is_equal(hdr, zio); - if (valid_cksum && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { + using_rdata = + (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd); + needs_tfm = (valid_cksum && !using_rdata); + + /* Decrypt the data if it was encrypted in the L2ARC */ + if (needs_tfm && HDR_ENCRYPTED(hdr)) { + dsl_crypto_key_t *dck; + blkptr_t *bp = zio->io_bp; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + boolean_t no_crypt = B_FALSE; + + /* + * ZIL data is never be written to the L2ARC, so we don't need + * special handling for its unique MAC storage. + */ + ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); + + tfm_error = spa_keystore_lookup_key(spa, cb->l2rcb_zb.zb_objset, + FTAG, &dck); + if (tfm_error != 0) { + ASSERT(tfm_error != ENOENT || + (zio->io_flags & ZIO_FLAG_SPECULATIVE)); + } else { + zio_crypt_decode_params_bp(bp, salt, iv); + zio_crypt_decode_mac_bp(bp, mac); + + tfm_error = zio_do_crypt_abd(B_FALSE, &dck->dck_key, + salt, BP_GET_TYPE(bp), iv, mac, HDR_GET_PSIZE(hdr), + eabd, hdr->b_l1hdr.b_pabd, &no_crypt); + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + } + + if (tfm_error == 0 && !no_crypt) { + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = eabd; + zio->io_abd = eabd; + } else { + arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr); + } + } + /* + * If the L2ARC block was compressed, but ARC compression + * is disabled we decompress the data into a new buffer and + * replace the existing data. + */ + if (needs_tfm && tfm_error == 0 && + HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); + + tfm_error = zio_decompress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr), + HDR_GET_LSIZE(hdr)); + + if (tfm_error == 0) { + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = cabd; + zio->io_abd = cabd; + zio->io_size = HDR_GET_LSIZE(hdr); + } else { + abd_return_buf(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr); + } + } + + if (valid_cksum && tfm_error == 0 && zio->io_error == 0 && + !HDR_L2_EVICTED(hdr)) { mutex_exit(hash_lock); zio->io_private = hdr; arc_read_done(zio); @@ -6584,7 +7375,7 @@ l2arc_read_done(zio_t *zio) } else { zio->io_error = SET_ERROR(EIO); } - if (!valid_cksum) + if (!valid_cksum || tfm_error != 0) ARCSTAT_BUMP(arcstat_l2_cksum_bad); /* @@ -6594,11 +7385,13 @@ l2arc_read_done(zio_t *zio) */ if (zio->io_waiter == NULL) { zio_t *pio = zio_unique_parent(zio); + void *abd = (using_rdata) ? 
+ hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd; ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, - hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done, + abd, zio->io_size, arc_read_done, hdr, zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); } @@ -6763,6 +7556,112 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) mutex_exit(&dev->l2ad_mtx); } +static int +l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, abd_t **abd_out, + uint64_t *bsize_out, uint64_t *csize_out) +{ + int ret; + void *tmp = NULL; + abd_t *cabd = NULL, *eabd = NULL, *to_write = hdr->b_l1hdr.b_pabd; + enum zio_compress compress = HDR_GET_COMPRESS(hdr); + uint64_t bsize = arc_hdr_size(hdr); + uint64_t csize = bsize; + boolean_t ismd = HDR_ISTYPE_METADATA(hdr); + dsl_crypto_key_t *dck = NULL; + uint8_t mac[ZIO_DATA_MAC_LEN] = { 0 }; + boolean_t no_crypt; + + ASSERT((HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) || + HDR_ENCRYPTED(hdr) || + HDR_SHARED_DATA(hdr)); + + /* + * If this is just a shared buffer, we simply copy the data. Otherwise + * we apply the needed transforms so that the data matches what is + * on disk. + */ + if (!(compress != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) && + !HDR_ENCRYPTED(hdr)) { + to_write = abd_alloc_for_io(bsize, ismd); + abd_copy(to_write, hdr->b_l1hdr.b_pabd, bsize); + goto out; + } + + if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) { + cabd = abd_alloc_for_io(bsize, ismd); + tmp = abd_borrow_buf(cabd, bsize); + + csize = zio_compress_data(compress, to_write, tmp, bsize); + ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr)); + if (csize < HDR_GET_PSIZE(hdr)) { + bzero((char *)tmp + csize, HDR_GET_PSIZE(hdr) - csize); + csize = HDR_GET_PSIZE(hdr); + } + abd_return_buf_copy(cabd, tmp, bsize); + + to_write = cabd; + } + + if (HDR_ENCRYPTED(hdr)) { + eabd = abd_alloc_for_io(csize, ismd); + + /* + * If the dataset was disowned before the buffer + * made it to this point, the key to re-encrypt + * it won't be available. In this case we simply + * won't write the buffer to the L2ARC. + */ + ret = spa_keystore_lookup_key(spa, hdr->b_crypt_hdr.b_dsobj, + FTAG, &dck); + if (ret != 0) + goto error; + + ret = zio_do_crypt_abd(B_TRUE, &dck->dck_key, + hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_ot, + hdr->b_crypt_hdr.b_iv, mac, csize, to_write, eabd, + &no_crypt); + if (ret != 0) + goto error; + + if (no_crypt) { + spa_keystore_dsl_key_rele(spa, dck, FTAG); + abd_free(eabd); + goto out; + } + + /* assert that the MAC we got here matches the one we saved */ + ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN)); + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + if (to_write == cabd) + abd_free(cabd); + + to_write = eabd; + bsize = csize; + } + +out: + *bsize_out = bsize; + *csize_out = csize; + *abd_out = to_write; + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + if (cabd != NULL) + abd_free(cabd); + if (eabd != NULL) + abd_free(eabd); + + *bsize_out = 0; + *csize_out = 0; + *abd_out = NULL; + return (ret); +} + /* * Find and write ARC buffers to the L2ARC device. 
* @@ -6816,6 +7715,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) for (; hdr; hdr = hdr_prev) { kmutex_t *hash_lock; + uint64_t bsize, csize; + abd_t *to_write = NULL; if (arc_warm == B_FALSE) hdr_prev = multilist_sublist_next(mls, hdr); @@ -6850,6 +7751,61 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) break; } + /* + * We rely on the L1 portion of the header below, so + * it's invalid for this header to have been evicted out + * of the ghost cache, prior to being written out. The + * ARC_FLAG_L2_WRITING bit ensures this won't happen. + */ + arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING); + ASSERT(HDR_HAS_L1HDR(hdr)); + + ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + ASSERT3U(arc_hdr_size(hdr), >, 0); + + /* + * If this header has b_rabd, we can use this since it + * must always match the data exactly as it exists on + * disk. Otherwise, the L2ARC can normally use the + * hdr's data, but if we're sharing data between the + * hdr and one of its bufs, L2ARC needs its own copy of + * the data so that the ZIO below can't race with the + * buf consumer. To ensure that this copy will be + * available for the lifetime of the ZIO and be cleaned + * up afterwards, we add it to the l2arc_free_on_write + * queue. If we need to apply any transforms to the + * data (compression, encryption) we will also need the + * extra buffer. + */ + if (HDR_HAS_RABD(hdr)) { + bsize = HDR_GET_PSIZE(hdr); + csize = bsize; + to_write = hdr->b_crypt_hdr.b_rabd; + } else if (!(!HDR_COMPRESSION_ENABLED(hdr) && + HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) && + !HDR_ENCRYPTED(hdr) && + !HDR_SHARED_DATA(hdr)) { + bsize = arc_hdr_size(hdr); + csize = bsize; + to_write = hdr->b_l1hdr.b_pabd; + } else { + int ret; + arc_buf_contents_t type = arc_buf_type(hdr); + + ret = l2arc_apply_transforms(spa, hdr, + &to_write, &bsize, &csize); + if (ret != 0) { + arc_hdr_clear_flags(hdr, + ARC_FLAG_L2_WRITING); + mutex_exit(hash_lock); + continue; + } + + l2arc_free_abd_on_write(to_write, bsize, type); + } + if (pio == NULL) { /* * Insert a dummy header on the buflist so @@ -6870,49 +7826,17 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) hdr->b_l2hdr.b_dev = dev; hdr->b_l2hdr.b_daddr = dev->l2ad_hand; - arc_hdr_set_flags(hdr, - ARC_FLAG_L2_WRITING | ARC_FLAG_HAS_L2HDR); + arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR); mutex_enter(&dev->l2ad_mtx); list_insert_head(&dev->l2ad_buflist, hdr); mutex_exit(&dev->l2ad_mtx); - /* - * We rely on the L1 portion of the header below, so - * it's invalid for this header to have been evicted out - * of the ghost cache, prior to being written out. The - * ARC_FLAG_L2_WRITING bit ensures this won't happen. - */ - ASSERT(HDR_HAS_L1HDR(hdr)); - - ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - ASSERT3U(arc_hdr_size(hdr), >, 0); - uint64_t size = arc_hdr_size(hdr); - - (void) refcount_add_many(&dev->l2ad_alloc, size, hdr); + (void) refcount_add_many(&dev->l2ad_alloc, + arc_hdr_size(hdr), hdr); - /* - * Normally the L2ARC can use the hdr's data, but if - * we're sharing data between the hdr and one of its - * bufs, L2ARC needs its own copy of the data so that - * the ZIO below can't race with the buf consumer. To - * ensure that this copy will be available for the - * lifetime of the ZIO and be cleaned up afterwards, we - * add it to the l2arc_free_on_write queue. 
- */ - abd_t *to_write; - if (!HDR_SHARED_DATA(hdr)) { - to_write = hdr->b_l1hdr.b_pabd; - } else { - to_write = abd_alloc_for_io(size, - HDR_ISTYPE_METADATA(hdr)); - abd_copy(to_write, hdr->b_l1hdr.b_pabd, size); - l2arc_free_abd_on_write(to_write, size, - arc_buf_type(hdr)); - } wzio = zio_write_phys(pio, dev->l2ad_vdev, - hdr->b_l2hdr.b_daddr, size, to_write, + hdr->b_l2hdr.b_daddr, csize, to_write, ZIO_CHECKSUM_OFF, NULL, hdr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE); @@ -6921,12 +7845,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, zio_t *, wzio); - write_asize += size; + write_asize += csize; /* * Keep the clock hand suitably device-aligned. */ uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, - size); + csize); write_psize += asize; dev->l2ad_hand += asize; diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index 843bc4cc47cb..b8b4ce30e392 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -897,8 +897,9 @@ dbuf_whichblock(dnode_t *dn, int64_t level, uint64_t offset) } } +/* ARGSUSED */ static void -dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) +dbuf_read_done(zio_t *zio, int err, arc_buf_t *buf, void *vdb) { dmu_buf_impl_t *db = vdb; @@ -918,7 +919,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) db->db_freed_in_flight = FALSE; dbuf_set_data(db, buf); db->db_state = DB_CACHED; - } else if (zio == NULL || zio->io_error == 0) { + } else if (err == 0) { dbuf_set_data(db, buf); db->db_state = DB_CACHED; } else { @@ -937,6 +938,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) dnode_t *dn; zbookmark_phys_t zb; arc_flags_t aflags = ARC_FLAG_NOWAIT; + int err, zio_flags = 0; DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -949,6 +951,21 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (db->db_blkid == DMU_BONUS_BLKID) { int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); + arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ? + dn->dn_dbuf->db_buf : NULL; + + /* if the underlying dnode block is encrypted, decrypt it */ + if (dn_buf != NULL && dn->dn_objset->os_encrypted && + DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) && + (flags & DB_RF_NO_DECRYPT) == 0) { + err = arc_untransform(dn_buf, dn->dn_objset->os_spa, + dmu_objset_id(dn->dn_objset), B_TRUE); + if (err != 0) { + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return; + } + } ASSERT3U(bonuslen, <=, db->db.db_size); db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); @@ -1017,9 +1034,17 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) dbuf_add_ref(db, NULL); + zio_flags = (flags & DB_RF_CANFAIL) ? + ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED; + + if ((flags & DB_RF_NO_DECRYPT) && BP_IS_ENCRYPTED(db->db_blkptr)) { + ASSERT3U(BP_GET_TYPE(db->db_blkptr), ==, DMU_OT_DNODE); + ASSERT3U(BP_GET_LEVEL(db->db_blkptr), ==, 0); + zio_flags |= ZIO_FLAG_RAW; + } + (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr, - dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, - (flags & DB_RF_CANFAIL) ? 
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, + dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); } @@ -1065,18 +1090,31 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { + dnode_t *dn = DB_DNODE(db); int size = arc_buf_size(db->db_buf); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); spa_t *spa = db->db_objset->os_spa; enum zio_compress compress_type = arc_get_compression(db->db_buf); - if (compress_type == ZIO_COMPRESS_OFF) { - dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); - } else { + if (arc_is_encrypted(db->db_buf)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(db->db_buf, &byteorder, salt, + iv, mac); + dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db, + dmu_objset_id(dn->dn_objset), byteorder, salt, iv, + mac, dn->dn_type, size, arc_buf_lsize(db->db_buf), + compress_type); + } else if (compress_type != ZIO_COMPRESS_OFF) { ASSERT3U(type, ==, ARC_BUFC_DATA); dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db, size, arc_buf_lsize(db->db_buf), compress_type); + } else { + dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); } bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); } else { @@ -1112,16 +1150,22 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) mutex_enter(&db->db_mtx); if (db->db_state == DB_CACHED) { + spa_t *spa = dn->dn_objset->os_spa; + /* - * If the arc buf is compressed, we need to decompress it to - * read the data. This could happen during the "zfs receive" of - * a stream which is compressed and deduplicated. + * If the arc buf is compressed or encrypted, we need to + * untransform it to read the data. This could happen during + * the "zfs receive" of a stream which is deduplicated and + * either raw or compressed. We do not need to do this if the + * caller wants raw encrypted data. */ - if (db->db_buf != NULL && - arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) { + if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 && + (arc_is_encrypted(db->db_buf) || + arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { dbuf_fix_old_data(db, spa_syncing_txg(dmu_objset_spa(db->db_objset))); - err = arc_decompress(db->db_buf); + err = arc_untransform(db->db_buf, spa, + dmu_objset_id(db->db_objset), B_FALSE); dbuf_set_data(db, db->db_buf); } mutex_exit(&db->db_mtx); @@ -2024,10 +2068,11 @@ dbuf_destroy(dmu_buf_impl_t *db) } if (db->db_blkid == DMU_BONUS_BLKID) { - ASSERT(db->db.db_data != NULL); - zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); - arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); - db->db_state = DB_UNCACHED; + if (db->db.db_data != NULL) { + zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); + arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); + db->db_state = DB_UNCACHED; + } } dbuf_clear_data(db); @@ -2312,7 +2357,7 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp) * prefetch if the next block down is our target. 
*/ static void -dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) +dbuf_prefetch_indirect_done(zio_t *zio, int err, arc_buf_t *abuf, void *private) { dbuf_prefetch_arg_t *dpa = private; @@ -2332,7 +2377,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) */ if (zio != NULL) { ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel); - if (zio->io_flags & ZIO_FLAG_RAW) { + if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) { ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size); } else { ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size); @@ -2357,7 +2402,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level)); blkptr_t *bp = ((blkptr_t *)abuf->b_data) + P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs); - if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) { + if (BP_IS_HOLE(bp) || err != 0) { kmem_free(dpa, sizeof (*dpa)); } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) { ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid); @@ -3047,8 +3092,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ASSERT(*datap != NULL); ASSERT0(db->db_level); - ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); - bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); + ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=, + DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1)); + bcopy(*datap, DN_BONUS(dn->dn_phys), + DN_MAX_BONUS_LEN(dn->dn_phys)); DB_DNODE_EXIT(db); if (*datap != db->db.db_data) { @@ -3106,16 +3153,26 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) * DNONE_DNODE blocks). */ int psize = arc_buf_size(*datap); + int lsize = arc_buf_lsize(*datap); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); enum zio_compress compress_type = arc_get_compression(*datap); - if (compress_type == ZIO_COMPRESS_OFF) { - *datap = arc_alloc_buf(os->os_spa, db, type, psize); - } else { + if (arc_is_encrypted(*datap)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(*datap, &byteorder, salt, iv, mac); + *datap = arc_alloc_raw_buf(os->os_spa, db, + dmu_objset_id(os), byteorder, salt, iv, mac, + dn->dn_type, psize, lsize, compress_type); + } else if (compress_type != ZIO_COMPRESS_OFF) { ASSERT3U(type, ==, ARC_BUFC_DATA); - int lsize = arc_buf_lsize(*datap); *datap = arc_alloc_compressed_buf(os->os_spa, db, psize, lsize, compress_type); + } else { + *datap = arc_alloc_buf(os->os_spa, db, type, psize); } bcopy(db->db.db_data, (*datap)->b_data, psize); } @@ -3246,7 +3303,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) DB_DNODE_EXIT(db); if (!BP_IS_EMBEDDED(bp)) - bp->blk_fill = fill; + BP_SET_FILL(bp, fill); mutex_exit(&db->db_mtx); @@ -3530,9 +3587,24 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) wp_flag = WP_SPILL; wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0; - dmu_write_policy(os, dn, db->db_level, wp_flag, - (data != NULL && arc_get_compression(data) != ZIO_COMPRESS_OFF) ? 
- arc_get_compression(data) : ZIO_COMPRESS_INHERIT, &zp); + dmu_write_policy(os, dn, db->db_level, wp_flag, &zp); + + if (data != NULL) { + if (arc_is_encrypted(data)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(data, &byteorder, salt, iv, mac); + dmu_write_policy_override_encrypt(&zp, byteorder, + arc_get_compression(data), salt, iv, mac); + } else if (arc_get_compression(data) != ZIO_COMPRESS_OFF) { + dmu_write_policy_override_compress(&zp, + arc_get_compression(data)); + } + } + DB_DNODE_EXIT(db); /* @@ -3579,7 +3651,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) * ready callback so that we can properly handle an indirect * block that only contains holes. */ - arc_done_func_t *children_ready_cb = NULL; + arc_write_done_func_t *children_ready_cb = NULL; if (db->db_level != 0) children_ready_cb = dbuf_write_children_ready; diff --git a/usr/src/uts/common/fs/zfs/ddt.c b/usr/src/uts/common/fs/zfs/ddt.c index ba3e02cfb5b0..a1f0d81c56ba 100644 --- a/usr/src/uts/common/fs/zfs/ddt.c +++ b/usr/src/uts/common/fs/zfs/ddt.c @@ -253,6 +253,10 @@ ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); } +/* + * The bp created via this function may be used for repairs and scrub, but it + * will be missing the salt / IV required to do a full decrypting read. + */ void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) @@ -263,11 +267,12 @@ ddt_bp_create(enum zio_checksum checksum, ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); bp->blk_cksum = ddk->ddk_cksum; - bp->blk_fill = 1; BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); + BP_SET_ENCRYPTED(bp, DDK_GET_ENCRYPTED(ddk)); + BP_SET_FILL(bp, 1); BP_SET_CHECKSUM(bp, checksum); BP_SET_TYPE(bp, DMU_OT_DEDUP); BP_SET_LEVEL(bp, 0); @@ -284,6 +289,7 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); + DDK_SET_ENCRYPTED(ddk, BP_IS_ENCRYPTED(bp)); } void @@ -367,7 +373,7 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) if (ddp->ddp_phys_birth == 0) continue; - for (int d = 0; d < SPA_DVAS_PER_BP; d++) + for (int d = 0; d < DDE_GET_NDVAS(dde); d++) dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); dds->dds_blocks += 1; @@ -521,6 +527,7 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) uint64_t ditto = spa->spa_dedup_ditto; int total_copies = 0; int desired_copies = 0; + int copies_needed = 0; for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ddt_phys_t *ddp = &dde->dde_phys[p]; @@ -546,7 +553,13 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) if (total_refcnt >= ditto * ditto) desired_copies++; - return (MAX(desired_copies, total_copies) - total_copies); + copies_needed = MAX(desired_copies, total_copies) - total_copies; + + /* encrypted blocks store their IV in DVA[2] */ + if (DDK_GET_ENCRYPTED(&dde->dde_key)) + copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1); + + return (copies_needed); } int @@ -556,7 +569,7 @@ ddt_ditto_copies_present(ddt_entry_t *dde) dva_t *dva = ddp->ddp_dva; int copies = 0 - DVA_GET_GANG(dva); - for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) + for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++) if (DVA_IS_VALID(dva)) 
copies++; diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 20a41cc98ed7..e7fcbdd567ff 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -66,60 +66,60 @@ int zfs_nopwrite_enabled = 1; uint32_t zfs_per_txg_dirty_frees_percent = 30; const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { - { DMU_BSWAP_UINT8, TRUE, "unallocated" }, - { DMU_BSWAP_ZAP, TRUE, "object directory" }, - { DMU_BSWAP_UINT64, TRUE, "object array" }, - { DMU_BSWAP_UINT8, TRUE, "packed nvlist" }, - { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj header" }, - { DMU_BSWAP_UINT64, TRUE, "SPA space map header" }, - { DMU_BSWAP_UINT64, TRUE, "SPA space map" }, - { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" }, - { DMU_BSWAP_DNODE, TRUE, "DMU dnode" }, - { DMU_BSWAP_OBJSET, TRUE, "DMU objset" }, - { DMU_BSWAP_UINT64, TRUE, "DSL directory" }, - { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"}, - { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" }, - { DMU_BSWAP_ZAP, TRUE, "DSL props" }, - { DMU_BSWAP_UINT64, TRUE, "DSL dataset" }, - { DMU_BSWAP_ZNODE, TRUE, "ZFS znode" }, - { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" }, - { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS directory" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS master node" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" }, - { DMU_BSWAP_UINT8, FALSE, "zvol object" }, - { DMU_BSWAP_ZAP, TRUE, "zvol prop" }, - { DMU_BSWAP_UINT8, FALSE, "other uint8[]" }, - { DMU_BSWAP_UINT64, FALSE, "other uint64[]" }, - { DMU_BSWAP_ZAP, TRUE, "other ZAP" }, - { DMU_BSWAP_ZAP, TRUE, "persistent error log" }, - { DMU_BSWAP_UINT8, TRUE, "SPA history" }, - { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" }, - { DMU_BSWAP_ZAP, TRUE, "Pool properties" }, - { DMU_BSWAP_ZAP, TRUE, "DSL permissions" }, - { DMU_BSWAP_ACL, TRUE, "ZFS ACL" }, - { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" }, - { DMU_BSWAP_UINT8, TRUE, "FUID table" }, - { DMU_BSWAP_UINT64, TRUE, "FUID table size" }, - { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"}, - { DMU_BSWAP_ZAP, TRUE, "scan work queue" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" }, - { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"}, - { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" }, - { DMU_BSWAP_ZAP, TRUE, "DDT statistics" }, - { DMU_BSWAP_UINT8, TRUE, "System attributes" }, - { DMU_BSWAP_ZAP, TRUE, "SA master node" }, - { DMU_BSWAP_ZAP, TRUE, "SA attr registration" }, - { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" }, - { DMU_BSWAP_ZAP, TRUE, "scan translations" }, - { DMU_BSWAP_UINT8, FALSE, "deduplicated block" }, - { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" }, - { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" }, - { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" } + { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "object directory" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "object array" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" }, + { DMU_BSWAP_UINT64, TRUE, TRUE, "ZIL intent log" }, + { DMU_BSWAP_DNODE, TRUE, TRUE, "DMU dnode" }, + { DMU_BSWAP_OBJSET, TRUE, FALSE, "DMU objset" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, 
"DSL directory" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL directory child map"}, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset snap map" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL props" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL dataset" }, + { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" }, + { DMU_BSWAP_OLDACL, TRUE, TRUE, "ZFS V0 ACL" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "ZFS plain file" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS directory" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS delete queue" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "zvol object" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "other uint8[]" }, + { DMU_BSWAP_UINT64, FALSE, TRUE, "other uint64[]" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "Pool properties" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL permissions" }, + { DMU_BSWAP_ACL, TRUE, TRUE, "ZFS ACL" }, + { DMU_BSWAP_UINT8, TRUE, TRUE, "ZFS SYSACL" }, + { DMU_BSWAP_UINT8, TRUE, TRUE, "FUID table" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"}, + { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group used" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group quota" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"}, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" }, + { DMU_BSWAP_UINT8, TRUE, TRUE, "System attributes" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "SA master node" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr registration" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr layouts" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" }, + { DMU_BSWAP_UINT8, FALSE, FALSE, "deduplicated block" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL deadlist map" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL deadlist map hdr" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dir clones" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" } }; const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { @@ -191,6 +191,8 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset, if (flags & DMU_READ_NO_PREFETCH) db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp); if (err == 0) { @@ -214,6 +216,8 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, if (flags & DMU_READ_NO_PREFETCH) db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; err = dmu_buf_hold_noread(os, object, offset, tag, dbp); if (err == 0) { @@ -314,11 +318,18 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) * returns ENOENT, EIO, or 0. 
*/ int -dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) +dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, + dmu_buf_t **dbp) { dnode_t *dn; dmu_buf_impl_t *db; int error; + uint32_t db_flags = DB_RF_MUST_SUCCEED; + + if (flags & DMU_READ_NO_PREFETCH) + db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; error = dnode_hold(os, object, FTAG, &dn); if (error) @@ -348,12 +359,24 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) dnode_rele(dn, FTAG); - VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH)); + error = dbuf_read(db, NULL, db_flags); + if (error) { + dnode_evict_bonus(dn); + dbuf_rele(db, tag); + *dbp = NULL; + return (error); + } *dbp = &db->db; return (0); } +int +dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp) +{ + return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp)); +} + /* * returns ENOENT, EIO, or 0. * @@ -589,8 +612,8 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) * indirect blocks prefeteched will be those that point to the blocks containing * the data starting at offset, and continuing to offset + len. * - * Note that if the indirect blocks above the blocks being prefetched are not in - * cache, they will be asychronously read in. + * Note that if the indirect blocks above the blocks being prefetched are not + * in cache, they will be asynchronously read in. */ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, @@ -1470,6 +1493,76 @@ dmu_return_arcbuf(arc_buf_t *buf) arc_buf_destroy(buf, FTAG); } +void +dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; + dbuf_assign_arcbuf(db, buf, tx); +} + +void +dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt, + const uint8_t *iv, const uint8_t *mac) +{ + dmu_object_type_t type; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; + uint64_t dsobj = dmu_objset_id(db->db_objset); + + ASSERT3P(db->db_buf, !=, NULL); + ASSERT3U(dsobj, !=, 0); + + DB_DNODE_ENTER(db); + type = DB_DNODE(db)->dn_type; + DB_DNODE_EXIT(db); + + arc_convert_to_raw(db->db_buf, dsobj, byteorder, type, salt, iv, mac); +} + +void +dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx) +{ + dmu_buf_t *dst_handle; + dmu_buf_impl_t *dstdb; + dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle; + arc_buf_t *abuf; + uint64_t datalen; + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + ASSERT3P(srcdb->db_buf, !=, NULL); + + /* hold the db that we want to write to */ + VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle, + DMU_READ_NO_DECRYPT)); + dstdb = (dmu_buf_impl_t *)dst_handle; + datalen = arc_buf_size(srcdb->db_buf); + + /* allocate an arc buffer that matches the type of srcdb->db_buf */ + if (arc_is_encrypted(srcdb->db_buf)) { + arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac); + abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os), + byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type, + datalen, arc_buf_lsize(srcdb->db_buf), + arc_get_compression(srcdb->db_buf)); + } else { + /* we won't get a compressed db back from dmu_buf_hold() */ + ASSERT3U(arc_get_compression(srcdb->db_buf), + ==, ZIO_COMPRESS_OFF); + abuf = arc_loan_buf(os->os_spa, + DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen); + 
} + + ASSERT3U(datalen, ==, arc_buf_size(abuf)); + + /* copy the data to the new buffer and assign it to the dstdb */ + bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen); + dbuf_assign_arcbuf(dstdb, abuf, tx); + dmu_buf_rele(dst_handle, FTAG); +} + /* * When possible directly assign passed loaned arc buffer to a dbuf. * If this is not possible copy the contents of passed arc buf via @@ -1545,7 +1638,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) BP_SET_LSIZE(bp, db->db_size); } else if (!BP_IS_EMBEDDED(bp)) { ASSERT(BP_GET_LEVEL(bp) == 0); - bp->blk_fill = 1; + BP_SET_FILL(bp, 1); } } } @@ -1713,8 +1806,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) DB_DNODE_ENTER(db); dn = DB_DNODE(db); - dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, - ZIO_COMPRESS_INHERIT, &zp); + dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp); DB_DNODE_EXIT(db); /* @@ -1884,8 +1976,7 @@ int zfs_mdcomp_disable = 0; int zfs_redundant_metadata_most_ditto_level = 2; void -dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, - enum zio_compress override_compress, zio_prop_t *zp) +dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) { dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET; boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) || @@ -1896,11 +1987,8 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, boolean_t dedup = B_FALSE; boolean_t nopwrite = B_FALSE; boolean_t dedup_verify = os->os_dedup_verify; + boolean_t encrypt = B_FALSE; int copies = os->os_copies; - boolean_t lz4_ac = spa_feature_is_active(os->os_spa, - SPA_FEATURE_LZ4_COMPRESS); - - IMPLY(override_compress == ZIO_COMPRESS_LZ4, lz4_ac); /* * We maintain different write policies for each of the following @@ -1987,23 +2075,76 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled); } - zp->zp_checksum = checksum; - /* - * If we're writing a pre-compressed buffer, the compression type we use - * must match the data. If it hasn't been compressed yet, then we should - * use the value dictated by the policies above. + * Encrypted objects override the checksum type with sha256-mac (which + * is dedupable). Encrypted, dedup'd objects cannot use all available + * copies since we use the last one to store the IV. Encryption is also + * incompatible with nopwrite because encrypted checksums are not + * reproducible (unless dedup is on). */ - zp->zp_compress = override_compress != ZIO_COMPRESS_INHERIT - ? override_compress : compress; - ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT); + if (os->os_encrypted && DMU_OT_IS_ENCRYPTED(type) && + !(wp & WP_NOFILL) && level <= 0) { + encrypt = B_TRUE; + nopwrite = B_FALSE; + copies = MIN(copies, SPA_DVAS_PER_BP - 1); + + if (type == DMU_OT_DNODE) + compress = ZIO_COMPRESS_EMPTY; + } + zp->zp_compress = compress; + zp->zp_checksum = checksum; zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type; zp->zp_level = level; zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa)); zp->zp_dedup = dedup; zp->zp_dedup_verify = dedup && dedup_verify; zp->zp_nopwrite = nopwrite; + zp->zp_encrypt = encrypt; + bzero(zp->zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp->zp_iv, ZIO_DATA_IV_LEN); + bzero(zp->zp_mac, ZIO_DATA_MAC_LEN); + + ASSERT(!(zp->zp_encrypt && zp->zp_copies >= 3)); +} + +/* + * If we're writing a pre-compressed buffer, the compression type we use + * must match the data.
If it hasn't been compressed yet, then we should + * use the value dictated by the original policy. + */ +void +dmu_write_policy_override_compress(zio_prop_t *zp, enum zio_compress compress) +{ + ASSERT3U(compress, !=, ZIO_COMPRESS_INHERIT); + zp->zp_compress = compress; +} + +/* + * Raw encrypted data must pass a few values to the zio layer. The encryption + * parameters must be passed in with the policy so that they can be written + * along with the block. In addition, raw encrypted writes must also be raw + * compressed since encryption is applied after compression, so we must set + * that field here as well. + */ +void +dmu_write_policy_override_encrypt(zio_prop_t *zp, boolean_t byteorder, + enum zio_compress compress, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac) +{ + ASSERT3U(compress, !=, ZIO_COMPRESS_INHERIT); + ASSERT3U(zp->zp_level, <=, 0); + + zp->zp_byteorder = byteorder; + zp->zp_compress = compress; + zp->zp_nopwrite = B_FALSE; + zp->zp_encrypt = B_TRUE; + bcopy(salt, zp->zp_salt, ZIO_DATA_SALT_LEN); + bcopy(iv, zp->zp_iv, ZIO_DATA_IV_LEN); + bcopy(mac, zp->zp_mac, ZIO_DATA_MAC_LEN); + + if (zp->zp_copies >= SPA_DVAS_PER_BP) + zp->zp_copies = SPA_DVAS_PER_BP - 1; } int diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 7e5ef039835c..cc70680bc0ac 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -405,6 +405,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (ds != NULL) { boolean_t needlock = B_FALSE; + os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0); + /* * Note: it's valid to open the objset if the dataset is * long-held, in which case the pool_config lock will not @@ -476,6 +478,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* It's the meta-objset. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_ON; + os->os_encrypted = B_FALSE; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = B_FALSE; @@ -578,6 +581,7 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp) return (err); } +/* ARGSUSED */ static int dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp) @@ -586,15 +590,13 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, err = dmu_objset_from_ds(ds, osp); if (err != 0) { - dsl_dataset_disown(ds, tag); + return (err); } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { - dsl_dataset_disown(ds, tag); return (SET_ERROR(EINVAL)); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { - dsl_dataset_disown(ds, tag); return (SET_ERROR(EROFS)); } - return (err); + return (0); } /* @@ -604,38 +606,52 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, */ int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + int flags = (key_required) ?
DS_HOLD_FLAG_DECRYPT : 0; err = dsl_pool_hold(name, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_own(dp, name, tag, &ds); + err = dsl_dataset_own(dp, name, flags, tag, &ds); if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); } err = dmu_objset_own_impl(ds, type, readonly, tag, osp); + if (err != 0) { + dsl_dataset_disown(ds, flags, tag); + dsl_pool_rele(dp, FTAG); + return (err); + } + dsl_pool_rele(dp, FTAG); - return (err); + return (0); } int dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp) { dsl_dataset_t *ds; int err; + int flags = (key_required) ? DS_HOLD_FLAG_DECRYPT: 0; - err = dsl_dataset_own_obj(dp, obj, tag, &ds); + err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds); if (err != 0) return (err); - return (dmu_objset_own_impl(ds, type, readonly, tag, osp)); + err = dmu_objset_own_impl(ds, type, readonly, tag, osp); + if (err != 0) { + dsl_dataset_disown(ds, flags, tag); + return (err); + } + + return (0); } void @@ -658,11 +674,11 @@ dmu_objset_rele(objset_t *os, void *tag) * same name so that it can be partially torn down and reconstructed. */ void -dmu_objset_refresh_ownership(objset_t *os, void *tag) +dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed, void *tag) { dsl_pool_t *dp; dsl_dataset_t *ds, *newds; - char name[ZFS_MAX_DATASET_NAME_LEN]; + char name[MAXNAMELEN]; ds = os->os_dsl_dataset; VERIFY3P(ds, !=, NULL); @@ -672,16 +688,18 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag) dsl_dataset_name(ds, name); dp = dmu_objset_pool(os); dsl_pool_config_enter(dp, FTAG); - dmu_objset_disown(os, tag); - VERIFY0(dsl_dataset_own(dp, name, tag, &newds)); + dmu_objset_disown(os, key_needed, tag); + VERIFY0(dsl_dataset_own(dp, name, + (key_needed) ? DS_HOLD_FLAG_DECRYPT : 0, tag, &newds)); VERIFY3P(newds, ==, os->os_dsl_dataset); dsl_pool_config_exit(dp, FTAG); } void -dmu_objset_disown(objset_t *os, void *tag) +dmu_objset_disown(objset_t *os, boolean_t key_needed, void *tag) { - dsl_dataset_disown(os->os_dsl_dataset, tag); + dsl_dataset_disown(os->os_dsl_dataset, + (key_needed) ? 
DS_HOLD_FLAG_DECRYPT : 0, tag); } void @@ -758,6 +776,8 @@ dmu_objset_evict(objset_t *os) } else { mutex_exit(&os->os_lock); } + + } void @@ -857,7 +877,11 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ASSERT(type != DMU_OST_ANY); ASSERT(type < DMU_OST_NUMTYPES); os->os_phys->os_type = type; - if (dmu_objset_userused_enabled(os)) { + + /* enable user accounting if it is enabled and this is not a raw recv */ + if (dmu_objset_userused_enabled(os) && (!os->os_encrypted || + spa_keystore_lookup_key(os->os_spa, dmu_objset_id(os), + NULL, NULL) == 0)) { os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; os->os_flags = os->os_phys->os_flags; } @@ -875,6 +899,7 @@ typedef struct dmu_objset_create_arg { void *doca_userarg; dmu_objset_type_t doca_type; uint64_t doca_flags; + dsl_crypto_params_t *doca_dcp; } dmu_objset_create_arg_t; /*ARGSUSED*/ @@ -900,8 +925,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx) dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EEXIST)); } + + error = dmu_objset_create_crypt_check(pdd, NULL, doca->doca_dcp); + if (error != 0) { + dsl_dir_rele(pdd, FTAG); + return (error); + } + error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, doca->doca_cred); + dsl_dir_rele(pdd, FTAG); return (error); @@ -918,13 +951,15 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) uint64_t obj; blkptr_t *bp; objset_t *os; + zio_t *rzio; VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, - doca->doca_cred, tx); + doca->doca_cred, doca->doca_dcp, tx); - VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bp = dsl_dataset_get_blkptr(ds); os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, @@ -936,14 +971,38 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) doca->doca_cred, tx); } + /* + * The doca_userfunc() will write out some data that needs to be + * encrypted if the dataset is encrypted (specifically the root + * directory). This data must be written out before the encryption + * key mapping is removed by dsl_dataset_rele_flags(). Force the + * I/O to occur immediately by invoking the relevant sections of + * dsl_pool_sync(). + */ + if (os->os_encrypted) { + rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + dsl_dataset_sync(ds, rzio, tx); + VERIFY0(zio_wait(rzio)); + dmu_objset_do_userquota_updates(os, tx); + taskq_wait(dp->dp_sync_taskq); + + rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + dsl_dataset_sync(ds, rzio, tx); + VERIFY0(zio_wait(rzio)); + + /* dsl_dataset_sync_done will drop this reference. 
*/ + dmu_buf_add_ref(ds->ds_dbuf, ds); + dsl_dataset_sync_done(ds, tx); + } + spa_history_log_internal_ds(ds, "create", tx, ""); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dsl_dir_rele(pdd, FTAG); } int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) + dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg) { dmu_objset_create_arg_t doca; @@ -953,6 +1012,7 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, doca.doca_userfunc = func; doca.doca_userarg = arg; doca.doca_type = type; + doca.doca_dcp = dcp; return (dsl_sync_task(name, dmu_objset_create_check, dmu_objset_create_sync, &doca, @@ -963,6 +1023,7 @@ typedef struct dmu_objset_clone_arg { const char *doca_clone; const char *doca_origin; cred_t *doca_cred; + dsl_crypto_params_t *doca_dcp; } dmu_objset_clone_arg_t; /*ARGSUSED*/ @@ -996,18 +1057,30 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EDQUOT)); } - dsl_dir_rele(pdd, FTAG); error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); - if (error != 0) + if (error != 0) { + dsl_dir_rele(pdd, FTAG); return (error); + } /* You can only clone snapshots, not the head datasets. */ if (!origin->ds_is_snapshot) { dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EINVAL)); } + + error = dmu_objset_create_crypt_check(pdd, origin->ds_dir, + doca->doca_dcp); + if (error != 0) { + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); + return (error); + } + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); return (0); } @@ -1027,7 +1100,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); obj = dsl_dataset_create_sync(pdd, tail, origin, 0, - doca->doca_cred, tx); + doca->doca_cred, doca->doca_dcp, tx); VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); dsl_dataset_name(origin, namebuf); @@ -1039,13 +1112,15 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) } int -dmu_objset_clone(const char *clone, const char *origin) +dmu_objset_clone(const char *clone, const char *origin, + dsl_crypto_params_t *dcp) { dmu_objset_clone_arg_t doca; doca.doca_clone = clone; doca.doca_origin = origin; doca.doca_cred = CRED(); + doca.doca_dcp = dcp; return (dsl_sync_task(clone, dmu_objset_clone_check, dmu_objset_clone_sync, &doca, @@ -1067,9 +1142,11 @@ dmu_objset_snapshot_one(const char *fsname, const char *snapname) } static void -dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx) +dmu_objset_sync_dnodes(objset_t *os, multilist_sublist_t *list, dmu_tx_t *tx) { dnode_t *dn; + boolean_t raw = (os->os_encrypted && spa_keystore_lookup_key(os->os_spa, + dmu_objset_id(os), NULL, NULL) != 0); while ((dn = multilist_sublist_head(list)) != NULL) { ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); @@ -1085,7 +1162,7 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx) multilist_sublist_remove(list, dn); multilist_t *newlist = dn->dn_objset->os_synced_dnodes; - if (newlist != NULL) { + if (newlist != NULL && !raw) { (void) dnode_add_ref(dn, newlist); multilist_insert(newlist, dn); } @@ -1101,10 +1178,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) blkptr_t *bp = zio->io_bp; objset_t *os = arg; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; + uint64_t fill = 0; ASSERT(!BP_IS_EMBEDDED(bp)); + ASSERT(!BP_IS_ENCRYPTED(bp)); 
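Throughout these hunks (dbuf_write_ready(), ddt_bp_create(), dmu_sync_ready(), and the dmu_objset_write_ready() hunk this note sits in), direct stores to bp->blk_fill are replaced by the BP_SET_FILL() accessor. The macro definitions are not part of the hunks shown here, but the pattern suggests that once a block pointer can be encrypted the fill count can no longer be written with a plain 64-bit assignment, since part of that word may be shared with encryption parameters. The snippet below is a simplified, hypothetical model of such an accessor, included only to illustrate the idea; the real layout is whatever spa.h defines.

#include <stdint.h>

/* Hypothetical model of a fill word shared with encryption parameters. */
typedef struct model_bp {
	uint64_t mbp_fill_word;	/* encrypted: low 32 bits fill, high 32 bits IV */
	int mbp_encrypted;
} model_bp_t;

static void
model_bp_set_fill(model_bp_t *bp, uint64_t fill)
{
	if (bp->mbp_encrypted) {
		/* preserve the upper bits, store the fill count below them */
		bp->mbp_fill_word =
		    (bp->mbp_fill_word & 0xffffffff00000000ULL) |
		    (fill & 0xffffffffULL);
	} else {
		bp->mbp_fill_word = fill;
	}
}

static uint64_t
model_bp_get_fill(const model_bp_t *bp)
{
	if (bp->mbp_encrypted)
		return (bp->mbp_fill_word & 0xffffffffULL);
	return (bp->mbp_fill_word);
}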
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); - ASSERT0(BP_GET_LEVEL(bp)); /* * Update rootbp fill count: it should be the number of objects @@ -1112,9 +1190,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) * objects that are stored in the objset_phys_t -- the meta * dnode and user/group accounting objects). */ - bp->blk_fill = 0; for (int i = 0; i < dnp->dn_nblkptr; i++) - bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + + BP_SET_FILL(bp, fill); + if (os->os_dsl_dataset != NULL) rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); *os->os_rootbp = *bp; @@ -1143,6 +1223,7 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) } typedef struct sync_dnodes_arg { + objset_t *sda_os; multilist_t *sda_list; int sda_sublist_idx; multilist_t *sda_newlist; @@ -1157,7 +1238,7 @@ sync_dnodes_task(void *arg) multilist_sublist_t *ms = multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx); - dmu_objset_sync_dnodes(ms, sda->sda_tx); + dmu_objset_sync_dnodes(sda->sda_os, ms, sda->sda_tx); multilist_sublist_unlock(ms); @@ -1201,7 +1282,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); arc_release(os->os_phys_buf, &os->os_phys_buf); - dmu_write_policy(os, NULL, 0, 0, ZIO_COMPRESS_INHERIT, &zp); + dmu_write_policy(os, NULL, 0, 0, &zp); zio = arc_write(pio, os->os_spa, tx->tx_txg, blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), @@ -1246,6 +1327,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) for (int i = 0; i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) { sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP); + sda->sda_os = os; sda->sda_list = os->os_dirty_dnodes[txgoff]; sda->sda_sublist_idx = i; sda->sda_tx = tx; @@ -1463,6 +1545,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) if (!dmu_objset_userused_enabled(os)) return; + if (os->os_encrypted && spa_keystore_lookup_key(os->os_spa, + dmu_objset_id(os), NULL, NULL) != 0) + return; + /* Allocate the user/groupused objects if necessary. */ if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { VERIFY0(zap_create_claim(os, @@ -1542,6 +1628,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) if (!dmu_objset_userused_enabled(dn->dn_objset)) return; + /* + * If we are doing a raw receive we will be writing out raw data + * and will not have access to the decrypted bonus / spill data that + * we would normally need to do all of the user space accounting. + * However, in this case the we will receive the user accounting data + * as part of the send anyway so we can simply rely on that without + * redoing the work. + */ + if (os->os_encrypted && spa_keystore_lookup_key(os->os_spa, + dmu_objset_id(os), NULL, NULL) != 0) + return; + if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| DN_ID_CHKED_SPILL))) return; diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index ac71c5a11a4e..181740093182 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -98,18 +98,17 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) ssize_t resid; /* have to get resid to get detailed errno */ /* - * The code does not rely on this (len being a multiple of 8). We keep + * The code does not rely on len being a multiple of 8. We keep * this assertion because of the corresponding assertion in * receive_read(). 
Keeping this assertion ensures that we do not * inadvertently break backwards compatibility (causing the assertion - * in receive_read() to trigger on old software). - * - * Removing the assertions could be rolled into a new feature that uses - * data that isn't 8-byte aligned; if the assertions were removed, a - * feature flag would have to be added. + * in receive_read() to trigger on old software). Newer feature flags + * (such as raw send) may break this assertion since they were + * introduced after the requirement was made obsolete. */ - ASSERT0(len % 8); + ASSERT(len % 8 == 0 || + (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0); dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, (caddr_t)buf, len, @@ -249,8 +248,8 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, static int dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, - uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp, - void *data) + uint64_t object, uint64_t offset, boolean_t raw, int lsize, int psize, + const blkptr_t *bp, void *data) { uint64_t payload_size; struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); @@ -285,16 +284,37 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_toguid = dsp->dsa_toguid; drrw->drr_logical_size = lsize; - /* only set the compression fields if the buf is compressed */ - if (lsize != psize) { - ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED); + /* only set the compression fields if the buf is compressed or raw */ + if (raw || lsize != psize) { ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT(!BP_SHOULD_BYTESWAP(bp)); - ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp))); - ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF); ASSERT3S(psize, >, 0); - ASSERT3S(lsize, >=, psize); + if (raw) { + ASSERT(BP_IS_ENCRYPTED(bp)); + + /* + * This is a raw encrypted block so we set the encrypted + * flag. We need to pass along everything the receiving + * side will need to interpret this block, including the + * byteswap, salt, IV, and MAC. + */ + drrw->drr_flags |= DRR_RAW_ENCRYPTED; + if (BP_SHOULD_BYTESWAP(bp)) + drrw->drr_flags |= DRR_RAW_BYTESWAP; + zio_crypt_decode_params_bp(bp, drrw->drr_salt, + drrw->drr_iv); + zio_crypt_decode_mac_bp(bp, drrw->drr_mac); + } else { + /* this is a compressed block */ + ASSERT(dsp->dsa_featureflags & + DMU_BACKUP_FEATURE_COMPRESSED); + ASSERT(!BP_SHOULD_BYTESWAP(bp)); + ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp))); + ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF); + ASSERT3S(lsize, >=, psize); + } + + /* set fields common to compressed and raw sends */ drrw->drr_compressiontype = BP_GET_COMPRESS(bp); drrw->drr_compressed_size = psize; payload_size = drrw->drr_compressed_size; @@ -302,22 +322,23 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, payload_size = drrw->drr_logical_size; } - if (bp == NULL || BP_IS_EMBEDDED(bp)) { + if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_ENCRYPTED(bp) && !raw)) { /* - * There's no pre-computed checksum for partial-block - * writes or embedded BP's, so (like - * fletcher4-checkummed blocks) userland will have to - * compute a dedup-capable checksum itself. + * There's no pre-computed checksum for partial-block writes, + * embedded BP's, or encrypted BP's that are being sent as + * plaintext, so (like fletcher4-checkummed blocks) userland + * will have to compute a dedup-capable checksum itself. 
*/ drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; } else { drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_flags & ZCHECKSUM_FLAG_DEDUP) - drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; + drrw->drr_flags |= DRR_CHECKSUM_DEDUP; DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); + DDK_SET_ENCRYPTED(&drrw->drr_key, BP_IS_ENCRYPTED(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; } @@ -361,9 +382,10 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, } static int -dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) +dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data) { struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); + uint64_t blksz = BP_GET_LSIZE(bp); if (dsp->dsa_pending_op != PENDING_NONE) { if (dump_record(dsp, NULL, 0) != 0) @@ -378,6 +400,18 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; + /* handle raw send fields */ + if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0 && + BP_IS_ENCRYPTED(bp)) { + drrs->drr_flags |= DRR_RAW_ENCRYPTED; + if (BP_SHOULD_BYTESWAP(bp)) + drrs->drr_flags |= DRR_RAW_BYTESWAP; + drrs->drr_compressiontype = BP_GET_COMPRESS(bp); + drrs->drr_compressed_size = BP_GET_PSIZE(bp); + zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv); + zio_crypt_decode_mac_bp(bp, drrs->drr_mac); + } + if (dump_record(dsp, data, blksz) != 0) return (SET_ERROR(EINTR)); return (0); @@ -430,9 +464,11 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) } static int -dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) +dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, + dnode_phys_t *dnp) { struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); + int bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8); if (object < dsp->dsa_resume_object) { /* @@ -472,11 +508,22 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; - if (dump_record(dsp, DN_BONUS(dnp), - P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) { - return (SET_ERROR(EINTR)); + if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) && + BP_IS_ENCRYPTED(bp)) { + drro->drr_flags |= DRR_RAW_ENCRYPTED; + if (BP_SHOULD_BYTESWAP(bp)) + drro->drr_flags |= DRR_RAW_BYTESWAP; + + /* raw bonus buffers extend to the end of the dnp */ + if (bonuslen != 0) { + drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp); + bonuslen = drro->drr_raw_bonuslen; + } } + if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0) + return (SET_ERROR(EINTR)); + /* Free anything past the end of the file. 
*/ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) @@ -486,6 +533,40 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) return (0); } +static int +dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj, + uint64_t numslots) +{ + struct drr_object_range *drror = + &(dsp->dsa_drr->drr_u.drr_object_range); + + /* we only use this for raw sends */ + ASSERT(BP_IS_ENCRYPTED(bp)); + ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_record(dsp, NULL, 0) != 0) + return (SET_ERROR(EINTR)); + dsp->dsa_pending_op = PENDING_NONE; + } + + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE; + drror->drr_firstobj = firstobj; + drror->drr_numslots = numslots; + drror->drr_toguid = dsp->dsa_toguid; + drror->drr_flags |= DRR_RAW_ENCRYPTED; + if (BP_SHOULD_BYTESWAP(bp)) + drror->drr_flags |= DRR_RAW_BYTESWAP; + zio_crypt_decode_params_bp(bp, drror->drr_salt, drror->drr_iv); + zio_crypt_decode_mac_bp(bp, drror->drr_mac); + + if (dump_record(dsp, NULL, 0) != 0) + return (SET_ERROR(EINTR)); + return (0); +} + static boolean_t backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) { @@ -616,36 +697,57 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { - int blksz = BP_GET_LSIZE(bp); + int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; + enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + + if ((dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) && + BP_IS_ENCRYPTED(bp)) { + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + zioflags |= ZIO_FLAG_RAW; + } ASSERT0(zb->zb_level); if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - &aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) return (SET_ERROR(EIO)); dnode_phys_t *blk = abuf->b_data; - uint64_t dnobj = zb->zb_blkid * (blksz >> DNODE_SHIFT); - for (int i = 0; i < blksz >> DNODE_SHIFT; i++) { - err = dump_dnode(dsa, dnobj + i, blk + i); - if (err != 0) - break; + uint64_t dnobj = zb->zb_blkid * epb; + + /* + * Raw sends require sending encryption parameters for the + * block of dnodes. Regular sends do not need to send this + * info. 
+ */ + if (arc_is_encrypted(abuf)) + err = dump_object_range(dsa, bp, dnobj, epb); + + if (err == 0) { + for (int i = 0; i < epb; + i += blk[i].dn_extra_slots + 1) { + err = dump_dnode(dsa, bp, dnobj + i, blk + i); + if (err != 0) + break; + } } arc_buf_destroy(abuf, &abuf); } else if (type == DMU_OT_SA) { arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; - int blksz = BP_GET_LSIZE(bp); + enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + + if ((dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) && + BP_IS_ENCRYPTED(bp)) + zioflags |= ZIO_FLAG_RAW; if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - &aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) return (SET_ERROR(EIO)); - err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(dsa, bp, zb->zb_object, abuf->b_data); arc_buf_destroy(abuf, &abuf); } else if (backup_do_embed(dsa, bp)) { /* it's an embedded level-0 block of a regular object */ @@ -682,6 +784,19 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) && !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp)); + /* + * Raw sends only apply to encrypted data. Unencrypted metadata + * in an encrypted dataset is sent normally. Raw sends are + * mutually exclusive with splitting large blocks and compressed + * sends, so we assert that here. + */ + boolean_t request_raw = + (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) && + BP_IS_ENCRYPTED(bp); + + IMPLY(request_raw, !request_compressed); + IMPLY(request_raw, !split_large_blocks); + IMPLY(request_raw, !BP_IS_EMBEDDED(bp)); ASSERT0(zb->zb_level); ASSERT(zb->zb_object > dsa->dsa_resume_object || (zb->zb_object == dsa->dsa_resume_object && @@ -696,7 +811,10 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) enum zio_flag zioflags = ZIO_FLAG_CANFAIL; if (request_compressed) + zioflags |= ZIO_FLAG_RAW_COMPRESS; + else if (request_raw) zioflags |= ZIO_FLAG_RAW; + if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) { if (zfs_send_corrupt_data) { @@ -716,20 +834,22 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) offset = zb->zb_blkid * blksz; if (split_large_blocks) { + ASSERT0(arc_is_encrypted(abuf)); ASSERT3U(arc_get_compression(abuf), ==, ZIO_COMPRESS_OFF); char *buf = abuf->b_data; while (blksz > 0 && err == 0) { int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE); err = dump_write(dsa, type, zb->zb_object, - offset, n, n, NULL, buf); + offset, B_FALSE, n, n, NULL, buf); offset += n; buf += n; blksz -= n; } } else { err = dump_write(dsa, type, zb->zb_object, offset, - blksz, arc_buf_size(abuf), bp, abuf->b_data); + arc_is_encrypted(abuf), blksz, arc_buf_size(abuf), + bp, abuf->b_data); } arc_buf_destroy(abuf, &abuf); } @@ -758,7 +878,7 @@ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, uint64_t resumeobj, uint64_t resumeoff, + boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp, offset_t *off) { objset_t *os; @@ -769,6 +889,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, uint64_t featureflags = 0; struct send_thread_arg to_arg = { 0 }; + ASSERT0(rawok && compressok); + err = dmu_objset_from_ds(to_ds, &os); if (err != 0) { dsl_pool_rele(dp, tag); @@ -795,7 +917,9 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, 
dsl_dataset_t *to_ds, } #endif - if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS]) + /* raw sends imply large_block_ok */ + if ((large_block_ok || rawok) && + to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS]) featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS; if (embedok && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { @@ -803,9 +927,13 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) featureflags |= DMU_BACKUP_FEATURE_LZ4; } - if (compressok) { + + if (compressok || (rawok && !os->os_encrypted)) { featureflags |= DMU_BACKUP_FEATURE_COMPRESSED; + } else if (rawok && os->os_encrypted) { + featureflags |= DMU_BACKUP_FEATURE_RAW; } + if ((featureflags & (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) != 0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) { @@ -864,19 +992,43 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, void *payload = NULL; size_t payload_len = 0; - if (resumeobj != 0 || resumeoff != 0) { - dmu_object_info_t to_doi; - err = dmu_object_info(os, resumeobj, &to_doi); - if (err != 0) - goto out; - SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0, - resumeoff / to_doi.doi_data_block_size); - + /* handle features that require a DRR_BEGIN payload */ + if (featureflags & + (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) { + nvlist_t *keynvl = NULL; nvlist_t *nvl = fnvlist_alloc(); - fnvlist_add_uint64(nvl, "resume_object", resumeobj); - fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { + dmu_object_info_t to_doi; + err = dmu_object_info(os, resumeobj, &to_doi); + if (err != 0) { + fnvlist_free(nvl); + goto out; + } + + SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, + resumeobj, 0, + resumeoff / to_doi.doi_data_block_size); + + fnvlist_add_uint64(nvl, "resume_object", resumeobj); + fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + } + + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(os->os_encrypted); + + err = dsl_crypto_populate_key_nvlist(to_ds, &keynvl); + if (err != 0) { + fnvlist_free(nvl); + goto out; + } + + fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl); + } + payload = fnvlist_pack(nvl, &payload_len); drr->drr_payloadlen = payload_len; + fnvlist_free(keynvl); fnvlist_free(nvl); } @@ -894,6 +1046,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, to_arg.ds = to_ds; to_arg.fromtxg = fromtxg; to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH; + if (rawok) + to_arg.flags |= TRAVERSE_NO_DECRYPT; (void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc, TS_RUN, minclsyspri); @@ -940,7 +1094,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if (dump_record(dsp, NULL, 0) != 0) err = dsp->dsa_err; - out: mutex_enter(&to_ds->ds_sendstream_lock); list_remove(&to_ds->ds_sendstreams, dsp); @@ -959,18 +1112,19 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, vnode_t *vp, offset_t *off) + boolean_t rawok, int outfd, vnode_t *vp, offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; dsl_dataset_t *fromds = NULL; + ds_hold_flags_t dsflags = (rawok) ? 
0 : DS_HOLD_FLAG_DECRYPT; int err; err = dsl_pool_hold(pool, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); + err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds); if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); @@ -982,7 +1136,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); if (err != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); dsl_pool_rele(dp, FTAG); return (err); } @@ -995,24 +1149,27 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, compressok, outfd, 0, 0, vp, off); + embedok, large_block_ok, compressok, rawok, outfd, + 0, 0, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, compressok, outfd, 0, 0, vp, off); + embedok, large_block_ok, compressok, rawok, outfd, + 0, 0, vp, off); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (err); } int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, int outfd, - uint64_t resumeobj, uint64_t resumeoff, - vnode_t *vp, offset_t *off) + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, + int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp, + offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT; boolean_t owned = B_FALSE; if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) @@ -1027,10 +1184,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, * We are sending a filesystem or volume. Ensure * that it doesn't change by owning the dataset. 
*/ - err = dsl_dataset_own(dp, tosnap, FTAG, &ds); + err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds); owned = B_TRUE; } else { - err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds); } if (err != 0) { dsl_pool_rele(dp, FTAG); @@ -1070,22 +1227,27 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); } if (err != 0) { - dsl_dataset_rele(ds, FTAG); + if (owned) + dsl_dataset_disown(ds, dsflags, FTAG); + else + dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_pool_rele(dp, FTAG); return (err); } err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, compressok, + embedok, large_block_ok, compressok, rawok, outfd, resumeobj, resumeoff, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, compressok, + embedok, large_block_ok, compressok, rawok, outfd, resumeobj, resumeoff, vp, off); } if (owned) - dsl_dataset_disown(ds, FTAG); + dsl_dataset_disown(ds, dsflags, FTAG); else - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); + return (err); } @@ -1227,7 +1389,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg, * traverse the blocks of the snapshot with birth times after * from_txg, summing their uncompressed size */ - err = traverse_dataset(ds, from_txg, TRAVERSE_POST, + err = traverse_dataset(ds, from_txg, + TRAVERSE_POST | TRAVERSE_NO_DECRYPT, dmu_calculate_send_traversal, &size); if (err) return (err); @@ -1338,6 +1501,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) struct drr_begin *drrb = drba->drba_cookie->drc_drrb; uint64_t fromguid = drrb->drr_fromguid; int flags = drrb->drr_flags; + ds_hold_flags_t dsflags = 0; int error; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; @@ -1353,6 +1517,11 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) return (SET_ERROR(EINVAL)); + /* Raw streams are mutually exclusive with compressed streams */ + if ((featureflags & DMU_BACKUP_FEATURE_COMPRESSED) && + (featureflags & DMU_BACKUP_FEATURE_RAW)) + return (SET_ERROR(EINVAL)); + /* Verify pool version supports SA if SA_SPILL feature set */ if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && spa_version(dp->dp_spa) < SPA_VERSION_SA) @@ -1384,18 +1553,33 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS)) return (SET_ERROR(ENOTSUP)); - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if ((featureflags & DMU_BACKUP_FEATURE_RAW)) { + /* raw receives require the encryption feature */ + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) + return (SET_ERROR(ENOTSUP)); + + /* + * Raw receives cannot specify an origin snapshot because we + * cannot ensure the keys match. 
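+ * The raw stream carries its own key material (the "crypt_keydata" payload), which may not belong to the origin's clone family.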
+ */ + if (drba->drba_origin != NULL) + return (SET_ERROR(EINVAL)); + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ if (flags & DRR_FLAG_CLONE || drba->drba_origin) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } error = recv_begin_check_existing_impl(drba, ds, fromguid); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); } else if (error == ENOENT) { /* target fs does not exist; must be a full backup or clone */ char buf[ZFS_MAX_DATASET_NAME_LEN]; @@ -1420,7 +1604,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); - error = dsl_dataset_hold(dp, buf, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds); if (error != 0) return (error); @@ -1432,39 +1616,47 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } if (drba->drba_origin != NULL) { dsl_dataset_t *origin; - error = dsl_dataset_hold(dp, drba->drba_origin, - FTAG, &origin); + + /* + * We hold origin with DS_HOLD_FLAG_DECRYPT so that we + * can check that the key is loaded for cloning. 
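+ * The hold itself will fail if the origin's key is not currently loaded.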
+ */ + error = dsl_dataset_hold_flags(dp, drba->drba_origin, + DS_HOLD_FLAG_DECRYPT, FTAG, &origin); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } if (!origin->ds_is_snapshot) { - dsl_dataset_rele(origin, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(origin, + DS_HOLD_FLAG_DECRYPT, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } if (dsl_dataset_phys(origin)->ds_guid != fromguid && fromguid != 0) { - dsl_dataset_rele(origin, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(origin, + DS_HOLD_FLAG_DECRYPT, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(ENODEV)); } - dsl_dataset_rele(origin, FTAG); + dsl_dataset_rele_flags(origin, + DS_HOLD_FLAG_DECRYPT, FTAG); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); error = 0; } return (error); @@ -1478,15 +1670,23 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) objset_t *mos = dp->dp_meta_objset; struct drr_begin *drrb = drba->drba_cookie->drc_drrb; const char *tofs = drba->drba_cookie->drc_tofs; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds, *newds; uint64_t dsobj; + ds_hold_flags_t dsflags = 0; int error; uint64_t crflags = 0; + dsl_crypto_params_t dcp = { 0 }; if (drrb->drr_flags & DRR_FLAG_CI_DATA) crflags |= DS_FLAG_CI_DATASET; + if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } else { + dcp.cp_flags |= DCP_FLAG_RAW_RECV; + } - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { /* create temporary clone */ dsl_dataset_t *snap = NULL; @@ -1495,10 +1695,10 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) drba->drba_snapobj, FTAG, &snap)); } dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, - snap, crflags, drba->drba_cred, tx); + snap, crflags, drba->drba_cred, &dcp, tx); if (drba->drba_snapobj != 0) dsl_dataset_rele(snap, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); } else { dsl_dir_t *dd; const char *tail; @@ -1514,13 +1714,13 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) /* Create new dataset. 
*/ dsobj = dsl_dataset_create_sync(dd, strrchr(tofs, '/') + 1, - origin, crflags, drba->drba_cred, tx); + origin, crflags, drba->drba_cred, &dcp, tx); if (origin != NULL) dsl_dataset_rele(origin, FTAG); dsl_dir_rele(dd, FTAG); drba->drba_cookie->drc_newfs = B_TRUE; } - VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &newds)); if (drba->drba_cookie->drc_resumable) { dsl_dataset_zapify(newds, tx); @@ -1540,21 +1740,34 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 8, 1, &zero, tx)); VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES, 8, 1, &zero, tx)); - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_LARGE_BLOCKS) { + if (featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK, 8, 1, &one, tx)); } - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_EMBED_DATA) { + if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK, 8, 1, &one, tx)); } - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_COMPRESSED) { + if (featureflags & DMU_BACKUP_FEATURE_COMPRESSED) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK, 8, 1, &one, tx)); } + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_RAWOK, + 8, 1, &one, tx)); + } + } + + /* + * Usually the os->os_encrypted value is tied to the presence of a + * DSL Crypto Key object in the dd. However, that will not be received + * until dmu_recv_stream(), so we set the value manually for now. + */ + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + objset_t *os; + VERIFY0(dmu_objset_from_ds(newds, &os)); + os->os_encrypted = B_TRUE; + drba->drba_cookie->drc_raw = B_TRUE; } dmu_buf_will_dirty(newds->ds_dbuf, tx); @@ -1583,6 +1796,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) dsl_pool_t *dp = dmu_tx_pool(tx); struct drr_begin *drrb = drba->drba_cookie->drc_drrb; int error; + ds_hold_flags_t dsflags = 0; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; const char *tofs = drba->drba_cookie->drc_tofs; @@ -1596,6 +1810,11 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) drrb->drr_type >= DMU_OST_NUMTYPES) return (SET_ERROR(EINVAL)); + /* Raw streams are mutually exclusive with compressed streams */ + if ((featureflags & DMU_BACKUP_FEATURE_COMPRESSED) && + (featureflags & DMU_BACKUP_FEATURE_RAW)) + return (SET_ERROR(EINVAL)); + /* Verify pool version supports SA if SA_SPILL feature set */ if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && spa_version(dp->dp_spa) < SPA_VERSION_SA) @@ -1620,29 +1839,32 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) + dsflags |= DS_HOLD_FLAG_DECRYPT; + + if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error != 0) return (error); } /* check that ds is marked inconsistent */ if (!DS_IS_INCONSISTENT(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } /* check that there is resuming data, and that the toguid matches */ if (!dsl_dataset_is_zapified(ds)) { - 
dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } uint64_t val; error = zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val); if (error != 0 || drrb->drr_toguid != val) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } @@ -1652,13 +1874,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) * fails) because it will be marked inconsistent. */ if (dsl_dataset_has_owner(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EBUSY)); } /* There should not be any snapshots of this fs yet. */ if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } @@ -1672,11 +1894,11 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val); if (drrb->drr_fromguid != val) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (0); } @@ -1686,7 +1908,10 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_recv_begin_arg_t *drba = arg; dsl_pool_t *dp = dmu_tx_pool(tx); const char *tofs = drba->drba_cookie->drc_tofs; + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; + ds_hold_flags_t dsflags = 0; uint64_t dsobj; /* 6 extra bytes for /%recv */ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; @@ -1694,9 +1919,15 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + drba->drba_cookie->drc_raw = B_TRUE; + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ - VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds)); drba->drba_cookie->drc_newfs = B_TRUE; } @@ -1705,9 +1936,9 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; dsobj = ds->ds_object; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); - VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds)); + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &ds)); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; @@ -1775,7 +2006,7 @@ struct receive_record_arg { * If the record is a write, pointer to the arc_buf_t containing the * payload. */ - arc_buf_t *write_buf; + arc_buf_t *arc_buf; int payload_size; uint64_t bytes_read; /* bytes read from stream when record created */ boolean_t eos_marker; /* Marks the end of the stream */ @@ -1833,12 +2064,14 @@ struct receive_arg { zio_cksum_t prev_cksum; int err; boolean_t byteswap; + uint64_t featureflags; /* Sorted list of objects not to issue prefetches for. 
*/ struct objlist ignore_objlist; }; typedef struct guid_map_entry { uint64_t guid; + boolean_t raw; dsl_dataset_t *gme_ds; avl_node_t avlnode; } guid_map_entry_t; @@ -1865,7 +2098,8 @@ free_guid_map_onexit(void *arg) while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { dsl_dataset_long_rele(gmep->gme_ds, gmep); - dsl_dataset_rele(gmep->gme_ds, gmep); + dsl_dataset_rele_flags(gmep->gme_ds, + (gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); @@ -1881,7 +2115,8 @@ receive_read(struct receive_arg *ra, int len, void *buf) * The code doesn't rely on this (lengths being multiples of 8). See * comment in dump_bytes. */ - ASSERT0(len % 8); + ASSERT(len % 8 == 0 || + (ra->featureflags & DMU_BACKUP_FEATURE_RAW) != 0); while (done < len) { ssize_t resid; @@ -1934,6 +2169,7 @@ byteswap_record(dmu_replay_record_t *drr) DO32(drr_object.drr_bonustype); DO32(drr_object.drr_blksz); DO32(drr_object.drr_bonuslen); + DO32(drr_object.drr_raw_bonuslen); DO64(drr_object.drr_toguid); break; case DRR_FREEOBJECTS: @@ -1981,6 +2217,12 @@ byteswap_record(dmu_replay_record_t *drr) DO64(drr_spill.drr_object); DO64(drr_spill.drr_length); DO64(drr_spill.drr_toguid); + DO32(drr_spill.drr_type); + break; + case DRR_OBJECT_RANGE: + DO64(drr_object_range.drr_firstobj); + DO64(drr_object_range.drr_numslots); + DO64(drr_object_range.drr_toguid); break; case DRR_END: DO64(drr_end.drr_toguid); @@ -2065,6 +2307,16 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, return (SET_ERROR(EINVAL)); } + if (DRR_IS_RAW_ENCRYPTED(drro->drr_flags)) { + if (drro->drr_raw_bonuslen < drro->drr_bonuslen || + DN_SLOTS_TO_BONUSLEN(drro->drr_dn_slots) < + drro->drr_raw_bonuslen) + return (SET_ERROR(EINVAL)); + } else { + if (drro->drr_flags != 0 && drro->drr_raw_bonuslen != 0) + return (SET_ERROR(EINVAL)); + } + err = dmu_object_info(rwa->os, drro->drr_object, &doi); if (err != 0 && err != ENOENT) @@ -2125,17 +2377,27 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, if (data != NULL) { dmu_buf_t *db; + uint32_t flags = DMU_READ_NO_PREFETCH; + + if ((drro->drr_flags & DRR_RAW_ENCRYPTED) != 0) + flags |= DMU_READ_NO_DECRYPT; - VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db)); + VERIFY0(dmu_bonus_hold_impl(rwa->os, drro->drr_object, + FTAG, flags, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); - bcopy(data, db->db_data, drro->drr_bonuslen); - if (rwa->byteswap) { + bcopy(data, db->db_data, DRR_OBJECT_PAYLOAD_SIZE(drro)); + + /* + * Raw bonus buffers have their byteorder determined by the + * DRR_OBJECT_RANGE record. 
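+ * The raw bonus data is still ciphertext at this point and must be preserved bit-for-bit, so no byteswap is applied here.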
+ */ + if (rwa->byteswap && !(drro->drr_flags & DRR_RAW_ENCRYPTED)) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drro->drr_bonustype); dmu_ot_byteswap[byteswap].ob_func(db->db_data, - drro->drr_bonuslen); + DRR_OBJECT_PAYLOAD_SIZE(drro)); } dmu_buf_rele(db, FTAG); } @@ -2207,7 +2469,8 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, dmu_tx_abort(tx); return (err); } - if (rwa->byteswap) { + if (rwa->byteswap && !arc_is_encrypted(abuf) && + arc_get_compression(abuf) == ZIO_COMPRESS_OFF) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drrw->drr_type); dmu_ot_byteswap[byteswap].ob_func(abuf->b_data, @@ -2250,6 +2513,7 @@ receive_write_byref(struct receive_writer_arg *rwa, guid_map_entry_t *gmep; avl_index_t where; objset_t *ref_os = NULL; + int flags = DMU_READ_PREFETCH; dmu_buf_t *dbp; if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) @@ -2271,8 +2535,13 @@ receive_write_byref(struct receive_writer_arg *rwa, ref_os = rwa->os; } + if (DRR_IS_RAW_ENCRYPTED(drrwbr->drr_flags)) { + flags |= DMU_READ_NO_DECRYPT; + } + + /* may return either a regular db or an encrypted one */ err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, - drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); + drrwbr->drr_refoffset, FTAG, &dbp, flags); if (err != 0) return (err); @@ -2285,8 +2554,14 @@ receive_write_byref(struct receive_writer_arg *rwa, dmu_tx_abort(tx); return (err); } - dmu_write(rwa->os, drrwbr->drr_object, - drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + + if (DRR_IS_RAW_ENCRYPTED(drrwbr->drr_flags)) { + dmu_copy_from_buf(rwa->os, drrwbr->drr_object, + drrwbr->drr_offset, dbp, tx); + } else { + dmu_write(rwa->os, drrwbr->drr_object, + drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + } dmu_buf_rele(dbp, FTAG); /* See comment in restore_write. */ @@ -2336,7 +2611,7 @@ receive_write_embedded(struct receive_writer_arg *rwa, static int receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, - void *data) + arc_buf_t *abuf) { dmu_tx_t *tx; dmu_buf_t *db, *db_spill; @@ -2346,6 +2621,13 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os))) return (SET_ERROR(EINVAL)); + if (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags)) { + if (!DMU_OT_IS_VALID(drrs->drr_type) || + drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS || + drrs->drr_compressed_size == 0) + return (SET_ERROR(EINVAL)); + } + if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); @@ -2371,7 +2653,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); - bcopy(data, db_spill->db_data, drrs->drr_length); + dmu_assign_arcbuf_impl(db_spill, abuf, tx); dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); @@ -2399,18 +2681,73 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) return (err); } +static int +receive_object_range(struct receive_writer_arg *rwa, + struct drr_object_range *drror) +{ + int ret; + dmu_tx_t *tx; + dnode_t *mdn = NULL; + dmu_buf_t *db = NULL; + uint64_t offset; + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drror->drr_flags) ^ + rwa->byteswap; + + /* + * Since dnode block sizes are constant, we should not need to worry + * about making sure that the dnode block size is the same on the + * sending and receiving sides for the time being. 
For non-raw sends, + * this does not matter (and in fact we do not send a DRR_OBJECT_RANGE + * record at all). Raw sends require this record type because the + * encryption parameters are used to protect an entire block of bonus + * buffers. If the size of dnode blocks ever becomes variable, + * handling will need to be added to ensure that dnode block sizes + * match on the sending and receiving side. + */ + if (drror->drr_numslots != DNODES_PER_BLOCK || + drror->drr_numslots - drror->drr_firstobj != DNODES_PER_BLOCK || + (drror->drr_flags & DRR_RAW_ENCRYPTED) == 0) + return (SET_ERROR(EINVAL)); + + offset = drror->drr_firstobj * sizeof (dnode_phys_t); + mdn = DMU_META_DNODE(rwa->os); + + tx = dmu_tx_create(rwa->os); + ret = dmu_tx_assign(tx, TXG_WAIT); + if (ret != 0) { + dmu_tx_abort(tx); + return (ret); + } + + ret = dmu_buf_hold_by_dnode(mdn, offset, FTAG, &db, DMU_READ_PREFETCH); + if (ret != 0) { + dmu_tx_commit(tx); + return (ret); + } + + dmu_buf_will_dirty(db, tx); + dmu_convert_to_raw(db, byteorder, drror->drr_salt, drror->drr_iv, + drror->drr_mac); + dmu_buf_rele(db, FTAG); + dmu_tx_commit(tx); + return (0); +} + /* used to destroy the drc_ds on error */ static void dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) { + ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT; + if (drc->drc_resumable) { /* wait for our resume state to be written to disk */ txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + dsl_dataset_disown(drc->drc_ds, dsflags, dmu_recv_tag); } else { char name[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(drc->drc_ds, name); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + dsl_dataset_disown(drc->drc_ds, dsflags, dmu_recv_tag); (void) dsl_destroy_head(name); } } @@ -2458,6 +2795,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) err = receive_read(ra, sizeof (ra->next_rrd->header), &ra->next_rrd->header); ra->next_rrd->bytes_read = ra->bytes_read; + if (err != 0) { kmem_free(ra->next_rrd, sizeof (*ra->next_rrd)); ra->next_rrd = NULL; @@ -2599,9 +2937,10 @@ receive_read_record(struct receive_arg *ra) case DRR_OBJECT: { struct drr_object *drro = &ra->rrd->header.drr_u.drr_object; - uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8); + uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro); void *buf = kmem_zalloc(size, KM_SLEEP); dmu_object_info_t doi; + err = receive_read_payload_and_next_header(ra, size, buf); if (err != 0) { kmem_free(buf, size); @@ -2629,7 +2968,18 @@ receive_read_record(struct receive_arg *ra) struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write; arc_buf_t *abuf; boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type); - if (DRR_WRITE_COMPRESSED(drrw)) { + + if (DRR_IS_RAW_ENCRYPTED(drrw->drr_flags)) { + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^ + ra->byteswap; + + abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os), + drrw->drr_object, byteorder, drrw->drr_salt, + drrw->drr_iv, drrw->drr_mac, drrw->drr_type, + drrw->drr_compressed_size, drrw->drr_logical_size, + drrw->drr_compressiontype); + } else if (DRR_WRITE_COMPRESSED(drrw)) { ASSERT3U(drrw->drr_compressed_size, >, 0); ASSERT3U(drrw->drr_logical_size, >=, drrw->drr_compressed_size); @@ -2649,7 +2999,7 @@ receive_read_record(struct receive_arg *ra) dmu_return_arcbuf(abuf); return (err); } - ra->rrd->write_buf = abuf; + ra->rrd->arc_buf = abuf; receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset, drrw->drr_logical_size); return (err); @@ -2699,11 +3049,38 @@ 
receive_read_record(struct receive_arg *ra) case DRR_SPILL: { struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill; - void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP); - err = receive_read_payload_and_next_header(ra, drrs->drr_length, - buf); - if (err != 0) - kmem_free(buf, drrs->drr_length); + arc_buf_t *abuf; + int len = DRR_SPILL_PAYLOAD_SIZE(drrs); + + /* DRR_SPILL records are either raw or uncompressed */ + if (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags)) { + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^ + ra->byteswap; + + abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os), + drrs->drr_object, byteorder, drrs->drr_salt, + drrs->drr_iv, drrs->drr_mac, drrs->drr_type, + drrs->drr_compressed_size, drrs->drr_length, + drrs->drr_compressiontype); + } else { + abuf = arc_loan_buf(dmu_objset_spa(ra->os), + DMU_OT_IS_METADATA(drrs->drr_type), + drrs->drr_length); + } + + err = receive_read_payload_and_next_header(ra, len, + abuf->b_data); + if (err != 0) { + dmu_return_arcbuf(abuf); + return (err); + } + ra->rrd->arc_buf = abuf; + return (err); + } + case DRR_OBJECT_RANGE: + { + err = receive_read_payload_and_next_header(ra, 0, NULL); return (err); } default: @@ -2742,11 +3119,11 @@ receive_process_record(struct receive_writer_arg *rwa, case DRR_WRITE: { struct drr_write *drrw = &rrd->header.drr_u.drr_write; - err = receive_write(rwa, drrw, rrd->write_buf); + err = receive_write(rwa, drrw, rrd->arc_buf); /* if receive_write() is successful, it consumes the arc_buf */ if (err != 0) - dmu_return_arcbuf(rrd->write_buf); - rrd->write_buf = NULL; + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; return (err); } @@ -2773,11 +3150,20 @@ receive_process_record(struct receive_writer_arg *rwa, case DRR_SPILL: { struct drr_spill *drrs = &rrd->header.drr_u.drr_spill; - err = receive_spill(rwa, drrs, rrd->payload); - kmem_free(rrd->payload, rrd->payload_size); + err = receive_spill(rwa, drrs, rrd->arc_buf); + /* if receive_spill() is successful, it consumes the arc_buf */ + if (err != 0) + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; return (err); } + case DRR_OBJECT_RANGE: + { + struct drr_object_range *drror = + &rrd->header.drr_u.drr_object_range; + return (receive_object_range(rwa, drror)); + } default: return (SET_ERROR(EINVAL)); } @@ -2801,9 +3187,9 @@ receive_writer_thread(void *arg) */ if (rwa->err == 0) { rwa->err = receive_process_record(rwa, rrd); - } else if (rrd->write_buf != NULL) { - dmu_return_arcbuf(rrd->write_buf); - rrd->write_buf = NULL; + } else if (rrd->arc_buf != NULL) { + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; } else if (rrd->payload != NULL) { kmem_free(rrd->payload, rrd->payload_size); @@ -2893,6 +3279,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); + ra.featureflags = featureflags; /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { @@ -2947,6 +3334,23 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, goto out; } + /* handle DSL encryption key payload */ + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + nvlist_t *keynvl = NULL; + + ASSERT(ra.os->os_encrypted); + ASSERT(drc->drc_raw); + + err = nvlist_lookup_nvlist(begin_nvl, "crypt_keydata", &keynvl); + if (err != 0) + goto out; + + err = 
dsl_crypto_recv_key(spa_name(ra.os->os_spa), + drc->drc_ds->ds_object, keynvl); + if (err != 0) + goto out; + } + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { err = resume_check(&ra, begin_nvl); if (err != 0) @@ -3201,21 +3605,31 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; } + /* * Release the hold from dmu_recv_begin. This must be done before - * we return to open context, so that when we free the dataset's dnode, - * we can evict its bonus buffer. + * we return to open context, so that when we free the dataset's dnode + * we can evict its bonus buffer. Since the dataset may be destroyed + * at this point (and therefore won't have a valid pointer to the spa) + * we release the key mapping manually here while we do have a valid + * pointer. */ - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + if (!drc->drc_raw) { + (void) spa_keystore_remove_mapping(dmu_tx_pool(tx)->dp_spa, + drc->drc_ds->ds_object, drc->drc_ds); + } + dsl_dataset_disown(drc->drc_ds, 0, dmu_recv_tag); drc->drc_ds = NULL; } static int -add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) +add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj, + boolean_t raw) { dsl_pool_t *dp; dsl_dataset_t *snapds; guid_map_entry_t *gmep; + ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT; int err; ASSERT(guid_map != NULL); @@ -3224,9 +3638,10 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) if (err != 0) return (err); gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); - err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); + err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds); if (err == 0) { gmep->guid = dsl_dataset_phys(snapds)->ds_guid; + gmep->raw = raw; gmep->gme_ds = snapds; avl_add(guid_map, gmep); dsl_dataset_long_hold(snapds, gmep); @@ -3281,9 +3696,8 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner) if (error != 0) { dmu_recv_cleanup_ds(drc); } else if (drc->drc_guid_to_ds_map != NULL) { - (void) add_ds_to_guidmap(drc->drc_tofs, - drc->drc_guid_to_ds_map, - drc->drc_newsnapobj); + (void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map, + drc->drc_newsnapobj, drc->drc_raw); } return (error); } diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c index d3061a07f2c9..c3fbea455372 100644 --- a/usr/src/uts/common/fs/zfs/dmu_traverse.c +++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c @@ -132,7 +132,7 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh) zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, - claim_txg); + claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT)); zil_free(zilog); } @@ -181,6 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + int zio_flags = ZIO_FLAG_CANFAIL; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) return; @@ -196,8 +197,11 @@ traverse_prefetch_metadata(traverse_data_t *td, if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) return; + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_ENCRYPTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + (void) arc_read(NULL, td->td_spa, bp, NULL, NULL, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); } static boolean_t @@ -296,6 +300,8 @@ traverse_visitbp(traverse_data_t *td, const 
dnode_phys_t *dnp, blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + ASSERT(!BP_IS_ENCRYPTED(bp)); + err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err != 0) @@ -320,11 +326,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { arc_flags_t flags = ARC_FLAG_WAIT; + uint32_t zio_flags = ZIO_FLAG_CANFAIL; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; + /* + * dnode blocks might have their bonus buffers encrypted, so + * we must be careful to honor TRAVERSE_NO_DECRYPT + */ + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_ENCRYPTED(bp)) + zio_flags |= ZIO_FLAG_RAW; err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err != 0) goto post; dnode_phys_t *child_dnp = buf->b_data; @@ -344,6 +357,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { arc_flags_t flags = ARC_FLAG_WAIT; + ASSERT(!BP_IS_ENCRYPTED(bp)); + err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err != 0) @@ -494,6 +509,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { prefetch_data_t *pfd = arg; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; ASSERT(pfd->pd_bytes_fetched >= 0); @@ -512,8 +528,11 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, cv_broadcast(&pfd->pd_cv); mutex_exit(&pfd->pd_mtx); + if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_ENCRYPTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + (void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb); + zio_flags, &aflags, zb); return (0); } diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index b439037d161e..61bc9ce1cbbe 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -1112,7 +1112,12 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, rw_exit(&mdn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); - err = dbuf_read(db, NULL, DB_RF_CANFAIL); + + /* + * We do not need to decrypt to read the dnode so it doesn't matter + * if we get the encrypted or decrypted version. 
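+ * Only the bonus buffers within a dnode block may be encrypted; the dnode_phys_t fields themselves are readable either way.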
+ */ + err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT); if (err) { dbuf_rele(db, FTAG); return (err); @@ -1840,7 +1845,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, */ return (SET_ERROR(ESRCH)); } - error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); + error = dbuf_read(db, NULL, + DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT); if (error) { dbuf_rele(db, FTAG); return (error); diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index bfa92db7a954..d669e84fc6db 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -546,6 +546,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) void dnode_sync(dnode_t *dn, dmu_tx_t *tx) { + objset_t *os = dn->dn_objset; dnode_phys_t *dnp = dn->dn_phys; int txgoff = tx->tx_txg & TXG_MASK; list_t *list = &dn->dn_dirty_records[txgoff]; @@ -560,8 +561,11 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); - if (dmu_objset_userused_enabled(dn->dn_objset) && - !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { + /* do user accounting if it is enabled and this is not a raw recv */ + if (dmu_objset_userused_enabled(os) && + !DMU_OBJECT_IS_SPECIAL(dn->dn_object) && (!os->os_encrypted || + spa_keystore_lookup_key(os->os_spa, dmu_objset_id(os), + NULL, NULL) == 0)) { mutex_enter(&dn->dn_mtx); dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); dn->dn_oldflags = dn->dn_phys->dn_flags; diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c new file mode 100644 index 000000000000..270d4d38fcc6 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c @@ -0,0 +1,2266 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This file's primary purpose is for managing master encryption keys in + * memory and on disk. For more info on how these keys are used, see the + * block comment in zio_crypt.c. + * + * All master keys are stored encrypted on disk in the form of the DSL + * Crypto Key ZAP object. The binary key data in this object is always + * randomly generated and is encrypted with the user's secret key. This + * layer of indirection allows the user to change their key without + * needing to re-encrypt the entire dataset. The ZAP also holds on to the + * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to + * safely decrypt the master key. For more info on the user's key see the + * block comment in libzfs_crypto.c + * + * In memory encryption keys are managed through the spa_keystore. The + * keystore consists of 3 AVL trees, which are as follows: + * + * The Wrapping Key Tree: + * The wrapping key (wkey) tree stores the user's keys that are fed into the + * kernel through 'zfs load-key' and related commands. Datasets inherit their + * parent's wkey, so they are refcounted. The wrapping keys remain in memory + * until they are explicitly unloaded (with "zfs unload-key"). 
Unloading is + * only possible when no datasets are using them (refcount=0). + * + * The DSL Crypto Key Tree: + * The DSL Crypto Keys are the in-memory representation of decrypted master + * keys. They are used by the functions in zio_crypt.c to perform encryption + * and decryption. The decrypted master key bit patterns are shared between + * all datasets within a "clone family", but each clone may use a different + * wrapping key. As a result, we maintain one of these structs for each clone + * to allow us to manage the loading and unloading of each separately. + * Snapshots of a given dataset, however, will share a DSL Crypto Key, so they + * are also refcounted. Once the refcount on a key hits zero, it is immediately + * zeroed out and freed. + * + * The Crypto Key Mapping Tree: + * The zio layer needs to lookup master keys by their dataset object id. Since + * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of + * dsl_key_mapping_t's which essentially just map the dataset object id to its + * appropriate DSL Crypto Key. The management for creating and destroying these + * mappings hooks into the code for owning and disowning datasets. Usually, + * there will only be one active dataset owner, but there are times + * (particularly during dataset creation and destruction) when this may not be + * true or the dataset may not be initialized enough to own. As a result, this + * object is also refcounted. + */ + +void +dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag) +{ + (void) refcount_add(&wkey->wk_refcnt, tag); +} + +void +dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag) +{ + (void) refcount_remove(&wkey->wk_refcnt, tag); +} + +void +dsl_wrapping_key_free(dsl_wrapping_key_t *wkey) +{ + ASSERT0(refcount_count(&wkey->wk_refcnt)); + + if (wkey->wk_key.ck_data) { + bzero(wkey->wk_key.ck_data, + BITS_TO_BYTES(wkey->wk_key.ck_length)); + kmem_free(wkey->wk_key.ck_data, + BITS_TO_BYTES(wkey->wk_key.ck_length)); + } + + refcount_destroy(&wkey->wk_refcnt); + kmem_free(wkey, sizeof (dsl_wrapping_key_t)); +} + +int +dsl_wrapping_key_create(uint8_t *wkeydata, dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t *wkey; + + /* allocate the wrapping key */ + wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP); + if (!wkey) + return (SET_ERROR(ENOMEM)); + + /* allocate and initialize the underlying crypto key */ + wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP); + if (!wkey->wk_key.ck_data) { + ret = ENOMEM; + goto error; + } + + wkey->wk_key.ck_format = CRYPTO_KEY_RAW; + wkey->wk_key.ck_length = BYTES_TO_BITS(WRAPPING_KEY_LEN); + + /* copy the data */ + bcopy(wkeydata, wkey->wk_key.ck_data, WRAPPING_KEY_LEN); + + /* initialize the refcount */ + refcount_create(&wkey->wk_refcnt); + + *wkey_out = wkey; + return (0); + +error: + dsl_wrapping_key_free(wkey); + + *wkey_out = NULL; + return (ret); +} + +int +dsl_crypto_params_create_nvlist(nvlist_t *props, nvlist_t *crypto_args, + dsl_crypto_params_t **dcp_out) +{ + int ret; + boolean_t do_inherit = B_TRUE; + uint64_t crypt = ZIO_CRYPT_INHERIT; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + dsl_crypto_params_t *dcp = NULL; + dsl_wrapping_key_t *wkey = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeydata_len = 0; + char *keylocation = NULL; + + dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP); + if (!dcp) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + /* get relevant properties from the nvlist */ + if (props != NULL) { + ret = nvlist_lookup_uint64(props, + 
zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); + if (ret == 0) + do_inherit = B_FALSE; + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + if (ret == 0) + do_inherit = B_FALSE; + + ret = nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + if (ret == 0) + do_inherit = B_FALSE; + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &dcp->cp_salt); + if (ret == 0) + do_inherit = B_FALSE; + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + &dcp->cp_iters); + if (ret == 0) + do_inherit = B_FALSE; + } + + if (crypto_args != NULL) { + ret = nvlist_lookup_uint8_array(crypto_args, "wkeydata", + &wkeydata, &wkeydata_len); + if (ret == 0) + do_inherit = B_FALSE; + } + + /* no crypto parameters were specified; the settings will be inherited */ + if (do_inherit) { + kmem_free(dcp, sizeof (dsl_crypto_params_t)); + *dcp_out = NULL; + return (0); + } + + /* check for valid crypt */ + if (crypt >= ZIO_CRYPT_FUNCTIONS) { + ret = SET_ERROR(EINVAL); + goto error; + } else { + dcp->cp_crypt = crypt; + } + + /* check for valid keyformat */ + if (keyformat >= ZFS_KEYFORMAT_FORMATS) { + ret = SET_ERROR(EINVAL); + goto error; + } else { + dcp->cp_keyformat = keyformat; + } + + /* check for a valid keylocation (of any kind) and copy it in */ + if (keylocation != NULL) { + if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + dcp->cp_keylocation = spa_strdup(keylocation); + } + + /* check wrapping key length, if given */ + if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* specifying a keyformat requires keydata */ + if (keyformat != ZFS_KEYFORMAT_NONE && wkeydata == NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* if the user asked for the default crypt, determine that now */ + if (dcp->cp_crypt == ZIO_CRYPT_ON) + dcp->cp_crypt = ZIO_CRYPT_ON_VALUE; + + /* create the wrapping key from the raw data */ + if (wkeydata != NULL) { + /* create the wrapping key with the verified parameters */ + ret = dsl_wrapping_key_create(wkeydata, &wkey); + if (ret != 0) + goto error; + + dcp->cp_wkey = wkey; + } + + /* + * Remove the encryption property from the nvlist since it is not + * maintained through the DSL.
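+ * The chosen suite is persisted in the DSL Crypto Key ZAP instead (see DSL_CRYPTO_KEY_CRYPTO_SUITE in dsl_crypto_key_open()).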
+ */ + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)); + + *dcp_out = dcp; + + return (0); + +error: + if (wkey != NULL) + dsl_wrapping_key_free(wkey); + if (dcp != NULL) + kmem_free(dcp, sizeof (dsl_crypto_params_t)); + + *dcp_out = NULL; + return (ret); +} + +void +dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload) +{ + if (dcp == NULL) + return; + + if (dcp->cp_keylocation != NULL) + spa_strfree(dcp->cp_keylocation); + if (unload && dcp->cp_wkey != NULL) + dsl_wrapping_key_free(dcp->cp_wkey); + + kmem_free(dcp, sizeof (dsl_crypto_params_t)); +} + +static int +spa_crypto_key_compare(const void *a, const void *b) +{ + const dsl_crypto_key_t *dcka = a; + const dsl_crypto_key_t *dckb = b; + + if (dcka->dck_obj < dckb->dck_obj) + return (-1); + if (dcka->dck_obj > dckb->dck_obj) + return (1); + return (0); +} + +static int +spa_key_mapping_compare(const void *a, const void *b) +{ + const dsl_key_mapping_t *kma = a; + const dsl_key_mapping_t *kmb = b; + + if (kma->km_dsobj < kmb->km_dsobj) + return (-1); + if (kma->km_dsobj > kmb->km_dsobj) + return (1); + return (0); +} + +static int +spa_wkey_compare(const void *a, const void *b) +{ + const dsl_wrapping_key_t *wka = a; + const dsl_wrapping_key_t *wkb = b; + + if (wka->wk_ddobj < wkb->wk_ddobj) + return (-1); + if (wka->wk_ddobj > wkb->wk_ddobj) + return (1); + return (0); +} + +void +spa_keystore_init(spa_keystore_t *sk) +{ + rw_init(&sk->sk_dk_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL); + avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare, + sizeof (dsl_crypto_key_t), + offsetof(dsl_crypto_key_t, dck_avl_link)); + avl_create(&sk->sk_key_mappings, spa_key_mapping_compare, + sizeof (dsl_key_mapping_t), + offsetof(dsl_key_mapping_t, km_avl_link)); + avl_create(&sk->sk_wkeys, spa_wkey_compare, sizeof (dsl_wrapping_key_t), + offsetof(dsl_wrapping_key_t, wk_avl_link)); +} + +void +spa_keystore_fini(spa_keystore_t *sk) +{ + dsl_wrapping_key_t *wkey; + void *cookie = NULL; + + ASSERT(avl_is_empty(&sk->sk_dsl_keys)); + ASSERT(avl_is_empty(&sk->sk_key_mappings)); + + while ((wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie)) != NULL) + dsl_wrapping_key_free(wkey); + + avl_destroy(&sk->sk_wkeys); + avl_destroy(&sk->sk_key_mappings); + avl_destroy(&sk->sk_dsl_keys); + rw_destroy(&sk->sk_wkeys_lock); + rw_destroy(&sk->sk_km_lock); + rw_destroy(&sk->sk_dk_lock); +} + +static int +dsl_dir_hold_keylocation_source_dd(dsl_dir_t *dd, void *tag, + dsl_dir_t **inherit_dd_out) +{ + int ret; + dsl_dir_t *inherit_dd = NULL; + char keylocation[MAXNAMELEN]; + char setpoint[MAXNAMELEN]; + + /* + * Lookup dd's keylocation property and find out where it was + * inherited from. dsl_prop_get_dd() might not find anything and + * return the default value. We detect this by checking if setpoint + * is an empty string and return ENOENT. 
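+ * Since keylocation is normally only set locally on an encryption root, the returned setpoint effectively names dd's encryption root.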
+ */ + ret = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + 1, sizeof (keylocation), keylocation, setpoint, B_FALSE); + if (ret != 0) { + goto error; + } else if (setpoint[0] == '\0') { + ret = ENOENT; + goto error; + } + + /* hold the dsl dir that we inherited the property from */ + ret = dsl_dir_hold(dd->dd_pool, setpoint, tag, &inherit_dd, NULL); + if (ret != 0) + goto error; + + *inherit_dd_out = inherit_dd; + return (0); + +error: + + *inherit_dd_out = NULL; + return (ret); +} + +static int +spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj, + void *tag, dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t search_wkey; + dsl_wrapping_key_t *found_wkey; + + ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock)); + + /* init the search wrapping key */ + search_wkey.wk_ddobj = ddobj; + + /* lookup the wrapping key */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &search_wkey, NULL); + if (!found_wkey) { + ret = SET_ERROR(ENOENT); + goto error; + } + + /* increment the refcount */ + dsl_wrapping_key_hold(found_wkey, tag); + + *wkey_out = found_wkey; + return (0); + +error: + *wkey_out = NULL; + return (ret); +} + +static int +spa_keystore_wkey_hold_ddobj(spa_t *spa, uint64_t ddobj, void *tag, + dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_pool_t *dp = spa_get_dsl(spa); + dsl_dir_t *dd = NULL, *inherit_dd = NULL; + dsl_wrapping_key_t *wkey; + boolean_t locked = B_FALSE; + + if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) { + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER); + locked = B_TRUE; + } + + /* + * There is a special case for zfs_create_fs() where the wrapping key + * is needed before the filesystem's properties are set. This is + * problematic because dsl_dir_hold_keylocation_source_dd() uses the + * properties to determine where the wrapping key is inherited from. + * As a result, here we try to find a wrapping key for this dd before + * checking for wrapping key inheritance. 
+ */ + ret = spa_keystore_wkey_hold_ddobj_impl(spa, ddobj, tag, &wkey); + if (ret == 0) { + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + *wkey_out = wkey; + return (0); + } + + /* hold the dsl dir */ + ret = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd); + if (ret != 0) + goto error; + + /* get the dd that the keylocation property was inherited from */ + ret = dsl_dir_hold_keylocation_source_dd(dd, FTAG, &inherit_dd); + if (ret != 0) + goto error; + + /* lookup the wkey in the avl tree */ + ret = spa_keystore_wkey_hold_ddobj_impl(spa, inherit_dd->dd_object, + tag, &wkey); + if (ret != 0) + goto error; + + /* unlock the wkey tree if we locked it */ + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + dsl_dir_rele(inherit_dd, FTAG); + dsl_dir_rele(dd, FTAG); + + *wkey_out = wkey; + return (0); + +error: + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + if (inherit_dd != NULL) + dsl_dir_rele(inherit_dd, FTAG); + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + + *wkey_out = NULL; + return (ret); +} + +int +dsl_crypto_can_set_keylocation(const char *dsname, zprop_source_t source, + const char *keylocation) +{ + int ret = 0; + dsl_dir_t *dd = NULL; + dsl_dir_t *inherit_dd = NULL; + dsl_pool_t *dp = NULL; + dsl_wrapping_key_t *wkey = NULL; + + /* hold the dsl dir */ + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto out; + + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto out; + + /* if dd is not encrypted, the value may only be "none" */ + if (dd->dd_crypto_obj == 0) { + if (strcmp(keylocation, "none") != 0) { + ret = SET_ERROR(EACCES); + goto out; + } + + ret = 0; + goto out; + } + + /* check for a valid keylocation for encrypted datasets */ + if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) { + ret = SET_ERROR(EINVAL); + goto out; + } + + /* If this is a received keylocation we don't need to do anything else */ + if ((source & ZPROP_SRC_RECEIVED) != 0) { + ret = 0; + goto out; + } + + /* + * Now we want to check that this dataset is an encryption root since + * keylocation may only be set on encryption roots. Normally this is + * trivial, using dsl_dir_hold_keylocation_source_dd(), but this + * function also gets called during dataset creation when the + * properties have not been set up yet. Fortunately, the wrapping key + * will always be loaded at creation time, so we can check for this + * first.
+ */ + rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_READER); + ret = spa_keystore_wkey_hold_ddobj_impl(dp->dp_spa, dd->dd_object, + FTAG, &wkey); + rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock); + if (ret != 0) { + ret = dsl_dir_hold_keylocation_source_dd(dd, FTAG, &inherit_dd); + if (ret != 0) + goto out; + + if (inherit_dd->dd_object != dd->dd_object) { + ret = SET_ERROR(EACCES); + goto out; + } + } + + if (wkey != NULL) + dsl_wrapping_key_rele(wkey, FTAG); + if (inherit_dd != NULL) + dsl_dir_rele(inherit_dd, FTAG); + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +out: + if (wkey != NULL) + dsl_wrapping_key_rele(wkey, FTAG); + if (inherit_dd != NULL) + dsl_dir_rele(inherit_dd, FTAG); + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +static void +dsl_crypto_key_free(dsl_crypto_key_t *dck) +{ + ASSERT(refcount_count(&dck->dck_refcnt) == 0); + + /* destroy the zio_crypt_key_t */ + zio_crypt_key_destroy(&dck->dck_key); + + /* free the refcount, wrapping key, and lock */ + refcount_destroy(&dck->dck_refcnt); + if (dck->dck_wkey) + dsl_wrapping_key_rele(dck->dck_wkey, dck); + + /* free the key */ + kmem_free(dck, sizeof (dsl_crypto_key_t)); +} + +static void +dsl_crypto_key_rele(dsl_crypto_key_t *dck, void *tag) +{ + if (refcount_remove(&dck->dck_refcnt, tag) == 0) + dsl_crypto_key_free(dck); +} + +static int +dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, + uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out) +{ + int ret; + uint64_t crypt = 0; + uint8_t raw_keydata[MAX_MASTER_KEY_LEN]; + uint8_t raw_hmac_keydata[HMAC_SHA256_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + dsl_crypto_key_t *dck; + + /* allocate and initialize the key */ + dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP); + if (!dck) + return (SET_ERROR(ENOMEM)); + + /* fetch all of the values we need from the ZAP */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MAX_MASTER_KEY_LEN, raw_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + HMAC_SHA256_KEYLEN, raw_hmac_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac); + if (ret != 0) + goto error; + + /* + * Unwrap the keys. If there is an error return EACCES to indicate + * an authentication failure. 
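+ * The wrapped key is authenticated with the stored IV and MAC, so an incorrect wrapping key is detected here rather than yielding a garbage master key.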
+ */ + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, raw_keydata, + raw_hmac_keydata, iv, mac, &dck->dck_key); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + /* finish initializing the dsl_crypto_key_t */ + refcount_create(&dck->dck_refcnt); + dsl_wrapping_key_hold(wkey, dck); + dck->dck_wkey = wkey; + dck->dck_obj = dckobj; + (void) refcount_add(&dck->dck_refcnt, tag); + + *dck_out = dck; + return (0); + +error: + if (dck != NULL) { + bzero(dck, sizeof (dsl_crypto_key_t)); + kmem_free(dck, sizeof (dsl_crypto_key_t)); + } + + *dck_out = NULL; + return (ret); +} + +static int +spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + dsl_crypto_key_t search_dck; + dsl_crypto_key_t *found_dck; + + ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock)); + + /* init the search key */ + search_dck.dck_obj = dckobj; + + /* find the matching key in the keystore */ + found_dck = avl_find(&spa->spa_keystore.sk_dsl_keys, &search_dck, NULL); + if (!found_dck) { + ret = SET_ERROR(ENOENT); + goto error; + } + + /* increment the refcount */ + (void) refcount_add(&found_dck->dck_refcnt, tag); + + *dck_out = found_dck; + return (0); + +error: + *dck_out = NULL; + return (ret); +} + +static int +spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + avl_index_t where; + dsl_crypto_key_t *dck = NULL; + dsl_wrapping_key_t *wkey = NULL; + uint64_t dckobj = dd->dd_crypto_obj; + + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); + + /* lookup the key in the tree of currently loaded keys */ + ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck); + if (!ret) { + rw_exit(&spa->spa_keystore.sk_dk_lock); + *dck_out = dck; + return (0); + } + + /* lookup the wrapping key from the keystore */ + ret = spa_keystore_wkey_hold_ddobj(spa, dd->dd_object, FTAG, &wkey); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error_unlock; + } + + /* read the key from disk */ + ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj, + tag, &dck); + if (ret != 0) + goto error_unlock; + + /* + * add the key to the keystore (this should always succeed + * since we made sure it didn't exist before) + */ + (void) avl_find(&spa->spa_keystore.sk_dsl_keys, dck, &where); + avl_insert(&spa->spa_keystore.sk_dsl_keys, dck, where); + + /* release the wrapping key (the dsl key now has a reference to it) */ + dsl_wrapping_key_rele(wkey, FTAG); + + rw_exit(&spa->spa_keystore.sk_dk_lock); + + *dck_out = dck; + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_dk_lock); + if (wkey != NULL) + dsl_wrapping_key_rele(wkey, FTAG); + + *dck_out = NULL; + return (ret); +} + +void +spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag) +{ + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); + + if (refcount_remove(&dck->dck_refcnt, tag) == 0) { + avl_remove(&spa->spa_keystore.sk_dsl_keys, dck); + dsl_crypto_key_free(dck); + } + + rw_exit(&spa->spa_keystore.sk_dk_lock); +} + +int +spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey) +{ + int ret; + avl_index_t where; + dsl_wrapping_key_t *found_wkey; + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* insert the wrapping key into the keystore */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); + if (found_wkey != NULL) { + ret = SET_ERROR(EEXIST); + goto error_unlock; + } + avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + 
return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + return (ret); +} + +int +spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop) +{ + int ret; + dsl_dir_t *dd = NULL; + dsl_crypto_key_t *dck = NULL; + dsl_wrapping_key_t *wkey = dcp->cp_wkey; + dsl_pool_t *dp = NULL; + + if (dcp == NULL || dcp->cp_wkey == NULL) + return (SET_ERROR(EINVAL)); + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || dcp->cp_keylocation != NULL || + dcp->cp_salt != 0 || dcp->cp_iters != 0) + return (SET_ERROR(EINVAL)); + + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto error; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = (SET_ERROR(ENOTSUP)); + goto error; + } + + /* hold the dsl dir */ + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* initialize the wkey's ddobj */ + wkey->wk_ddobj = dd->dd_object; + + /* verify that the wkey is correct by opening its dsl key */ + ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey, + dd->dd_crypto_obj, FTAG, &dck); + if (ret != 0) + goto error; + + /* + * At this point we have verified the key. We can simply cleanup and + * return if this is all the user wanted to do. + */ + if (noop) + goto error; + + /* insert the wrapping key into the keystore */ + ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey); + if (ret != 0) + goto error; + + dsl_crypto_key_rele(dck, FTAG); + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +error: + if (dck != NULL) + dsl_crypto_key_rele(dck, FTAG); + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +int +spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj) +{ + int ret; + dsl_wrapping_key_t search_wkey; + dsl_wrapping_key_t *found_wkey; + + /* init the search wrapping key */ + search_wkey.wk_ddobj = ddobj; + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* remove the wrapping key from the keystore */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, + &search_wkey, NULL); + if (!found_wkey) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } else if (refcount_count(&found_wkey->wk_refcnt) != 0) { + ret = SET_ERROR(EBUSY); + goto error_unlock; + } + avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + /* free the wrapping key */ + dsl_wrapping_key_free(found_wkey); + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + return (ret); +} + +int +spa_keystore_unload_wkey(const char *dsname) +{ + int ret = 0; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = NULL; + + /* hold the dsl dir */ + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto error; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = (SET_ERROR(ENOTSUP)); + goto error; + } + + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* unload the wkey */ + ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +error: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +int +spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, + dsl_dir_t *dd, void *tag) +{ + int ret; + avl_index_t where; + dsl_key_mapping_t *km = NULL, *found_km; + boolean_t should_free = B_FALSE; + + /* allocate the mapping */ + km = kmem_alloc(sizeof (dsl_key_mapping_t), 
KM_SLEEP); + if (!km) + return (SET_ERROR(ENOMEM)); + + /* initialize the mapping */ + refcount_create(&km->km_refcnt); + + ret = spa_keystore_dsl_key_hold_dd(spa, dd, km, &km->km_key); + if (ret != 0) + goto error; + + km->km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); + + /* + * If a mapping already exists, simply increment its refcount and + * cleanup the one we made. We want to allocate / free outside of + * the lock because this lock is also used by the zio layer to lookup + * key mappings. Otherwise, use the one we created. Normally, there will + * only be one active reference at a time (the objset owner), but there + * are times when there could be multiple async users. + */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where); + if (found_km != NULL) { + should_free = B_TRUE; + (void) refcount_add(&found_km->km_refcnt, tag); + } else { + (void) refcount_add(&km->km_refcnt, tag); + avl_insert(&spa->spa_keystore.sk_key_mappings, km, where); + } + + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (should_free) { + spa_keystore_dsl_key_rele(spa, km->km_key, km); + refcount_destroy(&km->km_refcnt); + kmem_free(km, sizeof (dsl_key_mapping_t)); + } + + return (0); + +error: + if (km->km_key) + spa_keystore_dsl_key_rele(spa, km->km_key, km); + + refcount_destroy(&km->km_refcnt); + kmem_free(km, sizeof (dsl_key_mapping_t)); + + return (ret); +} + +int +spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag) +{ + return (spa_keystore_create_mapping_impl(spa, ds->ds_object, + ds->ds_dir, tag)); +} + +int +spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag) +{ + int ret; + dsl_key_mapping_t search_km; + dsl_key_mapping_t *found_km; + boolean_t should_free = B_FALSE; + + /* init the search key mapping */ + search_km.km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); + + /* find the matching mapping */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, + &search_km, NULL); + if (found_km == NULL) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } + + /* + * Decrement the refcount on the mapping and remove it from the tree if + * it is zero. Try to minimize time spent in this lock by deferring + * cleanup work. + */ + if (refcount_remove(&found_km->km_refcnt, tag) == 0) { + should_free = B_TRUE; + avl_remove(&spa->spa_keystore.sk_key_mappings, found_km); + } + + rw_exit(&spa->spa_keystore.sk_km_lock); + + /* destroy the key mapping */ + if (should_free) { + spa_keystore_dsl_key_rele(spa, found_km->km_key, found_km); + kmem_free(found_km, sizeof (dsl_key_mapping_t)); + } + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_km_lock); + return (ret); +} + +/* + * This function is primarily used by the zio and arc layer to lookup + * DSL Crypto Keys for encryption. Callers must release the key with + * spa_keystore_dsl_key_rele(). The function may also be called with + * dck_out == NULL and tag == NULL to simply check that a key exists + * without getting a reference to it. 
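+ * The mapping tree is searched under sk_km_lock as a reader, so concurrent lookups from the I/O path do not serialize against each other.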
+ */ +int +spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + dsl_key_mapping_t search_km; + dsl_key_mapping_t *found_km; + + ASSERT((tag != NULL && dck_out != NULL) || + (tag == NULL && dck_out == NULL)); + + /* init the search key mapping */ + search_km.km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER); + + /* remove the mapping from the tree */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km, + NULL); + if (found_km == NULL) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } + + if (found_km && tag) + (void) refcount_add(&found_km->km_key->dck_refcnt, tag); + + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (dck_out != NULL) + *dck_out = found_km->km_key; + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (dck_out != NULL) + *dck_out = NULL; + return (ret); +} + +static int +dmu_objset_check_wkey_loaded(dsl_dir_t *dd) +{ + int ret; + dsl_wrapping_key_t *wkey = NULL; + + ret = spa_keystore_wkey_hold_ddobj(dd->dd_pool->dp_spa, + dd->dd_object, FTAG, &wkey); + if (ret != 0) + return (SET_ERROR(EACCES)); + + dsl_wrapping_key_rele(wkey, FTAG); + + return (0); +} + +static void +dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt, + uint8_t *iv, uint8_t *mac, uint8_t *keydata, uint8_t *hmac_keydata, + dmu_tx_t *tx) +{ + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MAX_MASTER_KEY_LEN, keydata, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + HMAC_SHA256_KEYLEN, hmac_keydata, tx)); +} + +static void +dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) +{ + uint64_t dckobj = dck->dck_obj; + zio_crypt_key_t *key = &dck->dck_key; + objset_t *mos = tx->tx_pool->dp_meta_objset; + uint8_t keydata[MAX_MASTER_KEY_LEN]; + uint8_t hmac_keydata[HMAC_SHA256_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS); + + /* encrypt and store the keys along with the IV and MAC */ + VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac, + keydata, hmac_keydata)); + + /* update the ZAP with the obtained values */ + dsl_crypto_key_sync_impl(tx->tx_pool->dp_meta_objset, dck->dck_obj, + key->zk_crypt, iv, mac, keydata, hmac_keydata, tx); +} + +typedef struct spa_keystore_rewrap_args { + const char *skra_dsname; + dsl_crypto_params_t *skra_cp; +} spa_keystore_rewrap_args_t; + +static int +spa_keystore_rewrap_check(void *arg, dmu_tx_t *tx) +{ + int ret; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + dsl_dir_t *dd = NULL, *inherit_dd = NULL; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_keystore_rewrap_args_t *skra = arg; + dsl_crypto_params_t *dcp = skra->skra_cp; + + /* check for the encryption feature */ + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = SET_ERROR(ENOTSUP); + goto error; + } + + /* hold the dd */ + ret = dsl_dir_hold(dp, skra->skra_dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* verify that the dataset is encrypted */ + if (dd->dd_crypto_obj == 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* hold the dd where this dd is inheritting its key from */ + ret = dsl_dir_hold_keylocation_source_dd(dd, 
FTAG, &inherit_dd); + if (ret != 0) + goto error; + + /* + * A NULL dcp implies that the user wants this dataset to inherit + * the parent's wrapping key. + */ + if (dcp == NULL) { + /* check that this is an encryption root */ + if (dd->dd_object != inherit_dd->dd_object) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that the parent is encrypted */ + if (dd->dd_parent->dd_crypto_obj == 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = dmu_objset_check_wkey_loaded(dd); + if (ret != 0) + goto error; + + ret = dmu_objset_check_wkey_loaded(dd->dd_parent); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + dsl_dir_rele(inherit_dd, FTAG); + + return (0); + } + + /* + * If this dataset is not currently an encryption root we need a fully + * specified key for this dataset to become a new encryption root. + */ + if (dd->dd_object != inherit_dd->dd_object && + (dcp->cp_keyformat == ZFS_KEYFORMAT_NONE || + dcp->cp_keylocation == NULL)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* figure out what the new format will be */ + if (dcp->cp_keyformat == ZFS_KEYFORMAT_NONE) { + ret = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + 8, 1, &keyformat, NULL, B_FALSE); + if (ret != 0) + goto error; + } else { + keyformat = dcp->cp_keyformat; + } + + /* crypt cannot be changed after creation */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* we are not inheritting our parent's wkey so we need one ourselves */ + if (dcp->cp_wkey == NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that the keylocation is valid or NULL */ + if (dcp->cp_keylocation != NULL && + !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* passphrases require pbkdf2 salt and iters */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE && + (skra->skra_cp->cp_salt == 0 || + skra->skra_cp->cp_iters < MIN_PBKDF2_ITERATIONS)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* make sure the dd's wkey is loaded */ + ret = dmu_objset_check_wkey_loaded(dd); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + dsl_dir_rele(inherit_dd, FTAG); + + return (0); + +error: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (inherit_dd != NULL) + dsl_dir_rele(inherit_dd, FTAG); + + return (ret); +} + + +static void +spa_keystore_rewrap_sync_impl(uint64_t root_ddobj, uint64_t ddobj, + dsl_wrapping_key_t *wkey, dmu_tx_t *tx) +{ + zap_cursor_t zc; + zap_attribute_t za; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd = NULL, *inherit_dd = NULL; + dsl_crypto_key_t *dck = NULL; + + ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock)); + + /* hold the dd */ + VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); + + /* ignore hidden dsl dirs */ + if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') { + dsl_dir_rele(dd, FTAG); + return; + } + + /* hold the dd we inherited the keylocation from */ + VERIFY0(dsl_dir_hold_keylocation_source_dd(dd, FTAG, &inherit_dd)); + + /* stop recursing if this dsl dir didn't inherit from the root */ + if (inherit_dd->dd_object != root_ddobj) { + dsl_dir_rele(inherit_dd, FTAG); + dsl_dir_rele(dd, FTAG); + return; + } + + /* get the dsl_crypt_key_t for the current dsl dir */ + VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, FTAG, &dck)); + + /* replace the wrapping key */ + dsl_wrapping_key_hold(wkey, dck); + dsl_wrapping_key_rele(dck->dck_wkey, dck); + dck->dck_wkey = wkey; + + /* sync the dsl key wrapped with the new wrapping key */ + 
dsl_crypto_key_sync(dck, tx); + + /* recurse into all children dsl dirs */ + for (zap_cursor_init(&zc, dp->dp_meta_objset, + dsl_dir_phys(dd)->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + spa_keystore_rewrap_sync_impl(root_ddobj, za.za_first_integer, + wkey, tx); + } + zap_cursor_fini(&zc); + + spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + dsl_dir_rele(inherit_dd, FTAG); + dsl_dir_rele(dd, FTAG); +} + +static void +spa_keystore_rewrap_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_t *ds; + avl_index_t where; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; + spa_keystore_rewrap_args_t *skra = arg; + dsl_wrapping_key_t *wkey, *found_wkey; + dsl_wrapping_key_t wkey_search; + uint64_t keyformat; + const char *keylocation; + + /* create and initialize the wrapping key */ + VERIFY0(dsl_dataset_hold(dp, skra->skra_dsname, FTAG, &ds)); + ASSERT(!ds->ds_is_snapshot); + + if (skra->skra_cp != NULL) { + /* + * We are changing to a new wkey. Set additional properties + * which can be sent along with this ioctl. Note that this + * command can set keylocation even if it can't normally be + * set via 'zfs set' due to a non-local keylocation. + */ + keylocation = skra->skra_cp->cp_keylocation; + wkey = skra->skra_cp->cp_wkey; + wkey->wk_ddobj = ds->ds_dir->dd_object; + + if (keylocation != NULL) { + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, + keylocation, tx); + } + + if (skra->skra_cp->cp_keyformat != ZFS_KEYFORMAT_NONE) { + keyformat = skra->skra_cp->cp_keyformat; + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + ZPROP_SRC_LOCAL, 8, 1, &keyformat, tx); + } + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), ZPROP_SRC_LOCAL, + 8, 1, &skra->skra_cp->cp_iters, tx); + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), ZPROP_SRC_LOCAL, + 8, 1, &skra->skra_cp->cp_salt, tx); + } else { + /* + * We are inheritting the parent's wkey. Unset encryption all + * parameters and grab a reference to the wkey. + */ + VERIFY0(spa_keystore_wkey_hold_ddobj(spa, + ds->ds_dir->dd_parent->dd_object, FTAG, &wkey)); + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE, + 0, 0, NULL, tx); + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), ZPROP_SRC_NONE, + 0, 0, NULL, tx); + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), ZPROP_SRC_NONE, + 0, 0, NULL, tx); + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), ZPROP_SRC_NONE, + 0, 0, NULL, tx); + } + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* recurse through all children and rewrap their keys */ + spa_keystore_rewrap_sync_impl(wkey->wk_ddobj, ds->ds_dir->dd_object, + wkey, tx); + + /* + * All references to the old wkey should be released now (if it + * existed). Replace the wrapping key. 
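+ * A caller-supplied key is inserted into the keystore below; otherwise the hold taken on the parent's key above is dropped.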
+ */ + wkey_search.wk_ddobj = ds->ds_dir->dd_object; + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL); + if (found_wkey != NULL) { + ASSERT0(refcount_count(&found_wkey->wk_refcnt)); + avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); + dsl_wrapping_key_free(found_wkey); + } + + if (skra->skra_cp != NULL) { + (void) avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); + avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); + } else { + dsl_wrapping_key_rele(wkey, FTAG); + } + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + dsl_dataset_rele(ds, FTAG); +} + +int +spa_keystore_rewrap(const char *dsname, dsl_crypto_params_t *dcp) +{ + spa_keystore_rewrap_args_t skra; + + /* initialize the args struct */ + skra.skra_dsname = dsname; + skra.skra_cp = dcp; + + /* perform the actual work in syncing context */ + return (dsl_sync_task(dsname, spa_keystore_rewrap_check, + spa_keystore_rewrap_sync, &skra, 0, ZFS_SPACE_CHECK_NORMAL)); +} + +int +dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) +{ + int ret; + dsl_dir_t *inherit_dd = NULL; + dsl_dir_t *pinherit_dd = NULL; + + if (dd->dd_crypto_obj == 0) { + /* children of encrypted parents must be encrypted */ + if (newparent->dd_crypto_obj != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + return (0); + } + + ret = dsl_dir_hold_keylocation_source_dd(dd, FTAG, &inherit_dd); + if (ret != 0) + goto error; + + /* + * if this is not an encryption root, we must make sure we are not + * moving dd to a new encryption root + */ + if (dd->dd_object != inherit_dd->dd_object) { + ret = dsl_dir_hold_keylocation_source_dd(newparent, FTAG, + &pinherit_dd); + if (ret != 0) + goto error; + + if (pinherit_dd->dd_object != inherit_dd->dd_object) { + ret = SET_ERROR(EACCES); + goto error; + } + } + + if (inherit_dd != NULL) + dsl_dir_rele(inherit_dd, FTAG); + if (pinherit_dd != NULL) + dsl_dir_rele(pinherit_dd, FTAG); + return (0); + +error: + if (inherit_dd != NULL) + dsl_dir_rele(inherit_dd, FTAG); + if (pinherit_dd != NULL) + dsl_dir_rele(pinherit_dd, FTAG); + return (ret); +} + +/* + * This is the combined check function for verifying encrypted create and + * clone parameters. There are a lot of edge cases to handle here so it has + * been commented rather extensively. Some checks are duplicated in an effort + * to ensure the error codes returned are consistent (EINVAL before EACCES). + */ +int +dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd, + dsl_crypto_params_t *dcp) +{ + int ret; + uint64_t pcrypt, effective_crypt; + + /* get the parent's crypt */ + ret = dsl_dir_get_crypt(parentdd, &pcrypt); + if (ret != 0) + return (ret); + + /* + * Figure out what the crypt will be for the new dataset. + * Clones must always use the same crypt as their origin. + */ + if (origindd != NULL) { + ret = dsl_dir_get_crypt(origindd, &effective_crypt); + if (ret != 0) + return (ret); + } else if (dcp == NULL || dcp->cp_crypt == ZIO_CRYPT_INHERIT) { + effective_crypt = pcrypt; + } else { + effective_crypt = dcp->cp_crypt; + } + + ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); + ASSERT3U(effective_crypt, !=, ZIO_CRYPT_INHERIT); + + /* + * can't create an unencrypted child of an encrypted parent + * under any circumstances + */ + if (effective_crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + /* NULL dcp implies inheritence. Make sure the needed keys exist. 
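+ * The parent's key, and the origin's key when cloning, must already be loaded.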
*/ + if (dcp == NULL) { + /* no encryption */ + if (effective_crypt == ZIO_CRYPT_OFF) + return (0); + + /* check for parent key */ + ret = dmu_objset_check_wkey_loaded(parentdd); + if (ret != 0) + return (ret); + + /* check for origin key if this is a clone */ + if (origindd != NULL) { + ret = dmu_objset_check_wkey_loaded(origindd); + if (ret != 0) + return (ret); + } + + return (0); + } + + /* flags are only used for raw receives, which are not checked here */ + ASSERT0(dcp->cp_flags); + + /* check for valid dcp with no encryption (inherited or local) */ + if (effective_crypt == ZIO_CRYPT_OFF) { + /* Must not specify encryption params */ + if (dcp->cp_salt != 0 || dcp->cp_iters != 0 || + dcp->cp_keyformat != ZFS_KEYFORMAT_NONE || + dcp->cp_wkey != NULL || + (dcp->cp_keylocation != NULL && + strcmp(dcp->cp_keylocation, "none") != 0)) + return (SET_ERROR(EINVAL)); + + return (0); + } + + /* We will now definitely be encrypting. Check the feature flag */ + if (!spa_feature_is_enabled(parentdd->dd_pool->dp_spa, + SPA_FEATURE_ENCRYPTION)) { + return (SET_ERROR(EOPNOTSUPP)); + } + + /* handle non-implicit inheritence */ + if (dcp->cp_wkey == NULL) { + /* key must be fully unspecified */ + if (dcp->cp_keyformat != ZFS_KEYFORMAT_NONE || + dcp->cp_keylocation != NULL || dcp->cp_salt != 0 || + dcp->cp_iters != 0) + return (SET_ERROR(EINVAL)); + + /* parent must have a key to inherit */ + if (pcrypt == ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + /* check for parent key */ + ret = dmu_objset_check_wkey_loaded(parentdd); + if (ret != 0) + return (ret); + + /* check for origin key if this is a clone */ + if (origindd != NULL) { + ret = dmu_objset_check_wkey_loaded(origindd); + if (ret != 0) + return (ret); + } + + return (0); + } + + /* At this point we should have a fully specified key. Check location */ + if (dcp->cp_keylocation == NULL || + !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) + return (SET_ERROR(EINVAL)); + + /* Must have fully specified keyformat */ + switch (dcp->cp_keyformat) { + case ZFS_KEYFORMAT_HEX: + case ZFS_KEYFORMAT_RAW: + /* requires no pbkdf2 iters and salt */ + if (dcp->cp_salt != 0 || dcp->cp_iters != 0) + return (SET_ERROR(EINVAL)); + break; + case ZFS_KEYFORMAT_PASSPHRASE: + /* requires pbkdf2 iters and salt */ + if (dcp->cp_salt == 0 || + dcp->cp_iters < MIN_PBKDF2_ITERATIONS) + return (SET_ERROR(EINVAL)); + break; + case ZFS_KEYFORMAT_NONE: + default: + /* keyformat must be specified and valid */ + return (SET_ERROR(EINVAL)); + } + + /* check for origin key if this is a clone */ + if (origindd != NULL) { + ret = dmu_objset_check_wkey_loaded(origindd); + if (ret != 0) + return (ret); + } + + return (0); +} + +void +dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dd->dd_pool; + uint64_t crypt = (dcp != NULL) ? dcp->cp_crypt : ZIO_CRYPT_INHERIT; + dsl_wrapping_key_t *wkey = (dcp != NULL) ? 
dcp->cp_wkey : NULL; + + if (dcp != NULL) { + /* raw receives will handle their own key creation */ + if (dcp->cp_flags & DCP_FLAG_RAW_RECV) { + ASSERT3U(dcp->cp_crypt, ==, ZIO_CRYPT_INHERIT); + ASSERT3U(dcp->cp_keyformat, ==, ZFS_KEYFORMAT_NONE); + ASSERT3P(dcp->cp_keylocation, ==, NULL); + ASSERT3P(dcp->cp_wkey, ==, NULL); + ASSERT0(dcp->cp_salt); + ASSERT0(dcp->cp_iters); + return; + } + + crypt = dcp->cp_crypt; + wkey = dcp->cp_wkey; + } else { + crypt = ZIO_CRYPT_INHERIT; + wkey = NULL; + } + + /* figure out the effective crypt */ + if (!dsl_dir_is_clone(dd)) { + if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL) { + VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt)); + } + } else if (origin->ds_dir->dd_crypto_obj != 0) { + VERIFY0(dsl_dir_get_crypt(origin->ds_dir, &crypt)); + } + + /* if we aren't doing encryption just return */ + if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT) + return; + + /* zapify the dd so that we can add the crypto key obj to it */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_dir_zapify(dd, tx); + + /* use the new key if given or inherit from the parent */ + if (wkey == NULL) { + VERIFY0(spa_keystore_wkey_hold_ddobj(dp->dp_spa, + dd->dd_parent->dd_object, FTAG, &wkey)); + } else { + wkey->wk_ddobj = dd->dd_object; + } + + /* + * Create or clone the DSL crypto key. If we are creating activate + * the feature on the dataset (cloning will do this automatically). + */ + if (!dsl_dir_is_clone(dd)) { + dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx); + dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); + } else if (origin->ds_dir->dd_crypto_obj != 0) { + dd->dd_crypto_obj = dsl_crypto_key_clone_sync(origin->ds_dir, + wkey, tx); + } + + /* add the crypto key obj to the dd on disk */ + VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, + DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj, + tx)); + + /* + * If we inherited the wrapping key we release our reference now. + * Otherwise, this is a new key and we need to load it into the + * keystore. + */ + if (dcp == NULL || dcp->cp_wkey == NULL) { + dsl_wrapping_key_rele(wkey, FTAG); + } else { + VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey)); + } +} + +typedef struct dsl_crypto_recv_key_arg { + uint64_t dcrka_dsobj; + nvlist_t *dcrka_nvl; +} dsl_crypto_recv_key_arg_t; + +int +dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) +{ + int ret; + dsl_crypto_recv_key_arg_t *dcrka = arg; + nvlist_t *nvl = dcrka->dcrka_nvl; + dsl_dataset_t *ds = NULL; + uint8_t *buf = NULL; + uint_t len; + uint64_t intval; + boolean_t is_passphrase = B_FALSE; + + /* + * Check that the ds exists. Assert that it isn't already encrypted + * and that it is inconsistent for sanity. + */ + ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds); + if (ret != 0) + goto error; + + ASSERT0(ds->ds_dir->dd_crypto_obj); + ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT); + + /* + * Read and check all the encryption values from the nvlist. We need + * all of the fields of a DSL Crypto Key, as well as a fully specified + * wrapping key. 
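+ * A missing entry or a value of unexpected length fails the check with EINVAL.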
+ */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &intval); + if (ret != 0 || intval >= ZIO_CRYPT_FUNCTIONS || + intval <= ZIO_CRYPT_OFF) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + &buf, &len); + if (ret != 0 || len != MAX_MASTER_KEY_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + &buf, &len); + if (ret != 0 || len != HMAC_SHA256_KEYLEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &buf, &len); + if (ret != 0 || len != WRAPPING_IV_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &buf, &len); + if (ret != 0 || len != WRAPPING_MAC_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + &intval); + if (ret != 0 || intval >= ZFS_KEYFORMAT_FORMATS || + intval <= ZFS_KEYFORMAT_NONE) { + ret = SET_ERROR(EINVAL); + goto error; + } + + is_passphrase = (intval == ZFS_KEYFORMAT_PASSPHRASE); + + /* + * for raw receives we allow any number of pbkdf2iters since there + * won't be a chance for the user to change it. + */ + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + &intval); + if (ret != 0 || (is_passphrase == (intval == 0))) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), + &intval); + if (ret != 0 || (is_passphrase == (intval == 0))) { + ret = SET_ERROR(EINVAL); + goto error; + } + + dsl_dataset_rele(ds, FTAG); + return (0); + +error: + if (ds != NULL) + dsl_dataset_rele(ds, FTAG); + return (ret); +} + +static void +dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) +{ + dsl_crypto_recv_key_arg_t *dcrka = arg; + uint64_t dsobj = dcrka->dcrka_dsobj; + nvlist_t *nvl = dcrka->dcrka_nvl; + dsl_pool_t *dp = tx->tx_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dataset_t *ds; + uint8_t *keydata, *hmac_keydata, *iv, *mac; + uint_t len; + uint64_t crypt, keyformat, iters, salt; + + VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + + /* lookup the values we need to create the DSL Crypto Key */ + crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE); + keyformat = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); + iters = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); + salt = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + &keydata, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + &hmac_keydata, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len)); + + /* zapify the dsl dir so we can add the key object to it */ + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + dsl_dir_zapify(ds->ds_dir, tx); + + /* create the DSL Crypto Key on disk and activate the feature */ + ds->ds_dir->dd_crypto_obj = zap_create(mos, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + dsl_crypto_key_sync_impl(mos, ds->ds_dir->dd_crypto_obj, crypt, iv, + mac, keydata, hmac_keydata, tx); + dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); + ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE; + + /* save the dd_crypto_obj on disk */ + VERIFY0(zap_add(mos, ds->ds_dir->dd_object, 
DD_FIELD_CRYPTO_KEY_OBJ, + sizeof (uint64_t), 1, &ds->ds_dir->dd_crypto_obj, tx)); + + /* set the encryption properties from the nvlist */ + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + ZPROP_SRC_LOCAL, 8, 1, &keyformat, tx); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + ZPROP_SRC_LOCAL, 8, 1, &iters, tx); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), + ZPROP_SRC_LOCAL, 8, 1, &salt, tx); + + dsl_dataset_rele(ds, FTAG); +} + +/* + * This function is used to sync an nvlist representing a DSL Crypto Key and + * the associated encryption parameters. The key will be written exactly as is + * without wrapping it. + */ +int +dsl_crypto_recv_key(const char *poolname, uint64_t dsobj, nvlist_t *nvl) +{ + dsl_crypto_recv_key_arg_t dcrka; + + dcrka.dcrka_dsobj = dsobj; + dcrka.dcrka_nvl = nvl; + + return (dsl_sync_task(poolname, dsl_crypto_recv_key_check, + dsl_crypto_recv_key_sync, &dcrka, 5, ZFS_SPACE_CHECK_NORMAL)); +} + +int +dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) +{ + int ret; + nvlist_t *nvl = NULL; + uint64_t dckobj = ds->ds_dir->dd_crypto_obj; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t crypt = 0, format = 0, iters = 0, salt = 0; + uint8_t raw_keydata[MAX_MASTER_KEY_LEN]; + uint8_t raw_hmac_keydata[HMAC_SHA256_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + + ASSERT(dckobj != 0); + + ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); + if (ret != 0) + goto error; + + /* lookup values from the DSL Crypto Key */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MAX_MASTER_KEY_LEN, raw_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + HMAC_SHA256_KEYLEN, raw_hmac_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac); + if (ret != 0) + goto error; + + /* lookup values from the properties */ + dsl_pool_config_enter(ds->ds_dir->dd_pool, FTAG); + + ret = dsl_prop_get_dd(ds->ds_dir, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + 8, 1, &format, NULL, B_FALSE); + if (ret != 0) + goto error_unlock; + + if (format == ZFS_KEYFORMAT_PASSPHRASE) { + ret = dsl_prop_get_dd(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters, + NULL, B_FALSE); + if (ret != 0) + goto error_unlock; + + ret = dsl_prop_get_dd(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt, + NULL, B_FALSE); + if (ret != 0) + goto error_unlock; + } + + dsl_pool_config_exit(ds->ds_dir->dd_pool, FTAG); + + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + raw_keydata, MAX_MASTER_KEY_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + raw_hmac_keydata, HMAC_SHA256_KEYLEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv, + WRAPPING_IV_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac, + WRAPPING_MAC_LEN)); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); + + *nvl_out = nvl; + return (0); + +error_unlock: + 
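+ /* release the pool config lock taken above before cleaning up */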
dsl_pool_config_exit(ds->ds_dir->dd_pool, FTAG); +error: + nvlist_free(nvl); + + *nvl_out = NULL; + return (ret); +} + +uint64_t +dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx) +{ + dsl_crypto_key_t dck; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(crypt, >, ZIO_CRYPT_OFF); + + /* create the DSL Crypto Key ZAP object */ + dck.dck_obj = zap_create(tx->tx_pool->dp_meta_objset, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + + /* fill in the key (on the stack) and sync it to disk */ + dck.dck_wkey = wkey; + VERIFY0(zio_crypt_key_init(crypt, &dck.dck_key)); + + dsl_crypto_key_sync(&dck, tx); + + zio_crypt_key_destroy(&dck.dck_key); + bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); + + return (dck.dck_obj); +} + +uint64_t +dsl_crypto_key_clone_sync(dsl_dir_t *orig_dd, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx) +{ + dsl_pool_t *dp = tx->tx_pool; + dsl_crypto_key_t *orig_dck; + dsl_crypto_key_t dck; + + ASSERT(dmu_tx_is_syncing(tx)); + + /* get the key from the original dataset */ + VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, orig_dd, FTAG, + &orig_dck)); + + /* create the DSL Crypto Key ZAP object */ + dck.dck_obj = zap_create(dp->dp_meta_objset, DMU_OTN_ZAP_METADATA, + DMU_OT_NONE, 0, tx); + + /* assign the wrapping key temporarily */ + dck.dck_wkey = wkey; + + /* + * Fill in the temporary key with the original key's data. We only need + * to actually copy the fields that will be synced to disk, namely the + * master key, hmac key and crypt. + */ + bcopy(&orig_dck->dck_key.zk_master_keydata, + &dck.dck_key.zk_master_keydata, MAX_MASTER_KEY_LEN); + bcopy(&orig_dck->dck_key.zk_hmac_keydata, + &dck.dck_key.zk_hmac_keydata, HMAC_SHA256_KEYLEN); + + dck.dck_key.zk_crypt = orig_dck->dck_key.zk_crypt; + + /* sync the new key, wrapped with the new wrapping key */ + dsl_crypto_key_sync(&dck, tx); + bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); + + spa_keystore_dsl_key_rele(dp->dp_spa, orig_dck, FTAG); + + return (dck.dck_obj); +} + +void +dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx) +{ + /* destroy the DSL Crypto Key object */ + VERIFY0(zap_destroy(tx->tx_pool->dp_meta_objset, dckobj, tx)); +} + +zfs_keystatus_t +dsl_dataset_get_keystatus(dsl_dataset_t *ds) +{ + int ret; + dsl_wrapping_key_t *wkey; + + /* check if this dataset has a owns a dsl key */ + if (ds->ds_dir->dd_crypto_obj == 0) + return (ZFS_KEYSTATUS_NONE); + + /* lookup the wkey. 
if it doesn't exist the key is unavailable */ + ret = spa_keystore_wkey_hold_ddobj(ds->ds_dir->dd_pool->dp_spa, + ds->ds_dir->dd_object, FTAG, &wkey); + if (ret != 0) + return (ZFS_KEYSTATUS_UNAVAILABLE); + + dsl_wrapping_key_rele(wkey, FTAG); + + return (ZFS_KEYSTATUS_AVAILABLE); +} + +int +dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt) +{ + if (dd->dd_crypto_obj == 0) { + *crypt = ZIO_CRYPT_OFF; + return (0); + } + + return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt)); +} + +int +spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + ret = zio_crypt_key_get_salt(&dck->dck_key, salt); + if (ret != 0) + goto error; + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + return (ret); +} + +/* + * This function serve as a multiplexer for encryption and decryption of + * all blocks (except the L2ARC). For encryption, it will populate the IV, + * salt, MAC, and cabd (the ciphertext). On decryption it will simply use + * these fields to populate pabd (the plaintext). + */ +/* ARGSUSED */ +int +spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, zbookmark_phys_t *zb, + const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd, + abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt) +{ + int ret; + dmu_object_type_t ot = BP_GET_TYPE(bp); + dsl_crypto_key_t *dck = NULL; + uint8_t *plainbuf = NULL, *cipherbuf = NULL; + + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + ASSERT(!BP_IS_EMBEDDED(bp)); + ASSERT(BP_IS_ENCRYPTED(bp)); + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, zb->zb_objset, FTAG, &dck); + if (ret != 0) + return (ret); + + if (encrypt) { + plainbuf = abd_borrow_buf_copy(pabd, datalen); + cipherbuf = abd_borrow_buf(cabd, datalen); + } else { + plainbuf = abd_borrow_buf(pabd, datalen); + cipherbuf = abd_borrow_buf_copy(cabd, datalen); + } + + /* + * Both encryption and decryption functions need a salt for key + * generation and an IV. When encrypting a non-dedup block, we + * generate the salt and IV randomly to be stored by the caller. Dedup + * blocks perform a (more expensive) HMAC of the plaintext to obtain + * the salt and the IV. ZIL blocks have their salt and IV generated + * at allocation time in zio_alloc_zil(). On decryption, we simply use + * the provided values. 
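+ * The MAC is produced by zio_do_crypt_data() on encryption and checked by it on decryption.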
+ */ + if (encrypt && ot != DMU_OT_INTENT_LOG && !BP_GET_DEDUP(bp)) { + ret = zio_crypt_key_get_salt(&dck->dck_key, salt); + if (ret != 0) + goto error; + + ret = zio_crypt_generate_iv(iv); + if (ret != 0) + goto error; + } else if (encrypt && BP_GET_DEDUP(bp)) { + ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key, + plainbuf, datalen, iv, salt); + if (ret != 0) + goto error; + } + + /* call lower level function to perform encryption / decryption */ + ret = zio_do_crypt_data(encrypt, &dck->dck_key, salt, ot, iv, mac, + datalen, plainbuf, cipherbuf, no_crypt); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, plainbuf, datalen); + abd_return_buf_copy(cabd, cipherbuf, datalen); + } else { + abd_return_buf_copy(pabd, plainbuf, datalen); + abd_return_buf(cabd, cipherbuf, datalen); + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (0); + +error: + if (encrypt) { + /* zero out any state we might have changed while encrypting */ + bzero(salt, ZIO_DATA_SALT_LEN); + bzero(iv, ZIO_DATA_IV_LEN); + bzero(mac, ZIO_DATA_MAC_LEN); + abd_return_buf(pabd, plainbuf, datalen); + abd_return_buf_copy(cabd, cipherbuf, datalen); + } else { + abd_return_buf_copy(pabd, plainbuf, datalen); + abd_return_buf(cabd, cipherbuf, datalen); + } + + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (ret); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index 90f4956e8105..6220b5f4fca6 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -376,8 +376,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) } int -dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, - dsl_dataset_t **dsp) +dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; dmu_buf_t *dbuf; @@ -535,11 +535,27 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); *dsp = ds; + + if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) { + err = spa_keystore_create_mapping(dp->dp_spa, ds, ds); + if (err != 0) { + dsl_dataset_rele(ds, tag); + return (SET_ERROR(EACCES)); + } + } + return (0); } int -dsl_dataset_hold(dsl_pool_t *dp, const char *name, +dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, + dsl_dataset_t **dsp) +{ + return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp)); +} + +int +dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; @@ -555,7 +571,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, ASSERT(dsl_pool_config_held(dp)); obj = dsl_dir_phys(dd)->dd_head_dataset_obj; if (obj != 0) - err = dsl_dataset_hold_obj(dp, obj, tag, &ds); + err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds); else err = SET_ERROR(ENOENT); @@ -564,16 +580,18 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, dsl_dataset_t *snap_ds; if (*snapname++ != '@') { - dsl_dataset_rele(ds, tag); + dsl_dataset_rele_flags(ds, flags, tag); dsl_dir_rele(dd, FTAG); return (SET_ERROR(ENOENT)); } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(ds, snapname, &obj); - if (err == 0) - err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); - dsl_dataset_rele(ds, tag); + if (err == 0) { + err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, + 
&snap_ds); + } + dsl_dataset_rele_flags(ds, flags, tag); if (err == 0) { mutex_enter(&snap_ds->ds_lock); @@ -591,14 +609,21 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, } int -dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, +dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag, + dsl_dataset_t **dsp) +{ + return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp)); +} + +int +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag)) { - dsl_dataset_rele(*dsp, tag); + dsl_dataset_rele_flags(*dsp, flags, tag); *dsp = NULL; return (SET_ERROR(EBUSY)); } @@ -606,14 +631,14 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, } int -dsl_dataset_own(dsl_pool_t *dp, const char *name, +dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold(dp, name, tag, dsp); + int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag)) { - dsl_dataset_rele(*dsp, tag); + dsl_dataset_rele_flags(*dsp, flags, tag); return (SET_ERROR(EBUSY)); } return (0); @@ -689,13 +714,24 @@ dsl_dataset_namelen(dsl_dataset_t *ds) } void -dsl_dataset_rele(dsl_dataset_t *ds, void *tag) +dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) { + if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 && + (flags & DS_HOLD_FLAG_DECRYPT)) { + (void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa, + ds->ds_object, ds); + } dmu_buf_rele(ds->ds_dbuf, tag); } void -dsl_dataset_disown(dsl_dataset_t *ds, void *tag) +dsl_dataset_rele(dsl_dataset_t *ds, void *tag) +{ + dsl_dataset_rele_flags(ds, 0, tag); +} + +void +dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) { ASSERT3P(ds->ds_owner, ==, tag); ASSERT(ds->ds_dbuf != NULL); @@ -704,7 +740,7 @@ dsl_dataset_disown(dsl_dataset_t *ds, void *tag) ds->ds_owner = NULL; mutex_exit(&ds->ds_lock); dsl_dataset_long_rele(ds, tag); - dsl_dataset_rele(ds, tag); + dsl_dataset_rele_flags(ds, flags, tag); } boolean_t @@ -733,7 +769,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds) return (rv); } -static void +void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; @@ -763,7 +799,7 @@ dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx) + dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; @@ -862,6 +898,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, } } + /* handle encryption */ + dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx); + if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; @@ -897,7 +936,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, - dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) + dsl_dataset_t *origin, uint64_t flags, cred_t *cr, + dsl_crypto_params_t *dcp, dmu_tx_t *tx) { dsl_pool_t *dp = pdd->dd_pool; uint64_t dsobj, ddobj; @@ -909,7 +949,7 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, ddobj = 
dsl_dir_create_sync(dp, pdd, lastname, tx); VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); - dsobj = dsl_dataset_create_sync_dd(dd, origin, + dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp, flags & ~DS_CREATE_FLAG_NODIRTY, tx); dsl_deleg_set_create_perms(dd, tx, cr); @@ -1788,6 +1828,10 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) DS_FIELD_RESUME_COMPRESSOK) == 0) { fnvlist_add_boolean(token_nv, "compressok"); } + if (zap_contains(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_RAWOK) == 0) { + fnvlist_add_boolean(token_nv, "rawok"); + } packed = fnvlist_pack(token_nv, &packed_size); fnvlist_free(token_nv); compressed = kmem_alloc(packed_size, KM_SLEEP); @@ -1819,8 +1863,9 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { + int err; dsl_pool_t *dp = ds->ds_dir->dd_pool; - uint64_t refd, avail, uobjs, aobjs, ratio; + uint64_t refd, avail, uobjs, aobjs, ratio, crypt; ASSERT(dsl_pool_config_held(dp)); @@ -1869,13 +1914,19 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) ds->ds_userrefs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, + dsl_dataset_get_keystatus(ds)); + + err = dsl_dir_get_crypt(ds->ds_dir, &crypt); + if (err == 0) + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, crypt); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { uint64_t written, comp, uncomp; dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_dataset_t *prev; - int err = dsl_dataset_hold_obj(dp, + err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err == 0) { err = dsl_dataset_space_written(prev, ds, &written, @@ -2288,7 +2339,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) fnvlist_add_string(ddra->ddra_result, "target", namebuf); cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", - ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); + ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); diff --git a/usr/src/uts/common/fs/zfs/dsl_destroy.c b/usr/src/uts/common/fs/zfs/dsl_destroy.c index 09fa5406ec60..ff06fed1d613 100644 --- a/usr/src/uts/common/fs/zfs/dsl_destroy.c +++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c @@ -592,8 +592,8 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) ka.ds = ds; ka.tx = tx; VERIFY0(traverse_dataset(ds, - dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST, - kill_blkptr, &ka)); + dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST | + TRAVERSE_NO_DECRYPT, kill_blkptr, &ka)); ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == 0); } @@ -700,6 +700,11 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) for (t = 0; t < DD_USED_NUM; t++) ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); + if (dd->dd_crypto_obj != 0) { + dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx); + (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); + } + VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); @@ -943,7 +948,8 @@ dsl_destroy_head(const char *name) * remove the objects from open context so that the txg sync * is not too long. 
*/ - error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, + FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = dsl_dataset_phys(dmu_objset_ds(os))-> @@ -954,7 +960,7 @@ dsl_destroy_head(const char *name) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); } } diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c index b4e86ea7929d..ee1bfab2a9e2 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dir.c +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c @@ -158,6 +158,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, { dmu_buf_t *dbuf; dsl_dir_t *dd; + dmu_object_info_t doi; int err; ASSERT(dsl_pool_config_held(dp)); @@ -166,14 +167,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); -#ifdef ZFS_DEBUG - { - dmu_object_info_t doi; - dmu_object_info_from_db(dbuf, &doi); - ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); - ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); - } -#endif + + dmu_object_info_from_db(dbuf, &doi); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); + ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); + if (dd == NULL) { dsl_dir_t *winner; @@ -181,6 +179,15 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; + + if (dsl_dir_is_zapified(dd) && + zap_contains(dp->dp_meta_objset, ddobj, + DD_FIELD_CRYPTO_KEY_OBJ) == 0) { + VERIFY0(zap_lookup(dp->dp_meta_objset, + ddobj, DD_FIELD_CRYPTO_KEY_OBJ, + sizeof (uint64_t), 1, &dd->dd_crypto_obj)); + } + mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); dsl_prop_init(dd); @@ -911,6 +918,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; + dmu_buf_rele(dbuf, FTAG); return (ddobj); @@ -1785,6 +1793,14 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx) } } + /* check for encryption errors */ + error = dsl_dir_rename_crypt_check(dd, newparent); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(EACCES)); + } + /* no rename into our descendant */ if (closest_common_ancestor(dd, newparent) == dd) { dsl_dir_rele(newparent, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c index 58bfb614328e..762b7ed28548 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -345,7 +345,8 @@ dsl_pool_close(dsl_pool_t *dp) } dsl_pool_t * -dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) +dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp, + uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); @@ -359,6 +360,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); + spa->spa_meta_objset = dp->dp_meta_objset; /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, @@ -396,8 +398,19 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); + /* + * some features can get 
enabled when creating the root dataset, so we + * create the feature objects here. + */ + if (spa_version(spa) >= SPA_VERSION_FEATURES) + spa_feature_create_zap_objects(spa, tx); + + if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT) + spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx); + /* create the root dataset */ - obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); + obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); @@ -848,7 +861,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) /* create the origin dir, ds, & snap-ds */ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, - NULL, 0, kcred, tx); + NULL, 0, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index 1963f15385a3..1150af96f758 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -550,7 +550,7 @@ dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) zilog = zil_alloc(dp->dp_meta_objset, zh); (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, - claim_txg); + claim_txg, B_FALSE); zil_free(zilog); } @@ -562,6 +562,7 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp, { zbookmark_phys_t czb; arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; if (zfs_no_scrub_prefetch) return; @@ -570,11 +571,16 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp, (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)) return; + if (BP_IS_ENCRYPTED(bp)) { + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE); + ASSERT3U(BP_GET_LEVEL(bp), ==, 0); + zio_flags |= ZIO_FLAG_RAW; + } + SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid); (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp, - NULL, NULL, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb); + NULL, NULL, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb); } static boolean_t @@ -660,6 +666,11 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_buf_t *buf; + if (BP_IS_ENCRYPTED(bp)) { + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + zio_flags |= ZIO_FLAG_RAW; + } + err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err) { @@ -780,7 +791,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, return; /* - * If dsl_scan_ddt() has aready visited this block, it will have + * If dsl_scan_ddt() has already visited this block, it will have * already done any translations or scrubbing, so don't call the * callback again. */ diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index be5b66fd3b9a..ea86e772022e 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -1100,6 +1100,8 @@ spa_activate(spa_t *spa, int mode) avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); + + spa_keystore_init(&spa->spa_keystore); } /* @@ -1139,10 +1141,11 @@ spa_deactivate(spa_t *spa) * still have errors left in the queues. 
Empty them just in case. */ spa_errlog_drain(spa); - avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); + spa_keystore_fini(&spa->spa_keystore); + spa->spa_state = POOL_STATE_UNINITIALIZED; mutex_enter(&spa->spa_proc_lock); @@ -1970,8 +1973,8 @@ spa_load_verify(spa_t *spa) if (spa_load_verify_metadata) { error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - spa_load_verify_cb, rio); + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); } (void) zio_wait(rio); @@ -3587,12 +3590,41 @@ spa_l2cache_drop(spa_t *spa) } } +/* + * Verify encryption parameters for spa creation. If we have specified a crypt, + * we must have a fully specified key, with the encryption feature enabled. + * Otherwise, we should not have any specified encryption parameters. + */ +static int +spa_create_check_encryption_params(dsl_crypto_params_t *dcp, + boolean_t has_encryption) +{ + if (dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT) { + if (!has_encryption || dcp->cp_wkey == NULL || + dcp->cp_keylocation == NULL || + dcp->cp_keyformat == ZFS_KEYFORMAT_NONE) + return (SET_ERROR(EINVAL)); + + if (dcp->cp_keyformat == ZFS_KEYFORMAT_PASSPHRASE && + (dcp->cp_salt == 0 || dcp->cp_iters == 0)) + return (SET_ERROR(EINVAL)); + } else { + if (dcp->cp_wkey != NULL || dcp->cp_keylocation != NULL || + dcp->cp_keyformat != ZFS_KEYFORMAT_NONE || + dcp->cp_salt != 0 || dcp->cp_iters != 0) + return (SET_ERROR(EINVAL)); + } + + return (0); +} + /* * Pool Creation */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - nvlist_t *zplprops) + nvlist_t *zplprops, dsl_crypto_params_t *dcp) { spa_t *spa; char *altroot = NULL; @@ -3603,8 +3635,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, uint64_t txg = TXG_INITIAL; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; - uint64_t version, obj; + uint64_t version, obj, root_dsobj = 0; boolean_t has_features; + boolean_t has_encryption; + spa_feature_t feat; + char *feat_name; /* * If this pool already exists, return failure. 
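(Illustrative aside, not part of the patch.) spa_create_check_encryption_params() above accepts exactly two states: no encryption parameters at all, or a fully specified set. The sketch below fills in a passphrase-keyed parameter block that would satisfy the check; the helper name and literal values are examples only, and the wrapping key is assumed to come from dsl_wrapping_key_create(). The pool props must also request feature@encryption, since the check rejects a crypt setting when has_encryption is false.

static void
example_fill_passphrase_dcp(dsl_crypto_params_t *dcp, dsl_wrapping_key_t *wkey)
{
        dcp->cp_crypt = ZIO_CRYPT_AES_256_CCM;          /* anything but OFF/INHERIT */
        dcp->cp_keyformat = ZFS_KEYFORMAT_PASSPHRASE;
        dcp->cp_keylocation = "prompt";                 /* must be non-NULL */
        dcp->cp_salt = 0x0123456789abcdefULL;           /* nonzero for passphrase keys */
        dcp->cp_iters = 350000;                         /* nonzero PBKDF2 iteration count */
        dcp->cp_wkey = wkey;                            /* must be non-NULL */
}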
@@ -3631,10 +3666,27 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, } has_features = B_FALSE; + has_encryption = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { - if (zpool_prop_feature(nvpair_name(elem))) + if (zpool_prop_feature(nvpair_name(elem))) { has_features = B_TRUE; + feat_name = strchr(nvpair_name(elem), '@') + 1; + VERIFY0(zfeature_lookup_name(feat_name, &feat)); + if (feat == SPA_FEATURE_ENCRYPTION) + has_encryption = B_TRUE; + } + } + + /* verify encryption params, if they were provided */ + if (dcp != NULL) { + error = spa_create_check_encryption_params(dcp, has_encryption); + if (error != 0) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } } if (has_features || nvlist_lookup_uint64(props, @@ -3724,8 +3776,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, } spa->spa_is_initializing = B_TRUE; - spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); - spa->spa_meta_objset = dp->dp_meta_objset; + spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); spa->spa_is_initializing = B_FALSE; /* @@ -3750,9 +3801,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, cmn_err(CE_PANIC, "failed to add pool config"); } - if (spa_version(spa) >= SPA_VERSION_FEATURES) - spa_feature_create_zap_objects(spa, tx); - if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { @@ -3812,14 +3860,25 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, dmu_tx_commit(tx); - spa->spa_sync_on = B_TRUE; - txg_sync_start(spa->spa_dsl_pool); - /* - * We explicitly wait for the first transaction to complete so that our - * bean counters are appropriately updated. + * If the root dataset is encrypted we will need to create key mappings + * for the zio layer before we start to write any data to disk and hold + * them until after the first txg has been synced. Waiting for the first + * transaction to complete also ensures that our bean counters are + * appropriately updated. 
*/ - txg_wait_synced(spa->spa_dsl_pool, txg); + if (dp->dp_root_dir->dd_crypto_obj != 0) { + root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj; + VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj, + dp->dp_root_dir, FTAG)); + } + + spa->spa_sync_on = B_TRUE; + txg_sync_start(dp); + txg_wait_synced(dp, txg); + + if (dp->dp_root_dir->dd_crypto_obj != 0) + VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG)); spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_CREATE); diff --git a/usr/src/uts/common/fs/zfs/spa_history.c b/usr/src/uts/common/fs/zfs/spa_history.c index 2b15e79a9853..2b272a84d6db 100644 --- a/usr/src/uts/common/fs/zfs/spa_history.c +++ b/usr/src/uts/common/fs/zfs/spa_history.c @@ -302,11 +302,16 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { int err = 0; dmu_tx_t *tx; - nvlist_t *nvarg; + nvlist_t *nvarg, *in_nvl = NULL; if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) return (SET_ERROR(EINVAL)); + err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); + if (err == 0) { + (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); + } + tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h index 10c920ff9d97..788db9bfc85c 100644 --- a/usr/src/uts/common/fs/zfs/sys/arc.h +++ b/usr/src/uts/common/fs/zfs/sys/arc.h @@ -58,11 +58,22 @@ _NOTE(CONSTCOND) } while (0) typedef struct arc_buf_hdr arc_buf_hdr_t; typedef struct arc_buf arc_buf_t; -typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); + +/* + * Because the ARC can store encrypted data, errors (not due to bugs) may arise + * while transforming data into its desired format - specifically, when + * decrypting, the key may not be present, or the HMAC may not be correct, + * which signifies deliberate tampering with the on-disk state + * (assuming that the checksum was correct). The "error" parameter will be + * nonzero in this case, even if there is no associated zio. + */ +typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf, + void *private); +typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); /* generic arc_done_func_t's which you can use */ -arc_done_func_t arc_bcopy_func; -arc_done_func_t arc_getbuf_func; +arc_read_done_func_t arc_bcopy_func; +arc_read_done_func_t arc_getbuf_func; typedef enum arc_flags { @@ -90,20 +101,22 @@ typedef enum arc_flags ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */ ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */ ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */ + /* encrypted on disk (may or may not be encrypted in memory) */ + ARC_FLAG_ENCRYPTED = 1 << 14, /* indicates that the buffer contains metadata (otherwise, data) */ - ARC_FLAG_BUFC_METADATA = 1 << 14, + ARC_FLAG_BUFC_METADATA = 1 << 15, /* Flags specifying whether optional hdr struct fields are defined */ - ARC_FLAG_HAS_L1HDR = 1 << 15, - ARC_FLAG_HAS_L2HDR = 1 << 16, + ARC_FLAG_HAS_L1HDR = 1 << 16, + ARC_FLAG_HAS_L2HDR = 1 << 17, /* * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. * This allows the l2arc to use the blkptr's checksum to verify * the data without having to store the checksum in the hdr. 
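(Illustrative aside, not part of the patch.) The arc.h change above splits the old arc_done_func_t so that read callbacks receive an explicit error argument, which can be nonzero even when no zio failed (missing key, MAC mismatch). A sketch of a callback written against the new signature; the function name is hypothetical:

static void
example_read_done(zio_t *zio, int error, arc_buf_t *buf, void *private)
{
        arc_buf_t **bufp = private;

        if (error != 0) {
                /* e.g. key not loaded, or authentication failed */
                *bufp = NULL;
                return;
        }

        /* buf now holds fully untransformed (decrypted) data */
        *bufp = buf;
}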
*/ - ARC_FLAG_COMPRESSED_ARC = 1 << 17, - ARC_FLAG_SHARED_DATA = 1 << 18, + ARC_FLAG_COMPRESSED_ARC = 1 << 18, + ARC_FLAG_SHARED_DATA = 1 << 19, /* * The arc buffer's compression mode is stored in the top 7 bits of the @@ -122,7 +135,12 @@ typedef enum arc_flags typedef enum arc_buf_flags { ARC_BUF_FLAG_SHARED = 1 << 0, - ARC_BUF_FLAG_COMPRESSED = 1 << 1 + ARC_BUF_FLAG_COMPRESSED = 1 << 1, + /* + * indicates whether this arc_buf_t is encrypted, regardless of + * state on-disk + */ + ARC_BUF_FLAG_ENCRYPTED = 1 << 2 } arc_buf_flags_t; struct arc_buf { @@ -155,15 +173,30 @@ typedef enum arc_space_type { void arc_space_consume(uint64_t space, arc_space_type_t type); void arc_space_return(uint64_t space, arc_space_type_t type); boolean_t arc_is_metadata(arc_buf_t *buf); +boolean_t arc_is_encrypted(arc_buf_t *buf); enum zio_compress arc_get_compression(arc_buf_t *buf); -int arc_decompress(arc_buf_t *buf); +void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac); +int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, + boolean_t in_place); +void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac); arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size); arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, + boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); void arc_return_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_buf_destroy(arc_buf_t *buf, void *tag); @@ -178,12 +211,12 @@ int arc_referenced(arc_buf_t *buf); #endif int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, - arc_done_func_t *done, void *private, zio_priority_t priority, int flags, - arc_flags_t *arc_flags, const zbookmark_phys_t *zb); + arc_read_done_func_t *done, void *private, zio_priority_t priority, + int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, - arc_done_func_t *ready, arc_done_func_t *child_ready, - arc_done_func_t *physdone, arc_done_func_t *done, + arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, + arc_write_done_func_t *physdone, arc_write_done_func_t *done, void *private, zio_priority_t priority, int zio_flags, const zbookmark_phys_t *zb); void arc_freed(spa_t *spa, const blkptr_t *bp); diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h index 6862599a6540..b69042deee53 100644 --- a/usr/src/uts/common/fs/zfs/sys/dbuf.h +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h @@ -54,6 +54,7 @@ extern "C" { #define DB_RF_NOPREFETCH (1 << 3) #define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_CACHED (1 << 5) +#define 
DB_RF_NO_DECRYPT (1 << 6) /* * The simplified state transition diagram for dbufs looks like: diff --git a/usr/src/uts/common/fs/zfs/sys/ddt.h b/usr/src/uts/common/fs/zfs/sys/ddt.h index 15d2a9a7ad71..244254e42e9a 100644 --- a/usr/src/uts/common/fs/zfs/sys/ddt.h +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h @@ -69,7 +69,7 @@ typedef struct ddt_key { /* * Encoded with logical & physical size, and compression, as follows: * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | 0 | 0 | 0 | comp | PSIZE | LSIZE | + * | 0 | 0 | 0 |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ */ uint64_t ddk_prop; @@ -85,11 +85,17 @@ typedef struct ddt_key { #define DDK_SET_PSIZE(ddk, x) \ BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) -#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) +#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7) +#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x) + +#define DDK_GET_ENCRYPTED(ddk) BF64_GET((ddk)->ddk_prop, 39, 1) +#define DDK_SET_ENCRYPTED(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x) #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) +#define DDE_GET_NDVAS(dde) (DDK_GET_ENCRYPTED(&dde->dde_key) \ + ? SPA_DVAS_PER_BP : SPA_DVAS_PER_BP - 1) + typedef struct ddt_phys { dva_t ddp_dva[SPA_DVAS_PER_BP]; uint64_t ddp_refcnt; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 7eb6549cce18..3f8f059a909b 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -75,6 +75,7 @@ struct nvlist; struct arc_buf; struct zio_prop; struct sa_handle; +struct dsl_crypto_params; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; @@ -104,16 +105,18 @@ typedef enum dmu_object_byteswap { #define DMU_OT_NEWTYPE 0x80 #define DMU_OT_METADATA 0x40 -#define DMU_OT_BYTESWAP_MASK 0x3f +#define DMU_OT_ENCRYPTED 0x20 +#define DMU_OT_BYTESWAP_MASK 0x1f /* * Defines a uint8_t object type. Object types specify if the data * in the object is metadata (boolean) and how to byteswap the data * (dmu_object_byteswap_t). */ -#define DMU_OT(byteswap, metadata) \ +#define DMU_OT(byteswap, metadata, encrypted) \ (DMU_OT_NEWTYPE | \ ((metadata) ? DMU_OT_METADATA : 0) | \ + ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \ ((byteswap) & DMU_OT_BYTESWAP_MASK)) #define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ @@ -124,6 +127,10 @@ typedef enum dmu_object_byteswap { ((ot) & DMU_OT_METADATA) : \ dmu_ot[(ot)].ot_metadata) +#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_ENCRYPTED) : \ + dmu_ot[(int)(ot)].ot_encrypt) + /* * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill @@ -219,16 +226,27 @@ typedef enum dmu_object_type { /* * Names for valid types declared with DMU_OT(). 
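(Illustrative aside, not part of the patch.) With the extra argument to DMU_OT() above, an object type byte now packs a byteswap index in the low five bits plus metadata and encrypted flag bits. A small sketch of how the bits decompose; the DMU_OTN_* name it compares against is defined just below:

static void
example_dmu_ot_bits(void)
{
        dmu_object_type_t ot = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE);

        ASSERT(ot & DMU_OT_NEWTYPE);
        ASSERT(DMU_OT_IS_METADATA(ot));
        ASSERT(DMU_OT_IS_ENCRYPTED(ot));
        ASSERT3U(ot & DMU_OT_BYTESWAP_MASK, ==, DMU_BSWAP_UINT64);
        ASSERT3U(ot, ==, DMU_OTN_UINT64_ENC_METADATA);
}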
*/ - DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), - DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), - DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), - DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), - DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), - DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), - DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), - DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), - DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), - DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE), + + DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE), + DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE), + DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE), + DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE), + DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE), + DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE), + DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE), + DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE), + DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE), + DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE), } dmu_object_type_t; typedef enum txg_how { @@ -265,20 +283,26 @@ void zfs_znode_byteswap(void *buf, size_t size); */ #define DMU_BONUS_BLKID (-1ULL) #define DMU_SPILL_BLKID (-2ULL) + /* * Public routines to create, destroy, open, and close objsets. 
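(Illustrative aside, not part of the patch.) dmu_objset_own()/dmu_objset_disown() above gain a key_required flag. A sketch of the pairing for a consumer that needs decrypted access; the helper name is hypothetical and error handling is minimal:

static int
example_own_encrypted_objset(const char *name, void *tag)
{
        objset_t *os;
        int err;

        /* key_required = B_TRUE: fails unless the dataset's key is loaded */
        err = dmu_objset_own(name, DMU_OST_ZFS, B_FALSE, B_TRUE, tag, &os);
        if (err != 0)
                return (err);

        /* ... work with decrypted data ... */

        dmu_objset_disown(os, B_TRUE, tag);
        return (0);
}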
*/ +typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg, + cred_t *cr, dmu_tx_t *tx); + int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, const char *origin); + struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func, + void *arg); +int dmu_objset_clone(const char *name, const char *origin, + struct dsl_crypto_params *dcp); int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, struct nvlist *errlist); int dmu_objset_snapshot_one(const char *fsname, const char *snapname); @@ -420,7 +444,12 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, #define WP_SPILL 0x4 void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, - enum zio_compress compress_override, struct zio_prop *zp); + struct zio_prop *zp); +void dmu_write_policy_override_compress(struct zio_prop *zp, + enum zio_compress compress); +void dmu_write_policy_override_encrypt(struct zio_prop *zp, boolean_t byteorder, + enum zio_compress compress, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac); /* * The bonus data is accessed more or less like a regular buffer. * You must dmu_bonus_hold() to get the buffer, which will give you a @@ -433,6 +462,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, * * Returns ENOENT, EIO, or 0. 
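(Illustrative aside, not part of the patch.) dmu_write_policy() above no longer takes a compression override; the new dmu_write_policy_override_*() helpers instead stamp extra properties onto an already-built policy. A sketch of how a raw receive path might apply crypt parameters that arrived with the send stream; the function and parameter names are hypothetical:

static void
example_raw_recv_policy(objset_t *os, dnode_t *dn, boolean_t byteorder,
    enum zio_compress compress, const uint8_t *salt, const uint8_t *iv,
    const uint8_t *mac)
{
        zio_prop_t zp;

        dmu_write_policy(os, dn, 0, 0, &zp);

        /* write the block back exactly as it was sent: no re-encryption */
        dmu_write_policy_override_encrypt(&zp, byteorder, compress,
            salt, iv, mac);

        /* ... hand &zp to the write path ... */
}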
*/ +int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, + uint32_t flags, dmu_buf_t **dbp); int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); @@ -725,6 +756,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object); */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ +#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, @@ -747,6 +779,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, dmu_tx_t *tx); +void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf, + dmu_tx_t *tx); +void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac); +void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx); int dmu_xuio_init(struct xuio *uio, int niov); void dmu_xuio_fini(struct xuio *uio); int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, @@ -788,6 +826,7 @@ typedef void arc_byteswap_func_t(void *buf, size_t size); typedef struct dmu_object_type_info { dmu_object_byteswap_t ot_byteswap; boolean_t ot_metadata; + boolean_t ot_encrypt; char *ot_name; } dmu_object_type_info_t; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index b20a9f9557cb..33a2531d5dda 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -75,6 +75,8 @@ struct objset { spa_t *os_spa; arc_buf_t *os_phys_buf; objset_phys_t *os_phys; + boolean_t os_encrypted; + /* * The following "special" dnodes have no parent, are exempt * from dnode_move(), and are not recorded in os_dnodes, but they @@ -150,12 +152,14 @@ struct objset { /* called from zpl */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp); int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj, - dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); -void dmu_objset_refresh_ownership(objset_t *os, void *tag); + dmu_objset_type_t type, boolean_t readonly, boolean_t key_required, + void *tag, objset_t **osp); +void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed, + void *tag); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_stats(objset_t *os, nvlist_t *nv); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_send.h b/usr/src/uts/common/fs/zfs/sys/dmu_send.h index 38b1b042e54e..781d1e62e5dc 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_send.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_send.h @@ -42,16 +42,15 @@ struct dmu_replay_record; extern const char *recv_clone_name; int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, int outfd, - uint64_t resumeobj, uint64_t 
resumeoff, - struct vnode *vp, offset_t *off); + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd, + uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, boolean_t stream_compressed, uint64_t *sizep); int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, boolean_t stream_compressed, uint64_t *sizep); int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, struct vnode *vp, offset_t *off); + boolean_t rawok, int outfd, struct vnode *vp, offset_t *off); typedef struct dmu_recv_cookie { struct dsl_dataset *drc_ds; @@ -63,6 +62,7 @@ typedef struct dmu_recv_cookie { boolean_t drc_byteswap; boolean_t drc_force; boolean_t drc_resumable; + boolean_t drc_raw; struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h index c010edd440d9..8ceef5cf13e1 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h @@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, #define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) #define TRAVERSE_HARD (1<<4) +/* + * Encrypted dnode blocks have encrypted bonus buffers while the rest + * of the dnode is left unencrypted. Callers can specify the + * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that + * they wish to receive the raw encrypted dnodes instead of attempting + * to read the logical data. + */ +#define TRAVERSE_NO_DECRYPT (1<<5) + /* Special traverse error return value to indicate skipping of children */ #define TRAVERSE_VISIT_NO_CHILDREN -1 diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h index 92f50a01774c..b55c0ad8d88a 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -74,9 +74,7 @@ extern "C" { /* * dnode id flags * - * Note: a file will never ever have its - * ids moved from bonus->spill - * and only in a crypto environment would it be on spill + * Note: a file will never ever have its ids moved from bonus->spill */ #define DN_ID_CHKED_BONUS 0x1 #define DN_ID_CHKED_SPILL 0x2 @@ -87,6 +85,9 @@ extern "C" { * Derived constants. */ #define DNODE_SIZE (1 << DNODE_SHIFT) +#define DN_BONUS_SIZE(dnsize) ((dnsize) - DNODE_CORE_SIZE - \ + (1 << SPA_BLKPTRSHIFT)) +#define DN_SLOTS_TO_BONUSLEN(slots) DN_BONUS_SIZE((slots) << DNODE_SHIFT) #define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) #define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) #define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) @@ -109,6 +110,10 @@ extern "C" { #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) +#define DN_MAX_BONUS_LEN(dnp) \ + ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \ + (uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \ + (uint8_t *)(dnp + (dnp->dn_extra_slots + 1)) - (uint8_t *)DN_BONUS(dnp)) #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? 
\ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) @@ -143,7 +148,15 @@ typedef struct dnode_phys { uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ - uint8_t dn_pad2[4]; + /* + * dn_extra_slots is a placeholder for a feature in other ZFS + * implementations. In this implementation, its value is always + * 0. We declare it here to ensure it isn't used for a different + * purpose, and to improve code portability with implementations + * which support extra dnode slots. + */ + uint8_t dn_extra_slots; /* # of subsequent slots consumed */ + uint8_t dn_pad2[3]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ @@ -156,6 +169,9 @@ typedef struct dnode_phys { blkptr_t dn_spill; } dnode_phys_t; +#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \ + (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)) + struct dnode { /* * Protects the structure of the dnode, including the number of levels diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h new file mode 100644 index 000000000000..243acb6a243f --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_DSL_CRYPT_H +#define _SYS_DSL_CRYPT_H + +#include +#include +#include +#include +#include + +/* ZAP entry keys for DSL Encryption Keys stored on disk */ +#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE" +#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV" +#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC" +#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1" +#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" + +/* in memory representation of a wrapping key */ +typedef struct dsl_wrapping_key { + /* link on spa_keystore_t:sk_wkeys */ + avl_node_t wk_avl_link; + + /* actual wrapping key */ + crypto_key_t wk_key; + + /* refcount of number of dsl_crypto_key_t's holding this struct */ + refcount_t wk_refcnt; + + /* dsl directory object that owns this wrapping key */ + uint64_t wk_ddobj; +} dsl_wrapping_key_t; + +typedef enum dcp_flags { + DCP_FLAG_RAW_RECV = (1 << 0) /* dcp represents raw recv */ +} dcp_flags_t; + +/* + * This struct is a simple wrapper around all the parameters that are usually + * required to setup encryption. It exists so that all of the params can be + * passed around the kernel together for convenience. 
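(Illustrative aside, not part of the patch.) The structures and prototypes that follow tie together at runtime roughly as sketched below: an owner registers a dataset-to-key mapping, the zio layer looks the key up by objset id, and the mapping is dropped when the owner goes away. This is condensed into one hypothetical function for illustration; locking and refcount details are omitted.

static int
example_keystore_roundtrip(spa_t *spa, dsl_dataset_t *ds, void *tag)
{
        dsl_crypto_key_t *dck;
        int err;

        err = spa_keystore_create_mapping(spa, ds, tag);
        if (err != 0)
                return (err);

        err = spa_keystore_lookup_key(spa, ds->ds_object, tag, &dck);
        if (err == 0) {
                /* dck->dck_key is what zio_do_crypt_data()/_abd() consume */
                spa_keystore_dsl_key_rele(spa, dck, tag);
        }

        (void) spa_keystore_remove_mapping(spa, ds->ds_object, tag);
        return (err);
}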
+ */ +typedef struct dsl_crypto_params { + /* the encryption algorithm */ + enum zio_encrypt cp_crypt; + + /* flags for extra info */ + dcp_flags_t cp_flags; + + /* keyformat property enum */ + zfs_keyformat_t cp_keyformat; + + /* the pbkdf2 iterations, if the keyformat is of type passphrase */ + uint64_t cp_salt; + + /* the pbkdf2 iterations, if the keysource is of type passphrase */ + uint64_t cp_iters; + + /* keylocation property string */ + char *cp_keylocation; + + /* the wrapping key */ + dsl_wrapping_key_t *cp_wkey; +} dsl_crypto_params_t; + +/* in-memory representation of an encryption key for a dataset */ +typedef struct dsl_crypto_key { + /* link on spa_keystore_t:sk_dsl_keys */ + avl_node_t dck_avl_link; + + /* refcount of dsl_key_mapping_t's holding this key */ + refcount_t dck_refcnt; + + /* master key used to derive encryption keys */ + zio_crypt_key_t dck_key; + + /* wrapping key for syncing this structure to disk */ + dsl_wrapping_key_t *dck_wkey; + + /* on-disk object id */ + uint64_t dck_obj; +} dsl_crypto_key_t; + +/* + * In memory mapping of a dataset to a DSL Crypto Key. This is used + * to look up the corresponding dsl_crypto_key_t from the zio layer + * for performing data encryption and decryption. + */ +typedef struct dsl_key_mapping { + /* link on spa_keystore_t:sk_key_mappings */ + avl_node_t km_avl_link; + + /* refcount of how many users are depending on this mapping */ + refcount_t km_refcnt; + + /* dataset this crypto key belongs to (index) */ + uint64_t km_dsobj; + + /* crypto key (value) of this record */ + dsl_crypto_key_t *km_key; +} dsl_key_mapping_t; + +/* in memory structure for holding all wrapping and dsl keys */ +typedef struct spa_keystore { + /* lock for protecting sk_dsl_keys */ + krwlock_t sk_dk_lock; + + /* tree of all dsl_crypto_key_t's */ + avl_tree_t sk_dsl_keys; + + /* lock for protecting sk_key_mappings */ + krwlock_t sk_km_lock; + + /* tree of all dsl_key_mapping_t's, indexed by dsobj */ + avl_tree_t sk_key_mappings; + + /* lock for protecting the wrapping keys tree */ + krwlock_t sk_wkeys_lock; + + /* tree of all dsl_wrapping_key_t's, indexed by ddobj */ + avl_tree_t sk_wkeys; +} spa_keystore_t; + +void dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag); +void dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag); +void dsl_wrapping_key_free(dsl_wrapping_key_t *wkey); +int dsl_wrapping_key_create(uint8_t *wkeydata, dsl_wrapping_key_t **wkey_out); + +int dsl_crypto_params_create_nvlist(nvlist_t *props, nvlist_t *crypto_args, + dsl_crypto_params_t **dcp_out); +void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); +int dsl_crypto_can_set_keylocation(const char *dsname, zprop_source_t source, + const char *keylocation); + +void spa_keystore_init(spa_keystore_t *sk); +void spa_keystore_fini(spa_keystore_t *sk); +zfs_keystatus_t dsl_dataset_get_keystatus(struct dsl_dataset *ds); +int dsl_dir_get_crypt(struct dsl_dir *dd, uint64_t *crypt); + +void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag); +int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey); +int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop); +int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj); +int spa_keystore_unload_wkey(const char *dsname); + +int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd, + void *tag); +int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag); +int spa_keystore_remove_mapping(spa_t *spa, 
uint64_t dsobj, void *tag); +int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag, + dsl_crypto_key_t **dck_out); + +int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out); +int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj, nvlist_t *nvl); + +int spa_keystore_rewrap(const char *dsname, dsl_crypto_params_t *dcp); +int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent); +int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd, + dsl_crypto_params_t *dcp); +void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); +uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx); +uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, + dsl_wrapping_key_t *wkey, dmu_tx_t *tx); +void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); + +int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt); +int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, zbookmark_phys_t *zb, + const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd, + abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt); + +#endif /* _SYS_DSL_CRYPT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index fc5117bc9be8..b739dbb70f3d 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #ifdef __cplusplus @@ -49,6 +50,7 @@ extern "C" { struct dsl_dataset; struct dsl_dir; struct dsl_pool; +struct dsl_crypto_params; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ @@ -99,6 +101,7 @@ struct dsl_pool; #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok" #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok" +#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok" /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose @@ -239,26 +242,38 @@ dsl_dataset_phys(dsl_dataset_t *ds) #define DS_UNIQUE_IS_ACCURATE(ds) \ ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) +/* flags for holding the dataset */ +typedef enum ds_hold_flags { + DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access encrypted data */ +} ds_hold_flags_t; + int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); +int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, void *tag); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); +int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, + void *tag); int dsl_dataset_own(struct dsl_pool *dp, const char *name, - void *tag, dsl_dataset_t **dsp); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - void *tag, dsl_dataset_t **dsp); -void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); +void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag); void 
dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); int dsl_dataset_namelen(dsl_dataset_t *ds); boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, - dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); + dsl_dataset_t *origin, uint64_t flags, cred_t *, + struct dsl_crypto_params *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx); + struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx); int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, @@ -336,6 +351,8 @@ boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds); boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds); int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result); +void dsl_dataset_activate_feature(uint64_t dsobj, + spa_feature_t f, dmu_tx_t *tx); void dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h index 59e8e055551a..922883ea4283 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h @@ -57,6 +57,8 @@ extern "C" { #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" /* * Note: the names of properties that are marked delegatable are also diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h index a6414887c310..3699fa38464f 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -47,6 +48,7 @@ struct dsl_dataset; #define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count" #define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count" +#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj" typedef enum dd_used { DD_USED_HEAD, @@ -89,6 +91,7 @@ struct dsl_dir { /* These are immutable; no lock needed: */ uint64_t dd_object; + uint64_t dd_crypto_obj; dsl_pool_t *dd_pool; /* Stable until user eviction; no lock needed: */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index 8291e470a116..6c3fb005d161 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -49,6 +49,7 @@ struct dsl_dataset; struct dsl_pool; struct dmu_tx; struct dsl_scan; +struct dsl_crypto_params; extern uint64_t zfs_dirty_data_max; extern uint64_t zfs_dirty_data_max_max; @@ -138,7 +139,8 @@ typedef struct dsl_pool { int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); int dsl_pool_open(dsl_pool_t *dp); void dsl_pool_close(dsl_pool_t *dp); -dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); +dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, + struct dsl_crypto_params *dcp, uint64_t txg); void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); int dsl_pool_sync_context(dsl_pool_t *dp); diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 
0caefcd153f4..ee17a6c0ea86 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -57,6 +57,7 @@ typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; struct dsl_dataset; +struct dsl_crypto_params; /* * General-purpose 32-bit and 64-bit bitfield encodings. @@ -211,7 +212,7 @@ typedef struct zio_cksum_salt { * G gang block indicator * B byteorder (endianness) * D dedup - * X encryption (on version 30, which is not supported) + * X encryption * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type @@ -221,6 +222,84 @@ typedef struct zio_cksum_salt { * checksum[4] 256-bit checksum of the data this bp describes */ +/* + * The blkptr_t's of encrypted blocks also need to store the encryption + * parameters so that the block can be decrypted. TThis layout is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | salt | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 | IV1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | physical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | logical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | IV2 | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | MAC[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * f | MAC[2] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * vdev virtual device ID + * offset offset into virtual device + * LSIZE logical size + * PSIZE physical size (after compression) + * ASIZE allocated size (including RAID-Z parity and gang block headers) + * salt First 64 bits of encryption IV + * IV1 First 64 bits of encryption IV + * GRID RAID-Z layout information (reserved for future use) + * cksum checksum function + * comp compression function + * G gang block indicator + * B byteorder (endianness) + * D dedup + * X encryption (set to 1) + * E blkptr_t contains embedded data (set to 0, see below) + * lvl level of indirection + * type DMU object type + * phys birth txg of block allocation; zero if same as logical birth txg + * log. 
birth transaction group in which the block was logically born + * fill count number of non-zero blocks under this bp + * IV2 Last 32 bits of encryption IV + * checksum[2] 256-bit checksum of the data this bp describes + * MAC[2] message authentication code + * + * The additional encryption parameters are the salt, IV, and MAC which are + * explained in greater detail in the block comment at the top of zio_crypt.c. + * The MAC occupies half of the checksum space since it serves a very similar + * purpose: to prevent data corruption on disk. The only functional difference + * is that the MAC provides additional protection against malicious disk + * tampering. We use the 3rd vdev to store the salt and first 64 bits of the IV. + * as a result encrypted blocks can only have 2 copies maximum instead of the + * normal 3. The last 32 bits are stored in the upper bits of what is usually + * the fill count. Note that only level 0 bocks are ever encrypted (or -2 in + * the case of ZIL blocks), which allows us to guarantee that these 32 bits + * are not trampled over by other code (see zio_crypt.c for details). + */ + /* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. See the comment @@ -276,7 +355,9 @@ typedef struct zio_cksum_salt { * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct - * "embedded-ness". + * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use + * the payload space for encryption parameters (see the comment above on + * how encryption parameters are stored). */ #define BPE_GET_ETYPE(bp) \ @@ -400,6 +481,9 @@ _NOTE(CONSTCOND) } while (0) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +#define BP_IS_ENCRYPTED(bp) BF64_GET((bp)->blk_prop, 61, 1) +#define BP_SET_ENCRYPTED(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) + #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) @@ -417,7 +501,26 @@ _NOTE(CONSTCOND) } while (0) (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ } -#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill) +#define BP_GET_FILL(bp) \ + ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \ + ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill)) + +#define BP_SET_FILL(bp, fill) \ +{ \ + if (BP_IS_ENCRYPTED(bp)) \ + BF64_SET((bp)->blk_fill, 0, 32, fill); \ + else \ + (bp)->blk_fill = fill; \ +} + +#define BP_GET_IV2(bp) \ + (ASSERT(BP_IS_ENCRYPTED(bp)), \ + BF64_GET((bp)->blk_fill, 32, 32)) +#define BP_SET_IV2(bp, iv2) \ +{ \ + ASSERT(BP_IS_ENCRYPTED(bp)); \ + BF64_SET((bp)->blk_fill, 32, 32, iv2); \ +} #define BP_IS_METADATA(bp) \ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) @@ -426,7 +529,7 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_GET_UCSIZE(bp) \ (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) @@ -435,13 +538,13 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 
0 : \ !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_COUNT_GANG(bp) \ (BP_IS_EMBEDDED(bp) ? 0 : \ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ DVA_GET_GANG(&(bp)->blk_dva[1]) + \ - DVA_GET_GANG(&(bp)->blk_dva[2]))) + (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ @@ -460,6 +563,10 @@ _NOTE(CONSTCOND) } while (0) ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) +#define ZIO_CHECKSUM_MAC_EQUAL(zc1, zc2) \ + (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ + ((zc1).zc_word[1] - (zc2).zc_word[1]))) + #define ZIO_CHECKSUM_IS_ZERO(zc) \ (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \ (zc)->zc_word[2] | (zc)->zc_word[3])) @@ -572,13 +679,14 @@ _NOTE(CONSTCOND) } while (0) DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ copies--; \ len += func(buf + len, size - len, \ - "[L%llu %s] %s %s %s %s %s %s%c" \ + "[L%llu %s] %s %s %s %s %s %s %s%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ "cksum=%llx:%llx:%llx:%llx", \ (u_longlong_t)BP_GET_LEVEL(bp), \ type, \ checksum, \ compress, \ + BP_IS_ENCRYPTED(bp) ? "encrypted" : "unencrypted", \ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ BP_IS_GANG(bp) ? "gang" : "contiguous", \ BP_GET_DEDUP(bp) ? "dedup" : "unique", \ @@ -612,8 +720,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag, nvlist_t *policy, nvlist_t **config); extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); -extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - nvlist_t *zplprops); +extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, + nvlist_t *zplprops, struct dsl_crypto_params *dcp); extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 8413a843cd87..7018d40bb9ab 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -264,6 +265,7 @@ struct spa { uint64_t spa_deadman_synctime; /* deadman expiration timer */ uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */ + spa_keystore_t spa_keystore; /* loaded crypto keys */ /* * spa_iokstat_lock protects spa_iokstat and diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index d86e3b45f182..8c1133239b47 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -93,6 +93,7 @@ typedef enum drr_headertype { #define DMU_BACKUP_FEATURE_RESUMING (1 << 20) /* flag #21 is reserved for a Delphix feature */ #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) +#define DMU_BACKUP_FEATURE_RAW (1 << 23) /* * Mask of all supported backup features @@ -102,7 +103,8 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \ DMU_BACKUP_FEATURE_RESUMING | \ DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ - DMU_BACKUP_FEATURE_COMPRESSED) + DMU_BACKUP_FEATURE_COMPRESSED | \ + DMU_BACKUP_FEATURE_RAW) /* Are all features in the given flag word currently supported? 
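(Illustrative aside, not part of the patch.) DMU_BACKUP_FEATURE_RAW above is how a receiving system learns that a stream's records are still ciphertext and must not be decrypted on the way in. A sketch of the gate, assuming a drr_begin record is in hand and that the existing DMU_GET_FEATUREFLAGS() macro is used to unpack drr_versioninfo; the helper name is hypothetical:

static boolean_t
example_stream_is_raw(const struct drr_begin *drrb)
{
        uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);

        /* raw records carry per-block salt/IV/MAC alongside the payload */
        return ((featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
}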
*/ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) @@ -148,18 +150,28 @@ typedef enum dmu_send_resume_token_version { #define DRR_FLAG_FREERECORDS (1<<2) /* - * flags in the drr_checksumflags field in the DRR_WRITE and - * DRR_WRITE_BYREF blocks + * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT, + * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks */ -#define DRR_CHECKSUM_DEDUP (1<<0) +#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */ +#define DRR_RAW_ENCRYPTED (1<<1) +#define DRR_RAW_BYTESWAP (1<<2) #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) +#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED) +#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP) /* deal with compressed drr_write replay records */ #define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0) #define DRR_WRITE_PAYLOAD_SIZE(drrw) \ (DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \ (drrw)->drr_logical_size) +#define DRR_SPILL_PAYLOAD_SIZE(drrs) \ + (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags) ? \ + (drrs)->drr_compressed_size : (drrs)->drr_length) +#define DRR_OBJECT_PAYLOAD_SIZE(drro) \ + (DRR_IS_RAW_ENCRYPTED(drro->drr_flags) ? \ + drro->drr_raw_bonuslen : P2ROUNDUP(drro->drr_bonuslen, 8)) /* * zfs ioctl command structure @@ -168,7 +180,8 @@ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, - DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES + DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, + DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union { @@ -194,7 +207,9 @@ typedef struct dmu_replay_record { uint32_t drr_bonuslen; uint8_t drr_checksumtype; uint8_t drr_compress; - uint8_t drr_pad[6]; + uint8_t drr_dn_slots; /* place holder for large dnode */ + uint8_t drr_flags; + uint32_t drr_raw_bonuslen; uint64_t drr_toguid; /* bonus content follows */ } drr_object; @@ -211,13 +226,17 @@ typedef struct dmu_replay_record { uint64_t drr_logical_size; uint64_t drr_toguid; uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_compressiontype; uint8_t drr_pad2[5]; /* deduplication key */ ddt_key_t drr_key; /* only nonzero if drr_compressiontype is not 0 */ uint64_t drr_compressed_size; + /* only nonzero if DRR_RAW_ENCRYPTED flag is set */ + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; /* content follows */ } drr_write; struct drr_free { @@ -238,7 +257,7 @@ typedef struct dmu_replay_record { uint64_t drr_refoffset; /* properties of the data */ uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ } drr_write_byref; @@ -246,7 +265,15 @@ typedef struct dmu_replay_record { uint64_t drr_object; uint64_t drr_length; uint64_t drr_toguid; - uint64_t drr_pad[4]; /* needed for crypto */ + uint8_t drr_flags; + uint8_t drr_compressiontype; + uint8_t drr_pad[6]; + /* only nonzero if DRR_RAW_ENCRYPTED flag is set */ + uint64_t drr_compressed_size; + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + dmu_object_type_t drr_type; /* spill data follows */ } drr_spill; struct drr_write_embedded { @@ -262,6 +289,16 @@ typedef struct dmu_replay_record { uint32_t drr_psize; /* compr. 
(real) size of payload */ /* (possibly compressed) content follows */ } drr_write_embedded; + struct drr_object_range { + uint64_t drr_firstobj; + uint64_t drr_numslots; + uint64_t drr_toguid; + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + uint8_t drr_flags; + uint8_t drr_pad[3]; + } drr_object_range; /* * Nore: drr_checksum is overlaid with all record types diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h index 1642da08197f..78e584e9766b 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil.h +++ b/usr/src/uts/common/fs/zfs/sys/zil.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -383,7 +384,8 @@ typedef int zil_replay_func_t(); typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt); extern void zil_init(void); extern void zil_fini(void); diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index d1de03923bc0..25f9d934d785 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -104,6 +104,29 @@ enum zio_checksum { #define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 #define ZIO_DEDUPDITTO_MIN 100 +/* supported encryption algorithms */ +enum zio_encrypt { + ZIO_CRYPT_INHERIT = 0, + ZIO_CRYPT_ON, + ZIO_CRYPT_OFF, + ZIO_CRYPT_AES_128_CCM, + ZIO_CRYPT_AES_192_CCM, + ZIO_CRYPT_AES_256_CCM, + ZIO_CRYPT_AES_128_GCM, + ZIO_CRYPT_AES_192_GCM, + ZIO_CRYPT_AES_256_GCM, + ZIO_CRYPT_FUNCTIONS +}; + +#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM +#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF + +/* macros defining encryption lengths */ +#define MAX_MASTER_KEY_LEN 32 +#define ZIO_DATA_IV_LEN 12 +#define ZIO_DATA_SALT_LEN 8 +#define ZIO_DATA_MAC_LEN 16 + /* * The number of "legacy" compression functions which can be set on individual * objects. 
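(Illustrative aside, not part of the patch.) The length macros just added fix the sizes of the per-block crypto parameters that travel with an encrypted blkptr_t. A sketch of the buffers involved, using the decode helpers declared later in zio_crypt.h; the function name is hypothetical:

static void
example_bp_crypt_params(blkptr_t *bp)
{
        uint8_t salt[ZIO_DATA_SALT_LEN];        /* 8 bytes, selects the derived key */
        uint8_t iv[ZIO_DATA_IV_LEN];            /* 12 bytes, unique per block */
        uint8_t mac[ZIO_DATA_MAC_LEN];          /* 16 bytes, authentication tag */

        ASSERT(BP_IS_ENCRYPTED(bp));

        /* salt and IV share dva[2] and blk_fill; the MAC uses cksum words 2-3 */
        zio_crypt_decode_params_bp(bp, salt, iv);
        zio_crypt_decode_mac_bp(bp, mac);
}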
@@ -185,16 +208,18 @@ enum zio_flag { ZIO_FLAG_DONT_PROPAGATE = 1 << 20, ZIO_FLAG_IO_BYPASS = 1 << 21, ZIO_FLAG_IO_REWRITE = 1 << 22, - ZIO_FLAG_RAW = 1 << 23, - ZIO_FLAG_GANG_CHILD = 1 << 24, - ZIO_FLAG_DDT_CHILD = 1 << 25, - ZIO_FLAG_GODFATHER = 1 << 26, - ZIO_FLAG_NOPWRITE = 1 << 27, - ZIO_FLAG_REEXECUTED = 1 << 28, - ZIO_FLAG_DELEGATED = 1 << 29, + ZIO_FLAG_RAW_COMPRESS = 1 << 23, + ZIO_FLAG_RAW_ENCRYPT = 1 << 24, + ZIO_FLAG_GANG_CHILD = 1 << 25, + ZIO_FLAG_DDT_CHILD = 1 << 26, + ZIO_FLAG_GODFATHER = 1 << 27, + ZIO_FLAG_NOPWRITE = 1 << 28, + ZIO_FLAG_REEXECUTED = 1 << 29, + ZIO_FLAG_DELEGATED = 1 << 30, }; #define ZIO_FLAG_MUSTSUCCEED 0 +#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) #define ZIO_DDT_CHILD_FLAGS(zio) \ (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ @@ -296,6 +321,11 @@ typedef struct zio_prop { boolean_t zp_dedup; boolean_t zp_dedup_verify; boolean_t zp_nopwrite; + boolean_t zp_encrypt; + boolean_t zp_byteorder; + uint8_t zp_salt[ZIO_DATA_SALT_LEN]; + uint8_t zp_iv[ZIO_DATA_IV_LEN]; + uint8_t zp_mac[ZIO_DATA_MAC_LEN]; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; @@ -504,8 +534,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, enum zio_flag flags); -extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, - blkptr_t *old_bp, uint64_t size, boolean_t use_slog); +extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, + blkptr_t *new_bp, blkptr_t *old_bp, uint64_t size, boolean_t use_slog); extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp); extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); diff --git a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h index 3eda057eae80..ee7a9bf7c766 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h @@ -54,7 +54,7 @@ typedef enum zio_checksum_flags { /* Uses salt value */ ZCHECKSUM_FLAG_SALTED = (1 << 4), /* Strong enough for nopwrite? */ - ZCHECKSUM_FLAG_NOPWRITE = (1 << 5) + ZCHECKSUM_FLAG_NOPWRITE = (1 << 5), } zio_checksum_flags_t; /* diff --git a/usr/src/uts/common/fs/zfs/sys/zio_crypt.h b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h new file mode 100644 index 000000000000..3773cd27413a --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h @@ -0,0 +1,163 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_ZIO_CRYPT_H +#define _SYS_ZIO_CRYPT_H + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* forward declarations */ +struct zbookmark_phys; + +#define WRAPPING_KEY_LEN 32 +#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN +#define WRAPPING_MAC_LEN 16 + +#define SHA_256_DIGEST_LEN 32 +#define HMAC_SHA256_KEYLEN 32 + +#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM + +/* + * After encrypting many blocks with the same key we may start to run up + * against the theoretical limits of how much data can securely be encrypted + * with a single key using the supported encryption modes. The most obvious + * limitation is that our risk of generating 2 equivalent 96 bit IVs increases + * the more IVs we generate (which both GCM and CCM modes strictly forbid). + * This risk actually grows surprisingly quickly over time according to the + * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have + * generated n IVs with a cryptographically secure RNG, the approximate + * probability p(n) of a collision is given as: + * + * p(n) ~= e^(-n(n-1)/(2*(2^96))) + * + * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] + * + * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion + * we must not write more than 398065730 blocks with the same encryption key, + * which is significantly less than the zettabyte of data that ZFS claims to + * be able to store. To counteract this, we rotate our keys after 400000000 + * blocks have been written by generating a new random 64 bit salt for our + * HKDF encryption key generation function. + */ +#define ZIO_CRYPT_MAX_SALT_USAGE 400000000 + +#define BITS_TO_BYTES(x) ((x + NBBY - 1) / NBBY) +#define BYTES_TO_BITS(x) (x * NBBY) + +typedef enum zio_crypt_type { + ZC_TYPE_NONE = 0, + ZC_TYPE_CCM, + ZC_TYPE_GCM +} zio_crypt_type_t; + +/* table of supported crypto algorithms, modes and keylengths. 
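The ZIO_CRYPT_MAX_SALT_USAGE limit above can be sanity-checked numerically. The standalone program below (illustrative only; it simply re-evaluates the approximation quoted in the comment, link with -lm) confirms that roughly 4e8 IVs drawn from a 2^96 space keep the collision probability near one in a trillion:

#include <math.h>
#include <stdio.h>

int
main(void)
{
	double ivspace = ldexp(1.0, 96);	/* 2^96 possible 96-bit IVs */
	double n = 398065730.0;			/* blocks written with one salt */
	/* p(n) ~= 1 - e^(-n(n-1)/(2*2^96)); expm1() keeps the tiny result accurate */
	double p = -expm1(-(n * (n - 1.0)) / (2.0 * ivspace));

	(void) printf("p(%.0f) ~= %.3e\n", n, p);	/* prints ~1.000e-12 */
	return (0);
}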
*/ +typedef struct zio_crypt_info { + /* mechanism name, needed by ICP */ + crypto_mech_name_t ci_mechname; + + /* cipher mode type (GCM, CCM) */ + zio_crypt_type_t ci_crypt_type; + + /* length of the encryption key */ + size_t ci_keylen; + + /* human-readable name of the encryption algorithm */ + char *ci_name; +} zio_crypt_info_t; + +extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS]; + +/* in memory representation of an unwrapped key that is loaded into memory */ +typedef struct zio_crypt_key { + /* encryption algorithm */ + uint64_t zk_crypt; + + /* buffer for master key */ + uint8_t zk_master_keydata[MAX_MASTER_KEY_LEN]; + + /* buffer for hmac key */ + uint8_t zk_hmac_keydata[HMAC_SHA256_KEYLEN]; + + /* buffer for current encryption key derived from master key */ + uint8_t zk_current_keydata[MAX_MASTER_KEY_LEN]; + + /* current 64 bit salt for deriving an encryption key */ + uint8_t zk_salt[ZIO_DATA_SALT_LEN]; + + /* count of how many times the current salt has been used */ + uint64_t zk_salt_count; + + /* illumos crypto api current encryption key */ + crypto_key_t zk_current_key; + + /* template of current encryption key for illumos crypto api */ + crypto_ctx_template_t zk_current_tmpl; + + /* illumos crypto api current hmac key */ + crypto_key_t zk_hmac_key; + + /* template of hmac key for illumos crypto api */ + crypto_ctx_template_t zk_hmac_tmpl; + + /* lock for changing the salt and dependent values */ + krwlock_t zk_salt_lock; +} zio_crypt_key_t; + +void zio_crypt_key_destroy(zio_crypt_key_t *key); +int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key); +int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); + +int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint8_t *keydata, + uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key); +int zio_crypt_generate_iv(uint8_t *ivbuf); +int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt); + +void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac); +void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac); +void zio_crypt_encode_mac_zil(void *data, uint8_t *mac); +void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac); +void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen); + +int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + uint8_t *plainbuf, uint8_t *cipherbuf, boolean_t *no_crypt); +int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + abd_t *pabd, abd_t *cabd, boolean_t *no_crypt); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIO_CRYPT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/zio_impl.h b/usr/src/uts/common/fs/zfs/sys/zio_impl.h index a36749a308d6..2c809c93b2ef 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_impl.h @@ -99,6 +99,17 @@ extern "C" { * physical I/O. The nop write feature can handle writes in either * syncing or open context (i.e. zil writes) and as a result is mutually * exclusive with dedup.
+ * + * Encryption: + * Encryption is handled by the ZIO_STAGE_ENCRYPT stage. If the data is to be + * encrypted, this stage determines how the encryption metadata is stored in + * the bp. Decryption is performed during ZIO_STAGE_READ_BP_INIT as a transform + * callback. Encryption is also mutually exclusive with nopwrite, because + * blocks with the same plaintext will be encrypted with different salts and + * therefore different IV's (if dedup is off), and therefore have different + * ciphertexts. For dedup blocks we deterministically generate the IV by + * performing a SHA256-HMAC of the plaintext, so we can actually still do + * dedup. See the block comment in zio_crypt.c for details. */ /* @@ -113,32 +124,33 @@ enum zio_stage { ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */ - ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */ + ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */ + ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */ - ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */ + ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */ - ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */ - ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */ - ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */ - ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */ + ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */ + ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */ + ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */ + ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */ - ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */ - ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */ - ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */ - ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */ - ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */ - ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */ + ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */ + ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */ + ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */ + ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */ - ZIO_STAGE_READY = 1 << 18, /* RWFCI */ + ZIO_STAGE_READY = 1 << 19, /* RWFCI */ - ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */ - ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */ - ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */ + ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */ - ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */ + ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */ - ZIO_STAGE_DONE = 1 << 23 /* RWFCI */ + ZIO_STAGE_DONE = 1 << 24 /* RWFCI */ }; #define ZIO_INTERLOCK_STAGES \ @@ -190,12 +202,14 @@ enum zio_stage { #define ZIO_REWRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_WRITE_BP_INIT) #define ZIO_WRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_ALLOCATE) @@ -210,6 +224,7 @@ enum zio_stage { ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_ISSUE_ASYNC | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_CHECKSUM_GENERATE | \ ZIO_STAGE_DDT_WRITE) diff --git a/usr/src/uts/common/fs/zfs/zfeature.c b/usr/src/uts/common/fs/zfs/zfeature.c index 35ce827979e4..dc1c9166d42c 100644 --- a/usr/src/uts/common/fs/zfs/zfeature.c +++ b/usr/src/uts/common/fs/zfs/zfeature.c @@ -413,8 +413,8 @@ spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx) * We create feature flags ZAP objects in two instances: during pool * creation and during pool upgrade. 
*/ - ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && - tx->tx_txg == TXG_INITIAL)); + ASSERT((!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL) || + dsl_pool_sync_context(spa_get_dsl(spa))); spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index da22cb6c8151..fda42651bb0a 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -31,6 +31,7 @@ * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome + * Copyright (c) 2017, Datto, Inc. All rights reserved. */ /* @@ -185,6 +186,7 @@ #include #include #include +#include #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -583,12 +585,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). */ - error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, + error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE, setsl_tag, &os); if (error != 0) return (SET_ERROR(EPERM)); - dmu_objset_disown(os, setsl_tag); + dmu_objset_disown(os, B_TRUE, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; @@ -1267,6 +1269,22 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (0); } +/* ARGSUSED */ +static int +zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_LOAD_KEY, cr)); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_CHANGE_KEY, cr)); +} + /* * Policy for allowing temporary snapshots to be taken or released */ @@ -1455,7 +1473,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) if (zfsvfs->z_vfs) { VFS_RELE(zfsvfs->z_vfs); } else { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } } @@ -1467,6 +1485,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; + dsl_crypto_params_t *dcp = NULL; if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) @@ -1481,6 +1500,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) if (props) { nvlist_t *nvl = NULL; + nvlist_t *ha = NULL; uint64_t version = SPA_VERSION; (void) nvlist_lookup_uint64(props, @@ -1499,6 +1519,16 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) } (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } + + (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS, &ha); + error = dsl_crypto_params_create_nvlist(rootprops, ha, &dcp); + if (error != 0) { + nvlist_free(config); + nvlist_free(props); + return (error); + } + (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); + VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); @@ -1506,7 +1536,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; } - error = spa_create(zc->zc_name, config, props, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops, dcp); /* * Set the remaining root properties @@ -1520,6 +1550,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); + dsl_crypto_params_free(dcp, !!error); return 
(error); } @@ -2396,6 +2427,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; + char *strval = NULL; int err = -1; if (prop == ZPROP_INVAL) { @@ -2411,10 +2443,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, &pair) == 0); } - if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) - return (-1); + /* all special properties are numeric except for keylocation */ + if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + if (prop != ZFS_PROP_KEYLOCATION) + return (-1); - VERIFY(0 == nvpair_value_uint64(pair, &intval)); + strval = fnvpair_value_string(pair); + } else { + intval = fnvpair_value_uint64(pair); + } switch (prop) { case ZFS_PROP_QUOTA: @@ -2431,6 +2468,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, } else { err = dsl_dir_activate_fs_ss_limit(dsname); } + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. + */ + if (err == 0) + err = -1; + break; + case ZFS_PROP_KEYLOCATION: + err = dsl_crypto_can_set_keylocation(dsname, source, strval); + /* * Set err to -1 to force the zfs_set_prop_nvlist code down the * default path to set the value in the nvlist. @@ -3166,6 +3213,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, * innvl: { * "type" -> dmu_objset_type_t (int32) * (optional) "props" -> { prop -> value } + * (optional) "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) @@ -3176,15 +3225,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) int error = 0; zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; + nvlist_t *hidden_args = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); int32_t type32; dmu_objset_type_t type; boolean_t is_insensitive = B_FALSE; + dsl_crypto_params_t *dcp = NULL; if (nvlist_lookup_int32(innvl, "type", &type32) != 0) return (SET_ERROR(EINVAL)); type = type32; (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); switch (type) { case DMU_OST_ZFS: @@ -3250,9 +3302,17 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) } } + error = dsl_crypto_params_create_nvlist(nvprops, hidden_args, &dcp); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); + } + error = dmu_objset_create(fsname, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + dsl_crypto_params_free(dcp, !!error); /* * It would be nice to do this atomically. 
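For context on the innvl shape that zfs_ioc_create() now accepts (documented above: "type", optional "props", and optional "hidden_args" carrying the raw 32-byte "wkeydata" array), a userland sketch using libnvpair might assemble it as follows. This is illustrative only: the property values, the locally defined DMU_OST_ZFS constant, and the helper name are assumptions for the example, error checking is omitted, and real callers go through libzfs/libzfs_core rather than building the nvlist by hand.

#include <libnvpair.h>
#include <stdint.h>

#define	DMU_OST_ZFS		2	/* assumed to match dmu_objset_type_t */
#define	WRAPPING_KEY_LEN	32	/* from zio_crypt.h above */

/* illustrative only: build the "create" innvl for an encrypted dataset */
static nvlist_t *
build_encrypted_create_innvl(uint8_t *wkeydata)
{
	nvlist_t *innvl, *props, *hidden_args;

	(void) nvlist_alloc(&innvl, NV_UNIQUE_NAME, 0);
	(void) nvlist_alloc(&props, NV_UNIQUE_NAME, 0);
	(void) nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0);

	(void) nvlist_add_int32(innvl, "type", DMU_OST_ZFS);

	/* ordinary dataset properties ride in "props" as before */
	(void) nvlist_add_string(props, "encryption", "aes-256-ccm");
	(void) nvlist_add_string(props, "keyformat", "raw");
	(void) nvlist_add_string(props, "keylocation", "prompt");
	(void) nvlist_add_nvlist(innvl, "props", props);

	/* the raw wrapping key travels separately so it never lands in props */
	(void) nvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
	    WRAPPING_KEY_LEN);
	(void) nvlist_add_nvlist(innvl, "hidden_args", hidden_args);

	nvlist_free(props);
	nvlist_free(hidden_args);
	return (innvl);
}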
@@ -3270,6 +3330,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) * innvl: { * "origin" -> name of origin snapshot * (optional) "props" -> { prop -> value } + * (optional) "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) @@ -3279,11 +3341,14 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { int error = 0; nvlist_t *nvprops = NULL; + nvlist_t *hidden_args = NULL; + dsl_crypto_params_t *dcp = NULL; char *origin_name; if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) return (SET_ERROR(EINVAL)); (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); if (strchr(fsname, '@') || strchr(fsname, '%')) @@ -3291,10 +3356,15 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) if (dataset_namecheck(origin_name, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); - error = dmu_objset_clone(fsname, origin_name); + + error = dsl_crypto_params_create_nvlist(nvprops, hidden_args, &dcp); if (error != 0) return (error); + error = dmu_objset_clone(fsname, origin_name, dcp); + + dsl_crypto_params_free(dcp, !!error); + /* * It would be nice to do this atomically. */ @@ -4117,7 +4187,11 @@ extract_delay_props(nvlist_t *props) { nvlist_t *delayprops; nvpair_t *nvp, *tmp; - static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 }; + static const zfs_prop_t delayable[] = { + ZFS_PROP_REFQUOTA, + ZFS_PROP_KEYLOCATION, + 0 + }; int i; VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); @@ -4260,6 +4334,14 @@ zfs_ioc_recv(zfs_cmd_t *zc) } else { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } + } else if (drc.drc_raw) { + /* + * Raw send streams default to a "prompt" keylocation if + * no properties are given. 
+ */ + delayprops = fnvlist_alloc(); + fnvlist_add_string(delayprops, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), "prompt"); } if (props != NULL) { @@ -4306,7 +4388,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) } } - if (delayprops != NULL) { + if (delayprops != NULL && props != NULL) { /* * Merge delayed props back in with initial props, in case * we're DEBUG and zfs_ioc_recv_inject_err is set (which means @@ -4416,6 +4498,10 @@ zfs_ioc_send(zfs_cmd_t *zc) boolean_t embedok = (zc->zc_flags & 0x1); boolean_t large_block_ok = (zc->zc_flags & 0x2); boolean_t compressok = (zc->zc_flags & 0x4); + boolean_t rawok = (zc->zc_flags & 0x8); + + if (rawok && compressok) + return (SET_ERROR(EINVAL)); if (zc->zc_obj != 0) { dsl_pool_t *dp; @@ -4447,7 +4533,8 @@ zfs_ioc_send(zfs_cmd_t *zc) if (error != 0) return (error); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, + FTAG, &tosnap); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); @@ -4463,7 +4550,7 @@ zfs_ioc_send(zfs_cmd_t *zc) } } - error = dmu_send_estimate(tosnap, fromsnap, compressok, + error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok, &zc->zc_objset_type); if (fromsnap != NULL) @@ -4477,7 +4564,7 @@ zfs_ioc_send(zfs_cmd_t *zc) off = fp->f_offset; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, - zc->zc_fromobj, embedok, large_block_ok, compressok, + zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) @@ -4823,7 +4910,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) error = zfs_suspend_fs(zfsvfs); if (error == 0) { dmu_objset_refresh_ownership(zfsvfs->z_os, - zfsvfs); + B_TRUE, zfsvfs); error = zfs_resume_fs(zfsvfs, ds); } } @@ -5412,6 +5499,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) * presence indicates DRR_WRITE_EMBEDDED records are permitted * (optional) "compressok" -> (value ignored) * presence indicates compressed DRR_WRITE records are permitted + * (optional) "rawok" -> (value ignored) + * presence indicates raw DRR_WRITE records should be used. * (optional) "resume_object" and "resume_offset" -> (uint64) * if present, resume send stream from specified object and offset. 
* } @@ -5429,6 +5518,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) boolean_t largeblockok; boolean_t embedok; boolean_t compressok; + boolean_t rawok; uint64_t resumeobj = 0; uint64_t resumeoff = 0; @@ -5441,6 +5531,10 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); + rawok = nvlist_exists(innvl, "rawok"); + + if (rawok && compressok) + return (SET_ERROR(EINVAL)); (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); @@ -5451,7 +5545,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) off = fp->f_offset; error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, - fd, resumeobj, resumeoff, fp->f_vnode, &off); + rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5490,6 +5584,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) /* LINTED E_FUNC_SET_NOT_USED */ boolean_t embedok; boolean_t compressok; + boolean_t rawok; uint64_t space; error = dsl_pool_hold(snapname, FTAG, &dp); @@ -5505,6 +5600,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); + rawok = nvlist_exists(innvl, "rawok"); error = nvlist_lookup_string(innvl, "from", &fromname); if (error == 0) { @@ -5518,8 +5614,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); if (error != 0) goto out; - error = dmu_send_estimate(tosnap, fromsnap, compressok, - &space); + error = dmu_send_estimate(tosnap, fromsnap, + compressok || rawok, &space); dsl_dataset_rele(fromsnap, FTAG); } else if (strchr(fromname, '#') != NULL) { /* @@ -5534,7 +5630,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) if (error != 0) goto out; error = dmu_send_estimate_from_txg(tosnap, - frombm.zbm_creation_txg, compressok, &space); + frombm.zbm_creation_txg, compressok || rawok, + &space); } else { /* * from is not properly formatted as a snapshot or @@ -5545,7 +5642,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) } } else { // If estimating the size of a full send, use dmu_send_estimate - error = dmu_send_estimate(tosnap, NULL, compressok, &space); + error = dmu_send_estimate(tosnap, NULL, compressok || rawok, + &space); } fnvlist_add_uint64(outnvl, "space", space); @@ -5556,6 +5654,121 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) return (error); } +/* + * Load a user's wrapping key into the kernel. 
+ * innvl: { + * "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) + * (optional) "noop" -> (value ignored) + * presence indicated key should only be verified, not loaded + * } + */ +/* ARGSUSED */ +static int +zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret = 0; + dsl_crypto_params_t *dcp = NULL; + nvlist_t *hidden_args; + boolean_t noop = nvlist_exists(innvl, "noop"); + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = dsl_crypto_params_create_nvlist(NULL, hidden_args, &dcp); + if (ret != 0) + goto error; + + ret = spa_keystore_load_wkey(dsname, dcp, noop); + if (ret != 0) + goto error; + + dsl_crypto_params_free(dcp, B_FALSE); + + return (0); + +error: + dsl_crypto_params_free(dcp, B_TRUE); + return (ret); +} + +/* + * Unload a user's wrapping key from the kernel. + * Both innvl and outnvl are unused. + */ +/* ARGSUSED */ +static int +zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret = 0; + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = (SET_ERROR(EINVAL)); + goto out; + } + + ret = spa_keystore_unload_wkey(dsname); + if (ret != 0) + goto out; + +out: + return (ret); +} + +/* + * Changes a user's wrapping key used to decrypt a dataset. The keyformat, + * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified + * here to change how the key is derived in userspace. + * + * innvl: { + * "hidden_args" (optional) -> { "wkeydata" -> value } + * raw uint8_t array of new encryption wrapping key data (32 bytes) + * "props" (optional) -> { prop -> value } + * } + * + * outnvl is unused + */ +/* ARGSUSED */ +static int +zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret; + dsl_crypto_params_t *dcp = NULL; + nvlist_t *args = NULL, *hidden_args = NULL; + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = (SET_ERROR(EINVAL)); + goto error; + } + + (void) nvlist_lookup_nvlist(innvl, "props", &args); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); + + ret = dsl_crypto_params_create_nvlist(args, hidden_args, &dcp); + if (ret != 0) + goto error; + + ret = spa_keystore_rewrap(dsname, dcp); + if (ret != 0) + goto error; + + dsl_crypto_params_free(dcp, B_FALSE); + + return (0); + +error: + dsl_crypto_params_free(dcp, B_TRUE); + return (ret); +} + static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; static void @@ -5724,6 +5937,17 @@ zfs_ioctl_init(void) POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY, + zfs_ioc_load_key, zfs_secpolicy_load_key, + DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); + zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY, + zfs_ioc_unload_key, zfs_secpolicy_load_key, + DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); + zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY, + zfs_ioc_change_key, zfs_secpolicy_change_key, + DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, + B_TRUE, B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index 
213a30b6e6b2..e882abab80cc 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -974,7 +974,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ - error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE, zfsvfs, + &os); if (error) { kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); @@ -995,7 +996,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) error = zfsvfs_init(zfsvfs, os); if (error != 0) { - dmu_objset_disown(os, zfsvfs); + dmu_objset_disown(os, B_TRUE, zfsvfs); *zfvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); @@ -1222,7 +1223,7 @@ zfs_domount(vfs_t *vfsp, char *osname) zfsctl_create(zfsvfs); out: if (error) { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_inc_32(&zfs_active_fs_count); @@ -1890,7 +1891,7 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) /* * Finally release the objset */ - dmu_objset_disown(os, zfsvfs); + dmu_objset_disown(os, B_TRUE, zfsvfs); } /* diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index 1aa4900bc5cb..95fb33d203fa 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -179,8 +179,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) * Read a log block and make sure it's valid. */ static int -zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, - char **end) +zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + blkptr_t *nbp, void *dst, char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; @@ -194,11 +194,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) zio_flags |= ZIO_FLAG_SPECULATIVE; + if (!decrypt) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, + &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { zio_cksum_t cksum = bp->blk_cksum; @@ -273,6 +276,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) if (zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + /* + * If we are not using the resulting data, we are just checking that + * it hasn't been corrupted so we don't need to waste CPU time + * decompressing and decrypting it. 
+ */ + if (wbuf == NULL) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -293,7 +304,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) */ int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt) { const zil_header_t *zh = zilog->zl_header; boolean_t claimed = !!zh->zh_claim_txg; @@ -332,7 +344,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (blk_seq > claim_blk_seq) break; - if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) + + error = parse_blk_func(zilog, &blk, arg, txg); + if (error != 0) break; ASSERT3U(max_blk_seq, <, blk_seq); max_blk_seq = blk_seq; @@ -341,7 +355,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) break; - error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); + error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, + lrbuf, &end); if (error != 0) break; @@ -351,7 +366,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, ASSERT3U(reclen, >=, sizeof (lr_t)); if (lr->lrc_seq > claim_lr_seq) goto done; - if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) + + error = parse_lr_func(zilog, lr, arg, txg); + if (error != 0) goto done; ASSERT3U(max_lr_seq, <, lr->lrc_seq); max_lr_seq = lr->lrc_seq; @@ -366,7 +383,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zilog->zl_parse_lr_count = lr_count; ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || - (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); + (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) || + (decrypt && error == EIO)); zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); @@ -407,9 +425,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. 
*/ - if (lr->lr_blkptr.blk_birth >= first_txg && - (error = zil_read_log_data(zilog, lr, NULL)) != 0) - return (error); + if (lr->lr_blkptr.blk_birth >= first_txg) { + error = zil_read_log_data(zilog, lr, NULL); + if (error != 0) + return (error); + } + return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } @@ -557,8 +578,9 @@ zil_create(zilog_t *zilog) BP_ZERO(&blk); } - error = zio_alloc_zil(zilog->zl_spa, txg, &blk, NULL, - ZIL_MIN_BLKSZ, zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, + NULL, ZIL_MIN_BLKSZ, + zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); if (error == 0) zil_init_log_chain(zilog, &blk); @@ -646,7 +668,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); + zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); } int @@ -660,7 +682,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) int error; error = dmu_objset_own_obj(dp, ds->ds_object, - DMU_OST_ANY, B_FALSE, FTAG, &os); + DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os); if (error != 0) { /* * EBUSY indicates that the objset is inconsistent, in which @@ -681,7 +703,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -695,7 +717,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_claim_log_block, - zil_claim_log_record, tx, first_txg); + zil_claim_log_record, tx, first_txg, B_FALSE); zh->zh_claim_txg = first_txg; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; @@ -706,7 +728,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -764,7 +786,8 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) * which will update spa_max_claim_txg. See spa_load() for details. */ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, - zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); + zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa), + B_FALSE); return ((error == ECKSUM || error == ENOENT) ? 
0 : error); } @@ -1014,8 +1037,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ - error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, - USE_SLOG(zilog)); + error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, &lwb->lwb_blk, + zil_blksz, USE_SLOG(zilog)); if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; @@ -2149,7 +2172,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, - zh->zh_claim_txg); + zh->zh_claim_txg, B_TRUE); kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index c7d2e0536ed1..5147c26e7f3e 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -42,6 +42,7 @@ #include #include #include +#include /* * ========================================================================== @@ -319,7 +320,7 @@ zio_pop_transforms(zio_t *zio) /* * ========================================================================== - * I/O transform callbacks for subblocks and decompression + * I/O transform callbacks for subblocks, decompression, and decryption * ========================================================================== */ static void @@ -345,6 +346,45 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size) } } +static void +zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) +{ + int ret; + void *tmp; + blkptr_t *bp = zio->io_bp; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + ASSERT(BP_IS_ENCRYPTED(bp)); + ASSERT3U(size, !=, 0); + + if (zio->io_error != 0) + return; + + zio_crypt_decode_params_bp(bp, salt, iv); + + if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { + tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmp, mac); + abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + } + + ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, &zio->io_bookmark, bp, + bp->blk_birth, size, data, zio->io_abd, iv, mac, salt, &no_crypt); + if (ret != 0) { + /* assert that the key was found unless this was speculative */ + ASSERT(ret != ENOENT || (zio->io_flags & ZIO_FLAG_SPECULATIVE)); + zio->io_error = ret; + } + + if (no_crypt) + abd_copy(data, zio->io_abd, size); +} + /* * ========================================================================== * I/O parent/child relationships and pipeline interlocks @@ -555,7 +595,9 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0); + IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0); + IMPLY((flags & ZIO_FLAG_RAW_ENCRYPT) != 0, + (flags & ZIO_FLAG_RAW_COMPRESS) != 0); zio = kmem_cache_alloc(zio_cache, KM_SLEEP); bzero(zio, sizeof (zio_t)); @@ -787,9 +829,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, * Data can be NULL if we are going to call zio_write_override() to * provide the already-allocated BP. But we may need the data to * verify a dedup hit (if requested). In this case, don't try to - * dedup (just take the already-allocated BP verbatim). 
+ * dedup (just take the already-allocated BP verbatim). Encrypted + * dedup blocks need data as well so we also disable dedup in this + * case. */ - if (data == NULL && zio->io_prop.zp_dedup_verify) { + if (data == NULL && + (zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) { zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; } @@ -1128,21 +1173,27 @@ static int zio_read_bp_init(zio_t *zio) { blkptr_t *bp = zio->io_bp; + uint64_t psize = + BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && - !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t psize = - BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), psize, psize, zio_decompress); } - if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { - zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + if (BP_IS_ENCRYPTED(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL && + !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) { + zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), + psize, psize, zio_decrypt); + } + if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { int psize = BPE_GET_PSIZE(bp); void *data = abd_borrow_buf(zio->io_abd, psize); + + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; decode_embedded_bp_compressed(bp, data); abd_return_buf_copy(zio->io_abd, data, psize); } else { @@ -1202,7 +1253,8 @@ zio_write_bp_init(zio_t *zio) ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); - if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { + if (BP_GET_CHECKSUM(bp) == zp->zp_checksum && + !zp->zp_encrypt) { BP_SET_DEDUP(bp, 1); zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; return (ZIO_PIPELINE_CONTINUE); @@ -1231,8 +1283,6 @@ zio_write_compress(zio_t *zio) uint64_t psize = zio->io_size; int pass = 1; - EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0); - /* * If our children haven't all reached the ready stage, * wait for them and then repeat this pipeline stage. @@ -1282,13 +1332,15 @@ zio_write_compress(zio_t *zio) } /* If it's a compressed write that is not raw, compress the buffer. */ - if (compress != ZIO_COMPRESS_OFF && psize == lsize) { + if (compress != ZIO_COMPRESS_OFF && + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { void *cbuf = zio_buf_alloc(lsize); psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); - } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && + } else if (!zp->zp_dedup && !zp->zp_encrypt && + psize <= BPE_PAYLOAD_SIZE && zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { encode_embedded_bp_compressed(bp, @@ -1350,8 +1402,8 @@ zio_write_compress(zio_t *zio) if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && pass >= zfs_sync_pass_rewrite) { - ASSERT(psize != 0); enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; + ASSERT(psize != 0); zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; zio->io_flags |= ZIO_FLAG_IO_REWRITE; } else { @@ -2152,6 +2204,13 @@ zio_write_gang_block(zio_t *pio) zio_prop_t zp; int error; + /* + * encrypted blocks need DVA[2] free so encrypted gang headers can't + * have a third copy. 
+ */ + if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP) + gbh_copies = SPA_DVAS_PER_BP - 1; + int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); @@ -2223,12 +2282,16 @@ zio_write_gang_block(zio_t *pio) zp.zp_checksum = gio->io_prop.zp_checksum; zp.zp_compress = ZIO_COMPRESS_OFF; + zp.zp_encrypt = gio->io_prop.zp_encrypt; zp.zp_type = DMU_OT_NONE; zp.zp_level = 0; zp.zp_copies = gio->io_prop.zp_copies; zp.zp_dedup = B_FALSE; zp.zp_dedup_verify = B_FALSE; zp.zp_nopwrite = B_FALSE; + bzero(zp.zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp.zp_iv, ZIO_DATA_IV_LEN); + bzero(zp.zp_mac, ZIO_DATA_MAC_LEN); zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], abd_get_offset(pio->io_abd, pio->io_size - resid), lsize, @@ -2302,6 +2365,7 @@ zio_nop_write(zio_t *zio) if (BP_IS_HOLE(bp_orig) || !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags & ZCHECKSUM_FLAG_NOPWRITE) || + BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) || BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) || BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) || BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) || @@ -2914,8 +2978,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) * Try to allocate an intent log block. Return 0 on success, errno on failure. */ int -zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t size, boolean_t use_slog) +zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, + blkptr_t *old_bp, uint64_t size, boolean_t use_slog) { int error = 1; zio_alloc_list_t io_alloc_list; @@ -2948,6 +3012,23 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, BP_SET_LEVEL(new_bp, 0); BP_SET_DEDUP(new_bp, 0); BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); + + /* + * encrypted blocks will require an IV and salt. We generate + * these now since we will not be rewriting the bp at + * rewrite time. + */ + if (os->os_encrypted) { + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t salt[ZIO_DATA_SALT_LEN]; + + BP_SET_ENCRYPTED(new_bp, B_TRUE); + VERIFY0(spa_crypt_get_salt(spa, + dmu_objset_id(os), salt)); + VERIFY0(zio_crypt_generate_iv(iv)); + + zio_crypt_encode_params_bp(new_bp, salt, iv); + } } return (error); @@ -3274,6 +3355,114 @@ zio_vdev_io_bypass(zio_t *zio) zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; } +/* + * ========================================================================== + * Encrypt and store encryption parameters + * ========================================================================== + */ + + +/* + * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for + * managing the storage of encryption parameters and passing them to the + * lower-level encryption functions. 
+ */ +static int +zio_encrypt(zio_t *zio) +{ + zio_prop_t *zp = &zio->io_prop; + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + uint64_t psize = BP_GET_PSIZE(bp); + dmu_object_type_t ot = BP_GET_TYPE(bp); + void *enc_buf = NULL; + abd_t *eabd = NULL; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + /* the root zio already encrypted the data */ + if (zio->io_child_type == ZIO_CHILD_GANG) + return (ZIO_PIPELINE_CONTINUE); + + /* only ZIL blocks are re-encrypted on rewrite */ + if (!IO_IS_ALLOCATING(zio) && + BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) + return (ZIO_PIPELINE_CONTINUE); + + /* if we are doing raw encryption set the provided encryption params */ + if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + ASSERT(zio->io_flags & ZIO_FLAG_RAW_COMPRESS); + ASSERT(zp->zp_encrypt); + BP_SET_ENCRYPTED(bp, B_TRUE); + BP_SET_BYTEORDER(bp, zp->zp_byteorder); + zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); + zio_crypt_encode_mac_bp(bp, zp->zp_mac); + return (ZIO_PIPELINE_CONTINUE); + } + + if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) { + BP_SET_ENCRYPTED(bp, B_FALSE); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Later passes of sync-to-convergence may decide to rewrite data + * in place to avoid more disk reallocations. This presents a problem + * for encryption because this consitutes rewriting the new data with + * the same encryption key and IV. However, this only applies to blocks + * in the MOS (particularly the spacemaps) and we do not encrypt the + * MOS. We assert that the zio is allocating or an intent log write + * to enforce this. + */ + ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG); + ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG); + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + ASSERT3U(psize, !=, 0); + + enc_buf = zio_buf_alloc(psize); + eabd = abd_get_from_buf(enc_buf, psize); + abd_take_ownership_of_buf(eabd, B_TRUE); + + /* + * For an explanation of what encryption parameters are stored + * where, see the block comment in zio_crypt.c. + */ + if (ot == DMU_OT_INTENT_LOG) { + zio_crypt_decode_params_bp(bp, salt, iv); + } else { + BP_SET_ENCRYPTED(bp, B_TRUE); + } + + /* Perform the encryption. This should not fail */ + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, &zio->io_bookmark, bp, + zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, + &no_crypt)); + + /* encode encryption metadata into the bp */ + if (ot == DMU_OT_INTENT_LOG) { + /* + * ZIL blocks store the MAC in the embedded checksum, so the + * transform must always be applied. 
+ */ + zio_crypt_encode_mac_zil(enc_buf, mac); + zio_push_transform(zio, eabd, psize, psize, NULL); + } else { + if (no_crypt) { + ASSERT3U(ot, ==, DMU_OT_DNODE); + BP_SET_ENCRYPTED(bp, B_FALSE); + abd_free(eabd); + } else { + zio_crypt_encode_params_bp(bp, salt, iv); + zio_crypt_encode_mac_bp(bp, mac); + zio_push_transform(zio, eabd, psize, psize, NULL); + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + /* * ========================================================================== * Generate and verify checksums @@ -3838,6 +4027,7 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_free_bp_init, zio_issue_async, zio_write_compress, + zio_encrypt, zio_checksum_generate, zio_nop_write, zio_ddt_read_start, diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c index e1c98b0b99c3..468cc84f6966 100644 --- a/usr/src/uts/common/fs/zfs/zio_checksum.c +++ b/usr/src/uts/common/fs/zfs/zio_checksum.c @@ -297,12 +297,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, blkptr_t *bp = zio->io_bp; uint64_t offset = zio->io_offset; zio_checksum_info_t *ci = &zio_checksum_table[checksum]; - zio_cksum_t cksum; + zio_cksum_t cksum, tmp_cksum; + zio_cksum_t *final_cksum; spa_t *spa = zio->io_spa; ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ASSERT(ci->ci_func[0] != NULL); + bzero(&tmp_cksum, sizeof (zio_cksum_t)); zio_checksum_template_init(checksum, spa); if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { @@ -318,19 +320,52 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, } else { eck = (zio_eck_t *)((char *)data + size) - 1; } - if (checksum == ZIO_CHECKSUM_GANG_HEADER) + + if (checksum == ZIO_CHECKSUM_GANG_HEADER) { zio_checksum_gang_verifier(&eck->zec_cksum, bp); - else if (checksum == ZIO_CHECKSUM_LABEL) + } else if (checksum == ZIO_CHECKSUM_LABEL) { zio_checksum_label_verifier(&eck->zec_cksum, offset); - else - bp->blk_cksum = eck->zec_cksum; + } else { + tmp_cksum = eck->zec_cksum; + eck->zec_cksum = bp->blk_cksum; + } + eck->zec_magic = ZEC_MAGIC; - ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], - &cksum); - eck->zec_cksum = cksum; + final_cksum = &eck->zec_cksum; } else { - ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], - &bp->blk_cksum); + final_cksum = &bp->blk_cksum; + } + + ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], + &cksum); + + if (bp != NULL && BP_IS_ENCRYPTED(bp)) { + /* + * Weak checksums do not have their entropy spread evenly + * across the bits of the checksum. Therefore, when truncating + * a weak checksum we XOR the first 2 words with the last 2 so + * that we don't "lose" any entropy unnecessarily. + */ + if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) { + cksum.zc_word[0] ^= cksum.zc_word[2]; + cksum.zc_word[1] ^= cksum.zc_word[3]; + } + + final_cksum->zc_word[0] = cksum.zc_word[0]; + final_cksum->zc_word[1] = cksum.zc_word[1]; + + /* + * If this is an encrypted ZIL block we overwrote the MAC with + * the verifier before we performed the checksum. Restore it + * now from the copy we saved. 
+ */ + if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { + ASSERT3U(checksum, ==, ZIO_CHECKSUM_ZILOG2); + final_cksum->zc_word[2] = tmp_cksum.zc_word[2]; + final_cksum->zc_word[3] = tmp_cksum.zc_word[3]; + } + } else { + *final_cksum = cksum; } } @@ -410,6 +445,23 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, spa->spa_cksum_tmpls[checksum], &actual_cksum); } + /* + * MAC checksums are a special case since half of this checksum will + * actually be the encryption MAC. This will be verified by the + * decryption process, so we just check the truncated checksum now. + */ + if (bp != NULL && BP_IS_ENCRYPTED(bp)) { + if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) { + actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2]; + actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3]; + } + + actual_cksum.zc_word[2] = 0; + actual_cksum.zc_word[3] = 0; + expected_cksum.zc_word[2] = 0; + expected_cksum.zc_word[3] = 0; + } + if (info != NULL) { info->zbc_expected = expected_cksum; info->zbc_actual = actual_cksum; diff --git a/usr/src/uts/common/fs/zfs/zio_crypt.c b/usr/src/uts/common/fs/zfs/zio_crypt.c new file mode 100644 index 000000000000..135ac30c43f5 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/zio_crypt.c @@ -0,0 +1,1481 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +/* + * This file is responsible for handling all of the details of generating + * encryption parameters and performing encryption. + * + * BLOCK ENCRYPTION PARAMETERS: + * Encryption Algorithm (crypt): + * The encryption algorithm and mode we are going to use. We currently support + * AES-GCM and AES-CCM in 128, 192, and 256 bits. + * + * Plaintext: + * The unencrypted data that we want to encrypt + * + * Initialization Vector (IV): + * An initialization vector for the encryption algorithms. This is + * used to "tweak" the encryption algorithms so that equivalent blocks of + * data are encrypted into different ciphertext outputs. Different modes + * of encryption have different requirements for the IV. AES-GCM and AES-CCM + * require that an IV is never reused with the same encryption key. This + * value is stored unencrypted and must simply be provided to the decryption + * function. We use a 96 bit IV (as recommended by NIST). For non-dedup blocks + * we derive the IV randomly. The first 64 bits of the IV are stored in the + * second word of DVA[2] and the remaining 32 bits are stored in the upper 32 + * bits of blk_fill. For most object types this is safe because we only encrypt + * level 0 blocks which means that the fill count will be 1. For DMU_OT_DNODE + * blocks the fill count is instead used to indicate the number of allocated + * dnodes beneath the bp. The on-disk format supports at most 2^15 slots per + * L0 dnode block, because the maximum block size is 16MB (2^24). 
In either + * case, for level 0 blocks this number will still be smaller than UINT32_MAX + * so it is safe to store the IV in the top 32 bits of blk_fill, while leaving + * the bottom 32 bits of the fill count for the dnode code. + * + * Master key: + * This is the most important secret data of an encrypted dataset. It is used + * along with the salt to generate the actual encryption keys via HKDF. We + * do not use the master key to encrypt any data because there are theoretical + * limits on how much data can actually be safely encrypted with any encryption + * mode. The master key is stored encrypted on disk with the user's key. Its + * length is determined by the encryption algorithm. For details on how this is + * stored see the block comment in dsl_crypt.c. + * + * Salt: + * Used as an input to the HKDF function, along with the master key. We use a + * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt + * can be used for encrypting many blocks, so we cache the current salt and the + * associated derived key in zio_crypt_key_t so we do not need to derive it again + * needlessly. + * + * Encryption Key: + * A secret binary key, generated from an HKDF function used to encrypt and + * decrypt data. + * + * Message Authentication Code (MAC) + * The MAC is an output of authenticated encryption modes such as AES-GCM and + * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted + * data on disk and return garbage to the application. Effectively, it is a + * checksum that cannot be reproduced by an attacker. We store the MAC in the + * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated + * regular checksum of the ciphertext which can be used for scrubbing. + * + * ZIL ENCRYPTION: + * ZIL blocks have their bp written to disk ahead of the associated data, so we + * cannot store encryption parameters there as we normally do. For these blocks + * the MAC is stored in the embedded checksum within the zil_chain_t header. The + * salt and IV are generated for the block on bp allocation instead of at + * encryption time. In addition, ZIL blocks have some pieces that must be left + * in plaintext for claiming while all of the sensitive user data still needs to + * be encrypted. The function zio_crypt_init_uios_zil() handles parsing which + * pieces of the block need to be encrypted. + * + * DNODE ENCRYPTION: + * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left + * in plaintext for scrubbing and claiming, but the bonus buffers might contain + * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing + * which pieces of the block need to be encrypted. + * + * CONSIDERATIONS FOR DEDUP: + * In order for dedup to work, blocks that we want to dedup with one another + * need to use the same IV and encryption key, so that they will have the same + * ciphertext. Normally, one should never reuse an IV with the same encryption + * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both + * blocks. In this case, however, since we are using the same plaindata as + * well, all that we end up with is a duplicate of the original data we already + * had.
As a result, an attacker with read access to the raw disk will be able + * to tell which blocks are the same but this information is already given away + * by dedup anyway. In order to get the same IVs and encryption keys for + * equivalent blocks of data we use a HMAC of the plaindata. We use an HMAC + * here so there is never a reproducible checksum of the plaindata available + * to the attacker. The HMAC key is kept alongside the master key, encrypted + * on disk. The first 64 bits of the HMAC are used in place of the random salt, + * and the next 96 bits are used as the IV. + */ + +zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { + {"", ZC_TYPE_NONE, 0, "inherit"}, + {"", ZC_TYPE_NONE, 0, "on"}, + {"", ZC_TYPE_NONE, 0, "off"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"} +}; + +static int +hkdf_sha256_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material, + uint_t km_len, uint8_t *out_buf) +{ + int ret; + crypto_mechanism_t mech; + crypto_key_t key; + crypto_data_t input_cd, output_cd; + + /* initialize sha 256 hmac mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the salt as a crypto key */ + key.ck_format = CRYPTO_KEY_RAW; + key.ck_length = BYTES_TO_BITS(salt_len); + key.ck_data = salt; + + /* initialize crypto data for the input and output data */ + input_cd.cd_format = CRYPTO_DATA_RAW; + input_cd.cd_offset = 0; + input_cd.cd_length = km_len; + input_cd.cd_raw.iov_base = (char *)key_material; + input_cd.cd_raw.iov_len = km_len; + + output_cd.cd_format = CRYPTO_DATA_RAW; + output_cd.cd_offset = 0; + output_cd.cd_length = SHA_256_DIGEST_LEN; + output_cd.cd_raw.iov_base = (char *)out_buf; + output_cd.cd_raw.iov_len = SHA_256_DIGEST_LEN; + + ret = crypto_mac(&mech, &input_cd, &key, NULL, &output_cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + return (0); + +error: + return (ret); +} + +static int +hkdf_sha256_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len, + uint8_t *out_buf, uint_t out_len) +{ + int ret; + crypto_mechanism_t mech; + crypto_context_t ctx; + crypto_key_t key; + crypto_data_t T_cd, info_cd, c_cd; + uint_t i, T_len = 0, pos = 0; + uint8_t c; + uint_t N = (out_len + SHA_256_DIGEST_LEN) / SHA_256_DIGEST_LEN; + uint8_t T[SHA_256_DIGEST_LEN]; + + if (N > 255) + return (SET_ERROR(EINVAL)); + + /* initialize sha 256 hmac mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the salt as a crypto key */ + key.ck_format = CRYPTO_KEY_RAW; + key.ck_length = BYTES_TO_BITS(SHA_256_DIGEST_LEN); + key.ck_data = extract_key; + + /* initialize crypto data for the input and output data */ + T_cd.cd_format = CRYPTO_DATA_RAW; + T_cd.cd_offset = 0; + T_cd.cd_raw.iov_base = (char *)T; + + c_cd.cd_format = CRYPTO_DATA_RAW; + c_cd.cd_offset = 0; + c_cd.cd_length = 1; + c_cd.cd_raw.iov_base = (char *)&c; + c_cd.cd_raw.iov_len = 1; + + info_cd.cd_format = CRYPTO_DATA_RAW; + info_cd.cd_offset = 0; + info_cd.cd_length = info_len; + info_cd.cd_raw.iov_base = (char *)info; + info_cd.cd_raw.iov_len = info_len; + + for (i = 1; i <= N; i++) { + c = i; + + T_cd.cd_length = T_len; + T_cd.cd_raw.iov_len = 
T_len; + + ret = crypto_mac_init(&mech, &key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + ret = crypto_mac_update(ctx, &T_cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + ret = crypto_mac_update(ctx, &info_cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + ret = crypto_mac_update(ctx, &c_cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + T_len = SHA_256_DIGEST_LEN; + T_cd.cd_length = T_len; + T_cd.cd_raw.iov_len = T_len; + + ret = crypto_mac_final(ctx, &T_cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + bcopy(T, out_buf + pos, + (i != N) ? SHA_256_DIGEST_LEN : (out_len - pos)); + pos += SHA_256_DIGEST_LEN; + } + + return (0); + +error: + return (ret); +} + +/* + * HKDF is designed to be a relatively fast function for deriving keys from a + * master key + a salt. We use this function to generate new encryption keys + * so as to avoid hitting the cryptographic limits of the underlying + * encryption modes. Note that, for the sake of deriving encryption keys, the + * info parameter is called the "salt" everywhere else in the code. + */ +static int +hkdf_sha256(uint8_t *key_material, uint_t km_len, uint8_t *salt, + uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key, + uint_t out_len) +{ + int ret; + uint8_t extract_key[SHA_256_DIGEST_LEN]; + + ret = hkdf_sha256_extract(salt, salt_len, key_material, km_len, + extract_key); + if (ret != 0) + goto error; + + ret = hkdf_sha256_expand(extract_key, info, info_len, output_key, + out_len); + if (ret != 0) + goto error; + + return (0); + +error: + return (ret); +} + +void +zio_crypt_key_destroy(zio_crypt_key_t *key) +{ + rw_destroy(&key->zk_salt_lock); + + /* free crypto templates */ + crypto_destroy_ctx_template(key->zk_current_tmpl); + crypto_destroy_ctx_template(key->zk_hmac_tmpl); + + /* zero out sensitive data */ + bzero(key, sizeof (zio_crypt_key_t)); +} + +int +zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) +{ + int ret; + crypto_mechanism_t mech; + uint_t keydata_len; + + ASSERT(key != NULL); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + + /* fill keydata buffers and salt with random data */ + ret = random_get_bytes(key->zk_master_keydata, keydata_len); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_hmac_keydata, HMAC_SHA256_KEYLEN); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + /* derive the current key from the master key */ + ret = hkdf_sha256(key->zk_master_keydata, keydata_len, NULL, 0, + key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, + keydata_len); + if (ret != 0) + goto error; + + /* initialize keys for the ICP */ + key->zk_current_key.ck_format = CRYPTO_KEY_RAW; + key->zk_current_key.ck_data = key->zk_current_keydata; + key->zk_current_key.ck_length = BYTES_TO_BITS(keydata_len); + + key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; + key->zk_hmac_key.ck_data = &key->zk_hmac_key; + key->zk_hmac_key.ck_length = BYTES_TO_BITS(HMAC_SHA256_KEYLEN); + + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. 
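Stripped of the illumos crypto framework (KCF) plumbing, the hkdf_sha256_extract()/hkdf_sha256_expand() pair above is the standard RFC 5869 construction. A compact user-space sketch, using OpenSSL's one-shot HMAC() purely as a stand-in for crypto_mac(), looks roughly like this:

#include <stdint.h>
#include <string.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>	/* OpenSSL stands in for the KCF here */

#define	SHA256_LEN	32

/* HKDF-Extract: PRK = HMAC-SHA256(salt, input key material) */
static void
hkdf_extract(const uint8_t *salt, size_t saltlen, const uint8_t *ikm,
    size_t ikmlen, uint8_t prk[SHA256_LEN])
{
	unsigned int len = SHA256_LEN;

	HMAC(EVP_sha256(), salt, (int)saltlen, ikm, ikmlen, prk, &len);
}

/* HKDF-Expand: T(i) = HMAC-SHA256(PRK, T(i-1) || info || i) */
static int
hkdf_expand(const uint8_t prk[SHA256_LEN], const uint8_t *info,
    size_t infolen, uint8_t *okm, size_t okmlen)
{
	uint8_t t[SHA256_LEN], msg[SHA256_LEN + 64 + 1];
	size_t tlen = 0, pos = 0, n;
	unsigned int mdlen = SHA256_LEN;
	uint8_t i;

	if (okmlen > 255 * SHA256_LEN || infolen > 64)
		return (-1);

	for (i = 1; pos < okmlen; i++) {
		/* msg = T(i-1) || info || counter byte */
		memcpy(msg, t, tlen);
		memcpy(msg + tlen, info, infolen);
		msg[tlen + infolen] = i;
		HMAC(EVP_sha256(), prk, SHA256_LEN, msg,
		    tlen + infolen + 1, t, &mdlen);
		tlen = SHA256_LEN;

		/* append T(i), truncating the final block to fit */
		n = (okmlen - pos < SHA256_LEN) ? okmlen - pos : SHA256_LEN;
		memcpy(okm + pos, t, n);
		pos += n;
	}
	return (0);
}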
+ */
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_salt_count = 0;
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+static int
+zio_crypt_key_change_salt(zio_crypt_key_t *key)
+{
+	int ret;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	crypto_mechanism_t mech;
+	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
+
+	/* generate a new salt */
+	ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	rw_enter(&key->zk_salt_lock, RW_WRITER);
+
+	/* derive the current key from the master key and the new salt */
+	ret = hkdf_sha256(key->zk_master_keydata, keydata_len, NULL, 0,
+	    salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len);
+	if (ret != 0)
+		goto error_unlock;
+
+	/* assign the salt and reset the usage count */
+	bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN);
+	key->zk_salt_count = 0;
+
+	/* destroy the old context template and create the new one */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	mech.cm_type =
+	    crypto_mech2id(zio_crypt_table[key->zk_crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	rw_exit(&key->zk_salt_lock);
+
+	return (0);
+
+error_unlock:
+	rw_exit(&key->zk_salt_lock);
+error:
+	return (ret);
+}
+
+/* See comment above ZIO_CRYPT_MAX_SALT_USAGE definition for details */
+int
+zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
+{
+	int ret;
+	boolean_t salt_change;
+
+	rw_enter(&key->zk_salt_lock, RW_READER);
+
+	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
+	salt_change = (atomic_inc_64_nv(&key->zk_salt_count) ==
+	    ZIO_CRYPT_MAX_SALT_USAGE);
+
+	rw_exit(&key->zk_salt_lock);
+
+	if (salt_change) {
+		ret = zio_crypt_key_change_salt(key);
+		if (ret != 0)
+			goto error;
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * This function handles all encryption and decryption in zfs. When
+ * encrypting it expects puio to reference the plaintext and cuio to
+ * have enough space for the ciphertext + room for a MAC. When decrypting,
+ * it expects both puio and cuio to have enough room for a MAC, although
+ * the plaintext uio can be discarded afterwards. datalen should be the
+ * length of only the plaintext / ciphertext in either case.
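Concretely, for a single linear buffer the layout that zio_do_crypt_uio() expects from its caller looks like the sketch below, using plain struct iovec from <sys/uio.h>. zio_crypt_init_uios_normal() further down builds essentially this shape, with the MAC iovec filled in later by zio_crypt_init_uios():

#include <stdint.h>
#include <sys/uio.h>

#define	DATA_LEN	4096	/* example block size */
#define	MAC_LEN		16

/*
 * One plaintext iovec covering the data, and a cipher-side iovec array
 * whose final entry is reserved for the MAC. The cipher uio therefore
 * always carries one more iovec than the plain uio when encrypting.
 */
static void
layout_example(uint8_t *plainbuf, uint8_t *cipherbuf, uint8_t *macbuf,
    struct iovec plain_iov[1], struct iovec cipher_iov[2])
{
	plain_iov[0].iov_base = plainbuf;
	plain_iov[0].iov_len = DATA_LEN;

	cipher_iov[0].iov_base = cipherbuf;
	cipher_iov[0].iov_len = DATA_LEN;
	cipher_iov[1].iov_base = macbuf;	/* MAC always rides last */
	cipher_iov[1].iov_len = MAC_LEN;
}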
+ */ +/* ARGSUSED */ +static int +zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, + crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, + uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len) +{ + int ret; + crypto_data_t plaindata, cipherdata; + CK_AES_CCM_PARAMS ccmp; + CK_AES_GCM_PARAMS gcmp; + crypto_mechanism_t mech; + zio_crypt_info_t crypt_info; + uint_t plain_full_len, maclen; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW); + + /* lookup the encryption info */ + crypt_info = zio_crypt_table[crypt]; + + /* the mac will always be the last iovec_t in the cipher uio */ + maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len; + + ASSERT(maclen <= ZIO_DATA_MAC_LEN); + + /* setup encryption mechanism (same as crypt) */ + mech.cm_type = crypto_mech2id(crypt_info.ci_mechname); + + /* plain length will include the MAC if we are decrypting */ + if (encrypt) { + plain_full_len = datalen; + } else { + plain_full_len = datalen + maclen; + } + + /* + * setup encryption params (currently only AES CCM and AES GCM + * are supported) + */ + if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) { + ccmp.ulNonceSize = ZIO_DATA_IV_LEN; + ccmp.ulAuthDataSize = auth_len; + ccmp.authData = authbuf; + ccmp.ulMACSize = maclen; + ccmp.nonce = ivbuf; + ccmp.ulDataSize = plain_full_len; + + mech.cm_param = (char *)(&ccmp); + mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); + } else { + gcmp.ulIvLen = ZIO_DATA_IV_LEN; + gcmp.ulIvBits = BYTES_TO_BITS(ZIO_DATA_IV_LEN); + gcmp.ulAADLen = auth_len; + gcmp.pAAD = authbuf; + gcmp.ulTagBits = BYTES_TO_BITS(maclen); + gcmp.pIv = ivbuf; + + mech.cm_param = (char *)(&gcmp); + mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); + } + + /* populate the cipher and plain data structs. 
*/ + plaindata.cd_format = CRYPTO_DATA_UIO; + plaindata.cd_offset = 0; + plaindata.cd_uio = puio; + plaindata.cd_miscdata = NULL; + plaindata.cd_length = plain_full_len; + + cipherdata.cd_format = CRYPTO_DATA_UIO; + cipherdata.cd_offset = 0; + cipherdata.cd_uio = cuio; + cipherdata.cd_miscdata = NULL; + cipherdata.cd_length = datalen + maclen; + + /* perform the actual encryption */ + if (encrypt) { + ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata, + NULL); + } else { + ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata, + NULL); + } + + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + return (0); + +error: + return (ret); +} + +int +zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) +{ + int ret; + uio_t puio, cuio; + iovec_t plain_iovecs[2], cipher_iovecs[3]; + uint64_t crypt = key->zk_crypt; + uint_t enc_len, keydata_len; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + + /* generate iv for wrapping the master and hmac key */ + ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN); + if (ret != 0) + goto error; + + /* initialize uio_ts */ + plain_iovecs[0].iov_base = (char *)key->zk_master_keydata; + plain_iovecs[0].iov_len = keydata_len; + plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata; + plain_iovecs[1].iov_len = HMAC_SHA256_KEYLEN; + + cipher_iovecs[0].iov_base = (char *)keydata_out; + cipher_iovecs[0].iov_len = keydata_len; + cipher_iovecs[1].iov_base = (char *)hmac_keydata_out; + cipher_iovecs[1].iov_len = HMAC_SHA256_KEYLEN; + cipher_iovecs[2].iov_base = (char *)mac; + cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + + enc_len = zio_crypt_table[crypt].ci_keylen + HMAC_SHA256_KEYLEN; + puio.uio_iov = plain_iovecs; + puio.uio_iovcnt = 2; + puio.uio_segflg = UIO_SYSSPACE; + cuio.uio_iov = cipher_iovecs; + cuio.uio_iovcnt = 3; + cuio.uio_segflg = UIO_SYSSPACE; + + /* encrypt the keys and store the resulting ciphertext and mac */ + ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, + &puio, &cuio, NULL, 0); + if (ret != 0) + goto error; + + return (0); + +error: + return (ret); +} + +int +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint8_t *keydata, + uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key) +{ + int ret; + crypto_mechanism_t mech; + uio_t puio, cuio; + iovec_t plain_iovecs[3], cipher_iovecs[3]; + uint8_t outmac[WRAPPING_MAC_LEN]; + uint_t enc_len, keydata_len; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + + /* initialize uio_ts */ + plain_iovecs[0].iov_base = (char *)key->zk_master_keydata; + plain_iovecs[0].iov_len = keydata_len; + plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata; + plain_iovecs[1].iov_len = HMAC_SHA256_KEYLEN; + plain_iovecs[2].iov_base = (char *)outmac; + plain_iovecs[2].iov_len = WRAPPING_MAC_LEN; + + cipher_iovecs[0].iov_base = (char *)keydata; + cipher_iovecs[0].iov_len = keydata_len; + cipher_iovecs[1].iov_base = (char *)hmac_keydata; + cipher_iovecs[1].iov_len = HMAC_SHA256_KEYLEN; + cipher_iovecs[2].iov_base = (char *)mac; + cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + + enc_len = keydata_len + HMAC_SHA256_KEYLEN; + puio.uio_iov = plain_iovecs; + puio.uio_segflg = UIO_SYSSPACE; + puio.uio_iovcnt = 3; + cuio.uio_iov = cipher_iovecs; + cuio.uio_iovcnt 
= 3; + cuio.uio_segflg = UIO_SYSSPACE; + + /* decrypt the keys and store the result in the output buffers */ + ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, + &puio, &cuio, NULL, 0); + if (ret != 0) + goto error; + + /* generate a fresh salt */ + ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + /* derive the current key from the master key */ + ret = hkdf_sha256(key->zk_master_keydata, keydata_len, NULL, 0, + key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, + keydata_len); + if (ret != 0) + goto error; + + /* initialize keys for ICP */ + key->zk_current_key.ck_format = CRYPTO_KEY_RAW; + key->zk_current_key.ck_data = key->zk_current_keydata; + key->zk_current_key.ck_length = BYTES_TO_BITS(keydata_len); + + key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; + key->zk_hmac_key.ck_data = key->zk_hmac_keydata; + key->zk_hmac_key.ck_length = BYTES_TO_BITS(HMAC_SHA256_KEYLEN); + + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. + */ + mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC); + ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, + &key->zk_hmac_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_hmac_tmpl = NULL; + + key->zk_crypt = crypt; + key->zk_salt_count = 0; + rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); + + return (0); + +error: + zio_crypt_key_destroy(key); + return (ret); +} + +int +zio_crypt_generate_iv(uint8_t *ivbuf) +{ + int ret; + + /* randomly generate the IV */ + ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN); + if (ret != 0) + goto error; + + return (0); + +error: + bzero(ivbuf, ZIO_DATA_IV_LEN); + return (ret); +} + +int +zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt) +{ + int ret; + crypto_mechanism_t mech; + crypto_data_t in_data, digest_data; + uint8_t digestbuf[SHA_256_DIGEST_LEN]; + + /* initialize sha256-hmac mechanism and crypto data */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the crypto data */ + in_data.cd_format = CRYPTO_DATA_RAW; + in_data.cd_offset = 0; + in_data.cd_length = datalen; + in_data.cd_raw.iov_base = (char *)data; + in_data.cd_raw.iov_len = datalen; + + digest_data.cd_format = CRYPTO_DATA_RAW; + digest_data.cd_offset = 0; + digest_data.cd_length = SHA_256_DIGEST_LEN; + digest_data.cd_raw.iov_base = (char *)digestbuf; + digest_data.cd_raw.iov_len = SHA_256_DIGEST_LEN; + + /* generate the hmac */ + ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl, + &digest_data, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* truncate and copy the digest into the output buffer */ + bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN); + bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN); + + return (0); + +error: + return (ret); +} + +void +zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) +{ + uint32_t val32; + + ASSERT(BP_IS_ENCRYPTED(bp)); + + bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); + bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, val32); +} + +void 
+zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) +{ + uint64_t val64; + uint32_t val32; + + ASSERT(BP_IS_ENCRYPTED(bp)); + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t)); + bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t)); + + val32 = (uint32_t)BP_GET_IV2(bp); + bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); + } else { + val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]); + bcopy(&val64, salt, sizeof (uint64_t)); + + val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]); + bcopy(&val64, iv, sizeof (uint64_t)); + + val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp)); + bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); + } +} + +void +zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) +{ + ASSERT(BP_IS_ENCRYPTED(bp)); + + bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], + sizeof (uint64_t)); +} + +void +zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac) +{ + uint64_t val64; + + ASSERT(BP_IS_ENCRYPTED(bp)); + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t)); + bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t), + sizeof (uint64_t)); + } else { + val64 = BSWAP_64(bp->blk_cksum.zc_word[2]); + bcopy(&val64, mac, sizeof (uint64_t)); + + val64 = BSWAP_64(bp->blk_cksum.zc_word[3]); + bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t)); + } +} + +void +zio_crypt_encode_mac_zil(void *data, uint8_t *mac) +{ + zil_chain_t *zilc = data; + + bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3], + sizeof (uint64_t)); +} + +void +zio_crypt_decode_mac_zil(const void *data, uint8_t *mac) +{ + /* + * The ZIL MAC is embedded in the block it protects, which will + * not have been byteswapped by the time this function has been called. + * As a result, we don't need to worry about byteswapping the MAC. + */ + const zil_chain_t *zilc = data; + + bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t)); + bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t), + sizeof (uint64_t)); +} + +/* + * This function is modeled off of zio_crypt_init_uios_dnode(). This function, + * however, copies bonus buffers instead of parsing them into a uio_t. + */ +void +zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) +{ + uint_t i, max_dnp = datalen >> DNODE_SHIFT; + uint8_t *src; + dnode_phys_t *dnp, *sdnp, *ddnp; + + src = abd_borrow_buf_copy(src_abd, datalen); + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), + DN_MAX_BONUS_LEN(dnp)); + } + } + + abd_return_buf(src_abd, src, datalen); +} + +static void +zio_crypt_destroy_uio(uio_t *uio) +{ + if (uio->uio_iov) + kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t)); +} + +/* + * We do not check for the older zil chain because this feature was not + * available before the newer zil chain was introduced. The goal here + * is to encrypt everything except the blkptr_t of a lr_write_t and + * the zil_chain_t header. 
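Conceptually, the record walk performed by zio_crypt_init_uios_zil() below reduces to iterating lr_t headers by lrc_reclen and excluding the common header (and, for TX_WRITE records, the trailing blkptr_t) from the encrypted byte count. A simplified, self-contained sketch with stand-in types (the real definitions live in zil.h; byteswapping is omitted here):

#include <stddef.h>
#include <stdint.h>

/* Simplified stand-ins for the real zil.h record types. */
typedef struct fake_lr {
	uint64_t lrc_txtype;
	uint64_t lrc_reclen;
	/* type-specific payload follows */
} fake_lr_t;

#define	FAKE_TX_WRITE		9	/* stand-in for TX_WRITE */
#define	FAKE_BLKPTR_SIZE	128	/* stand-in for sizeof (blkptr_t) */

/*
 * Walk the records of a ZIL block, counting the bytes that would be
 * encrypted: each record minus its common lr_t header, and for TX_WRITE
 * records also minus the trailing blkptr_t, which must stay in the clear
 * so the block can be claimed at import time.
 */
static size_t
count_encrypted_bytes(const uint8_t *rec_start, const uint8_t *rec_end)
{
	const uint8_t *p = rec_start;
	size_t enc = 0;

	while (p < rec_end) {
		const fake_lr_t *lr = (const fake_lr_t *)p;
		size_t len = lr->lrc_reclen;

		if (lr->lrc_txtype == FAKE_TX_WRITE)
			enc += len - sizeof (fake_lr_t) - FAKE_BLKPTR_SIZE;
		else
			enc += len - sizeof (fake_lr_t);

		p += len;
	}
	return (enc);
}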
+ */ + +/* ARGSUSED */ +static int +zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, + uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + boolean_t byteswap; + uint64_t txtype; + uint_t nr_src, nr_dst, lr_len, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; + uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp; + zil_chain_t *zilc; + lr_t *lr; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + /* if we are decrypting, the plainbuffer needs an extra iovec */ + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + /* find the start and end record of the log block */ + zilc = (zil_chain_t *)src; + slrp = src + sizeof (zil_chain_t); + aadp = aadbuf; + + /* + * Determine if we need to byteswap values we use for parsing. If we + * are writing the data, the zec_magic value will not exist so we must + * be writing this block in native endianness. + */ + if (zilc->zc_eck.zec_magic == BSWAP_64(ZEC_MAGIC)) { + ASSERT(!encrypt); + byteswap = B_TRUE; + blkend = src + BSWAP_64(zilc->zc_nused); + } else { + byteswap = B_FALSE; + blkend = src + zilc->zc_nused; + } + + /* calculate the number of encrypted iovecs we will need */ + for (; slrp < blkend; slrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + nr_iovecs++; + if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) + nr_iovecs++; + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + /* allocate the iovec arrays */ + if (nr_src != 0) { + src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); + if (!src_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); + if (!dst_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + /* + * Copy the plain zil header over and authenticate everything except + * the checksum that will store our MAC. If we are writing the data + * the embedded checksum will not have been calculated yet, so we don't + * authenticate that. 
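The "authenticate but do not encrypt" handling above is the additional authenticated data (AAD) path of the authenticated-encryption modes: the ZIL header (minus the embedded checksum), the lr_t headers, and the indirect-write blkptrs are fed to the cipher as AAD so they are covered by the MAC without being scrambled. A user-space illustration of the same idea with AES-GCM, using OpenSSL's EVP interface purely as a stand-in for the KCF:

#include <stdint.h>
#include <openssl/evp.h>

/*
 * Encrypt `pt` and additionally authenticate `aad` without encrypting it,
 * producing the 16-byte GCM tag (the "MAC" in the comments above). Any
 * failure simply returns -1; real code would distinguish the error paths.
 */
static int
gcm_encrypt_with_aad(const uint8_t key[32], const uint8_t iv[12],
    const uint8_t *aad, int aad_len, const uint8_t *pt, int pt_len,
    uint8_t *ct, uint8_t tag[16])
{
	EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
	int len, ok;

	if (ctx == NULL)
		return (-1);

	ok = EVP_EncryptInit_ex(ctx, EVP_aes_256_gcm(), NULL, key, iv) &&
	    /* feed the AAD with a NULL output buffer: authenticated only */
	    EVP_EncryptUpdate(ctx, NULL, &len, aad, aad_len) &&
	    /* feed the plaintext: authenticated and encrypted */
	    EVP_EncryptUpdate(ctx, ct, &len, pt, pt_len) &&
	    EVP_EncryptFinal_ex(ctx, ct + len, &len) &&
	    EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, 16, tag);

	EVP_CIPHER_CTX_free(ctx);
	return (ok ? 0 : -1);
}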
+ */ + bcopy(src, dst, sizeof (zil_chain_t)); + bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t)); + aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t); + aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t); + + /* loop over records again, filling in iovecs */ + nr_iovecs = 0; + slrp = src + sizeof (zil_chain_t); + dlrp = dst + sizeof (zil_chain_t); + + for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + /* copy the common lr_t */ + bcopy(slrp, dlrp, sizeof (lr_t)); + bcopy(slrp, aadp, sizeof (lr_t)); + aadp += sizeof (lr_t); + aad_len += sizeof (lr_t); + + if (txtype == TX_WRITE) { + crypt_len = sizeof (lr_write_t) - + sizeof (lr_t) - sizeof (blkptr_t); + src_iovecs[nr_iovecs].iov_base = (char *)slrp + + sizeof (lr_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *)dlrp + + sizeof (lr_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + + /* copy the bp now since it will not be encrypted */ + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + dlrp + sizeof (lr_write_t) - sizeof (blkptr_t), + sizeof (blkptr_t)); + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + aadp, sizeof (blkptr_t)); + aadp += sizeof (blkptr_t); + aad_len += sizeof (blkptr_t); + nr_iovecs++; + total_len += crypt_len; + + if (lr_len != sizeof (lr_write_t)) { + crypt_len = lr_len - sizeof (lr_write_t); + src_iovecs[nr_iovecs].iov_base = (char *) + slrp + sizeof (lr_write_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *) + dlrp + sizeof (lr_write_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + nr_iovecs++; + total_len += crypt_len; + } + } else { + crypt_len = lr_len - sizeof (lr_t); + src_iovecs[nr_iovecs].iov_base = (char *)slrp + + sizeof (lr_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *)dlrp + + sizeof (lr_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + nr_iovecs++; + total_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + puio->uio_iov = src_iovecs; + puio->uio_iovcnt = nr_src; + cuio->uio_iov = dst_iovecs; + cuio->uio_iovcnt = nr_dst; + } else { + puio->uio_iov = dst_iovecs; + puio->uio_iovcnt = nr_dst; + cuio->uio_iov = src_iovecs; + cuio->uio_iovcnt = nr_src; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (src_iovecs != NULL) + kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); + if (dst_iovecs != NULL) + kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +static int +zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, + uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + uint_t i, max_dnp = datalen >> DNODE_SHIFT; + iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; + uint8_t *src, *dst, *bonus, *bonus_end, *dn_end, *aadp; + dnode_phys_t *dnp, *sdnp, *ddnp; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; 
+ nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + aadp = aadbuf; + + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + /* + * This block may still be byteswapped. However, all of the + * values we use are either uint8_t's (for which byteswapping + * is a noop) or a * != 0 check, which will work regardless + * of whether or not we byteswap. + */ + if (sdnp[i].dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) && + sdnp[i].dn_bonuslen != 0) { + nr_iovecs++; + } + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + if (nr_src != 0) { + src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); + if (!src_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); + if (!dst_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_iovecs == 0) { + /* XXX placeholder until full dnode authentication is added */ + uint64_t placeholder = 0x2f52f52f5ULL; + bcopy(&placeholder, aadp, sizeof (uint64_t)); + aad_len += sizeof (uint64_t); + goto out; + } + + nr_iovecs = 0; + + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + dn_end = (uint8_t *)(dnp + (dnp->dn_extra_slots + 1)); + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + bonus = (uint8_t *)DN_BONUS(dnp); + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + bonus_end = (uint8_t *)DN_SPILL_BLKPTR(dnp); + } else { + bonus_end = (uint8_t *)dn_end; + } + crypt_len = bonus_end - bonus; + + bcopy(dnp, &ddnp[i], bonus - (uint8_t *)dnp); + src_iovecs[nr_iovecs].iov_base = (void *)bonus; + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = + (void *)DN_BONUS(&ddnp[i]); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) + bcopy(bonus_end, DN_SPILL_BLKPTR(&ddnp[i]), + sizeof (blkptr_t)); + + nr_iovecs++; + total_len += crypt_len; + } else { + bcopy(dnp, &ddnp[i], dn_end - (uint8_t *)dnp); + } + } + +out: + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + puio->uio_iov = src_iovecs; + puio->uio_iovcnt = nr_src; + cuio->uio_iov = dst_iovecs; + cuio->uio_iovcnt = nr_dst; + } else { + puio->uio_iov = dst_iovecs; + puio->uio_iovcnt = nr_dst; + cuio->uio_iov = src_iovecs; + cuio->uio_iovcnt = nr_src; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (src_iovecs != NULL) + kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); + if (dst_iovecs != NULL) + kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* ARGSUSED */ +static int +zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, + uint_t *enc_len) +{ + int ret; + uint_t nr_plain = 1, nr_cipher = 2; + iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL; + + /* allocate the iovecs for the plain and cipher data */ + plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t), + KM_SLEEP); + if (!plain_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t), + KM_SLEEP); + if (!cipher_iovecs) { + ret = SET_ERROR(ENOMEM); + 
goto error; + } + + plain_iovecs[0].iov_base = (void *)plainbuf; + plain_iovecs[0].iov_len = datalen; + cipher_iovecs[0].iov_base = (void *)cipherbuf; + cipher_iovecs[0].iov_len = datalen; + + *enc_len = datalen; + puio->uio_iov = plain_iovecs; + puio->uio_iovcnt = nr_plain; + cuio->uio_iov = cipher_iovecs; + cuio->uio_iovcnt = nr_cipher; + + return (0); + +error: + if (plain_iovecs != NULL) + kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t)); + if (cipher_iovecs != NULL) + kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t)); + + *enc_len = 0; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* ARGSUSED */ +static int +zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uint8_t *mac, uio_t *puio, uio_t *cuio, + uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + iovec_t *mac_iov; + + ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE); + + /* route to handler */ + switch (ot) { + case DMU_OT_INTENT_LOG: + ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf, + datalen, puio, cuio, enc_len, authbuf, auth_len, no_crypt); + break; + case DMU_OT_DNODE: + ret = zio_crypt_init_uios_dnode(encrypt, plainbuf, cipherbuf, + datalen, puio, cuio, enc_len, authbuf, auth_len, no_crypt); + break; + default: + ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, + datalen, puio, cuio, enc_len); + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + break; + } + + if (ret != 0) + goto error; + + /* populate the uios */ + puio->uio_segflg = UIO_SYSSPACE; + cuio->uio_segflg = UIO_SYSSPACE; + + mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]); + mac_iov->iov_base = (void *)mac; + mac_iov->iov_len = ZIO_DATA_MAC_LEN; + + return (0); + +error: + return (ret); +} + +/* + * Primary encryption / decryption entrypoint for zio data. + */ +int +zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + uint8_t *plainbuf, uint8_t *cipherbuf, boolean_t *no_crypt) +{ + int ret; + boolean_t locked = B_FALSE; + uint64_t crypt = key->zk_crypt; + uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; + uint_t enc_len, auth_len; + uio_t puio, cuio; + uint8_t enc_keydata[MAX_MASTER_KEY_LEN]; + crypto_key_t tmp_ckey, *ckey = NULL; + crypto_ctx_template_t tmpl; + uint8_t *authbuf = NULL; + + bzero(&puio, sizeof (uio_t)); + bzero(&cuio, sizeof (uio_t)); + + /* create uios for encryption */ + ret = zio_crypt_init_uios(encrypt, ot, plainbuf, cipherbuf, datalen, + mac, &puio, &cuio, &enc_len, &authbuf, &auth_len, no_crypt); + if (ret != 0) + return (ret); + + /* + * If the needed key is the current one, just use it. Otherwise we + * need to generate a temporary one from the given salt + master key. + * If we are encrypting, we must return a copy of the current salt + * so that it can be stored in the blkptr_t. 
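The salt check described above amounts to a cache-or-derive pattern: blocks written with the cached salt reuse the already-derived key, while anything else (typically older blocks being read back) triggers a fresh HKDF derivation. A minimal sketch, with locking omitted and derive_key() standing in for an HKDF helper (hypothetical names, not part of the patch):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define	SALT_LEN	8
#define	KEY_LEN		32

/* Hypothetical key-derivation helper, e.g. the HKDF sketch shown earlier. */
extern void derive_key(const uint8_t *master, size_t master_len,
    const uint8_t salt[SALT_LEN], uint8_t out[KEY_LEN]);

typedef struct key_cache {
	uint8_t	kc_master[KEY_LEN];
	uint8_t	kc_salt[SALT_LEN];	/* salt of the cached derived key */
	uint8_t	kc_derived[KEY_LEN];	/* cached derived key */
} key_cache_t;

/*
 * Return the encryption key for a block carrying `salt`. If the salt matches
 * the cached one (the common case while writing), reuse the cached derived
 * key; otherwise re-derive it from the master key. The real code does this
 * while holding zk_salt_lock as reader.
 */
static void
key_for_salt(key_cache_t *kc, const uint8_t salt[SALT_LEN],
    uint8_t out[KEY_LEN])
{
	if (memcmp(salt, kc->kc_salt, SALT_LEN) == 0)
		memcpy(out, kc->kc_derived, KEY_LEN);
	else
		derive_key(kc->kc_master, KEY_LEN, salt, out);
}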
+ */ + rw_enter(&key->zk_salt_lock, RW_READER); + locked = B_TRUE; + + if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) { + ckey = &key->zk_current_key; + tmpl = key->zk_current_tmpl; + } else { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + + ret = hkdf_sha256(key->zk_master_keydata, keydata_len, NULL, 0, + salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len); + if (ret != 0) + goto error; + + tmp_ckey.ck_format = CRYPTO_KEY_RAW; + tmp_ckey.ck_data = enc_keydata; + tmp_ckey.ck_length = BYTES_TO_BITS(keydata_len); + + ckey = &tmp_ckey; + tmpl = NULL; + } + + /* perform the encryption / decryption */ + ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, + &puio, &cuio, authbuf, auth_len); + if (ret != 0) + goto error; + + if (locked) { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + } + + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(&puio); + zio_crypt_destroy_uio(&cuio); + + return (0); + +error: + if (locked) + rw_exit(&key->zk_salt_lock); + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(&puio); + zio_crypt_destroy_uio(&cuio); + + return (ret); +} + +/* + * Simple wrapper around zio_do_crypt_data() to work with abd's instead of + * linear buffers. + */ +int +zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) +{ + int ret; + void *ptmp, *ctmp; + + if (encrypt) { + ptmp = abd_borrow_buf_copy(pabd, datalen); + ctmp = abd_borrow_buf(cabd, datalen); + } else { + ptmp = abd_borrow_buf(pabd, datalen); + ctmp = abd_borrow_buf_copy(cabd, datalen); + } + + ret = zio_do_crypt_data(encrypt, key, salt, ot, iv, mac, + datalen, ptmp, ctmp, no_crypt); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (0); + +error: + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (ret); +} diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 0742f86322ca..ccc29c730207 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -495,7 +495,7 @@ zvol_create_minor(const char *name) } /* lie and say we're read-only */ - error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os); if (error) { mutex_exit(&zfsdev_state_lock); @@ -503,13 +503,13 @@ zvol_create_minor(const char *name) } if ((minor = zfsdev_minor_alloc()) == 0) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(ENXIO)); } if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -521,7 +521,7 @@ zvol_create_minor(const char *name) if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_soft_state_free(zfsdev_state, minor); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); 
mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -532,7 +532,7 @@ zvol_create_minor(const char *name) minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_remove_minor_node(zfs_dip, chrbuf); ddi_soft_state_free(zfsdev_state, minor); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -562,7 +562,7 @@ zvol_create_minor(const char *name) else zil_replay(os, zv, zvol_replay_vector); } - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); zv->zv_objset = NULL; zvol_minors++; @@ -627,7 +627,7 @@ zvol_first_open(zvol_state_t *zv) uint64_t readonly; /* lie and say we're read-only */ - error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, + error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, B_TRUE, zvol_tag, &os); if (error) return (error); @@ -636,13 +636,13 @@ zvol_first_open(zvol_state_t *zv) error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { ASSERT(error == 0); - dmu_objset_disown(os, zvol_tag); + dmu_objset_disown(os, 1, zvol_tag); return (error); } error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); if (error) { - dmu_objset_disown(os, zvol_tag); + dmu_objset_disown(os, 1, zvol_tag); return (error); } @@ -676,7 +676,7 @@ zvol_last_close(zvol_state_t *zv) txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); dmu_objset_evict_dbufs(zv->zv_objset); - dmu_objset_disown(zv->zv_objset, zvol_tag); + dmu_objset_disown(zv->zv_objset, 1, zvol_tag); zv->zv_objset = NULL; } @@ -844,7 +844,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) zv = zvol_minor_lookup(name); if (zv == NULL || zv->zv_objset == NULL) { - if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, + if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE, FTAG, &os)) != 0) { mutex_exit(&zfsdev_state_lock); return (error); @@ -866,7 +866,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) error = zvol_update_live_volsize(zv, volsize); out: if (owned) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); if (zv != NULL) zv->zv_objset = NULL; } diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 55f73868d642..7868f455b83f 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -159,6 +159,12 @@ typedef enum { ZFS_PROP_REDUNDANT_METADATA, ZFS_PROP_PREV_SNAP, ZFS_PROP_RECEIVE_RESUME_TOKEN, + ZFS_PROP_ENCRYPTION, + ZFS_PROP_KEYLOCATION, + ZFS_PROP_KEYFORMAT, + ZFS_PROP_PBKDF2_SALT, + ZFS_PROP_PBKDF2_ITERS, + ZFS_PROP_KEYSTATUS, ZFS_NUM_PROPS } zfs_prop_t; @@ -260,6 +266,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t); boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); +boolean_t zfs_prop_encryption_key_param(zfs_prop_t); +boolean_t zfs_prop_valid_keylocation(const char *, boolean_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); @@ -360,6 +368,30 @@ typedef enum { ZFS_REDUNDANT_METADATA_MOST } zfs_redundant_metadata_type_t; +typedef enum zfs_keystatus { + ZFS_KEYSTATUS_NONE = 0, + ZFS_KEYSTATUS_UNAVAILABLE, + ZFS_KEYSTATUS_AVAILABLE +} zfs_keystatus_t; + +typedef enum zfs_keyformat { + ZFS_KEYFORMAT_NONE = 0, + ZFS_KEYFORMAT_RAW, + ZFS_KEYFORMAT_HEX, + ZFS_KEYFORMAT_PASSPHRASE, + ZFS_KEYFORMAT_FORMATS +} zfs_keyformat_t; + +typedef enum zfs_key_location { + ZFS_KEYLOCATION_NONE, + ZFS_KEYLOCATION_PROMPT, + ZFS_KEYLOCATION_URI, + 
ZFS_KEYLOCATION_LOCATIONS +} zfs_keylocation_t; + +#define DEFAULT_PBKDF2_ITERATIONS 350000 +#define MIN_PBKDF2_ITERATIONS 100000 + /* * On-disk version number. */ @@ -873,6 +905,9 @@ typedef enum zfs_ioc { ZFS_IOC_BOOKMARK, ZFS_IOC_GET_BOOKMARKS, ZFS_IOC_DESTROY_BOOKMARKS, + ZFS_IOC_LOAD_KEY, + ZFS_IOC_UNLOAD_KEY, + ZFS_IOC_CHANGE_KEY, ZFS_IOC_LAST } zfs_ioc_t; @@ -918,6 +953,12 @@ typedef enum { #define ZPOOL_HIST_DSNAME "dsname" #define ZPOOL_HIST_DSID "dsid" +/* + * Special nvlist name that will not have its args recorded in the pool's + * history log. + */ +#define ZPOOL_HIDDEN_ARGS "hidden_args" + /* * Flags for ZFS_IOC_VDEV_SET_STATE */ @@ -935,6 +976,7 @@ typedef enum { #define ZFS_IMPORT_ANY_HOST 0x2 #define ZFS_IMPORT_MISSING_LOG 0x4 #define ZFS_IMPORT_ONLY 0x8 +#define ZFS_IMPORT_LOAD_KEYS 0x20 /* * Sysevent payload members. ZFS will generate the following sysevents with the diff --git a/usr/src/uts/common/sys/mount.h b/usr/src/uts/common/sys/mount.h index 8b6055a65294..97a52b165adf 100644 --- a/usr/src/uts/common/sys/mount.h +++ b/usr/src/uts/common/sys/mount.h @@ -58,6 +58,12 @@ extern "C" { #define MS_SYSSPACE 0x0008 /* Mounta already in kernel space */ #define MS_NOSPLICE 0x1000 /* Don't splice fs instance into name space */ #define MS_NOCHECK 0x2000 /* Clustering: suppress mount busy checks */ +/* + * MS_CRYPT indicates that encryption keys should be loaded if they are not + * already available. This is not defined in glibc, but it is never seen by + * the kernel so it will not cause any problems. + */ +#define MS_CRYPT 0x4000 /* * Mask to sift out flag bits allowable from mount(2). */
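For the passphrase keyformat introduced above, the new ZFS_PROP_PBKDF2_SALT and ZFS_PROP_PBKDF2_ITERS properties exist so that a user passphrase can be stretched into a raw wrapping key and re-derived at load time; DEFAULT_PBKDF2_ITERATIONS and MIN_PBKDF2_ITERATIONS bound the iteration count. A user-space illustration with OpenSSL's PKCS5_PBKDF2_HMAC() standing in for the userland derivation code (not shown in this hunk); the 32-byte output length and the SHA-256 digest are assumptions made for the example only:

#include <stdint.h>
#include <openssl/evp.h>	/* PKCS5_PBKDF2_HMAC(), EVP_sha256() */

#define	WRAP_KEY_LEN	32	/* assumed wrapping-key length */

/*
 * Stretch a user passphrase into a raw wrapping key. The salt and iteration
 * count are per-dataset values; the digest shown is illustrative only.
 */
static int
passphrase_to_wrapping_key(const char *passphrase, int pass_len,
    const uint8_t *salt, int salt_len, int iters, uint8_t key[WRAP_KEY_LEN])
{
	if (PKCS5_PBKDF2_HMAC(passphrase, pass_len, salt, salt_len, iters,
	    EVP_sha256(), WRAP_KEY_LEN, key) != 1)
		return (-1);
	return (0);
}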