summaryrefslogtreecommitdiff
path: root/usr.sbin/fstyp
diff options
context:
space:
mode:
authorPedro F. Giffuni <pfg@FreeBSD.org>2019-12-24 19:00:20 +0000
committerPedro F. Giffuni <pfg@FreeBSD.org>2019-12-24 19:00:20 +0000
commit509798ea65f3bb459f4262e7f5c2c1bfeb5051d7 (patch)
tree3e24813dca58d36e0a0021db3d7de47e4df31cc7 /usr.sbin/fstyp
parent7e1b379e1e9be2f2c475281b6bb53ae48f104221 (diff)
downloadsrc-test2-509798ea65f3bb459f4262e7f5c2c1bfeb5051d7.tar.gz
src-test2-509798ea65f3bb459f4262e7f5c2c1bfeb5051d7.zip
Notes
Diffstat (limited to 'usr.sbin/fstyp')
-rw-r--r--usr.sbin/fstyp/Makefile3
-rw-r--r--usr.sbin/fstyp/fstyp.86
-rw-r--r--usr.sbin/fstyp/fstyp.c2
-rw-r--r--usr.sbin/fstyp/fstyp.h2
-rw-r--r--usr.sbin/fstyp/hammer.c198
-rw-r--r--usr.sbin/fstyp/hammer2.c152
-rw-r--r--usr.sbin/fstyp/hammer2_disk.h1390
-rw-r--r--usr.sbin/fstyp/hammer_disk.h1091
8 files changed, 2842 insertions, 2 deletions
diff --git a/usr.sbin/fstyp/Makefile b/usr.sbin/fstyp/Makefile
index 7915147edac7..088209ec785f 100644
--- a/usr.sbin/fstyp/Makefile
+++ b/usr.sbin/fstyp/Makefile
@@ -3,7 +3,8 @@
.include <src.opts.mk>
PROG= fstyp
-SRCS= apfs.c cd9660.c exfat.c ext2fs.c fstyp.c geli.c hfsplus.c msdosfs.c ntfs.c ufs.c
+SRCS= apfs.c cd9660.c exfat.c ext2fs.c fstyp.c geli.c hammer.c \
+ hammer2.c hfsplus.c msdosfs.c ntfs.c ufs.c
.if ${MK_ZFS} != "no"
SRCS += zfs.c
diff --git a/usr.sbin/fstyp/fstyp.8 b/usr.sbin/fstyp/fstyp.8
index cb8f97f85588..961a45705c0c 100644
--- a/usr.sbin/fstyp/fstyp.8
+++ b/usr.sbin/fstyp/fstyp.8
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd April 26, 2017
+.Dd December 24, 2019
.Dt FSTYP 8
.Os
.Sh NAME
@@ -68,6 +68,10 @@ ext2fs
.It
geli
.It
+hammer
+.It
+hammer2
+.It
msdosfs
.It
ntfs
diff --git a/usr.sbin/fstyp/fstyp.c b/usr.sbin/fstyp/fstyp.c
index 632fbcb2623c..790ec2e1d58c 100644
--- a/usr.sbin/fstyp/fstyp.c
+++ b/usr.sbin/fstyp/fstyp.c
@@ -69,6 +69,8 @@ static struct {
{ "exfat", &fstyp_exfat, false, EXFAT_ENC },
{ "ext2fs", &fstyp_ext2fs, false, NULL },
{ "geli", &fstyp_geli, true, NULL },
+ { "hammer", &fstyp_hammer, true, NULL },
+ { "hammer2", &fstyp_hammer2, true, NULL },
{ "hfs+", &fstyp_hfsp, false, NULL },
{ "msdosfs", &fstyp_msdosfs, false, NULL },
{ "ntfs", &fstyp_ntfs, false, NTFS_ENC },
diff --git a/usr.sbin/fstyp/fstyp.h b/usr.sbin/fstyp/fstyp.h
index 2f331f63d119..f2a1bd447e9e 100644
--- a/usr.sbin/fstyp/fstyp.h
+++ b/usr.sbin/fstyp/fstyp.h
@@ -55,6 +55,8 @@ int fstyp_cd9660(FILE *fp, char *label, size_t size);
int fstyp_exfat(FILE *fp, char *label, size_t size);
int fstyp_ext2fs(FILE *fp, char *label, size_t size);
int fstyp_geli(FILE *fp, char *label, size_t size);
+int fstyp_hammer(FILE *fp, char *label, size_t size);
+int fstyp_hammer2(FILE *fp, char *label, size_t size);
int fstyp_hfsp(FILE *fp, char *label, size_t size);
int fstyp_msdosfs(FILE *fp, char *label, size_t size);
int fstyp_ntfs(FILE *fp, char *label, size_t size);
diff --git a/usr.sbin/fstyp/hammer.c b/usr.sbin/fstyp/hammer.c
new file mode 100644
index 000000000000..dc41166938dd
--- /dev/null
+++ b/usr.sbin/fstyp/hammer.c
@@ -0,0 +1,198 @@
+/*-
+ * Copyright (c) 2016 The DragonFly Project
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <assert.h>
+
+#include <sys/types.h>
+
+#include "hammer_disk.h"
+
+#include "fstyp.h"
+
+static hammer_volume_ondisk_t
+__read_ondisk(FILE *fp)
+{
+ hammer_volume_ondisk_t ondisk;
+
+ ondisk = read_buf(fp, 0, sizeof(*ondisk));
+ if (ondisk == NULL)
+ err(1, "failed to read ondisk");
+
+ return (ondisk);
+}
+
+static int
+__test_ondisk(const hammer_volume_ondisk_t ondisk)
+{
+ static int count = 0;
+ static hammer_uuid_t fsid, fstype;
+ static char label[64];
+
+ if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME &&
+ ondisk->vol_signature != HAMMER_FSBUF_VOLUME_REV)
+ return (1);
+ if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO)
+ return (2);
+ if (ondisk->vol_no < 0 || ondisk->vol_no > HAMMER_MAX_VOLUMES - 1)
+ return (3);
+ if (ondisk->vol_count < 1 || ondisk->vol_count > HAMMER_MAX_VOLUMES)
+ return (4);
+
+ if (count == 0) {
+ count = ondisk->vol_count;
+ assert(count != 0);
+ memcpy(&fsid, &ondisk->vol_fsid, sizeof(fsid));
+ memcpy(&fstype, &ondisk->vol_fstype, sizeof(fstype));
+ strncpy(label, ondisk->vol_label, sizeof(label));
+ } else {
+ if (ondisk->vol_count != count)
+ return (5);
+ if (memcmp(&ondisk->vol_fsid, &fsid, sizeof(fsid)))
+ return (6);
+ if (memcmp(&ondisk->vol_fstype, &fstype, sizeof(fstype)))
+ return (7);
+ if (strncmp(ondisk->vol_label, label, sizeof(label)))
+ return (8);
+ }
+
+ return (0);
+}
+
+int
+fstyp_hammer(FILE *fp, char *label, size_t size)
+{
+ hammer_volume_ondisk_t ondisk;
+ int error = 1;
+
+ ondisk = __read_ondisk(fp);
+ if (ondisk->vol_no != HAMMER_ROOT_VOLNO)
+ goto done;
+ if (ondisk->vol_count != 1)
+ goto done;
+ if (__test_ondisk(ondisk))
+ goto done;
+
+ strlcpy(label, ondisk->vol_label, size);
+ error = 0;
+done:
+ free(ondisk);
+ return (error);
+}
+
+static int
+__test_volume(const char *volpath)
+{
+ hammer_volume_ondisk_t ondisk;
+ FILE *fp;
+ int volno = -1;
+
+ if ((fp = fopen(volpath, "r")) == NULL)
+ err(1, "failed to open %s", volpath);
+
+ ondisk = __read_ondisk(fp);
+ fclose(fp);
+ if (__test_ondisk(ondisk))
+ goto done;
+
+ volno = ondisk->vol_no;
+done:
+ free(ondisk);
+ return (volno);
+}
+
+static int
+__fsvtyp_hammer(const char *blkdevs, char *label, size_t size, int partial)
+{
+ hammer_volume_ondisk_t ondisk;
+ FILE *fp;
+ char *dup, *p, *volpath, x[HAMMER_MAX_VOLUMES];
+ int i, volno, error = 1;
+
+ memset(x, 0, sizeof(x));
+ dup = strdup(blkdevs);
+ p = dup;
+
+ while (p) {
+ volpath = p;
+ if ((p = strchr(p, ':')) != NULL)
+ *p++ = '\0';
+ if ((volno = __test_volume(volpath)) == -1)
+ break;
+ x[volno]++;
+ }
+
+ if ((fp = fopen(volpath, "r")) == NULL)
+ err(1, "failed to open %s", volpath);
+ ondisk = __read_ondisk(fp);
+ fclose(fp);
+
+ free(dup);
+
+ if (volno == -1)
+ goto done;
+ if (partial)
+ goto success;
+
+ for (i = 0; i < HAMMER_MAX_VOLUMES; i++)
+ if (x[i] > 1)
+ goto done;
+ for (i = 0; i < HAMMER_MAX_VOLUMES; i++)
+ if (x[i] == 0)
+ break;
+ if (ondisk->vol_count != i)
+ goto done;
+ for (; i < HAMMER_MAX_VOLUMES; i++)
+ if (x[i] != 0)
+ goto done;
+success:
+ strlcpy(label, ondisk->vol_label, size);
+ error = 0;
+done:
+ free(ondisk);
+ return (error);
+}
+
+int
+fsvtyp_hammer(const char *blkdevs, char *label, size_t size)
+{
+ return (__fsvtyp_hammer(blkdevs, label, size, 0));
+}
+
+int
+fsvtyp_hammer_partial(const char *blkdevs, char *label, size_t size)
+{
+ return (__fsvtyp_hammer(blkdevs, label, size, 1));
+}
diff --git a/usr.sbin/fstyp/hammer2.c b/usr.sbin/fstyp/hammer2.c
new file mode 100644
index 000000000000..a5892a148711
--- /dev/null
+++ b/usr.sbin/fstyp/hammer2.c
@@ -0,0 +1,152 @@
+/*-
+ * Copyright (c) 2017-2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+
+#include <sys/types.h>
+
+#include "hammer2_disk.h"
+
+#include "fstyp.h"
+
+static hammer2_volume_data_t*
+__read_voldata(FILE *fp)
+{
+ hammer2_volume_data_t *voldata;
+
+ voldata = read_buf(fp, 0, sizeof(*voldata));
+ if (voldata == NULL)
+ err(1, "failed to read volume data");
+
+ return (voldata);
+}
+
+static int
+__test_voldata(const hammer2_volume_data_t *voldata)
+{
+ if (voldata->magic != HAMMER2_VOLUME_ID_HBO &&
+ voldata->magic != HAMMER2_VOLUME_ID_ABO)
+ return (1);
+
+ return (0);
+}
+
+static int
+__read_label(FILE *fp, char *label, size_t size)
+{
+ hammer2_blockref_t broot, best, *bref;
+ hammer2_media_data_t *vols[HAMMER2_NUM_VOLHDRS], *media;
+ hammer2_off_t io_off, io_base;
+ size_t bytes, io_bytes, boff;
+ int i, best_i, error = 0;
+
+ best_i = -1;
+ memset(&best, 0, sizeof(best));
+
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
+ memset(&broot, 0, sizeof(broot));
+ broot.type = HAMMER2_BREF_TYPE_VOLUME;
+ broot.data_off = (i * HAMMER2_ZONE_BYTES64) | HAMMER2_PBUFRADIX;
+ vols[i] = read_buf(fp, broot.data_off & ~HAMMER2_OFF_MASK_RADIX,
+ sizeof(*vols[i]));
+ broot.mirror_tid = vols[i]->voldata.mirror_tid;
+ if (best_i < 0 || best.mirror_tid < broot.mirror_tid) {
+ best_i = i;
+ best = broot;
+ }
+ }
+ if (best_i == -1) {
+ warnx("Failed to find volume header from zones");
+ error = 1;
+ goto done;
+ }
+
+ bref = &vols[best_i]->voldata.sroot_blockset.blockref[0];
+ if (bref->type != HAMMER2_BREF_TYPE_INODE) {
+ warnx("Superroot blockref type is not inode");
+ error = 2;
+ goto done;
+ }
+
+ bytes = bref->data_off & HAMMER2_OFF_MASK_RADIX;
+ if (bytes)
+ bytes = (size_t)1 << bytes;
+ if (bytes != sizeof(hammer2_inode_data_t)) {
+ warnx("Superroot blockref size does not match inode size");
+ error = 3;
+ goto done;
+ }
+
+ io_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+ io_base = io_off & ~(hammer2_off_t)(HAMMER2_MINIOSIZE - 1);
+ boff = io_off - io_base;
+
+ io_bytes = HAMMER2_MINIOSIZE;
+ while (io_bytes + boff < bytes)
+ io_bytes <<= 1;
+ if (io_bytes > sizeof(*media)) {
+ warnx("Invalid I/O bytes");
+ error = 4;
+ goto done;
+ }
+
+ media = read_buf(fp, io_base, io_bytes);
+ if (boff)
+ memcpy(media, (char*)media + boff, bytes);
+
+ strlcpy(label, (char*)media->ipdata.filename, size);
+ free(media);
+done:
+ for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++)
+ free(vols[i]);
+
+ return (error);
+}
+
+int
+fstyp_hammer2(FILE *fp, char *label, size_t size)
+{
+ hammer2_volume_data_t *voldata;
+ int error = 1;
+
+ voldata = __read_voldata(fp);
+ if (__test_voldata(voldata))
+ goto done;
+
+ error = __read_label(fp, label, size);
+done:
+ free(voldata);
+ return (error);
+}
diff --git a/usr.sbin/fstyp/hammer2_disk.h b/usr.sbin/fstyp/hammer2_disk.h
new file mode 100644
index 000000000000..89afafbdc9a9
--- /dev/null
+++ b/usr.sbin/fstyp/hammer2_disk.h
@@ -0,0 +1,1390 @@
+/*-
+ * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _HAMMER2_DISK_H_
+#define _HAMMER2_DISK_H_
+
+#ifndef _SYS_UUID_H_
+#include <sys/uuid.h>
+#endif
+#ifndef _SYS_DMSG_H_
+/*
+ * dmsg_hdr must be 64 bytes
+ */
+struct dmsg_hdr {
+ uint16_t magic; /* 00 sanity, synchro, endian */
+ uint16_t reserved02; /* 02 */
+ uint32_t salt; /* 04 random salt helps w/crypto */
+
+ uint64_t msgid; /* 08 message transaction id */
+ uint64_t circuit; /* 10 circuit id or 0 */
+ uint64_t reserved18; /* 18 */
+
+ uint32_t cmd; /* 20 flags | cmd | hdr_size / ALIGN */
+ uint32_t aux_crc; /* 24 auxillary data crc */
+ uint32_t aux_bytes; /* 28 auxillary data length (bytes) */
+ uint32_t error; /* 2C error code or 0 */
+ uint64_t aux_descr; /* 30 negotiated OOB data descr */
+ uint32_t reserved38; /* 38 */
+ uint32_t hdr_crc; /* 3C (aligned) extended header crc */
+};
+
+typedef struct dmsg_hdr dmsg_hdr_t;
+#endif
+
+/*
+ * The structures below represent the on-disk media structures for the HAMMER2
+ * filesystem. Note that all fields for on-disk structures are naturally
+ * aligned. The host endian format is typically used - compatibility is
+ * possible if the implementation detects reversed endian and adjusts accesses
+ * accordingly.
+ *
+ * HAMMER2 primarily revolves around the directory topology: inodes,
+ * directory entries, and block tables. Block device buffer cache buffers
+ * are always 64KB. Logical file buffers are typically 16KB. All data
+ * references utilize 64-bit byte offsets.
+ *
+ * Free block management is handled independently using blocks reserved by
+ * the media topology.
+ */
+
+/*
+ * The data at the end of a file or directory may be a fragment in order
+ * to optimize storage efficiency. The minimum fragment size is 1KB.
+ * Since allocations are in powers of 2 fragments must also be sized in
+ * powers of 2 (1024, 2048, ... 65536).
+ *
+ * For the moment the maximum allocation size is HAMMER2_PBUFSIZE (64K),
+ * which is 2^16. Larger extents may be supported in the future. Smaller
+ * fragments might be supported in the future (down to 64 bytes is possible),
+ * but probably will not be.
+ *
+ * A full indirect block use supports 512 x 128-byte blockrefs in a 64KB
+ * buffer. Indirect blocks down to 1KB are supported to keep small
+ * directories small.
+ *
+ * A maximally sized file (2^64-1 bytes) requires ~6 indirect block levels
+ * using 64KB indirect blocks (128 byte refs, 512 or radix 9 per indblk).
+ *
+ * 16(datablk) + 9 + 9 + 9 + 9 + 9 + 9 = ~70.
+ * 16(datablk) + 7 + 9 + 9 + 9 + 9 + 9 = ~68. (smaller top level indblk)
+ *
+ * The actual depth depends on copies redundancy and whether the filesystem
+ * has chosen to use a smaller indirect block size at the top level or not.
+ */
+#define HAMMER2_ALLOC_MIN 1024 /* minimum allocation size */
+#define HAMMER2_RADIX_MIN 10 /* minimum allocation size 2^N */
+#define HAMMER2_ALLOC_MAX 65536 /* maximum allocation size */
+#define HAMMER2_RADIX_MAX 16 /* maximum allocation size 2^N */
+#define HAMMER2_RADIX_KEY 64 /* number of bits in key */
+
+/*
+ * MINALLOCSIZE - The minimum allocation size. This can be smaller
+ * or larger than the minimum physical IO size.
+ *
+ * NOTE: Should not be larger than 1K since inodes
+ * are 1K.
+ *
+ * MINIOSIZE - The minimum IO size. This must be less than
+ * or equal to HAMMER2_LBUFSIZE.
+ *
+ * HAMMER2_LBUFSIZE - Nominal buffer size for I/O rollups.
+ *
+ * HAMMER2_PBUFSIZE - Topological block size used by files for all
+ * blocks except the block straddling EOF.
+ *
+ * HAMMER2_SEGSIZE - Allocation map segment size, typically 4MB
+ * (space represented by a level0 bitmap).
+ */
+
+#define HAMMER2_SEGSIZE (1 << HAMMER2_FREEMAP_LEVEL0_RADIX)
+#define HAMMER2_SEGRADIX HAMMER2_FREEMAP_LEVEL0_RADIX
+
+#define HAMMER2_PBUFRADIX 16 /* physical buf (1<<16) bytes */
+#define HAMMER2_PBUFSIZE 65536
+#define HAMMER2_LBUFRADIX 14 /* logical buf (1<<14) bytes */
+#define HAMMER2_LBUFSIZE 16384
+
+/*
+ * Generally speaking we want to use 16K and 64K I/Os
+ */
+#define HAMMER2_MINIORADIX HAMMER2_LBUFRADIX
+#define HAMMER2_MINIOSIZE HAMMER2_LBUFSIZE
+
+#define HAMMER2_IND_BYTES_MIN 4096
+#define HAMMER2_IND_BYTES_NOM HAMMER2_LBUFSIZE
+#define HAMMER2_IND_BYTES_MAX HAMMER2_PBUFSIZE
+#define HAMMER2_IND_RADIX_MIN 12
+#define HAMMER2_IND_RADIX_NOM HAMMER2_LBUFRADIX
+#define HAMMER2_IND_RADIX_MAX HAMMER2_PBUFRADIX
+#define HAMMER2_IND_COUNT_MIN (HAMMER2_IND_BYTES_MIN / \
+ sizeof(hammer2_blockref_t))
+#define HAMMER2_IND_COUNT_MAX (HAMMER2_IND_BYTES_MAX / \
+ sizeof(hammer2_blockref_t))
+
+/*
+ * In HAMMER2, arrays of blockrefs are fully set-associative, meaning that
+ * any element can occur at any index and holes can be anywhere. As a
+ * future optimization we will be able to flag that such arrays are sorted
+ * and thus optimize lookups, but for now we don't.
+ *
+ * Inodes embed either 512 bytes of direct data or an array of 4 blockrefs,
+ * resulting in highly efficient storage for files <= 512 bytes and for files
+ * <= 512KB. Up to 4 directory entries can be referenced from a directory
+ * without requiring an indirect block.
+ *
+ * Indirect blocks are typically either 4KB (64 blockrefs / ~4MB represented),
+ * or 64KB (1024 blockrefs / ~64MB represented).
+ */
+#define HAMMER2_SET_RADIX 2 /* radix 2 = 4 entries */
+#define HAMMER2_SET_COUNT (1 << HAMMER2_SET_RADIX)
+#define HAMMER2_EMBEDDED_BYTES 512 /* inode blockset/dd size */
+#define HAMMER2_EMBEDDED_RADIX 9
+
+#define HAMMER2_PBUFMASK (HAMMER2_PBUFSIZE - 1)
+#define HAMMER2_LBUFMASK (HAMMER2_LBUFSIZE - 1)
+#define HAMMER2_SEGMASK (HAMMER2_SEGSIZE - 1)
+
+#define HAMMER2_LBUFMASK64 ((hammer2_off_t)HAMMER2_LBUFMASK)
+#define HAMMER2_PBUFSIZE64 ((hammer2_off_t)HAMMER2_PBUFSIZE)
+#define HAMMER2_PBUFMASK64 ((hammer2_off_t)HAMMER2_PBUFMASK)
+#define HAMMER2_SEGSIZE64 ((hammer2_off_t)HAMMER2_SEGSIZE)
+#define HAMMER2_SEGMASK64 ((hammer2_off_t)HAMMER2_SEGMASK)
+
+#define HAMMER2_UUID_STRING "5cbb9ad1-862d-11dc-a94d-01301bb8a9f5"
+
+/*
+ * A 4MB segment is reserved at the beginning of each 2GB zone. This segment
+ * contains the volume header (or backup volume header), the free block
+ * table, and possibly other information in the future. A 4MB segment for
+ * freemap is reserved at the beginning of every 1GB.
+ *
+ * 4MB = 64 x 64K blocks. Each 4MB segment is broken down as follows:
+ *
+ * ==========
+ * 0 volume header (for the first four 2GB zones)
+ * 1 freemap00 level1 FREEMAP_LEAF (256 x 128B bitmap data per 1GB)
+ * 2 level2 FREEMAP_NODE (256 x 128B indirect block per 256GB)
+ * 3 level3 FREEMAP_NODE (256 x 128B indirect block per 64TB)
+ * 4 level4 FREEMAP_NODE (256 x 128B indirect block per 16PB)
+ * 5 level5 FREEMAP_NODE (256 x 128B indirect block per 4EB)
+ * 6 freemap01 level1 (rotation)
+ * 7 level2
+ * 8 level3
+ * 9 level4
+ * 10 level5
+ * 11 freemap02 level1 (rotation)
+ * 12 level2
+ * 13 level3
+ * 14 level4
+ * 15 level5
+ * 16 freemap03 level1 (rotation)
+ * 17 level2
+ * 18 level3
+ * 19 level4
+ * 20 level5
+ * 21 freemap04 level1 (rotation)
+ * 22 level2
+ * 23 level3
+ * 24 level4
+ * 25 level5
+ * 26 freemap05 level1 (rotation)
+ * 27 level2
+ * 28 level3
+ * 29 level4
+ * 30 level5
+ * 31 freemap06 level1 (rotation)
+ * 32 level2
+ * 33 level3
+ * 34 level4
+ * 35 level5
+ * 36 freemap07 level1 (rotation)
+ * 37 level2
+ * 38 level3
+ * 39 level4
+ * 40 level5
+ * 41 unused
+ * .. unused
+ * 63 unused
+ * ==========
+ *
+ * The first four 2GB zones contain volume headers and volume header backups.
+ * After that the volume header block# is reserved for future use. Similarly,
+ * there are many blocks related to various Freemap levels which are not
+ * used in every segment and those are also reserved for future use.
+ * Note that each FREEMAP_LEAF or FREEMAP_NODE uses 32KB out of 64KB slot.
+ *
+ * Freemap (see the FREEMAP document)
+ *
+ * The freemap utilizes blocks #1-40 in 8 sets of 5 blocks. Each block in
+ * a set represents a level of depth in the freemap topology. Eight sets
+ * exist to prevent live updates from disturbing the state of the freemap
+ * were a crash/reboot to occur. That is, a live update is not committed
+ * until the update's flush reaches the volume root. There are FOUR volume
+ * roots representing the last four synchronization points, so the freemap
+ * must be consistent no matter which volume root is chosen by the mount
+ * code.
+ *
+ * Each freemap set is 5 x 64K blocks and represents the 1GB, 256GB, 64TB,
+ * 16PB and 4EB indirect map. The volume header itself has a set of 4 freemap
+ * blockrefs representing another 2 bits, giving us a total 64 bits of
+ * representable address space.
+ *
+ * The Level 0 64KB block represents 1GB of storage represented by 32KB
+ * (256 x struct hammer2_bmap_data). Each structure represents 4MB of storage
+ * and has a 512 bit bitmap, using 2 bits to represent a 16KB chunk of
+ * storage. These 2 bits represent the following states:
+ *
+ * 00 Free
+ * 01 (reserved) (Possibly partially allocated)
+ * 10 Possibly free
+ * 11 Allocated
+ *
+ * One important thing to note here is that the freemap resolution is 16KB,
+ * but the minimum storage allocation size is 1KB. The hammer2 vfs keeps
+ * track of sub-allocations in memory, which means that on a unmount or reboot
+ * the entire 16KB of a partially allocated block will be considered fully
+ * allocated. It is possible for fragmentation to build up over time, but
+ * defragmentation is fairly easy to accomplish since all modifications
+ * allocate a new block.
+ *
+ * The Second thing to note is that due to the way snapshots and inode
+ * replication works, deleting a file cannot immediately free the related
+ * space. Furthermore, deletions often do not bother to traverse the
+ * block subhierarchy being deleted. And to go even further, whole
+ * sub-directory trees can be deleted simply by deleting the directory inode
+ * at the top. So even though we have a symbol to represent a 'possibly free'
+ * block (binary 10), only the bulk free scanning code can actually use it.
+ * Normal 'rm's or other deletions do not.
+ *
+ * WARNING! ZONE_SEG and VOLUME_ALIGN must be a multiple of 1<<LEVEL0_RADIX
+ * (i.e. a multiple of 4MB). VOLUME_ALIGN must be >= ZONE_SEG.
+ *
+ * In Summary:
+ *
+ * (1) Modifications to freemap blocks 'allocate' a new copy (aka use a block
+ * from the next set). The new copy is reused until a flush occurs at
+ * which point the next modification will then rotate to the next set.
+ */
+#define HAMMER2_VOLUME_ALIGN (8 * 1024 * 1024)
+#define HAMMER2_VOLUME_ALIGN64 ((hammer2_off_t)HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_VOLUME_ALIGNMASK (HAMMER2_VOLUME_ALIGN - 1)
+#define HAMMER2_VOLUME_ALIGNMASK64 ((hammer2_off_t)HAMMER2_VOLUME_ALIGNMASK)
+
+#define HAMMER2_NEWFS_ALIGN (HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_NEWFS_ALIGN64 ((hammer2_off_t)HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_NEWFS_ALIGNMASK (HAMMER2_VOLUME_ALIGN - 1)
+#define HAMMER2_NEWFS_ALIGNMASK64 ((hammer2_off_t)HAMMER2_NEWFS_ALIGNMASK)
+
+#define HAMMER2_ZONE_BYTES64 (2LLU * 1024 * 1024 * 1024)
+#define HAMMER2_ZONE_MASK64 (HAMMER2_ZONE_BYTES64 - 1)
+#define HAMMER2_ZONE_SEG (4 * 1024 * 1024)
+#define HAMMER2_ZONE_SEG64 ((hammer2_off_t)HAMMER2_ZONE_SEG)
+#define HAMMER2_ZONE_BLOCKS_SEG (HAMMER2_ZONE_SEG / HAMMER2_PBUFSIZE)
+
+#define HAMMER2_ZONE_FREEMAP_INC 5 /* 5 deep */
+
+#define HAMMER2_ZONE_VOLHDR 0 /* volume header or backup */
+#define HAMMER2_ZONE_FREEMAP_00 1 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_01 6 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_02 11 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_03 16 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_04 21 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_05 26 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_06 31 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_07 36 /* normal freemap rotation */
+#define HAMMER2_ZONE_FREEMAP_END 41 /* (non-inclusive) */
+
+#define HAMMER2_ZONE_UNUSED41 41
+#define HAMMER2_ZONE_UNUSED42 42
+#define HAMMER2_ZONE_UNUSED43 43
+#define HAMMER2_ZONE_UNUSED44 44
+#define HAMMER2_ZONE_UNUSED45 45
+#define HAMMER2_ZONE_UNUSED46 46
+#define HAMMER2_ZONE_UNUSED47 47
+#define HAMMER2_ZONE_UNUSED48 48
+#define HAMMER2_ZONE_UNUSED49 49
+#define HAMMER2_ZONE_UNUSED50 50
+#define HAMMER2_ZONE_UNUSED51 51
+#define HAMMER2_ZONE_UNUSED52 52
+#define HAMMER2_ZONE_UNUSED53 53
+#define HAMMER2_ZONE_UNUSED54 54
+#define HAMMER2_ZONE_UNUSED55 55
+#define HAMMER2_ZONE_UNUSED56 56
+#define HAMMER2_ZONE_UNUSED57 57
+#define HAMMER2_ZONE_UNUSED58 58
+#define HAMMER2_ZONE_UNUSED59 59
+#define HAMMER2_ZONE_UNUSED60 60
+#define HAMMER2_ZONE_UNUSED61 61
+#define HAMMER2_ZONE_UNUSED62 62
+#define HAMMER2_ZONE_UNUSED63 63
+#define HAMMER2_ZONE_END 64 /* non-inclusive */
+
+#define HAMMER2_NFREEMAPS 8 /* FREEMAP_00 - FREEMAP_07 */
+
+ /* relative to FREEMAP_x */
+#define HAMMER2_ZONEFM_LEVEL1 0 /* 1GB leafmap */
+#define HAMMER2_ZONEFM_LEVEL2 1 /* 256GB indmap */
+#define HAMMER2_ZONEFM_LEVEL3 2 /* 64TB indmap */
+#define HAMMER2_ZONEFM_LEVEL4 3 /* 16PB indmap */
+#define HAMMER2_ZONEFM_LEVEL5 4 /* 4EB indmap */
+/* LEVEL6 is a set of 4 blockrefs in the volume header 16EB */
+
+/*
+ * Freemap radix. Assumes a set-count of 4, 128-byte blockrefs,
+ * 32KB indirect block for freemap (LEVELN_PSIZE below).
+ *
+ * Leaf entry represents 4MB of storage broken down into a 512-bit
+ * bitmap, 2-bits per entry. So course bitmap item represents 16KB.
+ */
+#if HAMMER2_SET_COUNT != 4
+#error "hammer2_disk.h - freemap assumes SET_COUNT is 4"
+#endif
+#define HAMMER2_FREEMAP_LEVEL6_RADIX 64 /* 16EB (end) */
+#define HAMMER2_FREEMAP_LEVEL5_RADIX 62 /* 4EB */
+#define HAMMER2_FREEMAP_LEVEL4_RADIX 54 /* 16PB */
+#define HAMMER2_FREEMAP_LEVEL3_RADIX 46 /* 64TB */
+#define HAMMER2_FREEMAP_LEVEL2_RADIX 38 /* 256GB */
+#define HAMMER2_FREEMAP_LEVEL1_RADIX 30 /* 1GB */
+#define HAMMER2_FREEMAP_LEVEL0_RADIX 22 /* 4MB (128by in l-1 leaf) */
+
+#define HAMMER2_FREEMAP_LEVELN_PSIZE 32768 /* physical bytes */
+
+#define HAMMER2_FREEMAP_LEVEL5_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL5_RADIX)
+#define HAMMER2_FREEMAP_LEVEL4_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL4_RADIX)
+#define HAMMER2_FREEMAP_LEVEL3_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL3_RADIX)
+#define HAMMER2_FREEMAP_LEVEL2_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL2_RADIX)
+#define HAMMER2_FREEMAP_LEVEL1_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL1_RADIX)
+#define HAMMER2_FREEMAP_LEVEL0_SIZE ((hammer2_off_t)1 << \
+ HAMMER2_FREEMAP_LEVEL0_RADIX)
+
+#define HAMMER2_FREEMAP_LEVEL5_MASK (HAMMER2_FREEMAP_LEVEL5_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL4_MASK (HAMMER2_FREEMAP_LEVEL4_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL3_MASK (HAMMER2_FREEMAP_LEVEL3_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL2_MASK (HAMMER2_FREEMAP_LEVEL2_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL1_MASK (HAMMER2_FREEMAP_LEVEL1_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL0_MASK (HAMMER2_FREEMAP_LEVEL0_SIZE - 1)
+
+#define HAMMER2_FREEMAP_COUNT (int)(HAMMER2_FREEMAP_LEVELN_PSIZE / \
+ sizeof(hammer2_bmap_data_t))
+
+/*
+ * XXX I made a mistake and made the reserved area begin at each LEVEL1 zone,
+ * which is on a 1GB demark. This will eat a little more space but for
+ * now we retain compatibility and make FMZONEBASE every 1GB
+ */
+#define H2FMZONEBASE(key) ((key) & ~HAMMER2_FREEMAP_LEVEL1_MASK)
+#define H2FMBASE(key, radix) ((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
+
+/*
+ * 16KB bitmap granularity (x2 bits per entry).
+ */
+#define HAMMER2_FREEMAP_BLOCK_RADIX 14
+#define HAMMER2_FREEMAP_BLOCK_SIZE (1 << HAMMER2_FREEMAP_BLOCK_RADIX)
+#define HAMMER2_FREEMAP_BLOCK_MASK (HAMMER2_FREEMAP_BLOCK_SIZE - 1)
+
+/*
+ * bitmap[] structure. 2 bits per HAMMER2_FREEMAP_BLOCK_SIZE.
+ *
+ * 8 x 64-bit elements, 2 bits per block.
+ * 32 blocks (radix 5) per element.
+ * representing INDEX_SIZE bytes worth of storage per element.
+ */
+
+typedef uint64_t hammer2_bitmap_t;
+
+#define HAMMER2_BMAP_ALLONES ((hammer2_bitmap_t)-1)
+#define HAMMER2_BMAP_ELEMENTS 8
+#define HAMMER2_BMAP_BITS_PER_ELEMENT 64
+#define HAMMER2_BMAP_INDEX_RADIX 5 /* 32 blocks per element */
+#define HAMMER2_BMAP_BLOCKS_PER_ELEMENT (1 << HAMMER2_BMAP_INDEX_RADIX)
+
+#define HAMMER2_BMAP_INDEX_SIZE (HAMMER2_FREEMAP_BLOCK_SIZE * \
+ HAMMER2_BMAP_BLOCKS_PER_ELEMENT)
+#define HAMMER2_BMAP_INDEX_MASK (HAMMER2_BMAP_INDEX_SIZE - 1)
+
+#define HAMMER2_BMAP_SIZE (HAMMER2_BMAP_INDEX_SIZE * \
+ HAMMER2_BMAP_ELEMENTS)
+#define HAMMER2_BMAP_MASK (HAMMER2_BMAP_SIZE - 1)
+
+/*
+ * Two linear areas can be reserved after the initial 4MB segment in the base
+ * zone (the one starting at offset 0). These areas are NOT managed by the
+ * block allocator and do not fall under HAMMER2 crc checking rules based
+ * at the volume header (but can be self-CRCd internally, depending).
+ */
+#define HAMMER2_BOOT_MIN_BYTES HAMMER2_VOLUME_ALIGN
+#define HAMMER2_BOOT_NOM_BYTES (64*1024*1024)
+#define HAMMER2_BOOT_MAX_BYTES (256*1024*1024)
+
+#define HAMMER2_REDO_MIN_BYTES HAMMER2_VOLUME_ALIGN
+#define HAMMER2_REDO_NOM_BYTES (256*1024*1024)
+#define HAMMER2_REDO_MAX_BYTES (1024*1024*1024)
+
+/*
+ * Most HAMMER2 types are implemented as unsigned 64-bit integers.
+ * Transaction ids are monotonic.
+ *
+ * We utilize 32-bit iSCSI CRCs.
+ */
+typedef uint64_t hammer2_tid_t;
+typedef uint64_t hammer2_off_t;
+typedef uint64_t hammer2_key_t;
+typedef uint32_t hammer2_crc32_t;
+
+/*
+ * Miscellanious ranges (all are unsigned).
+ */
+#define HAMMER2_TID_MIN 1ULL
+#define HAMMER2_TID_MAX 0xFFFFFFFFFFFFFFFFULL
+#define HAMMER2_KEY_MIN 0ULL
+#define HAMMER2_KEY_MAX 0xFFFFFFFFFFFFFFFFULL
+#define HAMMER2_OFFSET_MIN 0ULL
+#define HAMMER2_OFFSET_MAX 0xFFFFFFFFFFFFFFFFULL
+
+/*
+ * HAMMER2 data offset special cases and masking.
+ *
+ * All HAMMER2 data offsets have to be broken down into a 64K buffer base
+ * offset (HAMMER2_OFF_MASK_HI) and a 64K buffer index (HAMMER2_OFF_MASK_LO).
+ *
+ * Indexes into physical buffers are always 64-byte aligned. The low 6 bits
+ * of the data offset field specifies how large the data chunk being pointed
+ * to as a power of 2. The theoretical minimum radix is thus 6 (The space
+ * needed in the low bits of the data offset field). However, the practical
+ * minimum allocation chunk size is 1KB (a radix of 10), so HAMMER2 sets
+ * HAMMER2_RADIX_MIN to 10. The maximum radix is currently 16 (64KB), but
+ * we fully intend to support larger extents in the future.
+ *
+ * WARNING! A radix of 0 (such as when data_off is all 0's) is a special
+ * case which means no data associated with the blockref, and
+ * not the '1 byte' it would otherwise calculate to.
+ */
+#define HAMMER2_OFF_BAD ((hammer2_off_t)-1)
+#define HAMMER2_OFF_MASK 0xFFFFFFFFFFFFFFC0ULL
+#define HAMMER2_OFF_MASK_LO (HAMMER2_OFF_MASK & HAMMER2_PBUFMASK64)
+#define HAMMER2_OFF_MASK_HI (~HAMMER2_PBUFMASK64)
+#define HAMMER2_OFF_MASK_RADIX 0x000000000000003FULL
+#define HAMMER2_MAX_COPIES 6
+
+/*
+ * HAMMER2 directory support and pre-defined keys
+ */
+#define HAMMER2_DIRHASH_VISIBLE 0x8000000000000000ULL
+#define HAMMER2_DIRHASH_USERMSK 0x7FFFFFFFFFFFFFFFULL
+#define HAMMER2_DIRHASH_LOMASK 0x0000000000007FFFULL
+#define HAMMER2_DIRHASH_HIMASK 0xFFFFFFFFFFFF0000ULL
+#define HAMMER2_DIRHASH_FORCED 0x0000000000008000ULL /* bit forced on */
+
+#define HAMMER2_SROOT_KEY 0x0000000000000000ULL /* volume to sroot */
+#define HAMMER2_BOOT_KEY 0xd9b36ce135528000ULL /* sroot to BOOT PFS */
+
+/************************************************************************
+ * DMSG SUPPORT *
+ ************************************************************************
+ * LNK_VOLCONF
+ *
+ * All HAMMER2 directories directly under the super-root on your local
+ * media can be mounted separately, even if they share the same physical
+ * device.
+ *
+ * When you do a HAMMER2 mount you are effectively tying into a HAMMER2
+ * cluster via local media. The local media does not have to participate
+ * in the cluster, other than to provide the hammer2_volconf[] array and
+ * root inode for the mount.
+ *
+ * This is important: The mount device path you specify serves to bootstrap
+ * your entry into the cluster, but your mount will make active connections
+ * to ALL copy elements in the hammer2_volconf[] array which match the
+ * PFSID of the directory in the super-root that you specified. The local
+ * media path does not have to be mentioned in this array but becomes part
+ * of the cluster based on its type and access rights. ALL ELEMENTS ARE
+ * TREATED ACCORDING TO TYPE NO MATTER WHICH ONE YOU MOUNT FROM.
+ *
+ * The actual cluster may be far larger than the elements you list in the
+ * hammer2_volconf[] array. You list only the elements you wish to
+ * directly connect to and you are able to access the rest of the cluster
+ * indirectly through those connections.
+ *
+ * WARNING! This structure must be exactly 128 bytes long for its config
+ * array to fit in the volume header.
+ */
+struct hammer2_volconf {
+ uint8_t copyid; /* 00 copyid 0-255 (must match slot) */
+ uint8_t inprog; /* 01 operation in progress, or 0 */
+ uint8_t chain_to; /* 02 operation chaining to, or 0 */
+ uint8_t chain_from; /* 03 operation chaining from, or 0 */
+ uint16_t flags; /* 04-05 flags field */
+ uint8_t error; /* 06 last operational error */
+ uint8_t priority; /* 07 priority and round-robin flag */
+ uint8_t remote_pfs_type;/* 08 probed direct remote PFS type */
+ uint8_t reserved08[23]; /* 09-1F */
+ uuid_t pfs_clid; /* 20-2F copy target must match this uuid */
+ uint8_t label[16]; /* 30-3F import/export label */
+ uint8_t path[64]; /* 40-7F target specification string or key */
+} __packed;
+
+typedef struct hammer2_volconf hammer2_volconf_t;
+
+#define DMSG_VOLF_ENABLED 0x0001
+#define DMSG_VOLF_INPROG 0x0002
+#define DMSG_VOLF_CONN_RR 0x80 /* round-robin at same priority */
+#define DMSG_VOLF_CONN_EF 0x40 /* media errors flagged */
+#define DMSG_VOLF_CONN_PRI 0x0F /* select priority 0-15 (15=best) */
+
+struct dmsg_lnk_hammer2_volconf {
+ dmsg_hdr_t head;
+ hammer2_volconf_t copy; /* copy spec */
+ int32_t index;
+ int32_t unused01;
+ uuid_t mediaid;
+ int64_t reserved02[32];
+} __packed;
+
+typedef struct dmsg_lnk_hammer2_volconf dmsg_lnk_hammer2_volconf_t;
+
+#define DMSG_LNK_HAMMER2_VOLCONF DMSG_LNK(DMSG_LNK_CMD_HAMMER2_VOLCONF, \
+ dmsg_lnk_hammer2_volconf)
+
+#define H2_LNK_VOLCONF(msg) ((dmsg_lnk_hammer2_volconf_t *)(msg)->any.buf)
+
+/*
+ * HAMMER2 directory entry header (embedded in blockref) exactly 16 bytes
+ */
+struct hammer2_dirent_head {
+ hammer2_tid_t inum; /* inode number */
+ uint16_t namlen; /* name length */
+ uint8_t type; /* OBJTYPE_* */
+ uint8_t unused0B;
+ uint8_t unused0C[4];
+} __packed;
+
+typedef struct hammer2_dirent_head hammer2_dirent_head_t;
+
+/*
+ * The media block reference structure. This forms the core of the HAMMER2
+ * media topology recursion. This 128-byte data structure is embedded in the
+ * volume header, in inodes (which are also directory entries), and in
+ * indirect blocks.
+ *
+ * A blockref references a single media item, which typically can be a
+ * directory entry (aka inode), indirect block, or data block.
+ *
+ * The primary feature a blockref represents is the ability to validate
+ * the entire tree underneath it via its check code. Any modification to
+ * anything propagates up the blockref tree all the way to the root, replacing
+ * the related blocks and compounding the generated check code.
+ *
+ * The check code can be a simple 32-bit iscsi code, a 64-bit crc, or as
+ * complex as a 512 bit cryptographic hash. I originally used a 64-byte
+ * blockref but later expanded it to 128 bytes to be able to support the
+ * larger check code as well as to embed statistics for quota operation.
+ *
+ * Simple check codes are not sufficient for unverified dedup. Even with
+ * a maximally-sized check code unverified dedup should only be used in
+ * in subdirectory trees where you do not need 100% data integrity.
+ *
+ * Unverified dedup is deduping based on meta-data only without verifying
+ * that the data blocks are actually identical. Verified dedup guarantees
+ * integrity but is a far more I/O-expensive operation.
+ *
+ * --
+ *
+ * mirror_tid - per cluster node modified (propagated upward by flush)
+ * modify_tid - clc record modified (not propagated).
+ * update_tid - clc record updated (propagated upward on verification)
+ *
+ * CLC - Stands for 'Cluster Level Change', identifiers which are identical
+ * within the topology across all cluster nodes (when fully
+ * synchronized).
+ *
+ * NOTE: The range of keys represented by the blockref is (key) to
+ * ((key) + (1LL << keybits) - 1). HAMMER2 usually populates
+ * blocks bottom-up, inserting a new root when radix expansion
+ * is required.
+ *
+ * leaf_count - Helps manage leaf collapse calculations when indirect
+ * blocks become mostly empty. This value caps out at
+ * HAMMER2_BLOCKREF_LEAF_MAX (65535).
+ *
+ * Used by the chain code to determine when to pull leafs up
+ * from nearly empty indirect blocks. For the purposes of this
+ * calculation, BREF_TYPE_INODE is considered a leaf, along
+ * with DIRENT and DATA.
+ *
+ * RESERVED FIELDS
+ *
+ * A number of blockref fields are reserved and should generally be set to
+ * 0 for future compatibility.
+ *
+ * FUTURE BLOCKREF EXPANSION
+ *
+ * CONTENT ADDRESSABLE INDEXING (future) - Using a 256 or 512-bit check code.
+ */
+struct hammer2_blockref { /* MUST BE EXACTLY 64 BYTES */
+ uint8_t type; /* type of underlying item */
+ uint8_t methods; /* check method & compression method */
+ uint8_t copyid; /* specify which copy this is */
+ uint8_t keybits; /* #of keybits masked off 0=leaf */
+ uint8_t vradix; /* virtual data/meta-data size */
+ uint8_t flags; /* blockref flags */
+ uint16_t leaf_count; /* leaf aggregation count */
+ hammer2_key_t key; /* key specification */
+ hammer2_tid_t mirror_tid; /* media flush topology & freemap */
+ hammer2_tid_t modify_tid; /* clc modify (not propagated) */
+ hammer2_off_t data_off; /* low 6 bits is phys size (radix)*/
+ hammer2_tid_t update_tid; /* clc modify (propagated upward) */
+ union {
+ char buf[16];
+
+ /*
+ * Directory entry header (BREF_TYPE_DIRENT)
+ *
+ * NOTE: check.buf contains filename if <= 64 bytes. Longer
+ * filenames are stored in a data reference of size
+ * HAMMER2_ALLOC_MIN (at least 256, typically 1024).
+ *
+ * NOTE: inode structure may contain a copy of a recently
+ * associated filename, for recovery purposes.
+ *
+ * NOTE: Superroot entries are INODEs, not DIRENTs. Code
+ * allows both cases.
+ */
+ hammer2_dirent_head_t dirent;
+
+ /*
+ * Statistics aggregation (BREF_TYPE_INODE, BREF_TYPE_INDIRECT)
+ */
+ struct {
+ hammer2_key_t data_count;
+ hammer2_key_t inode_count;
+ } stats;
+ } embed;
+ union { /* check info */
+ char buf[64];
+ struct {
+ uint32_t value;
+ uint32_t reserved[15];
+ } iscsi32;
+ struct {
+ uint64_t value;
+ uint64_t reserved[7];
+ } xxhash64;
+ struct {
+ char data[24];
+ char reserved[40];
+ } sha192;
+ struct {
+ char data[32];
+ char reserved[32];
+ } sha256;
+ struct {
+ char data[64];
+ } sha512;
+
+ /*
+ * Freemap hints are embedded in addition to the icrc32.
+ *
+ * bigmask - Radixes available for allocation (0-31).
+ * Heuristical (may be permissive but not
+ * restrictive). Typically only radix values
+ * 10-16 are used (i.e. (1<<10) through (1<<16)).
+ *
+ * avail - Total available space remaining, in bytes
+ */
+ struct {
+ uint32_t icrc32;
+ uint32_t bigmask; /* available radixes */
+ uint64_t avail; /* total available bytes */
+ char reserved[48];
+ } freemap;
+ } check;
+} __packed;
+
+typedef struct hammer2_blockref hammer2_blockref_t;
+
+#define HAMMER2_BLOCKREF_BYTES 128 /* blockref struct in bytes */
+#define HAMMER2_BLOCKREF_RADIX 7
+
+#define HAMMER2_BLOCKREF_LEAF_MAX 65535
+
+/*
+ * On-media and off-media blockref types.
+ *
+ * types >= 128 are pseudo values that should never be present on-media.
+ */
+#define HAMMER2_BREF_TYPE_EMPTY 0
+#define HAMMER2_BREF_TYPE_INODE 1
+#define HAMMER2_BREF_TYPE_INDIRECT 2
+#define HAMMER2_BREF_TYPE_DATA 3
+#define HAMMER2_BREF_TYPE_DIRENT 4
+#define HAMMER2_BREF_TYPE_FREEMAP_NODE 5
+#define HAMMER2_BREF_TYPE_FREEMAP_LEAF 6
+#define HAMMER2_BREF_TYPE_FREEMAP 254 /* pseudo-type */
+#define HAMMER2_BREF_TYPE_VOLUME 255 /* pseudo-type */
+
+#define HAMMER2_BREF_FLAG_PFSROOT 0x01 /* see also related opflag */
+#define HAMMER2_BREF_FLAG_ZERO 0x02
+
+/*
+ * Encode/decode check mode and compression mode for
+ * bref.methods. The compression level is not encoded in
+ * bref.methods.
+ */
+#define HAMMER2_ENC_CHECK(n) (((n) & 15) << 4)
+#define HAMMER2_DEC_CHECK(n) (((n) >> 4) & 15)
+#define HAMMER2_ENC_COMP(n) ((n) & 15)
+#define HAMMER2_DEC_COMP(n) ((n) & 15)
+
+#define HAMMER2_CHECK_NONE 0
+#define HAMMER2_CHECK_DISABLED 1
+#define HAMMER2_CHECK_ISCSI32 2
+#define HAMMER2_CHECK_XXHASH64 3
+#define HAMMER2_CHECK_SHA192 4
+#define HAMMER2_CHECK_FREEMAP 5
+
+#define HAMMER2_CHECK_DEFAULT HAMMER2_CHECK_XXHASH64
+
+/* user-specifiable check modes only */
+#define HAMMER2_CHECK_STRINGS { "none", "disabled", "crc32", \
+ "xxhash64", "sha192" }
+#define HAMMER2_CHECK_STRINGS_COUNT 5
+
+/*
+ * Encode/decode check or compression algorithm request in
+ * ipdata->meta.check_algo and ipdata->meta.comp_algo.
+ */
+#define HAMMER2_ENC_ALGO(n) (n)
+#define HAMMER2_DEC_ALGO(n) ((n) & 15)
+#define HAMMER2_ENC_LEVEL(n) ((n) << 4)
+#define HAMMER2_DEC_LEVEL(n) (((n) >> 4) & 15)
+
+#define HAMMER2_COMP_NONE 0
+#define HAMMER2_COMP_AUTOZERO 1
+#define HAMMER2_COMP_LZ4 2
+#define HAMMER2_COMP_ZLIB 3
+
+#define HAMMER2_COMP_NEWFS_DEFAULT HAMMER2_COMP_LZ4
+#define HAMMER2_COMP_STRINGS { "none", "autozero", "lz4", "zlib" }
+#define HAMMER2_COMP_STRINGS_COUNT 4
+
+/*
+ * Passed to hammer2_chain_create(), causes methods to be inherited from
+ * parent.
+ */
+#define HAMMER2_METH_DEFAULT -1
+
+/*
+ * HAMMER2 block references are collected into sets of 4 blockrefs. These
+ * sets are fully associative, meaning the elements making up a set are
+ * not sorted in any way and may contain duplicate entries, holes, or
+ * entries which shortcut multiple levels of indirection. Sets are used
+ * in various ways:
+ *
+ * (1) When redundancy is desired a set may contain several duplicate
+ * entries pointing to different copies of the same data. Up to 4 copies
+ * are supported.
+ *
+ * (2) The blockrefs in a set can shortcut multiple levels of indirections
+ * within the bounds imposed by the parent of set.
+ *
+ * When a set fills up another level of indirection is inserted, moving
+ * some or all of the set's contents into indirect blocks placed under the
+ * set. This is a top-down approach in that indirect blocks are not created
+ * until the set actually becomes full (that is, the entries in the set can
+ * shortcut the indirect blocks when the set is not full). Depending on how
+ * things are filled multiple indirect blocks will eventually be created.
+ *
+ * Indirect blocks are typically 4KB (64 entres) or 64KB (1024 entries) and
+ * are also treated as fully set-associative.
+ */
+struct hammer2_blockset {
+ hammer2_blockref_t blockref[HAMMER2_SET_COUNT];
+};
+
+typedef struct hammer2_blockset hammer2_blockset_t;
+
+/*
+ * Catch programmer snafus
+ */
+#if (1 << HAMMER2_SET_RADIX) != HAMMER2_SET_COUNT
+#error "hammer2 direct radix is incorrect"
+#endif
+#if (1 << HAMMER2_PBUFRADIX) != HAMMER2_PBUFSIZE
+#error "HAMMER2_PBUFRADIX and HAMMER2_PBUFSIZE are inconsistent"
+#endif
+#if (1 << HAMMER2_RADIX_MIN) != HAMMER2_ALLOC_MIN
+#error "HAMMER2_RADIX_MIN and HAMMER2_ALLOC_MIN are inconsistent"
+#endif
+
+/*
+ * hammer2_bmap_data - A freemap entry in the LEVEL1 block.
+ *
+ * Each 128-byte entry contains the bitmap and meta-data required to manage
+ * a LEVEL0 (4MB) block of storage. The storage is managed in 256 x 16KB
+ * chunks.
+ *
+ * A smaller allocation granularity is supported via a linear iterator and/or
+ * must otherwise be tracked in ram.
+ *
+ * (data structure must be 128 bytes exactly)
+ *
+ * linear - A BYTE linear allocation offset used for sub-16KB allocations
+ * only. May contain values between 0 and 4MB. Must be ignored
+ * if 16KB-aligned (i.e. force bitmap scan), otherwise may be
+ * used to sub-allocate within the 16KB block (which is already
+ * marked as allocated in the bitmap).
+ *
+ * Sub-allocations need only be 1KB-aligned and do not have to be
+ * size-aligned, and 16KB or larger allocations do not update this
+ * field, resulting in pretty good packing.
+ *
+ * Please note that file data granularity may be limited by
+ * other issues such as buffer cache direct-mapping and the
+ * desire to support sector sizes up to 16KB (so H2 only issues
+ * I/O's in multiples of 16KB anyway).
+ *
+ * class - Clustering class. Cleared to 0 only if the entire leaf becomes
+ * free. Used to cluster device buffers so all elements must have
+ * the same device block size, but may mix logical sizes.
+ *
+ * Typically integrated with the blockref type in the upper 8 bits
+ * to localize inodes and indrect blocks, improving bulk free scans
+ * and directory scans.
+ *
+ * bitmap - Two bits per 16KB allocation block arranged in arrays of
+ * 64-bit elements, 256x2 bits representing ~4MB worth of media
+ * storage. Bit patterns are as follows:
+ *
+ * 00 Unallocated
+ * 01 (reserved)
+ * 10 Possibly free
+ * 11 Allocated
+ */
+struct hammer2_bmap_data {
+ int32_t linear; /* 00 linear sub-granular allocation offset */
+ uint16_t class; /* 04-05 clustering class ((type<<8)|radix) */
+ uint8_t reserved06; /* 06 */
+ uint8_t reserved07; /* 07 */
+ uint32_t reserved08; /* 08 */
+ uint32_t reserved0C; /* 0C */
+ uint32_t reserved10; /* 10 */
+ uint32_t reserved14; /* 14 */
+ uint32_t reserved18; /* 18 */
+ uint32_t avail; /* 1C */
+ uint32_t reserved20[8]; /* 20-3F 256 bits manages 128K/1KB/2-bits */
+ /* 40-7F 512 bits manages 4MB of storage */
+ hammer2_bitmap_t bitmapq[HAMMER2_BMAP_ELEMENTS];
+} __packed;
+
+typedef struct hammer2_bmap_data hammer2_bmap_data_t;
+
+/*
+ * XXX "Inodes ARE directory entries" is no longer the case. Hardlinks are
+ * dirents which refer to the same inode#, which is how filesystems usually
+ * implement hardlink. The following comments need to be updated.
+ *
+ * In HAMMER2 inodes ARE directory entries, with a special exception for
+ * hardlinks. The inode number is stored in the inode rather than being
+ * based on the location of the inode (since the location moves every time
+ * the inode or anything underneath the inode is modified).
+ *
+ * The inode is 1024 bytes, made up of 256 bytes of meta-data, 256 bytes
+ * for the filename, and 512 bytes worth of direct file data OR an embedded
+ * blockset. The in-memory hammer2_inode structure contains only the mostly-
+ * node-independent meta-data portion (some flags are node-specific and will
+ * not be synchronized). The rest of the inode is node-specific and chain I/O
+ * is required to obtain it.
+ *
+ * Directories represent one inode per blockref. Inodes are not laid out
+ * as a file but instead are represented by the related blockrefs. The
+ * blockrefs, in turn, are indexed by the 64-bit directory hash key. Remember
+ * that blocksets are fully associative, so a certain degree efficiency is
+ * achieved just from that.
+ *
+ * Up to 512 bytes of direct data can be embedded in an inode, and since
+ * inodes are essentially directory entries this also means that small data
+ * files end up simply being laid out linearly in the directory, resulting
+ * in fewer seeks and highly optimal access.
+ *
+ * The compression mode can be changed at any time in the inode and is
+ * recorded on a blockref-by-blockref basis.
+ *
+ * Hardlinks are supported via the inode map. Essentially the way a hardlink
+ * works is that all individual directory entries representing the same file
+ * are special cased and specify the same inode number. The actual file
+ * is placed in the nearest parent directory that is parent to all instances
+ * of the hardlink. If all hardlinks to a file are in the same directory
+ * the actual file will also be placed in that directory. This file uses
+ * the inode number as the directory entry key and is invisible to normal
+ * directory scans. Real directory entry keys are differentiated from the
+ * inode number key via bit 63. Access to the hardlink silently looks up
+ * the real file and forwards all operations to that file. Removal of the
+ * last hardlink also removes the real file.
+ */
+#define HAMMER2_INODE_BYTES 1024 /* (asserted by code) */
+#define HAMMER2_INODE_MAXNAME 256 /* maximum name in bytes */
+#define HAMMER2_INODE_VERSION_ONE 1
+
+#define HAMMER2_INODE_START 1024 /* dynamically allocated */
+
+struct hammer2_inode_meta {
+ uint16_t version; /* 0000 inode data version */
+ uint8_t reserved02; /* 0002 */
+ uint8_t pfs_subtype; /* 0003 pfs sub-type */
+
+ /*
+ * core inode attributes, inode type, misc flags
+ */
+ uint32_t uflags; /* 0004 chflags */
+ uint32_t rmajor; /* 0008 available for device nodes */
+ uint32_t rminor; /* 000C available for device nodes */
+ uint64_t ctime; /* 0010 inode change time */
+ uint64_t mtime; /* 0018 modified time */
+ uint64_t atime; /* 0020 access time (unsupported) */
+ uint64_t btime; /* 0028 birth time */
+ uuid_t uid; /* 0030 uid / degenerate unix uid */
+ uuid_t gid; /* 0040 gid / degenerate unix gid */
+
+ uint8_t type; /* 0050 object type */
+ uint8_t op_flags; /* 0051 operational flags */
+ uint16_t cap_flags; /* 0052 capability flags */
+ uint32_t mode; /* 0054 unix modes (typ low 16 bits) */
+
+ /*
+ * inode size, identification, localized recursive configuration
+ * for compression and backup copies.
+ *
+ * NOTE: Nominal parent inode number (iparent) is only applicable
+ * for directories but can also help for files during
+ * catastrophic recovery.
+ */
+ hammer2_tid_t inum; /* 0058 inode number */
+ hammer2_off_t size; /* 0060 size of file */
+ uint64_t nlinks; /* 0068 hard links (typ only dirs) */
+ hammer2_tid_t iparent; /* 0070 nominal parent inum */
+ hammer2_key_t name_key; /* 0078 full filename key */
+ uint16_t name_len; /* 0080 filename length */
+ uint8_t ncopies; /* 0082 ncopies to local media */
+ uint8_t comp_algo; /* 0083 compression request & algo */
+
+ /*
+ * These fields are currently only applicable to PFSROOTs.
+ *
+ * NOTE: We can't use {volume_data->fsid, pfs_clid} to uniquely
+ * identify an instance of a PFS in the cluster because
+ * a mount may contain more than one copy of the PFS as
+ * a separate node. {pfs_clid, pfs_fsid} must be used for
+ * registration in the cluster.
+ */
+ uint8_t target_type; /* 0084 hardlink target type */
+ uint8_t check_algo; /* 0085 check code request & algo */
+ uint8_t pfs_nmasters; /* 0086 (if PFSROOT) if multi-master */
+ uint8_t pfs_type; /* 0087 (if PFSROOT) node type */
+ uint64_t pfs_inum; /* 0088 (if PFSROOT) inum allocator */
+ uuid_t pfs_clid; /* 0090 (if PFSROOT) cluster uuid */
+ uuid_t pfs_fsid; /* 00A0 (if PFSROOT) unique uuid */
+
+ /*
+ * Quotas and aggregate sub-tree inode and data counters. Note that
+ * quotas are not replicated downward, they are explicitly set by
+ * the sysop and in-memory structures keep track of inheritance.
+ */
+ hammer2_key_t data_quota; /* 00B0 subtree quota in bytes */
+ hammer2_key_t unusedB8; /* 00B8 subtree byte count */
+ hammer2_key_t inode_quota; /* 00C0 subtree quota inode count */
+ hammer2_key_t unusedC8; /* 00C8 subtree inode count */
+
+ /*
+ * The last snapshot tid is tested against modify_tid to determine
+ * when a copy must be made of a data block whos check mode has been
+ * disabled (a disabled check mode allows data blocks to be updated
+ * in place instead of copy-on-write).
+ */
+ hammer2_tid_t pfs_lsnap_tid; /* 00D0 last snapshot tid */
+ hammer2_tid_t reservedD8; /* 00D8 (avail) */
+
+ /*
+ * Tracks (possibly degenerate) free areas covering all sub-tree
+ * allocations under inode, not counting the inode itself.
+ * 0/0 indicates empty entry. fully set-associative.
+ *
+ * (not yet implemented)
+ */
+ uint64_t decrypt_check; /* 00E0 decryption validator */
+ hammer2_off_t reservedE0[3]; /* 00E8/F0/F8 */
+} __packed;
+
+typedef struct hammer2_inode_meta hammer2_inode_meta_t;
+
+struct hammer2_inode_data {
+ hammer2_inode_meta_t meta; /* 0000-00FF */
+ unsigned char filename[HAMMER2_INODE_MAXNAME];
+ /* 0100-01FF (256 char, unterminated) */
+ union { /* 0200-03FF (64x8 = 512 bytes) */
+ hammer2_blockset_t blockset;
+ char data[HAMMER2_EMBEDDED_BYTES];
+ } u;
+} __packed;
+
+typedef struct hammer2_inode_data hammer2_inode_data_t;
+
+#define HAMMER2_OPFLAG_DIRECTDATA 0x01
+#define HAMMER2_OPFLAG_PFSROOT 0x02 /* (see also bref flag) */
+#define HAMMER2_OPFLAG_COPYIDS 0x04 /* copyids override parent */
+
+#define HAMMER2_OBJTYPE_UNKNOWN 0
+#define HAMMER2_OBJTYPE_DIRECTORY 1
+#define HAMMER2_OBJTYPE_REGFILE 2
+#define HAMMER2_OBJTYPE_FIFO 4
+#define HAMMER2_OBJTYPE_CDEV 5
+#define HAMMER2_OBJTYPE_BDEV 6
+#define HAMMER2_OBJTYPE_SOFTLINK 7
+#define HAMMER2_OBJTYPE_UNUSED08 8
+#define HAMMER2_OBJTYPE_SOCKET 9
+#define HAMMER2_OBJTYPE_WHITEOUT 10
+
+#define HAMMER2_COPYID_NONE 0
+#define HAMMER2_COPYID_LOCAL ((uint8_t)-1)
+
+#define HAMMER2_COPYID_COUNT 256
+
+/*
+ * PFS types identify the role of a PFS within a cluster. The PFS types
+ * is stored on media and in LNK_SPAN messages and used in other places.
+ *
+ * The low 4 bits specify the current active type while the high 4 bits
+ * specify the transition target if the PFS is being upgraded or downgraded,
+ * If the upper 4 bits are not zero it may effect how a PFS is used during
+ * the transition.
+ *
+ * Generally speaking, downgrading a MASTER to a SLAVE cannot complete until
+ * at least all MASTERs have updated their pfs_nmasters field. And upgrading
+ * a SLAVE to a MASTER cannot complete until the new prospective master has
+ * been fully synchronized (though theoretically full synchronization is
+ * not required if a (new) quorum of other masters are fully synchronized).
+ *
+ * It generally does not matter which PFS element you actually mount, you
+ * are mounting 'the cluster'. So, for example, a network mount will mount
+ * a DUMMY PFS type on a memory filesystem. However, there are two exceptions.
+ * In order to gain the benefits of a SOFT_MASTER or SOFT_SLAVE, those PFSs
+ * must be directly mounted.
+ */
+#define HAMMER2_PFSTYPE_NONE 0x00
+#define HAMMER2_PFSTYPE_CACHE 0x01
+#define HAMMER2_PFSTYPE_UNUSED02 0x02
+#define HAMMER2_PFSTYPE_SLAVE 0x03
+#define HAMMER2_PFSTYPE_SOFT_SLAVE 0x04
+#define HAMMER2_PFSTYPE_SOFT_MASTER 0x05
+#define HAMMER2_PFSTYPE_MASTER 0x06
+#define HAMMER2_PFSTYPE_UNUSED07 0x07
+#define HAMMER2_PFSTYPE_SUPROOT 0x08
+#define HAMMER2_PFSTYPE_DUMMY 0x09
+#define HAMMER2_PFSTYPE_MAX 16
+
+#define HAMMER2_PFSTRAN_NONE 0x00 /* no transition in progress */
+#define HAMMER2_PFSTRAN_CACHE 0x10
+#define HAMMER2_PFSTRAN_UNMUSED20 0x20
+#define HAMMER2_PFSTRAN_SLAVE 0x30
+#define HAMMER2_PFSTRAN_SOFT_SLAVE 0x40
+#define HAMMER2_PFSTRAN_SOFT_MASTER 0x50
+#define HAMMER2_PFSTRAN_MASTER 0x60
+#define HAMMER2_PFSTRAN_UNUSED70 0x70
+#define HAMMER2_PFSTRAN_SUPROOT 0x80
+#define HAMMER2_PFSTRAN_DUMMY 0x90
+
+#define HAMMER2_PFS_DEC(n) ((n) & 0x0F)
+#define HAMMER2_PFS_DEC_TRANSITION(n) (((n) >> 4) & 0x0F)
+#define HAMMER2_PFS_ENC_TRANSITION(n) (((n) & 0x0F) << 4)
+
+#define HAMMER2_PFSSUBTYPE_NONE 0
+#define HAMMER2_PFSSUBTYPE_SNAPSHOT 1 /* manual/managed snapshot */
+#define HAMMER2_PFSSUBTYPE_AUTOSNAP 2 /* automatic snapshot */
+
+/*
+ * PFS mode of operation is a bitmask. This is typically not stored
+ * on-media, but defined here because the field may be used in dmsgs.
+ */
+#define HAMMER2_PFSMODE_QUORUM 0x01
+#define HAMMER2_PFSMODE_RW 0x02
+
+/*
+ * Allocation Table
+ *
+ */
+
+
+/*
+ * Flags (8 bits) - blockref, for freemap only
+ *
+ * Note that the minimum chunk size is 1KB so we could theoretically have
+ * 10 bits here, but we might have some future extension that allows a
+ * chunk size down to 256 bytes and if so we will need bits 8 and 9.
+ */
+#define HAMMER2_AVF_SELMASK 0x03 /* select group */
+#define HAMMER2_AVF_ALL_ALLOC 0x04 /* indicate all allocated */
+#define HAMMER2_AVF_ALL_FREE 0x08 /* indicate all free */
+#define HAMMER2_AVF_RESERVED10 0x10
+#define HAMMER2_AVF_RESERVED20 0x20
+#define HAMMER2_AVF_RESERVED40 0x40
+#define HAMMER2_AVF_RESERVED80 0x80
+#define HAMMER2_AVF_AVMASK32 ((uint32_t)0xFFFFFF00LU)
+#define HAMMER2_AVF_AVMASK64 ((uint64_t)0xFFFFFFFFFFFFFF00LLU)
+
+#define HAMMER2_AV_SELECT_A 0x00
+#define HAMMER2_AV_SELECT_B 0x01
+#define HAMMER2_AV_SELECT_C 0x02
+#define HAMMER2_AV_SELECT_D 0x03
+
+/*
+ * The volume header eats a 64K block. There is currently an issue where
+ * we want to try to fit all nominal filesystem updates in a 512-byte section
+ * but it may be a lost cause due to the need for a blockset.
+ *
+ * All information is stored in host byte order. The volume header's magic
+ * number may be checked to determine the byte order. If you wish to mount
+ * between machines w/ different endian modes you'll need filesystem code
+ * which acts on the media data consistently (either all one way or all the
+ * other). Our code currently does not do that.
+ *
+ * A read-write mount may have to recover missing allocations by doing an
+ * incremental mirror scan looking for modifications made after alloc_tid.
+ * If alloc_tid == last_tid then no recovery operation is needed. Recovery
+ * operations are usually very, very fast.
+ *
+ * Read-only mounts do not need to do any recovery, access to the filesystem
+ * topology is always consistent after a crash (is always consistent, period).
+ * However, there may be shortcutted blockref updates present from deep in
+ * the tree which are stored in the volumeh eader and must be tracked on
+ * the fly.
+ *
+ * NOTE: The copyinfo[] array contains the configuration for both the
+ * cluster connections and any local media copies. The volume
+ * header will be replicated for each local media copy.
+ *
+ * The mount command may specify multiple medias or just one and
+ * allow HAMMER2 to pick up the others when it checks the copyinfo[]
+ * array on mount.
+ *
+ * NOTE: root_blockref points to the super-root directory, not the root
+ * directory. The root directory will be a subdirectory under the
+ * super-root.
+ *
+ * The super-root directory contains all root directories and all
+ * snapshots (readonly or writable). It is possible to do a
+ * null-mount of the super-root using special path constructions
+ * relative to your mounted root.
+ *
+ * NOTE: HAMMER2 allows any subdirectory tree to be managed as if it were
+ * a PFS, including mirroring and storage quota operations, and this is
+ * preferred over creating discrete PFSs in the super-root. Instead
+ * the super-root is most typically used to create writable snapshots,
+ * alternative roots, and so forth. The super-root is also used by
+ * the automatic snapshotting mechanism.
+ */
+#define HAMMER2_VOLUME_ID_HBO 0x48414d3205172011LLU
+#define HAMMER2_VOLUME_ID_ABO 0x11201705324d4148LLU
+
+struct hammer2_volume_data {
+ /*
+ * sector #0 - 512 bytes
+ */
+ uint64_t magic; /* 0000 Signature */
+ hammer2_off_t boot_beg; /* 0008 Boot area (future) */
+ hammer2_off_t boot_end; /* 0010 (size = end - beg) */
+ hammer2_off_t aux_beg; /* 0018 Aux area (future) */
+ hammer2_off_t aux_end; /* 0020 (size = end - beg) */
+ hammer2_off_t volu_size; /* 0028 Volume size, bytes */
+
+ uint32_t version; /* 0030 */
+ uint32_t flags; /* 0034 */
+ uint8_t copyid; /* 0038 copyid of phys vol */
+ uint8_t freemap_version; /* 0039 freemap algorithm */
+ uint8_t peer_type; /* 003A HAMMER2_PEER_xxx */
+ uint8_t reserved003B; /* 003B */
+ uint32_t reserved003C; /* 003C */
+
+ uuid_t fsid; /* 0040 */
+ uuid_t fstype; /* 0050 */
+
+ /*
+ * allocator_size is precalculated at newfs time and does not include
+ * reserved blocks, boot, or redo areas.
+ *
+ * Initial non-reserved-area allocations do not use the freemap
+ * but instead adjust alloc_iterator. Dynamic allocations take
+ * over starting at (allocator_beg). This makes newfs_hammer2's
+ * job a lot easier and can also serve as a testing jig.
+ */
+ hammer2_off_t allocator_size; /* 0060 Total data space */
+ hammer2_off_t allocator_free; /* 0068 Free space */
+ hammer2_off_t allocator_beg; /* 0070 Initial allocations */
+
+ /*
+ * mirror_tid reflects the highest committed change for this
+ * block device regardless of whether it is to the super-root
+ * or to a PFS or whatever.
+ *
+ * freemap_tid reflects the highest committed freemap change for
+ * this block device.
+ */
+ hammer2_tid_t mirror_tid; /* 0078 committed tid (vol) */
+ hammer2_tid_t reserved0080; /* 0080 */
+ hammer2_tid_t reserved0088; /* 0088 */
+ hammer2_tid_t freemap_tid; /* 0090 committed tid (fmap) */
+ hammer2_tid_t bulkfree_tid; /* 0098 bulkfree incremental */
+ hammer2_tid_t reserved00A0[5]; /* 00A0-00C7 */
+
+ /*
+ * Copyids are allocated dynamically from the copyexists bitmap.
+ * An id from the active copies set (up to 8, see copyinfo later on)
+ * may still exist after the copy set has been removed from the
+ * volume header and its bit will remain active in the bitmap and
+ * cannot be reused until it is 100% removed from the hierarchy.
+ */
+ uint32_t copyexists[8]; /* 00C8-00E7 copy exists bmap */
+ char reserved0140[248]; /* 00E8-01DF */
+
+ /*
+ * 32 bit CRC array at the end of the first 512 byte sector.
+ *
+ * icrc_sects[7] - First 512-4 bytes of volume header (including all
+ * the other icrc's except this one).
+ *
+ * icrc_sects[6] - Sector 1 (512 bytes) of volume header, which is
+ * the blockset for the root.
+ *
+ * icrc_sects[5] - Sector 2
+ * icrc_sects[4] - Sector 3
+ * icrc_sects[3] - Sector 4 (the freemap blockset)
+ */
+ hammer2_crc32_t icrc_sects[8]; /* 01E0-01FF */
+
+ /*
+ * sector #1 - 512 bytes
+ *
+ * The entire sector is used by a blockset.
+ */
+ hammer2_blockset_t sroot_blockset; /* 0200-03FF Superroot dir */
+
+ /*
+ * sector #2-7
+ */
+ char sector2[512]; /* 0400-05FF reserved */
+ char sector3[512]; /* 0600-07FF reserved */
+ hammer2_blockset_t freemap_blockset; /* 0800-09FF freemap */
+ char sector5[512]; /* 0A00-0BFF reserved */
+ char sector6[512]; /* 0C00-0DFF reserved */
+ char sector7[512]; /* 0E00-0FFF reserved */
+
+ /*
+ * sector #8-71 - 32768 bytes
+ *
+ * Contains the configuration for up to 256 copyinfo targets. These
+ * specify local and remote copies operating as masters or slaves.
+ * copyid's 0 and 255 are reserved (0 indicates an empty slot and 255
+ * indicates the local media).
+ *
+ * Each inode contains a set of up to 8 copyids, either inherited
+ * from its parent or explicitly specified in the inode, which
+ * indexes into this array.
+ */
+ /* 1000-8FFF copyinfo config */
+ hammer2_volconf_t copyinfo[HAMMER2_COPYID_COUNT];
+
+ /*
+ * Remaining sections are reserved for future use.
+ */
+ char reserved0400[0x6FFC]; /* 9000-FFFB reserved */
+
+ /*
+ * icrc on entire volume header
+ */
+ hammer2_crc32_t icrc_volheader; /* FFFC-FFFF full volume icrc*/
+} __packed;
+
+typedef struct hammer2_volume_data hammer2_volume_data_t;
+
+/*
+ * Various parts of the volume header have their own iCRCs.
+ *
+ * The first 512 bytes has its own iCRC stored at the end of the 512 bytes
+ * and not included the icrc calculation.
+ *
+ * The second 512 bytes also has its own iCRC but it is stored in the first
+ * 512 bytes so it covers the entire second 512 bytes.
+ *
+ * The whole volume block (64KB) has an iCRC covering all but the last 4 bytes,
+ * which is where the iCRC for the whole volume is stored. This is currently
+ * a catch-all for anything not individually iCRCd.
+ */
+#define HAMMER2_VOL_ICRC_SECT0 7
+#define HAMMER2_VOL_ICRC_SECT1 6
+
+#define HAMMER2_VOLUME_BYTES 65536
+
+#define HAMMER2_VOLUME_ICRC0_OFF 0
+#define HAMMER2_VOLUME_ICRC1_OFF 512
+#define HAMMER2_VOLUME_ICRCVH_OFF 0
+
+#define HAMMER2_VOLUME_ICRC0_SIZE (512 - 4)
+#define HAMMER2_VOLUME_ICRC1_SIZE (512)
+#define HAMMER2_VOLUME_ICRCVH_SIZE (65536 - 4)
+
+#define HAMMER2_VOL_VERSION_MIN 1
+#define HAMMER2_VOL_VERSION_DEFAULT 1
+#define HAMMER2_VOL_VERSION_WIP 2
+
+#define HAMMER2_NUM_VOLHDRS 4
+
+union hammer2_media_data {
+ hammer2_volume_data_t voldata;
+ hammer2_inode_data_t ipdata;
+ hammer2_blockset_t blkset;
+ hammer2_blockref_t npdata[HAMMER2_IND_COUNT_MAX];
+ hammer2_bmap_data_t bmdata[HAMMER2_FREEMAP_COUNT];
+ char buf[HAMMER2_PBUFSIZE];
+} __packed;
+
+typedef union hammer2_media_data hammer2_media_data_t;
+
+#endif /* !_HAMMER2_DISK_H_ */
diff --git a/usr.sbin/fstyp/hammer_disk.h b/usr.sbin/fstyp/hammer_disk.h
new file mode 100644
index 000000000000..ddedd72ff003
--- /dev/null
+++ b/usr.sbin/fstyp/hammer_disk.h
@@ -0,0 +1,1091 @@
+/*-
+ * Copyright (c) 2007 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.55 2008/11/13 02:18:43 dillon Exp $
+ * $FreeBSD$
+ */
+
+#ifndef VFS_HAMMER_DISK_H_
+#define VFS_HAMMER_DISK_H_
+
+#include <sys/endian.h>
+
+#ifndef _SYS_UUID_H_
+#include <sys/uuid.h>
+#endif
+
+/*
+ * The structures below represent the on-disk format for a HAMMER
+ * filesystem. Note that all fields for on-disk structures are naturally
+ * aligned. HAMMER uses little endian for fields in on-disk structures.
+ * HAMMER doesn't support big endian arch, but is planned.
+ *
+ * Most of HAMMER revolves around the concept of an object identifier. An
+ * obj_id is a 64 bit quantity which uniquely identifies a filesystem object
+ * FOR THE ENTIRE LIFE OF THE FILESYSTEM. This uniqueness allows backups
+ * and mirrors to retain varying amounts of filesystem history by removing
+ * any possibility of conflict through identifier reuse.
+ *
+ * A HAMMER filesystem may span multiple volumes.
+ *
+ * A HAMMER filesystem uses a 16K filesystem buffer size. All filesystem
+ * I/O is done in multiples of 16K.
+ *
+ * 64K X-bufs are used for blocks >= a file's 1MB mark.
+ *
+ * Per-volume storage limit: 52 bits 4096 TB
+ * Per-Zone storage limit: 60 bits 1 MTB
+ * Per-filesystem storage limit: 60 bits 1 MTB
+ */
+#define HAMMER_BUFSIZE 16384
+#define HAMMER_XBUFSIZE 65536
+#define HAMMER_HBUFSIZE (HAMMER_BUFSIZE / 2)
+#define HAMMER_XDEMARC (1024 * 1024)
+#define HAMMER_BUFMASK (HAMMER_BUFSIZE - 1)
+#define HAMMER_XBUFMASK (HAMMER_XBUFSIZE - 1)
+
+#define HAMMER_BUFSIZE64 ((uint64_t)HAMMER_BUFSIZE)
+#define HAMMER_BUFMASK64 ((uint64_t)HAMMER_BUFMASK)
+
+#define HAMMER_XBUFSIZE64 ((uint64_t)HAMMER_XBUFSIZE)
+#define HAMMER_XBUFMASK64 ((uint64_t)HAMMER_XBUFMASK)
+
+#define HAMMER_OFF_ZONE_MASK 0xF000000000000000ULL /* zone portion */
+#define HAMMER_OFF_VOL_MASK 0x0FF0000000000000ULL /* volume portion */
+#define HAMMER_OFF_SHORT_MASK 0x000FFFFFFFFFFFFFULL /* offset portion */
+#define HAMMER_OFF_LONG_MASK 0x0FFFFFFFFFFFFFFFULL /* offset portion */
+
+#define HAMMER_OFF_BAD ((hammer_off_t)-1)
+
+#define HAMMER_BUFSIZE_DOALIGN(offset) \
+ (((offset) + HAMMER_BUFMASK) & ~HAMMER_BUFMASK)
+#define HAMMER_BUFSIZE64_DOALIGN(offset) \
+ (((offset) + HAMMER_BUFMASK64) & ~HAMMER_BUFMASK64)
+
+#define HAMMER_XBUFSIZE_DOALIGN(offset) \
+ (((offset) + HAMMER_XBUFMASK) & ~HAMMER_XBUFMASK)
+#define HAMMER_XBUFSIZE64_DOALIGN(offset) \
+ (((offset) + HAMMER_XBUFMASK64) & ~HAMMER_XBUFMASK64)
+
+/*
+ * The current limit of volumes that can make up a HAMMER FS
+ */
+#define HAMMER_MAX_VOLUMES 256
+
+/*
+ * Reserved space for (future) header junk after the volume header.
+ */
+#define HAMMER_MIN_VOL_JUNK (HAMMER_BUFSIZE * 16) /* 256 KB */
+#define HAMMER_MAX_VOL_JUNK HAMMER_MIN_VOL_JUNK
+#define HAMMER_VOL_JUNK_SIZE HAMMER_MIN_VOL_JUNK
+
+/*
+ * Hammer transaction ids are 64 bit unsigned integers and are usually
+ * synchronized with the time of day in nanoseconds.
+ *
+ * Hammer offsets are used for FIFO indexing and embed a cycle counter
+ * and volume number in addition to the offset. Most offsets are required
+ * to be 16 KB aligned.
+ */
+typedef uint64_t hammer_tid_t;
+typedef uint64_t hammer_off_t;
+typedef uint32_t hammer_crc_t;
+typedef uuid_t hammer_uuid_t;
+
+#define HAMMER_MIN_TID 0ULL /* unsigned */
+#define HAMMER_MAX_TID 0xFFFFFFFFFFFFFFFFULL /* unsigned */
+#define HAMMER_MIN_KEY -0x8000000000000000LL /* signed */
+#define HAMMER_MAX_KEY 0x7FFFFFFFFFFFFFFFLL /* signed */
+#define HAMMER_MIN_OBJID HAMMER_MIN_KEY /* signed */
+#define HAMMER_MAX_OBJID HAMMER_MAX_KEY /* signed */
+#define HAMMER_MIN_RECTYPE 0x0U /* unsigned */
+#define HAMMER_MAX_RECTYPE 0xFFFFU /* unsigned */
+#define HAMMER_MIN_OFFSET 0ULL /* unsigned */
+#define HAMMER_MAX_OFFSET 0xFFFFFFFFFFFFFFFFULL /* unsigned */
+
+/*
+ * hammer_off_t has several different encodings. Note that not all zones
+ * encode a vol_no. Zone bits are not a part of filesystem capacity as
+ * the zone bits aren't directly or indirectly mapped to physical volumes.
+ *
+ * In other words, HAMMER's logical filesystem offset consists of 64 bits,
+ * but the filesystem is considered 60 bits filesystem, not 64 bits.
+ * The maximum filesystem capacity is 1EB, not 16EB.
+ *
+ * zone 0: available, a big-block that contains the offset is unused
+ * zone 1 (z,v,o): raw volume relative (offset 0 is the volume header)
+ * zone 2 (z,v,o): raw buffer relative (offset 0 is the first buffer)
+ * zone 3 (z,o): undo/redo fifo - fixed zone-2 offset array in volume header
+ * zone 4 (z,v,o): freemap - only real blockmap
+ * zone 8 (z,v,o): B-Tree - actually zone-2 address
+ * zone 9 (z,v,o): meta - actually zone-2 address
+ * zone 10 (z,v,o): large-data - actually zone-2 address
+ * zone 11 (z,v,o): small-data - actually zone-2 address
+ * zone 15: unavailable, usually the offset is beyond volume size
+ *
+ * layer1/layer2 direct map:
+ * Maximum HAMMER filesystem capacity from volume aspect
+ * 2^8(max volumes) * 2^52(max volume size) = 2^60 = 1EB (long offset)
+ * <------------------------------------------------------------->
+ * 8bits 52bits (short offset)
+ * <------><----------------------------------------------------->
+ * zzzzvvvvvvvvoooo oooooooooooooooo oooooooooooooooo oooooooooooooooo
+ * ----111111111111 1111112222222222 222222222ooooooo oooooooooooooooo
+ * <-----------------><------------------><---------------------->
+ * 18bits 19bits 23bits
+ * <------------------------------------------------------------->
+ * 2^18(layer1) * 2^19(layer2) * 2^23(big-block) = 2^60 = 1EB
+ * Maximum HAMMER filesystem capacity from blockmap aspect
+ *
+ * volume#0 layout
+ * +-------------------------> offset 0 of a device/partition
+ * | volume header (1928 bytes)
+ * | the rest of header junk space (HAMMER_BUFSIZE aligned)
+ * +-------------------------> vol_bot_beg
+ * | boot area (HAMMER_BUFSIZE aligned)
+ * +-------------------------> vol_mem_beg
+ * | memory log (HAMMER_BUFSIZE aligned)
+ * +-------------------------> vol_buf_beg (physical offset of zone-2)
+ * | zone-4 big-block for layer1
+ * +-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE
+ * | zone-4 big-blocks for layer2
+ * | ... (1 big-block per 4TB space)
+ * +-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ * | zone-3 big-blocks for UNDO/REDO FIFO
+ * | ... (max 128 big-blocks)
+ * +-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ * | zone-8 big-block for root B-Tree node/etc
+ * +-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ * | zone-9 big-block for root inode/PFS/etc
+ * +-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ * | zone-X big-blocks
+ * | ... (big-blocks for new zones after newfs_hammer)
+ * | ...
+ * | ...
+ * | ...
+ * | ...
+ * +-------------------------> vol_buf_end (HAMMER_BUFSIZE aligned)
+ * +-------------------------> end of a device/partition
+ *
+ * volume#N layout (0<N<256)
+ * +-------------------------> offset 0 of a device/partition
+ * | volume header (1928 bytes)
+ * | the rest of header junk space (HAMMER_BUFSIZE aligned)
+ * +-------------------------> vol_bot_beg
+ * | boot area (HAMMER_BUFSIZE aligned)
+ * +-------------------------> vol_mem_beg
+ * | memory log (HAMMER_BUFSIZE aligned)
+ * +-------------------------> vol_buf_beg (physical offset of zone-2)
+ * | zone-4 big-blocks for layer2
+ * | ... (1 big-block per 4TB space)
+ * +-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ * | zone-X big-blocks
+ * | ... (unused until volume#(N-1) runs out of space)
+ * | ...
+ * | ...
+ * | ...
+ * | ...
+ * +-------------------------> vol_buf_end (HAMMER_BUFSIZE aligned)
+ * +-------------------------> end of a device/partition
+ */
+
+#define HAMMER_ZONE_RAW_VOLUME 0x1000000000000000ULL
+#define HAMMER_ZONE_RAW_BUFFER 0x2000000000000000ULL
+#define HAMMER_ZONE_UNDO 0x3000000000000000ULL
+#define HAMMER_ZONE_FREEMAP 0x4000000000000000ULL
+#define HAMMER_ZONE_RESERVED05 0x5000000000000000ULL /* not used */
+#define HAMMER_ZONE_RESERVED06 0x6000000000000000ULL /* not used */
+#define HAMMER_ZONE_RESERVED07 0x7000000000000000ULL /* not used */
+#define HAMMER_ZONE_BTREE 0x8000000000000000ULL
+#define HAMMER_ZONE_META 0x9000000000000000ULL
+#define HAMMER_ZONE_LARGE_DATA 0xA000000000000000ULL
+#define HAMMER_ZONE_SMALL_DATA 0xB000000000000000ULL
+#define HAMMER_ZONE_RESERVED0C 0xC000000000000000ULL /* not used */
+#define HAMMER_ZONE_RESERVED0D 0xD000000000000000ULL /* not used */
+#define HAMMER_ZONE_RESERVED0E 0xE000000000000000ULL /* not used */
+#define HAMMER_ZONE_UNAVAIL 0xF000000000000000ULL
+
+#define HAMMER_ZONE_RAW_VOLUME_INDEX 1
+#define HAMMER_ZONE_RAW_BUFFER_INDEX 2
+#define HAMMER_ZONE_UNDO_INDEX 3
+#define HAMMER_ZONE_FREEMAP_INDEX 4
+#define HAMMER_ZONE_BTREE_INDEX 8
+#define HAMMER_ZONE_META_INDEX 9
+#define HAMMER_ZONE_LARGE_DATA_INDEX 10
+#define HAMMER_ZONE_SMALL_DATA_INDEX 11
+#define HAMMER_ZONE_UNAVAIL_INDEX 15
+
+#define HAMMER_MAX_ZONES 16
+
+#define HAMMER_ZONE(offset) ((offset) & HAMMER_OFF_ZONE_MASK)
+
+#define hammer_is_zone_raw_volume(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_RAW_VOLUME)
+#define hammer_is_zone_raw_buffer(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_RAW_BUFFER)
+#define hammer_is_zone_undo(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_UNDO)
+#define hammer_is_zone_freemap(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_FREEMAP)
+#define hammer_is_zone_btree(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_BTREE)
+#define hammer_is_zone_meta(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_META)
+#define hammer_is_zone_large_data(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_LARGE_DATA)
+#define hammer_is_zone_small_data(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_SMALL_DATA)
+#define hammer_is_zone_unavail(offset) \
+ (HAMMER_ZONE(offset) == HAMMER_ZONE_UNAVAIL)
+#define hammer_is_zone_data(offset) \
+ (hammer_is_zone_large_data(offset) || hammer_is_zone_small_data(offset))
+
+#define hammer_is_index_record(zone) \
+ ((zone) >= HAMMER_ZONE_BTREE_INDEX && \
+ (zone) < HAMMER_MAX_ZONES)
+
+#define hammer_is_zone_record(offset) \
+ hammer_is_index_record(HAMMER_ZONE_DECODE(offset))
+
+#define hammer_is_index_direct_xlated(zone) \
+ (((zone) == HAMMER_ZONE_RAW_BUFFER_INDEX) || \
+ ((zone) == HAMMER_ZONE_FREEMAP_INDEX) || \
+ hammer_is_index_record(zone))
+
+#define hammer_is_zone_direct_xlated(offset) \
+ hammer_is_index_direct_xlated(HAMMER_ZONE_DECODE(offset))
+
+#define HAMMER_ZONE_ENCODE(zone, ham_off) \
+ (((hammer_off_t)(zone) << 60) | (ham_off))
+#define HAMMER_ZONE_DECODE(ham_off) \
+ ((int)(((hammer_off_t)(ham_off) >> 60)))
+
+#define HAMMER_VOL_ENCODE(vol_no) \
+ ((hammer_off_t)((vol_no) & 255) << 52)
+#define HAMMER_VOL_DECODE(ham_off) \
+ ((int)(((hammer_off_t)(ham_off) >> 52) & 255))
+
+#define HAMMER_OFF_SHORT_ENCODE(offset) \
+ ((hammer_off_t)(offset) & HAMMER_OFF_SHORT_MASK)
+#define HAMMER_OFF_LONG_ENCODE(offset) \
+ ((hammer_off_t)(offset) & HAMMER_OFF_LONG_MASK)
+
+#define HAMMER_ENCODE(zone, vol_no, offset) \
+ (((hammer_off_t)(zone) << 60) | \
+ HAMMER_VOL_ENCODE(vol_no) | \
+ HAMMER_OFF_SHORT_ENCODE(offset))
+#define HAMMER_ENCODE_RAW_VOLUME(vol_no, offset) \
+ HAMMER_ENCODE(HAMMER_ZONE_RAW_VOLUME_INDEX, vol_no, offset)
+#define HAMMER_ENCODE_RAW_BUFFER(vol_no, offset) \
+ HAMMER_ENCODE(HAMMER_ZONE_RAW_BUFFER_INDEX, vol_no, offset)
+#define HAMMER_ENCODE_UNDO(offset) \
+ HAMMER_ENCODE(HAMMER_ZONE_UNDO_INDEX, HAMMER_ROOT_VOLNO, offset)
+#define HAMMER_ENCODE_FREEMAP(vol_no, offset) \
+ HAMMER_ENCODE(HAMMER_ZONE_FREEMAP_INDEX, vol_no, offset)
+
+/*
+ * Translate a zone address to zone-X address.
+ */
+#define hammer_xlate_to_zoneX(zone, offset) \
+ HAMMER_ZONE_ENCODE((zone), (offset) & ~HAMMER_OFF_ZONE_MASK)
+#define hammer_xlate_to_zone2(offset) \
+ hammer_xlate_to_zoneX(HAMMER_ZONE_RAW_BUFFER_INDEX, (offset))
+
+#define hammer_data_zone(data_len) \
+ (((data_len) >= HAMMER_BUFSIZE) ? \
+ HAMMER_ZONE_LARGE_DATA : \
+ HAMMER_ZONE_SMALL_DATA)
+#define hammer_data_zone_index(data_len) \
+ (((data_len) >= HAMMER_BUFSIZE) ? \
+ HAMMER_ZONE_LARGE_DATA_INDEX : \
+ HAMMER_ZONE_SMALL_DATA_INDEX)
+
+/*
+ * Big-Block backing store
+ *
+ * A blockmap is a two-level map which translates a blockmap-backed zone
+ * offset into a raw zone 2 offset. The layer 1 handles 18 bits and the
+ * layer 2 handles 19 bits. The 8M big-block size is 23 bits so two
+ * layers gives us 18+19+23 = 60 bits of address space.
+ *
+ * When using hinting for a blockmap lookup, the hint is lost when the
+ * scan leaves the HINTBLOCK, which is typically several BIGBLOCK's.
+ * HINTBLOCK is a heuristic.
+ */
+#define HAMMER_HINTBLOCK_SIZE (HAMMER_BIGBLOCK_SIZE * 4)
+#define HAMMER_HINTBLOCK_MASK64 ((uint64_t)HAMMER_HINTBLOCK_SIZE - 1)
+#define HAMMER_BIGBLOCK_SIZE (8192 * 1024)
+#define HAMMER_BIGBLOCK_SIZE64 ((uint64_t)HAMMER_BIGBLOCK_SIZE)
+#define HAMMER_BIGBLOCK_MASK (HAMMER_BIGBLOCK_SIZE - 1)
+#define HAMMER_BIGBLOCK_MASK64 ((uint64_t)HAMMER_BIGBLOCK_SIZE - 1)
+#define HAMMER_BIGBLOCK_BITS 23
+#if 0
+#define HAMMER_BIGBLOCK_OVERFILL (6144 * 1024)
+#endif
+#if (1 << HAMMER_BIGBLOCK_BITS) != HAMMER_BIGBLOCK_SIZE
+#error "HAMMER_BIGBLOCK_BITS BROKEN"
+#endif
+
+#define HAMMER_BUFFERS_PER_BIGBLOCK \
+ (HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE)
+#define HAMMER_BUFFERS_PER_BIGBLOCK_MASK \
+ (HAMMER_BUFFERS_PER_BIGBLOCK - 1)
+#define HAMMER_BUFFERS_PER_BIGBLOCK_MASK64 \
+ ((hammer_off_t)HAMMER_BUFFERS_PER_BIGBLOCK_MASK)
+
+#define HAMMER_BIGBLOCK_DOALIGN(offset) \
+ (((offset) + HAMMER_BIGBLOCK_MASK64) & ~HAMMER_BIGBLOCK_MASK64)
+
+/*
+ * Maximum number of mirrors operating in master mode (multi-master
+ * clustering and mirroring). Note that HAMMER1 does not support
+ * multi-master clustering as of 2015.
+ */
+#define HAMMER_MAX_MASTERS 16
+
+/*
+ * The blockmap is somewhat of a degenerate structure. HAMMER only actually
+ * uses it in its original incarnation to implement the freemap.
+ *
+ * zone:1 raw volume (no blockmap)
+ * zone:2 raw buffer (no blockmap)
+ * zone:3 undomap (direct layer2 array in volume header)
+ * zone:4 freemap (the only real blockmap)
+ * zone:8-15 zone id used to classify big-block only, address is actually
+ * a zone-2 address.
+ */
+typedef struct hammer_blockmap {
+ hammer_off_t phys_offset; /* zone-2 offset only used by zone-4 */
+ hammer_off_t first_offset; /* zone-X offset only used by zone-3 */
+ hammer_off_t next_offset; /* zone-X offset for allocation */
+ hammer_off_t alloc_offset; /* zone-X offset only used by zone-3 */
+ uint32_t reserved01;
+ hammer_crc_t entry_crc;
+} *hammer_blockmap_t;
+
+#define HAMMER_BLOCKMAP_CRCSIZE \
+ offsetof(struct hammer_blockmap, entry_crc)
+
+/*
+ * The blockmap is a 2-layer entity made up of big-blocks. The first layer
+ * contains 262144 32-byte entries (18 bits), the second layer contains
+ * 524288 16-byte entries (19 bits), representing 8MB (23 bit) blockmaps.
+ * 18+19+23 = 60 bits. The top four bits are the zone id.
+ *
+ * Currently only the freemap utilizes both layers in all their glory.
+ * All primary data/meta-data zones actually encode a zone-2 address
+ * requiring no real blockmap translation.
+ *
+ * The freemap uses the upper 8 bits of layer-1 to identify the volume,
+ * thus any space allocated via the freemap can be directly translated
+ * to a zone:2 (or zone:8-15) address.
+ *
+ * zone-X blockmap offset: [zone:4][layer1:18][layer2:19][big-block:23]
+ */
+
+/*
+ * 32 bytes layer1 entry for 8MB big-block.
+ * A big-block can hold 2^23 / 2^5 = 2^18 layer1 entries,
+ * which equals bits assigned for layer1 in zone-2 address.
+ */
+typedef struct hammer_blockmap_layer1 {
+ hammer_off_t blocks_free; /* big-blocks free */
+ hammer_off_t phys_offset; /* UNAVAIL or zone-2 */
+ hammer_off_t reserved01;
+ hammer_crc_t layer2_crc; /* xor'd crc's of HAMMER_BLOCKSIZE */
+ /* (not yet used) */
+ hammer_crc_t layer1_crc; /* MUST BE LAST FIELD OF STRUCTURE*/
+} *hammer_blockmap_layer1_t;
+
+#define HAMMER_LAYER1_CRCSIZE \
+ offsetof(struct hammer_blockmap_layer1, layer1_crc)
+
+/*
+ * 16 bytes layer2 entry for 8MB big-blocks.
+ * A big-block can hold 2^23 / 2^4 = 2^19 layer2 entries,
+ * which equals bits assigned for layer2 in zone-2 address.
+ *
+ * NOTE: bytes_free is signed and can legally go negative if/when data
+ * de-dup occurs. This field will never go higher than
+ * HAMMER_BIGBLOCK_SIZE. If exactly HAMMER_BIGBLOCK_SIZE
+ * the big-block is completely free.
+ */
+typedef struct hammer_blockmap_layer2 {
+ uint8_t zone; /* typed allocation zone */
+ uint8_t reserved01;
+ uint16_t reserved02;
+ uint32_t append_off; /* allocatable space index */
+ int32_t bytes_free; /* bytes free within this big-block */
+ hammer_crc_t entry_crc;
+} *hammer_blockmap_layer2_t;
+
+#define HAMMER_LAYER2_CRCSIZE \
+ offsetof(struct hammer_blockmap_layer2, entry_crc)
+
+#define HAMMER_BLOCKMAP_UNAVAIL ((hammer_off_t)-1LL)
+
+#define HAMMER_BLOCKMAP_RADIX1 /* 2^18 = 262144 */ \
+ ((int)(HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer1)))
+#define HAMMER_BLOCKMAP_RADIX2 /* 2^19 = 524288 */ \
+ ((int)(HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer2)))
+
+#define HAMMER_BLOCKMAP_LAYER1 /* 2^(18+19+23) = 1EB */ \
+ (HAMMER_BLOCKMAP_RADIX1 * HAMMER_BLOCKMAP_LAYER2)
+#define HAMMER_BLOCKMAP_LAYER2 /* 2^(19+23) = 4TB */ \
+ (HAMMER_BLOCKMAP_RADIX2 * HAMMER_BIGBLOCK_SIZE64)
+
+#define HAMMER_BLOCKMAP_LAYER1_MASK (HAMMER_BLOCKMAP_LAYER1 - 1)
+#define HAMMER_BLOCKMAP_LAYER2_MASK (HAMMER_BLOCKMAP_LAYER2 - 1)
+
+#define HAMMER_BLOCKMAP_LAYER2_DOALIGN(offset) \
+ (((offset) + HAMMER_BLOCKMAP_LAYER2_MASK) & \
+ ~HAMMER_BLOCKMAP_LAYER2_MASK)
+
+/*
+ * Index within layer1 or layer2 big-block for the entry representing
+ * a zone-2 physical offset.
+ */
+#define HAMMER_BLOCKMAP_LAYER1_INDEX(zone2_offset) \
+ ((int)(((zone2_offset) & HAMMER_BLOCKMAP_LAYER1_MASK) / \
+ HAMMER_BLOCKMAP_LAYER2))
+
+#define HAMMER_BLOCKMAP_LAYER2_INDEX(zone2_offset) \
+ ((int)(((zone2_offset) & HAMMER_BLOCKMAP_LAYER2_MASK) / \
+ HAMMER_BIGBLOCK_SIZE64))
+
+/*
+ * Byte offset within layer1 or layer2 big-block for the entry representing
+ * a zone-2 physical offset. Multiply the index by sizeof(blockmap_layer).
+ */
+#define HAMMER_BLOCKMAP_LAYER1_OFFSET(zone2_offset) \
+ (HAMMER_BLOCKMAP_LAYER1_INDEX(zone2_offset) * \
+ sizeof(struct hammer_blockmap_layer1))
+
+#define HAMMER_BLOCKMAP_LAYER2_OFFSET(zone2_offset) \
+ (HAMMER_BLOCKMAP_LAYER2_INDEX(zone2_offset) * \
+ sizeof(struct hammer_blockmap_layer2))
+
+/*
+ * Move on to offset 0 of the next layer1 or layer2.
+ */
+#define HAMMER_ZONE_LAYER1_NEXT_OFFSET(offset) \
+ (((offset) + HAMMER_BLOCKMAP_LAYER2) & ~HAMMER_BLOCKMAP_LAYER2_MASK)
+
+#define HAMMER_ZONE_LAYER2_NEXT_OFFSET(offset) \
+ (((offset) + HAMMER_BIGBLOCK_SIZE) & ~HAMMER_BIGBLOCK_MASK64)
+
+/*
+ * HAMMER UNDO parameters. The UNDO fifo is mapped directly in the volume
+ * header with an array of zone-2 offsets. A maximum of (128x8MB) = 1GB,
+ * and minimum of (64x8MB) = 512MB may be reserved. The size of the undo
+ * fifo is usually set a newfs time.
+ */
+#define HAMMER_MIN_UNDO_BIGBLOCKS 64
+#define HAMMER_MAX_UNDO_BIGBLOCKS 128
+
+/*
+ * All on-disk HAMMER structures which make up elements of the UNDO FIFO
+ * contain a hammer_fifo_head and hammer_fifo_tail structure. This structure
+ * contains all the information required to validate the fifo element
+ * and to scan the fifo in either direction. The head is typically embedded
+ * in higher level hammer on-disk structures while the tail is typically
+ * out-of-band. hdr_size is the size of the whole mess, including the tail.
+ *
+ * All undo structures are guaranteed to not cross a 16K filesystem
+ * buffer boundary. Most undo structures are fairly small. Data spaces
+ * are not immediately reused by HAMMER so file data is not usually recorded
+ * as part of an UNDO.
+ *
+ * PAD elements are allowed to take up only 8 bytes of space as a special
+ * case, containing only hdr_signature, hdr_type, and hdr_size fields,
+ * and with the tail overloaded onto the head structure for 8 bytes total.
+ *
+ * Every undo record has a sequence number. This number is unrelated to
+ * transaction ids and instead collects the undo transactions associated
+ * with a single atomic operation. A larger transactional operation, such
+ * as a remove(), may consist of several smaller atomic operations
+ * representing raw meta-data operations.
+ *
+ * HAMMER VERSION 4 CHANGES
+ *
+ * In HAMMER version 4 the undo structure alignment is reduced from 16384
+ * to 512 bytes in order to ensure that each 512 byte sector begins with
+ * a header. The hdr_seq field in the header is a 32 bit sequence number
+ * which allows the recovery code to detect missing sectors
+ * without relying on the 32-bit crc and to definitively identify the current
+ * undo sequence space without having to rely on information from the volume
+ * header. In addition, new REDO entries in the undo space are used to
+ * record write, write/extend, and transaction id updates.
+ *
+ * The grand result is:
+ *
+ * (1) The volume header no longer needs to be synchronized for most
+ * flush and fsync operations.
+ *
+ * (2) Most fsync operations need only lay down REDO records
+ *
+ * (3) Data overwrite for nohistory operations covered by REDO records
+ * can be supported (instead of rolling a new block allocation),
+ * by rolling UNDO for the prior contents of the data.
+ *
+ * HAMMER VERSION 5 CHANGES
+ *
+ * Hammer version 5 contains a minor adjustment making layer2's bytes_free
+ * field signed, allowing dedup to push it into the negative domain.
+ */
+#define HAMMER_HEAD_ALIGN 8
+#define HAMMER_HEAD_ALIGN_MASK (HAMMER_HEAD_ALIGN - 1)
+#define HAMMER_HEAD_DOALIGN(bytes) \
+ (((bytes) + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK)
+
+#define HAMMER_UNDO_ALIGN 512
+#define HAMMER_UNDO_ALIGN64 ((uint64_t)512)
+#define HAMMER_UNDO_MASK (HAMMER_UNDO_ALIGN - 1)
+#define HAMMER_UNDO_MASK64 (HAMMER_UNDO_ALIGN64 - 1)
+#define HAMMER_UNDO_DOALIGN(offset) \
+ (((offset) + HAMMER_UNDO_MASK) & ~HAMMER_UNDO_MASK64)
+
+typedef struct hammer_fifo_head {
+ uint16_t hdr_signature;
+ uint16_t hdr_type;
+ uint32_t hdr_size; /* Aligned size of the whole mess */
+ uint32_t hdr_seq; /* Sequence number */
+ hammer_crc_t hdr_crc; /* XOR crc up to field w/ crc after field */
+} *hammer_fifo_head_t;
+
+#define HAMMER_FIFO_HEAD_CRCOFF offsetof(struct hammer_fifo_head, hdr_crc)
+
+typedef struct hammer_fifo_tail {
+ uint16_t tail_signature;
+ uint16_t tail_type;
+ uint32_t tail_size; /* aligned size of the whole mess */
+} *hammer_fifo_tail_t;
+
+/*
+ * Fifo header types.
+ *
+ * NOTE: 0x8000U part of HAMMER_HEAD_TYPE_PAD can be removed if the HAMMER
+ * version ever gets bumped again. It exists only to keep compatibility with
+ * older versions.
+ */
+#define HAMMER_HEAD_TYPE_PAD (0x0040U | 0x8000U)
+#define HAMMER_HEAD_TYPE_DUMMY 0x0041U /* dummy entry w/seqno */
+#define HAMMER_HEAD_TYPE_UNDO 0x0043U /* random UNDO information */
+#define HAMMER_HEAD_TYPE_REDO 0x0044U /* data REDO / fast fsync */
+
+#define HAMMER_HEAD_SIGNATURE 0xC84EU
+#define HAMMER_TAIL_SIGNATURE 0xC74FU
+
+/*
+ * Misc FIFO structures.
+ *
+ * UNDO - Raw meta-data media updates.
+ */
+typedef struct hammer_fifo_undo {
+ struct hammer_fifo_head head;
+ hammer_off_t undo_offset; /* zone-1,2 offset */
+ int32_t undo_data_bytes;
+ int32_t undo_reserved01;
+ /* followed by data */
+} *hammer_fifo_undo_t;
+
+/*
+ * REDO (HAMMER version 4+) - Logical file writes/truncates.
+ *
+ * REDOs contain information which will be duplicated in a later meta-data
+ * update, allowing fast write()+fsync() operations. REDOs can be ignored
+ * without harming filesystem integrity but must be processed if fsync()
+ * semantics are desired.
+ *
+ * Unlike UNDOs which are processed backwards within the recovery span,
+ * REDOs must be processed forwards starting further back (starting outside
+ * the recovery span).
+ *
+ * WRITE - Write logical file (with payload). Executed both
+ * out-of-span and in-span. Out-of-span WRITEs may be
+ * filtered out by TERMs.
+ *
+ * TRUNC - Truncate logical file (no payload). Executed both
+ * out-of-span and in-span. Out-of-span WRITEs may be
+ * filtered out by TERMs.
+ *
+ * TERM_* - Indicates meta-data was committed (if out-of-span) or
+ * will be rolled-back (in-span). Any out-of-span TERMs
+ * matching earlier WRITEs remove those WRITEs from
+ * consideration as they might conflict with a later data
+ * commit (which is not being rolled-back).
+ *
+ * SYNC - The earliest in-span SYNC (the last one when scanning
+ * backwards) tells the recovery code how far out-of-span
+ * it must go to run REDOs.
+ *
+ * NOTE: WRITEs do not always have matching TERMs even under
+ * perfect conditions because truncations might remove the
+ * buffers from consideration. I/O problems can also remove
+ * buffers from consideration.
+ *
+ * TRUNCSs do not always have matching TERMs because several
+ * truncations may be aggregated together into a single TERM.
+ */
+typedef struct hammer_fifo_redo {
+ struct hammer_fifo_head head;
+ int64_t redo_objid; /* file being written */
+ hammer_off_t redo_offset; /* logical offset in file */
+ int32_t redo_data_bytes;
+ uint32_t redo_flags;
+ uint32_t redo_localization;
+ uint32_t redo_reserved01;
+ uint64_t redo_reserved02;
+ /* followed by data */
+} *hammer_fifo_redo_t;
+
+#define HAMMER_REDO_WRITE 0x00000001
+#define HAMMER_REDO_TRUNC 0x00000002
+#define HAMMER_REDO_TERM_WRITE 0x00000004
+#define HAMMER_REDO_TERM_TRUNC 0x00000008
+#define HAMMER_REDO_SYNC 0x00000010
+
+typedef union hammer_fifo_any {
+ struct hammer_fifo_head head;
+ struct hammer_fifo_undo undo;
+ struct hammer_fifo_redo redo;
+} *hammer_fifo_any_t;
+
+/*
+ * Volume header types
+ */
+#define HAMMER_FSBUF_VOLUME 0xC8414D4DC5523031ULL /* HAMMER01 */
+#define HAMMER_FSBUF_VOLUME_REV 0x313052C54D4D41C8ULL /* (reverse endian) */
+
+/*
+ * HAMMER Volume header
+ *
+ * A HAMMER filesystem can be built from 1-256 block devices, each block
+ * device contains a volume header followed by however many buffers fit
+ * into the volume.
+ *
+ * One of the volumes making up a HAMMER filesystem is the root volume.
+ * The root volume is always volume #0 which is the first block device path
+ * specified by newfs_hammer(8). All HAMMER volumes have a volume header,
+ * however the root volume may be the only volume that has valid values for
+ * some fields in the header.
+ *
+ * Special field notes:
+ *
+ * vol_bot_beg - offset of boot area (mem_beg - bot_beg bytes)
+ * vol_mem_beg - offset of memory log (buf_beg - mem_beg bytes)
+ * vol_buf_beg - offset of the first buffer in volume
+ * vol_buf_end - offset of volume EOF (on buffer boundary)
+ *
+ * The memory log area allows a kernel to cache new records and data
+ * in memory without allocating space in the actual filesystem to hold
+ * the records and data. In the event that a filesystem becomes full,
+ * any records remaining in memory can be flushed to the memory log
+ * area. This allows the kernel to immediately return success.
+ *
+ * The buffer offset is a physical offset of zone-2 offset. The lower
+ * 52 bits of the zone-2 offset is added to the buffer offset of each
+ * volume to generate an actual I/O offset within the block device.
+ *
+ * NOTE: boot area and memory log are currently not used.
+ */
+
+/*
+ * Filesystem type string
+ */
+#define HAMMER_FSTYPE_STRING "DragonFly HAMMER"
+
+/*
+ * These macros are only used by userspace when userspace commands either
+ * initialize or add a new HAMMER volume.
+ */
+#define HAMMER_BOOT_MINBYTES (32*1024)
+#define HAMMER_BOOT_NOMBYTES (64LL*1024*1024)
+#define HAMMER_BOOT_MAXBYTES (256LL*1024*1024)
+
+#define HAMMER_MEM_MINBYTES (256*1024)
+#define HAMMER_MEM_NOMBYTES (1LL*1024*1024*1024)
+#define HAMMER_MEM_MAXBYTES (64LL*1024*1024*1024)
+
+typedef struct hammer_volume_ondisk {
+ uint64_t vol_signature; /* HAMMER_FSBUF_VOLUME for a valid header */
+
+ /*
+ * These are relative to block device offset, not zone offsets.
+ */
+ int64_t vol_bot_beg; /* offset of boot area */
+ int64_t vol_mem_beg; /* offset of memory log */
+ int64_t vol_buf_beg; /* offset of the first buffer in volume */
+ int64_t vol_buf_end; /* offset of volume EOF (on buffer boundary) */
+ int64_t vol_reserved01;
+
+ hammer_uuid_t vol_fsid; /* identify filesystem */
+ hammer_uuid_t vol_fstype; /* identify filesystem type */
+ char vol_label[64]; /* filesystem label */
+
+ int32_t vol_no; /* volume number within filesystem */
+ int32_t vol_count; /* number of volumes making up filesystem */
+
+ uint32_t vol_version; /* version control information */
+ hammer_crc_t vol_crc; /* header crc */
+ uint32_t vol_flags; /* volume flags */
+ uint32_t vol_rootvol; /* the root volume number (must be 0) */
+
+ uint32_t vol_reserved[8];
+
+ /*
+ * These fields are initialized and space is reserved in every
+ * volume making up a HAMMER filesytem, but only the root volume
+ * contains valid data. Note that vol0_stat_bigblocks does not
+ * include big-blocks for freemap and undomap initially allocated
+ * by newfs_hammer(8).
+ */
+ int64_t vol0_stat_bigblocks; /* total big-blocks when fs is empty */
+ int64_t vol0_stat_freebigblocks;/* number of free big-blocks */
+ int64_t vol0_reserved01;
+ int64_t vol0_stat_inodes; /* for statfs only */
+ int64_t vol0_reserved02;
+ hammer_off_t vol0_btree_root; /* B-Tree root offset in zone-8 */
+ hammer_tid_t vol0_next_tid; /* highest partially synchronized TID */
+ hammer_off_t vol0_reserved03;
+
+ /*
+ * Blockmaps for zones. Not all zones use a blockmap. Note that
+ * the entire root blockmap is cached in the hammer_mount structure.
+ */
+ struct hammer_blockmap vol0_blockmap[HAMMER_MAX_ZONES];
+
+ /*
+ * Array of zone-2 addresses for undo FIFO.
+ */
+ hammer_off_t vol0_undo_array[HAMMER_MAX_UNDO_BIGBLOCKS];
+} *hammer_volume_ondisk_t;
+
+#define HAMMER_ROOT_VOLNO 0
+
+#define HAMMER_VOLF_NEEDFLUSH 0x0004 /* volume needs flush */
+
+#define HAMMER_VOL_CRCSIZE1 \
+ offsetof(struct hammer_volume_ondisk, vol_crc)
+#define HAMMER_VOL_CRCSIZE2 \
+ (sizeof(struct hammer_volume_ondisk) - HAMMER_VOL_CRCSIZE1 - \
+ sizeof(hammer_crc_t))
+
+#define HAMMER_VOL_VERSION_MIN 1 /* minimum supported version */
+#define HAMMER_VOL_VERSION_DEFAULT 7 /* newfs default version */
+#define HAMMER_VOL_VERSION_WIP 8 /* version >= this is WIP */
+#define HAMMER_VOL_VERSION_MAX 7 /* maximum supported version */
+
+#define HAMMER_VOL_VERSION_ONE 1
+#define HAMMER_VOL_VERSION_TWO 2 /* new dirent layout (2.3+) */
+#define HAMMER_VOL_VERSION_THREE 3 /* new snapshot layout (2.5+) */
+#define HAMMER_VOL_VERSION_FOUR 4 /* new undo/flush (2.5+) */
+#define HAMMER_VOL_VERSION_FIVE 5 /* dedup (2.9+) */
+#define HAMMER_VOL_VERSION_SIX 6 /* DIRHASH_ALG1 */
+#define HAMMER_VOL_VERSION_SEVEN 7 /* use the faster iscsi_crc */
+
+/*
+ * Translate a zone-2 address to physical address
+ */
+#define hammer_xlate_to_phys(volume, zone2_offset) \
+ ((volume)->vol_buf_beg + HAMMER_OFF_SHORT_ENCODE(zone2_offset))
+
+/*
+ * Translate a zone-3 address to zone-2 address
+ */
+#define HAMMER_UNDO_INDEX(zone3_offset) \
+ (HAMMER_OFF_SHORT_ENCODE(zone3_offset) / HAMMER_BIGBLOCK_SIZE)
+
+#define hammer_xlate_to_undo(volume, zone3_offset) \
+ ((volume)->vol0_undo_array[HAMMER_UNDO_INDEX(zone3_offset)] + \
+ (zone3_offset & HAMMER_BIGBLOCK_MASK64))
+
+/*
+ * Effective per-volume filesystem capacity including big-blocks for layer1/2
+ */
+#define HAMMER_VOL_BUF_SIZE(volume) \
+ ((volume)->vol_buf_end - (volume)->vol_buf_beg)
+
+/*
+ * Record types are fairly straightforward. The B-Tree includes the record
+ * type in its index sort.
+ */
+#define HAMMER_RECTYPE_UNKNOWN 0x0000
+#define HAMMER_RECTYPE_INODE 0x0001 /* inode in obj_id space */
+#define HAMMER_RECTYPE_DATA 0x0010
+#define HAMMER_RECTYPE_DIRENTRY 0x0011
+#define HAMMER_RECTYPE_DB 0x0012
+#define HAMMER_RECTYPE_EXT 0x0013 /* ext attributes */
+#define HAMMER_RECTYPE_FIX 0x0014 /* fixed attribute */
+#define HAMMER_RECTYPE_PFS 0x0015 /* PFS management */
+#define HAMMER_RECTYPE_SNAPSHOT 0x0016 /* Snapshot management */
+#define HAMMER_RECTYPE_CONFIG 0x0017 /* hammer cleanup config */
+#define HAMMER_RECTYPE_MAX 0xFFFF
+
+#define HAMMER_RECTYPE_ENTRY_START (HAMMER_RECTYPE_INODE + 1)
+#define HAMMER_RECTYPE_CLEAN_START HAMMER_RECTYPE_EXT
+
+#define HAMMER_FIXKEY_SYMLINK 1
+
+#define HAMMER_OBJTYPE_UNKNOWN 0 /* never exists on-disk as unknown */
+#define HAMMER_OBJTYPE_DIRECTORY 1
+#define HAMMER_OBJTYPE_REGFILE 2
+#define HAMMER_OBJTYPE_DBFILE 3
+#define HAMMER_OBJTYPE_FIFO 4
+#define HAMMER_OBJTYPE_CDEV 5
+#define HAMMER_OBJTYPE_BDEV 6
+#define HAMMER_OBJTYPE_SOFTLINK 7
+#define HAMMER_OBJTYPE_PSEUDOFS 8 /* pseudo filesystem obj */
+#define HAMMER_OBJTYPE_SOCKET 9
+
+/*
+ * HAMMER inode attribute data
+ *
+ * The data reference for a HAMMER inode points to this structure. Any
+ * modifications to the contents of this structure will result in a
+ * replacement operation.
+ *
+ * parent_obj_id is only valid for directories (which cannot be hard-linked),
+ * and specifies the parent directory obj_id. This field will also be set
+ * for non-directory inodes as a recovery aid, but can wind up holding
+ * stale information. However, since object id's are not reused, the worse
+ * that happens is that the recovery code is unable to use it.
+ * A parent_obj_id of 0 means it's a root inode of root or non-root PFS.
+ *
+ * NOTE: Future note on directory hardlinks. We can implement a record type
+ * which allows us to point to multiple parent directories.
+ */
+typedef struct hammer_inode_data {
+ uint16_t version; /* inode data version */
+ uint16_t mode; /* basic unix permissions */
+ uint32_t uflags; /* chflags */
+ uint32_t rmajor; /* used by device nodes */
+ uint32_t rminor; /* used by device nodes */
+ uint64_t ctime;
+ int64_t parent_obj_id; /* parent directory obj_id */
+ hammer_uuid_t uid;
+ hammer_uuid_t gid;
+
+ uint8_t obj_type;
+ uint8_t cap_flags; /* capability support flags (extension) */
+ uint16_t reserved01;
+ uint32_t reserved02;
+ uint64_t nlinks; /* hard links */
+ uint64_t size; /* filesystem object size */
+ union {
+ char symlink[24]; /* HAMMER_INODE_BASESYMLEN */
+ } ext;
+ uint64_t mtime; /* mtime must be second-to-last */
+ uint64_t atime; /* atime must be last */
+} *hammer_inode_data_t;
+
+/*
+ * Neither mtime nor atime upates are CRCd by the B-Tree element.
+ * mtime updates have UNDO, atime updates do not.
+ */
+#define HAMMER_INODE_CRCSIZE \
+ offsetof(struct hammer_inode_data, mtime)
+
+#define HAMMER_INODE_DATA_VERSION 1
+#define HAMMER_OBJID_ROOT 1 /* root inodes # */
+#define HAMMER_INODE_BASESYMLEN 24 /* see ext.symlink */
+
+/*
+ * Capability & implementation flags.
+ *
+ * HAMMER_INODE_CAP_DIR_LOCAL_INO - Use inode B-Tree localization
+ * for directory entries. Also see HAMMER_DIR_INODE_LOCALIZATION().
+ */
+#define HAMMER_INODE_CAP_DIRHASH_MASK 0x03 /* directory: hash algorithm */
+#define HAMMER_INODE_CAP_DIRHASH_ALG0 0x00
+#define HAMMER_INODE_CAP_DIRHASH_ALG1 0x01
+#define HAMMER_INODE_CAP_DIRHASH_ALG2 0x02
+#define HAMMER_INODE_CAP_DIRHASH_ALG3 0x03
+#define HAMMER_INODE_CAP_DIR_LOCAL_INO 0x04 /* use inode localization */
+
+#define HAMMER_DATA_DOALIGN(offset) \
+ (((offset) + 15) & ~15)
+#define HAMMER_DATA_DOALIGN_WITH(type, offset) \
+ (((type)(offset) + 15) & (~(type)15))
+
+/*
+ * A HAMMER directory entry associates a HAMMER filesystem object with a
+ * namespace. It is hooked into a pseudo-filesystem (with its own inode
+ * numbering space) in the filesystem by setting the high 16 bits of the
+ * localization field. The low 16 bits must be 0 and are reserved for
+ * future use.
+ *
+ * Directory entries are indexed with a 128 bit namekey rather then an
+ * offset. A portion of the namekey is an iterator/randomizer to deal
+ * with collisions.
+ *
+ * NOTE: leaf.base.obj_type from the related B-Tree leaf entry holds
+ * the filesystem object type of obj_id, e.g. a den_type equivalent.
+ * It is not stored in hammer_direntry_data.
+ *
+ * NOTE: name field / the filename data reference is NOT terminated with \0.
+ */
+typedef struct hammer_direntry_data {
+ int64_t obj_id; /* object being referenced */
+ uint32_t localization; /* identify pseudo-filesystem */
+ uint32_t reserved01;
+ char name[16]; /* name (extended) */
+} *hammer_direntry_data_t;
+
+#define HAMMER_ENTRY_NAME_OFF offsetof(struct hammer_direntry_data, name[0])
+#define HAMMER_ENTRY_SIZE(nlen) offsetof(struct hammer_direntry_data, name[nlen])
+
+/*
+ * Symlink data which does not fit in the inode is stored in a separate
+ * FIX type record.
+ */
+typedef struct hammer_symlink_data {
+ char name[16]; /* name (extended) */
+} *hammer_symlink_data_t;
+
+#define HAMMER_SYMLINK_NAME_OFF offsetof(struct hammer_symlink_data, name[0])
+
+/*
+ * The root inode for the primary filesystem and root inode for any
+ * pseudo-fs may be tagged with an optional data structure using
+ * HAMMER_RECTYPE_PFS and localization id. This structure allows
+ * the node to be used as a mirroring master or slave.
+ *
+ * When operating as a slave CD's into the node automatically become read-only
+ * and as-of sync_end_tid.
+ *
+ * When operating as a master the read PFSD info sets sync_end_tid to
+ * the most recently flushed TID.
+ *
+ * sync_low_tid is not yet used but will represent the highest pruning
+ * end-point, after which full history is available.
+ *
+ * We need to pack this structure making it equally sized on both 32-bit and
+ * 64-bit machines as it is part of struct hammer_ioc_mrecord_pfs which is
+ * send over the wire in hammer mirror operations. Only on 64-bit machines
+ * the size of this struct differ when packed or not. This leads us to the
+ * situation where old 64-bit systems (using the non-packed structure),
+ * which were never able to mirror to/from 32-bit systems, are now no longer
+ * able to mirror to/from newer 64-bit systems (using the packed structure).
+ */
+struct hammer_pseudofs_data {
+ hammer_tid_t sync_low_tid; /* full history beyond this point */
+ hammer_tid_t sync_beg_tid; /* earliest tid w/ full history avail */
+ hammer_tid_t sync_end_tid; /* current synchronizatoin point */
+ uint64_t sync_beg_ts; /* real-time of last completed sync */
+ uint64_t sync_end_ts; /* initiation of current sync cycle */
+ hammer_uuid_t shared_uuid; /* shared uuid (match required) */
+ hammer_uuid_t unique_uuid; /* unique uuid of this master/slave */
+ int32_t reserved01; /* reserved for future master_id */
+ int32_t mirror_flags; /* misc flags */
+ char label[64]; /* filesystem space label */
+ char snapshots[64]; /* softlink dir for pruning */
+ int32_t reserved02; /* was prune_{time,freq} */
+ int32_t reserved03; /* was reblock_{time,freq} */
+ int32_t reserved04; /* was snapshot_freq */
+ int32_t prune_min; /* do not prune recent history */
+ int32_t prune_max; /* do not retain history beyond here */
+ int32_t reserved[16];
+} __packed;
+
+typedef struct hammer_pseudofs_data *hammer_pseudofs_data_t;
+
+#define HAMMER_PFSD_SLAVE 0x00000001
+#define HAMMER_PFSD_DELETED 0x80000000
+
+#define hammer_is_pfs_slave(pfsd) \
+ (((pfsd)->mirror_flags & HAMMER_PFSD_SLAVE) != 0)
+#define hammer_is_pfs_master(pfsd) \
+ (!hammer_is_pfs_slave(pfsd))
+#define hammer_is_pfs_deleted(pfsd) \
+ (((pfsd)->mirror_flags & HAMMER_PFSD_DELETED) != 0)
+
+#define HAMMER_MAX_PFS 65536
+#define HAMMER_MAX_PFSID (HAMMER_MAX_PFS - 1)
+#define HAMMER_ROOT_PFSID 0
+
+/*
+ * Snapshot meta-data { Objid = HAMMER_OBJID_ROOT, Key = tid, rectype = SNAPSHOT }.
+ *
+ * Snapshot records replace the old <fs>/snapshots/<softlink> methodology. Snapshot
+ * records are mirrored but may be independantly managed once they are laid down on
+ * a slave.
+ *
+ * NOTE: The b-tree key is signed, the tid is not, so callers must still sort the
+ * results.
+ *
+ * NOTE: Reserved fields must be zero (as usual)
+ */
+typedef struct hammer_snapshot_data {
+ hammer_tid_t tid; /* the snapshot TID itself (== key) */
+ uint64_t ts; /* real-time when snapshot was made */
+ uint64_t reserved01;
+ uint64_t reserved02;
+ char label[64]; /* user-supplied description */
+ uint64_t reserved03[4];
+} *hammer_snapshot_data_t;
+
+/*
+ * Config meta-data { ObjId = HAMMER_OBJID_ROOT, Key = 0, rectype = CONFIG }.
+ *
+ * Used to store the hammer cleanup config. This data is not mirrored.
+ */
+typedef struct hammer_config_data {
+ char text[1024];
+} *hammer_config_data_t;
+
+/*
+ * Rollup various structures embedded as record data
+ */
+typedef union hammer_data_ondisk {
+ struct hammer_direntry_data entry;
+ struct hammer_inode_data inode;
+ struct hammer_symlink_data symlink;
+ struct hammer_pseudofs_data pfsd;
+ struct hammer_snapshot_data snap;
+ struct hammer_config_data config;
+} *hammer_data_ondisk_t;
+
+/*
+ * Ondisk layout of B-Tree related structures
+ */
+#if 0 /* Not needed for fstype(8) */
+#include "hammer_btree.h"
+#endif
+
+#define HAMMER_DIR_INODE_LOCALIZATION(ino_data) \
+ (((ino_data)->cap_flags & HAMMER_INODE_CAP_DIR_LOCAL_INO) ? \
+ HAMMER_LOCALIZE_INODE : \
+ HAMMER_LOCALIZE_MISC)
+
+#endif /* !VFS_HAMMER_DISK_H_ */