diff options
| author | Warner Losh <imp@FreeBSD.org> | 2017-11-14 23:02:19 +0000 |
|---|---|---|
| committer | Warner Losh <imp@FreeBSD.org> | 2017-11-14 23:02:19 +0000 |
| commit | ca987d4641cdcd7f27e153db17c5bf064934faf5 (patch) | |
| tree | 6c3860e3ba8949be9528d644fbb7fa88d8bbbb79 /sys/boot/common/bcache.c | |
| parent | 6eac7115560381ce5c9e2939ab3fce82bb9b6a95 (diff) | |
Notes
Diffstat (limited to 'sys/boot/common/bcache.c')
| -rw-r--r-- | sys/boot/common/bcache.c | 503 |
1 files changed, 0 insertions, 503 deletions
diff --git a/sys/boot/common/bcache.c b/sys/boot/common/bcache.c deleted file mode 100644 index 198dd5f3b7ae..000000000000 --- a/sys/boot/common/bcache.c +++ /dev/null @@ -1,503 +0,0 @@ -/*- - * Copyright (c) 1998 Michael Smith <msmith@freebsd.org> - * Copyright 2015 Toomas Soome <tsoome@me.com> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -#include <sys/param.h> -__FBSDID("$FreeBSD$"); - -/* - * Simple hashed block cache - */ - -#include <sys/stdint.h> - -#include <stand.h> -#include <string.h> -#include <strings.h> - -#include "bootstrap.h" - -/* #define BCACHE_DEBUG */ - -#ifdef BCACHE_DEBUG -# define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) -#else -# define DEBUG(fmt, args...) -#endif - -struct bcachectl -{ - daddr_t bc_blkno; - int bc_count; -}; - -/* - * bcache per device node. cache is allocated on device first open and freed - * on last close, to save memory. The issue there is the size; biosdisk - * supports up to 31 (0x1f) devices. Classic setup would use single disk - * to boot from, but this has changed with zfs. - */ -struct bcache { - struct bcachectl *bcache_ctl; - caddr_t bcache_data; - size_t bcache_nblks; - size_t ra; -}; - -static u_int bcache_total_nblks; /* set by bcache_init */ -static u_int bcache_blksize; /* set by bcache_init */ -static u_int bcache_numdev; /* set by bcache_add_dev */ -/* statistics */ -static u_int bcache_units; /* number of devices with cache */ -static u_int bcache_unit_nblks; /* nblocks per unit */ -static u_int bcache_hits; -static u_int bcache_misses; -static u_int bcache_ops; -static u_int bcache_bypasses; -static u_int bcache_bcount; -static u_int bcache_rablks; - -#define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - 1)) -#define BCACHE_LOOKUP(bc, blkno) \ - ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno)) -#define BCACHE_READAHEAD 256 -#define BCACHE_MINREADAHEAD 32 -#define BCACHE_MARKER 0xdeadbeef - -static void bcache_invalidate(struct bcache *bc, daddr_t blkno); -static void bcache_insert(struct bcache *bc, daddr_t blkno); -static void bcache_free_instance(struct bcache *bc); - -/* - * Initialise the cache for (nblks) of (bsize). - */ -void -bcache_init(size_t nblks, size_t bsize) -{ - /* set up control data */ - bcache_total_nblks = nblks; - bcache_blksize = bsize; -} - -/* - * add number of devices to bcache. we have to divide cache space - * between the devices, so bcache_add_dev() can be used to set up the - * number. The issue is, we need to get the number before actual allocations. - * bcache_add_dev() is supposed to be called from device init() call, so the - * assumption is, devsw dv_init is called for plain devices first, and - * for zfs, last. - */ -void -bcache_add_dev(int devices) -{ - bcache_numdev += devices; -} - -void * -bcache_allocate(void) -{ - u_int i; - struct bcache *bc = malloc(sizeof (struct bcache)); - int disks = bcache_numdev; - uint32_t *marker; - - if (disks == 0) - disks = 1; /* safe guard */ - - if (bc == NULL) { - errno = ENOMEM; - return (bc); - } - - /* - * the bcache block count must be power of 2 for hash function - */ - i = fls(disks) - 1; /* highbit - 1 */ - if (disks > (1 << i)) /* next power of 2 */ - i++; - - bc->bcache_nblks = bcache_total_nblks >> i; - bcache_unit_nblks = bc->bcache_nblks; - bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize + - sizeof(uint32_t)); - if (bc->bcache_data == NULL) { - /* dont error out yet. fall back to 32 blocks and try again */ - bc->bcache_nblks = 32; - bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize + - sizeof(uint32_t)); - } - - bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl)); - - if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) { - bcache_free_instance(bc); - errno = ENOMEM; - return (NULL); - } - /* Insert cache end marker. */ - marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize); - *marker = BCACHE_MARKER; - - /* Flush the cache */ - for (i = 0; i < bc->bcache_nblks; i++) { - bc->bcache_ctl[i].bc_count = -1; - bc->bcache_ctl[i].bc_blkno = -1; - } - bcache_units++; - bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */ - return (bc); -} - -void -bcache_free(void *cache) -{ - struct bcache *bc = cache; - - if (bc == NULL) - return; - - bcache_free_instance(bc); - bcache_units--; -} - -/* - * Handle a write request; write directly to the disk, and populate the - * cache with the new values. - */ -static int -write_strategy(void *devdata, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize) -{ - struct bcache_devdata *dd = (struct bcache_devdata *)devdata; - struct bcache *bc = dd->dv_cache; - daddr_t i, nblk; - - nblk = size / bcache_blksize; - - /* Invalidate the blocks being written */ - for (i = 0; i < nblk; i++) { - bcache_invalidate(bc, blk + i); - } - - /* Write the blocks */ - return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize)); -} - -/* - * Handle a read request; fill in parts of the request that can - * be satisfied by the cache, use the supplied strategy routine to do - * device I/O and then use the I/O results to populate the cache. - */ -static int -read_strategy(void *devdata, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize) -{ - struct bcache_devdata *dd = (struct bcache_devdata *)devdata; - struct bcache *bc = dd->dv_cache; - size_t i, nblk, p_size, r_size, complete, ra; - int result; - daddr_t p_blk; - caddr_t p_buf; - uint32_t *marker; - - if (bc == NULL) { - errno = ENODEV; - return (-1); - } - - marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize); - - if (rsize != NULL) - *rsize = 0; - - nblk = size / bcache_blksize; - if (nblk == 0 && size != 0) - nblk++; - result = 0; - complete = 1; - - /* Satisfy any cache hits up front, break on first miss */ - for (i = 0; i < nblk; i++) { - if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) { - bcache_misses += (nblk - i); - complete = 0; - if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD) - bc->ra >>= 1; /* reduce read ahead */ - break; - } else { - bcache_hits++; - } - } - - if (complete) { /* whole set was in cache, return it */ - if (bc->ra < BCACHE_READAHEAD) - bc->ra <<= 1; /* increase read ahead */ - bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size); - goto done; - } - - /* - * Fill in any misses. From check we have i pointing to first missing - * block, read in all remaining blocks + readahead. - * We have space at least for nblk - i before bcache wraps. - */ - p_blk = blk + i; - p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk)); - r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */ - - p_size = MIN(r_size, nblk - i); /* read at least those blocks */ - - /* - * The read ahead size setup. - * While the read ahead can save us IO, it also can complicate things: - * 1. We do not want to read ahead by wrapping around the - * bcache end - this would complicate the cache management. - * 2. We are using bc->ra as dynamic hint for read ahead size, - * detected cache hits will increase the read-ahead block count, and - * misses will decrease, see the code above. - * 3. The bcache is sized by 512B blocks, however, the underlying device - * may have a larger sector size, and we should perform the IO by - * taking into account these larger sector sizes. We could solve this by - * passing the sector size to bcache_allocate(), or by using ioctl(), but - * in this version we are using the constant, 16 blocks, and are rounding - * read ahead block count down to multiple of 16. - * Using the constant has two reasons, we are not entirely sure if the - * BIOS disk interface is providing the correct value for sector size. - * And secondly, this way we get the most conservative setup for the ra. - * - * The selection of multiple of 16 blocks (8KB) is quite arbitrary, however, - * we want to cover CDs (2K) and 4K disks. - * bcache_allocate() will always fall back to a minimum of 32 blocks. - * Our choice of 16 read ahead blocks will always fit inside the bcache. - */ - - if ((rw & F_NORA) == F_NORA) - ra = 0; - else - ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size); - - if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */ - ra = MIN(bc->ra, ra - 1); - ra = rounddown(ra, 16); /* multiple of 16 blocks */ - p_size += ra; - } - - /* invalidate bcache */ - for (i = 0; i < p_size; i++) { - bcache_invalidate(bc, p_blk + i); - } - - r_size = 0; - /* - * with read-ahead, it may happen we are attempting to read past - * disk end, as bcache has no information about disk size. - * in such case we should get partial read if some blocks can be - * read or error, if no blocks can be read. - * in either case we should return the data in bcache and only - * return error if there is no data. - */ - rw &= F_MASK; - result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, - p_size * bcache_blksize, p_buf, &r_size); - - r_size /= bcache_blksize; - for (i = 0; i < r_size; i++) - bcache_insert(bc, p_blk + i); - - /* update ra statistics */ - if (r_size != 0) { - if (r_size < p_size) - bcache_rablks += (p_size - r_size); - else - bcache_rablks += ra; - } - - /* check how much data can we copy */ - for (i = 0; i < nblk; i++) { - if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) - break; - } - - if (size > i * bcache_blksize) - size = i * bcache_blksize; - - if (size != 0) { - bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size); - result = 0; - } - - if (*marker != BCACHE_MARKER) { - printf("BUG: bcache corruption detected: nblks: %zu p_blk: %lu, " - "p_size: %zu, ra: %zu\n", bc->bcache_nblks, - (long unsigned)BHASH(bc, p_blk), p_size, ra); - } - - done: - if ((result == 0) && (rsize != NULL)) - *rsize = size; - return(result); -} - -/* - * Requests larger than 1/2 cache size will be bypassed and go - * directly to the disk. XXX tune this. - */ -int -bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize) -{ - struct bcache_devdata *dd = (struct bcache_devdata *)devdata; - struct bcache *bc = dd->dv_cache; - u_int bcache_nblks = 0; - int nblk, cblk, ret; - size_t csize, isize, total; - - bcache_ops++; - - if (bc != NULL) - bcache_nblks = bc->bcache_nblks; - - /* bypass large requests, or when the cache is inactive */ - if (bc == NULL || - ((size * 2 / bcache_blksize) > bcache_nblks)) { - DEBUG("bypass %zu from %qu", size / bcache_blksize, blk); - bcache_bypasses++; - rw &= F_MASK; - return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize)); - } - - switch (rw & F_MASK) { - case F_READ: - nblk = size / bcache_blksize; - if (size != 0 && nblk == 0) - nblk++; /* read at least one block */ - - ret = 0; - total = 0; - while(size) { - cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */ - cblk = MIN(cblk, nblk); - - if (size <= bcache_blksize) - csize = size; - else - csize = cblk * bcache_blksize; - - ret = read_strategy(devdata, rw, blk, csize, buf+total, &isize); - - /* - * we may have error from read ahead, if we have read some data - * return partial read. - */ - if (ret != 0 || isize == 0) { - if (total != 0) - ret = 0; - break; - } - blk += isize / bcache_blksize; - total += isize; - size -= isize; - nblk = size / bcache_blksize; - } - - if (rsize) - *rsize = total; - - return (ret); - case F_WRITE: - return write_strategy(devdata, F_WRITE, blk, size, buf, rsize); - } - return -1; -} - -/* - * Free allocated bcache instance - */ -static void -bcache_free_instance(struct bcache *bc) -{ - if (bc != NULL) { - if (bc->bcache_ctl) - free(bc->bcache_ctl); - if (bc->bcache_data) - free(bc->bcache_data); - free(bc); - } -} - -/* - * Insert a block into the cache. - */ -static void -bcache_insert(struct bcache *bc, daddr_t blkno) -{ - u_int cand; - - cand = BHASH(bc, blkno); - - DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount); - bc->bcache_ctl[cand].bc_blkno = blkno; - bc->bcache_ctl[cand].bc_count = bcache_bcount++; -} - -/* - * Invalidate a block from the cache. - */ -static void -bcache_invalidate(struct bcache *bc, daddr_t blkno) -{ - u_int i; - - i = BHASH(bc, blkno); - if (bc->bcache_ctl[i].bc_blkno == blkno) { - bc->bcache_ctl[i].bc_count = -1; - bc->bcache_ctl[i].bc_blkno = -1; - DEBUG("invalidate blk %llu", blkno); - } -} - -#ifndef BOOT2 -COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache); - -static int -command_bcache(int argc, char *argv[]) -{ - if (argc != 1) { - command_errmsg = "wrong number of arguments"; - return(CMD_ERROR); - } - - printf("\ncache blocks: %d\n", bcache_total_nblks); - printf("cache blocksz: %d\n", bcache_blksize); - printf("cache readahead: %d\n", bcache_rablks); - printf("unit cache blocks: %d\n", bcache_unit_nblks); - printf("cached units: %d\n", bcache_units); - printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops, - bcache_bypasses, bcache_hits, bcache_misses); - return(CMD_OK); -} -#endif |
