diff options
author | Peter Wemm <peter@FreeBSD.org> | 2018-05-08 03:44:38 +0000 |
---|---|---|
committer | Peter Wemm <peter@FreeBSD.org> | 2018-05-08 03:44:38 +0000 |
commit | 3faf8d6bffc5d0fb2525ba37bb504c53366caf9d (patch) | |
tree | 7e47911263e75034b767fe34b2f8d3d17e91f66d /subversion/libsvn_fs_x/revprops.c | |
parent | a55fb3c0d5eca7d887798125d5b95942b1f01d4b (diff) |
Notes
Diffstat (limited to 'subversion/libsvn_fs_x/revprops.c')
-rw-r--r-- | subversion/libsvn_fs_x/revprops.c | 1647 |
1 files changed, 692 insertions, 955 deletions
diff --git a/subversion/libsvn_fs_x/revprops.c b/subversion/libsvn_fs_x/revprops.c index 5bc62ccc1721c..f98c04478f249 100644 --- a/subversion/libsvn_fs_x/revprops.c +++ b/subversion/libsvn_fs_x/revprops.c @@ -26,12 +26,16 @@ #include "svn_pools.h" #include "svn_hash.h" #include "svn_dirent_uri.h" +#include "svn_sorts.h" #include "fs_x.h" +#include "low_level.h" #include "revprops.h" #include "util.h" #include "transaction.h" +#include "private/svn_packed_data.h" +#include "private/svn_sorts_private.h" #include "private/svn_subr_private.h" #include "private/svn_string_private.h" #include "../libsvn_fs/fs-loader.h" @@ -48,102 +52,6 @@ giving up. */ #define GENERATION_READ_RETRY_COUNT 100 -/* Maximum size of the generation number file contents (including NUL). */ -#define CHECKSUMMED_NUMBER_BUFFER_LEN \ - (SVN_INT64_BUFFER_SIZE + 3 + APR_MD5_DIGESTSIZE * 2) - - -svn_error_t * -svn_fs_x__upgrade_pack_revprops(svn_fs_t *fs, - svn_fs_upgrade_notify_t notify_func, - void *notify_baton, - svn_cancel_func_t cancel_func, - void *cancel_baton, - apr_pool_t *scratch_pool) -{ - svn_fs_x__data_t *ffd = fs->fsap_data; - const char *revprops_shard_path; - const char *revprops_pack_file_dir; - apr_int64_t shard; - apr_int64_t first_unpacked_shard - = ffd->min_unpacked_rev / ffd->max_files_per_dir; - - apr_pool_t *iterpool = svn_pool_create(scratch_pool); - const char *revsprops_dir = svn_dirent_join(fs->path, PATH_REVPROPS_DIR, - scratch_pool); - int compression_level = ffd->compress_packed_revprops - ? SVN_DELTA_COMPRESSION_LEVEL_DEFAULT - : SVN_DELTA_COMPRESSION_LEVEL_NONE; - - /* first, pack all revprops shards to match the packed revision shards */ - for (shard = 0; shard < first_unpacked_shard; ++shard) - { - svn_pool_clear(iterpool); - - revprops_pack_file_dir = svn_dirent_join(revsprops_dir, - apr_psprintf(iterpool, - "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, - shard), - iterpool); - revprops_shard_path = svn_dirent_join(revsprops_dir, - apr_psprintf(iterpool, "%" APR_INT64_T_FMT, shard), - iterpool); - - SVN_ERR(svn_fs_x__pack_revprops_shard(revprops_pack_file_dir, - revprops_shard_path, - shard, ffd->max_files_per_dir, - (int)(0.9 * ffd->revprop_pack_size), - compression_level, - cancel_func, cancel_baton, iterpool)); - if (notify_func) - SVN_ERR(notify_func(notify_baton, shard, - svn_fs_upgrade_pack_revprops, iterpool)); - } - - svn_pool_destroy(iterpool); - - return SVN_NO_ERROR; -} - -svn_error_t * -svn_fs_x__upgrade_cleanup_pack_revprops(svn_fs_t *fs, - svn_fs_upgrade_notify_t notify_func, - void *notify_baton, - svn_cancel_func_t cancel_func, - void *cancel_baton, - apr_pool_t *scratch_pool) -{ - svn_fs_x__data_t *ffd = fs->fsap_data; - const char *revprops_shard_path; - apr_int64_t shard; - apr_int64_t first_unpacked_shard - = ffd->min_unpacked_rev / ffd->max_files_per_dir; - - apr_pool_t *iterpool = svn_pool_create(scratch_pool); - const char *revsprops_dir = svn_dirent_join(fs->path, PATH_REVPROPS_DIR, - scratch_pool); - - /* delete the non-packed revprops shards afterwards */ - for (shard = 0; shard < first_unpacked_shard; ++shard) - { - svn_pool_clear(iterpool); - - revprops_shard_path = svn_dirent_join(revsprops_dir, - apr_psprintf(iterpool, "%" APR_INT64_T_FMT, shard), - iterpool); - SVN_ERR(svn_fs_x__delete_revprops_shard(revprops_shard_path, - shard, ffd->max_files_per_dir, - cancel_func, cancel_baton, - iterpool)); - if (notify_func) - SVN_ERR(notify_func(notify_baton, shard, - svn_fs_upgrade_cleanup_revprops, iterpool)); - } - - svn_pool_destroy(iterpool); - - return SVN_NO_ERROR; -} /* Revprop caching management. * @@ -159,16 +67,7 @@ svn_fs_x__upgrade_cleanup_pack_revprops(svn_fs_t *fs, * as keys with the generation being incremented upon every revprop change. * Since the cache is process-local, the generation needs to be tracked * for at least as long as the process lives but may be reset afterwards. - * - * We track the revprop generation in a persistent, unbuffered file that - * we may keep open for the lifetime of the svn_fs_t. It is the OS' - * responsibility to provide us with the latest contents upon read. To - * detect incomplete updates due to non-atomic reads, we put a MD5 checksum - * next to the actual generation number and verify that it matches. - * - * Since we cannot guarantee that the OS will provide us with up-to-date - * data buffers for open files, we re-open and re-read the file before - * modifying it. This will prevent lost updates. + * We track the revprop generation in a file that. * * A race condition exists between switching to the modified revprop data * and bumping the generation number. In particular, the process may crash @@ -187,110 +86,6 @@ svn_fs_x__upgrade_cleanup_pack_revprops(svn_fs_t *fs, * after the crash, reader caches may be stale. */ -/* If the revprop generation file in FS is open, close it. This is a no-op - * if the file is not open. - */ -static svn_error_t * -close_revprop_generation_file(svn_fs_t *fs, - apr_pool_t *scratch_pool) -{ - svn_fs_x__data_t *ffd = fs->fsap_data; - if (ffd->revprop_generation_file) - { - SVN_ERR(svn_io_file_close(ffd->revprop_generation_file, scratch_pool)); - ffd->revprop_generation_file = NULL; - } - - return SVN_NO_ERROR; -} - -/* Make sure the revprop_generation member in FS is set. If READ_ONLY is - * set, open the file w/o write permission if the file is not open yet. - * The file is kept open if it has sufficient rights (or more) but will be - * closed and re-opened if it provided insufficient access rights. - * - * Call only for repos that support revprop caching. - */ -static svn_error_t * -open_revprop_generation_file(svn_fs_t *fs, - svn_boolean_t read_only, - apr_pool_t *scratch_pool) -{ - svn_fs_x__data_t *ffd = fs->fsap_data; - apr_int32_t flags = read_only ? APR_READ : (APR_READ | APR_WRITE); - - /* Close the current file handle if it has insufficient rights. */ - if ( ffd->revprop_generation_file - && (apr_file_flags_get(ffd->revprop_generation_file) & flags) != flags) - SVN_ERR(close_revprop_generation_file(fs, scratch_pool)); - - /* If not open already, open with sufficient rights. */ - if (ffd->revprop_generation_file == NULL) - { - const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool); - SVN_ERR(svn_io_file_open(&ffd->revprop_generation_file, path, - flags, APR_OS_DEFAULT, fs->pool)); - } - - return SVN_NO_ERROR; -} - -/* Return the textual representation of NUMBER and its checksum in *BUFFER. - */ -static svn_error_t * -checkedsummed_number(svn_stringbuf_t **buffer, - apr_int64_t number, - apr_pool_t *result_pool, - apr_pool_t *scratch_pool) -{ - svn_checksum_t *checksum; - const char *digest; - - char str[SVN_INT64_BUFFER_SIZE]; - apr_size_t len = svn__i64toa(str, number); - str[len] = 0; - - SVN_ERR(svn_checksum(&checksum, svn_checksum_md5, str, len, scratch_pool)); - digest = svn_checksum_to_cstring_display(checksum, scratch_pool); - - *buffer = svn_stringbuf_createf(result_pool, "%s %s\n", digest, str); - - return SVN_NO_ERROR; -} - -/* Extract the generation number from the text BUFFER of LEN bytes and - * verify it against the checksum in the same BUFFER. If they match, return - * the generation in *NUMBER. Otherwise, return an error. - * BUFFER does not need to be NUL-terminated. - */ -static svn_error_t * -verify_extract_number(apr_int64_t *number, - const char *buffer, - apr_size_t len, - apr_pool_t *scratch_pool) -{ - const char *digest_end = strchr(buffer, ' '); - - /* Does the buffer even contain checksum _and_ number? */ - if (digest_end != NULL) - { - svn_checksum_t *expected; - svn_checksum_t *actual; - - SVN_ERR(svn_checksum_parse_hex(&expected, svn_checksum_md5, buffer, - scratch_pool)); - SVN_ERR(svn_checksum(&actual, svn_checksum_md5, digest_end + 1, - (buffer + len) - (digest_end + 1), scratch_pool)); - - if (svn_checksum_match(expected, actual)) - return svn_error_trace(svn_cstring_atoi64(number, digest_end + 1)); - } - - /* Incomplete buffer or not a match. */ - return svn_error_create(SVN_ERR_FS_INVALID_GENERATION, NULL, - _("Invalid generation number data.")); -} - /* Read revprop generation as stored on disk for repository FS. The result is * returned in *CURRENT. Call only for repos that support revprop caching. */ @@ -299,40 +94,32 @@ read_revprop_generation_file(apr_int64_t *current, svn_fs_t *fs, apr_pool_t *scratch_pool) { - svn_fs_x__data_t *ffd = fs->fsap_data; apr_pool_t *iterpool = svn_pool_create(scratch_pool); - char buf[CHECKSUMMED_NUMBER_BUFFER_LEN]; - apr_size_t len; - apr_off_t offset = 0; int i; svn_error_t *err = SVN_NO_ERROR; + const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool); /* Retry in case of incomplete file buffer updates. */ for (i = 0; i < GENERATION_READ_RETRY_COUNT; ++i) { + svn_stringbuf_t *buf; + svn_error_clear(err); svn_pool_clear(iterpool); - /* If we can't even access the data, things are very wrong. - * Don't retry in that case. - */ - SVN_ERR(open_revprop_generation_file(fs, TRUE, iterpool)); - SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset, - iterpool)); - - len = sizeof(buf); - SVN_ERR(svn_io_read_length_line(ffd->revprop_generation_file, buf, &len, - iterpool)); + /* Read the generation file. */ + err = svn_stringbuf_from_file2(&buf, path, iterpool); - /* Some data has been read. It will most likely be complete and - * consistent. Extract and verify anyway. */ - err = verify_extract_number(current, buf, len, iterpool); + /* If we could read the file, it should be complete due to our atomic + * file replacement scheme. */ if (!err) - break; - - /* Got unlucky and data was invalid. Retry. */ - SVN_ERR(close_revprop_generation_file(fs, iterpool)); + { + svn_stringbuf_strip_whitespace(buf); + SVN_ERR(svn_cstring_atoi64(current, buf->data)); + break; + } + /* Got unlucky the file was not available. Retry. */ #if APR_HAS_THREADS apr_thread_yield(); #else @@ -356,17 +143,21 @@ write_revprop_generation_file(svn_fs_t *fs, { svn_fs_x__data_t *ffd = fs->fsap_data; svn_stringbuf_t *buffer; - apr_off_t offset = 0; + const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool); - SVN_ERR(checkedsummed_number(&buffer, current, scratch_pool, scratch_pool)); + /* Invalidate our cached revprop generation in case the file operations + * below fail. */ + ffd->revprop_generation = -1; - SVN_ERR(open_revprop_generation_file(fs, FALSE, scratch_pool)); - SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset, - scratch_pool)); - SVN_ERR(svn_io_file_write_full(ffd->revprop_generation_file, buffer->data, - buffer->len, NULL, scratch_pool)); - SVN_ERR(svn_io_file_flush_to_disk(ffd->revprop_generation_file, - scratch_pool)); + /* Write the new number. */ + buffer = svn_stringbuf_createf(scratch_pool, "%" APR_INT64_T_FMT "\n", + current); + SVN_ERR(svn_io_write_atomic2(path, buffer->data, buffer->len, + path /* copy_perms */, FALSE, + scratch_pool)); + + /* Remember it to spare us the re-read. */ + ffd->revprop_generation = current; return SVN_NO_ERROR; } @@ -375,49 +166,12 @@ svn_error_t * svn_fs_x__reset_revprop_generation_file(svn_fs_t *fs, apr_pool_t *scratch_pool) { - const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool); - svn_stringbuf_t *buffer; - - /* Unconditionally close the revprop generation file. - * Don't care about FS formats. This ensures consistent internal state. */ - SVN_ERR(close_revprop_generation_file(fs, scratch_pool)); - - /* Unconditionally remove any old revprop generation file. - * Don't care about FS formats. This ensures consistent on-disk state - * for old format repositories. */ - SVN_ERR(svn_io_remove_file2(path, TRUE, scratch_pool)); - - /* Write the initial revprop generation file contents, if supported by - * the current format. This ensures consistent on-disk state for new - * format repositories. */ - SVN_ERR(checkedsummed_number(&buffer, 0, scratch_pool, scratch_pool)); - SVN_ERR(svn_io_write_atomic(path, buffer->data, buffer->len, NULL, - scratch_pool)); - - /* ffd->revprop_generation_file will be re-opened on demand. */ + /* Write the initial revprop generation file contents. */ + SVN_ERR(write_revprop_generation_file(fs, 0, scratch_pool)); return SVN_NO_ERROR; } -/* Create an error object with the given MESSAGE and pass it to the - WARNING member of FS. Clears UNDERLYING_ERR. */ -static void -log_revprop_cache_init_warning(svn_fs_t *fs, - svn_error_t *underlying_err, - const char *message, - apr_pool_t *scratch_pool) -{ - svn_error_t *err = svn_error_createf( - SVN_ERR_FS_REVPROP_CACHE_INIT_FAILURE, - underlying_err, message, - svn_dirent_local_style(fs->path, scratch_pool)); - - if (fs->warning) - (fs->warning)(fs->warning_baton, err); - - svn_error_clear(err); -} - /* Test whether revprop cache and necessary infrastructure are available in FS. */ static svn_boolean_t @@ -425,29 +179,9 @@ has_revprop_cache(svn_fs_t *fs, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; - svn_error_t *error; - - /* is the cache (still) enabled? */ - if (ffd->revprop_cache == NULL) - return FALSE; - - /* try initialize our file-backed infrastructure */ - error = open_revprop_generation_file(fs, TRUE, scratch_pool); - if (error) - { - /* failure -> disable revprop cache for good */ - - ffd->revprop_cache = NULL; - log_revprop_cache_init_warning(fs, error, - "Revprop caching for '%s' disabled " - "because infrastructure for revprop " - "caching failed to initialize.", - scratch_pool); - return FALSE; - } - - return TRUE; + /* is the cache enabled? */ + return ffd->revprop_cache != NULL; } /* Baton structure for revprop_generation_fixup. */ @@ -475,9 +209,6 @@ revprop_generation_fixup(void *void_baton, svn_fs_x__data_t *ffd = baton->fs->fsap_data; assert(ffd->has_write_lock); - /* Make sure we don't operate on stale OS buffers. */ - SVN_ERR(close_revprop_generation_file(baton->fs, scratch_pool)); - /* Maybe, either the original revprop writer or some other reader has already corrected / bumped the revprop generation. Thus, we need to read it again. However, we will now be the only ones changing @@ -498,12 +229,10 @@ revprop_generation_fixup(void *void_baton, return SVN_NO_ERROR; } -/* Read the current revprop generation and return it in *GENERATION. - Also, detect aborted / crashed writers and recover from that. - Use the access object in FS to set the shared mem values. */ +/* Read the current revprop generation of FS and its value in FS->FSAP_DATA. + Also, detect aborted / crashed writers and recover from that. */ static svn_error_t * -read_revprop_generation(apr_int64_t *generation, - svn_fs_t *fs, +read_revprop_generation(svn_fs_t *fs, apr_pool_t *scratch_pool) { apr_int64_t current = 0; @@ -548,60 +277,84 @@ read_revprop_generation(apr_int64_t *generation, } /* return the value we just got */ - *generation = current; + ffd->revprop_generation = current; return SVN_NO_ERROR; } +void +svn_fs_x__invalidate_revprop_generation(svn_fs_t *fs) +{ + svn_fs_x__data_t *ffd = fs->fsap_data; + ffd->revprop_generation = -1; +} + +/* Return TRUE if the revprop generation value in FS->FSAP_DATA is valid. */ +static svn_boolean_t +is_generation_valid(svn_fs_t *fs) +{ + svn_fs_x__data_t *ffd = fs->fsap_data; + return ffd->revprop_generation >= 0; +} + /* Set the revprop generation in FS to the next odd number to indicate - that there is a revprop write process under way. Return that value - in *GENERATION. If the change times out, readers shall recover from - that state & re-read revprops. + that there is a revprop write process under way. Update the value + in FS->FSAP_DATA accordingly. If the change times out, readers shall + recover from that state & re-read revprops. This is a no-op for repo formats that don't support revprop caching. */ static svn_error_t * -begin_revprop_change(apr_int64_t *generation, - svn_fs_t *fs, +begin_revprop_change(svn_fs_t *fs, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; SVN_ERR_ASSERT(ffd->has_write_lock); - /* Close and re-open to make sure we read the latest data. */ - SVN_ERR(close_revprop_generation_file(fs, scratch_pool)); - SVN_ERR(open_revprop_generation_file(fs, FALSE, scratch_pool)); - /* Set the revprop generation to an odd value to indicate * that a write is in progress. */ - SVN_ERR(read_revprop_generation(generation, fs, scratch_pool)); - ++*generation; - SVN_ERR(write_revprop_generation_file(fs, *generation, scratch_pool)); + SVN_ERR(read_revprop_generation(fs, scratch_pool)); + ++ffd->revprop_generation; + SVN_ERR_ASSERT(ffd->revprop_generation % 2); + SVN_ERR(write_revprop_generation_file(fs, ffd->revprop_generation, + scratch_pool)); return SVN_NO_ERROR; } /* Set the revprop generation in FS to the next even generation after - the odd value in GENERATION to indicate that + the odd value in FS->FSAP_DATA to indicate that a) readers shall re-read revprops, and b) the write process has been completed (no recovery required). This is a no-op for repo formats that don't support revprop caching. */ static svn_error_t * end_revprop_change(svn_fs_t *fs, - apr_int64_t generation, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; SVN_ERR_ASSERT(ffd->has_write_lock); - SVN_ERR_ASSERT(generation % 2); + SVN_ERR_ASSERT(ffd->revprop_generation % 2); /* Set the revprop generation to an even value to indicate * that a write has been completed. Since we held the write * lock, nobody else could have updated the file contents. */ - SVN_ERR(write_revprop_generation_file(fs, generation + 1, scratch_pool)); + SVN_ERR(write_revprop_generation_file(fs, ffd->revprop_generation + 1, + scratch_pool)); return SVN_NO_ERROR; } +/* Represents an entry in the packed revprop manifest. + * There is one such entry per pack file. */ +typedef struct manifest_entry_t +{ + /* First revision in the pack file. */ + svn_revnum_t start_rev; + + /* Tag (a counter) appended to the file name to distinguish it from + outdated ones. */ + apr_uint64_t tag; +} manifest_entry_t; + /* Container for all data required to access the packed revprop file * for a given REVISION. This structure will be filled incrementally * by read_pack_revprops() its sub-routines. @@ -611,9 +364,6 @@ typedef struct packed_revprops_t /* revision number to read (not necessarily the first in the pack) */ svn_revnum_t revision; - /* current revprop generation. Used when populating the revprop cache */ - apr_int64_t generation; - /* the actual revision properties */ apr_hash_t *properties; @@ -622,8 +372,8 @@ typedef struct packed_revprops_t apr_size_t serialized_size; - /* name of the pack file (without folder path) */ - const char *filename; + /* manifest entry describing the pack file */ + manifest_entry_t entry; /* packed shard folder path */ const char *folder; @@ -631,34 +381,19 @@ typedef struct packed_revprops_t /* sum of values in SIZES */ apr_size_t total_size; - /* first revision in the pack (>= MANIFEST_START) */ - svn_revnum_t start_revision; - - /* size of the revprops in PACKED_REVPROPS */ - apr_array_header_t *sizes; - - /* offset of the revprops in PACKED_REVPROPS */ - apr_array_header_t *offsets; - - - /* concatenation of the serialized representation of all revprops - * in the pack, i.e. the pack content without header and compression */ - svn_stringbuf_t *packed_revprops; - - /* First revision covered by MANIFEST. - * Will equal the shard start revision or 1, for the 1st shard. */ - svn_revnum_t manifest_start; + /* Array of svn_string_t, containing the serialized revprops for + * REVISION * I. */ + apr_array_header_t *revprops; /* content of the manifest. - * Maps long(rev - MANIFEST_START) to const char* pack file name */ + * Sorted list of manifest_entry_t. */ apr_array_header_t *manifest; } packed_revprops_t; /* Parse the serialized revprops in CONTENT and return them in *PROPERTIES. * Also, put them into the revprop cache, if activated, for future use. * Three more parameters are being used to update the revprop cache: FS is - * our file system, the revprops belong to REVISION and the global revprop - * GENERATION is used as well. + * our file system, the revprops belong to REVISION. * * The returned hash will be allocated in RESULT_POOL, SCRATCH_POOL is * being used for temporary allocations. @@ -667,23 +402,23 @@ static svn_error_t * parse_revprop(apr_hash_t **properties, svn_fs_t *fs, svn_revnum_t revision, - apr_int64_t generation, - svn_string_t *content, + const svn_string_t *content, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { - svn_stream_t *stream = svn_stream_from_string(content, scratch_pool); - *properties = apr_hash_make(result_pool); + SVN_ERR_W(svn_fs_x__parse_properties(properties, content, result_pool), + apr_psprintf(scratch_pool, "Failed to parse revprops for r%ld.", + revision)); - SVN_ERR(svn_hash_read2(*properties, stream, SVN_HASH_TERMINATOR, - result_pool)); if (has_revprop_cache(fs, scratch_pool)) { svn_fs_x__data_t *ffd = fs->fsap_data; svn_fs_x__pair_cache_key_t key = { 0 }; + SVN_ERR_ASSERT(is_generation_valid(fs)); + key.revision = revision; - key.second = generation; + key.second = ffd->revprop_generation; SVN_ERR(svn_cache__set(ffd->revprop_cache, &key, *properties, scratch_pool)); } @@ -691,9 +426,37 @@ parse_revprop(apr_hash_t **properties, return SVN_NO_ERROR; } +/* Verify the checksum attached to CONTENT and remove it. + * Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +verify_checksum(svn_stringbuf_t *content, + apr_pool_t *scratch_pool) +{ + const apr_byte_t *digest; + svn_checksum_t *actual, *expected; + + /* Verify the checksum. */ + if (content->len < sizeof(apr_uint32_t)) + return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL, + "File too short"); + + content->len -= sizeof(apr_uint32_t); + digest = (apr_byte_t *)content->data + content->len; + + expected = svn_checksum__from_digest_fnv1a_32x4(digest, scratch_pool); + SVN_ERR(svn_checksum(&actual, svn_checksum_fnv1a_32x4, content->data, + content->len, scratch_pool)); + + if (!svn_checksum_match(actual, expected)) + SVN_ERR(svn_checksum_mismatch_err(expected, actual, scratch_pool, + "checksum mismatch")); + + return SVN_NO_ERROR; +} + /* Read the non-packed revprops for revision REV in FS, put them into the - * revprop cache if activated and return them in *PROPERTIES. GENERATION - * is the current revprop generation. + * revprop cache if activated and return them in *PROPERTIES. * * If the data could not be read due to an otherwise recoverable error, * leave *PROPERTIES unchanged. No error will be returned in that case. @@ -704,7 +467,6 @@ static svn_error_t * read_non_packed_revprop(apr_hash_t **properties, svn_fs_t *fs, svn_revnum_t rev, - apr_int64_t generation, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { @@ -726,26 +488,193 @@ read_non_packed_revprop(apr_hash_t **properties, } if (content) - SVN_ERR(parse_revprop(properties, fs, rev, generation, - svn_stringbuf__morph_into_string(content), - result_pool, iterpool)); + { + svn_string_t *as_string; + + /* Consistency check. */ + SVN_ERR_W(verify_checksum(content, scratch_pool), + apr_psprintf(scratch_pool, + "Revprop file for r%ld is corrupt", + rev)); + + /* The contents string becomes part of the *PROPERTIES structure, i.e. + * we must make sure it lives at least as long as the latter. */ + as_string = svn_string_create_from_buf(content, result_pool); + SVN_ERR(parse_revprop(properties, fs, rev, as_string, + result_pool, iterpool)); + } svn_pool_clear(iterpool); return SVN_NO_ERROR; } -/* Return the minimum length of any packed revprop file name in REVPROPS. */ -static apr_size_t -get_min_filename_len(packed_revprops_t *revprops) +/* Serialize ROOT into FILE and append a checksum to it. + * Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +write_packed_data_checksummed(svn_packed__data_root_t *root, + apr_file_t *file, + apr_pool_t *scratch_pool) { - char number_buffer[SVN_INT64_BUFFER_SIZE]; + svn_checksum_t *checksum; + svn_stream_t *stream; - /* The revprop filenames have the format <REV>.<COUNT> - with <REV> being - * at least the first rev in the shard and <COUNT> having at least one - * digit. Thus, the minimum is 2 + #decimal places in the start rev. - */ - return svn__i64toa(number_buffer, revprops->manifest_start) + 2; + stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool); + stream = svn_checksum__wrap_write_stream(&checksum, stream, + svn_checksum_fnv1a_32x4, + scratch_pool); + SVN_ERR(svn_packed__data_write(stream, root, scratch_pool)); + SVN_ERR(svn_stream_close(stream)); + + /* Append the checksum */ + SVN_ERR(svn_io_file_write_full(file, checksum->digest, + svn_checksum_size(checksum), NULL, + scratch_pool)); + + return SVN_NO_ERROR; +} + +/* Serialize the packed revprops MANIFEST into FILE. + * Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +write_manifest(apr_file_t *file, + const apr_array_header_t *manifest, + apr_pool_t *scratch_pool) +{ + int i; + svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool); + + /* one top-level stream per struct element */ + svn_packed__int_stream_t *start_rev_stream + = svn_packed__create_int_stream(root, TRUE, FALSE); + svn_packed__int_stream_t *tag_stream + = svn_packed__create_int_stream(root, FALSE, FALSE); + + /* serialize ENTRIES */ + for (i = 0; i < manifest->nelts; ++i) + { + manifest_entry_t *entry = &APR_ARRAY_IDX(manifest, i, manifest_entry_t); + svn_packed__add_uint(start_rev_stream, entry->start_rev); + svn_packed__add_uint(tag_stream, entry->tag); + } + + /* Write to file and calculate the checksum. */ + SVN_ERR(write_packed_data_checksummed(root, file, scratch_pool)); + + return SVN_NO_ERROR; +} + +/* Read *ROOT from CONTENT and verify its checksum. Allocate *ROOT in + * RESULT_POOL and use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +read_packed_data_checksummed(svn_packed__data_root_t **root, + svn_stringbuf_t *content, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_stream_t *stream; + + SVN_ERR(verify_checksum(content, scratch_pool)); + + stream = svn_stream_from_stringbuf(content, scratch_pool); + SVN_ERR(svn_packed__data_read(root, stream, result_pool, scratch_pool)); + + return SVN_NO_ERROR; +} + +/* Read the packed revprops manifest from the CONTENT buffer and return it + * in *MANIFEST, allocated in RESULT_POOL. REVISION is the revision number + * to put into error messages. Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +read_manifest(apr_array_header_t **manifest, + svn_stringbuf_t *content, + svn_revnum_t revision, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + apr_size_t i; + apr_size_t count; + + svn_packed__data_root_t *root; + svn_packed__int_stream_t *start_rev_stream; + svn_packed__int_stream_t *tag_stream; + + /* Verify the checksum and decode packed data. */ + SVN_ERR_W(read_packed_data_checksummed(&root, content, result_pool, + scratch_pool), + apr_psprintf(scratch_pool, + "Revprop manifest file for r%ld is corrupt", + revision)); + + /* get streams */ + start_rev_stream = svn_packed__first_int_stream(root); + tag_stream = svn_packed__next_int_stream(start_rev_stream); + + /* read ids array */ + count = svn_packed__int_count(start_rev_stream); + *manifest = apr_array_make(result_pool, (int)count, + sizeof(manifest_entry_t)); + + for (i = 0; i < count; ++i) + { + manifest_entry_t *entry = apr_array_push(*manifest); + entry->start_rev = (svn_revnum_t)svn_packed__get_int(start_rev_stream); + entry->tag = svn_packed__get_uint(tag_stream); + } + + return SVN_NO_ERROR; +} + +/* Implements the standard comparison function signature comparing the + * manifest_entry_t(lhs).start_rev to svn_revnum_t(rhs). */ +static int +compare_entry_revision(const void *lhs, + const void *rhs) +{ + const manifest_entry_t *entry = lhs; + const svn_revnum_t *revision = rhs; + + if (entry->start_rev < *revision) + return -1; + + return entry->start_rev == *revision ? 0 : 1; +} + +/* Return the index in MANIFEST that has the info for the pack file + * containing REVISION. */ +static int +get_entry(apr_array_header_t *manifest, + svn_revnum_t revision) +{ + manifest_entry_t *entry; + int idx = svn_sort__bsearch_lower_bound(manifest, &revision, + compare_entry_revision); + + assert(manifest->nelts > 0); + if (idx >= manifest->nelts) + return idx - 1; + + entry = &APR_ARRAY_IDX(manifest, idx, manifest_entry_t); + if (entry->start_rev > revision && idx > 0) + return idx - 1; + + return idx; +} + +/* Return the full path of the revprop pack file given by ENTRY within + * REVPROPS. Allocate the result in RESULT_POOL. */ +static const char * +get_revprop_pack_filepath(packed_revprops_t *revprops, + manifest_entry_t *entry, + apr_pool_t *result_pool) +{ + const char *filename = apr_psprintf(result_pool, "%ld.%" APR_UINT64_T_FMT, + entry->start_rev, entry->tag); + return svn_dirent_join(revprops->folder, filename, result_pool); } /* Given FS and REVPROPS->REVISION, fill the FILENAME, FOLDER and MANIFEST @@ -761,98 +690,59 @@ get_revprop_packname(svn_fs_t *fs, svn_fs_x__data_t *ffd = fs->fsap_data; svn_stringbuf_t *content = NULL; const char *manifest_file_path; - int idx, rev_count; - char *buffer, *buffer_end; - const char **filenames, **filenames_end; - apr_size_t min_filename_len; + int idx; + svn_revnum_t previous_start_rev; + int i; /* Determine the dimensions. Rev 0 is excluded from the first shard. */ - rev_count = ffd->max_files_per_dir; - revprops->manifest_start + int rev_count = ffd->max_files_per_dir; + svn_revnum_t manifest_start = revprops->revision - (revprops->revision % rev_count); - if (revprops->manifest_start == 0) + if (manifest_start == 0) { - ++revprops->manifest_start; + ++manifest_start; --rev_count; } - revprops->manifest = apr_array_make(result_pool, rev_count, - sizeof(const char*)); - - /* No line in the file can be less than this number of chars long. */ - min_filename_len = get_min_filename_len(revprops); - /* Read the content of the manifest file */ - revprops->folder - = svn_fs_x__path_revprops_pack_shard(fs, revprops->revision, result_pool); + revprops->folder = svn_fs_x__path_pack_shard(fs, revprops->revision, + result_pool); manifest_file_path = svn_dirent_join(revprops->folder, PATH_MANIFEST, result_pool); - SVN_ERR(svn_fs_x__read_content(&content, manifest_file_path, result_pool)); + SVN_ERR(read_manifest(&revprops->manifest, content, revprops->revision, + result_pool, scratch_pool)); - /* There CONTENT must have a certain minimal size and there no - * unterminated lines at the end of the file. Both guarantees also - * simplify the parser loop below. - */ - if ( content->len < rev_count * (min_filename_len + 1) - || content->data[content->len - 1] != '\n') - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Packed revprop manifest for r%ld not " - "properly terminated"), revprops->revision); - - /* Chop (parse) the manifest CONTENT into filenames, one per line. - * We only have to replace all newlines with NUL and add all line - * starts to REVPROPS->MANIFEST. - * - * There must be exactly REV_COUNT lines and that is the number of - * lines we parse from BUFFER to FILENAMES. Set the end pointer for - * the source BUFFER such that BUFFER+MIN_FILENAME_LEN is still valid - * BUFFER_END is always valid due to CONTENT->LEN > MIN_FILENAME_LEN. - * - * Please note that this loop is performance critical for e.g. 'svn log'. - * It is run 1000x per revprop access, i.e. per revision and about - * 50 million times per sec (and CPU core). - */ - for (filenames = (const char **)revprops->manifest->elts, - filenames_end = filenames + rev_count, - buffer = content->data, - buffer_end = buffer + content->len - min_filename_len; - (filenames < filenames_end) && (buffer < buffer_end); - ++filenames) + /* Verify the manifest data. */ + if (revprops->manifest->nelts == 0) + return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL, + "Revprop manifest for r%ld is empty", + revprops->revision); + + previous_start_rev = 0; + for (i = 0; i < revprops->manifest->nelts; ++i) { - /* BUFFER always points to the start of the next line / filename. */ - *filenames = buffer; - - /* Find the next EOL. This is guaranteed to stay within the CONTENT - * buffer because we left enough room after BUFFER_END and we know - * we will always see a newline as the last non-NUL char. */ - buffer += min_filename_len; - while (*buffer != '\n') - ++buffer; - - /* Found EOL. Turn it into the filename terminator and move BUFFER - * to the start of the next line or CONTENT buffer end. */ - *buffer = '\0'; - ++buffer; + svn_revnum_t start_rev = APR_ARRAY_IDX(revprops->manifest, i, + manifest_entry_t).start_rev; + if ( start_rev < manifest_start + || start_rev >= manifest_start + rev_count) + return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL, + "Revprop manifest for r%ld contains " + "out-of-range revision r%ld", + revprops->revision, start_rev); + + if (start_rev < previous_start_rev) + return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL, + "Entries in revprop manifest for r%ld " + "are not ordered", revprops->revision); + + previous_start_rev = start_rev; } - /* We must have reached the end of both buffers. */ - if (buffer < content->data + content->len) - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Packed revprop manifest for r%ld " - "has too many entries"), revprops->revision); - - if (filenames < filenames_end) - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Packed revprop manifest for r%ld " - "has too few entries"), revprops->revision); - - /* The target array has now exactly one entry per revision. */ - revprops->manifest->nelts = rev_count; - - /* Now get the file name */ - idx = (int)(revprops->revision - revprops->manifest_start); - revprops->filename = APR_ARRAY_IDX(revprops->manifest, idx, const char*); + /* Now get the pack file description */ + idx = get_entry(revprops->manifest, revprops->revision); + revprops->entry = APR_ARRAY_IDX(revprops->manifest, idx, + manifest_entry_t); return SVN_NO_ERROR; } @@ -868,10 +758,9 @@ same_shard(svn_fs_t *fs, return (r1 / ffd->max_files_per_dir) == (r2 / ffd->max_files_per_dir); } -/* Given FS and the full packed file content in REVPROPS->PACKED_REVPROPS, - * fill the START_REVISION member, and make PACKED_REVPROPS point to the - * first serialized revprop. If READ_ALL is set, initialize the SIZES - * and OFFSETS members as well. +/* Given FS and the full packed file content in CONTENT and make + * PACKED_REVPROPS point to the first serialized revprop. If READ_ALL + * is set, initialize the SIZES and OFFSETS members as well. * * Parse the revprops for REVPROPS->REVISION and set the PROPERTIES as * well as the SERIALIZED_SIZE member. If revprop caching has been @@ -880,33 +769,31 @@ same_shard(svn_fs_t *fs, static svn_error_t * parse_packed_revprops(svn_fs_t *fs, packed_revprops_t *revprops, + svn_stringbuf_t *content, svn_boolean_t read_all, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { - svn_stream_t *stream; - apr_int64_t first_rev, count, i; - apr_off_t offset; - const char *header_end; + apr_size_t count, i; apr_pool_t *iterpool = svn_pool_create(scratch_pool); svn_boolean_t cache_all = has_revprop_cache(fs, scratch_pool); + svn_packed__data_root_t *root; + svn_packed__byte_stream_t *revprops_stream; + svn_revnum_t first_rev = revprops->entry.start_rev; - /* decompress (even if the data is only "stored", there is still a - * length header to remove) */ - svn_stringbuf_t *compressed = revprops->packed_revprops; - svn_stringbuf_t *uncompressed = svn_stringbuf_create_empty(result_pool); - SVN_ERR(svn__decompress(compressed, uncompressed, APR_SIZE_MAX)); + /* Verify the checksum and decode packed data. */ + SVN_ERR_W(read_packed_data_checksummed(&root, content, result_pool, + scratch_pool), + apr_psprintf(scratch_pool, + "Revprop pack file for r%ld is corrupt", + first_rev)); - /* read first revision number and number of revisions in the pack */ - stream = svn_stream_from_stringbuf(uncompressed, scratch_pool); - SVN_ERR(svn_fs_x__read_number_from_stream(&first_rev, NULL, stream, - iterpool)); - SVN_ERR(svn_fs_x__read_number_from_stream(&count, NULL, stream, iterpool)); + /* get streams */ + revprops_stream = svn_packed__first_byte_stream(root); + count = svn_packed__byte_block_count(revprops_stream); /* Check revision range for validity. */ - if ( !same_shard(fs, revprops->revision, first_rev) - || !same_shard(fs, revprops->revision, first_rev + count - 1) - || count < 1) + if (!same_shard(fs, first_rev, first_rev + count - 1) || count < 1) return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, _("Revprop pack for revision r%ld" " contains revprops for r%ld .. r%ld"), @@ -923,58 +810,35 @@ parse_packed_revprops(svn_fs_t *fs, " starts at non-packed revisions r%ld"), revprops->revision, (svn_revnum_t)first_rev); - /* make PACKED_REVPROPS point to the first char after the header. - * This is where the serialized revprops are. */ - header_end = strstr(uncompressed->data, "\n\n"); - if (header_end == NULL) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Header end not found")); - - offset = header_end - uncompressed->data + 2; - - revprops->packed_revprops = svn_stringbuf_create_empty(result_pool); - revprops->packed_revprops->data = uncompressed->data + offset; - revprops->packed_revprops->len = (apr_size_t)(uncompressed->len - offset); - revprops->packed_revprops->blocksize = (apr_size_t)(uncompressed->blocksize - offset); - - /* STREAM still points to the first entry in the sizes list. */ - revprops->start_revision = (svn_revnum_t)first_rev; - if (read_all) + /* Request all data (just references to data already expanded in ROOT) */ + revprops->revprops = apr_array_make(result_pool, (int)count, + sizeof(svn_string_t)); + for (i = 0, revprops->total_size = 0; i < count; ++i) { - /* Init / construct REVPROPS members. */ - revprops->sizes = apr_array_make(result_pool, (int)count, - sizeof(offset)); - revprops->offsets = apr_array_make(result_pool, (int)count, - sizeof(offset)); + svn_string_t *props = apr_array_push(revprops->revprops); + props->data = svn_packed__get_bytes(revprops_stream, &props->len); + + revprops->total_size += props->len; } - /* Now parse, revision by revision, the size and content of each - * revisions' revprops. */ - for (i = 0, offset = 0, revprops->total_size = 0; i < count; ++i) + /* Now parse the serialized revprops. */ + for (i = 0; i < count; ++i) { - apr_int64_t size; - svn_string_t serialized; - svn_revnum_t revision = (svn_revnum_t)(first_rev + i); + const svn_string_t *serialized; + svn_revnum_t revision; + svn_pool_clear(iterpool); - /* read & check the serialized size */ - SVN_ERR(svn_fs_x__read_number_from_stream(&size, NULL, stream, - iterpool)); - if (size + offset > (apr_int64_t)revprops->packed_revprops->len) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Packed revprop size exceeds pack file size")); + serialized = &APR_ARRAY_IDX(revprops->revprops, (int)i, svn_string_t); + revision = first_rev + (long)i; /* Parse this revprops list, if necessary */ - serialized.data = revprops->packed_revprops->data + offset; - serialized.len = (apr_size_t)size; - if (revision == revprops->revision) { /* Parse (and possibly cache) the one revprop list we care about. */ SVN_ERR(parse_revprop(&revprops->properties, fs, revision, - revprops->generation, &serialized, - result_pool, iterpool)); - revprops->serialized_size = serialized.len; + serialized, result_pool, iterpool)); + revprops->serialized_size = serialized->len; /* If we only wanted the revprops for REVISION then we are done. */ if (!read_all && !cache_all) @@ -984,29 +848,20 @@ parse_packed_revprops(svn_fs_t *fs, { /* Parse and cache all other revprop lists. */ apr_hash_t *properties; - SVN_ERR(parse_revprop(&properties, fs, revision, - revprops->generation, &serialized, + SVN_ERR(parse_revprop(&properties, fs, revision, serialized, iterpool, iterpool)); } - - if (read_all) - { - /* fill REVPROPS data structures */ - APR_ARRAY_PUSH(revprops->sizes, apr_off_t) = serialized.len; - APR_ARRAY_PUSH(revprops->offsets, apr_off_t) = offset; - } - revprops->total_size += serialized.len; - - offset += serialized.len; } + svn_pool_destroy(iterpool); + return SVN_NO_ERROR; } /* In filesystem FS, read the packed revprops for revision REV into - * *REVPROPS. Use GENERATION to populate the revprop cache, if enabled. - * If you want to modify revprop contents / update REVPROPS, READ_ALL - * must be set. Otherwise, only the properties of REV are being provided. + * *REVPROPS. Populate the revprop cache, if enabled. If you want to + * modify revprop contents / update REVPROPS, READ_ALL must be set. + * Otherwise, only the properties of REV are being provided. * * Allocate *PROPERTIES in RESULT_POOL and temporaries in SCRATCH_POOL. */ @@ -1014,14 +869,12 @@ static svn_error_t * read_pack_revprop(packed_revprops_t **revprops, svn_fs_t *fs, svn_revnum_t rev, - apr_int64_t generation, svn_boolean_t read_all, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { apr_pool_t *iterpool = svn_pool_create(scratch_pool); svn_boolean_t missing = FALSE; - svn_error_t *err; packed_revprops_t *result; int i; @@ -1036,29 +889,37 @@ read_pack_revprop(packed_revprops_t **revprops, /* initialize the result data structure */ result = apr_pcalloc(result_pool, sizeof(*result)); result->revision = rev; - result->generation = generation; /* try to read the packed revprops. This may require retries if we have * concurrent writers. */ - for (i = 0; - i < SVN_FS_X__RECOVERABLE_RETRY_COUNT && !result->packed_revprops; - ++i) + for (i = 0; i < SVN_FS_X__RECOVERABLE_RETRY_COUNT; ++i) { const char *file_path; + svn_stringbuf_t *contents = NULL; + svn_pool_clear(iterpool); /* there might have been concurrent writes. * Re-read the manifest and the pack file. */ SVN_ERR(get_revprop_packname(fs, result, result_pool, iterpool)); - file_path = svn_dirent_join(result->folder, - result->filename, - iterpool); - SVN_ERR(svn_fs_x__try_stringbuf_from_file(&result->packed_revprops, + file_path = get_revprop_pack_filepath(result, &result->entry, + iterpool); + SVN_ERR(svn_fs_x__try_stringbuf_from_file(&contents, &missing, file_path, i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT, - result_pool)); + iterpool)); + + if (contents) + { + SVN_ERR_W(parse_packed_revprops(fs, result, contents, read_all, + result_pool, iterpool), + apr_psprintf(iterpool, + "Revprop pack file for r%ld is corrupt", + rev)); + break; + } /* If we could not find the file, there was a write. * So, we should refresh our revprop generation info as well such @@ -1066,40 +927,29 @@ read_pack_revprop(packed_revprops_t **revprops, * consider it outdated, otherwise. */ if (missing && has_revprop_cache(fs, iterpool)) - SVN_ERR(read_revprop_generation(&result->generation, fs, iterpool)); + SVN_ERR(read_revprop_generation(fs, iterpool)); } /* the file content should be available now */ - if (!result->packed_revprops) + if (!result->revprops) return svn_error_createf(SVN_ERR_FS_PACKED_REVPROP_READ_FAILURE, NULL, _("Failed to read revprop pack file for r%ld"), rev); - /* parse it. RESULT will be complete afterwards. */ - err = parse_packed_revprops(fs, result, read_all, result_pool, iterpool); - svn_pool_destroy(iterpool); - if (err) - return svn_error_createf(SVN_ERR_FS_CORRUPT, err, - _("Revprop pack file for r%ld is corrupt"), rev); - *revprops = result; return SVN_NO_ERROR; } -/* Read the revprops for revision REV in FS and return them in *PROPERTIES_P. - * - * Allocations will be done in POOL. - */ svn_error_t * svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p, svn_fs_t *fs, svn_revnum_t rev, svn_boolean_t bypass_cache, + svn_boolean_t refresh, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; - apr_int64_t generation = 0; /* not found, yet */ *proplist_p = NULL; @@ -1107,16 +957,18 @@ svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p, /* should they be available at all? */ SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool)); + /* Ensure that the revprop generation info is valid. */ + if (refresh || !is_generation_valid(fs)) + SVN_ERR(read_revprop_generation(fs, scratch_pool)); + /* Try cache lookup first. */ if (!bypass_cache && has_revprop_cache(fs, scratch_pool)) { svn_boolean_t is_cached; svn_fs_x__pair_cache_key_t key = { 0 }; - SVN_ERR(read_revprop_generation(&generation, fs, scratch_pool)); - key.revision = rev; - key.second = generation; + key.second = ffd->revprop_generation; SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached, ffd->revprop_cache, &key, result_pool)); if (is_cached) @@ -1129,8 +981,7 @@ svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p, if (!svn_fs_x__is_packed_revprop(fs, rev)) { svn_error_t *err = read_non_packed_revprop(proplist_p, fs, rev, - generation, result_pool, - scratch_pool); + result_pool, scratch_pool); if (err) { if (!APR_STATUS_IS_ENOENT(err->apr_err)) @@ -1147,7 +998,7 @@ svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p, if (!*proplist_p) { packed_revprops_t *revprops; - SVN_ERR(read_pack_revprop(&revprops, fs, rev, generation, FALSE, + SVN_ERR(read_pack_revprop(&revprops, fs, rev, FALSE, result_pool, scratch_pool)); *proplist_p = revprops->properties; } @@ -1161,10 +1012,33 @@ svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p, return SVN_NO_ERROR; } +svn_error_t * +svn_fs_x__write_non_packed_revprops(apr_file_t *file, + apr_hash_t *proplist, + apr_pool_t *scratch_pool) +{ + svn_stream_t *stream; + svn_checksum_t *checksum; + + stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool); + stream = svn_checksum__wrap_write_stream(&checksum, stream, + svn_checksum_fnv1a_32x4, + scratch_pool); + SVN_ERR(svn_fs_x__write_properties(stream, proplist, scratch_pool)); + SVN_ERR(svn_stream_close(stream)); + + /* Append the checksum */ + SVN_ERR(svn_io_file_write_full(file, checksum->digest, + svn_checksum_size(checksum), NULL, + scratch_pool)); + + return SVN_NO_ERROR; +} + /* Serialize the revision property list PROPLIST of revision REV in * filesystem FS to a non-packed file. Return the name of that temporary * file in *TMP_PATH and the file path that it must be moved to in - * *FINAL_PATH. + * *FINAL_PATH. Schedule necessary fsync calls in BATCH. * * Allocate *FINAL_PATH and *TMP_PATH in RESULT_POOL. Use SCRATCH_POOL * for temporary allocations. @@ -1175,29 +1049,25 @@ write_non_packed_revprop(const char **final_path, svn_fs_t *fs, svn_revnum_t rev, apr_hash_t *proplist, + svn_fs_x__batch_fsync_t *batch, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { - svn_stream_t *stream; + apr_file_t *file; *final_path = svn_fs_x__path_revprops(fs, rev, result_pool); - /* ### do we have a directory sitting around already? we really shouldn't - ### have to get the dirname here. */ - SVN_ERR(svn_stream_open_unique(&stream, tmp_path, - svn_dirent_dirname(*final_path, - scratch_pool), - svn_io_file_del_none, - result_pool, scratch_pool)); - SVN_ERR(svn_hash_write2(proplist, stream, SVN_HASH_TERMINATOR, - scratch_pool)); - SVN_ERR(svn_stream_close(stream)); + *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL); + SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path, + scratch_pool)); + + SVN_ERR(svn_fs_x__write_non_packed_revprops(file, proplist, scratch_pool)); return SVN_NO_ERROR; } /* After writing the new revprop file(s), call this function to move the * file at TMP_PATH to FINAL_PATH and give it the permissions from - * PERMS_REFERENCE. + * PERMS_REFERENCE. Schedule necessary fsync calls in BATCH. * * If indicated in BUMP_GENERATION, increase FS' revprop generation. * Finally, delete all the temporary files given in FILES_TO_DELETE. @@ -1212,21 +1082,26 @@ switch_to_new_revprop(svn_fs_t *fs, const char *perms_reference, apr_array_header_t *files_to_delete, svn_boolean_t bump_generation, + svn_fs_x__batch_fsync_t *batch, apr_pool_t *scratch_pool) { - apr_int64_t generation; - /* Now, we may actually be replacing revprops. Make sure that all other threads and processes will know about this. */ if (bump_generation) - SVN_ERR(begin_revprop_change(&generation, fs, scratch_pool)); + SVN_ERR(begin_revprop_change(fs, scratch_pool)); + + /* Ensure the new file contents makes it to disk before switching over to + * it. */ + SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool)); + /* Make the revision visible to all processes and threads. */ SVN_ERR(svn_fs_x__move_into_place(tmp_path, final_path, perms_reference, - scratch_pool)); + batch, scratch_pool)); + SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool)); /* Indicate that the update (if relevant) has been completed. */ if (bump_generation) - SVN_ERR(end_revprop_change(fs, generation, scratch_pool)); + SVN_ERR(end_revprop_change(fs, scratch_pool)); /* Clean up temporary files, if necessary. */ if (files_to_delete) @@ -1247,52 +1122,8 @@ switch_to_new_revprop(svn_fs_t *fs, return SVN_NO_ERROR; } -/* Write a pack file header to STREAM that starts at revision START_REVISION - * and contains the indexes [START,END) of SIZES. - */ -static svn_error_t * -serialize_revprops_header(svn_stream_t *stream, - svn_revnum_t start_revision, - apr_array_header_t *sizes, - int start, - int end, - apr_pool_t *scratch_pool) -{ - apr_pool_t *iterpool = svn_pool_create(scratch_pool); - int i; - - SVN_ERR_ASSERT(start < end); - - /* start revision and entry count */ - SVN_ERR(svn_stream_printf(stream, scratch_pool, "%ld\n", start_revision)); - SVN_ERR(svn_stream_printf(stream, scratch_pool, "%d\n", end - start)); - - /* the sizes array */ - for (i = start; i < end; ++i) - { - /* Non-standard pool usage. - * - * We only allocate a few bytes each iteration -- even with a - * million iterations we would still be in good shape memory-wise. - */ - apr_off_t size = APR_ARRAY_IDX(sizes, i, apr_off_t); - SVN_ERR(svn_stream_printf(stream, iterpool, "%" APR_OFF_T_FMT "\n", - size)); - } - - /* the double newline char indicates the end of the header */ - SVN_ERR(svn_stream_printf(stream, iterpool, "\n")); - - svn_pool_destroy(iterpool); - return SVN_NO_ERROR; -} - -/* Writes the a pack file to FILE_STREAM. It copies the serialized data - * from REVPROPS for the indexes [START,END) except for index CHANGED_INDEX. - * - * The data for the latter is taken from NEW_SERIALIZED. Note, that - * CHANGED_INDEX may be outside the [START,END) range, i.e. no new data is - * taken in that case but only a subset of the old data will be copied. +/* Writes the a pack file to FILE. It copies the serialized data + * from REVPROPS for the indexes [START,END). * * NEW_TOTAL_SIZE is a hint for pre-allocating buffers of appropriate size. * SCRATCH_POOL is used for temporary allocations. @@ -1302,137 +1133,99 @@ repack_revprops(svn_fs_t *fs, packed_revprops_t *revprops, int start, int end, - int changed_index, - svn_stringbuf_t *new_serialized, - apr_off_t new_total_size, - svn_stream_t *file_stream, + apr_size_t new_total_size, + apr_file_t *file, apr_pool_t *scratch_pool) { - svn_fs_x__data_t *ffd = fs->fsap_data; - svn_stream_t *stream; int i; - /* create data empty buffers and the stream object */ - svn_stringbuf_t *uncompressed - = svn_stringbuf_create_ensure((apr_size_t)new_total_size, scratch_pool); - svn_stringbuf_t *compressed - = svn_stringbuf_create_empty(scratch_pool); - stream = svn_stream_from_stringbuf(uncompressed, scratch_pool); - - /* write the header*/ - SVN_ERR(serialize_revprops_header(stream, revprops->start_revision + start, - revprops->sizes, start, end, - scratch_pool)); + svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool); + svn_packed__byte_stream_t *revprops_stream + = svn_packed__create_bytes_stream(root); /* append the serialized revprops */ for (i = start; i < end; ++i) - if (i == changed_index) - { - SVN_ERR(svn_stream_write(stream, - new_serialized->data, - &new_serialized->len)); - } - else - { - apr_size_t size - = (apr_size_t)APR_ARRAY_IDX(revprops->sizes, i, apr_off_t); - apr_size_t offset - = (apr_size_t)APR_ARRAY_IDX(revprops->offsets, i, apr_off_t); - - SVN_ERR(svn_stream_write(stream, - revprops->packed_revprops->data + offset, - &size)); - } - - /* flush the stream buffer (if any) to our underlying data buffer */ - SVN_ERR(svn_stream_close(stream)); + { + const svn_string_t *props + = &APR_ARRAY_IDX(revprops->revprops, i, svn_string_t); - /* compress / store the data */ - SVN_ERR(svn__compress(uncompressed, - compressed, - ffd->compress_packed_revprops - ? SVN_DELTA_COMPRESSION_LEVEL_DEFAULT - : SVN_DELTA_COMPRESSION_LEVEL_NONE)); + svn_packed__add_bytes(revprops_stream, props->data, props->len); + } - /* finally, write the content to the target stream and close it */ - SVN_ERR(svn_stream_write(file_stream, compressed->data, &compressed->len)); - SVN_ERR(svn_stream_close(file_stream)); + /* Write to file. */ + SVN_ERR(write_packed_data_checksummed(root, file, scratch_pool)); return SVN_NO_ERROR; } -/* Allocate a new pack file name for revisions - * [REVPROPS->START_REVISION + START, REVPROPS->START_REVISION + END - 1] - * of REVPROPS->MANIFEST. Add the name of old file to FILES_TO_DELETE, - * auto-create that array if necessary. Return an open file stream to - * the new file in *STREAM allocated in RESULT_POOL. Allocate the paths - * in *FILES_TO_DELETE from the same pool that contains the array itself. +/* Allocate a new pack file name for revisions starting at START_REV in + * REVPROPS->MANIFEST. Add the name of old file to FILES_TO_DELETE, + * auto-create that array if necessary. Return an open file *FILE that is + * allocated in RESULT_POOL. Allocate the paths in *FILES_TO_DELETE from + * the same pool that contains the array itself. Schedule necessary fsync + * calls in BATCH. * * Use SCRATCH_POOL for temporary allocations. */ static svn_error_t * -repack_stream_open(svn_stream_t **stream, - svn_fs_t *fs, - packed_revprops_t *revprops, - int start, - int end, - apr_array_header_t **files_to_delete, - apr_pool_t *result_pool, - apr_pool_t *scratch_pool) +repack_file_open(apr_file_t **file, + svn_fs_t *fs, + packed_revprops_t *revprops, + svn_revnum_t start_rev, + apr_array_header_t **files_to_delete, + svn_fs_x__batch_fsync_t *batch, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) { - apr_int64_t tag; - const char *tag_string; - svn_string_t *new_filename; - int i; - apr_file_t *file; - int manifest_offset - = (int)(revprops->start_revision - revprops->manifest_start); + manifest_entry_t new_entry; + const char *new_path; + int idx; - /* get the old (= current) file name and enlist it for later deletion */ - const char *old_filename = APR_ARRAY_IDX(revprops->manifest, - start + manifest_offset, - const char*); + /* We always replace whole pack files - possibly by more than one new file. + * When we create the file for the first part of the pack, enlist the old + * one for later deletion */ + SVN_ERR_ASSERT(start_rev >= revprops->entry.start_rev); if (*files_to_delete == NULL) *files_to_delete = apr_array_make(result_pool, 3, sizeof(const char*)); - APR_ARRAY_PUSH(*files_to_delete, const char*) - = svn_dirent_join(revprops->folder, old_filename, - (*files_to_delete)->pool); - - /* increase the tag part, i.e. the counter after the dot */ - tag_string = strchr(old_filename, '.'); - if (tag_string == NULL) - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Packed file '%s' misses a tag"), - old_filename); + if (revprops->entry.start_rev == start_rev) + APR_ARRAY_PUSH(*files_to_delete, const char*) + = get_revprop_pack_filepath(revprops, &revprops->entry, + (*files_to_delete)->pool); - SVN_ERR(svn_cstring_atoi64(&tag, tag_string + 1)); - new_filename = svn_string_createf((*files_to_delete)->pool, - "%ld.%" APR_INT64_T_FMT, - revprops->start_revision + start, - ++tag); + /* Initialize the new manifest entry. Bump the tag part. */ + new_entry.start_rev = start_rev; + new_entry.tag = revprops->entry.tag + 1; /* update the manifest to point to the new file */ - for (i = start; i < end; ++i) - APR_ARRAY_IDX(revprops->manifest, i + manifest_offset, const char*) - = new_filename->data; + idx = get_entry(revprops->manifest, start_rev); + if (revprops->entry.start_rev == start_rev) + APR_ARRAY_IDX(revprops->manifest, idx, manifest_entry_t) = new_entry; + else + svn_sort__array_insert(revprops->manifest, &new_path, idx + 1); - /* create a file stream for the new file */ - SVN_ERR(svn_io_file_open(&file, svn_dirent_join(revprops->folder, - new_filename->data, - scratch_pool), - APR_WRITE | APR_CREATE, APR_OS_DEFAULT, - result_pool)); - *stream = svn_stream_from_aprfile2(file, FALSE, result_pool); + /* open the file */ + new_path = get_revprop_pack_filepath(revprops, &new_entry, scratch_pool); + SVN_ERR(svn_fs_x__batch_fsync_open_file(file, batch, new_path, + scratch_pool)); return SVN_NO_ERROR; } +/* Return the length of the serialized reprop list of index I in REVPROPS. */ +static apr_size_t +props_len(packed_revprops_t *revprops, + int i) +{ + return APR_ARRAY_IDX(revprops->revprops, i, svn_string_t).len; +} + /* For revision REV in filesystem FS, set the revision properties to * PROPLIST. Return a new file in *TMP_PATH that the caller shall move * to *FINAL_PATH to make the change visible. Files to be deleted will * be listed in *FILES_TO_DELETE which may remain unchanged / unallocated. + * Schedule necessary fsync calls in BATCH. * * Allocate output values in RESULT_POOL and temporaries from SCRATCH_POOL. */ @@ -1443,88 +1236,87 @@ write_packed_revprop(const char **final_path, svn_fs_t *fs, svn_revnum_t rev, apr_hash_t *proplist, + svn_fs_x__batch_fsync_t *batch, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; packed_revprops_t *revprops; - apr_int64_t generation = 0; svn_stream_t *stream; + apr_file_t *file; svn_stringbuf_t *serialized; - apr_off_t new_total_size; + apr_size_t new_total_size; int changed_index; + int count; /* read the current revprop generation. This value will not change * while we hold the global write lock to this FS. */ if (has_revprop_cache(fs, scratch_pool)) - SVN_ERR(read_revprop_generation(&generation, fs, scratch_pool)); + SVN_ERR(read_revprop_generation(fs, scratch_pool)); /* read contents of the current pack file */ - SVN_ERR(read_pack_revprop(&revprops, fs, rev, generation, TRUE, + SVN_ERR(read_pack_revprop(&revprops, fs, rev, TRUE, scratch_pool, scratch_pool)); /* serialize the new revprops */ serialized = svn_stringbuf_create_empty(scratch_pool); stream = svn_stream_from_stringbuf(serialized, scratch_pool); - SVN_ERR(svn_hash_write2(proplist, stream, SVN_HASH_TERMINATOR, - scratch_pool)); + SVN_ERR(svn_fs_x__write_properties(stream, proplist, scratch_pool)); SVN_ERR(svn_stream_close(stream)); - /* calculate the size of the new data */ - changed_index = (int)(rev - revprops->start_revision); + /* estimate the size of the new data */ + count = revprops->revprops->nelts; + changed_index = (int)(rev - revprops->entry.start_rev); new_total_size = revprops->total_size - revprops->serialized_size + serialized->len - + (revprops->offsets->nelts + 2) * SVN_INT64_BUFFER_SIZE; + + (count + 2) * SVN_INT64_BUFFER_SIZE; - APR_ARRAY_IDX(revprops->sizes, changed_index, apr_off_t) = serialized->len; + APR_ARRAY_IDX(revprops->revprops, changed_index, svn_string_t) + = *svn_stringbuf__morph_into_string(serialized); /* can we put the new data into the same pack as the before? */ - if ( new_total_size < ffd->revprop_pack_size - || revprops->sizes->nelts == 1) + if (new_total_size < ffd->revprop_pack_size || count == 1) { /* simply replace the old pack file with new content as we do it * in the non-packed case */ - *final_path = svn_dirent_join(revprops->folder, revprops->filename, - result_pool); - SVN_ERR(svn_stream_open_unique(&stream, tmp_path, revprops->folder, - svn_io_file_del_none, result_pool, - scratch_pool)); - SVN_ERR(repack_revprops(fs, revprops, 0, revprops->sizes->nelts, - changed_index, serialized, new_total_size, - stream, scratch_pool)); + *final_path = get_revprop_pack_filepath(revprops, &revprops->entry, + result_pool); + *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL); + SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path, + scratch_pool)); + SVN_ERR(repack_revprops(fs, revprops, 0, count, + new_total_size, file, scratch_pool)); } else { /* split the pack file into two of roughly equal size */ - int right_count, left_count, i; + int right_count, left_count; int left = 0; - int right = revprops->sizes->nelts - 1; - apr_off_t left_size = 2 * SVN_INT64_BUFFER_SIZE; - apr_off_t right_size = 2 * SVN_INT64_BUFFER_SIZE; + int right = count - 1; + apr_size_t left_size = 2 * SVN_INT64_BUFFER_SIZE; + apr_size_t right_size = 2 * SVN_INT64_BUFFER_SIZE; /* let left and right side grow such that their size difference * is minimal after each step. */ while (left <= right) - if ( left_size + APR_ARRAY_IDX(revprops->sizes, left, apr_off_t) - < right_size + APR_ARRAY_IDX(revprops->sizes, right, apr_off_t)) + if ( left_size + props_len(revprops, left) + < right_size + props_len(revprops, right)) { - left_size += APR_ARRAY_IDX(revprops->sizes, left, apr_off_t) - + SVN_INT64_BUFFER_SIZE; + left_size += props_len(revprops, left) + SVN_INT64_BUFFER_SIZE; ++left; } else { - right_size += APR_ARRAY_IDX(revprops->sizes, right, apr_off_t) - + SVN_INT64_BUFFER_SIZE; + right_size += props_len(revprops, right) + SVN_INT64_BUFFER_SIZE; --right; } /* since the items need much less than SVN_INT64_BUFFER_SIZE * bytes to represent their length, the split may not be optimal */ left_count = left; - right_count = revprops->sizes->nelts - left; + right_count = count - left; /* if new_size is large, one side may exceed the pack size limit. * In that case, split before and after the modified revprop.*/ @@ -1532,7 +1324,7 @@ write_packed_revprop(const char **final_path, || right_size > ffd->revprop_pack_size) { left_count = changed_index; - right_count = revprops->sizes->nelts - left_count - 1; + right_count = count - left_count - 1; } /* Allocate this here such that we can call the repack functions with @@ -1544,54 +1336,40 @@ write_packed_revprop(const char **final_path, /* write the new, split files */ if (left_count) { - SVN_ERR(repack_stream_open(&stream, fs, revprops, 0, - left_count, files_to_delete, - scratch_pool, scratch_pool)); + SVN_ERR(repack_file_open(&file, fs, revprops, + revprops->entry.start_rev, + files_to_delete, batch, + scratch_pool, scratch_pool)); SVN_ERR(repack_revprops(fs, revprops, 0, left_count, - changed_index, serialized, new_total_size, - stream, scratch_pool)); + new_total_size, file, scratch_pool)); } - if (left_count + right_count < revprops->sizes->nelts) + if (left_count + right_count < count) { - SVN_ERR(repack_stream_open(&stream, fs, revprops, changed_index, - changed_index + 1, files_to_delete, - scratch_pool, scratch_pool)); + SVN_ERR(repack_file_open(&file, fs, revprops, rev, + files_to_delete, batch, + scratch_pool, scratch_pool)); SVN_ERR(repack_revprops(fs, revprops, changed_index, changed_index + 1, - changed_index, serialized, new_total_size, - stream, scratch_pool)); + new_total_size, file, scratch_pool)); } if (right_count) { - SVN_ERR(repack_stream_open(&stream, fs, revprops, - revprops->sizes->nelts - right_count, - revprops->sizes->nelts, - files_to_delete, scratch_pool, - scratch_pool)); - SVN_ERR(repack_revprops(fs, revprops, - revprops->sizes->nelts - right_count, - revprops->sizes->nelts, changed_index, - serialized, new_total_size, stream, - scratch_pool)); + SVN_ERR(repack_file_open(&file, fs, revprops, rev + 1, + files_to_delete, batch, + scratch_pool, scratch_pool)); + SVN_ERR(repack_revprops(fs, revprops, count - right_count, count, + new_total_size, file, scratch_pool)); } /* write the new manifest */ *final_path = svn_dirent_join(revprops->folder, PATH_MANIFEST, result_pool); - SVN_ERR(svn_stream_open_unique(&stream, tmp_path, revprops->folder, - svn_io_file_del_none, result_pool, - scratch_pool)); - - for (i = 0; i < revprops->manifest->nelts; ++i) - { - const char *filename = APR_ARRAY_IDX(revprops->manifest, i, - const char*); - SVN_ERR(svn_stream_printf(stream, scratch_pool, "%s\n", filename)); - } - - SVN_ERR(svn_stream_close(stream)); + *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL); + SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path, + scratch_pool)); + SVN_ERR(write_manifest(file, revprops->manifest, scratch_pool)); } return SVN_NO_ERROR; @@ -1611,9 +1389,15 @@ svn_fs_x__set_revision_proplist(svn_fs_t *fs, const char *tmp_path; const char *perms_reference; apr_array_header_t *files_to_delete = NULL; + svn_fs_x__batch_fsync_t *batch; + svn_fs_x__data_t *ffd = fs->fsap_data; SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool)); + /* Perform all fsyncs through this instance. */ + SVN_ERR(svn_fs_x__batch_fsync_create(&batch, ffd->flush_to_disk, + scratch_pool)); + /* this info will not change while we hold the global FS write lock */ is_packed = svn_fs_x__is_packed_revprop(fs, rev); @@ -1636,12 +1420,12 @@ svn_fs_x__set_revision_proplist(svn_fs_t *fs, /* Serialize the new revprop data */ if (is_packed) SVN_ERR(write_packed_revprop(&final_path, &tmp_path, &files_to_delete, - fs, rev, proplist, scratch_pool, + fs, rev, proplist, batch, scratch_pool, scratch_pool)); else SVN_ERR(write_non_packed_revprop(&final_path, &tmp_path, - fs, rev, proplist, scratch_pool, - scratch_pool)); + fs, rev, proplist, batch, + scratch_pool, scratch_pool)); /* We use the rev file of this revision as the perms reference, * because when setting revprops for the first time, the revprop @@ -1652,7 +1436,7 @@ svn_fs_x__set_revision_proplist(svn_fs_t *fs, /* Now, switch to the new revprop data. */ SVN_ERR(switch_to_new_revprop(fs, final_path, tmp_path, perms_reference, - files_to_delete, bump_generation, + files_to_delete, bump_generation, batch, scratch_pool)); return SVN_NO_ERROR; @@ -1668,20 +1452,14 @@ svn_fs_x__packed_revprop_available(svn_boolean_t *missing, svn_revnum_t revision, apr_pool_t *scratch_pool) { - svn_fs_x__data_t *ffd = fs->fsap_data; - svn_stringbuf_t *content = NULL; + svn_node_kind_t kind; + packed_revprops_t *revprops; + svn_error_t *err; /* try to read the manifest file */ - const char *folder = svn_fs_x__path_revprops_pack_shard(fs, revision, - scratch_pool); - const char *manifest_path = svn_dirent_join(folder, PATH_MANIFEST, - scratch_pool); - - svn_error_t *err = svn_fs_x__try_stringbuf_from_file(&content, - missing, - manifest_path, - FALSE, - scratch_pool); + revprops = apr_pcalloc(scratch_pool, sizeof(*revprops)); + revprops->revision = revision; + err = get_revprop_packname(fs, revprops, scratch_pool, scratch_pool); /* if the manifest cannot be read, consider the pack files inaccessible * even if the file itself exists. */ @@ -1691,114 +1469,93 @@ svn_fs_x__packed_revprop_available(svn_boolean_t *missing, return FALSE; } - if (*missing) - return FALSE; - - /* parse manifest content until we find the entry for REVISION. - * Revision 0 is never packed. */ - revision = revision < ffd->max_files_per_dir - ? revision - 1 - : revision % ffd->max_files_per_dir; - while (content->data) + /* the respective pack file must exist (and be a file) */ + err = svn_io_check_path(get_revprop_pack_filepath(revprops, + &revprops->entry, + scratch_pool), + &kind, scratch_pool); + if (err) { - char *next = strchr(content->data, '\n'); - if (next) - { - *next = 0; - ++next; - } - - if (revision-- == 0) - { - /* the respective pack file must exist (and be a file) */ - svn_node_kind_t kind; - err = svn_io_check_path(svn_dirent_join(folder, content->data, - scratch_pool), - &kind, scratch_pool); - if (err) - { - svn_error_clear(err); - return FALSE; - } - - *missing = kind == svn_node_none; - return kind == svn_node_file; - } - - content->data = next; + svn_error_clear(err); + return FALSE; } - return FALSE; + *missing = kind == svn_node_none; + return kind == svn_node_file; } /****** Packing FSX shards *********/ -svn_error_t * -svn_fs_x__copy_revprops(const char *pack_file_dir, - const char *pack_filename, - const char *shard_path, - svn_revnum_t start_rev, - svn_revnum_t end_rev, - apr_array_header_t *sizes, - apr_size_t total_size, - int compression_level, - svn_cancel_func_t cancel_func, - void *cancel_baton, - apr_pool_t *scratch_pool) +/* Copy revprop files for revisions [START_REV, END_REV) from SHARD_PATH + * in filesystem FS to the pack file at PACK_FILE_NAME in PACK_FILE_DIR. + * + * The file sizes have already been determined and written to SIZES. + * Please note that this function will be executed while the filesystem + * has been locked and that revprops files will therefore not be modified + * while the pack is in progress. + * + * COMPRESSION_LEVEL defines how well the resulting pack file shall be + * compressed or whether is shall be compressed at all. TOTAL_SIZE is + * a hint on which initial buffer size we should use to hold the pack file + * content. Schedule necessary fsync calls in BATCH. + * + * CANCEL_FUNC and CANCEL_BATON are used as usual. Temporary allocations + * are done in SCRATCH_POOL. + */ +static svn_error_t * +copy_revprops(svn_fs_t *fs, + const char *pack_file_dir, + const char *pack_filename, + const char *shard_path, + svn_revnum_t start_rev, + svn_revnum_t end_rev, + apr_array_header_t *sizes, + apr_size_t total_size, + int compression_level, + svn_fs_x__batch_fsync_t *batch, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *scratch_pool) { - svn_stream_t *pack_stream; apr_file_t *pack_file; svn_revnum_t rev; apr_pool_t *iterpool = svn_pool_create(scratch_pool); - svn_stream_t *stream; - - /* create empty data buffer and a write stream on top of it */ - svn_stringbuf_t *uncompressed - = svn_stringbuf_create_ensure(total_size, scratch_pool); - svn_stringbuf_t *compressed - = svn_stringbuf_create_empty(scratch_pool); - pack_stream = svn_stream_from_stringbuf(uncompressed, scratch_pool); - - /* write the pack file header */ - SVN_ERR(serialize_revprops_header(pack_stream, start_rev, sizes, 0, - sizes->nelts, iterpool)); - /* Some useful paths. */ - SVN_ERR(svn_io_file_open(&pack_file, svn_dirent_join(pack_file_dir, - pack_filename, - scratch_pool), - APR_WRITE | APR_CREATE, APR_OS_DEFAULT, - scratch_pool)); + svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool); + svn_packed__byte_stream_t *stream + = svn_packed__create_bytes_stream(root); /* Iterate over the revisions in this shard, squashing them together. */ for (rev = start_rev; rev <= end_rev; rev++) { const char *path; + svn_stringbuf_t *props; svn_pool_clear(iterpool); /* Construct the file name. */ - path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev), - iterpool); + path = svn_fs_x__path_revprops(fs, rev, iterpool); /* Copy all the bits from the non-packed revprop file to the end of * the pack file. */ - SVN_ERR(svn_stream_open_readonly(&stream, path, iterpool, iterpool)); - SVN_ERR(svn_stream_copy3(stream, pack_stream, - cancel_func, cancel_baton, iterpool)); + SVN_ERR(svn_stringbuf_from_file2(&props, path, iterpool)); + SVN_ERR_W(verify_checksum(props, iterpool), + apr_psprintf(iterpool, "Failed to read revprops for r%ld.", + rev)); + + svn_packed__add_bytes(stream, props->data, props->len); } - /* flush stream buffers to content buffer */ - SVN_ERR(svn_stream_close(pack_stream)); + /* Create the auto-fsync'ing pack file. */ + SVN_ERR(svn_fs_x__batch_fsync_open_file(&pack_file, batch, + svn_dirent_join(pack_file_dir, + pack_filename, + scratch_pool), + scratch_pool)); - /* compress the content (or just store it for COMPRESSION_LEVEL 0) */ - SVN_ERR(svn__compress(uncompressed, compressed, compression_level)); - - /* write the pack file content to disk */ - stream = svn_stream_from_aprfile2(pack_file, FALSE, scratch_pool); - SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len)); - SVN_ERR(svn_stream_close(stream)); + /* write all to disk */ + SVN_ERR(write_packed_data_checksummed(root, pack_file, scratch_pool)); svn_pool_destroy(iterpool); @@ -1806,49 +1563,62 @@ svn_fs_x__copy_revprops(const char *pack_file_dir, } svn_error_t * -svn_fs_x__pack_revprops_shard(const char *pack_file_dir, +svn_fs_x__pack_revprops_shard(svn_fs_t *fs, + const char *pack_file_dir, const char *shard_path, apr_int64_t shard, int max_files_per_dir, - apr_off_t max_pack_size, + apr_int64_t max_pack_size, int compression_level, + svn_fs_x__batch_fsync_t *batch, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *scratch_pool) { const char *manifest_file_path, *pack_filename = NULL; - svn_stream_t *manifest_stream; + apr_file_t *manifest_file; svn_revnum_t start_rev, end_rev, rev; - apr_off_t total_size; + apr_size_t total_size; apr_pool_t *iterpool = svn_pool_create(scratch_pool); apr_array_header_t *sizes; + apr_array_header_t *manifest; + + /* Sanitize config file values. */ + apr_size_t max_size = (apr_size_t)MIN(MAX(max_pack_size, 1), + SVN_MAX_OBJECT_SIZE); /* Some useful paths. */ manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, scratch_pool); - /* Remove any existing pack file for this shard, since it is incomplete. */ - SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton, - scratch_pool)); - - /* Create the new directory and manifest file stream. */ - SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool)); - SVN_ERR(svn_stream_open_writable(&manifest_stream, manifest_file_path, - scratch_pool, scratch_pool)); + /* Create the manifest file. */ + SVN_ERR(svn_fs_x__batch_fsync_open_file(&manifest_file, batch, + manifest_file_path, scratch_pool)); /* revisions to handle. Special case: revision 0 */ start_rev = (svn_revnum_t) (shard * max_files_per_dir); end_rev = (svn_revnum_t) ((shard + 1) * (max_files_per_dir) - 1); if (start_rev == 0) - ++start_rev; - /* Special special case: if max_files_per_dir is 1, then at this point - start_rev == 1 and end_rev == 0 (!). Fortunately, everything just - works. */ + { + /* Never pack revprops for r0, just copy it. */ + SVN_ERR(svn_io_copy_file(svn_fs_x__path_revprops(fs, 0, iterpool), + svn_dirent_join(pack_file_dir, "p0", + scratch_pool), + TRUE, + iterpool)); + + ++start_rev; + /* Special special case: if max_files_per_dir is 1, then at this point + start_rev == 1 and end_rev == 0 (!). Fortunately, everything just + works. */ + } /* initialize the revprop size info */ - sizes = apr_array_make(scratch_pool, max_files_per_dir, sizeof(apr_off_t)); + sizes = apr_array_make(scratch_pool, max_files_per_dir, sizeof(apr_size_t)); total_size = 2 * SVN_INT64_BUFFER_SIZE; + manifest = apr_array_make(scratch_pool, 4, sizeof(manifest_entry_t)); + /* Iterate over the revisions in this shard, determine their size and * squashing them together into pack files. */ for (rev = start_rev; rev <= end_rev; rev++) @@ -1859,20 +1629,22 @@ svn_fs_x__pack_revprops_shard(const char *pack_file_dir, svn_pool_clear(iterpool); /* Get the size of the file. */ - path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev), - iterpool); + path = svn_fs_x__path_revprops(fs, rev, iterpool); SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool)); - /* if we already have started a pack file and this revprop cannot be - * appended to it, write the previous pack file. */ - if (sizes->nelts != 0 && - total_size + SVN_INT64_BUFFER_SIZE + finfo.size > max_pack_size) + /* If we already have started a pack file and this revprop cannot be + * appended to it, write the previous pack file. Note this overflow + * check works because we enforced MAX_SIZE <= SVN_MAX_OBJECT_SIZE. */ + if (sizes->nelts != 0 + && ( finfo.size > max_size + || total_size > max_size + || SVN_INT64_BUFFER_SIZE + finfo.size > max_size - total_size)) { - SVN_ERR(svn_fs_x__copy_revprops(pack_file_dir, pack_filename, - shard_path, start_rev, rev-1, - sizes, (apr_size_t)total_size, - compression_level, cancel_func, - cancel_baton, iterpool)); + SVN_ERR(copy_revprops(fs, pack_file_dir, pack_filename, + shard_path, start_rev, rev-1, + sizes, (apr_size_t)total_size, + compression_level, batch, cancel_func, + cancel_baton, iterpool)); /* next pack file starts empty again */ apr_array_clear(sizes); @@ -1883,66 +1655,31 @@ svn_fs_x__pack_revprops_shard(const char *pack_file_dir, /* Update the manifest. Allocate a file name for the current pack * file if it is a new one */ if (sizes->nelts == 0) - pack_filename = apr_psprintf(scratch_pool, "%ld.0", rev); + { + manifest_entry_t *entry = apr_array_push(manifest); + entry->start_rev = rev; + entry->tag = 0; - SVN_ERR(svn_stream_printf(manifest_stream, iterpool, "%s\n", - pack_filename)); + pack_filename = apr_psprintf(scratch_pool, "%ld.0", rev); + } /* add to list of files to put into the current pack file */ - APR_ARRAY_PUSH(sizes, apr_off_t) = finfo.size; + APR_ARRAY_PUSH(sizes, apr_size_t) = finfo.size; total_size += SVN_INT64_BUFFER_SIZE + finfo.size; } /* write the last pack file */ if (sizes->nelts != 0) - SVN_ERR(svn_fs_x__copy_revprops(pack_file_dir, pack_filename, shard_path, - start_rev, rev-1, sizes, - (apr_size_t)total_size, compression_level, - cancel_func, cancel_baton, iterpool)); + SVN_ERR(copy_revprops(fs, pack_file_dir, pack_filename, shard_path, + start_rev, rev-1, sizes, + (apr_size_t)total_size, compression_level, + batch, cancel_func, cancel_baton, iterpool)); - /* flush the manifest file and update permissions */ - SVN_ERR(svn_stream_close(manifest_stream)); - SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool)); + SVN_ERR(write_manifest(manifest_file, manifest, iterpool)); + /* flush all data to disk and update permissions */ + SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool)); svn_pool_destroy(iterpool); return SVN_NO_ERROR; } - -svn_error_t * -svn_fs_x__delete_revprops_shard(const char *shard_path, - apr_int64_t shard, - int max_files_per_dir, - svn_cancel_func_t cancel_func, - void *cancel_baton, - apr_pool_t *scratch_pool) -{ - if (shard == 0) - { - apr_pool_t *iterpool = svn_pool_create(scratch_pool); - int i; - - /* delete all files except the one for revision 0 */ - for (i = 1; i < max_files_per_dir; ++i) - { - const char *path; - svn_pool_clear(iterpool); - - path = svn_dirent_join(shard_path, - apr_psprintf(iterpool, "%d", i), - iterpool); - if (cancel_func) - SVN_ERR((*cancel_func)(cancel_baton)); - - SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool)); - } - - svn_pool_destroy(iterpool); - } - else - SVN_ERR(svn_io_remove_dir2(shard_path, TRUE, - cancel_func, cancel_baton, scratch_pool)); - - return SVN_NO_ERROR; -} - |