diff options
Diffstat (limited to 'subversion/libsvn_fs_fs/cached_data.c')
-rw-r--r-- | subversion/libsvn_fs_fs/cached_data.c | 648 |
1 files changed, 465 insertions, 183 deletions
diff --git a/subversion/libsvn_fs_fs/cached_data.c b/subversion/libsvn_fs_fs/cached_data.c index 6581a6c8831c6..f8fa2d05bfd68 100644 --- a/subversion/libsvn_fs_fs/cached_data.c +++ b/subversion/libsvn_fs_fs/cached_data.c @@ -57,7 +57,7 @@ block_read(void **result, apr_pool_t *scratch_pool); -/* Defined this to enable access logging via dgb__log_access +/* Define this to enable access logging via dbg_log_access #define SVN_FS_FS__LOG_ACCESS */ @@ -91,7 +91,7 @@ dbg_log_access(svn_fs_t *fs, svn_fs_fs__revision_file_t *rev_file; SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision, - scratch_pool)); + scratch_pool, scratch_pool)); /* determine rev / pack file offset */ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL, @@ -158,7 +158,8 @@ dbg_log_access(svn_fs_t *fs, { /* reverse index lookup: get item description in ENTRY */ SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision, - offset, scratch_pool)); + offset, scratch_pool, + scratch_pool)); if (entry) { /* more details */ @@ -183,6 +184,10 @@ dbg_log_access(svn_fs_t *fs, description); } + /* We don't know when SCRATCH_POOL will be cleared, so close the rev file + explicitly. */ + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + #endif return SVN_NO_ERROR; @@ -286,6 +291,114 @@ use_block_read(svn_fs_t *fs) return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read; } +svn_error_t * +svn_fs_fs__fixup_expanded_size(svn_fs_t *fs, + representation_t *rep, + apr_pool_t *scratch_pool) +{ + svn_checksum_t checksum; + svn_checksum_t *empty_md5; + svn_fs_fs__revision_file_t *revision_file; + svn_fs_fs__rep_header_t *rep_header; + + /* Anything to do at all? + * + * Note that a 0 SIZE is only possible for PLAIN reps due to the SVN\1 + * magic prefix in any DELTA rep. */ + if (!rep || rep->expanded_size != 0 || rep->size == 0) + return SVN_NO_ERROR; + + /* This function may only be called for committed data. */ + assert(!svn_fs_fs__id_txn_used(&rep->txn_id)); + + /* EXPANDED_SIZE is 0. If the MD5 does not match the one for empty + * contents, we know that EXPANDED_SIZE == 0 is wrong and needs to + * be set to the actual value given by SIZE. + * + * Using svn_checksum_match() will also accept all-zero values for + * the MD5 digest and only report a mismatch if the MD5 has actually + * been given. */ + empty_md5 = svn_checksum_empty_checksum(svn_checksum_md5, scratch_pool); + + checksum.digest = rep->md5_digest; + checksum.kind = svn_checksum_md5; + if (!svn_checksum_match(empty_md5, &checksum)) + { + rep->expanded_size = rep->size; + return SVN_NO_ERROR; + } + + /* Data in the rep-cache.db does not have MD5 checksums (all zero) on it. + * Compare SHA1 instead. */ + if (rep->has_sha1) + { + svn_checksum_t *empty_sha1 + = svn_checksum_empty_checksum(svn_checksum_sha1, scratch_pool); + + checksum.digest = rep->sha1_digest; + checksum.kind = svn_checksum_sha1; + if (!svn_checksum_match(empty_sha1, &checksum)) + { + rep->expanded_size = rep->size; + return SVN_NO_ERROR; + } + } + + /* Only two cases are left here. + * (1) A non-empty PLAIN rep with a MD5 collision on EMPTY_MD5. + * (2) A DELTA rep with zero-length output. */ + + /* SVN always stores a DELTA rep with zero-length output as an empty + * sequence of txdelta windows, i.e. as "SVN\1". In that case, SIZE is + * 4 bytes. There is no other possible DELTA rep of that size and any + * PLAIN rep of 4 bytes would produce a different MD5. Hence, if SIZE is + * actually 4 here, we know that this is an empty DELTA rep. + * + * Note that it is technically legal to have DELTA reps with a 0 length + * output window. Their on-disk size would be longer. We handle that + * case later together with the equally unlikely MD5 collision. */ + if (rep->size == 4) + { + /* EXPANDED_SIZE is already 0. */ + return SVN_NO_ERROR; + } + + /* We still have the two options, PLAIN or DELTA rep. At this point, we + * are in an extremely unlikely case and can spend some time to figure it + * out. So, let's just look at the representation header. */ + SVN_ERR(open_and_seek_revision(&revision_file, fs, rep->revision, + rep->item_index, scratch_pool)); + SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, revision_file->stream, + scratch_pool, scratch_pool)); + SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); + + /* Only for PLAIN reps do we have to correct EXPANDED_SIZE. */ + if (rep_header->type == svn_fs_fs__rep_plain) + rep->expanded_size = rep->size; + + return SVN_NO_ERROR; +} + +/* Correct known issues with committed NODEREV in FS. + * Uses SCRATCH_POOL for temporaries. + */ +static svn_error_t * +fixup_node_revision(svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *scratch_pool) +{ + /* Workaround issue #4031: is-fresh-txn-root in revision files. */ + noderev->is_fresh_txn_root = FALSE; + + /* Make sure EXPANDED_SIZE has the correct value for every rep. */ + SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->data_rep, + scratch_pool)); + SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->prop_rep, + scratch_pool)); + + return SVN_NO_ERROR; +} + /* Get the node-revision for the node ID in FS. Set *NODEREV_P to the new node-revision structure, allocated in POOL. See svn_fs_fs__get_node_revision, which wraps this and adds another @@ -312,14 +425,13 @@ get_node_revision_body(node_revision_t **noderev_p, scratch_pool), APR_READ | APR_BUFFERED, APR_OS_DEFAULT, scratch_pool); - if (err) + if (err && APR_STATUS_IS_ENOENT(err->apr_err)) + { + svn_error_clear(err); + return svn_error_trace(err_dangling_id(fs, id)); + } + else if (err) { - if (APR_STATUS_IS_ENOENT(err->apr_err)) - { - svn_error_clear(err); - return svn_error_trace(err_dangling_id(fs, id)); - } - return svn_error_trace(err); } @@ -376,9 +488,7 @@ get_node_revision_body(node_revision_t **noderev_p, revision_file->stream, result_pool, scratch_pool)); - - /* Workaround issue #4031: is-fresh-txn-root in revision files. */ - (*noderev_p)->is_fresh_txn_root = FALSE; + SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool)); /* The noderev is not in cache, yet. Add it, if caching has been enabled. */ if (ffd->node_revision_cache) @@ -636,15 +746,15 @@ typedef struct rep_state_t int chunk_index; /* number of the window to read */ } rep_state_t; -/* Simple wrapper around svn_fs_fs__get_file_offset to simplify callers. */ +/* Simple wrapper around svn_io_file_get_offset to simplify callers. */ static svn_error_t * get_file_offset(apr_off_t *offset, rep_state_t *rs, apr_pool_t *pool) { - return svn_error_trace(svn_fs_fs__get_file_offset(offset, - rs->sfile->rfile->file, - pool)); + return svn_error_trace(svn_io_file_get_offset(offset, + rs->sfile->rfile->file, + pool)); } /* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */ @@ -756,7 +866,7 @@ create_rep_state_body(rep_state_t **rep_state, rs->size = rep->size; rs->revision = rep->revision; rs->item_index = rep->item_index; - rs->raw_window_cache = ffd->raw_window_cache; + rs->raw_window_cache = use_block_read(fs) ? ffd->raw_window_cache : NULL; rs->ver = -1; rs->start = -1; @@ -765,9 +875,7 @@ create_rep_state_body(rep_state_t **rep_state, Since we don't know the depth of the delta chain, let's assume, the whole contents get rewritten 3 times. */ - estimated_window_storage - = 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size) - + SVN_DELTA_WINDOW_SIZE); + estimated_window_storage = 4 * (rep->expanded_size + SVN_DELTA_WINDOW_SIZE); estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX); rs->window_cache = ffd->txdelta_window_cache @@ -1160,7 +1268,7 @@ parse_raw_window(void **out, stream = svn_stream_from_string(&raw_window, result_pool); /* parse it */ - SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, 1, + SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, window->ver, result_pool)); /* complete the window and return it */ @@ -1322,15 +1430,11 @@ set_cached_combined_window(svn_stringbuf_t *window, ID, and representation REP. Also, set *WINDOW_P to the base window content for *LIST, if it could be found in cache. Otherwise, *LIST will contain the base - representation for the whole delta chain. - Finally, return the expanded size of the representation in - *EXPANDED_SIZE. It will take care of cases where only the on-disk - size is known. */ + representation for the whole delta chain. */ static svn_error_t * build_rep_list(apr_array_header_t **list, svn_stringbuf_t **window_p, rep_state_t **src_state, - svn_filesize_t *expanded_size, svn_fs_t *fs, representation_t *first_rep, apr_pool_t *pool) @@ -1345,24 +1449,9 @@ build_rep_list(apr_array_header_t **list, *list = apr_array_make(pool, 1, sizeof(rep_state_t *)); rep = *first_rep; - /* The value as stored in the data struct. - 0 is either for unknown length or actually zero length. */ - *expanded_size = first_rep->expanded_size; - /* for the top-level rep, we need the rep_args */ SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool, iterpool)); - - /* Unknown size or empty representation? - That implies the this being the first iteration. - Usually size equals on-disk size, except for empty, - compressed representations (delta, size = 4). - Please note that for all non-empty deltas have - a 4-byte header _plus_ some data. */ - if (*expanded_size == 0) - if (rep_header->type == svn_fs_fs__rep_plain || first_rep->size != 4) - *expanded_size = first_rep->size; - while (1) { svn_pool_clear(iterpool); @@ -1373,7 +1462,8 @@ build_rep_list(apr_array_header_t **list, &rep, fs, pool, iterpool)); /* for txn reps, there won't be a cached combined window */ - if (!svn_fs_fs__id_txn_used(&rep.txn_id)) + if ( !svn_fs_fs__id_txn_used(&rep.txn_id) + && rep.expanded_size < SVN_DELTA_WINDOW_SIZE) SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool)); if (is_cached) @@ -1686,7 +1776,7 @@ get_combined_window(svn_stringbuf_t **result, } /* Returns whether or not the expanded fulltext of the file is cachable - * based on its size SIZE. The decision depends on the cache used by RB. + * based on its size SIZE. The decision depends on the cache used by FFD. */ static svn_boolean_t fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size) @@ -1732,10 +1822,10 @@ get_contents_from_windows(struct rep_read_baton *rb, This is where we need the pseudo rep_state created by build_rep_list(). */ apr_size_t offset = (apr_size_t)rs->current; - if (copy_len + offset > rb->base_window->len) - copy_len = offset < rb->base_window->len - ? rb->base_window->len - offset - : 0ul; + if (offset >= rb->base_window->len) + copy_len = 0ul; + else if (copy_len > rb->base_window->len - offset) + copy_len = rb->base_window->len - offset; memcpy (cur, rb->base_window->data + offset, copy_len); } @@ -1969,11 +2059,21 @@ skip_contents(struct rep_read_baton *baton, len -= to_read; buffer += to_read; } + + /* Make the MD5 calculation catch up with the data delivered + * (we did not run MD5 on the data that we took from the cache). */ + if (!err) + { + SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx, + baton->current_fulltext->data, + baton->current_fulltext->len)); + baton->off += baton->current_fulltext->len; + } } else if (len > 0) { /* Simply drain LEN bytes from the window stream. */ - apr_pool_t *subpool = subpool = svn_pool_create(baton->pool); + apr_pool_t *subpool = svn_pool_create(baton->pool); char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE); while (len > 0 && !err) @@ -1984,6 +2084,15 @@ skip_contents(struct rep_read_baton *baton, err = get_contents_from_windows(baton, buffer, &to_read); len -= to_read; + + /* Make the MD5 calculation catch up with the data delivered + * (we did not run MD5 on the data that we took from the cache). */ + if (!err) + { + SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx, + buffer, to_read)); + baton->off += to_read; + } } svn_pool_destroy(subpool); @@ -2019,8 +2128,9 @@ rep_read_contents(void *baton, if (!rb->rs_list) { /* Window stream not initialized, yet. Do it now. */ + rb->len = rb->rep.expanded_size; SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window, - &rb->src_state, &rb->len, rb->fs, &rb->rep, + &rb->src_state, rb->fs, &rb->rep, rb->filehandle_pool)); /* In case we did read from the fulltext cache before, make the @@ -2092,7 +2202,6 @@ svn_fs_fs__get_contents(svn_stream_t **contents_p, else { fs_fs_data_t *ffd = fs->fsap_data; - svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size; struct rep_read_baton *rb; pair_cache_key_t fulltext_cache_key = { 0 }; @@ -2108,7 +2217,7 @@ svn_fs_fs__get_contents(svn_stream_t **contents_p, * cache it. */ if (ffd->fulltext_cache && cache_fulltext && SVN_IS_VALID_REVNUM(rep->revision) - && fulltext_size_is_cachable(ffd, len)) + && fulltext_size_is_cachable(ffd, rep->expanded_size)) { rb->fulltext_cache = ffd->fulltext_cache; } @@ -2202,7 +2311,7 @@ svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p, svn_fs_fs__id_txn_reset(&next_rep.txn_id); SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window, - &rb->src_state, &rb->len, rb->fs, &next_rep, + &rb->src_state, rb->fs, &next_rep, rb->filehandle_pool)); /* Insert the access to REP as the first element of the delta chain. */ @@ -2447,12 +2556,12 @@ compare_dirent_name(const void *a, const void *b) return strcmp(lhs->name, rhs); } -/* Into ENTRIES, read all directories entries from the key-value text in +/* Into *ENTRIES_P, read all directories entries from the key-value text in * STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and * update the data. ID is provided for nicer error messages. */ static svn_error_t * -read_dir_entries(apr_array_header_t *entries, +read_dir_entries(apr_array_header_t **entries_p, svn_stream_t *stream, svn_boolean_t incremental, const svn_fs_id_t *id, @@ -2460,8 +2569,14 @@ read_dir_entries(apr_array_header_t *entries, apr_pool_t *scratch_pool) { apr_pool_t *iterpool = svn_pool_create(scratch_pool); - apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL; + apr_hash_t *hash = NULL; const char *terminator = SVN_HASH_TERMINATOR; + apr_array_header_t *entries = NULL; + + if (incremental) + hash = svn_hash__make(scratch_pool); + else + entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *)); /* Read until the terminator (non-incremental) or the end of STREAM (incremental mode). In the latter mode, we use a temporary HASH @@ -2473,8 +2588,11 @@ read_dir_entries(apr_array_header_t *entries, char *str; svn_pool_clear(iterpool); - SVN_ERR(svn_hash__read_entry(&entry, stream, terminator, - incremental, iterpool)); + SVN_ERR_W(svn_hash__read_entry(&entry, stream, terminator, + incremental, iterpool), + apr_psprintf(iterpool, + _("Directory representation corrupt in '%s'"), + svn_fs_fs__id_unparse(id, scratch_pool)->data)); /* End of directory? */ if (entry.key == NULL) @@ -2542,6 +2660,9 @@ read_dir_entries(apr_array_header_t *entries, if (incremental) { apr_hash_index_t *hi; + + entries = apr_array_make(result_pool, apr_hash_count(hash), + sizeof(svn_fs_dirent_t *)); for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi)) APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi); } @@ -2551,14 +2672,45 @@ read_dir_entries(apr_array_header_t *entries, svn_pool_destroy(iterpool); + *entries_p = entries; return SVN_NO_ERROR; } -/* Fetch the contents of a directory into ENTRIES. Values are stored +/* For directory NODEREV in FS, return the *FILESIZE of its in-txn + * representation. If the directory representation is comitted data, + * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries. + */ +static svn_error_t * +get_txn_dir_info(svn_filesize_t *filesize, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *scratch_pool) +{ + if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id)) + { + const svn_io_dirent2_t *dirent; + const char *filename; + + filename = svn_fs_fs__path_txn_node_children(fs, noderev->id, + scratch_pool); + + SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE, + scratch_pool, scratch_pool)); + *filesize = dirent->filesize; + } + else + { + *filesize = SVN_INVALID_FILESIZE; + } + + return SVN_NO_ERROR; +} + +/* Fetch the contents of a directory into DIR. Values are stored as filename to string mappings; further conversion is necessary to convert them into svn_fs_dirent_t values. */ static svn_error_t * -get_dir_contents(apr_array_header_t **entries, +get_dir_contents(svn_fs_fs__dir_data_t *dir, svn_fs_t *fs, node_revision_t *noderev, apr_pool_t *result_pool, @@ -2566,18 +2718,30 @@ get_dir_contents(apr_array_header_t **entries, { svn_stream_t *contents; - *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *)); + /* Initialize the result. */ + dir->txn_filesize = SVN_INVALID_FILESIZE; + + /* Read dir contents - unless there is none in which case we are done. */ if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id)) { - const char *filename - = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool); + /* Get location & current size of the directory representation. */ + const char *filename; + apr_file_t *file; + + filename = svn_fs_fs__path_txn_node_children(fs, noderev->id, + scratch_pool); /* The representation is mutable. Read the old directory contents from the mutable children file, followed by the changes we've made in this transaction. */ - SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool, - scratch_pool)); - SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id, + SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED, + APR_OS_DEFAULT, scratch_pool)); + + /* Obtain txn children file size. */ + SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool)); + + contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool); + SVN_ERR(read_dir_entries(&dir->entries, contents, TRUE, noderev->id, result_pool, scratch_pool)); SVN_ERR(svn_stream_close(contents)); } @@ -2586,9 +2750,7 @@ get_dir_contents(apr_array_header_t **entries, /* Undeltify content before parsing it. Otherwise, we could only * parse it byte-by-byte. */ - apr_size_t len = noderev->data_rep->expanded_size - ? (apr_size_t)noderev->data_rep->expanded_size - : (apr_size_t)noderev->data_rep->size; + apr_size_t len = noderev->data_rep->expanded_size; svn_stringbuf_t *text; /* The representation is immutable. Read it normally. */ @@ -2599,9 +2761,13 @@ get_dir_contents(apr_array_header_t **entries, /* de-serialize hash */ contents = svn_stream_from_stringbuf(text, scratch_pool); - SVN_ERR(read_dir_entries(*entries, contents, FALSE, noderev->id, + SVN_ERR(read_dir_entries(&dir->entries, contents, FALSE, noderev->id, result_pool, scratch_pool)); } + else + { + dir->entries = apr_array_make(result_pool, 0, sizeof(svn_fs_dirent_t *)); + } return SVN_NO_ERROR; } @@ -2620,27 +2786,27 @@ locate_dir_cache(svn_fs_t *fs, apr_pool_t *pool) { fs_fs_data_t *ffd = fs->fsap_data; - if (svn_fs_fs__id_is_txn(noderev->id)) + if (!noderev->data_rep) + { + /* no data rep -> empty directory. + A NULL key causes a cache miss. */ + *key = NULL; + return ffd->dir_cache; + } + + if (svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id)) { /* data in txns requires the expensive fs_id-based addressing mode */ *key = svn_fs_fs__id_unparse(noderev->id, pool)->data; + return ffd->txn_dir_cache; } else { /* committed data can use simple rev,item pairs */ - if (noderev->data_rep) - { - pair_key->revision = noderev->data_rep->revision; - pair_key->second = noderev->data_rep->item_index; - *key = pair_key; - } - else - { - /* no data rep -> empty directory. - A NULL key causes a cache miss. */ - *key = NULL; - } + pair_key->revision = noderev->data_rep->revision; + pair_key->second = noderev->data_rep->item_index; + *key = pair_key; return ffd->dir_cache; } @@ -2655,6 +2821,7 @@ svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p, { pair_cache_key_t pair_key = { 0 }; const void *key; + svn_fs_fs__dir_data_t *dir; /* find the cache we may use */ svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev, @@ -2663,23 +2830,36 @@ svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p, { svn_boolean_t found; - SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, key, + SVN_ERR(svn_cache__get((void **)&dir, &found, cache, key, result_pool)); if (found) - return SVN_NO_ERROR; + { + /* Verify that the cached dir info is not stale + * (no-op for committed data). */ + svn_filesize_t filesize; + SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool)); + + if (filesize == dir->txn_filesize) + { + /* Still valid. Done. */ + *entries_p = dir->entries; + return SVN_NO_ERROR; + } + } } /* Read in the directory contents. */ - SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool, - scratch_pool)); + dir = apr_pcalloc(scratch_pool, sizeof(*dir)); + SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool)); + *entries_p = dir->entries; /* Update the cache, if we are to use one. * * Don't even attempt to serialize very large directories; it would cause * an unnecessary memory allocation peak. 150 bytes/entry is about right. */ - if (cache && svn_cache__is_cachable(cache, 150 * (*entries_p)->nelts)) - SVN_ERR(svn_cache__set(cache, key, *entries_p, scratch_pool)); + if (cache && svn_cache__is_cachable(cache, 150 * dir->entries->nelts)) + SVN_ERR(svn_cache__set(cache, key, dir, scratch_pool)); return SVN_NO_ERROR; } @@ -2702,6 +2882,7 @@ svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { + extract_dir_entry_baton_t baton; svn_boolean_t found = FALSE; /* find the cache we may use */ @@ -2711,30 +2892,42 @@ svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent, scratch_pool); if (cache) { + svn_filesize_t filesize; + SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool)); + /* Cache lookup. */ + baton.txn_filesize = filesize; + baton.name = name; SVN_ERR(svn_cache__get_partial((void **)dirent, &found, cache, key, svn_fs_fs__extract_dir_entry, - (void*)name, + &baton, result_pool)); } /* fetch data from disk if we did not find it in the cache */ - if (! found) + if (! found || baton.out_of_date) { - apr_array_header_t *entries; svn_fs_dirent_t *entry; svn_fs_dirent_t *entry_copy = NULL; + svn_fs_fs__dir_data_t dir; - /* read the dir from the file system. It will probably be put it - into the cache for faster lookup in future calls. */ - SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev, - scratch_pool, scratch_pool)); + /* Read in the directory contents. */ + SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool, + scratch_pool)); + + /* Update the cache, if we are to use one. + * + * Don't even attempt to serialize very large directories; it would + * cause an unnecessary memory allocation peak. 150 bytes / entry is + * about right. */ + if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts)) + SVN_ERR(svn_cache__set(cache, key, &dir, scratch_pool)); /* find desired entry and return a copy in POOL, if found */ - entry = svn_fs_fs__find_dir_entry(entries, name, NULL); + entry = svn_fs_fs__find_dir_entry(dir.entries, name, NULL); if (entry) { entry_copy = apr_palloc(result_pool, sizeof(*entry_copy)); @@ -2771,7 +2964,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p, { svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool); - svn_error_clear(svn_stream_close(stream)); + err = svn_error_compose_create(err, svn_stream_close(stream)); return svn_error_quick_wrapf(err, _("malformed property list for node-revision '%s' in '%s'"), id_str->data, filename); @@ -2803,8 +2996,8 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p, if (err) { svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool); - - svn_error_clear(svn_stream_close(stream)); + + err = svn_error_compose_create(err, svn_stream_close(stream)); return svn_error_quick_wrapf(err, _("malformed property list for node-revision '%s'"), id_str->data); @@ -2826,23 +3019,42 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p, } svn_error_t * +svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t **context, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *result_pool) +{ + svn_fs_fs__changes_context_t *result = apr_pcalloc(result_pool, + sizeof(*result)); + result->fs = fs; + result->revision = rev; + result->rev_file_pool = result_pool; + + *context = result; + return SVN_NO_ERROR; +} + +svn_error_t * svn_fs_fs__get_changes(apr_array_header_t **changes, - svn_fs_t *fs, - svn_revnum_t rev, - apr_pool_t *result_pool) + svn_fs_fs__changes_context_t *context, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) { - apr_off_t changes_offset = SVN_FS_FS__ITEM_INDEX_CHANGES; - svn_fs_fs__revision_file_t *revision_file; + apr_off_t item_index = SVN_FS_FS__ITEM_INDEX_CHANGES; svn_boolean_t found; - fs_fs_data_t *ffd = fs->fsap_data; - apr_pool_t *scratch_pool = svn_pool_create(result_pool); + fs_fs_data_t *ffd = context->fs->fsap_data; + svn_fs_fs__changes_list_t *changes_list; + + pair_cache_key_t key; + key.revision = context->revision; + key.second = context->next; /* try cache lookup first */ if (ffd->changes_cache) { - SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache, - &rev, result_pool)); + SVN_ERR(svn_cache__get((void **)&changes_list, &found, + ffd->changes_cache, &key, result_pool)); } else { @@ -2853,61 +3065,113 @@ svn_fs_fs__get_changes(apr_array_header_t **changes, { /* read changes from revision file */ - SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool)); - SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev, - scratch_pool, scratch_pool)); + if (!context->revision_file) + { + SVN_ERR(svn_fs_fs__ensure_revision_exists(context->revision, + context->fs, + scratch_pool)); + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&context->revision_file, + context->fs, + context->revision, + context->rev_file_pool, + scratch_pool)); + } - if (use_block_read(fs)) + if (use_block_read(context->fs)) { - /* 'block-read' will also provide us with the desired data */ - SVN_ERR(block_read((void **)changes, fs, - rev, SVN_FS_FS__ITEM_INDEX_CHANGES, - revision_file, result_pool, scratch_pool)); + /* 'block-read' will probably populate the cache with the data + * that we want. However, we won't want to force it to process + * very large change lists as part of this prefetching mechanism. + * Those would be better handled by the iterative code below. */ + SVN_ERR(block_read(NULL, context->fs, + context->revision, SVN_FS_FS__ITEM_INDEX_CHANGES, + context->revision_file, scratch_pool, + scratch_pool)); + + /* This may succeed now ... */ + SVN_ERR(svn_cache__get((void **)&changes_list, &found, + ffd->changes_cache, &key, result_pool)); } - else + + /* If we still have no data, read it here. */ + if (!found) { + apr_off_t changes_offset; + /* Addressing is very different for old formats * (needs to read the revision trailer). */ - if (svn_fs_fs__use_log_addressing(fs)) - SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs, - revision_file, rev, NULL, - SVN_FS_FS__ITEM_INDEX_CHANGES, - scratch_pool)); + if (svn_fs_fs__use_log_addressing(context->fs)) + { + SVN_ERR(svn_fs_fs__item_offset(&changes_offset, context->fs, + context->revision_file, + context->revision, NULL, + SVN_FS_FS__ITEM_INDEX_CHANGES, + scratch_pool)); + } else - SVN_ERR(get_root_changes_offset(NULL, &changes_offset, - revision_file, fs, rev, - scratch_pool)); + { + SVN_ERR(get_root_changes_offset(NULL, &changes_offset, + context->revision_file, + context->fs, context->revision, + scratch_pool)); + + /* This variable will be used for debug logging only. */ + item_index = changes_offset; + } /* Actual reading and parsing are the same, though. */ - SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset, + SVN_ERR(aligned_seek(context->fs, context->revision_file->file, + NULL, changes_offset + context->next_offset, scratch_pool)); - SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream, + + SVN_ERR(svn_fs_fs__read_changes(changes, + context->revision_file->stream, + SVN_FS_FS__CHANGES_BLOCK_SIZE, result_pool, scratch_pool)); + /* Construct the info object for the entries block we just read. */ + changes_list = apr_pcalloc(scratch_pool, sizeof(*changes_list)); + SVN_ERR(svn_io_file_get_offset(&changes_list->end_offset, + context->revision_file->file, + scratch_pool)); + changes_list->end_offset -= changes_offset; + changes_list->start_offset = context->next_offset; + changes_list->count = (*changes)->nelts; + changes_list->changes = (change_t **)(*changes)->elts; + changes_list->eol = changes_list->count < SVN_FS_FS__CHANGES_BLOCK_SIZE; + /* cache for future reference */ if (ffd->changes_cache) - { - /* Guesstimate for the size of the in-cache representation. */ - apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts; - - /* Don't even serialize data that probably won't fit into the - * cache. This often implies that either CHANGES is very - * large, memory is scarce or both. Having a huge temporary - * copy would not be a good thing in either case. */ - if (svn_cache__is_cachable(ffd->changes_cache, estimated_size)) - SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes, - scratch_pool)); - } + SVN_ERR(svn_cache__set(ffd->changes_cache, &key, changes_list, + scratch_pool)); } + } - SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); + if (found) + { + /* Return the block as a "proper" APR array. */ + (*changes) = apr_array_make(result_pool, 0, sizeof(void *)); + (*changes)->elts = (char *)changes_list->changes; + (*changes)->nelts = changes_list->count; + (*changes)->nalloc = changes_list->count; + } + + /* Where to look next - if there is more data. */ + context->next += (*changes)->nelts; + context->next_offset = changes_list->end_offset; + context->eol = changes_list->eol; + + /* Close the revision file after we read all data. */ + if (context->eol && context->revision_file) + { + SVN_ERR(svn_fs_fs__close_revision_file(context->revision_file)); + context->revision_file = NULL; } - SVN_ERR(dbg_log_access(fs, rev, changes_offset, *changes, + SVN_ERR(dbg_log_access(context->fs, context->revision, item_index, *changes, SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool)); - svn_pool_destroy(scratch_pool); return SVN_NO_ERROR; } @@ -2942,7 +3206,7 @@ init_rep_state(rep_state_t *rs, rs->start = entry->offset + rs->header_size; rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4; rs->size = entry->size - rep_header->header_size - 7; - rs->ver = 1; + rs->ver = -1; rs->chunk_index = 0; rs->raw_window_cache = ffd->raw_window_cache; rs->window_cache = ffd->txdelta_window_cache; @@ -3000,6 +3264,9 @@ cache_windows(svn_fs_t *fs, apr_pool_t *pool) { apr_pool_t *iterpool = svn_pool_create(pool); + + SVN_ERR(auto_read_diff_version(rs, iterpool)); + while (rs->current < rs->size) { apr_off_t end_offset; @@ -3060,6 +3327,7 @@ cache_windows(svn_fs_t *fs, window.end_offset = rs->current; window.window.len = window_len; window.window.data = buf; + window.ver = rs->ver; /* cache the window now */ SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window, @@ -3181,9 +3449,8 @@ read_rep_header(svn_fs_fs__rep_header_t **rep_header, /* Fetch the representation data (header, txdelta / plain windows) * addressed by ENTRY->ITEM in FS and cache it if caches are enabled. - * Read the data from the already open FILE and the wrapping - * STREAM object. If MAX_OFFSET is not -1, don't read windows that start - * at or beyond that offset. + * Read the data from REV_FILE. If MAX_OFFSET is not -1, don't read + * windows that start at or beyond that offset. * Use SCRATCH_POOL for temporary allocations. */ static svn_error_t * @@ -3191,7 +3458,6 @@ block_read_contents(svn_fs_t *fs, svn_fs_fs__revision_file_t *rev_file, svn_fs_fs__p2l_entry_t* entry, apr_off_t max_offset, - apr_pool_t *result_pool, apr_pool_t *scratch_pool) { pair_cache_key_t header_key = { 0 }; @@ -3201,9 +3467,9 @@ block_read_contents(svn_fs_t *fs, header_key.second = entry->item.number; SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key, - result_pool, scratch_pool)); + scratch_pool, scratch_pool)); SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset, - result_pool, scratch_pool)); + scratch_pool, scratch_pool)); return SVN_NO_ERROR; } @@ -3252,37 +3518,39 @@ read_item(svn_stream_t **stream, _("Low-level checksum mismatch while reading\n" "%s bytes of meta data at offset %s " "for item %s in revision %ld"), - apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size), - apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset), + apr_off_t_toa(pool, entry->size), + apr_off_t_toa(pool, entry->offset), apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number), entry->item.revision); } -/* If not already cached or if MUST_READ is set, read the changed paths - * list addressed by ENTRY in FS and retúrn it in *CHANGES. Cache the - * result if caching is enabled. Read the data from the already open - * FILE and wrapping FILE_STREAM. Use POOL for allocations. +/* If not already cached, read the changed paths list addressed by ENTRY in + * FS and cache it if it has no more than SVN_FS_FS__CHANGES_BLOCK_SIZE + * entries and caching is enabled. Read the data from REV_FILE. + * Allocate temporaries in SCRATCH_POOL. */ static svn_error_t * -block_read_changes(apr_array_header_t **changes, - svn_fs_t *fs, +block_read_changes(svn_fs_t *fs, svn_fs_fs__revision_file_t *rev_file, svn_fs_fs__p2l_entry_t *entry, - svn_boolean_t must_read, - apr_pool_t *result_pool, apr_pool_t *scratch_pool) { fs_fs_data_t *ffd = fs->fsap_data; svn_stream_t *stream; - if (!must_read && !ffd->changes_cache) + apr_array_header_t *changes; + + pair_cache_key_t key; + key.revision = entry->item.revision; + key.second = 0; + + if (!ffd->changes_cache) return SVN_NO_ERROR; /* already in cache? */ - if (!must_read && ffd->changes_cache) + if (ffd->changes_cache) { svn_boolean_t is_cached; - SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, - &entry->item.revision, + SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key, scratch_pool)); if (is_cached) return SVN_NO_ERROR; @@ -3290,22 +3558,40 @@ block_read_changes(apr_array_header_t **changes, SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); - /* read changes from revision file */ - SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool, - scratch_pool)); + /* Read changes from revision file. But read just past the first block to + enable us to determine whether the first block already hit the EOL. - /* cache for future reference */ - if (ffd->changes_cache) - SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision, - *changes, scratch_pool)); + Note: A 100 entries block is already > 10kB on disk. With a 4kB default + disk block size, this function won't even be called for larger + changed paths lists. */ + SVN_ERR(svn_fs_fs__read_changes(&changes, stream, + SVN_FS_FS__CHANGES_BLOCK_SIZE + 1, + scratch_pool, scratch_pool)); + + /* We can only cache small lists that don't need to be split up. + For longer lists, we miss the file offset info for the respective */ + if (changes->nelts <= SVN_FS_FS__CHANGES_BLOCK_SIZE) + { + svn_fs_fs__changes_list_t changes_list; + + /* Construct the info object for the entries block we just read. */ + changes_list.end_offset = entry->size; + changes_list.start_offset = 0; + changes_list.count = changes->nelts; + changes_list.changes = (change_t **)changes->elts; + changes_list.eol = TRUE; + + SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list, + scratch_pool)); + } return SVN_NO_ERROR; } -/* If not already cached or if MUST_READ is set, read the nod revision +/* If not already cached or if MUST_READ is set, read the node revision * addressed by ENTRY in FS and retúrn it in *NODEREV_P. Cache the - * result if caching is enabled. Read the data from the already open - * FILE and wrapping FILE_STREAM. Use SCRATCH_POOL for temporary allocations. + * result if caching is enabled. Read the data from REV_FILE. Allocate + * *NODEREV_P in RESUSLT_POOL and allocate temporaries in SCRATCH_POOL. */ static svn_error_t * block_read_noderev(node_revision_t **noderev_p, @@ -3341,9 +3627,7 @@ block_read_noderev(node_revision_t **noderev_p, /* read node rev from revision file */ SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream, result_pool, scratch_pool)); - - /* Workaround issue #4031: is-fresh-txn-root in revision files. */ - (*noderev_p)->is_fresh_txn_root = FALSE; + SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool)); if (ffd->node_revision_cache) SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p, @@ -3457,7 +3741,7 @@ block_read(void **result, is_wanted ? -1 : block_start + ffd->block_size, - pool, iterpool)); + iterpool)); break; case SVN_FS_FS__ITEM_TYPE_NODEREV: @@ -3469,10 +3753,8 @@ block_read(void **result, break; case SVN_FS_FS__ITEM_TYPE_CHANGES: - SVN_ERR(block_read_changes((apr_array_header_t **)&item, - fs, revision_file, - entry, is_result, - pool, iterpool)); + SVN_ERR(block_read_changes(fs, revision_file, + entry, iterpool)); break; default: @@ -3485,7 +3767,7 @@ block_read(void **result, /* if we crossed a block boundary, read the remainder of * the last block as well */ offset = entry->offset + entry->size; - if (offset > block_start + ffd->block_size) + if (offset - block_start > ffd->block_size) ++run_count; } } |