summary | refs | log | tree | commit | diff
path: root/subversion/libsvn_fs_fs/cached_data.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_fs_fs/cached_data.c')
-rw-r--r-- subversion/libsvn_fs_fs/cached_data.c 648
1 files changed, 465 insertions, 183 deletions
diff --git a/subversion/libsvn_fs_fs/cached_data.c b/subversion/libsvn_fs_fs/cached_data.c
index 6581a6c8831c6..f8fa2d05bfd68 100644
--- a/subversion/libsvn_fs_fs/cached_data.c
+++ b/subversion/libsvn_fs_fs/cached_data.c
@@ -57,7 +57,7 @@ block_read(void **result,
apr_pool_t *scratch_pool);
-/* Defined this to enable access logging via dgb__log_access
+/* Define this to enable access logging via dbg_log_access
#define SVN_FS_FS__LOG_ACCESS
*/
@@ -91,7 +91,7 @@ dbg_log_access(svn_fs_t *fs,
svn_fs_fs__revision_file_t *rev_file;
SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
- scratch_pool));
+ scratch_pool, scratch_pool));
/* determine rev / pack file offset */
SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
@@ -158,7 +158,8 @@ dbg_log_access(svn_fs_t *fs,
{
/* reverse index lookup: get item description in ENTRY */
SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
- offset, scratch_pool));
+ offset, scratch_pool,
+ scratch_pool));
if (entry)
{
/* more details */
@@ -183,6 +184,10 @@ dbg_log_access(svn_fs_t *fs,
description);
}
+ /* We don't know when SCRATCH_POOL will be cleared, so close the rev file
+ explicitly. */
+ SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
+
#endif
return SVN_NO_ERROR;
@@ -286,6 +291,114 @@ use_block_read(svn_fs_t *fs)
return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
}
+svn_error_t *
+svn_fs_fs__fixup_expanded_size(svn_fs_t *fs,
+representation_t *rep,
+apr_pool_t *scratch_pool)
+{
+svn_checksum_t checksum;
+svn_checksum_t *empty_md5;
+svn_fs_fs__revision_file_t *revision_file;
+svn_fs_fs__rep_header_t *rep_header;
+
+/* Purpose: make REP->EXPANDED_SIZE of a committed representation in FS
+ * trustworthy. An EXPANDED_SIZE of 0 may either mean "empty contents"
+ * or "not recorded, use SIZE"; wherever the checks below show that the
+ * contents are not empty, EXPANDED_SIZE is overwritten with SIZE. REP
+ * is modified in place and may be NULL, in which case nothing happens.
+ * SCRATCH_POOL is used for temporaries. An error can only be returned
+ * by the rep header lookup at the very end of this function.
+ *
+ * Anything to do at all?
+ *
+ * Note that a 0 SIZE is only possible for PLAIN reps due to the SVN\1
+ * magic prefix in any DELTA rep. */
+if (!rep || rep->expanded_size != 0 || rep->size == 0)
+return SVN_NO_ERROR;
+
+/* This function may only be called for committed data. */
+assert(!svn_fs_fs__id_txn_used(&rep->txn_id));
+
+/* EXPANDED_SIZE is 0. If the MD5 does not match the one for empty
+ * contents, we know that EXPANDED_SIZE == 0 is wrong and needs to
+ * be set to the actual value given by SIZE.
+ *
+ * Using svn_checksum_match() will also accept all-zero values for
+ * the MD5 digest and only report a mismatch if the MD5 has actually
+ * been given. */
+empty_md5 = svn_checksum_empty_checksum(svn_checksum_md5, scratch_pool);
+
+checksum.digest = rep->md5_digest;
+checksum.kind = svn_checksum_md5;
+if (!svn_checksum_match(empty_md5, &checksum))
+{
+rep->expanded_size = rep->size;
+return SVN_NO_ERROR;
+}
+
+/* Data in the rep-cache.db does not have MD5 checksums (all zero) on it.
+ * Compare SHA1 instead. The stack-allocated CHECKSUM wrapper is simply
+ * re-pointed at the SHA1 digest for this second comparison. */
+if (rep->has_sha1)
+{
+svn_checksum_t *empty_sha1
+= svn_checksum_empty_checksum(svn_checksum_sha1, scratch_pool);
+
+checksum.digest = rep->sha1_digest;
+checksum.kind = svn_checksum_sha1;
+if (!svn_checksum_match(empty_sha1, &checksum))
+{
+rep->expanded_size = rep->size;
+return SVN_NO_ERROR;
+}
+}
+
+/* Only two cases are left here.
+ * (1) A non-empty PLAIN rep with a MD5 collision on EMPTY_MD5.
+ * (2) A DELTA rep with zero-length output. */
+
+/* SVN always stores a DELTA rep with zero-length output as an empty
+ * sequence of txdelta windows, i.e. as "SVN\1". In that case, SIZE is
+ * 4 bytes. There is no other possible DELTA rep of that size and any
+ * PLAIN rep of 4 bytes would produce a different MD5. Hence, if SIZE is
+ * actually 4 here, we know that this is an empty DELTA rep.
+ *
+ * Note that it is technically legal to have DELTA reps with a 0 length
+ * output window. Their on-disk size would be longer. We handle that
+ * case later together with the equally unlikely MD5 collision. */
+if (rep->size == 4)
+{
+/* EXPANDED_SIZE is already 0. */
+return SVN_NO_ERROR;
+}
+
+/* We still have the two options, PLAIN or DELTA rep. At this point, we
+ * are in an extremely unlikely case and can spend some time to figure it
+ * out. So, let's just look at the representation header.
+ *
+ * The revision file is closed again right after the header has been
+ * read; only the parsed REP_HEADER is needed below. */
+SVN_ERR(open_and_seek_revision(&revision_file, fs, rep->revision,
+rep->item_index, scratch_pool));
+SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, revision_file->stream,
+scratch_pool, scratch_pool));
+SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
+
+/* Only for PLAIN reps do we have to correct EXPANDED_SIZE.
+ * Any other header type leaves us with case (2) from above, a DELTA rep
+ * with zero-length output, for which EXPANDED_SIZE == 0 is correct. */
+if (rep_header->type == svn_fs_fs__rep_plain)
+rep->expanded_size = rep->size;
+
+return SVN_NO_ERROR;
+}
+
+/* Correct known issues with committed NODEREV in FS.
+ * Uses SCRATCH_POOL for temporaries.
+ *
+ * NODEREV is modified in place: its IS_FRESH_TXN_ROOT flag is cleared
+ * and the EXPANDED_SIZE of both its data and its property representation
+ * is run through svn_fs_fs__fixup_expanded_size(). Any error from
+ * those two calls is returned to the caller.
+ */
+static svn_error_t *
+fixup_node_revision(svn_fs_t *fs,
+node_revision_t *noderev,
+apr_pool_t *scratch_pool)
+{
+/* Workaround issue #4031: is-fresh-txn-root in revision files. */
+noderev->is_fresh_txn_root = FALSE;
+
+/* Make sure EXPANDED_SIZE has the correct value for every rep.
+ * A missing (NULL) rep is accepted by the callee and left alone. */
+SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->data_rep,
+scratch_pool));
+SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->prop_rep,
+scratch_pool));
+
+return SVN_NO_ERROR;
+}
+
/* Get the node-revision for the node ID in FS.
Set *NODEREV_P to the new node-revision structure, allocated in POOL.
See svn_fs_fs__get_node_revision, which wraps this and adds another
@@ -312,14 +425,13 @@ get_node_revision_body(node_revision_t **noderev_p,
scratch_pool),
APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
scratch_pool);
- if (err)
+ if (err && APR_STATUS_IS_ENOENT(err->apr_err))
+ {
+ svn_error_clear(err);
+ return svn_error_trace(err_dangling_id(fs, id));
+ }
+ else if (err)
{
- if (APR_STATUS_IS_ENOENT(err->apr_err))
- {
- svn_error_clear(err);
- return svn_error_trace(err_dangling_id(fs, id));
- }
-
return svn_error_trace(err);
}
@@ -376,9 +488,7 @@ get_node_revision_body(node_revision_t **noderev_p,
revision_file->stream,
result_pool,
scratch_pool));
-
- /* Workaround issue #4031: is-fresh-txn-root in revision files. */
- (*noderev_p)->is_fresh_txn_root = FALSE;
+ SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
/* The noderev is not in cache, yet. Add it, if caching has been enabled. */
if (ffd->node_revision_cache)
@@ -636,15 +746,15 @@ typedef struct rep_state_t
int chunk_index; /* number of the window to read */
} rep_state_t;
-/* Simple wrapper around svn_fs_fs__get_file_offset to simplify callers. */
+/* Simple wrapper around svn_io_file_get_offset to simplify callers. */
static svn_error_t *
get_file_offset(apr_off_t *offset,
rep_state_t *rs,
apr_pool_t *pool)
{
- return svn_error_trace(svn_fs_fs__get_file_offset(offset,
- rs->sfile->rfile->file,
- pool));
+ return svn_error_trace(svn_io_file_get_offset(offset,
+ rs->sfile->rfile->file,
+ pool));
}
/* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
@@ -756,7 +866,7 @@ create_rep_state_body(rep_state_t **rep_state,
rs->size = rep->size;
rs->revision = rep->revision;
rs->item_index = rep->item_index;
- rs->raw_window_cache = ffd->raw_window_cache;
+ rs->raw_window_cache = use_block_read(fs) ? ffd->raw_window_cache : NULL;
rs->ver = -1;
rs->start = -1;
@@ -765,9 +875,7 @@ create_rep_state_body(rep_state_t **rep_state,
Since we don't know the depth of the delta chain, let's assume, the
whole contents get rewritten 3 times.
*/
- estimated_window_storage
- = 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size)
- + SVN_DELTA_WINDOW_SIZE);
+ estimated_window_storage = 4 * (rep->expanded_size + SVN_DELTA_WINDOW_SIZE);
estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
rs->window_cache = ffd->txdelta_window_cache
@@ -1160,7 +1268,7 @@ parse_raw_window(void **out,
stream = svn_stream_from_string(&raw_window, result_pool);
/* parse it */
- SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, 1,
+ SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, window->ver,
result_pool));
/* complete the window and return it */
@@ -1322,15 +1430,11 @@ set_cached_combined_window(svn_stringbuf_t *window,
ID, and representation REP.
Also, set *WINDOW_P to the base window content for *LIST, if it
could be found in cache. Otherwise, *LIST will contain the base
- representation for the whole delta chain.
- Finally, return the expanded size of the representation in
- *EXPANDED_SIZE. It will take care of cases where only the on-disk
- size is known. */
+ representation for the whole delta chain. */
static svn_error_t *
build_rep_list(apr_array_header_t **list,
svn_stringbuf_t **window_p,
rep_state_t **src_state,
- svn_filesize_t *expanded_size,
svn_fs_t *fs,
representation_t *first_rep,
apr_pool_t *pool)
@@ -1345,24 +1449,9 @@ build_rep_list(apr_array_header_t **list,
*list = apr_array_make(pool, 1, sizeof(rep_state_t *));
rep = *first_rep;
- /* The value as stored in the data struct.
- 0 is either for unknown length or actually zero length. */
- *expanded_size = first_rep->expanded_size;
-
/* for the top-level rep, we need the rep_args */
SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
iterpool));
-
- /* Unknown size or empty representation?
- That implies the this being the first iteration.
- Usually size equals on-disk size, except for empty,
- compressed representations (delta, size = 4).
- Please note that for all non-empty deltas have
- a 4-byte header _plus_ some data. */
- if (*expanded_size == 0)
- if (rep_header->type == svn_fs_fs__rep_plain || first_rep->size != 4)
- *expanded_size = first_rep->size;
-
while (1)
{
svn_pool_clear(iterpool);
@@ -1373,7 +1462,8 @@ build_rep_list(apr_array_header_t **list,
&rep, fs, pool, iterpool));
/* for txn reps, there won't be a cached combined window */
- if (!svn_fs_fs__id_txn_used(&rep.txn_id))
+ if ( !svn_fs_fs__id_txn_used(&rep.txn_id)
+ && rep.expanded_size < SVN_DELTA_WINDOW_SIZE)
SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
if (is_cached)
@@ -1686,7 +1776,7 @@ get_combined_window(svn_stringbuf_t **result,
}
/* Returns whether or not the expanded fulltext of the file is cachable
- * based on its size SIZE. The decision depends on the cache used by RB.
+ * based on its size SIZE. The decision depends on the cache used by FFD.
*/
static svn_boolean_t
fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
@@ -1732,10 +1822,10 @@ get_contents_from_windows(struct rep_read_baton *rb,
This is where we need the pseudo rep_state created
by build_rep_list(). */
apr_size_t offset = (apr_size_t)rs->current;
- if (copy_len + offset > rb->base_window->len)
- copy_len = offset < rb->base_window->len
- ? rb->base_window->len - offset
- : 0ul;
+ if (offset >= rb->base_window->len)
+ copy_len = 0ul;
+ else if (copy_len > rb->base_window->len - offset)
+ copy_len = rb->base_window->len - offset;
memcpy (cur, rb->base_window->data + offset, copy_len);
}
@@ -1969,11 +2059,21 @@ skip_contents(struct rep_read_baton *baton,
len -= to_read;
buffer += to_read;
}
+
+ /* Make the MD5 calculation catch up with the data delivered
+ * (we did not run MD5 on the data that we took from the cache). */
+ if (!err)
+ {
+ SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
+ baton->current_fulltext->data,
+ baton->current_fulltext->len));
+ baton->off += baton->current_fulltext->len;
+ }
}
else if (len > 0)
{
/* Simply drain LEN bytes from the window stream. */
- apr_pool_t *subpool = subpool = svn_pool_create(baton->pool);
+ apr_pool_t *subpool = svn_pool_create(baton->pool);
char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
while (len > 0 && !err)
@@ -1984,6 +2084,15 @@ skip_contents(struct rep_read_baton *baton,
err = get_contents_from_windows(baton, buffer, &to_read);
len -= to_read;
+
+ /* Make the MD5 calculation catch up with the data delivered
+ * (we did not run MD5 on the data that we took from the cache). */
+ if (!err)
+ {
+ SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
+ buffer, to_read));
+ baton->off += to_read;
+ }
}
svn_pool_destroy(subpool);
@@ -2019,8 +2128,9 @@ rep_read_contents(void *baton,
if (!rb->rs_list)
{
/* Window stream not initialized, yet. Do it now. */
+ rb->len = rb->rep.expanded_size;
SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
- &rb->src_state, &rb->len, rb->fs, &rb->rep,
+ &rb->src_state, rb->fs, &rb->rep,
rb->filehandle_pool));
/* In case we did read from the fulltext cache before, make the
@@ -2092,7 +2202,6 @@ svn_fs_fs__get_contents(svn_stream_t **contents_p,
else
{
fs_fs_data_t *ffd = fs->fsap_data;
- svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size;
struct rep_read_baton *rb;
pair_cache_key_t fulltext_cache_key = { 0 };
@@ -2108,7 +2217,7 @@ svn_fs_fs__get_contents(svn_stream_t **contents_p,
* cache it. */
if (ffd->fulltext_cache && cache_fulltext
&& SVN_IS_VALID_REVNUM(rep->revision)
- && fulltext_size_is_cachable(ffd, len))
+ && fulltext_size_is_cachable(ffd, rep->expanded_size))
{
rb->fulltext_cache = ffd->fulltext_cache;
}
@@ -2202,7 +2311,7 @@ svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
svn_fs_fs__id_txn_reset(&next_rep.txn_id);
SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
- &rb->src_state, &rb->len, rb->fs, &next_rep,
+ &rb->src_state, rb->fs, &next_rep,
rb->filehandle_pool));
/* Insert the access to REP as the first element of the delta chain. */
@@ -2447,12 +2556,12 @@ compare_dirent_name(const void *a, const void *b)
return strcmp(lhs->name, rhs);
}
-/* Into ENTRIES, read all directories entries from the key-value text in
+/* Into *ENTRIES_P, read all directory entries from the key-value text in
* STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and
* update the data. ID is provided for nicer error messages.
*/
static svn_error_t *
-read_dir_entries(apr_array_header_t *entries,
+read_dir_entries(apr_array_header_t **entries_p,
svn_stream_t *stream,
svn_boolean_t incremental,
const svn_fs_id_t *id,
@@ -2460,8 +2569,14 @@ read_dir_entries(apr_array_header_t *entries,
apr_pool_t *scratch_pool)
{
apr_pool_t *iterpool = svn_pool_create(scratch_pool);
- apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
+ apr_hash_t *hash = NULL;
const char *terminator = SVN_HASH_TERMINATOR;
+ apr_array_header_t *entries = NULL;
+
+ if (incremental)
+ hash = svn_hash__make(scratch_pool);
+ else
+ entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
/* Read until the terminator (non-incremental) or the end of STREAM
(incremental mode). In the latter mode, we use a temporary HASH
@@ -2473,8 +2588,11 @@ read_dir_entries(apr_array_header_t *entries,
char *str;
svn_pool_clear(iterpool);
- SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
- incremental, iterpool));
+ SVN_ERR_W(svn_hash__read_entry(&entry, stream, terminator,
+ incremental, iterpool),
+ apr_psprintf(iterpool,
+ _("Directory representation corrupt in '%s'"),
+ svn_fs_fs__id_unparse(id, scratch_pool)->data));
/* End of directory? */
if (entry.key == NULL)
@@ -2542,6 +2660,9 @@ read_dir_entries(apr_array_header_t *entries,
if (incremental)
{
apr_hash_index_t *hi;
+
+ entries = apr_array_make(result_pool, apr_hash_count(hash),
+ sizeof(svn_fs_dirent_t *));
for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
}
@@ -2551,14 +2672,45 @@ read_dir_entries(apr_array_header_t *entries,
svn_pool_destroy(iterpool);
+ *entries_p = entries;
return SVN_NO_ERROR;
}
-/* Fetch the contents of a directory into ENTRIES. Values are stored
+/* For directory NODEREV in FS, return the *FILESIZE of its in-txn
+ * representation. If the directory representation is committed data,
+ * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries.
+ *
+ * "In-txn" means that NODEREV has a data rep whose TXN_ID is in use; a
+ * NODEREV without any data rep is treated like committed data. The size
+ * is obtained with svn_io_stat_dirent2() on the path returned by
+ * svn_fs_fs__path_txn_node_children(). The fourth argument (FALSE) is
+ * presumably IGNORE_ENOENT, i.e. a missing children file would be
+ * reported as an error - TODO confirm against the svn_io.h docs.
+ */
+static svn_error_t *
+get_txn_dir_info(svn_filesize_t *filesize,
+svn_fs_t *fs,
+node_revision_t *noderev,
+apr_pool_t *scratch_pool)
+{
+if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
+{
+const svn_io_dirent2_t *dirent;
+const char *filename;
+
+filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
+scratch_pool);
+
+SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
+scratch_pool, scratch_pool));
+*filesize = dirent->filesize;
+}
+else
+{
+*filesize = SVN_INVALID_FILESIZE;
+}
+
+return SVN_NO_ERROR;
+}
+
+/* Fetch the contents of a directory into DIR. Values are stored
as filename to string mappings; further conversion is necessary to
convert them into svn_fs_dirent_t values. */
static svn_error_t *
-get_dir_contents(apr_array_header_t **entries,
+get_dir_contents(svn_fs_fs__dir_data_t *dir,
svn_fs_t *fs,
node_revision_t *noderev,
apr_pool_t *result_pool,
@@ -2566,18 +2718,30 @@ get_dir_contents(apr_array_header_t **entries,
{
svn_stream_t *contents;
- *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
+ /* Initialize the result. */
+ dir->txn_filesize = SVN_INVALID_FILESIZE;
+
+ /* Read dir contents - unless there is none in which case we are done. */
if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
{
- const char *filename
- = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool);
+ /* Get location & current size of the directory representation. */
+ const char *filename;
+ apr_file_t *file;
+
+ filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
+ scratch_pool);
/* The representation is mutable. Read the old directory
contents from the mutable children file, followed by the
changes we've made in this transaction. */
- SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
- scratch_pool));
- SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id,
+ SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
+ APR_OS_DEFAULT, scratch_pool));
+
+ /* Obtain txn children file size. */
+ SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
+
+ contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
+ SVN_ERR(read_dir_entries(&dir->entries, contents, TRUE, noderev->id,
result_pool, scratch_pool));
SVN_ERR(svn_stream_close(contents));
}
@@ -2586,9 +2750,7 @@ get_dir_contents(apr_array_header_t **entries,
/* Undeltify content before parsing it. Otherwise, we could only
* parse it byte-by-byte.
*/
- apr_size_t len = noderev->data_rep->expanded_size
- ? (apr_size_t)noderev->data_rep->expanded_size
- : (apr_size_t)noderev->data_rep->size;
+ apr_size_t len = noderev->data_rep->expanded_size;
svn_stringbuf_t *text;
/* The representation is immutable. Read it normally. */
@@ -2599,9 +2761,13 @@ get_dir_contents(apr_array_header_t **entries,
/* de-serialize hash */
contents = svn_stream_from_stringbuf(text, scratch_pool);
- SVN_ERR(read_dir_entries(*entries, contents, FALSE, noderev->id,
+ SVN_ERR(read_dir_entries(&dir->entries, contents, FALSE, noderev->id,
result_pool, scratch_pool));
}
+ else
+ {
+ dir->entries = apr_array_make(result_pool, 0, sizeof(svn_fs_dirent_t *));
+ }
return SVN_NO_ERROR;
}
@@ -2620,27 +2786,27 @@ locate_dir_cache(svn_fs_t *fs,
apr_pool_t *pool)
{
fs_fs_data_t *ffd = fs->fsap_data;
- if (svn_fs_fs__id_is_txn(noderev->id))
+ if (!noderev->data_rep)
+ {
+ /* no data rep -> empty directory.
+ A NULL key causes a cache miss. */
+ *key = NULL;
+ return ffd->dir_cache;
+ }
+
+ if (svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
{
/* data in txns requires the expensive fs_id-based addressing mode */
*key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
+
return ffd->txn_dir_cache;
}
else
{
/* committed data can use simple rev,item pairs */
- if (noderev->data_rep)
- {
- pair_key->revision = noderev->data_rep->revision;
- pair_key->second = noderev->data_rep->item_index;
- *key = pair_key;
- }
- else
- {
- /* no data rep -> empty directory.
- A NULL key causes a cache miss. */
- *key = NULL;
- }
+ pair_key->revision = noderev->data_rep->revision;
+ pair_key->second = noderev->data_rep->item_index;
+ *key = pair_key;
return ffd->dir_cache;
}
@@ -2655,6 +2821,7 @@ svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
{
pair_cache_key_t pair_key = { 0 };
const void *key;
+ svn_fs_fs__dir_data_t *dir;
/* find the cache we may use */
svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
@@ -2663,23 +2830,36 @@ svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
{
svn_boolean_t found;
- SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, key,
+ SVN_ERR(svn_cache__get((void **)&dir, &found, cache, key,
result_pool));
if (found)
- return SVN_NO_ERROR;
+ {
+ /* Verify that the cached dir info is not stale
+ * (no-op for committed data). */
+ svn_filesize_t filesize;
+ SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
+
+ if (filesize == dir->txn_filesize)
+ {
+ /* Still valid. Done. */
+ *entries_p = dir->entries;
+ return SVN_NO_ERROR;
+ }
+ }
}
/* Read in the directory contents. */
- SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool,
- scratch_pool));
+ dir = apr_pcalloc(scratch_pool, sizeof(*dir));
+ SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
+ *entries_p = dir->entries;
/* Update the cache, if we are to use one.
*
* Don't even attempt to serialize very large directories; it would cause
* an unnecessary memory allocation peak. 150 bytes/entry is about right.
*/
- if (cache && svn_cache__is_cachable(cache, 150 * (*entries_p)->nelts))
- SVN_ERR(svn_cache__set(cache, key, *entries_p, scratch_pool));
+ if (cache && svn_cache__is_cachable(cache, 150 * dir->entries->nelts))
+ SVN_ERR(svn_cache__set(cache, key, dir, scratch_pool));
return SVN_NO_ERROR;
}
@@ -2702,6 +2882,7 @@ svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
+ extract_dir_entry_baton_t baton;
svn_boolean_t found = FALSE;
/* find the cache we may use */
@@ -2711,30 +2892,42 @@ svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
scratch_pool);
if (cache)
{
+ svn_filesize_t filesize;
+ SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
+
/* Cache lookup. */
+ baton.txn_filesize = filesize;
+ baton.name = name;
SVN_ERR(svn_cache__get_partial((void **)dirent,
&found,
cache,
key,
svn_fs_fs__extract_dir_entry,
- (void*)name,
+ &baton,
result_pool));
}
/* fetch data from disk if we did not find it in the cache */
- if (! found)
+ if (! found || baton.out_of_date)
{
- apr_array_header_t *entries;
svn_fs_dirent_t *entry;
svn_fs_dirent_t *entry_copy = NULL;
+ svn_fs_fs__dir_data_t dir;
- /* read the dir from the file system. It will probably be put it
- into the cache for faster lookup in future calls. */
- SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev,
- scratch_pool, scratch_pool));
+ /* Read in the directory contents. */
+ SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
+ scratch_pool));
+
+ /* Update the cache, if we are to use one.
+ *
+ * Don't even attempt to serialize very large directories; it would
+ * cause an unnecessary memory allocation peak. 150 bytes / entry is
+ * about right. */
+ if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
+ SVN_ERR(svn_cache__set(cache, key, &dir, scratch_pool));
/* find desired entry and return a copy in POOL, if found */
- entry = svn_fs_fs__find_dir_entry(entries, name, NULL);
+ entry = svn_fs_fs__find_dir_entry(dir.entries, name, NULL);
if (entry)
{
entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
@@ -2771,7 +2964,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
{
svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
- svn_error_clear(svn_stream_close(stream));
+ err = svn_error_compose_create(err, svn_stream_close(stream));
return svn_error_quick_wrapf(err,
_("malformed property list for node-revision '%s' in '%s'"),
id_str->data, filename);
@@ -2803,8 +2996,8 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
if (err)
{
svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
-
- svn_error_clear(svn_stream_close(stream));
+
+ err = svn_error_compose_create(err, svn_stream_close(stream));
return svn_error_quick_wrapf(err,
_("malformed property list for node-revision '%s'"),
id_str->data);
@@ -2826,23 +3019,42 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
}
svn_error_t *
+svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t **context,
+svn_fs_t *fs,
+svn_revnum_t rev,
+apr_pool_t *result_pool)
+{ /* Allocate a new changes context in RESULT_POOL, bind it to revision
+ * REV of FS and return it in *CONTEXT. No I/O happens here and the
+ * function always returns SVN_NO_ERROR. */
+svn_fs_fs__changes_context_t *result = apr_pcalloc(result_pool,
+sizeof(*result)); /* zero-filled, so every member that is not
+ * assigned below starts out as 0 / NULL */
+result->fs = fs;
+result->revision = rev;
+result->rev_file_pool = result_pool; /* svn_fs_fs__get_changes() hands
+ * this pool to svn_fs_fs__open_pack_or_rev_file() when it opens
+ * the revision file for this context */
+
+*context = result;
+return SVN_NO_ERROR;
+}
+
+svn_error_t *
svn_fs_fs__get_changes(apr_array_header_t **changes,
- svn_fs_t *fs,
- svn_revnum_t rev,
- apr_pool_t *result_pool)
+ svn_fs_fs__changes_context_t *context,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
{
- apr_off_t changes_offset = SVN_FS_FS__ITEM_INDEX_CHANGES;
- svn_fs_fs__revision_file_t *revision_file;
+ apr_off_t item_index = SVN_FS_FS__ITEM_INDEX_CHANGES;
svn_boolean_t found;
- fs_fs_data_t *ffd = fs->fsap_data;
- apr_pool_t *scratch_pool = svn_pool_create(result_pool);
+ fs_fs_data_t *ffd = context->fs->fsap_data;
+ svn_fs_fs__changes_list_t *changes_list;
+
+ pair_cache_key_t key;
+ key.revision = context->revision;
+ key.second = context->next;
/* try cache lookup first */
if (ffd->changes_cache)
{
- SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache,
- &rev, result_pool));
+ SVN_ERR(svn_cache__get((void **)&changes_list, &found,
+ ffd->changes_cache, &key, result_pool));
}
else
{
@@ -2853,61 +3065,113 @@ svn_fs_fs__get_changes(apr_array_header_t **changes,
{
/* read changes from revision file */
- SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
- SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
- scratch_pool, scratch_pool));
+ if (!context->revision_file)
+ {
+ SVN_ERR(svn_fs_fs__ensure_revision_exists(context->revision,
+ context->fs,
+ scratch_pool));
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&context->revision_file,
+ context->fs,
+ context->revision,
+ context->rev_file_pool,
+ scratch_pool));
+ }
- if (use_block_read(fs))
+ if (use_block_read(context->fs))
{
- /* 'block-read' will also provide us with the desired data */
- SVN_ERR(block_read((void **)changes, fs,
- rev, SVN_FS_FS__ITEM_INDEX_CHANGES,
- revision_file, result_pool, scratch_pool));
+ /* 'block-read' will probably populate the cache with the data
+ * that we want. However, we won't want to force it to process
+ * very large change lists as part of this prefetching mechanism.
+ * Those would be better handled by the iterative code below. */
+ SVN_ERR(block_read(NULL, context->fs,
+ context->revision, SVN_FS_FS__ITEM_INDEX_CHANGES,
+ context->revision_file, scratch_pool,
+ scratch_pool));
+
+ /* This may succeed now ... */
+ SVN_ERR(svn_cache__get((void **)&changes_list, &found,
+ ffd->changes_cache, &key, result_pool));
}
- else
+
+ /* If we still have no data, read it here. */
+ if (!found)
{
+ apr_off_t changes_offset;
+
/* Addressing is very different for old formats
* (needs to read the revision trailer). */
- if (svn_fs_fs__use_log_addressing(fs))
- SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs,
- revision_file, rev, NULL,
- SVN_FS_FS__ITEM_INDEX_CHANGES,
- scratch_pool));
+ if (svn_fs_fs__use_log_addressing(context->fs))
+ {
+ SVN_ERR(svn_fs_fs__item_offset(&changes_offset, context->fs,
+ context->revision_file,
+ context->revision, NULL,
+ SVN_FS_FS__ITEM_INDEX_CHANGES,
+ scratch_pool));
+ }
else
- SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
- revision_file, fs, rev,
- scratch_pool));
+ {
+ SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
+ context->revision_file,
+ context->fs, context->revision,
+ scratch_pool));
+
+ /* This variable will be used for debug logging only. */
+ item_index = changes_offset;
+ }
/* Actual reading and parsing are the same, though. */
- SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset,
+ SVN_ERR(aligned_seek(context->fs, context->revision_file->file,
+ NULL, changes_offset + context->next_offset,
scratch_pool));
- SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream,
+
+ SVN_ERR(svn_fs_fs__read_changes(changes,
+ context->revision_file->stream,
+ SVN_FS_FS__CHANGES_BLOCK_SIZE,
result_pool, scratch_pool));
+ /* Construct the info object for the entries block we just read. */
+ changes_list = apr_pcalloc(scratch_pool, sizeof(*changes_list));
+ SVN_ERR(svn_io_file_get_offset(&changes_list->end_offset,
+ context->revision_file->file,
+ scratch_pool));
+ changes_list->end_offset -= changes_offset;
+ changes_list->start_offset = context->next_offset;
+ changes_list->count = (*changes)->nelts;
+ changes_list->changes = (change_t **)(*changes)->elts;
+ changes_list->eol = changes_list->count < SVN_FS_FS__CHANGES_BLOCK_SIZE;
+
/* cache for future reference */
if (ffd->changes_cache)
- {
- /* Guesstimate for the size of the in-cache representation. */
- apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts;
-
- /* Don't even serialize data that probably won't fit into the
- * cache. This often implies that either CHANGES is very
- * large, memory is scarce or both. Having a huge temporary
- * copy would not be a good thing in either case. */
- if (svn_cache__is_cachable(ffd->changes_cache, estimated_size))
- SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes,
- scratch_pool));
- }
+ SVN_ERR(svn_cache__set(ffd->changes_cache, &key, changes_list,
+ scratch_pool));
}
+ }
- SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
+ if (found)
+ {
+ /* Return the block as a "proper" APR array. */
+ (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
+ (*changes)->elts = (char *)changes_list->changes;
+ (*changes)->nelts = changes_list->count;
+ (*changes)->nalloc = changes_list->count;
+ }
+
+ /* Where to look next - if there is more data. */
+ context->next += (*changes)->nelts;
+ context->next_offset = changes_list->end_offset;
+ context->eol = changes_list->eol;
+
+ /* Close the revision file after we read all data. */
+ if (context->eol && context->revision_file)
+ {
+ SVN_ERR(svn_fs_fs__close_revision_file(context->revision_file));
+ context->revision_file = NULL;
}
- SVN_ERR(dbg_log_access(fs, rev, changes_offset, *changes,
+ SVN_ERR(dbg_log_access(context->fs, context->revision, item_index, *changes,
SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
- svn_pool_destroy(scratch_pool);
return SVN_NO_ERROR;
}
@@ -2942,7 +3206,7 @@ init_rep_state(rep_state_t *rs,
rs->start = entry->offset + rs->header_size;
rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
rs->size = entry->size - rep_header->header_size - 7;
- rs->ver = 1;
+ rs->ver = -1;
rs->chunk_index = 0;
rs->raw_window_cache = ffd->raw_window_cache;
rs->window_cache = ffd->txdelta_window_cache;
@@ -3000,6 +3264,9 @@ cache_windows(svn_fs_t *fs,
apr_pool_t *pool)
{
apr_pool_t *iterpool = svn_pool_create(pool);
+
+ SVN_ERR(auto_read_diff_version(rs, iterpool));
+
while (rs->current < rs->size)
{
apr_off_t end_offset;
@@ -3060,6 +3327,7 @@ cache_windows(svn_fs_t *fs,
window.end_offset = rs->current;
window.window.len = window_len;
window.window.data = buf;
+ window.ver = rs->ver;
/* cache the window now */
SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
@@ -3181,9 +3449,8 @@ read_rep_header(svn_fs_fs__rep_header_t **rep_header,
/* Fetch the representation data (header, txdelta / plain windows)
* addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
- * Read the data from the already open FILE and the wrapping
- * STREAM object. If MAX_OFFSET is not -1, don't read windows that start
- * at or beyond that offset.
+ * Read the data from REV_FILE. If MAX_OFFSET is not -1, don't read
+ * windows that start at or beyond that offset.
* Use SCRATCH_POOL for temporary allocations.
*/
static svn_error_t *
@@ -3191,7 +3458,6 @@ block_read_contents(svn_fs_t *fs,
svn_fs_fs__revision_file_t *rev_file,
svn_fs_fs__p2l_entry_t* entry,
apr_off_t max_offset,
- apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
pair_cache_key_t header_key = { 0 };
@@ -3201,9 +3467,9 @@ block_read_contents(svn_fs_t *fs,
header_key.second = entry->item.number;
SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
- result_pool, scratch_pool));
+ scratch_pool, scratch_pool));
SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
- result_pool, scratch_pool));
+ scratch_pool, scratch_pool));
return SVN_NO_ERROR;
}
@@ -3252,37 +3518,39 @@ read_item(svn_stream_t **stream,
_("Low-level checksum mismatch while reading\n"
"%s bytes of meta data at offset %s "
"for item %s in revision %ld"),
- apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size),
- apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset),
+ apr_off_t_toa(pool, entry->size),
+ apr_off_t_toa(pool, entry->offset),
apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
entry->item.revision);
}
-/* If not already cached or if MUST_READ is set, read the changed paths
- * list addressed by ENTRY in FS and retúrn it in *CHANGES. Cache the
- * result if caching is enabled. Read the data from the already open
- * FILE and wrapping FILE_STREAM. Use POOL for allocations.
+/* If not already cached, read the changed paths list addressed by ENTRY in
+ * FS and cache it if it has no more than SVN_FS_FS__CHANGES_BLOCK_SIZE
+ * entries and caching is enabled. Read the data from REV_FILE.
+ * Allocate temporaries in SCRATCH_POOL.
*/
static svn_error_t *
-block_read_changes(apr_array_header_t **changes,
- svn_fs_t *fs,
+block_read_changes(svn_fs_t *fs,
svn_fs_fs__revision_file_t *rev_file,
svn_fs_fs__p2l_entry_t *entry,
- svn_boolean_t must_read,
- apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
fs_fs_data_t *ffd = fs->fsap_data;
svn_stream_t *stream;
- if (!must_read && !ffd->changes_cache)
+ apr_array_header_t *changes;
+
+ pair_cache_key_t key;
+ key.revision = entry->item.revision;
+ key.second = 0;
+
+ if (!ffd->changes_cache)
return SVN_NO_ERROR;
/* already in cache? */
- if (!must_read && ffd->changes_cache)
+ if (ffd->changes_cache)
{
svn_boolean_t is_cached;
- SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache,
- &entry->item.revision,
+ SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
scratch_pool));
if (is_cached)
return SVN_NO_ERROR;
@@ -3290,22 +3558,40 @@ block_read_changes(apr_array_header_t **changes,
SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
- /* read changes from revision file */
- SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool,
- scratch_pool));
+ /* Read changes from revision file. But read just past the first block to
+ enable us to determine whether the first block already hit the EOL.
- /* cache for future reference */
- if (ffd->changes_cache)
- SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision,
- *changes, scratch_pool));
+ Note: A 100 entries block is already > 10kB on disk. With a 4kB default
+ disk block size, this function won't even be called for larger
+ changed paths lists. */
+ SVN_ERR(svn_fs_fs__read_changes(&changes, stream,
+ SVN_FS_FS__CHANGES_BLOCK_SIZE + 1,
+ scratch_pool, scratch_pool));
+
+ /* We can only cache small lists that don't need to be split up.
+ For longer lists, we miss the file offset info for the respective blocks. */
+ if (changes->nelts <= SVN_FS_FS__CHANGES_BLOCK_SIZE)
+ {
+ svn_fs_fs__changes_list_t changes_list;
+
+ /* Construct the info object for the entries block we just read. */
+ changes_list.end_offset = entry->size;
+ changes_list.start_offset = 0;
+ changes_list.count = changes->nelts;
+ changes_list.changes = (change_t **)changes->elts;
+ changes_list.eol = TRUE;
+
+ SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
+ scratch_pool));
+ }
return SVN_NO_ERROR;
}
-/* If not already cached or if MUST_READ is set, read the nod revision
+/* If not already cached or if MUST_READ is set, read the node revision
* addressed by ENTRY in FS and return it in *NODEREV_P. Cache the
- * result if caching is enabled. Read the data from the already open
- * FILE and wrapping FILE_STREAM. Use SCRATCH_POOL for temporary allocations.
+ * result if caching is enabled. Read the data from REV_FILE. Allocate
+ * *NODEREV_P in RESULT_POOL and allocate temporaries in SCRATCH_POOL.
*/
static svn_error_t *
block_read_noderev(node_revision_t **noderev_p,
@@ -3341,9 +3627,7 @@ block_read_noderev(node_revision_t **noderev_p,
/* read node rev from revision file */
SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
result_pool, scratch_pool));
-
- /* Workaround issue #4031: is-fresh-txn-root in revision files. */
- (*noderev_p)->is_fresh_txn_root = FALSE;
+ SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
if (ffd->node_revision_cache)
SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
@@ -3457,7 +3741,7 @@ block_read(void **result,
is_wanted
? -1
: block_start + ffd->block_size,
- pool, iterpool));
+ iterpool));
break;
case SVN_FS_FS__ITEM_TYPE_NODEREV:
@@ -3469,10 +3753,8 @@ block_read(void **result,
break;
case SVN_FS_FS__ITEM_TYPE_CHANGES:
- SVN_ERR(block_read_changes((apr_array_header_t **)&item,
- fs, revision_file,
- entry, is_result,
- pool, iterpool));
+ SVN_ERR(block_read_changes(fs, revision_file,
+ entry, iterpool));
break;
default:
@@ -3485,7 +3767,7 @@ block_read(void **result,
/* if we crossed a block boundary, read the remainder of
* the last block as well */
offset = entry->offset + entry->size;
- if (offset > block_start + ffd->block_size)
+ if (offset - block_start > ffd->block_size)
++run_count;
}
}