summaryrefslogtreecommitdiff
path: root/subversion/libsvn_fs_fs/transaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_fs_fs/transaction.c')
-rw-r--r--subversion/libsvn_fs_fs/transaction.c251
1 files changed, 169 insertions, 82 deletions
diff --git a/subversion/libsvn_fs_fs/transaction.c b/subversion/libsvn_fs_fs/transaction.c
index fabfe3e3e03b4..bc93a5c27b824 100644
--- a/subversion/libsvn_fs_fs/transaction.c
+++ b/subversion/libsvn_fs_fs/transaction.c
@@ -2128,6 +2128,11 @@ rep_write_get_baton(struct rep_write_baton **wb_p,
there may be new duplicate representations within the same uncommitted
revision, those can be passed in REPS_HASH (maps a sha1 digest onto
representation_t*), otherwise pass in NULL for REPS_HASH.
+
+ The content of both representations will be compared, taking REP's content
+ from FILE at OFFSET. Only if they actually match, will *OLD_REP not be
+ NULL.
+
Use RESULT_POOL for *OLD_REP allocations and SCRATCH_POOL for temporaries.
The lifetime of *OLD_REP is limited by both, RESULT_POOL and REP lifetime.
*/
@@ -2135,6 +2140,8 @@ static svn_error_t *
get_shared_rep(representation_t **old_rep,
svn_fs_t *fs,
representation_t *rep,
+ apr_file_t *file,
+ apr_off_t offset,
apr_hash_t *reps_hash,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
@@ -2142,6 +2149,10 @@ get_shared_rep(representation_t **old_rep,
svn_error_t *err;
fs_fs_data_t *ffd = fs->fsap_data;
+ svn_checksum_t checksum;
+ checksum.digest = rep->sha1_digest;
+ checksum.kind = svn_checksum_sha1;
+
/* Return NULL, if rep sharing has been disabled. */
*old_rep = NULL;
if (!ffd->rep_sharing_allowed)
@@ -2158,9 +2169,6 @@ get_shared_rep(representation_t **old_rep,
/* If we haven't found anything yet, try harder and consult our DB. */
if (*old_rep == NULL)
{
- svn_checksum_t checksum;
- checksum.digest = rep->sha1_digest;
- checksum.kind = svn_checksum_sha1;
err = svn_fs_fs__get_rep_reference(old_rep, fs, &checksum, result_pool);
/* ### Other error codes that we shouldn't mask out? */
if (err == SVN_NO_ERROR)
@@ -2235,6 +2243,72 @@ get_shared_rep(representation_t **old_rep,
(*old_rep)->uniquifier = rep->uniquifier;
}
+ /* If we (very likely) found a matching representation, compare the actual
+ * contents such that we can be sure that no rep-cache.db corruption or
+ * hash collision produced a false positive. */
+ if (*old_rep)
+ {
+ apr_off_t old_position;
+ svn_stream_t *contents;
+ svn_stream_t *old_contents;
+ svn_boolean_t same;
+
+ /* The existing representation may itself be part of the current
+ * transaction. In that case, it may be in different stages of
+ * the commit finalization process.
+ *
+ * OLD_REP_NORM is the same as that OLD_REP but it is assigned
+ * explicitly to REP's transaction if OLD_REP does not point
+ * to an already committed revision. This then prevents the
+ * revision lookup and the txn data will be accessed.
+ */
+ representation_t old_rep_norm = **old_rep;
+ if ( !SVN_IS_VALID_REVNUM(old_rep_norm.revision)
+ || old_rep_norm.revision > ffd->youngest_rev_cache)
+ old_rep_norm.txn_id = rep->txn_id;
+
+ /* Make sure we can later restore FILE's current position. */
+ SVN_ERR(svn_fs_fs__get_file_offset(&old_position, file, scratch_pool));
+
+ /* Compare the two representations.
+ * Note that the stream comparison might also produce MD5 checksum
+ * errors or other failures in case of SHA1 collisions. */
+ SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
+ offset, scratch_pool));
+ SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
+ FALSE, scratch_pool));
+ err = svn_stream_contents_same2(&same, contents, old_contents,
+ scratch_pool);
+
+ /* A mismatch should be extremely rare.
+ * If it does happen, reject the commit. */
+ if (!same || err)
+ {
+ /* SHA1 collision or worse. */
+ svn_stringbuf_t *old_rep_str
+ = svn_fs_fs__unparse_representation(*old_rep,
+ ffd->format, FALSE,
+ scratch_pool,
+ scratch_pool);
+ svn_stringbuf_t *rep_str
+ = svn_fs_fs__unparse_representation(rep,
+ ffd->format, FALSE,
+ scratch_pool,
+ scratch_pool);
+ const char *checksum__str
+ = svn_checksum_to_cstring_display(&checksum, scratch_pool);
+
+ return svn_error_createf(SVN_ERR_FS_GENERAL,
+ err, "SHA1 of reps '%s' and '%s' "
+ "matches (%s) but contents differ",
+ old_rep_str->data, rep_str->data,
+ checksum__str);
+ }
+
+ /* Restore FILE's read / write position. */
+ SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
+ }
+
return SVN_NO_ERROR;
}
@@ -2293,8 +2367,8 @@ rep_write_contents_close(void *baton)
/* Check and see if we already have a representation somewhere that's
identical to the one we just wrote out. */
- SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, NULL, b->result_pool,
- b->scratch_pool));
+ SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, b->file, b->rep_offset, NULL,
+ b->result_pool, b->scratch_pool));
if (old_rep)
{
@@ -2519,11 +2593,16 @@ write_directory_to_stream(svn_stream_t *stream,
/* Write out the COLLECTION as a text representation to file FILE using
WRITER. In the process, record position, the total size of the dump and
MD5 as well as SHA1 in REP. Add the representation of type ITEM_TYPE to
- the indexes if necessary. If rep sharing has been enabled and REPS_HASH
- is not NULL, it will be used in addition to the on-disk cache to find
- earlier reps with the same content. When such existing reps can be
- found, we will truncate the one just written from the file and return
- the existing rep. Perform temporary allocations in SCRATCH_POOL. */
+ the indexes if necessary.
+
+ If ALLOW_REP_SHARING is FALSE, rep-sharing will not be used, regardless
+ of any other option and rep-sharing settings. If rep sharing has been
+ enabled and REPS_HASH is not NULL, it will be used in addition to the
+ on-disk cache to find earlier reps with the same content. If such
+ existing reps can be found, we will truncate the one just written from
+ the file and return the existing rep.
+
+ Perform temporary allocations in SCRATCH_POOL. */
static svn_error_t *
write_container_rep(representation_t *rep,
apr_file_t *file,
@@ -2531,14 +2610,15 @@ write_container_rep(representation_t *rep,
collection_writer_t writer,
svn_fs_t *fs,
apr_hash_t *reps_hash,
+ svn_boolean_t allow_rep_sharing,
apr_uint32_t item_type,
apr_pool_t *scratch_pool)
{
svn_stream_t *stream;
struct write_container_baton *whb;
svn_checksum_ctx_t *fnv1a_checksum_ctx;
- representation_t *old_rep;
apr_off_t offset = 0;
+ svn_fs_fs__p2l_entry_t entry;
SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool));
@@ -2562,45 +2642,46 @@ write_container_rep(representation_t *rep,
/* Store the results. */
SVN_ERR(digests_final(rep, whb->md5_ctx, whb->sha1_ctx, scratch_pool));
+ /* Update size info. */
+ rep->expanded_size = whb->size;
+ rep->size = whb->size;
+
/* Check and see if we already have a representation somewhere that's
identical to the one we just wrote out. */
- SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, scratch_pool,
- scratch_pool));
-
- if (old_rep)
+ if (allow_rep_sharing)
{
- /* We need to erase from the protorev the data we just wrote. */
- SVN_ERR(svn_io_file_trunc(file, offset, scratch_pool));
+ representation_t *old_rep;
+ SVN_ERR(get_shared_rep(&old_rep, fs, rep, file, offset, reps_hash,
+ scratch_pool, scratch_pool));
- /* Use the old rep for this content. */
- memcpy(rep, old_rep, sizeof (*rep));
- }
- else
- {
- svn_fs_fs__p2l_entry_t entry;
+ if (old_rep)
+ {
+ /* We need to erase from the protorev the data we just wrote. */
+ SVN_ERR(svn_io_file_trunc(file, offset, scratch_pool));
- /* Write out our cosmetic end marker. */
- SVN_ERR(svn_stream_puts(whb->stream, "ENDREP\n"));
+ /* Use the old rep for this content. */
+ memcpy(rep, old_rep, sizeof (*rep));
+ return SVN_NO_ERROR;
+ }
+ }
- SVN_ERR(allocate_item_index(&rep->item_index, fs, &rep->txn_id,
- offset, scratch_pool));
+ /* Write out our cosmetic end marker. */
+ SVN_ERR(svn_stream_puts(whb->stream, "ENDREP\n"));
- entry.offset = offset;
- SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool));
- entry.size = offset - entry.offset;
- entry.type = item_type;
- entry.item.revision = SVN_INVALID_REVNUM;
- entry.item.number = rep->item_index;
- SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum,
- fnv1a_checksum_ctx,
- scratch_pool));
+ SVN_ERR(allocate_item_index(&rep->item_index, fs, &rep->txn_id,
+ offset, scratch_pool));
- SVN_ERR(store_p2l_index_entry(fs, &rep->txn_id, &entry, scratch_pool));
+ entry.offset = offset;
+ SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool));
+ entry.size = offset - entry.offset;
+ entry.type = item_type;
+ entry.item.revision = SVN_INVALID_REVNUM;
+ entry.item.number = rep->item_index;
+ SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum,
+ fnv1a_checksum_ctx,
+ scratch_pool));
- /* update the representation */
- rep->size = whb->size;
- rep->expanded_size = whb->size;
- }
+ SVN_ERR(store_p2l_index_entry(fs, &rep->txn_id, &entry, scratch_pool));
return SVN_NO_ERROR;
}
@@ -2608,11 +2689,14 @@ write_container_rep(representation_t *rep,
/* Write out the COLLECTION pertaining to the NODEREV in FS as a deltified
text representation to file FILE using WRITER. In the process, record the
total size and the md5 digest in REP and add the representation of type
- ITEM_TYPE to the indexes if necessary. If rep sharing has been enabled and
- REPS_HASH is not NULL, it will be used in addition to the on-disk cache to
- find earlier reps with the same content. When such existing reps can be
- found, we will truncate the one just written from the file and return the
- existing rep.
+ ITEM_TYPE to the indexes if necessary.
+
+ If ALLOW_REP_SHARING is FALSE, rep-sharing will not be used, regardless
+ of any other option and rep-sharing settings. If rep sharing has been
+ enabled and REPS_HASH is not NULL, it will be used in addition to the
+ on-disk cache to find earlier reps with the same content. If such
+ existing reps can be found, we will truncate the one just written from
+ the file and return the existing rep.
If ITEM_TYPE is IS_PROPS equals SVN_FS_FS__ITEM_TYPE_*_PROPS, assume
that we want to a props representation as the base for our delta.
@@ -2626,6 +2710,7 @@ write_container_delta_rep(representation_t *rep,
svn_fs_t *fs,
node_revision_t *noderev,
apr_hash_t *reps_hash,
+ svn_boolean_t allow_rep_sharing,
apr_uint32_t item_type,
apr_pool_t *scratch_pool)
{
@@ -2635,10 +2720,10 @@ write_container_delta_rep(representation_t *rep,
svn_stream_t *file_stream;
svn_stream_t *stream;
representation_t *base_rep;
- representation_t *old_rep;
svn_checksum_ctx_t *fnv1a_checksum_ctx;
svn_stream_t *source;
svn_fs_fs__rep_header_t header = { 0 };
+ svn_fs_fs__p2l_entry_t entry;
apr_off_t rep_end = 0;
apr_off_t delta_start = 0;
@@ -2701,46 +2786,47 @@ write_container_delta_rep(representation_t *rep,
/* Store the results. */
SVN_ERR(digests_final(rep, whb->md5_ctx, whb->sha1_ctx, scratch_pool));
+ /* Update size info. */
+ SVN_ERR(svn_fs_fs__get_file_offset(&rep_end, file, scratch_pool));
+ rep->size = rep_end - delta_start;
+ rep->expanded_size = whb->size;
+
/* Check and see if we already have a representation somewhere that's
identical to the one we just wrote out. */
- SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, scratch_pool,
- scratch_pool));
-
- if (old_rep)
+ if (allow_rep_sharing)
{
- /* We need to erase from the protorev the data we just wrote. */
- SVN_ERR(svn_io_file_trunc(file, offset, scratch_pool));
+ representation_t *old_rep;
+ SVN_ERR(get_shared_rep(&old_rep, fs, rep, file, offset, reps_hash,
+ scratch_pool, scratch_pool));
- /* Use the old rep for this content. */
- memcpy(rep, old_rep, sizeof (*rep));
- }
- else
- {
- svn_fs_fs__p2l_entry_t entry;
+ if (old_rep)
+ {
+ /* We need to erase from the protorev the data we just wrote. */
+ SVN_ERR(svn_io_file_trunc(file, offset, scratch_pool));
- /* Write out our cosmetic end marker. */
- SVN_ERR(svn_fs_fs__get_file_offset(&rep_end, file, scratch_pool));
- SVN_ERR(svn_stream_puts(file_stream, "ENDREP\n"));
+ /* Use the old rep for this content. */
+ memcpy(rep, old_rep, sizeof (*rep));
+ return SVN_NO_ERROR;
+ }
+ }
- SVN_ERR(allocate_item_index(&rep->item_index, fs, &rep->txn_id,
- offset, scratch_pool));
+ /* Write out our cosmetic end marker. */
+ SVN_ERR(svn_stream_puts(file_stream, "ENDREP\n"));
- entry.offset = offset;
- SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool));
- entry.size = offset - entry.offset;
- entry.type = item_type;
- entry.item.revision = SVN_INVALID_REVNUM;
- entry.item.number = rep->item_index;
- SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum,
- fnv1a_checksum_ctx,
- scratch_pool));
+ SVN_ERR(allocate_item_index(&rep->item_index, fs, &rep->txn_id,
+ offset, scratch_pool));
- SVN_ERR(store_p2l_index_entry(fs, &rep->txn_id, &entry, scratch_pool));
+ entry.offset = offset;
+ SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool));
+ entry.size = offset - entry.offset;
+ entry.type = item_type;
+ entry.item.revision = SVN_INVALID_REVNUM;
+ entry.item.number = rep->item_index;
+ SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum,
+ fnv1a_checksum_ctx,
+ scratch_pool));
- /* update the representation */
- rep->expanded_size = whb->size;
- rep->size = rep_end - delta_start;
- }
+ SVN_ERR(store_p2l_index_entry(fs, &rep->txn_id, &entry, scratch_pool));
return SVN_NO_ERROR;
}
@@ -2920,13 +3006,14 @@ write_final_rev(const svn_fs_id_t **new_id_p,
SVN_ERR(write_container_delta_rep(noderev->data_rep, file,
entries,
write_directory_to_stream,
- fs, noderev, NULL,
+ fs, noderev, NULL, FALSE,
SVN_FS_FS__ITEM_TYPE_DIR_REP,
pool));
else
SVN_ERR(write_container_rep(noderev->data_rep, file, entries,
write_directory_to_stream, fs, NULL,
- SVN_FS_FS__ITEM_TYPE_DIR_REP, pool));
+ FALSE, SVN_FS_FS__ITEM_TYPE_DIR_REP,
+ pool));
reset_txn_in_rep(noderev->data_rep);
}
@@ -2971,11 +3058,11 @@ write_final_rev(const svn_fs_id_t **new_id_p,
if (ffd->deltify_properties)
SVN_ERR(write_container_delta_rep(noderev->prop_rep, file, proplist,
write_hash_to_stream, fs, noderev,
- reps_hash, item_type, pool));
+ reps_hash, TRUE, item_type, pool));
else
SVN_ERR(write_container_rep(noderev->prop_rep, file, proplist,
write_hash_to_stream, fs, reps_hash,
- item_type, pool));
+ TRUE, item_type, pool));
reset_txn_in_rep(noderev->prop_rep);
}