diff options
Diffstat (limited to 'subversion/libsvn_fs_fs/stats.c')
-rw-r--r-- | subversion/libsvn_fs_fs/stats.c | 256 |
1 files changed, 200 insertions, 56 deletions
diff --git a/subversion/libsvn_fs_fs/stats.c b/subversion/libsvn_fs_fs/stats.c index 97a2ed7736f36..36992dbaeb66e 100644 --- a/subversion/libsvn_fs_fs/stats.c +++ b/subversion/libsvn_fs_fs/stats.c @@ -70,8 +70,9 @@ typedef enum rep_kind_t */ typedef struct rep_stats_t { - /* absolute offset in the file */ - apr_off_t offset; + /* offset in the revision file (phys. addressing) / + * item index within REVISION (log. addressing) */ + apr_uint64_t item_index; /* item length in bytes */ apr_uint64_t size; @@ -92,8 +93,36 @@ typedef struct rep_stats_t /* classification of the representation. values of rep_kind_t */ char kind; + /* length of the delta chain, including this representation, + * saturated to 255 - if need be */ + apr_byte_t chain_length; } rep_stats_t; +/* Represents a link in the rep delta chain. REVISION + ITEM_INDEX points + * to BASE_REVISION + BASE_ITEM_INDEX. We collect this info while scanning + * a f7 repo in a single pass and resolve it afterwards. */ +typedef struct rep_ref_t +{ + /* Revision that contains this representation. */ + svn_revnum_t revision; + + /* Item index of this rep within REVISION. */ + apr_uint64_t item_index; + + /* Revision of the representation we deltified against. + * -1 if this representation is either PLAIN or a self-delta. */ + svn_revnum_t base_revision; + + /* Item index of that rep within BASE_REVISION. */ + apr_uint64_t base_item_index; + + /* Length of the PLAIN / DELTA line in the source file in bytes. + * We use this to update the info in the rep stats after scanning the + * whole file. */ + apr_uint16_t header_size; + +} rep_ref_t; + /* Represents a single revision. * There will be only one instance per revision. */ typedef struct revision_info_t @@ -176,23 +205,6 @@ typedef struct query_t void *cancel_baton; } query_t; -/* Return the length of REV_FILE in *FILE_SIZE. - * Use SCRATCH_POOL for temporary allocations. - */ -static svn_error_t * -get_file_size(apr_off_t *file_size, - svn_fs_fs__revision_file_t *rev_file, - apr_pool_t *scratch_pool) -{ - apr_finfo_t finfo; - - SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, rev_file->file, - scratch_pool)); - - *file_size = finfo.size; - return SVN_NO_ERROR; -} - /* Initialize the LARGEST_CHANGES member in STATS with a capacity of COUNT * entries. Allocate the result in RESULT_POOL. */ @@ -345,13 +357,13 @@ add_change(svn_fs_fs__stats_t *stats, /* Comparator used for binary search comparing the absolute file offset * of a representation to some other offset. DATA is a *rep_stats_t, - * KEY is a pointer to an apr_off_t. + * KEY is a pointer to an apr_uint64_t. */ static int -compare_representation_offsets(const void *data, const void *key) +compare_representation_item_index(const void *data, const void *key) { - apr_off_t lhs = (*(const rep_stats_t *const *)data)->offset; - apr_off_t rhs = *(const apr_off_t *)key; + apr_uint64_t lhs = (*(const rep_stats_t *const *)data)->item_index; + apr_uint64_t rhs = *(const apr_uint64_t *)key; if (lhs < rhs) return -1; @@ -362,7 +374,7 @@ compare_representation_offsets(const void *data, const void *key) * return it in *REVISION_INFO. For performance reasons, we skip the * lookup if the info is already provided. * - * In that revision, look for the rep_stats_t object for offset OFFSET. + * In that revision, look for the rep_stats_t object for item ITEM_INDEX. * If it already exists, set *IDX to its index in *REVISION_INFO's * representations list and return the representation object. Otherwise, * set the index to where it must be inserted and return NULL. @@ -372,7 +384,7 @@ find_representation(int *idx, query_t *query, revision_info_t **revision_info, svn_revnum_t revision, - apr_off_t offset) + apr_uint64_t item_index) { revision_info_t *info; *idx = -1; @@ -392,14 +404,14 @@ find_representation(int *idx, /* look for the representation */ *idx = svn_sort__bsearch_lower_bound(info->representations, - &offset, - compare_representation_offsets); + &item_index, + compare_representation_item_index); if (*idx < info->representations->nelts) { /* return the representation, if this is the one we were looking for */ rep_stats_t *result = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *); - if (result->offset == offset) + if (result->item_index == item_index) return result; } @@ -428,7 +440,7 @@ parse_representation(rep_stats_t **representation, /* look it up */ result = find_representation(&idx, query, &revision_info, rep->revision, - (apr_off_t)rep->item_index); + rep->item_index); if (!result) { /* not parsed, yet (probably a rep in the same revision). @@ -436,9 +448,8 @@ parse_representation(rep_stats_t **representation, */ result = apr_pcalloc(result_pool, sizeof(*result)); result->revision = rep->revision; - result->expanded_size = (rep->expanded_size ? rep->expanded_size - : rep->size); - result->offset = (apr_off_t)rep->item_index; + result->expanded_size = rep->expanded_size; + result->item_index = rep->item_index; result->size = rep->size; /* In phys. addressing mode, follow link to the actual representation. @@ -447,7 +458,8 @@ parse_representation(rep_stats_t **representation, if (!svn_fs_fs__use_log_addressing(query->fs)) { svn_fs_fs__rep_header_t *header; - apr_off_t offset = revision_info->offset + result->offset; + apr_off_t offset = revision_info->offset + + (apr_off_t)rep->item_index; SVN_ERR_ASSERT(revision_info->rev_file); SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, @@ -457,6 +469,23 @@ parse_representation(rep_stats_t **representation, scratch_pool, scratch_pool)); result->header_size = header->header_size; + + /* Determine length of the delta chain. */ + if (header->type == svn_fs_fs__rep_delta) + { + int base_idx; + rep_stats_t *base_rep + = find_representation(&base_idx, query, NULL, + header->base_revision, + header->base_item_index); + + result->chain_length = 1 + MIN(base_rep->chain_length, + (apr_byte_t)0xfe); + } + else + { + result->chain_length = 1; + } } svn_sort__array_insert(revision_info->representations, &result, idx); @@ -588,6 +617,10 @@ read_noderev(query_t *query, svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool); SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool, scratch_pool)); + SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->data_rep, + scratch_pool)); + SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->prop_rep, + scratch_pool)); if (noderev->data_rep) { @@ -652,19 +685,25 @@ get_phys_change_count(query_t *query, revision_info_t *revision_info, apr_pool_t *scratch_pool) { - /* We are going to use our own sub-pool here because the changes object - * may well be >100MB and SCRATCH_POOL may not get cleared until all other - * info has been read by read_phys_revision(). Therefore, tidy up early. - */ - apr_pool_t *subpool = svn_pool_create(scratch_pool); - apr_array_header_t *changes; + apr_pool_t *iterpool = svn_pool_create(scratch_pool); + svn_fs_fs__changes_context_t *context; - SVN_ERR(svn_fs_fs__get_changes(&changes, query->fs, - revision_info->revision, subpool)); - revision_info->change_count = changes->nelts; + /* Fetch the first block of data. */ + SVN_ERR(svn_fs_fs__create_changes_context(&context, query->fs, + revision_info->revision, + scratch_pool)); - /* Release potentially tons of memory. */ - svn_pool_destroy(subpool); + revision_info->change_count = 0; + while (!context->eol) + { + apr_array_header_t *changes; + + svn_pool_clear(iterpool); + SVN_ERR(svn_fs_fs__get_changes(&changes, context, iterpool, iterpool)); + revision_info->change_count = changes->nelts; + } + + svn_pool_destroy(iterpool); return SVN_NO_ERROR; } @@ -729,12 +768,12 @@ read_phys_pack_file(query_t *query, { apr_pool_t *iterpool = svn_pool_create(scratch_pool); int i; - apr_off_t file_size = 0; + svn_filesize_t file_size = 0; svn_fs_fs__revision_file_t *rev_file; SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, scratch_pool, scratch_pool)); - SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool)); + SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); /* process each revision in the pack file */ for (i = 0; i < query->shard_size; ++i) @@ -798,7 +837,7 @@ read_phys_revision_file(query_t *query, apr_pool_t *scratch_pool) { revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); - apr_off_t file_size = 0; + svn_filesize_t file_size = 0; svn_fs_fs__revision_file_t *rev_file; /* cancellation support */ @@ -808,7 +847,7 @@ read_phys_revision_file(query_t *query, /* read the whole pack file into memory */ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision, scratch_pool, scratch_pool)); - SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool)); + SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); /* create the revision info for the current rev */ info->representations = apr_array_make(result_pool, 4, sizeof(rep_stats_t*)); @@ -885,6 +924,70 @@ read_item(svn_stringbuf_t **contents, return SVN_NO_ERROR; } +/* Predicate comparing the two rep_ref_t** LHS and RHS by the respective + * representation's revision. + */ +static int +compare_representation_refs(const void *lhs, const void *rhs) +{ + svn_revnum_t lhs_rev = (*(const rep_ref_t *const *)lhs)->revision; + svn_revnum_t rhs_rev = (*(const rep_ref_t *const *)rhs)->revision; + + if (lhs_rev < rhs_rev) + return -1; + return (lhs_rev > rhs_rev ? 1 : 0); +} + +/* Given all the presentations found in a single rev / pack file as + * rep_ref_t * in REP_REFS, update the delta chain lengths in QUERY. + * REP_REFS and its contents can then be discarded. + */ +static svn_error_t * +resolve_representation_refs(query_t *query, + apr_array_header_t *rep_refs) +{ + int i; + + /* Because delta chains can only point to previous revs, after sorting + * REP_REFS, all base refs have already been updated. */ + svn_sort__array(rep_refs, compare_representation_refs); + + /* Build up the CHAIN_LENGTH values. */ + for (i = 0; i < rep_refs->nelts; ++i) + { + int idx; + rep_ref_t *ref = APR_ARRAY_IDX(rep_refs, i, rep_ref_t *); + rep_stats_t *rep = find_representation(&idx, query, NULL, + ref->revision, ref->item_index); + + /* No dangling pointers and all base reps have been processed. */ + SVN_ERR_ASSERT(rep); + SVN_ERR_ASSERT(!rep->chain_length); + + /* Set the HEADER_SIZE as we found it during the scan. */ + rep->header_size = ref->header_size; + + /* The delta chain got 1 element longer. */ + if (ref->base_revision == SVN_INVALID_REVNUM) + { + rep->chain_length = 1; + } + else + { + rep_stats_t *base; + + base = find_representation(&idx, query, NULL, ref->base_revision, + ref->base_item_index); + SVN_ERR_ASSERT(base); + SVN_ERR_ASSERT(base->chain_length); + + rep->chain_length = 1 + MIN(base->chain_length, (apr_byte_t)0xfe); + } + } + + return SVN_NO_ERROR; +} + /* Process the logically addressed revision contents of revisions BASE to * BASE + COUNT - 1 in QUERY. * @@ -905,6 +1008,12 @@ read_log_rev_or_packfile(query_t *query, int i; svn_fs_fs__revision_file_t *rev_file; + /* We collect the delta chain links as we scan the file. Afterwards, + * we determine the lengths of those delta chains and throw this + * temporary container away. */ + apr_array_header_t *rep_refs = apr_array_make(scratch_pool, 64, + sizeof(rep_ref_t *)); + /* we will process every revision in the rev / pack file */ for (i = 0; i < count; ++i) { @@ -947,6 +1056,8 @@ read_log_rev_or_packfile(query_t *query, /* process all entries (and later continue with the next block) */ for (i = 0; i < entries->nelts; ++i) { + svn_stringbuf_t *item; + revision_info_t *info; svn_fs_fs__p2l_entry_t *entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); @@ -959,32 +1070,64 @@ read_log_rev_or_packfile(query_t *query, continue; /* read and process interesting items */ + info = APR_ARRAY_IDX(query->revisions, entry->item.revision, + revision_info_t*); + if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) { - svn_stringbuf_t *item; - revision_info_t *info = APR_ARRAY_IDX(query->revisions, - entry->item.revision, - revision_info_t*); SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); SVN_ERR(read_noderev(query, item, info, result_pool, iterpool)); } else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) { - svn_stringbuf_t *item; - revision_info_t *info = APR_ARRAY_IDX(query->revisions, - entry->item.revision, - revision_info_t*); SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); info->change_count = get_log_change_count(item->data + 0, item->len); info->changes_len += entry->size; } + else if ( (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP) + || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP) + || (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS) + || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)) + { + /* Collect the delta chain link. */ + svn_fs_fs__rep_header_t *header; + rep_ref_t *ref = apr_pcalloc(scratch_pool, sizeof(*ref)); + + SVN_ERR(svn_io_file_aligned_seek(rev_file->file, + rev_file->block_size, + NULL, entry->offset, + iterpool)); + SVN_ERR(svn_fs_fs__read_rep_header(&header, + rev_file->stream, + iterpool, iterpool)); + + ref->header_size = header->header_size; + ref->revision = entry->item.revision; + ref->item_index = entry->item.number; + + if (header->type == svn_fs_fs__rep_delta) + { + ref->base_item_index = header->base_item_index; + ref->base_revision = header->base_revision; + } + else + { + ref->base_item_index = SVN_FS_FS__ITEM_INDEX_UNUSED; + ref->base_revision = SVN_INVALID_REVNUM; + } + + APR_ARRAY_PUSH(rep_refs, rep_ref_t *) = ref; + } /* advance offset */ offset += entry->size; } } + /* Resolve the delta chain links. */ + SVN_ERR(resolve_representation_refs(query, rep_refs)); + /* clean up and close file handles */ svn_pool_destroy(iterpool); @@ -1111,6 +1254,7 @@ add_rep_stats(svn_fs_fs__representation_stats_t *stats, stats->references += rep->ref_count; stats->expanded_size += rep->ref_count * rep->expanded_size; + stats->chain_len += rep->chain_length; } /* Aggregate the info the in revision_info_t * array REVISIONS into the |