diff options
author | Peter Wemm <peter@FreeBSD.org> | 2018-05-08 03:44:38 +0000 |
---|---|---|
committer | Peter Wemm <peter@FreeBSD.org> | 2018-05-08 03:44:38 +0000 |
commit | 3faf8d6bffc5d0fb2525ba37bb504c53366caf9d (patch) | |
tree | 7e47911263e75034b767fe34b2f8d3d17e91f66d /subversion/libsvn_diff/parse-diff.c | |
parent | a55fb3c0d5eca7d887798125d5b95942b1f01d4b (diff) |
Notes
Diffstat (limited to 'subversion/libsvn_diff/parse-diff.c')
-rw-r--r-- | subversion/libsvn_diff/parse-diff.c | 996 |
1 files changed, 911 insertions, 85 deletions
diff --git a/subversion/libsvn_diff/parse-diff.c b/subversion/libsvn_diff/parse-diff.c index 3f794b8b1fa7..f2159694c4f6 100644 --- a/subversion/libsvn_diff/parse-diff.c +++ b/subversion/libsvn_diff/parse-diff.c @@ -40,8 +40,13 @@ #include "private/svn_eol_private.h" #include "private/svn_dep_compat.h" +#include "private/svn_diff_private.h" #include "private/svn_sorts_private.h" +#include "diff.h" + +#include "svn_private_config.h" + /* Helper macro for readability */ #define starts_with(str, start) \ (strncmp((str), (start), strlen(start)) == 0) @@ -59,7 +64,7 @@ struct svn_diff__hunk_range { struct svn_diff_hunk_t { /* The patch this hunk belongs to. */ - svn_patch_t *patch; + const svn_patch_t *patch; /* APR file handle to the patch file this hunk came from. */ apr_file_t *apr_file; @@ -80,8 +85,150 @@ struct svn_diff_hunk_t { /* Number of lines of leading and trailing hunk context. */ svn_linenum_t leading_context; svn_linenum_t trailing_context; + + /* Did we see a 'file does not end with eol' marker in this hunk? */ + svn_boolean_t original_no_final_eol; + svn_boolean_t modified_no_final_eol; + + /* Fuzz penalty, triggered by bad patch targets */ + svn_linenum_t original_fuzz; + svn_linenum_t modified_fuzz; }; +struct svn_diff_binary_patch_t { + /* The patch this hunk belongs to. */ + const svn_patch_t *patch; + + /* APR file handle to the patch file this hunk came from. */ + apr_file_t *apr_file; + + /* Offsets inside APR_FILE representing the location of the patch */ + apr_off_t src_start; + apr_off_t src_end; + svn_filesize_t src_filesize; /* Expanded/final size */ + + /* Offsets inside APR_FILE representing the location of the patch */ + apr_off_t dst_start; + apr_off_t dst_end; + svn_filesize_t dst_filesize; /* Expanded/final size */ +}; + +/* Common guts of svn_diff_hunk__create_adds_single_line() and + * svn_diff_hunk__create_deletes_single_line(). + * + * ADD is TRUE if adding and FALSE if deleting. 
+ */ +static svn_error_t * +add_or_delete_single_line(svn_diff_hunk_t **hunk_out, + const char *line, + const svn_patch_t *patch, + svn_boolean_t add, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk)); + static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" }; + const apr_size_t header_len = strlen(hunk_header[add]); + const apr_size_t len = strlen(line); + const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */ + svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool); + + hunk->patch = patch; + + /* hunk->apr_file is created below. */ + + hunk->diff_text_range.start = header_len; + hunk->diff_text_range.current = header_len; + + if (add) + { + hunk->original_text_range.start = 0; /* There's no "original" text. */ + hunk->original_text_range.current = 0; + hunk->original_text_range.end = 0; + hunk->original_no_final_eol = FALSE; + + hunk->modified_text_range.start = header_len; + hunk->modified_text_range.current = header_len; + hunk->modified_text_range.end = end; + hunk->modified_no_final_eol = TRUE; + + hunk->original_start = 0; + hunk->original_length = 0; + + hunk->modified_start = 1; + hunk->modified_length = 1; + } + else /* delete */ + { + hunk->original_text_range.start = header_len; + hunk->original_text_range.current = header_len; + hunk->original_text_range.end = end; + hunk->original_no_final_eol = TRUE; + + hunk->modified_text_range.start = 0; /* There's no "modified" text. */ + hunk->modified_text_range.current = 0; + hunk->modified_text_range.end = 0; + hunk->modified_no_final_eol = FALSE; + + hunk->original_start = 1; + hunk->original_length = 1; + + hunk->modified_start = 0; + hunk->modified_length = 0; /* setting to '1' works too */ + } + + hunk->leading_context = 0; + hunk->trailing_context = 0; + + /* Create APR_FILE and put just a hunk in it (without a diff header). 
+ * Save the offset of the last byte of the diff line. */ + svn_stringbuf_appendbytes(buf, hunk_header[add], header_len); + svn_stringbuf_appendbyte(buf, add ? '+' : '-'); + svn_stringbuf_appendbytes(buf, line, len); + svn_stringbuf_appendbyte(buf, '\n'); + svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n"); + + hunk->diff_text_range.end = buf->len; + + SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */, + NULL /* system tempdir */, + svn_io_file_del_on_pool_cleanup, + result_pool, scratch_pool)); + SVN_ERR(svn_io_file_write_full(hunk->apr_file, + buf->data, buf->len, + NULL, scratch_pool)); + /* No need to seek. */ + + *hunk_out = hunk; + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out, + const char *line, + const svn_patch_t *patch, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + SVN_ERR(add_or_delete_single_line(hunk_out, line, patch, + (!patch->reverse), + result_pool, scratch_pool)); + return SVN_NO_ERROR; +} + +svn_error_t * +svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out, + const char *line, + const svn_patch_t *patch, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + SVN_ERR(add_or_delete_single_line(hunk_out, line, patch, + patch->reverse, + result_pool, scratch_pool)); + return SVN_NO_ERROR; +} + void svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk) { @@ -142,6 +289,222 @@ svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk) return hunk->trailing_context; } +svn_linenum_t +svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk) +{ + return hunk->patch->reverse ? 
hunk->original_fuzz : hunk->modified_fuzz; +} + +/* Baton for the base85 stream implementation */ +struct base85_baton_t +{ + apr_file_t *file; + apr_pool_t *iterpool; + char buffer[52]; /* Bytes on current line */ + apr_off_t next_pos; /* Start position of next line */ + apr_off_t end_pos; /* Position after last line */ + apr_size_t buf_size; /* Bytes available (52 unless at eof) */ + apr_size_t buf_pos; /* Bytes in linebuffer */ + svn_boolean_t done; /* At eof? */ +}; + +/* Implements svn_read_fn_t for the base85 read stream */ +static svn_error_t * +read_handler_base85(void *baton, char *buffer, apr_size_t *len) +{ + struct base85_baton_t *b85b = baton; + apr_pool_t *iterpool = b85b->iterpool; + apr_size_t remaining = *len; + char *dest = buffer; + + svn_pool_clear(iterpool); + + if (b85b->done) + { + *len = 0; + return SVN_NO_ERROR; + } + + while (remaining && (b85b->buf_size > b85b->buf_pos + || b85b->next_pos < b85b->end_pos)) + { + svn_stringbuf_t *line; + svn_boolean_t at_eof; + + apr_size_t available = b85b->buf_size - b85b->buf_pos; + if (available) + { + apr_size_t n = (remaining < available) ? 
remaining : available; + + memcpy(dest, b85b->buffer + b85b->buf_pos, n); + dest += n; + remaining -= n; + b85b->buf_pos += n; + + if (!remaining) + return SVN_NO_ERROR; /* *len = OK */ + } + + if (b85b->next_pos >= b85b->end_pos) + break; /* At EOF */ + SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos, + iterpool)); + SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof, + APR_SIZE_MAX, iterpool, iterpool)); + if (at_eof) + b85b->next_pos = b85b->end_pos; + else + { + SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file, + iterpool)); + } + + if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z') + b85b->buf_size = line->data[0] - 'A' + 1; + else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z') + b85b->buf_size = line->data[0] - 'a' + 26 + 1; + else + return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, + _("Unexpected data in base85 section")); + + if (b85b->buf_size < 52) + b85b->next_pos = b85b->end_pos; /* Handle as EOF */ + + SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size, + line->data + 1, line->len - 1, + iterpool)); + b85b->buf_pos = 0; + } + + *len -= remaining; + b85b->done = TRUE; + + return SVN_NO_ERROR; +} + +/* Implements svn_close_fn_t for the base85 read stream */ +static svn_error_t * +close_handler_base85(void *baton) +{ + struct base85_baton_t *b85b = baton; + + svn_pool_destroy(b85b->iterpool); + + return SVN_NO_ERROR; +} + +/* Gets a stream that reads decoded base85 data from a segment of a file. + The current implementation might assume that both start_pos and end_pos + are located at line boundaries. 
*/ +static svn_stream_t * +get_base85_data_stream(apr_file_t *file, + apr_off_t start_pos, + apr_off_t end_pos, + apr_pool_t *result_pool) +{ + struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b)); + svn_stream_t *base85s = svn_stream_create(b85b, result_pool); + + b85b->file = file; + b85b->iterpool = svn_pool_create(result_pool); + b85b->next_pos = start_pos; + b85b->end_pos = end_pos; + + svn_stream_set_read2(base85s, NULL /* only full read support */, + read_handler_base85); + svn_stream_set_close(base85s, close_handler_base85); + return base85s; +} + +/* Baton for the length verification stream functions */ +struct length_verify_baton_t +{ + svn_stream_t *inner; + svn_filesize_t remaining; +}; + +/* Implements svn_read_fn_t for the length verification stream */ +static svn_error_t * +read_handler_length_verify(void *baton, char *buffer, apr_size_t *len) +{ + struct length_verify_baton_t *lvb = baton; + apr_size_t requested_len = *len; + + SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len)); + + if (*len > lvb->remaining) + return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, + _("Base85 data expands to longer than declared " + "filesize")); + else if (requested_len > *len && *len != lvb->remaining) + return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, + _("Base85 data expands to smaller than declared " + "filesize")); + + lvb->remaining -= *len; + + return SVN_NO_ERROR; +} + +/* Implements svn_close_fn_t for the length verification stream */ +static svn_error_t * +close_handler_length_verify(void *baton) +{ + struct length_verify_baton_t *lvb = baton; + + return svn_error_trace(svn_stream_close(lvb->inner)); +} + +/* Gets a stream that verifies on reads that the inner stream is exactly + of the specified length */ +static svn_stream_t * +get_verify_length_stream(svn_stream_t *inner, + svn_filesize_t expected_size, + apr_pool_t *result_pool) +{ + struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb)); + 
svn_stream_t *len_stream = svn_stream_create(lvb, result_pool); + + lvb->inner = inner; + lvb->remaining = expected_size; + + svn_stream_set_read2(len_stream, NULL /* only full read support */, + read_handler_length_verify); + svn_stream_set_close(len_stream, close_handler_length_verify); + + return len_stream; +} + +svn_stream_t * +svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch, + apr_pool_t *result_pool) +{ + svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start, + bpatch->src_end, result_pool); + + s = svn_stream_compressed(s, result_pool); + + /* ### If we (ever) want to support the DELTA format, then we should hook the + undelta handling here */ + + return get_verify_length_stream(s, bpatch->src_filesize, result_pool); +} + +svn_stream_t * +svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch, + apr_pool_t *result_pool) +{ + svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start, + bpatch->dst_end, result_pool); + + s = svn_stream_compressed(s, result_pool); + + /* ### If we (ever) want to support the DELTA format, then we should hook the + undelta handling here */ + + return get_verify_length_stream(s, bpatch->dst_filesize, result_pool); +} + /* Try to parse a positive number from a decimal number encoded * in the string NUMBER. Return parsed number in OFFSET, and return * TRUE if parsing was successful. */ @@ -279,7 +642,8 @@ parse_hunk_header(const char *header, svn_diff_hunk_t *hunk, * Leading unidiff symbols ('+', '-', and ' ') are removed from the line, * Any lines commencing with the VERBOTEN character are discarded. * VERBOTEN should be '+' or '-', depending on which form of hunk text - * is being read. + * is being read. NO_FINAL_EOL declares if the hunk contains a no final + * EOL marker. * * All other parameters are as in svn_diff_hunk_readline_original_text() * and svn_diff_hunk_readline_modified_text(). 
@@ -291,6 +655,7 @@ hunk_readline_original_or_modified(apr_file_t *file, const char **eol, svn_boolean_t *eof, char verboten, + svn_boolean_t no_final_eol, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { @@ -298,27 +663,35 @@ hunk_readline_original_or_modified(apr_file_t *file, svn_boolean_t filtered; apr_off_t pos; svn_stringbuf_t *str; + const char *eol_p; + apr_pool_t *last_pool; + + if (!eol) + eol = &eol_p; if (range->current >= range->end) { /* We're past the range. Indicate that no bytes can be read. */ *eof = TRUE; - if (eol) - *eol = NULL; + *eol = NULL; *stringbuf = svn_stringbuf_create_empty(result_pool); return SVN_NO_ERROR; } - pos = 0; - SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool)); + SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool)); SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool)); + + /* It's not ITERPOOL because we use data allocated in LAST_POOL out + of the loop. */ + last_pool = svn_pool_create(scratch_pool); do { + svn_pool_clear(last_pool); + max_len = range->end - range->current; SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len, - result_pool, scratch_pool)); - range->current = 0; - SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool)); + last_pool, last_pool)); + SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool)); filtered = (str->data[0] == verboten || str->data[0] == '\\'); } while (filtered && ! *eof); @@ -327,6 +700,7 @@ hunk_readline_original_or_modified(apr_file_t *file, { /* EOF, return an empty string. */ *stringbuf = svn_stringbuf_create_ensure(0, result_pool); + *eol = NULL; } else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ') { @@ -335,12 +709,37 @@ hunk_readline_original_or_modified(apr_file_t *file, } else { - /* Return the line as-is. */ + /* Return the line as-is. 
Handle as a chopped leading spaces */ *stringbuf = svn_stringbuf_dup(str, result_pool); } + if (!filtered && *eof && !*eol && *str->data) + { + /* Ok, we miss a final EOL in the patch file, but didn't see a + no eol marker line. + + We should report that we had an EOL or the patch code will + misbehave (and it knows nothing about no eol markers) */ + + if (!no_final_eol && eol != &eol_p) + { + apr_off_t start = 0; + + SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool)); + + SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX, + scratch_pool, scratch_pool)); + + /* Every patch file that has hunks has at least one EOL*/ + SVN_ERR_ASSERT(*eol != NULL); + } + + *eof = FALSE; + /* Fall through to seek back to the right location */ + } SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool)); + svn_pool_destroy(last_pool); return SVN_NO_ERROR; } @@ -359,6 +758,9 @@ svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk, &hunk->original_text_range, stringbuf, eol, eof, hunk->patch->reverse ? '-' : '+', + hunk->patch->reverse + ? hunk->modified_no_final_eol + : hunk->original_no_final_eol, result_pool, scratch_pool)); } @@ -377,6 +779,9 @@ svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk, &hunk->modified_text_range, stringbuf, eol, eof, hunk->patch->reverse ? '+' : '-', + hunk->patch->reverse + ? hunk->original_no_final_eol + : hunk->modified_no_final_eol, result_pool, scratch_pool)); } @@ -391,28 +796,60 @@ svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk, svn_stringbuf_t *line; apr_size_t max_len; apr_off_t pos; + const char *eol_p; + + if (!eol) + eol = &eol_p; if (hunk->diff_text_range.current >= hunk->diff_text_range.end) { /* We're past the range. Indicate that no bytes can be read. 
*/ *eof = TRUE; - if (eol) - *eol = NULL; + *eol = NULL; *stringbuf = svn_stringbuf_create_empty(result_pool); return SVN_NO_ERROR; } - pos = 0; - SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool)); + SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool)); SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &hunk->diff_text_range.current, scratch_pool)); max_len = hunk->diff_text_range.end - hunk->diff_text_range.current; SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len, result_pool, scratch_pool)); - hunk->diff_text_range.current = 0; - SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, - &hunk->diff_text_range.current, scratch_pool)); + SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current, + hunk->apr_file, scratch_pool)); + + if (*eof && !*eol && *line->data) + { + /* Ok, we miss a final EOL in the patch file, but didn't see a + no eol marker line. + + We should report that we had an EOL or the patch code will + misbehave (and it knows nothing about no eol markers) */ + + if (eol != &eol_p) + { + /* Lets pick the first eol we find in our patch file */ + apr_off_t start = 0; + svn_stringbuf_t *str; + + SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start, + scratch_pool)); + + SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL, + APR_SIZE_MAX, + scratch_pool, scratch_pool)); + + /* Every patch file that has hunks has at least one EOL*/ + SVN_ERR_ASSERT(*eol != NULL); + } + + *eof = FALSE; + + /* Fall through to seek back to the right location */ + } + SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool)); if (hunk->patch->reverse) @@ -619,6 +1056,8 @@ parse_next_hunk(svn_diff_hunk_t **hunk, apr_off_t start, end; apr_off_t original_end; apr_off_t modified_end; + svn_boolean_t original_no_final_eol = FALSE; + svn_boolean_t modified_no_final_eol = FALSE; svn_linenum_t original_lines; svn_linenum_t modified_lines; svn_linenum_t leading_context; @@ -654,9 +1093,8 @@ 
parse_next_hunk(svn_diff_hunk_t **hunk, modified_end = 0; *hunk = apr_pcalloc(result_pool, sizeof(**hunk)); - /* Get current seek position -- APR has no ftell() :( */ - pos = 0; - SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool)); + /* Get current seek position. */ + SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool)); /* Start out assuming noise. */ last_line_type = noise_line; @@ -673,8 +1111,7 @@ parse_next_hunk(svn_diff_hunk_t **hunk, iterpool, iterpool)); /* Update line offset for next iteration. */ - pos = 0; - SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool)); + SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool)); /* Lines starting with a backslash indicate a missing EOL: * "\ No newline at end of file" or "end of property". */ @@ -715,6 +1152,11 @@ parse_next_hunk(svn_diff_hunk_t **hunk, } SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool)); + /* Set for the type and context by using != the other type */ + if (last_line_type != modified_line) + original_no_final_eol = TRUE; + if (last_line_type != original_line) + modified_no_final_eol = TRUE; } continue; @@ -728,7 +1170,13 @@ parse_next_hunk(svn_diff_hunk_t **hunk, SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch, result_pool, iterpool)); if (found_mergeinfo) - continue; /* Proceed to the next line in the patch. */ + continue; /* Proceed to the next line in the svn:mergeinfo hunk. */ + else + { + /* Perhaps we can also use original_lines/modified_lines here */ + + in_hunk = FALSE; /* On to next property */ + } } if (in_hunk) @@ -745,24 +1193,38 @@ parse_next_hunk(svn_diff_hunk_t **hunk, } c = line->data[0]; - if (original_lines > 0 && modified_lines > 0 && - ((c == ' ') + if (c == ' ' + || ((original_lines > 0 && modified_lines > 0) + && ( /* Tolerate chopped leading spaces on empty lines. */ - || (! eof && line->len == 0) + (! eof && line->len == 0) /* Maybe tolerate chopped leading spaces on non-empty lines. 
*/ - || (ignore_whitespace && c != del && c != add))) + || (ignore_whitespace && c != del && c != add)))) { /* It's a "context" line in the hunk. */ hunk_seen = TRUE; - original_lines--; - modified_lines--; + if (original_lines > 0) + original_lines--; + else + { + (*hunk)->original_length++; + (*hunk)->original_fuzz++; + } + if (modified_lines > 0) + modified_lines--; + else + { + (*hunk)->modified_length++; + (*hunk)->modified_fuzz++; + } if (changed_line_seen) trailing_context++; else leading_context++; last_line_type = context_line; } - else if (original_lines > 0 && c == del) + else if (c == del + && (original_lines > 0 || line->data[1] != del)) { /* It's a "deleted" line in the hunk. */ hunk_seen = TRUE; @@ -773,10 +1235,17 @@ parse_next_hunk(svn_diff_hunk_t **hunk, if (trailing_context > 0) trailing_context = 0; - original_lines--; + if (original_lines > 0) + original_lines--; + else + { + (*hunk)->original_length++; + (*hunk)->original_fuzz++; + } last_line_type = original_line; } - else if (modified_lines > 0 && c == add) + else if (c == add + && (modified_lines > 0 || line->data[1] != add)) { /* It's an "added" line in the hunk. */ hunk_seen = TRUE; @@ -787,7 +1256,13 @@ parse_next_hunk(svn_diff_hunk_t **hunk, if (trailing_context > 0) trailing_context = 0; - modified_lines--; + if (modified_lines > 0) + modified_lines--; + else + { + (*hunk)->modified_length++; + (*hunk)->modified_fuzz++; + } last_line_type = modified_line; } else @@ -803,7 +1278,6 @@ parse_next_hunk(svn_diff_hunk_t **hunk, * after the hunk text. */ end = last_line; } - if (original_end == 0) original_end = end; if (modified_end == 0) @@ -843,14 +1317,16 @@ parse_next_hunk(svn_diff_hunk_t **hunk, SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ", result_pool)); if (*prop_name) - *prop_operation = svn_diff_op_added; + *prop_operation = (patch->reverse ? 
svn_diff_op_deleted + : svn_diff_op_added); } else if (starts_with(line->data, "Deleted: ")) { SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ", result_pool)); if (*prop_name) - *prop_operation = svn_diff_op_deleted; + *prop_operation = (patch->reverse ? svn_diff_op_added + : svn_diff_op_deleted); } else if (starts_with(line->data, "Modified: ")) { @@ -878,6 +1354,21 @@ parse_next_hunk(svn_diff_hunk_t **hunk, if (hunk_seen && start < end) { + /* Did we get the number of context lines announced in the header? + + If not... let's limit the number from the header to what we + actually have, and apply a fuzz penalty */ + if (original_lines) + { + (*hunk)->original_length -= original_lines; + (*hunk)->original_fuzz += original_lines; + } + if (modified_lines) + { + (*hunk)->modified_length -= modified_lines; + (*hunk)->modified_fuzz += modified_lines; + } + (*hunk)->patch = patch; (*hunk)->apr_file = apr_file; (*hunk)->leading_context = leading_context; @@ -891,6 +1382,8 @@ parse_next_hunk(svn_diff_hunk_t **hunk, (*hunk)->modified_text_range.start = start; (*hunk)->modified_text_range.current = start; (*hunk)->modified_text_range.end = modified_end; + (*hunk)->original_no_final_eol = original_no_final_eol; + (*hunk)->modified_no_final_eol = modified_no_final_eol; } else /* Something went wrong, just discard the result. */ @@ -917,16 +1410,19 @@ compare_hunks(const void *a, const void *b) /* Possible states of the diff header parser. 
*/ enum parse_state { - state_start, /* initial */ - state_git_diff_seen, /* diff --git */ - state_git_tree_seen, /* a tree operation, rather then content change */ - state_git_minus_seen, /* --- /dev/null; or --- a/ */ - state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ - state_move_from_seen, /* rename from foo.c */ - state_copy_from_seen, /* copy from foo.c */ - state_minus_seen, /* --- foo.c */ - state_unidiff_found, /* valid start of a regular unidiff header */ - state_git_header_found /* valid start of a --git diff header */ + state_start, /* initial */ + state_git_diff_seen, /* diff --git */ + state_git_tree_seen, /* a tree operation, rather than content change */ + state_git_minus_seen, /* --- /dev/null; or --- a/ */ + state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ + state_old_mode_seen, /* old mode 100644 */ + state_git_mode_seen, /* new mode 100644 */ + state_move_from_seen, /* rename from foo.c */ + state_copy_from_seen, /* copy from foo.c */ + state_minus_seen, /* --- foo.c */ + state_unidiff_found, /* valid start of a regular unidiff header */ + state_git_header_found, /* valid start of a --git diff header */ + state_binary_patch_found /* valid start of binary patch */ }; /* Data type describing a valid state transition of the parser. */ @@ -1153,6 +1649,139 @@ git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, return SVN_NO_ERROR; } +/* Helper for git_old_mode() and git_new_mode(). Translate the git + * file mode MODE_STR into a binary "executable?" and "symlink?" state. */ +static svn_error_t * +parse_git_mode_bits(svn_tristate_t *executable_p, + svn_tristate_t *symlink_p, + const char *mode_str) +{ + apr_uint64_t mode; + SVN_ERR(svn_cstring_strtoui64(&mode, mode_str, + 0 /* min */, + 0777777 /* max: six octal digits */, + 010 /* radix (octal) */)); + + /* Note: 0644 and 0755 are the only modes that can occur for plain files. 
+ * We deliberately choose to parse only those values: we are strict in what + * we accept _and_ in what we produce. + * + * (Having said that, though, we could consider relaxing the parser to also + * map + * (mode & 0111) == 0000 -> svn_tristate_false + * (mode & 0111) == 0111 -> svn_tristate_true + * [anything else] -> svn_tristate_unknown + * .) + */ + + switch (mode & 0777) + { + case 0644: + *executable_p = svn_tristate_false; + break; + + case 0755: + *executable_p = svn_tristate_true; + break; + + default: + /* Ignore unknown values. */ + *executable_p = svn_tristate_unknown; + break; + } + + switch (mode & 0170000 /* S_IFMT */) + { + case 0120000: /* S_IFLNK */ + *symlink_p = svn_tristate_true; + break; + + case 0100000: /* S_IFREG */ + case 0040000: /* S_IFDIR */ + *symlink_p = svn_tristate_false; + break; + + default: + /* Ignore unknown values. + (Including those generated by Subversion <= 1.9) */ + *symlink_p = svn_tristate_unknown; + break; + } + + return SVN_NO_ERROR; +} + +/* Parse the 'old mode ' line of a git extended unidiff. */ +static svn_error_t * +git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit, + &patch->old_symlink_bit, + line + STRLEN_LITERAL("old mode "))); + +#ifdef SVN_DEBUG + /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */ + SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown); +#endif + + *new_state = state_old_mode_seen; + return SVN_NO_ERROR; +} + +/* Parse the 'new mode ' line of a git extended unidiff. 
*/ +static svn_error_t * +git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit, + &patch->new_symlink_bit, + line + STRLEN_LITERAL("new mode "))); + +#ifdef SVN_DEBUG + /* If this assert trips, the "new mode" is neither ...644 nor ...755 . */ + SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown); +#endif + + /* Don't touch patch->operation. */ + + *new_state = state_git_mode_seen; + return SVN_NO_ERROR; +} + +static svn_error_t * +git_index(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + /* We either have something like "index 33e5b38..0000000" (which we just + ignore as we are not interested in git specific shas) or something like + "index 33e5b38..0000000 120000" which tells us the mode, that isn't + changed by applying this patch. + + If the mode would have changed then we would see 'old mode' and 'new mode' + lines. + */ + line = strchr(line + STRLEN_LITERAL("index "), ' '); + + if (line && patch->new_executable_bit == svn_tristate_unknown + && patch->new_symlink_bit == svn_tristate_unknown + && patch->operation != svn_diff_op_added + && patch->operation != svn_diff_op_deleted) + { + SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit, + &patch->new_symlink_bit, + line + 1)); + + /* There is no change.. so set the old values to the new values */ + patch->old_executable_bit = patch->new_executable_bit; + patch->old_symlink_bit = patch->new_symlink_bit; + } + + /* This function doesn't change the state! */ + /* *new_state = *new_state */ + return SVN_NO_ERROR; +} + /* Parse the 'rename from ' line of a git extended unidiff. 
*/ static svn_error_t * git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch, @@ -1213,6 +1842,10 @@ static svn_error_t * git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { + SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit, + &patch->new_symlink_bit, + line + STRLEN_LITERAL("new file mode "))); + patch->operation = svn_diff_op_added; /* Filename already retrieved from diff --git header. */ @@ -1226,6 +1859,10 @@ static svn_error_t * git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { + SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit, + &patch->old_symlink_bit, + line + STRLEN_LITERAL("deleted file mode "))); + patch->operation = svn_diff_op_deleted; /* Filename already retrieved from diff --git header. */ @@ -1234,6 +1871,16 @@ git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, return SVN_NO_ERROR; } +/* Parse the 'GIT binary patch' header */ +static svn_error_t * +binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + *new_state = state_binary_patch_found; + return SVN_NO_ERROR; +} + + /* Add a HUNK associated with the property PROP_NAME to PATCH. 
*/ static svn_error_t * add_property_hunk(svn_patch_t *patch, const char *prop_name, @@ -1346,24 +1993,163 @@ parse_hunks(svn_patch_t *patch, apr_file_t *apr_file, return SVN_NO_ERROR; } +static svn_error_t * +parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file, + svn_boolean_t reverse, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) +{ + apr_pool_t *iterpool = svn_pool_create(scratch_pool); + apr_off_t pos, last_line; + svn_stringbuf_t *line; + svn_boolean_t eof = FALSE; + svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch)); + svn_boolean_t in_blob = FALSE; + svn_boolean_t in_src = FALSE; + + bpatch->apr_file = apr_file; + + patch->prop_patches = apr_hash_make(result_pool); + + SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool)); + + while (!eof) + { + last_line = pos; + SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, + iterpool, iterpool)); + + /* Update line offset for next iteration. */ + SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool)); + + if (in_blob) + { + char c = line->data[0]; + + /* 66 = len byte + (52/4*5) chars */ + if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) + && line->len <= 66 + && !strchr(line->data, ':') + && !strchr(line->data, ' ')) + { + /* One more blob line */ + if (in_src) + bpatch->src_end = pos; + else + bpatch->dst_end = pos; + } + else if (svn_stringbuf_first_non_whitespace(line) < line->len + && !(in_src && bpatch->src_start < last_line)) + { + break; /* Bad patch */ + } + else if (in_src) + { + patch->binary_patch = bpatch; /* SUCCESS! 
*/ + break; + } + else + { + in_blob = FALSE; + in_src = TRUE; + } + } + else if (starts_with(line->data, "literal ")) + { + apr_uint64_t expanded_size; + svn_error_t *err = svn_cstring_strtoui64(&expanded_size, + &line->data[8], + 0, APR_UINT64_MAX, 10); + + if (err) + { + svn_error_clear(err); + break; + } + + if (in_src) + { + bpatch->src_start = pos; + bpatch->src_filesize = expanded_size; + } + else + { + bpatch->dst_start = pos; + bpatch->dst_filesize = expanded_size; + } + in_blob = TRUE; + } + else + break; /* We don't support GIT deltas (yet) */ + } + svn_pool_destroy(iterpool); + + if (!eof) + /* Rewind to the start of the line just read, so subsequent calls + * don't end up skipping the line. It may contain a patch or hunk header.*/ + SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); + else if (in_src + && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize)) + { + patch->binary_patch = bpatch; /* SUCCESS */ + } + + /* Reverse patch if requested */ + if (reverse && patch->binary_patch) + { + apr_off_t tmp_start = bpatch->src_start; + apr_off_t tmp_end = bpatch->src_end; + svn_filesize_t tmp_filesize = bpatch->src_filesize; + + bpatch->src_start = bpatch->dst_start; + bpatch->src_end = bpatch->dst_end; + bpatch->src_filesize = bpatch->dst_filesize; + + bpatch->dst_start = tmp_start; + bpatch->dst_end = tmp_end; + bpatch->dst_filesize = tmp_filesize; + } + + return SVN_NO_ERROR; +} + /* State machine for the diff header parser. 
* Expected Input Required state Function to call */ static struct transition transitions[] = { - {"--- ", state_start, diff_minus}, - {"+++ ", state_minus_seen, diff_plus}, - {"diff --git", state_start, git_start}, - {"--- a/", state_git_diff_seen, git_minus}, - {"--- a/", state_git_tree_seen, git_minus}, - {"--- /dev/null", state_git_tree_seen, git_minus}, - {"+++ b/", state_git_minus_seen, git_plus}, - {"+++ /dev/null", state_git_minus_seen, git_plus}, - {"rename from ", state_git_diff_seen, git_move_from}, - {"rename to ", state_move_from_seen, git_move_to}, - {"copy from ", state_git_diff_seen, git_copy_from}, - {"copy to ", state_copy_from_seen, git_copy_to}, - {"new file ", state_git_diff_seen, git_new_file}, - {"deleted file ", state_git_diff_seen, git_deleted_file}, + {"--- ", state_start, diff_minus}, + {"+++ ", state_minus_seen, diff_plus}, + + {"diff --git", state_start, git_start}, + {"--- a/", state_git_diff_seen, git_minus}, + {"--- a/", state_git_mode_seen, git_minus}, + {"--- a/", state_git_tree_seen, git_minus}, + {"--- /dev/null", state_git_mode_seen, git_minus}, + {"--- /dev/null", state_git_tree_seen, git_minus}, + {"+++ b/", state_git_minus_seen, git_plus}, + {"+++ /dev/null", state_git_minus_seen, git_plus}, + + {"old mode ", state_git_diff_seen, git_old_mode}, + {"new mode ", state_old_mode_seen, git_new_mode}, + + {"rename from ", state_git_diff_seen, git_move_from}, + {"rename from ", state_git_mode_seen, git_move_from}, + {"rename to ", state_move_from_seen, git_move_to}, + + {"copy from ", state_git_diff_seen, git_copy_from}, + {"copy from ", state_git_mode_seen, git_copy_from}, + {"copy to ", state_copy_from_seen, git_copy_to}, + + {"new file ", state_git_diff_seen, git_new_file}, + + {"deleted file ", state_git_diff_seen, git_deleted_file}, + + {"index ", state_git_diff_seen, git_index}, + {"index ", state_git_tree_seen, git_index}, + {"index ", state_git_mode_seen, git_index}, + + {"GIT binary patch", state_git_diff_seen, 
binary_patch_start}, + {"GIT binary patch", state_git_tree_seen, binary_patch_start}, + {"GIT binary patch", state_git_mode_seen, binary_patch_start}, }; svn_error_t * @@ -1389,6 +2175,10 @@ svn_diff_parse_next_patch(svn_patch_t **patch_p, } patch = apr_pcalloc(result_pool, sizeof(*patch)); + patch->old_executable_bit = svn_tristate_unknown; + patch->new_executable_bit = svn_tristate_unknown; + patch->old_symlink_bit = svn_tristate_unknown; + patch->new_symlink_bit = svn_tristate_unknown; pos = patch_file->next_patch_offset; SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool)); @@ -1410,9 +2200,8 @@ svn_diff_parse_next_patch(svn_patch_t **patch_p, if (! eof) { /* Update line offset for next iteration. */ - pos = 0; - SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos, - iterpool)); + SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file, + iterpool)); } /* Run the state machine. */ @@ -1428,32 +2217,32 @@ svn_diff_parse_next_patch(svn_patch_t **patch_p, } } - if (state == state_unidiff_found || state == state_git_header_found) + if (state == state_unidiff_found + || state == state_git_header_found + || state == state_binary_patch_found) { /* We have a valid diff header, yay! */ break; } - else if (state == state_git_tree_seen && line_after_tree_header_read) + else if ((state == state_git_tree_seen || state == state_git_mode_seen) + && line_after_tree_header_read + && !valid_header_line) { - /* git patches can contain an index line after the file mode line */ - if (!starts_with(line->data, "index ")) - { - /* We have a valid diff header for a patch with only tree changes. - * Rewind to the start of the line just read, so subsequent calls - * to this function don't end up skipping the line -- it may - * contain a patch. */ - SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, - scratch_pool)); - break; - } + /* We have a valid diff header for a patch with only tree changes. 
+ * Rewind to the start of the line just read, so subsequent calls + * to this function don't end up skipping the line -- it may + * contain a patch. */ + SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, + scratch_pool)); + break; } - else if (state == state_git_tree_seen) + else if (state == state_git_tree_seen + || state == state_git_mode_seen) { line_after_tree_header_read = TRUE; } else if (! valid_header_line && state != state_start - && state != state_git_diff_seen - && !starts_with(line->data, "index ")) + && state != state_git_diff_seen) { /* We've encountered an invalid diff header. * @@ -1471,9 +2260,38 @@ svn_diff_parse_next_patch(svn_patch_t **patch_p, if (reverse) { const char *temp; + svn_tristate_t ts_tmp; + temp = patch->old_filename; patch->old_filename = patch->new_filename; patch->new_filename = temp; + + switch (patch->operation) + { + case svn_diff_op_added: + patch->operation = svn_diff_op_deleted; + break; + case svn_diff_op_deleted: + patch->operation = svn_diff_op_added; + break; + + case svn_diff_op_modified: + break; /* Stays modified. */ + + case svn_diff_op_copied: + case svn_diff_op_moved: + break; /* Stays copied or moved, just in the other direction. */ + case svn_diff_op_unchanged: + break; /* Stays unchanged, of course. 
*/ + } + + ts_tmp = patch->old_executable_bit; + patch->old_executable_bit = patch->new_executable_bit; + patch->new_executable_bit = ts_tmp; + + ts_tmp = patch->old_symlink_bit; + patch->old_symlink_bit = patch->new_symlink_bit; + patch->new_symlink_bit = ts_tmp; } if (patch->old_filename == NULL || patch->new_filename == NULL) @@ -1482,16 +2300,24 @@ svn_diff_parse_next_patch(svn_patch_t **patch_p, patch = NULL; } else - SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace, - result_pool, iterpool)); + { + if (state == state_binary_patch_found) + { + SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse, + result_pool, iterpool)); + /* And fall through in property parsing */ + } + + SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace, + result_pool, iterpool)); + } svn_pool_destroy(iterpool); - patch_file->next_patch_offset = 0; - SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, - &patch_file->next_patch_offset, scratch_pool)); + SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset, + patch_file->apr_file, scratch_pool)); - if (patch) + if (patch && patch->hunks) { /* Usually, hunks appear in the patch sorted by their original line * offset. But just in case they weren't parsed in this order for |