aboutsummaryrefslogtreecommitdiff
path: root/lib/diff_output_unidiff.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/diff_output_unidiff.c')
-rw-r--r--lib/diff_output_unidiff.c602
1 files changed, 602 insertions, 0 deletions
diff --git a/lib/diff_output_unidiff.c b/lib/diff_output_unidiff.c
new file mode 100644
index 000000000000..d480a022a9a7
--- /dev/null
+++ b/lib/diff_output_unidiff.c
@@ -0,0 +1,602 @@
+/* Produce a unidiff output from a diff_result. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
+
+#include "diff_internal.h"
+#include "diff_debug.h"
+
+/* Return the 'pos' value stored in the first left-side atom of chunk c. */
+off_t
+diff_chunk_get_left_start_pos(const struct diff_chunk *c)
+{
+	return c->left_start->pos;
+}
+
+/* Return the 'pos' value stored in the first right-side atom of chunk c. */
+off_t
+diff_chunk_get_right_start_pos(const struct diff_chunk *c)
+{
+	return c->right_start->pos;
+}
+
+/*
+ * Report whether cc does not cover any chunk yet, i.e. whether its chunk
+ * index range is empty according to diff_range_empty().
+ */
+bool
+diff_chunk_context_empty(const struct diff_chunk_context *cc)
+{
+	return diff_range_empty(&cc->chunk);
+}
+
+/*
+ * Return the left-side start index of chunk c, as reported by
+ * diff_atom_root_idx(), moved up by context_lines and clamped at 0.
+ */
+int
+diff_chunk_get_left_start(const struct diff_chunk *c,
+    const struct diff_result *r, int context_lines)
+{
+	int first_idx = diff_atom_root_idx(r->left, c->left_start);
+
+	return MAX(0, first_idx - context_lines);
+}
+
+/*
+ * Return the left-side end index of chunk c: its start index plus its
+ * left_count, extended by context_lines and clamped to the number of atoms
+ * on the left side.
+ */
+int
+diff_chunk_get_left_end(const struct diff_chunk *c,
+    const struct diff_result *r, int context_lines)
+{
+	/* Start index without any context applied. */
+	int first_idx = diff_chunk_get_left_start(c, r, 0);
+
+	return MIN(r->left->atoms.len,
+	    first_idx + c->left_count + context_lines);
+}
+
+/*
+ * Return the right-side start index of chunk c, as reported by
+ * diff_atom_root_idx(), moved up by context_lines and clamped at 0.
+ */
+int
+diff_chunk_get_right_start(const struct diff_chunk *c,
+    const struct diff_result *r, int context_lines)
+{
+	int first_idx = diff_atom_root_idx(r->right, c->right_start);
+
+	return MAX(0, first_idx - context_lines);
+}
+
+/*
+ * Return the right-side end index of chunk c: its start index plus its
+ * right_count, extended by context_lines and clamped to the number of atoms
+ * on the right side.
+ */
+int
+diff_chunk_get_right_end(const struct diff_chunk *c,
+    const struct diff_result *r, int context_lines)
+{
+	/* Start index without any context applied. */
+	int first_idx = diff_chunk_get_right_start(c, r, 0);
+
+	return MIN(r->right->atoms.len,
+	    first_idx + c->right_count + context_lines);
+}
+
+/*
+ * Return a pointer to the chunk at position chunk_idx in r's chunk list.
+ * No bounds check is performed on chunk_idx.
+ */
+struct diff_chunk *
+diff_chunk_get(const struct diff_result *r, int chunk_idx)
+{
+	return r->chunks.head + chunk_idx;
+}
+
+/* Return the left_count field of chunk c. */
+int
+diff_chunk_get_left_count(struct diff_chunk *c)
+{
+	return c->left_count;
+}
+
+/* Return the right_count field of chunk c. */
+int
+diff_chunk_get_right_count(struct diff_chunk *c)
+{
+	return c->right_count;
+}
+
+/*
+ * Overwrite *cc with the context of the single chunk at chunk_idx in r:
+ * the chunk range [chunk_idx, chunk_idx + 1) and the left and right index
+ * ranges of that chunk, each widened by context_lines and clamped by the
+ * diff_chunk_get_{left,right}_{start,end}() helpers.
+ */
+void
+diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r,
+    int chunk_idx, int context_lines)
+{
+	const struct diff_chunk *chunk = &r->chunks.head[chunk_idx];
+	int l_from = diff_chunk_get_left_start(chunk, r, context_lines);
+	int l_to = diff_chunk_get_left_end(chunk, r, context_lines);
+	int r_from = diff_chunk_get_right_start(chunk, r, context_lines);
+	int r_to = diff_chunk_get_right_end(chunk, r, context_lines);
+	struct diff_chunk_context fresh = {
+		.chunk.start = chunk_idx,
+		.chunk.end = chunk_idx + 1,
+		.left.start = l_from,
+		.left.end = l_to,
+		.right.start = r_from,
+		.right.end = r_to,
+	};
+
+	*cc = fresh;
+}
+
+/*
+ * Report whether two chunk contexts should be joined: true as soon as their
+ * chunk ranges, their left ranges or their right ranges touch according to
+ * diff_ranges_touch(). The ranges are tested in that order.
+ */
+bool
+diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
+    const struct diff_chunk_context *other)
+{
+	if (diff_ranges_touch(&cc->chunk, &other->chunk))
+		return true;
+	if (diff_ranges_touch(&cc->left, &other->left))
+		return true;
+	return diff_ranges_touch(&cc->right, &other->right);
+}
+
+/*
+ * Merge the chunk, left and right ranges of 'other' into the corresponding
+ * ranges of cc using diff_ranges_merge(). Only cc is modified.
+ */
+void
+diff_chunk_contexts_merge(struct diff_chunk_context *cc,
+    const struct diff_chunk_context *other)
+{
+	diff_ranges_merge(&cc->chunk, &other->chunk);
+	diff_ranges_merge(&cc->left, &other->left);
+	diff_ranges_merge(&cc->right, &other->right);
+}
+
+/*
+ * Starting at start_chunk_idx, collect the next run of changed chunks
+ * (CHUNK_MINUS / CHUNK_PLUS) of 'result' into cc.
+ *
+ * Chunks of any other type that precede the first change are skipped. Once
+ * a change has been seen, the scan stops at the first chunk of another type,
+ * or at the first changed chunk whose context does not touch cc.
+ *
+ * cc is expected to be empty on entry (see diff_chunk_context_empty()); if
+ * it is not, changed chunks are merged into what it already holds.
+ *
+ * nchunks_used may be NULL. Otherwise it receives the number of chunks
+ * consumed: every skipped chunk, every chunk loaded or merged into cc, and
+ * the non-change chunk that ended the run, if any. A changed chunk that
+ * ended the scan because it does not touch cc is not counted.
+ */
+void
+diff_chunk_context_load_change(struct diff_chunk_context *cc,
+    int *nchunks_used,
+    struct diff_result *result,
+    int start_chunk_idx,
+    int context_lines)
+{
+	bool seen_change = false;
+	int idx;
+
+	if (nchunks_used != NULL)
+		*nchunks_used = 0;
+
+	for (idx = start_chunk_idx; idx < result->chunks.len; idx++) {
+		struct diff_chunk *chunk = &result->chunks.head[idx];
+		enum diff_chunk_type type = diff_chunk_type(chunk);
+		struct diff_chunk_context candidate;
+
+		if (type != CHUNK_MINUS && type != CHUNK_PLUS) {
+			/* Not a change: consume it, and stop if it follows
+			 * a change that was already collected. */
+			if (nchunks_used != NULL)
+				(*nchunks_used)++;
+			if (seen_change)
+				break;
+			continue;
+		}
+		seen_change = true;
+
+		if (diff_chunk_context_empty(cc)) {
+			/* First change found: note down the start point.
+			 * Subsequent chunks may be joined up to it. */
+			diff_chunk_context_get(cc, result, idx, context_lines);
+		} else {
+			/* A previous change is already noted down. Join this
+			 * one only if it touches or overlaps it. */
+			diff_chunk_context_get(&candidate, result, idx,
+			    context_lines);
+			if (!diff_chunk_contexts_touch(cc, &candidate))
+				break;
+			diff_chunk_contexts_merge(cc, &candidate);
+		}
+
+		if (nchunks_used != NULL)
+			(*nchunks_used)++;
+	}
+}
+
+/* State carried across successive calls that print unidiff chunks. */
+struct diff_output_unidiff_state {
+ /* Set once the "--- " / "+++ " label lines have been printed. */
+ bool header_printed;
+ /* Text appended to "@@" hunk lines when non-empty; filled in by
+  * diff_output_match_function_prototype(). */
+ char prototype[DIFF_FUNCTION_CONTEXT_SIZE];
+ /* Passed by pointer to diff_output_match_function_prototype();
+  * presumably where the last prototype search ended -- TODO confirm. */
+ int last_prototype_idx;
+};
+
+/*
+ * Allocate a zeroed unidiff output state and reset it to its initial
+ * values. Returns NULL if the allocation fails. The result is released with
+ * diff_output_unidiff_state_free().
+ */
+struct diff_output_unidiff_state *
+diff_output_unidiff_state_alloc(void)
+{
+	struct diff_output_unidiff_state *state = calloc(1, sizeof(*state));
+
+	if (state == NULL)
+		return NULL;
+
+	diff_output_unidiff_state_reset(state);
+	return state;
+}
+
+/*
+ * Return state to its initial values: no header printed yet, an empty
+ * prototype buffer, and a prototype index of 0.
+ */
+void
+diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
+{
+	memset(state->prototype, 0, sizeof(state->prototype));
+	state->last_prototype_idx = 0;
+	state->header_printed = false;
+}
+
+/* Release a state obtained from diff_output_unidiff_state_alloc(). */
+void
+diff_output_unidiff_state_free(struct diff_output_unidiff_state *state)
+{
+	free(state);
+}
+
+/*
+ * Print the unidiff hunk described by cc to dest.
+ *
+ * outinfo: if not NULL, the end offset and the type of every header line
+ *   printed here are appended to outinfo->line_offsets and
+ *   outinfo->line_types. Offsets continue from the last offset already
+ *   recorded. The same outinfo is handed on to the line printing helpers.
+ * state: carries header_printed and the prototype buffer across calls.
+ * print_header: if true and state says no header was printed yet, the
+ *   "--- " and "+++ " label lines are printed first.
+ * show_function_prototypes: if true, state->prototype is refreshed via
+ *   diff_output_match_function_prototype() before the "@@" line is printed.
+ *
+ * Returns DIFF_RC_OK on success (also when cc covers no lines on either
+ * side, in which case nothing is printed), errno if fprintf() fails, ENOMEM
+ * if outinfo cannot grow, or the error code of a failing helper.
+ */
+static int
+output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest,
+    struct diff_output_unidiff_state *state,
+    const struct diff_input_info *info,
+    const struct diff_result *result,
+    bool print_header, bool show_function_prototypes,
+    const struct diff_chunk_context *cc)
+{
+	int rc, left_start, left_len, right_start, right_len;
+	off_t outoff = 0, *offp;
+	uint8_t *typep;
+
+	if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
+		return DIFF_RC_OK;
+
+	/* Continue counting output bytes from the last recorded line. */
+	if (outinfo && outinfo->line_offsets.len > 0) {
+		unsigned int idx = outinfo->line_offsets.len - 1;
+		outoff = outinfo->line_offsets.head[idx];
+	}
+
+	if (print_header && !(state->header_printed)) {
+		rc = fprintf(dest, "--- %s\n",
+		    diff_output_get_label_left(info));
+		if (rc < 0)
+			return errno;
+		if (outinfo) {
+			ARRAYLIST_ADD(offp, outinfo->line_offsets);
+			if (offp == NULL)
+				return ENOMEM;
+			outoff += rc;
+			*offp = outoff;
+			ARRAYLIST_ADD(typep, outinfo->line_types);
+			if (typep == NULL)
+				return ENOMEM;
+			*typep = DIFF_LINE_MINUS;
+		}
+		rc = fprintf(dest, "+++ %s\n",
+		    diff_output_get_label_right(info));
+		if (rc < 0)
+			return errno;
+		if (outinfo) {
+			ARRAYLIST_ADD(offp, outinfo->line_offsets);
+			if (offp == NULL)
+				return ENOMEM;
+			outoff += rc;
+			*offp = outoff;
+			ARRAYLIST_ADD(typep, outinfo->line_types);
+			if (typep == NULL)
+				return ENOMEM;
+			*typep = DIFF_LINE_PLUS;
+		}
+		state->header_printed = true;
+	}
+
+	/* Line numbers shown in the "@@" line are 1-based, except that 0 is
+	 * shown for a side without any atoms, and an empty range that does
+	 * not start at index 0 shows its start index unchanged. */
+	left_len = cc->left.end - cc->left.start;
+	if (result->left->atoms.len == 0)
+		left_start = 0;
+	else if (left_len == 0 && cc->left.start > 0)
+		left_start = cc->left.start;
+	else
+		left_start = cc->left.start + 1;
+
+	right_len = cc->right.end - cc->right.start;
+	if (result->right->atoms.len == 0)
+		right_start = 0;
+	else if (right_len == 0 && cc->right.start > 0)
+		right_start = cc->right.start;
+	else
+		right_start = cc->right.start + 1;
+
+	if (show_function_prototypes) {
+		rc = diff_output_match_function_prototype(state->prototype,
+		    sizeof(state->prototype), &state->last_prototype_idx,
+		    result, cc);
+		if (rc)
+			return rc;
+	}
+
+	/* A length of exactly 1 is omitted from the "@@" line. */
+	if (left_len == 1 && right_len == 1) {
+		rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
+		    left_start, right_start,
+		    state->prototype[0] ? " " : "",
+		    state->prototype[0] ? state->prototype : "");
+	} else if (left_len == 1 && right_len != 1) {
+		rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
+		    left_start, right_start, right_len,
+		    state->prototype[0] ? " " : "",
+		    state->prototype[0] ? state->prototype : "");
+	} else if (left_len != 1 && right_len == 1) {
+		rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
+		    left_start, left_len, right_start,
+		    state->prototype[0] ? " " : "",
+		    state->prototype[0] ? state->prototype : "");
+	} else {
+		rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
+		    left_start, left_len, right_start, right_len,
+		    state->prototype[0] ? " " : "",
+		    state->prototype[0] ? state->prototype : "");
+	}
+	if (rc < 0)
+		return errno;
+	if (outinfo) {
+		ARRAYLIST_ADD(offp, outinfo->line_offsets);
+		if (offp == NULL)
+			return ENOMEM;
+		outoff += rc;
+		*offp = outoff;
+		ARRAYLIST_ADD(typep, outinfo->line_types);
+		if (typep == NULL)
+			return ENOMEM;
+		*typep = DIFF_LINE_HUNK;
+	}
+
+	/* Got the absolute line numbers where to start printing, and the index
+	 * of the interesting (non-context) chunk.
+	 * To print context lines above the interesting chunk, nipping on the
+	 * previous chunk index may be necessary.
+	 * It is guaranteed to be only context lines where left == right, so it
+	 * suffices to look on the left. */
+	const struct diff_chunk *first_chunk;
+	int chunk_start_line;
+	first_chunk = &result->chunks.head[cc->chunk.start];
+	chunk_start_line = diff_atom_root_idx(result->left,
+	    first_chunk->left_start);
+	if (cc->left.start < chunk_start_line) {
+		rc = diff_output_lines(outinfo, dest, " ",
+		    &result->left->atoms.head[cc->left.start],
+		    chunk_start_line - cc->left.start);
+		if (rc)
+			return rc;
+	}
+
+	/* Now write out all the joined chunks and contexts between them */
+	int c_idx;
+	for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
+		const struct diff_chunk *c = &result->chunks.head[c_idx];
+
+		if (c->left_count && c->right_count)
+			rc = diff_output_lines(outinfo, dest,
+			    c->solved ? " " : "?",
+			    c->left_start, c->left_count);
+		else if (c->left_count && !c->right_count)
+			rc = diff_output_lines(outinfo, dest,
+			    c->solved ? "-" : "?",
+			    c->left_start, c->left_count);
+		else if (c->right_count && !c->left_count)
+			rc = diff_output_lines(outinfo, dest,
+			    c->solved ? "+" : "?",
+			    c->right_start, c->right_count);
+		else {
+			/* Chunk without lines on either side: nothing is
+			 * printed. Reset rc so that a stale value, such as
+			 * the byte count fprintf() returned for the "@@"
+			 * line, is not mistaken for an error below. */
+			rc = DIFF_RC_OK;
+		}
+		if (rc)
+			return rc;
+
+		if (cc->chunk.end == result->chunks.len) {
+			rc = diff_output_trailing_newline_msg(outinfo, dest, c);
+			if (rc != DIFF_RC_OK)
+				return rc;
+		}
+	}
+
+	/* Trailing context? */
+	const struct diff_chunk *last_chunk;
+	int chunk_end_line;
+	last_chunk = &result->chunks.head[cc->chunk.end - 1];
+	chunk_end_line = diff_atom_root_idx(result->left,
+	    last_chunk->left_start
+	    + last_chunk->left_count);
+	if (cc->left.end > chunk_end_line) {
+		rc = diff_output_lines(outinfo, dest, " ",
+		    &result->left->atoms.head[chunk_end_line],
+		    cc->left.end - chunk_end_line);
+		if (rc)
+			return rc;
+
+		/* The context runs up to the end of the left side. */
+		if (cc->left.end == result->left->atoms.len) {
+			rc = diff_output_trailing_newline_msg(outinfo, dest,
+			    &result->chunks.head[result->chunks.len - 1]);
+			if (rc != DIFF_RC_OK)
+				return rc;
+		}
+	}
+
+	return DIFF_RC_OK;
+}
+
+/*
+ * Print the single unidiff hunk described by cc to dest, without the
+ * "--- " / "+++ " file header.
+ *
+ * If output_info is not NULL, *output_info is set to a newly allocated
+ * diff_output_info which is filled in while printing; ENOMEM is returned if
+ * that allocation fails. Function prototypes are shown if either side of
+ * 'result' has DIFF_FLAG_SHOW_PROTOTYPES set. Otherwise the return value is
+ * that of output_unidiff_chunk().
+ */
+int
+diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
+    struct diff_output_unidiff_state *state,
+    const struct diff_input_info *info,
+    const struct diff_result *result,
+    const struct diff_chunk_context *cc)
+{
+	int flags = (result->left->root->diff_flags |
+	    result->right->root->diff_flags);
+	bool show_function_prototypes =
+	    (flags & DIFF_FLAG_SHOW_PROTOTYPES) != 0;
+	struct diff_output_info *outinfo = NULL;
+
+	if (output_info != NULL) {
+		outinfo = diff_output_info_alloc();
+		*output_info = outinfo;
+		if (outinfo == NULL)
+			return ENOMEM;
+	}
+
+	return output_unidiff_chunk(outinfo, dest, state, info,
+	    result, false, show_function_prototypes, cc);
+}
+
+/*
+ * Print 'result' to dest in unified diff format, showing context_lines
+ * lines of context around each change. Changes whose contexts touch or
+ * overlap are joined into one hunk.
+ *
+ * If binary data was found by the atomizer and DIFF_FLAG_FORCE_TEXT_DATA is
+ * not set, a single "Binary files ... differ" line is printed instead,
+ * provided at least one changed chunk exists.
+ *
+ * output_info: may be NULL. Otherwise *output_info is set to a newly
+ *   allocated diff_output_info describing the printed lines. It is freed
+ *   here, and *output_info reset to NULL, only if allocating the output
+ *   state fails; on every other return, including errors, it stays
+ *   allocated.
+ *
+ * Returns EINVAL if result is NULL, result->rc if the diff itself failed,
+ * ENOMEM on allocation failure, errno if fprintf() fails, the error of the
+ * first hunk that could not be printed, or DIFF_RC_OK.
+ */
+int
+diff_output_unidiff(struct diff_output_info **output_info,
+    FILE *dest, const struct diff_input_info *info,
+    const struct diff_result *result,
+    unsigned int context_lines)
+{
+	struct diff_output_unidiff_state *state;
+	struct diff_chunk_context cc = {};
+	struct diff_output_info *outinfo = NULL;
+	int atomizer_flags, flags;
+	bool show_function_prototypes, force_text, have_binary;
+	off_t outoff = 0, *offp;
+	uint8_t *typep;
+	int rc = DIFF_RC_OK, i;
+
+	/* Validate result before any of its members are read. */
+	if (!result)
+		return EINVAL;
+	if (result->rc != DIFF_RC_OK)
+		return result->rc;
+
+	atomizer_flags = (result->left->atomizer_flags|
+	    result->right->atomizer_flags);
+	flags = (result->left->root->diff_flags |
+	    result->right->root->diff_flags);
+	show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
+	force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
+	have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
+
+	if (output_info) {
+		*output_info = diff_output_info_alloc();
+		if (*output_info == NULL)
+			return ENOMEM;
+		outinfo = *output_info;
+	}
+
+	if (have_binary && !force_text) {
+		/* Report a difference once, at the first changed chunk. */
+		for (i = 0; i < result->chunks.len; i++) {
+			struct diff_chunk *c = &result->chunks.head[i];
+			enum diff_chunk_type t = diff_chunk_type(c);
+
+			if (t != CHUNK_MINUS && t != CHUNK_PLUS)
+				continue;
+
+			if (outinfo && outinfo->line_offsets.len > 0) {
+				unsigned int idx =
+				    outinfo->line_offsets.len - 1;
+				outoff = outinfo->line_offsets.head[idx];
+			}
+
+			rc = fprintf(dest, "Binary files %s and %s differ\n",
+			    diff_output_get_label_left(info),
+			    diff_output_get_label_right(info));
+			/* A negative count must not reach the offset. */
+			if (rc < 0)
+				return errno;
+			if (outinfo) {
+				ARRAYLIST_ADD(offp, outinfo->line_offsets);
+				if (offp == NULL)
+					return ENOMEM;
+				outoff += rc;
+				*offp = outoff;
+				ARRAYLIST_ADD(typep, outinfo->line_types);
+				if (typep == NULL)
+					return ENOMEM;
+				*typep = DIFF_LINE_NONE;
+			}
+			break;
+		}
+
+		return DIFF_RC_OK;
+	}
+
+	state = diff_output_unidiff_state_alloc();
+	if (state == NULL) {
+		if (output_info) {
+			diff_output_info_free(*output_info);
+			*output_info = NULL;
+		}
+		return ENOMEM;
+	}
+
+#if DEBUG
+	unsigned int check_left_pos, check_right_pos;
+	check_left_pos = 0;
+	check_right_pos = 0;
+	for (i = 0; i < result->chunks.len; i++) {
+		struct diff_chunk *c = &result->chunks.head[i];
+		enum diff_chunk_type t = diff_chunk_type(c);
+
+		debug("[%d] %s lines L%d R%d @L %d @R %d\n",
+		    i, (t == CHUNK_MINUS ? "minus" :
+		    (t == CHUNK_PLUS ? "plus" :
+		    (t == CHUNK_SAME ? "same" : "?"))),
+		    c->left_count,
+		    c->right_count,
+		    c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1,
+		    c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1);
+		assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start));
+		assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start));
+		check_left_pos += c->left_count;
+		check_right_pos += c->right_count;
+
+	}
+	assert(check_left_pos == result->left->atoms.len);
+	assert(check_right_pos == result->right->atoms.len);
+#endif
+
+	for (i = 0; i < result->chunks.len; i++) {
+		struct diff_chunk *c = &result->chunks.head[i];
+		enum diff_chunk_type t = diff_chunk_type(c);
+		struct diff_chunk_context next;
+
+		if (t != CHUNK_MINUS && t != CHUNK_PLUS)
+			continue;
+
+		if (diff_chunk_context_empty(&cc)) {
+			/* These are the first lines being printed.
+			 * Note down the start point, any number of subsequent
+			 * chunks may be joined up to this unidiff chunk by
+			 * context lines or by being directly adjacent. */
+			diff_chunk_context_get(&cc, result, i, context_lines);
+			debug("new chunk to be printed:"
+			    " chunk %d-%d left %d-%d right %d-%d\n",
+			    cc.chunk.start, cc.chunk.end,
+			    cc.left.start, cc.left.end,
+			    cc.right.start, cc.right.end);
+			continue;
+		}
+
+		/* There already is a previous chunk noted down for being
+		 * printed. Does it join up with this one? */
+		diff_chunk_context_get(&next, result, i, context_lines);
+		debug("new chunk to be printed:"
+		    " chunk %d-%d left %d-%d right %d-%d\n",
+		    next.chunk.start, next.chunk.end,
+		    next.left.start, next.left.end,
+		    next.right.start, next.right.end);
+
+		if (diff_chunk_contexts_touch(&cc, &next)) {
+			/* This next context touches or overlaps the previous
+			 * one, join. */
+			diff_chunk_contexts_merge(&cc, &next);
+			debug("new chunk to be printed touches previous chunk,"
+			    " now: left %d-%d right %d-%d\n",
+			    cc.left.start, cc.left.end,
+			    cc.right.start, cc.right.end);
+			continue;
+		}
+
+		/* No touching, so the previous context is complete with a gap
+		 * between it and this next one. Print the previous one and
+		 * start fresh here. */
+		debug("new chunk to be printed does not touch previous chunk;"
+		    " print left %d-%d right %d-%d\n",
+		    cc.left.start, cc.left.end, cc.right.start, cc.right.end);
+		rc = output_unidiff_chunk(outinfo, dest, state, info, result,
+		    true, show_function_prototypes, &cc);
+		/* Stop at the first hunk that fails instead of reporting
+		 * success for truncated output. */
+		if (rc != DIFF_RC_OK)
+			goto done;
+		cc = next;
+		debug("new unprinted chunk is left %d-%d right %d-%d\n",
+		    cc.left.start, cc.left.end, cc.right.start, cc.right.end);
+	}
+
+	/* Print the last pending hunk, if any. */
+	if (!diff_chunk_context_empty(&cc))
+		rc = output_unidiff_chunk(outinfo, dest, state, info, result,
+		    true, show_function_prototypes, &cc);
+done:
+	diff_output_unidiff_state_free(state);
+	return rc;
+}