diff options
Diffstat (limited to 'usr.bin/diff')
47 files changed, 5276 insertions, 0 deletions
diff --git a/usr.bin/diff/Makefile b/usr.bin/diff/Makefile new file mode 100644 index 000000000000..20eaaf8e1dff --- /dev/null +++ b/usr.bin/diff/Makefile @@ -0,0 +1,12 @@ +.include <src.opts.mk> + +PROG= diff +SRCS= diff.c diffdir.c diffreg.c xmalloc.c pr.c diffreg_new.c + +LIBADD= m diff +CFLAGS+= -I${.CURDIR} -I${SRCTOP}/contrib/libdiff/lib -I${SRCTOP}/contrib/libdiff/include + +HAS_TESTS= +SUBDIR.${MK_TESTS}+= tests + +.include <bsd.prog.mk> diff --git a/usr.bin/diff/TODO b/usr.bin/diff/TODO new file mode 100644 index 000000000000..c958ee2c2826 --- /dev/null +++ b/usr.bin/diff/TODO @@ -0,0 +1,9 @@ +to be implemented: +--horizon-lines +--ignore-tab-expansion +--line-format + +Will probably be not implemented: +--GTYPE-group-format (partially implement - minimal) +--LTYPE-line-format +--help (We have a manpage already) diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1 new file mode 100644 index 000000000000..47f9c11eb3db --- /dev/null +++ b/usr.bin/diff/diff.1 @@ -0,0 +1,825 @@ +.\" +.\" SPDX-License-Identifier: BSD-3-Clause +.\" +.\" $OpenBSD: diff.1,v 1.47 2015/11/24 19:35:41 jmc Exp $ +.\" +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd January 7, 2025 +.Dt DIFF 1 +.Os +.Sh NAME +.Nm diff +.Nd differential file and directory comparator +.Sh SYNOPSIS +.Nm diff +.Op Fl aBbdipTtw +.Oo +.Fl c | e | f | +.Fl n | q | u | y +.Oc +.Op Fl A Ar algo | Fl -algorithm Ar algo +.Op Fl -brief +.Op Fl -color Ns = Ns Ar when +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-dereference +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -starting-file +.Op Fl -speed-large-files +.Op Fl -strip-trailing-cr +.Op Fl -tabsize Ar number +.Op Fl -text +.Op Fl -unified +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl F Ar pattern | Fl -show-function-line Ar pattern +.Op Fl L Ar label | Fl -label Ar label +.Ar file1 file2 +.Nm diff +.Op Fl aBbdilpTtw +.Op Fl A Ar algo | Fl -algorithm Ar algo +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl F Ar pattern | Fl -show-function-line Ar pattern +.Op Fl L Ar label | Fl -label Ar label +.Op Fl -brief +.Op Fl -color Ns = Ns Ar when +.Op Fl 
-changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-dereference +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -starting-file +.Op Fl -strip-trailing-cr +.Op Fl -tabsize Ar number +.Op Fl -text +.Fl C Ar number | Fl -context Ar number +.Ar file1 file2 +.Nm diff +.Op Fl aBbdiltw +.Op Fl A Ar algo | Fl -algorithm Ar algo +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl -brief +.Op Fl -color Ns = Ns Ar when +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-dereference +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -starting-file +.Op Fl -strip-trailing-cr +.Op Fl -tabsize Ar number +.Op Fl -text +.Fl D Ar string | Fl -ifdef Ar string +.Ar file1 file2 +.Nm diff +.Op Fl aBbdilpTtw +.Op Fl A Ar algo | Fl -algorithm Ar algo +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl F Ar pattern | Fl -show-function-line Ar pattern +.Op Fl L Ar label | Fl -label Ar label +.Op Fl -brief +.Op Fl -color Ns = Ns Ar when +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-dereference +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -starting-file +.Op Fl -strip-trailing-cr +.Op Fl -tabsize Ar number +.Op Fl -text +.Fl U Ar number | Fl -unified Ar number +.Ar file1 file2 +.Nm diff +.Op Fl 
aBbdilNPprsTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl A Ar algo | Fl -algorithm Ar algo +.Op Fl -brief +.Op Fl -color Ns = Ns Ar when +.Op Fl -changed-group-format Ar GFMT +.Op Fl -context +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -new-file +.Op Fl -no-dereference +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -recursive +.Op Fl -report-identical-files +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -strip-trailing-cr +.Op Fl -tabsize Ar number +.Op Fl -text +.Op Fl -unidirectional-new-file +.Op Fl -unified +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl F Ar pattern | Fl -show-function-line Ar pattern +.Bk -words +.Op Fl L Ar label | Fl -label Ar label +.Op Fl S Ar name | Fl -starting-file Ar name +.Op Fl X Ar file | Fl -exclude-from Ar file +.Op Fl x Ar pattern | Fl -exclude Ar pattern +.Ek +.Ar dir1 dir2 +.Nm diff +.Op Fl aBbditwW +.Op Fl -color Ns = Ns Ar when +.Op Fl -expand-tabs +.Op Fl -ignore-all-space +.Op Fl -ignore-blank-lines +.Op Fl -ignore-case +.Op Fl -minimal +.Op Fl -no-dereference +.Op Fl -no-ignore-file-name-case +.Op Fl -strip-trailing-cr +.Op Fl -suppress-common-lines +.Op Fl -tabsize Ar number +.Op Fl -text +.Op Fl -width +.Fl y | Fl -side-by-side +.Ar file1 file2 +.Nm diff +.Op Fl -help +.Op Fl -version +.Sh DESCRIPTION +The +.Nm +utility compares the contents of +.Ar file1 +and +.Ar file2 +and writes to the standard output the list of changes necessary to +convert one file into the other. +No output is produced if the files are identical. +.Pp +Output options (mutually exclusive): +.Bl -tag -width Ds +.It Fl C Ar number Fl -context Ar number +Like +.Fl c +but produces a diff with +.Ar number +lines of context. +.It Fl c +Produces a diff with 3 lines of context. 
+With +.Fl c +the output format is modified slightly: +the output begins with identification of the files involved and +their creation dates and then each change is separated +by a line with fifteen +.Li * Ns 's . +The lines removed from +.Ar file1 +are marked with +.Sq \&-\ \& ; +those added to +.Ar file2 +are marked +.Sq +\ \& . +Lines which are changed from one file to the other are marked in +both files with +.Sq !\ \& . +Changes which lie within 3 lines of each other are grouped together on +output. +.It Fl D Ar string Fl -ifdef Ar string +Creates a merged version of +.Ar file1 +and +.Ar file2 +on the standard output, with C preprocessor controls included so that +a compilation of the result without defining +.Ar string +is equivalent to compiling +.Ar file1 , +while defining +.Ar string +will yield +.Ar file2 . +.It Fl e -ed +Produces output in a form suitable as input for the editor utility, +.Xr ed 1 , +which can then be used to convert file1 into file2. +.Pp +Extra commands are added to the output when comparing directories with +.Fl e , +so that the result is a +.Xr sh 1 +script for converting text files which are common to the two directories +from their state in +.Ar dir1 +to their state in +.Ar dir2 . +Note that when comparing directories with +.Fl e , +the resulting file may no longer be interpreted as an +.Xr ed 1 +script. +Output is added to indicate which file each set of +.Xr ed 1 +commands applies to. +These hunks can be manually extracted to produce an +.Xr ed 1 +script, which can also be applied with +.Xr patch 1 . +.It Fl f -forward-ed +Identical output to that of the +.Fl e +flag, but in reverse order. +It cannot be digested by +.Xr ed 1 . +.It Fl -help +This option prints a summary to stdout and exits with status 0. +.It Fl n +Produces a script similar to that of +.Fl e , +but in the opposite order and with a count of changed lines on each +insert or delete command. +This is the form used by rcsdiff. 
+.It Fl q -brief +Just print a line when the files differ. +Does not output a list of changes. +.It Fl U Ar number Fl -unified Ar number +Like +.Fl u +but produces a diff with +.Ar number +lines of context. +.It Fl u +Produces a +.Em unified +diff with 3 lines of context. +A unified diff is similar to the context diff produced by the +.Fl c +option. +However, unlike with +.Fl c , +all lines to be changed (added and/or removed) are present in +a single section. +.It Fl -version +This option prints a version string to stdout and exits with status 0. +.It Fl y Fl -side-by-side +Output in two columns with a marker between them. +The marker can be one +of the following: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It space +Corresponding lines are identical. +.It '|' +Corresponding lines are different. +.It '<' +Files differ and only the first file contains the line. +.It '>' +Files differ and only the second file contains the line. +.El +.El +.Pp +Comparison options: +.Bl -tag -width Ds +.It Fl A Ar algo , Fl -algorithm Ar algo +Configure the algorithm used when comparing files. +.Nm +supports 3 algorithms: +.Pp +.Bl -tag -width Ds -compact +.It Cm myers +The Myers diff algorithm finds the shortest edit which transforms one +input into the other. +It generally runs in O(N+D\(S2) time, requiring O(N) space, where N is +the sum of the lengths of the inputs and D is the length of the +difference between them, with a theoretical O(N\(pcD) worst case. +If it encounters worst-case input, the implementation used by +.Nm +falls back to a less optimal but faster algorithm. +.It Cm patience +The Patience variant of the Myers algorithm attempts to create more +aesthetically pleasing diff output by logically grouping lines. +.It Cm stone +The Stone algorithm (commonly known as Hunt-McIlroy or Hunt-Szymanski) +looks for the longest common subsequence between compared files. +Stone encounters worst case performance when there are long common +subsequences. 
+In large files this can lead to a significant performance impact. +The Stone algorithm is maintained for compatibility. +.El +.Pp +The +.Nm +utility defaults to the Myers algorithm, but will fall back to the +Stone algorithm if the input or output options are not supported by +the Myers implementation. +.It Fl a -text +Treat all files as ASCII text. +Normally +.Nm +will simply print +.Dq Binary files ... differ +if files contain binary characters. +Use of this option forces +.Nm +to produce a diff. +.It Fl B Fl -ignore-blank-lines +Causes chunks that include only blank lines to be ignored. +.It Fl b -ignore-space-change +Causes trailing blanks (spaces and tabs) to be ignored, and other +strings of blanks to compare equal. +.It Fl -color= Ns Oo Ar when Oc +Color the additions green, and removals red, or the value in the +.Ev DIFFCOLORS +environment variable. +The possible values of +.Ar when +are +.Dq Cm never , +.Dq Cm always +and +.Dq Cm auto . +.Cm auto +will use color if the output is a tty and the +.Ev COLORTERM +environment variable is set to a non-empty string. +.It Fl d -minimal +Try very hard to produce a diff as small as possible. +This may consume a lot of processing power and memory when processing +large files with many changes. +.It Fl F Ar pattern , Fl -show-function-line Ar pattern +Like +.Fl p , +but display the last line that matches provided pattern. +.It Fl I Ar pattern Fl -ignore-matching-lines Ar pattern +Ignores changes, insertions, and deletions whose lines match the +extended regular expression +.Ar pattern . +Multiple +.Fl I +patterns may be specified. +All lines in the change must match some pattern for the change to be +ignored. +See +.Xr re_format 7 +for more information on regular expression patterns. +.It Fl i -ignore-case +Ignores the case of letters. +E.g., +.Dq A +will compare equal to +.Dq a . +.It Fl l -paginate +Pass the output through +.Xr pr 1 +to paginate it. 
+.It Fl L Ar label Fl -label Ar label +Print +.Ar label +instead of the first (and second, if this option is specified twice) +file name and time in the context or unified diff header. +.It Fl p -show-c-function +With unified and context diffs, show with each change +the first 40 characters of the last line before the context beginning +with a letter, an underscore or a dollar sign. +For C and Objective-C source code following standard layout conventions, this +will show the prototype of the function the change applies to. +.It Fl T -initial-tab +Print a tab rather than a space before the rest of the line for the +normal, context or unified output formats. +This makes the alignment of tabs in the line consistent. +.It Fl t -expand-tabs +Will expand tabs in output lines. +Normal or +.Fl c +output adds character(s) to the front of each line which may screw up +the indentation of the original source lines and make the output listing +difficult to interpret. +This option will preserve the original source's indentation. +.It Fl w -ignore-all-space +Is similar to +.Fl b -ignore-space-change +but causes whitespace (blanks and tabs) to be totally ignored. +E.g., +.Dq if (\ \&a == b \&) +will compare equal to +.Dq if(a==b) . +.It Fl W Ar number Fl -width Ar number +Output at most +.Ar number +columns when using side by side format. +The default value is 130. +Note that unless +.Fl t +was specified, +.Nm +will always align the second column to a tab stop, so values of +.Fl -width +smaller than approximately five times the value of +.Fl -tabsize +may yield surprising results. 
+.It Fl -changed-group-format Ar GFMT +Format input groups in the provided format. +.Pp +The format is a string with special keywords: +.Bl -tag -width %< +.It %< +lines from FILE1 +.It %> +lines from FILE2 +.El +.It Fl -ignore-file-name-case +ignore case when comparing file names +.It Fl -no-dereference +do not follow symbolic links +.It Fl -no-ignore-file-name-case +do not ignore case when comparing file names (default) +.It Fl -normal +default diff output +.It Fl -speed-large-files +stub option for compatibility with GNU diff +.It Fl -strip-trailing-cr +strip carriage return on input files +.It Fl -suppress-common-lines +Do not output common lines when using the side by side format +.It Fl -tabsize Ar number +Number of spaces representing a tab (default 8) +.El +.Pp +Directory comparison options: +.Bl -tag -width Ds +.It Fl N -new-file +If a file is found in only one directory, act as if it was found in the +other directory too but was of zero size. +.It Fl P -unidirectional-new-file +If a file is found only in +.Ar dir2 , +act as if it was found in +.Ar dir1 +too but was of zero size. +.It Fl r -recursive +Causes application of +.Nm +recursively to common subdirectories encountered. +.It Fl S Ar name Fl -starting-file Ar name +Re-starts a directory +.Nm +in the middle, beginning with file +.Ar name . +.It Fl s -report-identical-files +Causes +.Nm +to report files which are the same, which are otherwise not mentioned. +.It Fl X Ar file Fl -exclude-from Ar file +Exclude files and subdirectories from comparison whose basenames match +lines in +.Ar file . +Multiple +.Fl X +options may be specified. +.It Fl x Ar pattern Fl -exclude Ar pattern +Exclude files and subdirectories from comparison whose basenames match +.Ar pattern . +Patterns are matched using shell-style globbing via +.Xr fnmatch 3 . +Multiple +.Fl x +options may be specified. 
+.El +.Pp +If both arguments are directories, +.Nm +sorts the contents of the directories by name, and then runs the +regular file +.Nm +algorithm, producing a change list, +on text files which are different. +Binary files which differ, +common subdirectories, and files which appear in only one directory +are described as such. +In directory mode only regular files and directories are compared. +If a non-regular file such as a device special file or FIFO is encountered, +a diagnostic message is printed. +.Pp +If only one of +.Ar file1 +and +.Ar file2 +is a directory, +.Nm +is applied to the non-directory file and the file contained in +the directory file with a filename that is the same as the +last component of the non-directory file. +.Pp +If either +.Ar file1 +or +.Ar file2 +is +.Sq - , +the standard input is +used in its place. +.Ss Output Style +The default (without +.Fl e , +.Fl c , +or +.Fl n -rcs +.\" -C +options) +output contains lines of these forms, where +.Va XX , YY , ZZ , QQ +are line numbers respective of file order. +.Pp +.Bl -tag -width "XX,YYcZZ,QQ" -compact +.It Li XX Ns Ic a Ns Li YY +At (the end of) line +.Va XX +of +.Ar file1 , +append the contents +of line +.Va YY +of +.Ar file2 +to make them equal. +.It Li XX Ns Ic a Ns Li YY,ZZ +Same as above, but append the range of lines, +.Va YY +through +.Va ZZ +of +.Ar file2 +to line +.Va XX +of file1. +.It Li XX Ns Ic d Ns Li YY +At line +.Va XX +delete +the line. +The value +.Va YY +tells to which line the change would bring +.Ar file1 +in line with +.Ar file2 . +.It Li XX,YY Ns Ic d Ns Li ZZ +Delete the range of lines +.Va XX +through +.Va YY +in +.Ar file1 . +.It Li XX Ns Ic c Ns Li YY +Change the line +.Va XX +in +.Ar file1 +to the line +.Va YY +in +.Ar file2 . +.It Li XX,YY Ns Ic c Ns Li ZZ +Replace the range of specified lines with the line +.Va ZZ . +.It Li XX,YY Ns Ic c Ns Li ZZ,QQ +Replace the range +.Va XX , Ns Va YY +from +.Ar file1 +with the range +.Va ZZ , Ns Va QQ +from +.Ar file2 . 
+.El +.Pp +These lines resemble +.Xr ed 1 +subcommands to convert +.Ar file1 +into +.Ar file2 . +The line numbers before the action letters pertain to +.Ar file1 ; +those after pertain to +.Ar file2 . +Thus, by exchanging +.Ic a +for +.Ic d +and reading the line in reverse order, one can also +determine how to convert +.Ar file2 +into +.Ar file1 . +As in +.Xr ed 1 , +identical +pairs (where num1 = num2) are abbreviated as a single +number. +.Sh ENVIRONMENT +.Bl -tag -width DIFFCOLORS +.It Ev DIFFCOLORS +The value of this variable is of the form +.Ar add : Ns Ar rm , +where +.Ar add +is the ASCII escape sequence for additions and +.Ar rm +is the ASCII escape sequence for deletions. +If this is unset, +.Nm +uses green for additions and red for removals. +.El +.Sh FILES +.Bl -tag -width /tmp/diff.XXXXXXXX -compact +.It Pa /tmp/diff.XXXXXXXX +Temporary file used when comparing a device or the standard input. +Note that the temporary file is unlinked as soon as it is created +so it will not show up in a directory listing. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +No differences were found. +.It 1 +Differences were found. +.It >1 +An error occurred. +.El +.Pp +The +.Fl -help +and +.Fl -version +options exit with a status of 0. 
+.Sh EXAMPLES +Compare +.Pa old_dir +and +.Pa new_dir +recursively generating a unified diff and treating files found only in one +of those directories as new files: +.Bd -literal -offset indent +$ diff -ruN /path/to/old_dir /path/to/new_dir +.Ed +.Pp +Same as above but excluding files matching the expressions +.Dq *.h +and +.Dq *.c : +.Bd -literal -offset indent +$ diff -ruN -x '*.h' -x '*.c' /path/to/old_dir /path/to/new_dir +.Ed +.Pp +Show a single line indicating if the files differ: +.Bd -literal -offset indent +$ diff -q /boot/loader.conf /boot/defaults/loader.conf +Files /boot/loader.conf and /boot/defaults/loader.conf differ +.Ed +.Pp +Assuming a file named +.Pa example.txt +with the following contents: +.Bd -literal -offset indent +FreeBSD is an operating system +Linux is a kernel +OpenBSD is an operating system +.Ed +.Pp +Compare stdin with +.Pa example.txt +excluding from the comparison those lines containing either +.Qq Linux +or +.Qq Open : +.Bd -literal -offset indent +$ echo "FreeBSD is an operating system" | diff -q -I 'Linux|Open' example.txt - +.Ed +.Sh SEE ALSO +.Xr cmp 1 , +.Xr comm 1 , +.Xr diff3 1 , +.Xr ed 1 , +.Xr patch 1 , +.Xr pr 1 , +.Xr sdiff 1 +.Rs +.%A James W. Hunt +.%A M. Douglas McIlroy +.%T "An Algorithm for Differential File Comparison" +.%J Computing Science Technical Report +.%Q Bell Laboratories 41 +.%D June 1976 +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl AaDdIiLlNnPpqSsTtwXxy +are extensions to that specification. +.Sh HISTORY +A +.Nm +command appeared in +.At v6 . +The +.Nm +implementation used in +.Fx +was GNU diff until +.Fx 11.4 . +This was replaced in +.Fx 12.0 +by a BSD-licensed implementation written by +.An Todd Miller . +Some GNUisms were lost in the process. +.Pp +libdiff was imported from the Game of Trees version control system and the default +algorithm was changed to Myers for +.Fx 15 . 
diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c new file mode 100644 index 000000000000..83aa20c52cf3 --- /dev/null +++ b/usr.bin/diff/diff.c @@ -0,0 +1,689 @@ +/* $OpenBSD: diff.c,v 1.67 2019/06/28 13:35:00 deraadt Exp $ */ + +/* + * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. 
+ */ + +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "diff.h" +#include "xmalloc.h" + +static const char diff_version[] = "FreeBSD diff 20240307"; +bool lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag; +bool ignore_file_case, suppress_common, color, noderef; +static bool help = false; +int diff_format, diff_context, diff_algorithm, status; +bool diff_algorithm_set; +int tabsize = 8, width = 130; +static int colorflag = COLORFLAG_NEVER; +char *start, *ifdefname, *diffargs, *label[2]; +char *ignore_pats, *most_recent_pat; +char *group_format = NULL; +const char *add_code, *del_code; +struct stat stb1, stb2; +struct excludes *excludes_list; +regex_t ignore_re, most_recent_re; + +static struct algorithm { + const char *name; + int id; +} algorithms[] = { + {"stone", D_DIFFSTONE}, + {"myers", D_DIFFMYERS}, + {"patience", D_DIFFPATIENCE}, + {NULL, D_DIFFNONE} +}; + +#define OPTIONS "0123456789A:aBbC:cdD:efF:HhI:iL:lnNPpqrS:sTtU:uwW:X:x:y" +enum { + OPT_TSIZE = CHAR_MAX + 1, + OPT_STRIPCR, + OPT_IGN_FN_CASE, + OPT_NO_IGN_FN_CASE, + OPT_NORMAL, + OPT_HELP, + OPT_HORIZON_LINES, + OPT_CHANGED_GROUP_FORMAT, + OPT_SUPPRESS_COMMON, + OPT_COLOR, + OPT_NO_DEREFERENCE, + OPT_VERSION, +}; + +static struct option longopts[] = { + { "algorithm", required_argument, 0, 'A' }, + { "text", no_argument, 0, 'a' }, + { "ignore-space-change", no_argument, 0, 'b' }, + { "context", optional_argument, 0, 'C' }, + { "ifdef", required_argument, 0, 'D' }, + { "minimal", no_argument, 0, 'd' }, + { "ed", no_argument, 0, 'e' }, + { "forward-ed", no_argument, 0, 'f' }, + { "show-function-line", required_argument, 0, 'F' }, + { "speed-large-files", no_argument, NULL, 'H' }, + { "ignore-blank-lines", no_argument, 0, 'B' }, + { "ignore-matching-lines", required_argument, 0, 'I' }, + { "ignore-case", no_argument, 0, 'i' }, + { 
"paginate", no_argument, NULL, 'l' }, + { "label", required_argument, 0, 'L' }, + { "new-file", no_argument, 0, 'N' }, + { "rcs", no_argument, 0, 'n' }, + { "unidirectional-new-file", no_argument, 0, 'P' }, + { "show-c-function", no_argument, 0, 'p' }, + { "brief", no_argument, 0, 'q' }, + { "recursive", no_argument, 0, 'r' }, + { "report-identical-files", no_argument, 0, 's' }, + { "starting-file", required_argument, 0, 'S' }, + { "expand-tabs", no_argument, 0, 't' }, + { "initial-tab", no_argument, 0, 'T' }, + { "unified", optional_argument, 0, 'U' }, + { "ignore-all-space", no_argument, 0, 'w' }, + { "width", required_argument, 0, 'W' }, + { "exclude", required_argument, 0, 'x' }, + { "exclude-from", required_argument, 0, 'X' }, + { "side-by-side", no_argument, NULL, 'y' }, + { "ignore-file-name-case", no_argument, NULL, OPT_IGN_FN_CASE }, + { "help", no_argument, NULL, OPT_HELP}, + { "horizon-lines", required_argument, NULL, OPT_HORIZON_LINES }, + { "no-dereference", no_argument, NULL, OPT_NO_DEREFERENCE}, + { "no-ignore-file-name-case", no_argument, NULL, OPT_NO_IGN_FN_CASE }, + { "normal", no_argument, NULL, OPT_NORMAL }, + { "strip-trailing-cr", no_argument, NULL, OPT_STRIPCR }, + { "tabsize", required_argument, NULL, OPT_TSIZE }, + { "changed-group-format", required_argument, NULL, OPT_CHANGED_GROUP_FORMAT}, + { "suppress-common-lines", no_argument, NULL, OPT_SUPPRESS_COMMON }, + { "color", optional_argument, NULL, OPT_COLOR }, + { "version", no_argument, NULL, OPT_VERSION}, + { NULL, 0, 0, '\0'} +}; + +static void checked_regcomp(char const *, regex_t *); +static void usage(void) __dead2; +static void conflicting_format(void) __dead2; +static void push_excludes(char *); +static void push_ignore_pats(char *); +static void read_excludes_file(char *file); +static void set_argstr(char **, char **); +static char *splice(char *, char *); +static bool do_color(void); + +int +main(int argc, char **argv) +{ + const char *errstr = NULL; + char *ep, **oargv; + long 
l; + int ch, dflags, lastch, gotstdin, prevoptind, newarg; + + oargv = argv; + gotstdin = 0; + dflags = 0; + lastch = '\0'; + prevoptind = 1; + newarg = 1; + diff_context = 3; + diff_format = D_UNSET; + diff_algorithm = D_DIFFMYERS; + diff_algorithm_set = false; +#define FORMAT_MISMATCHED(type) \ + (diff_format != D_UNSET && diff_format != (type)) + while ((ch = getopt_long(argc, argv, OPTIONS, longopts, NULL)) != -1) { + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg) + usage(); /* disallow -[0-9]+ */ + else if (lastch == 'c' || lastch == 'u') + diff_context = 0; + else if (!isdigit(lastch) || diff_context > INT_MAX / 10) + usage(); + diff_context = (diff_context * 10) + (ch - '0'); + break; + case 'A': + diff_algorithm = D_DIFFNONE; + for (struct algorithm *a = algorithms; a->name;a++) { + if(strcasecmp(optarg, a->name) == 0) { + diff_algorithm = a->id; + diff_algorithm_set = true; + break; + } + } + + if (diff_algorithm == D_DIFFNONE) { + printf("unknown algorithm: %s\n", optarg); + usage(); + } + break; + case 'a': + dflags |= D_FORCEASCII; + break; + case 'b': + dflags |= D_FOLDBLANKS; + break; + case 'C': + case 'c': + if (FORMAT_MISMATCHED(D_CONTEXT)) + conflicting_format(); + cflag = true; + diff_format = D_CONTEXT; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } + break; + case 'd': + dflags |= D_MINIMAL; + break; + case 'D': + if (FORMAT_MISMATCHED(D_IFDEF)) + conflicting_format(); + diff_format = D_IFDEF; + ifdefname = optarg; + break; + case 'e': + if (FORMAT_MISMATCHED(D_EDIT)) + conflicting_format(); + diff_format = D_EDIT; + break; + case 'f': + if (FORMAT_MISMATCHED(D_REVERSE)) + conflicting_format(); + diff_format = D_REVERSE; + break; + case 'H': + /* ignore but needed for compatibility with GNU diff */ + break; + case 'h': + /* silently ignore for backwards compatibility */ 
+ break; + case 'B': + dflags |= D_SKIPBLANKLINES; + break; + case 'F': + if (dflags & D_PROTOTYPE) + conflicting_format(); + dflags |= D_MATCHLAST; + most_recent_pat = xstrdup(optarg); + break; + case 'I': + push_ignore_pats(optarg); + break; + case 'i': + dflags |= D_IGNORECASE; + break; + case 'L': + if (label[0] == NULL) + label[0] = optarg; + else if (label[1] == NULL) + label[1] = optarg; + else + usage(); + break; + case 'l': + lflag = true; + break; + case 'N': + Nflag = true; + break; + case 'n': + if (FORMAT_MISMATCHED(D_NREVERSE)) + conflicting_format(); + diff_format = D_NREVERSE; + break; + case 'p': + if (dflags & D_MATCHLAST) + conflicting_format(); + dflags |= D_PROTOTYPE; + break; + case 'P': + Pflag = true; + break; + case 'r': + rflag = true; + break; + case 'q': + if (FORMAT_MISMATCHED(D_BRIEF)) + conflicting_format(); + diff_format = D_BRIEF; + break; + case 'S': + start = optarg; + break; + case 's': + sflag = true; + break; + case 'T': + Tflag = true; + break; + case 't': + dflags |= D_EXPANDTABS; + break; + case 'U': + case 'u': + if (FORMAT_MISMATCHED(D_UNIFIED)) + conflicting_format(); + diff_format = D_UNIFIED; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } + break; + case 'w': + dflags |= D_IGNOREBLANKS; + break; + case 'W': + width = (int) strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr) { + warnx("Invalid argument for width"); + usage(); + } + break; + case 'X': + read_excludes_file(optarg); + break; + case 'x': + push_excludes(optarg); + break; + case 'y': + if (FORMAT_MISMATCHED(D_SIDEBYSIDE)) + conflicting_format(); + diff_format = D_SIDEBYSIDE; + break; + case OPT_CHANGED_GROUP_FORMAT: + if (FORMAT_MISMATCHED(D_GFORMAT)) + conflicting_format(); + diff_format = D_GFORMAT; + group_format = optarg; + break; + case OPT_HELP: + help = true; + usage(); + break; + case OPT_HORIZON_LINES: + break; /* XXX TODO for compatibility with GNU diff3 */ 
+ case OPT_IGN_FN_CASE: + ignore_file_case = true; + break; + case OPT_NO_IGN_FN_CASE: + ignore_file_case = false; + break; + case OPT_NORMAL: + if (FORMAT_MISMATCHED(D_NORMAL)) + conflicting_format(); + diff_format = D_NORMAL; + break; + case OPT_TSIZE: + tabsize = (int) strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr) { + warnx("Invalid argument for tabsize"); + usage(); + } + break; + case OPT_STRIPCR: + dflags |= D_STRIPCR; + break; + case OPT_SUPPRESS_COMMON: + suppress_common = 1; + break; + case OPT_COLOR: + if (optarg == NULL || strncmp(optarg, "auto", 4) == 0) + colorflag = COLORFLAG_AUTO; + else if (strncmp(optarg, "always", 6) == 0) + colorflag = COLORFLAG_ALWAYS; + else if (strncmp(optarg, "never", 5) == 0) + colorflag = COLORFLAG_NEVER; + else + errx(2, "unsupported --color value '%s' (must be always, auto, or never)", + optarg); + break; + case OPT_NO_DEREFERENCE: + noderef = true; + break; + case OPT_VERSION: + printf("%s\n", diff_version); + exit(0); + default: + usage(); + break; + } + lastch = ch; + newarg = optind != prevoptind; + prevoptind = optind; + } + if (diff_format == D_UNSET && (dflags & D_PROTOTYPE) != 0) + diff_format = D_CONTEXT; + if (diff_format == D_UNSET) + diff_format = D_NORMAL; + argc -= optind; + argv += optind; + + if (do_color()) { + char *p; + const char *env; + + color = true; + add_code = "32"; + del_code = "31"; + env = getenv("DIFFCOLORS"); + if (env != NULL && *env != '\0' && (p = strdup(env))) { + add_code = p; + strsep(&p, ":"); + if (p != NULL) + del_code = p; + } + } + +#ifdef __OpenBSD__ + if (pledge("stdio rpath tmppath", NULL) == -1) + err(2, "pledge"); +#endif + + /* + * Do sanity checks, fill in stb1 and stb2 and call the appropriate + * driver routine. Both drivers use the contents of stb1 and stb2. 
+ */ + if (argc != 2) + usage(); + checked_regcomp(ignore_pats, &ignore_re); + checked_regcomp(most_recent_pat, &most_recent_re); + if (strcmp(argv[0], "-") == 0) { + fstat(STDIN_FILENO, &stb1); + gotstdin = 1; + } else if (stat(argv[0], &stb1) != 0) { + if (!Nflag || errno != ENOENT) + err(2, "%s", argv[0]); + dflags |= D_EMPTY1; + memset(&stb1, 0, sizeof(struct stat)); + } + + if (strcmp(argv[1], "-") == 0) { + fstat(STDIN_FILENO, &stb2); + gotstdin = 1; + } else if (stat(argv[1], &stb2) != 0) { + if (!Nflag || errno != ENOENT) + err(2, "%s", argv[1]); + dflags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + } + + if (dflags & D_EMPTY1 && dflags & D_EMPTY2){ + warn("%s", argv[0]); + warn("%s", argv[1]); + exit(2); + } + + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) + errx(2, "can't compare - to a directory"); + set_argstr(oargv, argv); + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (diff_format == D_IFDEF) + errx(2, "-D option not supported with directories"); + diffdir(argv[0], argv[1], dflags); + } else { + if (S_ISDIR(stb1.st_mode)) { + argv[0] = splice(argv[0], argv[1]); + if (stat(argv[0], &stb1) == -1) + err(2, "%s", argv[0]); + } + if (S_ISDIR(stb2.st_mode)) { + argv[1] = splice(argv[1], argv[0]); + if (stat(argv[1], &stb2) == -1) + err(2, "%s", argv[1]); + } + print_status(diffreg(argv[0], argv[1], dflags, 1), argv[0], + argv[1], ""); + } + if (fflush(stdout) != 0) + err(2, "stdout"); + exit(status); +} + +static void +checked_regcomp(char const *pattern, regex_t *comp) +{ + char buf[BUFSIZ]; + int error; + + if (pattern == NULL) + return; + + error = regcomp(comp, pattern, REG_NEWLINE | REG_EXTENDED); + if (error != 0) { + regerror(error, comp, buf, sizeof(buf)); + if (*pattern != '\0') + errx(2, "%s: %s", pattern, buf); + else + errx(2, "%s", buf); + } +} + +static void +set_argstr(char **av, char **ave) +{ + size_t argsize; + 
char **ap; + + argsize = 4 + *ave - *av + 1; + diffargs = xmalloc(argsize); + strlcpy(diffargs, "diff", argsize); + for (ap = av + 1; ap < ave; ap++) { + if (strcmp(*ap, "--") != 0) { + strlcat(diffargs, " ", argsize); + strlcat(diffargs, *ap, argsize); + } + } +} + +/* + * Read in an excludes file and push each line. + */ +static void +read_excludes_file(char *file) +{ + FILE *fp; + char *pattern = NULL; + size_t blen = 0; + ssize_t len; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) + err(2, "%s", file); + while ((len = getline(&pattern, &blen, fp)) >= 0) { + if ((len > 0) && (pattern[len - 1] == '\n')) + pattern[len - 1] = '\0'; + push_excludes(pattern); + /* we allocate a new string per line */ + pattern = NULL; + blen = 0; + } + free(pattern); + if (strcmp(file, "-") != 0) + fclose(fp); +} + +/* + * Push a pattern onto the excludes list. + */ +static void +push_excludes(char *pattern) +{ + struct excludes *entry; + + entry = xmalloc(sizeof(*entry)); + entry->pattern = pattern; + entry->next = excludes_list; + excludes_list = entry; +} + +static void +push_ignore_pats(char *pattern) +{ + size_t len; + + if (ignore_pats == NULL) + ignore_pats = xstrdup(pattern); + else { + /* old + "|" + new + NUL */ + len = strlen(ignore_pats) + strlen(pattern) + 2; + ignore_pats = xreallocarray(ignore_pats, 1, len); + strlcat(ignore_pats, "|", len); + strlcat(ignore_pats, pattern, len); + } +} + +void +print_status(int val, char *path1, char *path2, const char *entry) +{ + if (label[0] != NULL) + path1 = label[0]; + if (label[1] != NULL) + path2 = label[1]; + + switch (val) { + case D_BINARY: + printf("Binary files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_DIFFER: + if (diff_format == D_BRIEF) + printf("Files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_SAME: + if (sflag) + printf("Files %s%s and %s%s are identical\n", + path1, entry, path2, entry); + break; + case 
D_MISMATCH1: + printf("File %s%s is a directory while file %s%s is a regular file\n", + path1, entry, path2, entry); + break; + case D_MISMATCH2: + printf("File %s%s is a regular file while file %s%s is a directory\n", + path1, entry, path2, entry); + break; + case D_SKIPPED1: + printf("File %s%s is not a regular file or directory and was skipped\n", + path1, entry); + break; + case D_SKIPPED2: + printf("File %s%s is not a regular file or directory and was skipped\n", + path2, entry); + break; + case D_ERROR: + break; + } +} + +static void +usage(void) +{ + (void)fprintf(help ? stdout : stderr, + "usage: diff [-aBbdilpTtw] [-c | -e | -f | -n | -q | -u] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--strip-trailing-cr] [--tabsize]\n" + " [-I pattern] [-F pattern] [-L label] file1 file2\n" + " diff [-aBbdilpTtw] [-I pattern] [-L label] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--strip-trailing-cr] [--tabsize]\n" + " [-F pattern] -C number file1 file2\n" + " diff [-aBbdiltw] [-I pattern] [--ignore-case] [--no-ignore-case]\n" + " [--normal] [--strip-trailing-cr] [--tabsize] -D string file1 file2\n" + " diff [-aBbdilpTtw] [-I pattern] [-L label] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--tabsize] [--strip-trailing-cr]\n" + " [-F pattern] -U number file1 file2\n" + " diff [-aBbdilNPprsTtw] [-c | -e | -f | -n | -q | -u] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--tabsize] [-I pattern] [-L label]\n" + " [-F pattern] [-S name] [-X file] [-x pattern] dir1 dir2\n" + " diff [-aBbditwW] [--expand-tabs] [--ignore-all-space]\n" + " [--ignore-blank-lines] [--ignore-case] [--minimal]\n" + " [--no-ignore-file-name-case] [--strip-trailing-cr]\n" + " [--suppress-common-lines] [--tabsize] [--text] [--width]\n" + " -y | --side-by-side file1 file2\n" + " diff [--help] [--version]\n"); + + if (help) + exit(0); + else + exit(2); +} + +static void +conflicting_format(void) +{ + + fprintf(stderr, "error: conflicting output format options.\n"); + 
usage(); +} + +static bool +do_color(void) +{ + const char *p, *p2; + + switch (colorflag) { + case COLORFLAG_AUTO: + p = getenv("CLICOLOR"); + p2 = getenv("COLORTERM"); + if ((p != NULL && *p != '\0') || (p2 != NULL && *p2 != '\0')) + return isatty(STDOUT_FILENO); + break; + case COLORFLAG_ALWAYS: + return (true); + case COLORFLAG_NEVER: + return (false); + } + + return (false); +} + +static char * +splice(char *dir, char *path) +{ + char *tail, *buf; + size_t dirlen; + + dirlen = strlen(dir); + while (dirlen != 0 && dir[dirlen - 1] == '/') + dirlen--; + if ((tail = strrchr(path, '/')) == NULL) + tail = path; + else + tail++; + xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); + return (buf); +} diff --git a/usr.bin/diff/diff.h b/usr.bin/diff/diff.h new file mode 100644 index 000000000000..74be55db8a33 --- /dev/null +++ b/usr.bin/diff/diff.h @@ -0,0 +1,128 @@ +/* $OpenBSD: diff.h,v 1.34 2020/11/01 18:16:08 jcs Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +#include <stdbool.h> +#include <regex.h> + +/* + * Output format options + */ +#define D_NORMAL 0 /* Normal output */ +#define D_EDIT -1 /* Editor script out */ +#define D_REVERSE 1 /* Reverse editor script */ +#define D_CONTEXT 2 /* Diff with context */ +#define D_UNIFIED 3 /* Unified context diff */ +#define D_IFDEF 4 /* Diff with merged #ifdef's */ +#define D_NREVERSE 5 /* Reverse ed script with numbered + lines and no trailing . 
*/ +#define D_BRIEF 6 /* Say if the files differ */ +#define D_GFORMAT 7 /* Diff with defined changed group format */ +#define D_SIDEBYSIDE 8 /* Side by side */ + +#define D_UNSET -2 + +/* + * Algorithms + */ + +#define D_DIFFNONE 0 +#define D_DIFFSTONE 1 /* Stone or 'old diff' algorithm */ +#define D_DIFFMYERS 2 /* Myers diff algorithm */ +#define D_DIFFPATIENCE 3 /* Patience diff algorithm */ + +/* + * Output flags + */ +#define D_HEADER 0x001 /* Print a header/footer between files */ +#define D_EMPTY1 0x002 /* Treat first file as empty (/dev/null) */ +#define D_EMPTY2 0x004 /* Treat second file as empty (/dev/null) */ + +/* + * Command line flags + */ +#define D_FORCEASCII 0x008 /* Treat file as ascii regardless of content */ +#define D_FOLDBLANKS 0x010 /* Treat all white space as equal */ +#define D_MINIMAL 0x020 /* Make diff as small as possible */ +#define D_IGNORECASE 0x040 /* Case-insensitive matching */ +#define D_PROTOTYPE 0x080 /* Display C function prototype */ +#define D_EXPANDTABS 0x100 /* Expand tabs to spaces */ +#define D_IGNOREBLANKS 0x200 /* Ignore white space changes */ +#define D_STRIPCR 0x400 /* Strip trailing cr */ +#define D_SKIPBLANKLINES 0x800 /* Skip blank lines */ +#define D_MATCHLAST 0x1000 /* Display last line matching provided regex */ + +/* Features supported by new algorithms */ +#define D_NEWALGO_FLAGS (D_FORCEASCII | D_PROTOTYPE | D_IGNOREBLANKS) + +/* + * Status values for print_status() and diffreg() return values + */ +#define D_SAME 0 /* Files are the same */ +#define D_DIFFER 1 /* Files are different */ +#define D_BINARY 2 /* Binary files are different */ +#define D_MISMATCH1 3 /* path1 was a dir, path2 a file */ +#define D_MISMATCH2 4 /* path1 was a file, path2 a dir */ +#define D_SKIPPED1 5 /* path1 was a special file */ +#define D_SKIPPED2 6 /* path2 was a special file */ +#define D_ERROR 7 /* A file access error occurred */ + +/* + * Color options + */ +#define COLORFLAG_NEVER 0 +#define COLORFLAG_AUTO 1 +#define 
COLORFLAG_ALWAYS 2 + +struct excludes { + char *pattern; + struct excludes *next; +}; + +extern bool lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag; +extern bool ignore_file_case, suppress_common, color, noderef, algorithm_set; +extern int diff_format, diff_context, diff_algorithm, status; +extern bool diff_algorithm_set; +extern int tabsize, width; +extern char *start, *ifdefname, *diffargs, *label[2]; +extern char *ignore_pats, *most_recent_pat; +extern char *group_format; +extern const char *add_code, *del_code; +extern struct stat stb1, stb2; +extern struct excludes *excludes_list; +extern regex_t ignore_re, most_recent_re; + +int diffreg(char *, char *, int, int); +int diffreg_new(char *, char *, int, int); +bool can_libdiff(int); +void diffdir(char *, char *, int); +void print_status(int, char *, char *, const char *); diff --git a/usr.bin/diff/diffdir.c b/usr.bin/diff/diffdir.c new file mode 100644 index 000000000000..a55a2bec70ee --- /dev/null +++ b/usr.bin/diff/diffdir.c @@ -0,0 +1,379 @@ +/* $OpenBSD: diffdir.c,v 1.45 2015/10/05 20:15:00 millert Exp $ */ + +/* + * Copyright (c) 2003, 2010 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include <sys/stat.h> +#include <sys/tree.h> + +#include <dirent.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "diff.h" + +static int selectfile(const struct dirent *); +static void diffit(struct dirent *, char *, size_t, struct dirent *, + char *, size_t, int); +static void print_only(const char *, size_t, const char *); + +#define d_status d_type /* we need to store status for -l */ + +struct inode { + dev_t dev; + ino_t ino; + RB_ENTRY(inode) entry; +}; + +static int +inodecmp(struct inode *a, struct inode *b) +{ + return (a->dev < b->dev ? -1 : a->dev > b->dev ? 1 : + a->ino < b->ino ? -1 : a->ino > b->ino ? 1 : 0); +} + +RB_HEAD(inodetree, inode); +static struct inodetree v1 = RB_INITIALIZER(&v1); +static struct inodetree v2 = RB_INITIALIZER(&v2); +RB_GENERATE_STATIC(inodetree, inode, entry, inodecmp); + +static int +vscandir(struct inodetree *tree, const char *path, struct dirent ***dirp, + int (*selectf)(const struct dirent *), + int (*comparf)(const struct dirent **, const struct dirent **)) +{ + struct stat sb; + struct inode *ino = NULL; + int fd = -1, ret, serrno; + + if ((fd = open(path, O_DIRECTORY | O_RDONLY)) < 0 || + (ino = calloc(1, sizeof(*ino))) == NULL || + fstat(fd, &sb) != 0) + goto fail; + ino->dev = sb.st_dev; + ino->ino = sb.st_ino; + if (RB_FIND(inodetree, tree, ino)) { + free(ino); + close(fd); + warnx("%s: Directory loop detected", path); + *dirp = NULL; + return (0); + } + if ((ret = fdscandir(fd, dirp, selectf, comparf)) < 0) + goto fail; + RB_INSERT(inodetree, tree, ino); + close(fd); + return (ret); +fail: + serrno = errno; + if (ino != NULL) + free(ino); + if (fd >= 0) + 
close(fd); + errno = serrno; + return (-1); +} + +/* + * Diff directory traversal. Will be called recursively if -r was specified. + */ +void +diffdir(char *p1, char *p2, int flags) +{ + struct dirent *dent1, **dp1, **edp1, **dirp1 = NULL; + struct dirent *dent2, **dp2, **edp2, **dirp2 = NULL; + size_t dirlen1, dirlen2; + char path1[PATH_MAX], path2[PATH_MAX]; + int pos; + + edp1 = edp2 = NULL; + + dirlen1 = strlcpy(path1, *p1 ? p1 : ".", sizeof(path1)); + if (dirlen1 >= sizeof(path1) - 1) { + warnc(ENAMETOOLONG, "%s", p1); + status |= 2; + return; + } + while (dirlen1 > 1 && path1[dirlen1 - 1] == '/') + path1[--dirlen1] = '\0'; + dirlen2 = strlcpy(path2, *p2 ? p2 : ".", sizeof(path2)); + if (dirlen2 >= sizeof(path2) - 1) { + warnc(ENAMETOOLONG, "%s", p2); + status |= 2; + return; + } + while (dirlen2 > 1 && path2[dirlen2 - 1] == '/') + path2[--dirlen2] = '\0'; + + /* + * Get a list of entries in each directory, skipping "excluded" files + * and sorting alphabetically. + */ + pos = vscandir(&v1, path1, &dirp1, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && (Nflag || Pflag)) { + pos = 0; + } else { + warn("%s", path1); + goto closem; + } + } + dp1 = dirp1; + edp1 = dirp1 + pos; + + pos = vscandir(&v2, path2, &dirp2, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && Nflag) { + pos = 0; + } else { + warn("%s", path2); + goto closem; + } + } + dp2 = dirp2; + edp2 = dirp2 + pos; + + /* + * If we were given a starting point, find it. + */ + if (start != NULL) { + while (dp1 != edp1 && strcmp((*dp1)->d_name, start) < 0) + dp1++; + while (dp2 != edp2 && strcmp((*dp2)->d_name, start) < 0) + dp2++; + } + + /* + * Append separator so children's names can be appended directly. + */ + if (path1[dirlen1 - 1] != '/') { + path1[dirlen1++] = '/'; + path1[dirlen1] = '\0'; + } + if (path2[dirlen2 - 1] != '/') { + path2[dirlen2++] = '/'; + path2[dirlen2] = '\0'; + } + + /* + * Iterate through the two directory lists, diffing as we go. 
+ */ + while (dp1 != edp1 || dp2 != edp2) { + dent1 = dp1 != edp1 ? *dp1 : NULL; + dent2 = dp2 != edp2 ? *dp2 : NULL; + + pos = dent1 == NULL ? 1 : dent2 == NULL ? -1 : + ignore_file_case ? strcasecmp(dent1->d_name, dent2->d_name) : + strcmp(dent1->d_name, dent2->d_name) ; + if (pos == 0) { + /* file exists in both dirs, diff it */ + diffit(dent1, path1, dirlen1, dent2, path2, dirlen2, flags); + dp1++; + dp2++; + } else if (pos < 0) { + /* file only in first dir, only diff if -N */ + if (Nflag) { + diffit(dent1, path1, dirlen1, dent2, path2, + dirlen2, flags); + } else { + print_only(path1, dirlen1, dent1->d_name); + status |= 1; + } + dp1++; + } else { + /* file only in second dir, only diff if -N or -P */ + if (Nflag || Pflag) + diffit(dent2, path1, dirlen1, dent1, path2, + dirlen2, flags); + else { + print_only(path2, dirlen2, dent2->d_name); + status |= 1; + } + dp2++; + } + } + +closem: + if (dirp1 != NULL) { + for (dp1 = dirp1; dp1 < edp1; dp1++) + free(*dp1); + free(dirp1); + } + if (dirp2 != NULL) { + for (dp2 = dirp2; dp2 < edp2; dp2++) + free(*dp2); + free(dirp2); + } +} + +/* + * Do the actual diff by calling either diffreg() or diffdir(). + * + * dp is the entry being compared; its name is appended to path1 at + * offset plen1. dp2 is the current entry of the other directory list; + * it may be NULL, or name an unrelated file, when dp has no counterpart + * there. path2 receives dp2's name at offset plen2 only when file-name + * case is ignored and both names match apart from case; otherwise it + * receives dp's name. + */ +static void +diffit(struct dirent *dp, char *path1, size_t plen1, struct dirent *dp2, + char *path2, size_t plen2, int flags) +{ + flags |= D_HEADER; + strlcpy(path1 + plen1, dp->d_name, PATH_MAX - plen1); + + /* + * If we are ignoring file case, use dp2's name here if both names are + * the same apart from case. dp2 must be checked first because the + * caller passes NULL when one list is exhausted. 
+ */ + if (ignore_file_case && dp2 != NULL && + strcasecmp(dp->d_name, dp2->d_name) == 0) + strlcpy(path2 + plen2, dp2->d_name, PATH_MAX - plen2); + else + strlcpy(path2 + plen2, dp->d_name, PATH_MAX - plen2); + + if (noderef) { + if (lstat(path1, &stb1) != 0) { + if (!(Nflag || Pflag) || errno != ENOENT) { + warn("%s", path1); + return; + } + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + } + + if (lstat(path2, &stb2) != 0) { + if (!Nflag || errno != ENOENT) { + warn("%s", path2); + return; + } + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + } + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + if (S_ISLNK(stb1.st_mode) || S_ISLNK(stb2.st_mode)) { + if (S_ISLNK(stb1.st_mode) && S_ISLNK(stb2.st_mode)) { + char buf1[PATH_MAX]; + char buf2[PATH_MAX]; + ssize_t len1 = 0; + ssize_t len2 = 0; + + /* leave room for the terminating NUL stored below */ + len1 = readlink(path1, buf1, sizeof(buf1) - 1); + len2 = readlink(path2, buf2, sizeof(buf2) - 1); + + if (len1 < 0 || len2 < 0) { + perror("reading links"); + return; + } + buf1[len1] = '\0'; + buf2[len2] = '\0'; + + if (len1 != len2 || strncmp(buf1, buf2, len1) != 0) { + printf("Symbolic links %s and %s differ\n", + path1, path2); + status |= 1; + } + + return; + } + + printf("File %s is a %s while file %s is a %s\n", + path1, S_ISLNK(stb1.st_mode) ? "symbolic link" : + (S_ISDIR(stb1.st_mode) ? "directory" : + (S_ISREG(stb1.st_mode) ? "file" : "error")), + path2, S_ISLNK(stb2.st_mode) ? "symbolic link" : + (S_ISDIR(stb2.st_mode) ? "directory" : + (S_ISREG(stb2.st_mode) ? 
"file" : "error"))); + status |= 1; + return; + } + } else { + if (stat(path1, &stb1) != 0) { + if (!(Nflag || Pflag) || errno != ENOENT) { + warn("%s", path1); + return; + } + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + } + + if (stat(path2, &stb2) != 0) { + if (!Nflag || errno != ENOENT) { + warn("%s", path2); + return; + } + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + } + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + } + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (rflag) + diffdir(path1, path2, flags); + else + printf("Common subdirectories: %s and %s\n", + path1, path2); + return; + } + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + dp->d_status = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + dp->d_status = D_SKIPPED2; + else + dp->d_status = diffreg(path1, path2, flags, 0); + print_status(dp->d_status, path1, path2, ""); +} + +/* + * Returns 1 if the directory entry should be included in the + * diff, else 0. Checks the excludes list. + */ +static int +selectfile(const struct dirent *dp) +{ + struct excludes *excl; + + if (dp->d_fileno == 0) + return (0); + + /* always skip "." and ".." */ + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' 
&& dp->d_name[2] == '\0'))) + return (0); + + /* check excludes list */ + for (excl = excludes_list; excl != NULL; excl = excl->next) + if (fnmatch(excl->pattern, dp->d_name, FNM_PATHNAME) == 0) + return (0); + + return (1); +} + +void +print_only(const char *path, size_t dirlen, const char *entry) +{ + if (dirlen > 1) + dirlen--; + printf("Only in %.*s: %s\n", (int)dirlen, path, entry); +} diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c new file mode 100644 index 000000000000..ffa5568bf442 --- /dev/null +++ b/usr.bin/diff/diffreg.c @@ -0,0 +1,1736 @@ +/* $OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (C) Caldera International Inc. 2001-2002. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code and documentation must retain the above + * copyright notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * 4. Neither the name of Caldera International, Inc. nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, + * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/capsicum.h> +#include <sys/stat.h> + +#include <capsicum_helpers.h> +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <math.h> +#include <paths.h> +#include <regex.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "pr.h" +#include "diff.h" +#include "xmalloc.h" + +/* + * diff - compare two files. + */ + +/* + * Uses an algorithm due to Harold Stone, which finds a pair of longest + * identical subsequences in the two files. + * + * The major goal is to generate the match vector J. J[i] is the index of + * the line in file1 corresponding to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential matches are + * located by sorting the lines of each file on the hash (called + * ``value''). In particular, this collects the equivalence classes in + * file1 together. Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its matching equivalence in + * (the reordered) file1. To save space equiv squeezes file1 into a single + * array member in which the equivalence classes are simply concatenated, + * except that their first members are flagged by changing sign. 
+ * + * Next the indices that point into member are unsorted into array class + * according to the original order of file0. + * + * The cleverness lies in routine stone. This marches through the lines of + * file0, developing a vector klist of "k-candidates". At step i + * a k-candidate is a matched pair of lines x,y (x in file0, y in file1) + * such that there is a common subsequence of length k between the first + * i lines of file0 and the first y lines of file1, but there is no such + * subsequence for any smaller y. x is the earliest possible mate to y that + * occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of lines in file1 + * matable to a line in file0 has serial number less than the y of some + * k-candidate, that k-candidate with the smallest such y is replaced. The + * new k-candidate is chained (via pred) to the current k-1 candidate so + * that the actual subsequence can be recovered. When a member has serial + * number greater than the y of all k-candidates, the klist is extended. At + * the end, the longest subsequence is pulled out and placed in the array J + * by unravel. + * + * With J in hand, the matches there recorded are checked against reality + * to assure that no spurious matches have crept in due to hashing. If they + * have, they are broken, and "jackpot" is recorded -- a harmless matter + * except that a true match for a spuriously mated line may now be + * unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply from trying to + * minimize core utilization and maximize the range of doable problems by + * dynamically allocating what is needed and reusing what is not. The core + * requirements for problems larger than somewhat are (in words) + * 2*length(file0) + length(file1) + 3*(number of k-candidates installed), + * typically about 6n words for files of length n. 
+ */ + +struct cand { + int x; + int y; + int pred; +}; + +static struct line { + int serial; + int value; +} *file[2]; + +/* + * The following struct is used to record change information when + * doing a "context" or "unified" diff. (see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + +enum readhash { RH_BINARY, RH_OK, RH_EOF }; + +static int diffreg_stone(char *, char *, int, int); +static FILE *opentemp(const char *); +static void output(char *, FILE *, char *, FILE *, int); +static void check(FILE *, FILE *, int); +static void range(int, int, const char *); +static void uni_range(int, int); +static void dump_context_vec(FILE *, FILE *, int); +static void dump_unified_vec(FILE *, FILE *, int); +static bool prepare(int, FILE *, size_t, int); +static void prune(void); +static void equiv(struct line *, int, struct line *, int, int *); +static void unravel(int); +static void unsort(struct line *, int, int *); +static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); +static void sort(struct line *, int); +static void print_header(const char *, const char *); +static void print_space(int, int, int); +static bool ignoreline_pattern(char *); +static bool ignoreline(char *, bool); +static int asciifile(FILE *); +static int fetch(long *, int, int, FILE *, int, int, int); +static int newcand(int, int, int); +static int search(int *, int, int); +static int skipline(FILE *); +static int stone(int *, int, int *, int *, int); +static enum readhash readhash(FILE *, int, unsigned *); +static int files_differ(FILE *, FILE *, int); +static char *match_function(const long *, int, FILE *); +static char *preadline(int, size_t, off_t); + +static int *J; /* will be overlaid on class */ +static int *class; /* will be overlaid on file[0] */ +static int *klist; /* will 
be overlaid on file[0] after class */ +static int *member; /* will be overlaid on file[1] */ +static int clen; +static int inifdef; /* whether or not we are in a #ifdef block */ +static size_t len[2]; /* lengths of files in lines */ +static size_t pref, suff; /* lengths of prefix and suffix */ +static size_t slen[2]; /* lengths of files minus pref / suff */ +static int anychange; +static int hw, lpad,rpad; /* half width and padding */ +static int edoffset; +static long *ixnew; /* will be overlaid on file[1] */ +static long *ixold; /* will be overlaid on klist */ +static struct cand *clist; /* merely a free storage pot for candidates */ +static int clistlen; /* the length of clist */ +static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ +static int (*chrtran)(int); /* translation table for case-folding */ +static struct context_vec *context_vec_start; +static struct context_vec *context_vec_end; +static struct context_vec *context_vec_ptr; + +#define FUNCTION_CONTEXT_SIZE 55 +static char lastbuf[FUNCTION_CONTEXT_SIZE]; +static int lastline; +static int lastmatchline; + +int +diffreg(char *file1, char *file2, int flags, int capsicum) +{ + /* + * If we have set the algorithm with -A or --algorithm use that if we + * can and if not print an error. + */ + if (diff_algorithm_set) { + if (diff_algorithm == D_DIFFMYERS || + diff_algorithm == D_DIFFPATIENCE) { + if (can_libdiff(flags)) + return diffreg_new(file1, file2, flags, capsicum); + else + errx(2, "cannot use Myers algorithm with selected options"); + } else { + /* Fallback to using stone. 
*/ + return diffreg_stone(file1, file2, flags, capsicum); + } + } else { + if (can_libdiff(flags)) + return diffreg_new(file1, file2, flags, capsicum); + else + return diffreg_stone(file1, file2, flags, capsicum); + } +} + +static int +clow2low(int c) +{ + + return (c); +} + +static int +cup2low(int c) +{ + + return (tolower(c)); +} + +int +diffreg_stone(char *file1, char *file2, int flags, int capsicum) +{ + FILE *f1, *f2; + int i, rval; + struct pr *pr = NULL; + cap_rights_t rights_ro; + + f1 = f2 = NULL; + rval = D_SAME; + anychange = 0; + lastline = 0; + lastmatchline = 0; + + /* + * In side-by-side mode, we need to print the left column, a + * change marker surrounded by padding, and the right column. + * + * If expanding tabs, we don't care about alignment, so we simply + * subtract 3 from the width and divide by two. + * + * If not expanding tabs, we need to ensure that the right column + * is aligned to a tab stop. We start with the same formula, then + * decrement until we reach a size that lets us tab-align the + * right column. We then adjust the width down if necessary for + * the padding calculation to work. + * + * Left padding is half the space left over, rounded down; right + * padding is whatever is needed to match the width. + */ + if (diff_format == D_SIDEBYSIDE) { + if (flags & D_EXPANDTABS) { + if (width > 3) { + hw = (width - 3) / 2; + } else { + /* not enough space */ + hw = 0; + } + } else if (width <= 3 || width <= tabsize) { + /* not enough space */ + hw = 0; + } else { + hw = (width - 3) / 2; + while (hw > 0 && roundup(hw + 3, tabsize) + hw > width) + hw--; + if (width - (roundup(hw + 3, tabsize) + hw) < tabsize) + width = roundup(hw + 3, tabsize) + hw; + } + lpad = (width - hw * 2 - 1) / 2; + rpad = (width - hw * 2 - 1) - lpad; + } + + if (flags & D_IGNORECASE) + chrtran = cup2low; + else + chrtran = clow2low; + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? 
D_MISMATCH1 : D_MISMATCH2); + if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) + goto closem; + + if (flags & D_EMPTY1) + f1 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb1.st_mode)) { + if ((f1 = opentemp(file1)) == NULL || + fstat(fileno(f1), &stb1) == -1) { + warn("%s", file1); + rval = D_ERROR; + status |= 2; + goto closem; + } + } else if (strcmp(file1, "-") == 0) + f1 = stdin; + else + f1 = fopen(file1, "r"); + } + if (f1 == NULL) { + warn("%s", file1); + rval = D_ERROR; + status |= 2; + goto closem; + } + + if (flags & D_EMPTY2) + f2 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb2.st_mode)) { + if ((f2 = opentemp(file2)) == NULL || + fstat(fileno(f2), &stb2) == -1) { + warn("%s", file2); + rval = D_ERROR; + status |= 2; + goto closem; + } + } else if (strcmp(file2, "-") == 0) + f2 = stdin; + else + f2 = fopen(file2, "r"); + } + if (f2 == NULL) { + warn("%s", file2); + rval = D_ERROR; + status |= 2; + goto closem; + } + + if (lflag) + pr = start_pr(file1, file2); + + if (capsicum) { + cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); + if (caph_rights_limit(fileno(f1), &rights_ro) < 0) + err(2, "unable to limit rights on: %s", file1); + if (caph_rights_limit(fileno(f2), &rights_ro) < 0) + err(2, "unable to limit rights on: %s", file2); + if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { + /* stdin has already been limited */ + if (caph_limit_stderr() == -1) + err(2, "unable to limit stderr"); + if (caph_limit_stdout() == -1) + err(2, "unable to limit stdout"); + } else if (caph_limit_stdio() == -1) + err(2, "unable to limit stdio"); + + caph_cache_catpages(); + caph_cache_tzdata(); + if (caph_enter() < 0) + err(2, "unable to enter capability mode"); + } + + switch (files_differ(f1, f2, flags)) { + case 0: + goto closem; + case 1: + break; + default: + /* error */ + rval = D_ERROR; + status |= 2; + goto closem; + } + + if (diff_format == D_BRIEF && ignore_pats == NULL && + (flags & 
(D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE| + D_SKIPBLANKLINES|D_STRIPCR)) == 0) + { + rval = D_DIFFER; + status |= 1; + goto closem; + } + if ((flags & D_FORCEASCII) != 0) { + (void)prepare(0, f1, stb1.st_size, flags); + (void)prepare(1, f2, stb2.st_size, flags); + } else if (!asciifile(f1) || !asciifile(f2) || + !prepare(0, f1, stb1.st_size, flags) || + !prepare(1, f2, stb2.st_size, flags)) { + rval = D_BINARY; + status |= 1; + goto closem; + } + if (len[0] > INT_MAX - 2) + errc(1, EFBIG, "%s", file1); + if (len[1] > INT_MAX - 2) + errc(1, EFBIG, "%s", file2); + + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = xreallocarray(member, slen[1] + 2, sizeof(*member)); + + class = (int *)file[0]; + unsort(sfile[0], slen[0], class); + class = xreallocarray(class, slen[0] + 2, sizeof(*class)); + + klist = xcalloc(slen[0] + 2, sizeof(*klist)); + clen = 0; + clistlen = 100; + clist = xcalloc(clistlen, sizeof(*clist)); + i = stone(class, slen[0], member, klist, flags); + free(member); + free(class); + + J = xreallocarray(J, len[0] + 2, sizeof(*J)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); + ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); + check(f1, f2, flags); + output(file1, f1, file2, f2, flags); + +closem: + if (pr != NULL) + stop_pr(pr); + if (anychange) { + status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + if (f1 != NULL) + fclose(f1); + if (f2 != NULL) + fclose(f2); + + return (rval); +} + +/* + * Check to see if the given files differ. + * Returns 0 if they are the same, 1 if different, and -1 on error. 
+ * XXX - could use code from cmp(1) [faster] + */ +static int +files_differ(FILE *f1, FILE *f2, int flags) +{ + char buf1[BUFSIZ], buf2[BUFSIZ]; + size_t i, j; + + if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || + (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) + return (1); + + if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino) + return (0); + + for (;;) { + i = fread(buf1, 1, sizeof(buf1), f1); + j = fread(buf2, 1, sizeof(buf2), f2); + if ((!i && ferror(f1)) || (!j && ferror(f2))) + return (-1); + if (i != j) + return (1); + if (i == 0) + return (0); + if (memcmp(buf1, buf2, i) != 0) + return (1); + } +} + +static FILE * +opentemp(const char *f) +{ + char buf[BUFSIZ], tempfile[PATH_MAX]; + ssize_t nread; + int ifd, ofd; + + if (strcmp(f, "-") == 0) + ifd = STDIN_FILENO; + else if ((ifd = open(f, O_RDONLY, 0644)) == -1) + return (NULL); + + (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); + + if ((ofd = mkstemp(tempfile)) == -1) { + close(ifd); + return (NULL); + } + unlink(tempfile); + while ((nread = read(ifd, buf, BUFSIZ)) > 0) { + if (write(ofd, buf, nread) != nread) { + close(ifd); + close(ofd); + return (NULL); + } + } + close(ifd); + lseek(ofd, (off_t)0, SEEK_SET); + return (fdopen(ofd, "r")); +} + +static bool +prepare(int i, FILE *fd, size_t filesize, int flags) +{ + struct line *p; + unsigned h; + size_t sz, j = 0; + enum readhash r; + + rewind(fd); + + sz = MIN(filesize, SIZE_MAX) / 25; + if (sz < 100) + sz = 100; + + p = xcalloc(sz + 3, sizeof(*p)); + while ((r = readhash(fd, flags, &h)) != RH_EOF) { + if (r == RH_BINARY) + return (false); + if (j == SIZE_MAX) + break; + if (j == sz) { + sz = sz * 3 / 2; + p = xreallocarray(p, sz + 3, sizeof(*p)); + } + p[++j].value = h; + } + + len[i] = j; + file[i] = p; + + return (true); +} + +static void +prune(void) +{ + size_t i, j; + + for (pref = 0; pref < len[0] && pref < len[1] && + file[0][pref + 1].value == file[1][pref + 1].value; + pref++) + ; + 
for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && + file[0][len[0] - suff].value == file[1][len[1] - suff].value; + suff++) + ; + for (j = 0; j < 2; j++) { + sfile[j] = file[j] + pref; + slen[j] = len[j] - pref - suff; + for (i = 0; i <= slen[j]; i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while (i <= n && j <= m) { + if (a[i].value < b[j].value) + a[i++].value = 0; + else if (a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while (i <= n) + a[i++].value = 0; + b[m + 1].value = 0; + j = 0; + while (++j <= m) { + c[j] = -b[j].serial; + while (b[j + 1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +static int +stone(int *a, int n, int *b, int *c, int flags) +{ + int i, k, y, j, l; + int oldc, tc, oldl, sq; + unsigned numtries, bound; + + if (flags & D_MINIMAL) + bound = UINT_MAX; + else { + sq = sqrt(n); + bound = MAX(256, sq); + } + + k = 0; + c[0] = newcand(0, 0, 0); + for (i = 1; i <= n; i++) { + j = a[i]; + if (j == 0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + numtries = 0; + do { + if (y <= clist[oldc].y) + continue; + l = search(c, k, y); + if (l != oldl + 1) + oldc = c[l - 1]; + if (l <= k) { + if (clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i, y, oldc); + oldc = tc; + oldl = l; + numtries++; + } else { + c[l] = newcand(i, y, oldc); + k++; + break; + } + } while ((y = b[++j]) > 0 && numtries < bound); + } + return (k); +} + +static int +newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xreallocarray(clist, clistlen, sizeof(*clist)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return (clen++); +} + +static int +search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return (k + 1); + i = 0; + j = k + 1; + for (;;) { + 
l = (i + j) / 2; + if (l <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return (l); + } + return (l + 1); +} + +static void +unravel(int p) +{ + struct cand *q; + size_t i; + + for (i = 0; i <= len[0]; i++) + J[i] = i <= pref ? i : + i > len[0] - suff ? i + len[1] - len[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; +} + +/* + * Check does double duty: + * 1. ferret out any fortuitous correspondences due to confounding by + * hashing (which result in "jackpot") + * 2. collect random access indexes to the two files + */ +static void +check(FILE *f1, FILE *f2, int flags) +{ + int i, j, /* jackpot, */ c, d; + long ctold, ctnew; + + rewind(f1); + rewind(f2); + j = 1; + ixold[0] = ixnew[0] = 0; + /* jackpot = 0; */ + ctold = ctnew = 0; + for (i = 1; i <= (int)len[0]; i++) { + if (J[i] == 0) { + ixold[i] = ctold += skipline(f1); + continue; + } + while (j < J[i]) { + ixnew[j] = ctnew += skipline(f2); + j++; + } + if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS | D_IGNORECASE | D_STRIPCR)) { + for (;;) { + c = getc(f1); + d = getc(f2); + /* + * GNU diff ignores a missing newline + * in one file for -b or -w. 
+ */ + if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) { + if (c == EOF && isspace(d)) { + ctnew++; + break; + } else if (isspace(c) && d == EOF) { + ctold++; + break; + } + } + ctold++; + ctnew++; + if (flags & D_STRIPCR && (c == '\r' || d == '\r')) { + if (c == '\r') { + if ((c = getc(f1)) == '\n') { + ctold++; + } else { + ungetc(c, f1); + } + } + if (d == '\r') { + if ((d = getc(f2)) == '\n') { + ctnew++; + } else { + ungetc(d, f2); + } + } + break; + } + if ((flags & D_FOLDBLANKS) && isspace(c) && + isspace(d)) { + do { + if (c == '\n') + break; + ctold++; + } while (isspace(c = getc(f1))); + do { + if (d == '\n') + break; + ctnew++; + } while (isspace(d = getc(f2))); + } else if (flags & D_IGNOREBLANKS) { + while (isspace(c) && c != '\n') { + c = getc(f1); + ctold++; + } + while (isspace(d) && d != '\n') { + d = getc(f2); + ctnew++; + } + } + if (chrtran(c) != chrtran(d)) { + /* jackpot++; */ + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } else { + for (;;) { + ctold++; + ctnew++; + if ((c = getc(f1)) != (d = getc(f2))) { + /* jackpot++; */ + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for (; j <= (int)len[1]; j++) { + ixnew[j] = ctnew += skipline(f2); + } + /* + * if (jackpot) + * fprintf(stderr, "jackpot\n"); + */ +} + +/* shellsort CACM #201 */ +static void +sort(struct line *a, int n) +{ + struct line *ai, *aim, w; + int j, m = 0, k; + + if (n == 0) + return; + for (j = 1; j <= n; j *= 2) + m = 2 * j - 1; + for (m /= 2; m != 0; m /= 2) { + k = n - m; + for (j = 1; j <= k; j++) { + for (ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if (aim < ai) + break; /* wraparound */ + if (aim->value > ai[0].value || + (aim->value == ai[0].value && + 
aim->serial > ai[0].serial)) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + +static void +unsort(struct line *f, int l, int *b) +{ + int *a, i; + + a = xcalloc(l + 1, sizeof(*a)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); +} + +static int +skipline(FILE *f) +{ + int i, c; + + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return (i); +} + +static void +output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) +{ + int i, j, m, i0, i1, j0, j1, nc; + + rewind(f1); + rewind(f2); + m = len[0]; + J[0] = 0; + J[m + 1] = len[1] + 1; + if (diff_format != D_EDIT) { + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) { + if (diff_format == D_SIDEBYSIDE && suppress_common != 1) { + nc = fetch(ixold, i0, i0, f1, '\0', 1, flags); + print_space(nc, hw - nc + lpad + 1 + rpad, flags); + fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags); + printf("\n"); + } + i0++; + } + j0 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j1 = J[i1 + 1] - 1; + J[i1] = j1; + + /* + * When using side-by-side, lines from both of the files are + * printed. The algorithm used by diff(1) identifies the ranges + * in which two files differ. + * See the change() function below. + * The for loop below consumes the shorter range, whereas one of + * the while loops deals with the longer one. 
+ */ + if (diff_format == D_SIDEBYSIDE) { + for (i = i0, j = j0; i <= i1 && j <= j1; i++, j++) + change(file1, f1, file2, f2, i, i, j, j, &flags); + + while (i <= i1) { + change(file1, f1, file2, f2, i, i, j + 1, j, &flags); + i++; + } + + while (j <= j1) { + change(file1, f1, file2, f2, i + 1, i, j, j, &flags); + j++; + } + } else + change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); + } + } else { + for (i0 = m; i0 >= 1; i0 = i1 - 1) { + while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) + i0--; + j0 = J[i0 + 1] - 1; + i1 = i0 + 1; + while (i1 > 1 && J[i1 - 1] == 0) + i1--; + j1 = J[i1 - 1] + 1; + J[i1] = j1; + change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); + } + } + if (m == 0) + change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); + if (diff_format == D_IFDEF || diff_format == D_GFORMAT) { + for (;;) { +#define c i0 + if ((c = getc(f1)) == EOF) + return; + printf("%c", c); + } +#undef c + } + if (anychange != 0) { + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, flags); + else if (diff_format == D_UNIFIED) + dump_unified_vec(f1, f2, flags); + } +} + +static void +range(int a, int b, const char *separator) +{ + printf("%d", a > b ? b : a); + if (a < b) + printf("%s%d", separator, b); +} + +static void +uni_range(int a, int b) +{ + if (a < b) + printf("%d,%d", a, b - a + 1); + else if (a == b) + printf("%d", b); + else + printf("%d,0", b); +} + +static char * +preadline(int fd, size_t rlen, off_t off) +{ + char *line; + ssize_t nr; + + line = xmalloc(rlen + 1); + if ((nr = pread(fd, line, rlen, off)) == -1) + err(2, "preadline"); + if (nr > 0 && line[nr-1] == '\n') + nr--; + line[nr] = '\0'; + return (line); +} + +static bool +ignoreline_pattern(char *line) +{ + int ret; + + ret = regexec(&ignore_re, line, 0, NULL, 0); + return (ret == 0); /* if it matched, it should be ignored. 
*/ +} + +static bool +ignoreline(char *line, bool skip_blanks) +{ + + if (skip_blanks && *line == '\0') + return (true); + if (ignore_pats != NULL && ignoreline_pattern(line)) + return (true); + return (false); +} + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater then b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void +change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, + int *pflags) +{ + static size_t max_context = 64; + long curpos; + int i, nc; + const char *walk; + bool skip_blanks, ignore; + + skip_blanks = (*pflags & D_SKIPBLANKLINES); +restart: + if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) && + a > b && c > d) + return; + if (ignore_pats != NULL || skip_blanks) { + char *line; + /* + * All lines in the change, insert, or delete must match an ignore + * pattern for the change to be ignored. + */ + if (a <= b) { /* Changes and deletes. */ + for (i = a; i <= b; i++) { + line = preadline(fileno(f1), + ixold[i] - ixold[i - 1], ixold[i - 1]); + ignore = ignoreline(line, skip_blanks); + free(line); + if (!ignore) + goto proceed; + } + } + if (a > b || c <= d) { /* Changes and inserts. */ + for (i = c; i <= d; i++) { + line = preadline(fileno(f2), + ixnew[i] - ixnew[i - 1], ixnew[i - 1]); + ignore = ignoreline(line, skip_blanks); + free(line); + if (!ignore) + goto proceed; + } + } + return; + } +proceed: + if (*pflags & D_HEADER && diff_format != D_BRIEF) { + printf("%s %s %s\n", diffargs, file1, file2); + *pflags &= ~D_HEADER; + } + if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { + /* + * Allocate change records as needed. 
+ */ + if (context_vec_start == NULL || + context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = -1; + + if (context_vec_start != NULL) + offset = context_vec_ptr - context_vec_start; + max_context <<= 1; + context_vec_start = xreallocarray(context_vec_start, + max_context, sizeof(*context_vec_start)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + anychange = 1; + } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && + c > context_vec_ptr->d + (2 * diff_context) + 1) { + /* + * If this change is more than 'diff_context' lines from the + * previous change, dump the record and reset it. + */ + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, *pflags); + else + dump_unified_vec(f1, f2, *pflags); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + } + if (anychange == 0) + anychange = 1; + switch (diff_format) { + case D_BRIEF: + return; + case D_NORMAL: + case D_EDIT: + range(a, b, ","); + printf("%c", a > b ? 'a' : c > d ? 'd' : 'c'); + if (diff_format == D_NORMAL) + range(c, d, ","); + printf("\n"); + break; + case D_REVERSE: + printf("%c", a > b ? 'a' : c > d ? 'd' : 'c'); + range(a, b, " "); + printf("\n"); + break; + case D_NREVERSE: + if (a > b) + printf("a%d %d\n", b, d - c + 1); + else { + printf("d%d %d\n", a, b - a + 1); + if (!(c > d)) + /* add changed lines */ + printf("a%d %d\n", b, d - c + 1); + } + break; + } + if (diff_format == D_GFORMAT) { + curpos = ftell(f1); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = ixold[a > b ? 
b : a - 1] - curpos; + for (i = 0; i < nc; i++) + printf("%c", getc(f1)); + for (walk = group_format; *walk != '\0'; walk++) { + if (*walk == '%') { + walk++; + switch (*walk) { + case '<': + fetch(ixold, a, b, f1, '<', 1, *pflags); + break; + case '>': + fetch(ixnew, c, d, f2, '>', 0, *pflags); + break; + default: + printf("%%%c", *walk); + break; + } + continue; + } + printf("%c", *walk); + } + } + if (diff_format == D_SIDEBYSIDE) { + if (color && a > b) + printf("\033[%sm", add_code); + else if (color && c > d) + printf("\033[%sm", del_code); + if (a > b) { + print_space(0, hw + lpad, *pflags); + } else { + nc = fetch(ixold, a, b, f1, '\0', 1, *pflags); + print_space(nc, hw - nc + lpad, *pflags); + } + if (color && a > b) + printf("\033[%sm", add_code); + else if (color && c > d) + printf("\033[%sm", del_code); + printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|')); + if (color && c > d) + printf("\033[m"); + print_space(hw + lpad + 1, rpad, *pflags); + fetch(ixnew, c, d, f2, '\0', 0, *pflags); + printf("\n"); + } + if (diff_format == D_NORMAL || diff_format == D_IFDEF) { + fetch(ixold, a, b, f1, '<', 1, *pflags); + if (a <= b && c <= d && diff_format == D_NORMAL) + printf("---\n"); + } + if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE) + fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); + if (edoffset != 0 && diff_format == D_EDIT) { + /* + * A non-zero edoffset value for D_EDIT indicates that the last line + * printed was a bare dot (".") that has been escaped as ".." to + * prevent ed(1) from misinterpreting it. We have to add a + * substitute command to change this back and restart where we left + * off. 
+ */ + printf(".\n"); + printf("%ds/.//\n", a + edoffset - 1); + b = a + edoffset - 1; + a = b + 1; + c += edoffset; + goto restart; + } + if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) + printf(".\n"); + if (inifdef) { + printf("#endif /* %s */\n", ifdefname); + inifdef = 0; + } +} + +static int +fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) +{ + int i, j, c, lastc, col, nc, newcol; + + edoffset = 0; + nc = 0; + col = 0; + /* + * When doing #ifdef's, copy down to current line + * if this is the first file, so that stuff makes it to output. + */ + if ((diff_format == D_IFDEF) && oldfile) { + long curpos = ftell(lb); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = f[a > b ? b : a - 1] - curpos; + for (i = 0; i < nc; i++) + printf("%c", getc(lb)); + } + if (a > b) + return (0); + if (diff_format == D_IFDEF) { + if (inifdef) { + printf("#else /* %s%s */\n", + oldfile == 1 ? "!" : "", ifdefname); + } else { + if (oldfile) + printf("#ifndef %s\n", ifdefname); + else + printf("#ifdef %s\n", ifdefname); + } + inifdef = 1 + oldfile; + } + for (i = a; i <= b; i++) { + fseek(lb, f[i - 1], SEEK_SET); + nc = f[i] - f[i - 1]; + if (diff_format == D_SIDEBYSIDE && hw < nc) + nc = hw; + if (diff_format != D_IFDEF && diff_format != D_GFORMAT && + ch != '\0') { + if (color && (ch == '>' || ch == '+')) + printf("\033[%sm", add_code); + else if (color && (ch == '<' || ch == '-')) + printf("\033[%sm", del_code); + printf("%c", ch); + if (Tflag && (diff_format == D_NORMAL || + diff_format == D_CONTEXT || + diff_format == D_UNIFIED)) + printf("\t"); + else if (diff_format != D_UNIFIED) + printf(" "); + } + col = j = 0; + lastc = '\0'; + while (j < nc && (hw == 0 || col < hw)) { + c = getc(lb); + if (flags & D_STRIPCR && c == '\r') { + if ((c = getc(lb)) == '\n') + j++; + else { + ungetc(c, lb); + c = '\r'; + } + } + if (c == EOF) { + if (diff_format == D_EDIT || + diff_format == D_REVERSE || + diff_format == D_NREVERSE) 
+ warnx("No newline at end of file"); + else + printf("\n\\ No newline at end of file\n"); + return (col); + } + /* + * when using --side-by-side, col needs to be increased + * in any case to keep the columns aligned + */ + if (c == '\t') { + /* + * Calculate where the tab would bring us. + * If it would take us to the end of the + * column, either clip it (if expanding + * tabs) or return right away (if not). + */ + newcol = roundup(col + 1, tabsize); + if ((flags & D_EXPANDTABS) == 0) { + if (hw > 0 && newcol >= hw) + return (col); + printf("\t"); + } else { + if (hw > 0 && newcol > hw) + newcol = hw; + printf("%*s", newcol - col, ""); + } + col = newcol; + } else { + if (diff_format == D_EDIT && j == 1 && c == '\n' && + lastc == '.') { + /* + * Don't print a bare "." line since that will confuse + * ed(1). Print ".." instead and set the, global variable + * edoffset to an offset from which to restart. The + * caller must check the value of edoffset + */ + printf(".\n"); + edoffset = i - a + 1; + return (edoffset); + } + /* when side-by-side, do not print a newline */ + if (diff_format != D_SIDEBYSIDE || c != '\n') { + if (color && c == '\n') + printf("\033[m%c", c); + else + printf("%c", c); + col++; + } + } + + j++; + lastc = c; + } + } + if (color && diff_format == D_SIDEBYSIDE) + printf("\033[m"); + return (col); +} + +/* + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 
+ */ +static enum readhash +readhash(FILE *f, int flags, unsigned *hash) +{ + int i, t, space; + unsigned sum; + + sum = 1; + space = 0; + for (i = 0;;) { + switch (t = getc(f)) { + case '\0': + if ((flags & D_FORCEASCII) == 0) + return (RH_BINARY); + goto hashchar; + case '\r': + if (flags & D_STRIPCR) { + t = getc(f); + if (t == '\n') + break; + ungetc(t, f); + } + /* FALLTHROUGH */ + case '\t': + case '\v': + case '\f': + case ' ': + if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) != 0) { + space++; + continue; + } + /* FALLTHROUGH */ + default: + hashchar: + if (space && (flags & D_IGNOREBLANKS) == 0) { + i++; + space = 0; + } + sum = sum * 127 + chrtran(t); + i++; + continue; + case EOF: + if (i == 0) + return (RH_EOF); + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } + *hash = sum; + return (RH_OK); +} + +static int +asciifile(FILE *f) +{ + unsigned char buf[BUFSIZ]; + size_t cnt; + + if (f == NULL) + return (1); + + rewind(f); + cnt = fread(buf, 1, sizeof(buf), f); + return (memchr(buf, '\0', cnt) == NULL); +} + +#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre) - 1) == 0) + +static char * +match_function(const long *f, int pos, FILE *fp) +{ + unsigned char buf[FUNCTION_CONTEXT_SIZE]; + size_t nc; + int last = lastline; + const char *state = NULL; + + lastline = pos; + for (; pos > last; pos--) { + fseek(fp, f[pos - 1], SEEK_SET); + nc = f[pos] - f[pos - 1]; + if (nc >= sizeof(buf)) + nc = sizeof(buf) - 1; + nc = fread(buf, 1, nc, fp); + if (nc == 0) + continue; + buf[nc] = '\0'; + buf[strcspn(buf, "\n")] = '\0'; + if (most_recent_pat != NULL) { + int ret = regexec(&most_recent_re, buf, 0, NULL, 0); + + if (ret != 0) + continue; + strlcpy(lastbuf, buf, sizeof(lastbuf)); + lastmatchline = pos; + return (lastbuf); + } else if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$' + || buf[0] == '-' || buf[0] == '+') { + if (begins_with(buf, "private:")) { + if (!state) + state = " (private)"; + } else if (begins_with(buf, "protected:")) { + if 
(!state) + state = " (protected)"; + } else if (begins_with(buf, "public:")) { + if (!state) + state = " (public)"; + } else { + strlcpy(lastbuf, buf, sizeof(lastbuf)); + if (state) + strlcat(lastbuf, state, sizeof(lastbuf)); + lastmatchline = pos; + return (lastbuf); + } + } + } + return (lastmatchline > 0 ? lastbuf : NULL); +} + +/* dump accumulated "context" diff changes */ +static void +dump_context_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd, do_output; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAX(1, cvp->a - diff_context); + upb = MIN((int)len[0], context_vec_ptr->b + diff_context); + lowc = MAX(1, cvp->c - diff_context); + upd = MIN((int)len[1], context_vec_ptr->d + diff_context); + + printf("***************"); + if (flags & (D_PROTOTYPE | D_MATCHLAST)) { + f = match_function(ixold, cvp->a - 1, f1); + if (f != NULL) + printf(" %s", f); + } + printf("\n*** "); + range(lowa, upb, ","); + printf(" ****\n"); + + /* + * Output changes to the "old" file. The first loop suppresses + * output if there were no changes to the "old" file (we'll see + * the "old" lines as context in the "new" list). + */ + do_output = 0; + for (; cvp <= context_vec_ptr; cvp++) + if (cvp->a <= cvp->b) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'a') + fetch(ixold, lowa, b, f1, ' ', 0, flags); + else { + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, + ch == 'c' ? '!' 
: '-', 0, flags); + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b + 1, upb, f1, ' ', 0, flags); + } + /* output changes to the "new" file */ + printf("--- "); + range(lowc, upd, ","); + printf(" ----\n"); + + do_output = 0; + for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) + if (cvp->c <= cvp->d) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'd') + fetch(ixnew, lowc, d, f2, ' ', 0, flags); + else { + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, + ch == 'c' ? '!' : '+', 0, flags); + } + lowc = d + 1; + cvp++; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + } + context_vec_ptr = context_vec_start - 1; +} + +/* dump accumulated "unified" diff changes */ +static void +dump_unified_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAX(1, cvp->a - diff_context); + upb = MIN((int)len[0], context_vec_ptr->b + diff_context); + lowc = MAX(1, cvp->c - diff_context); + upd = MIN((int)len[1], context_vec_ptr->d + diff_context); + + printf("@@ -"); + uni_range(lowa, upb); + printf(" +"); + uni_range(lowc, upd); + printf(" @@"); + if (flags & (D_PROTOTYPE | D_MATCHLAST)) { + f = match_function(ixold, cvp->a - 1, f1); + if (f != NULL) + printf(" %s", f); + } + printf("\n"); + + /* + * Output changes in "unified" diff format--the old and new lines + * are printed together. + */ + for (; cvp <= context_vec_ptr; cvp++) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + /* + * c: both new and old changes + * d: only changes in the old file + * a: only changes in the new file + */ + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 
'd' : 'a'; + + switch (ch) { + case 'c': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + case 'd': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + break; + case 'a': + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + } + lowa = b + 1; + lowc = d + 1; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + + context_vec_ptr = context_vec_start - 1; +} + +static void +print_header(const char *file1, const char *file2) +{ + const char *time_format; + char buf[256]; + struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; + int nsec1 = stb1.st_mtim.tv_nsec; + int nsec2 = stb2.st_mtim.tv_nsec; + + time_format = "%Y-%m-%d %H:%M:%S"; + + if (cflag) + time_format = "%c"; + tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); + tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); + if (label[0] != NULL) + printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", + label[0]); + else { + strftime(buf, sizeof(buf), time_format, tm_ptr1); + printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---", + file1, buf); + if (!cflag) { + strftime(buf, sizeof(buf), "%z", tm_ptr1); + printf(".%.9d %s", nsec1, buf); + } + printf("\n"); + } + if (label[1] != NULL) + printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", + label[1]); + else { + strftime(buf, sizeof(buf), time_format, tm_ptr2); + printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++", + file2, buf); + if (!cflag) { + strftime(buf, sizeof(buf), "%z", tm_ptr2); + printf(".%.9d %s", nsec2, buf); + } + printf("\n"); + } +} + +/* + * Prints n number of space characters either by using tab + * or single space characters. 
+ * nc is the preceding number of characters + */ +static void +print_space(int nc, int n, int flags) +{ + int col, newcol, tabstop; + + col = nc; + newcol = nc + n; + /* first, use tabs if allowed */ + if ((flags & D_EXPANDTABS) == 0) { + while ((tabstop = roundup(col + 1, tabsize)) <= newcol) { + printf("\t"); + col = tabstop; + } + } + /* finish with spaces */ + printf("%*s", newcol - col, ""); +} diff --git a/usr.bin/diff/diffreg_new.c b/usr.bin/diff/diffreg_new.c new file mode 100644 index 000000000000..f54cd554ccad --- /dev/null +++ b/usr.bin/diff/diffreg_new.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2018 Martin Pieuchot + * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/types.h> +#include <sys/capsicum.h> +#ifndef DIFF_NO_MMAP +#include <sys/mman.h> +#endif +#include <sys/stat.h> + +#include <capsicum_helpers.h> +#include <err.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "diff.h" +#include <arraylist.h> +#include <diff_main.h> +#include <diff_output.h> + +const char *format_label(const char *, struct stat *); + +enum diffreg_algo { + DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0, + DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1, + DIFFREG_ALGO_PATIENCE = 2, + DIFFREG_ALGO_NONE = 3, +}; + +int diffreg_new(char *, char *, int, int); +FILE * openfile(const char *, char **, struct stat *); + +static const struct diff_algo_config myers_then_patience; +static const struct diff_algo_config myers_then_myers_divide; +static const struct diff_algo_config patience; +static const struct diff_algo_config myers_divide; + +static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){ + .impl = diff_algo_myers, + .permitted_state_size = 1024 * 1024 * sizeof(int), + .fallback_algo = &patience, +}; + +static const struct diff_algo_config myers_then_myers_divide = + (struct diff_algo_config){ + .impl = diff_algo_myers, + .permitted_state_size = 1024 * 1024 * sizeof(int), + .fallback_algo = &myers_divide, +}; + +static const struct diff_algo_config patience = (struct diff_algo_config){ + .impl = diff_algo_patience, + /* After subdivision, do Patience again: */ + .inner_algo = &patience, + /* If subdivision failed, do Myers Divide et Impera: */ + .fallback_algo = &myers_then_myers_divide, +}; + +static const struct diff_algo_config myers_divide = (struct diff_algo_config){ + .impl = diff_algo_myers_divide, + /* When division succeeded, start from the top: */ + .inner_algo = &myers_then_myers_divide, + /* (fallback_algo = NULL implies diff_algo_none). 
*/ +}; + +static const struct diff_algo_config no_algo = (struct diff_algo_config){ + .impl = diff_algo_none, +}; + +/* If the state for a forward-Myers is small enough, use Myers, otherwise first + * do a Myers-divide. */ +static const struct diff_config diff_config_myers_then_myers_divide = { + .atomize_func = diff_atomize_text_by_line, + .algo = &myers_then_myers_divide, +}; + +/* If the state for a forward-Myers is small enough, use Myers, otherwise first + * do a Patience. */ +static const struct diff_config diff_config_myers_then_patience = { + .atomize_func = diff_atomize_text_by_line, + .algo = &myers_then_patience, +}; + +/* Directly force Patience as a first divider of the source file. */ +static const struct diff_config diff_config_patience = { + .atomize_func = diff_atomize_text_by_line, + .algo = &patience, +}; + +/* Directly force Patience as a first divider of the source file. */ +static const struct diff_config diff_config_no_algo = { + .atomize_func = diff_atomize_text_by_line, +}; + +const char * +format_label(const char *oldlabel, struct stat *stb) +{ + const char *time_format = "%Y-%m-%d %H:%M:%S"; + char *newlabel; + char buf[256]; + char end[10]; + struct tm tm, *tm_ptr; + int nsec = stb->st_mtim.tv_nsec; + size_t newlabellen, timelen, endlen; + tm_ptr = localtime_r(&stb->st_mtime, &tm); + + timelen = strftime(buf, 256, time_format, tm_ptr); + endlen = strftime(end, 10, "%z", tm_ptr); + + /* + * The new label is the length of the time, old label, timezone, + * 9 characters for nanoseconds, and 4 characters for a period + * and for formatting. 
+ */ + newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4; + newlabel = calloc(newlabellen, sizeof(char)); + + snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n", + oldlabel, buf, nsec, end); + + return newlabel; +} + +int +diffreg_new(char *file1, char *file2, int flags, int capsicum) +{ + char *str1, *str2; + FILE *f1, *f2; + struct stat st1, st2; + struct diff_input_info info; + struct diff_data left = {}, right = {}; + struct diff_result *result = NULL; + bool force_text, have_binary; + int rc, atomizer_flags, rflags, diff_flags = 0; + int context_lines = diff_context; + const struct diff_config *cfg; + enum diffreg_algo algo; + cap_rights_t rights_ro; + + algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE; + + switch (algo) { + default: + case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE: + cfg = &diff_config_myers_then_myers_divide; + break; + case DIFFREG_ALGO_MYERS_THEN_PATIENCE: + cfg = &diff_config_myers_then_patience; + break; + case DIFFREG_ALGO_PATIENCE: + cfg = &diff_config_patience; + break; + case DIFFREG_ALGO_NONE: + cfg = &diff_config_no_algo; + break; + } + + f1 = openfile(file1, &str1, &st1); + f2 = openfile(file2, &str2, &st2); + + if (capsicum) { + cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); + if (caph_rights_limit(fileno(f1), &rights_ro) < 0) + err(2, "unable to limit rights on: %s", file1); + if (caph_rights_limit(fileno(f2), &rights_ro) < 0) + err(2, "unable to limit rights on: %s", file2); + if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { + /* stdin has already been limited */ + if (caph_limit_stderr() == -1) + err(2, "unable to limit stderr"); + if (caph_limit_stdout() == -1) + err(2, "unable to limit stdout"); + } else if (caph_limit_stdio() == -1) + err(2, "unable to limit stdio"); + caph_cache_catpages(); + caph_cache_tzdata(); + if (caph_enter() < 0) + err(2, "unable to enter capability mode"); + } + /* + * If we have been given a label use that for the paths, if not format + * the path with the files 
modification time. + */ + info.flags = 0; + info.left_path = (label[0] != NULL) ? + label[0] : format_label(file1, &stb1); + info.right_path = (label[1] != NULL) ? + label[1] : format_label(file2, &stb2); + + if (flags & D_FORCEASCII) + diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA; + if (flags & D_IGNOREBLANKS) + diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE; + if (flags & D_PROTOTYPE) + diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES; + + if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) { + rc = D_ERROR; + goto done; + } + if (left.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED) + warnx("%s truncated", file1); + if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) { + rc = D_ERROR; + goto done; + } + if (right.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED) + warnx("%s truncated", file2); + + result = diff_main(cfg, &left, &right); + if (result->rc != DIFF_RC_OK) { + rc = D_ERROR; + status |= 2; + goto done; + } + /* + * If there wasn't an error, but we don't have any printable chunks + * then the files must match. 
+ */ + if (!diff_result_contains_printable_chunks(result)) { + rc = D_SAME; + goto done; + } + + atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags); + rflags = (result->left->root->diff_flags | result->right->root->diff_flags); + force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA); + have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA); + + if (have_binary && !force_text) { + rc = D_BINARY; + status |= 1; + goto done; + } + + if (color) + diff_output_set_colors(color, del_code, add_code); + if (diff_format == D_NORMAL) { + rc = diff_output_plain(NULL, stdout, &info, result, false); + } else if (diff_format == D_EDIT) { + rc = diff_output_edscript(NULL, stdout, &info, result); + } else { + rc = diff_output_unidiff(NULL, stdout, &info, result, + context_lines); + } + if (rc != DIFF_RC_OK) { + rc = D_ERROR; + status |= 2; + } else { + rc = D_DIFFER; + status |= 1; + } +done: + diff_result_free(result); + diff_data_free(&left); + diff_data_free(&right); +#ifndef DIFF_NO_MMAP + if (str1) + munmap(str1, st1.st_size); + if (str2) + munmap(str2, st2.st_size); +#endif + fclose(f1); + fclose(f2); + + return rc; +} + +FILE * +openfile(const char *path, char **p, struct stat *st) +{ + FILE *f = NULL; + + if (strcmp(path, "-") == 0) + f = stdin; + else + f = fopen(path, "r"); + + if (f == NULL) + err(2, "%s", path); + + if (fstat(fileno(f), st) == -1) + err(2, "%s", path); + +#ifndef DIFF_NO_MMAP + *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0); + if (*p == MAP_FAILED) +#endif + *p = NULL; /* fall back on file I/O */ + + return f; +} + +bool +can_libdiff(int flags) +{ + /* libdiff's atomizer can only deal with files */ + if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode)) + return false; + + /* Is this one of the supported input/output modes for diffreg_new? 
*/ + if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) && + ignore_pats == NULL && ( + diff_format == D_NORMAL || +#if 0 + diff_format == D_EDIT || +#endif + diff_format == D_UNIFIED) && + (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) { + return true; + } + + /* Fallback to using stone. */ + return false; +} diff --git a/usr.bin/diff/pr.c b/usr.bin/diff/pr.c new file mode 100644 index 000000000000..c3ea280073af --- /dev/null +++ b/usr.bin/diff/pr.c @@ -0,0 +1,122 @@ +/*- + * Copyright (c) 2017 Baptiste Daroussin <bapt@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/procdesc.h> +#include <sys/wait.h> + +#include <err.h> +#include <paths.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "pr.h" +#include "diff.h" +#include "xmalloc.h" + +#define _PATH_PR "/usr/bin/pr" + +struct pr * +start_pr(char *file1, char *file2) +{ + int pfd[2]; + int pr_pd; + pid_t pid; + char *header; + struct pr *pr; + + pr = xcalloc(1, sizeof(*pr)); + + xasprintf(&header, "%s %s %s", diffargs, file1, file2); + signal(SIGPIPE, SIG_IGN); + fflush(stdout); + rewind(stdout); + if (pipe(pfd) == -1) + err(2, "pipe"); + switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { + case -1: + status |= 2; + free(header); + err(2, "No more processes"); + case 0: + /* child */ + if (pfd[0] != STDIN_FILENO) { + dup2(pfd[0], STDIN_FILENO); + close(pfd[0]); + } + close(pfd[1]); + execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); + _exit(127); + default: + + /* parent */ + if (pfd[1] != STDOUT_FILENO) { + pr->ostdout = dup(STDOUT_FILENO); + dup2(pfd[1], STDOUT_FILENO); + close(pfd[1]); + } + close(pfd[0]); + rewind(stdout); + free(header); + pr->kq = kqueue(); + if (pr->kq == -1) + err(2, "kqueue"); + pr->e = xmalloc(sizeof(struct kevent)); + EV_SET(pr->e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, + NULL); + if (kevent(pr->kq, pr->e, 1, NULL, 0, NULL) == -1) + err(2, "kevent"); + } + return (pr); +} + +/* close the pipe to pr and restore stdout */ +void +stop_pr(struct pr *pr) +{ + int wstatus; + + if (pr == NULL) + return; + + fflush(stdout); + if (pr->ostdout != STDOUT_FILENO) { + close(STDOUT_FILENO); + dup2(pr->ostdout, STDOUT_FILENO); + close(pr->ostdout); + } + if (kevent(pr->kq, NULL, 0, pr->e, 1, NULL) == -1) + err(2, "kevent"); + wstatus = pr->e[0].data; + close(pr->kq); + free(pr); + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) + errx(2, "pr exited abnormally"); + else if (WIFSIGNALED(wstatus)) + errx(2, "pr killed by signal %d", + WTERMSIG(wstatus)); +} diff --git a/usr.bin/diff/pr.h 
b/usr.bin/diff/pr.h new file mode 100644 index 000000000000..2ff5949f282f --- /dev/null +++ b/usr.bin/diff/pr.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2017 Baptiste Daroussin <bapt@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/event.h> + +struct pr { + int ostdout; + int kq; + struct kevent *e; +}; + +struct pr *start_pr(char *file1, char *file2); +void stop_pr(struct pr *); diff --git a/usr.bin/diff/tests/Bflag_C.out b/usr.bin/diff/tests/Bflag_C.out new file mode 100644 index 000000000000..c7f130aff396 --- /dev/null +++ b/usr.bin/diff/tests/Bflag_C.out @@ -0,0 +1,2 @@ +1a2 +> diff --git a/usr.bin/diff/tests/Bflag_D.out b/usr.bin/diff/tests/Bflag_D.out new file mode 100644 index 000000000000..4c1170334935 --- /dev/null +++ b/usr.bin/diff/tests/Bflag_D.out @@ -0,0 +1,2 @@ +1a2 +> C diff --git a/usr.bin/diff/tests/Bflag_F.out b/usr.bin/diff/tests/Bflag_F.out new file mode 100644 index 000000000000..9dc5e5694048 --- /dev/null +++ b/usr.bin/diff/tests/Bflag_F.out @@ -0,0 +1,4 @@ +7c8 +< G +--- +> X diff --git a/usr.bin/diff/tests/Makefile b/usr.bin/diff/tests/Makefile new file mode 100644 index 000000000000..242e76260249 --- /dev/null +++ b/usr.bin/diff/tests/Makefile @@ -0,0 +1,50 @@ +PACKAGE= tests + +ATF_TESTS_SH= diff_test + +${PACKAGE}FILES+= \ + Bflag_C.out \ + Bflag_D.out \ + Bflag_F.out \ + input1.in \ + input2.in \ + input_c1.in \ + input_c2.in \ + side_by_side_tabbed_a.in \ + side_by_side_tabbed_b.in \ + simple.out \ + simple_e.out \ + simple_n.out \ + simple_u.out \ + simple_i.out \ + simple_w.out \ + simple_b.out \ + simple_p.out \ + unified_p.out \ + unified_c9999.out \ + unified_9999.out \ + header.out \ + header_ns.out \ + ifdef.out \ + group-format.out \ + strip_o.out \ + functionname.in \ + functionname_c.in \ + functionname_c.out \ + functionname_objcclassm.in \ + functionname_objcclassm.out \ + functionname_objcm.in \ + functionname_objcm.out + +NETBSD_ATF_TESTS_SH+= netbsd_diff_test + +${PACKAGE}FILES+= \ + d_mallocv1.in \ + d_mallocv2.in + +ATF_TESTS_SH_SED_netbsd_diff_test+= -e 's/t_diff/`basename $$0`/g' +ATF_TESTS_SH_SRC_netbsd_diff_test= t_diff.sh + +.include <netbsd-tests.test.mk> + +.include <bsd.test.mk> diff --git 
a/usr.bin/diff/tests/Makefile.depend b/usr.bin/diff/tests/Makefile.depend new file mode 100644 index 000000000000..11aba52f82cf --- /dev/null +++ b/usr.bin/diff/tests/Makefile.depend @@ -0,0 +1,10 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/usr.bin/diff/tests/diff_test.sh b/usr.bin/diff/tests/diff_test.sh new file mode 100755 index 000000000000..691b649813a1 --- /dev/null +++ b/usr.bin/diff/tests/diff_test.sh @@ -0,0 +1,410 @@ + +atf_test_case simple +atf_test_case unified +atf_test_case header +atf_test_case header_ns +atf_test_case ifdef +atf_test_case group_format +atf_test_case side_by_side +atf_test_case side_by_side_tabbed +atf_test_case brief_format +atf_test_case b230049 +atf_test_case stripcr_o +atf_test_case b252515 +atf_test_case b278988 +atf_test_case Bflag +atf_test_case Nflag +atf_test_case tabsize +atf_test_case conflicting_format +atf_test_case label +atf_test_case report_identical +atf_test_case non_regular_file +atf_test_case binary +atf_test_case functionname +atf_test_case noderef +atf_test_case ignorecase +atf_test_case dirloop + +simple_body() +{ + atf_check -o file:$(atf_get_srcdir)/simple.out -s eq:1 \ + diff "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_e.out -s eq:1 \ + diff -e "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_u.out -s eq:1 \ + diff -u -L input1 -L input2 "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_n.out -s eq:1 \ + diff -n "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o inline:"Files $(atf_get_srcdir)/input1.in and $(atf_get_srcdir)/input2.in differ\n" -s eq:1 \ + diff -q "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check \ + diff -q 
"$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input1.in" + + atf_check -o file:$(atf_get_srcdir)/simple_i.out -s eq:1 \ + diff -i "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_w.out -s eq:1 \ + diff -w "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_b.out -s eq:1 \ + diff -b "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_p.out -s eq:1 \ + diff --label input_c1.in --label input_c2.in -p "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" +} + +unified_body() +{ + atf_check -o file:$(atf_get_srcdir)/unified_p.out -s eq:1 \ + diff -up -L input_c1.in -L input_c2.in "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + atf_check -o file:$(atf_get_srcdir)/unified_9999.out -s eq:1 \ + diff -u9999 -L input_c1.in -L input_c2.in "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" +} + +b230049_body() +{ + printf 'a\nb\r\nc\n' > b230049_a.in + printf 'a\r\nb\r\nc\r\n' > b230049_b.in + atf_check -o empty -s eq:0 \ + diff -up --strip-trailing-cr -L b230049_a.in -L b230049_b.in \ + b230049_a.in b230049_b.in +} + +stripcr_o_body() +{ + printf 'a\nX\nc\n' > stripcr_o_X.in + printf 'a\r\nY\r\nc\r\n' > stripcr_o_Y.in + atf_check -o "file:$(atf_get_srcdir)/strip_o.out" -s eq:1 \ + diff -L1 -L2 -u --strip-trailing-cr stripcr_o_X.in stripcr_o_Y.in +} + +b252515_body() +{ + printf 'a b\n' > b252515_a.in + printf 'a b\n' > b252515_b.in + atf_check -o empty -s eq:0 \ + diff -qw b252515_a.in b252515_b.in +} + +b278988_body() +{ + printf 'a\nb\nn' > b278988.a.in + printf 'a\n\nb\nn' > b278988.b.in + atf_check -o empty -s eq:0 \ + diff -Bw b278988.a.in b278988.b.in +} + +header_body() +{ + export TZ=UTC + : > empty + echo hello > hello + touch -d 2015-04-03T01:02:03 empty + touch -d 2016-12-22T11:22:33 hello + atf_check -o "file:$(atf_get_srcdir)/header.out" -s 
eq:1 \ + diff -u empty hello +} + +header_ns_body() +{ + export TZ=UTC + : > empty + echo hello > hello + touch -d 2015-04-03T01:02:03.123456789 empty + touch -d 2016-12-22T11:22:33.987654321 hello + atf_check -o "file:$(atf_get_srcdir)/header_ns.out" -s eq:1 \ + diff -u empty hello +} + +ifdef_body() +{ + atf_check -o file:$(atf_get_srcdir)/ifdef.out -s eq:1 \ + diff -D PLOP "$(atf_get_srcdir)/input_c1.in" \ + "$(atf_get_srcdir)/input_c2.in" +} + +group_format_body() +{ + atf_check -o file:$(atf_get_srcdir)/group-format.out -s eq:1 \ + diff --changed-group-format='<<<<<<< (local) +%<======= +%>>>>>>>> (stock) +' "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" +} + +side_by_side_body() +{ + atf_check -o save:A printf "A\nB\nC\n" + atf_check -o save:B printf "D\nB\nE\n" + + exp_output=$(printf "A[[:space:]]+|[[:space:]]+D\nB[[:space:]]+B\nC[[:space:]]+|[[:space:]]+E") + exp_output_suppressed=$(printf "A[[:space:]]+|[[:space:]]+D\nC[[:space:]]+|[[:space:]]+E") + + atf_check -o match:"$exp_output" -s exit:1 \ + diff --side-by-side A B + atf_check -o match:"$exp_output" -s exit:1 \ + diff -y A B + atf_check -o match:"$exp_output_suppressed" -s exit:1 \ + diff -y --suppress-common-lines A B + atf_check -o match:"$exp_output_suppressed" -s exit:1 \ + diff -W 65 -y --suppress-common-lines A B +} + +side_by_side_tabbed_body() +{ + file_a=$(atf_get_srcdir)/side_by_side_tabbed_a.in + file_b=$(atf_get_srcdir)/side_by_side_tabbed_b.in + + atf_check -o save:diffout -s not-exit:0 \ + diff -y ${file_a} ${file_b} + atf_check -o save:diffout_expanded -s not-exit:0 \ + diff -yt ${file_a} ${file_b} + + atf_check -o not-empty grep -Ee 'file A.+file B' diffout + atf_check -o not-empty grep -Ee 'file A.+file B' diffout_expanded + + atf_check -o not-empty grep -Ee 'tabs.+tabs' diffout + atf_check -o not-empty grep -Ee 'tabs.+tabs' diffout_expanded +} + +brief_format_body() +{ + atf_check mkdir A B + + atf_check -x "echo 1 > A/test-file" + atf_check -x "echo 2 > 
B/test-file" + + atf_check cp -Rf A C + atf_check cp -Rf A D + + atf_check -x "echo 3 > D/another-test-file" + + atf_check \ + -s exit:1 \ + -o inline:"Files A/test-file and B/test-file differ\n" \ + diff -rq A B + + atf_check diff -rq A C + + atf_check \ + -s exit:1 \ + -o inline:"Only in D: another-test-file\n" \ + diff -rq A D + + atf_check \ + -s exit:1 \ + -o inline:"Files A/another-test-file and D/another-test-file differ\n" \ + diff -Nrq A D +} + +Bflag_body() +{ + atf_check -x 'printf "A\nB\n" > A' + atf_check -x 'printf "A\n\nB\n" > B' + atf_check -x 'printf "A\n \nB\n" > C' + atf_check -x 'printf "A\nC\nB\n" > D' + atf_check -x 'printf "A\nB\nC\nD\nE\nF\nG\nH" > E' + atf_check -x 'printf "A\n\nB\nC\nD\nE\nF\nX\nH" > F' + + atf_check -s exit:0 -o inline:"" diff -B A B + atf_check -s exit:1 -o file:"$(atf_get_srcdir)/Bflag_C.out" diff -B A C + atf_check -s exit:1 -o file:"$(atf_get_srcdir)/Bflag_D.out" diff -B A D + atf_check -s exit:1 -o file:"$(atf_get_srcdir)/Bflag_F.out" diff -B E F +} + +Nflag_body() +{ + atf_check -x 'printf "foo" > A' + + atf_check -s exit:1 -o ignore -e ignore diff -N A NOFILE + atf_check -s exit:1 -o ignore -e ignore diff -N NOFILE A + atf_check -s exit:2 -o ignore -e ignore diff -N NOFILE1 NOFILE2 +} + +tabsize_body() +{ + printf "\tA\n" > A + printf "\tB\n" > B + + atf_check -s exit:1 \ + -o inline:"1c1\n< A\n---\n> B\n" \ + diff -t --tabsize 1 A B +} + +conflicting_format_body() +{ + printf "\tA\n" > A + printf "\tB\n" > B + + atf_check -s exit:2 -e ignore diff -c -u A B + atf_check -s exit:2 -e ignore diff -e -f A B + atf_check -s exit:2 -e ignore diff -y -q A B + atf_check -s exit:2 -e ignore diff -q -u A B + atf_check -s exit:2 -e ignore diff -q -c A B + atf_check -s exit:2 -e ignore diff --normal -c A B + atf_check -s exit:2 -e ignore diff -c --normal A B + + atf_check -s exit:1 -o ignore -e ignore diff -u -u A B + atf_check -s exit:1 -o ignore -e ignore diff -e -e A B + atf_check -s exit:1 -o ignore -e ignore diff -y -y A B 
+ atf_check -s exit:1 -o ignore -e ignore diff -q -q A B + atf_check -s exit:1 -o ignore -e ignore diff -c -c A B + atf_check -s exit:1 -o ignore -e ignore diff --normal --normal A B +} + +label_body() +{ + printf "\tA\n" > A + + atf_check -o inline:"Files hello and world are identical\n" \ + -s exit:0 diff --label hello --label world -s A A + + atf_check -o inline:"Binary files hello and world differ\n" \ + -s exit:1 diff --label hello --label world `which diff` `which ls` +} + +report_identical_head() +{ + atf_set "require.user" unprivileged +} +report_identical_body() +{ + printf "\tA\n" > A + printf "\tB\n" > B + atf_check -s exit:0 -o match:"are identical" \ + diff -s A A + atf_check -s exit:1 -o not-match:"are identical" \ + diff -s A B + chmod -r B + atf_check -s exit:2 -e inline:"diff: B: Permission denied\n" \ + -o empty diff -s A B +} + +non_regular_file_body() +{ + printf "\tA\n" > A + mkfifo B + printf "\tA\n" > B & + + atf_check diff A B + printf "\tB\n" > B & + atf_check -s exit:1 \ + -o inline:"--- A\n+++ B\n@@ -1 +1 @@\n-\tA\n+\tB\n" \ + diff --label A --label B -u A B +} + +binary_body() +{ + # the NUL byte has to be after at least BUFSIZ bytes to trick asciifile() + yes 012345678901234567890123456789012345678901234567890 | head -n 174 > A + cp A B + printf '\n\0\n' >> A + printf '\nx\n' >> B + + atf_check -o inline:"Binary files A and B differ\n" -s exit:1 diff A B + atf_check -o inline:"176c\nx\n.\n" -s exit:1 diff -ae A B +} + +functionname_body() +{ + atf_check -o file:$(atf_get_srcdir)/functionname_c.out -s exit:1 \ + diff -u -p -L functionname.in -L functionname_c.in \ + "$(atf_get_srcdir)/functionname.in" "$(atf_get_srcdir)/functionname_c.in" + + atf_check -o file:$(atf_get_srcdir)/functionname_objcm.out -s exit:1 \ + diff -u -p -L functionname.in -L functionname_objcm.in \ + "$(atf_get_srcdir)/functionname.in" "$(atf_get_srcdir)/functionname_objcm.in" + + atf_check -o file:$(atf_get_srcdir)/functionname_objcclassm.out -s exit:1 \ + diff -u 
-p -L functionname.in -L functionname_objcclassm.in \ + "$(atf_get_srcdir)/functionname.in" "$(atf_get_srcdir)/functionname_objcclassm.in" +} + +noderef_body() +{ + atf_check mkdir A B + + atf_check -x "echo 1 > A/test-file" + atf_check -x "echo 1 > test-file" + atf_check -x "echo 1 > test-file2" + + atf_check ln -s $(pwd)/test-file B/test-file + + atf_check -o empty -s exit:0 diff -r A B + atf_check -o inline:"File A/test-file is a file while file B/test-file is a symbolic link\n" \ + -s exit:1 diff -r --no-dereference A B + + # both test files are now the same symbolic link + atf_check rm A/test-file + + atf_check ln -s $(pwd)/test-file A/test-file + atf_check -o empty -s exit:0 diff -r A B + atf_check -o empty -s exit:0 diff -r --no-dereference A B + + # make test files different symbolic links, but same contents + atf_check unlink A/test-file + atf_check ln -s $(pwd)/test-file2 A/test-file + + atf_check -o empty -s exit:0 diff -r A B + atf_check -o inline:"Symbolic links A/test-file and B/test-file differ\n" -s exit:1 diff -r --no-dereference A B +} + +ignorecase_body() +{ + atf_check mkdir A + atf_check mkdir B + + atf_check -x "echo hello > A/foo" + atf_check -x "echo hello > B/FOO" + + atf_check -o empty -s exit:0 diff -u -r --ignore-file-name-case A B +} + +dirloop_head() +{ + atf_set "timeout" "10" +} +dirloop_body() +{ + atf_check mkdir -p a/foo/bar + atf_check ln -s .. 
a/foo/bar/up + atf_check cp -a a b + atf_check \ + -e match:"a/foo/bar/up: Directory loop detected" \ + -e match:"b/foo/bar/up: Directory loop detected" \ + diff -r a b +} + +atf_init_test_cases() +{ + atf_add_test_case simple + atf_add_test_case unified + atf_add_test_case header + atf_add_test_case header_ns + atf_add_test_case ifdef + atf_add_test_case group_format + atf_add_test_case side_by_side + atf_add_test_case side_by_side_tabbed + atf_add_test_case brief_format + atf_add_test_case b230049 + atf_add_test_case stripcr_o + atf_add_test_case b252515 + atf_add_test_case b278988 + atf_add_test_case Bflag + atf_add_test_case Nflag + atf_add_test_case tabsize + atf_add_test_case conflicting_format + atf_add_test_case label + atf_add_test_case report_identical + atf_add_test_case non_regular_file + atf_add_test_case binary + atf_add_test_case functionname + atf_add_test_case noderef + atf_add_test_case ignorecase + atf_add_test_case dirloop +} diff --git a/usr.bin/diff/tests/functionname.in b/usr.bin/diff/tests/functionname.in new file mode 100644 index 000000000000..7b4c50c86cd9 --- /dev/null +++ b/usr.bin/diff/tests/functionname.in @@ -0,0 +1,29 @@ +static void +doSomethingThenPrintHello(int test) +{ + test = test << 4; + if (test % 8 == 6) { + return; + } + + print("goodbye\n"); +} + + +- (long) readOffset:(FILE*)file +{ + if( version >= 11){ + long offset; + fread(&offset, sizeof(long), 1, file); + return offset; + } else { + int offset; + fread(&offset, sizeof(int), 1, file); + return offset; + } +} + ++ (BOOL) isEdible:(NSString *)mushroom +{ + return TRUE; +} diff --git a/usr.bin/diff/tests/functionname_c.in b/usr.bin/diff/tests/functionname_c.in new file mode 100644 index 000000000000..84f6846783ca --- /dev/null +++ b/usr.bin/diff/tests/functionname_c.in @@ -0,0 +1,29 @@ +static void +doSomethingThenPrintHello(int test) +{ + test = test << 4; + if (test % 8 == 6) { + return; + } + + print("hello\n"); +} + + +- (long) readOffset:(FILE*)file +{ + if( 
version >= 11){ + long offset; + fread(&offset, sizeof(long), 1, file); + return offset; + } else { + int offset; + fread(&offset, sizeof(int), 1, file); + return offset; + } +} + ++ (BOOL) isEdible:(NSString *)mushroom +{ + return TRUE; +} diff --git a/usr.bin/diff/tests/functionname_c.out b/usr.bin/diff/tests/functionname_c.out new file mode 100644 index 000000000000..b17ce05d04ca --- /dev/null +++ b/usr.bin/diff/tests/functionname_c.out @@ -0,0 +1,11 @@ +--- functionname.in ++++ functionname_c.in +@@ -6,7 +6,7 @@ doSomethingThenPrintHello(int test) + return; + } + +- print("goodbye\n"); ++ print("hello\n"); + } + + diff --git a/usr.bin/diff/tests/functionname_objcclassm.in b/usr.bin/diff/tests/functionname_objcclassm.in new file mode 100644 index 000000000000..37a9a76c6e6a --- /dev/null +++ b/usr.bin/diff/tests/functionname_objcclassm.in @@ -0,0 +1,31 @@ +static void +doSomethingThenPrintHello(int test) +{ + test = test << 4; + if (test % 8 == 6) { + return; + } + + print("goodbye\n"); +} + + +- (long) readOffset:(FILE*)file +{ + if( version >= 11){ + long offset; + fread(&offset, sizeof(long), 1, file); + return offset; + } else { + int offset; + fread(&offset, sizeof(int), 1, file); + return offset; + } +} + ++ (BOOL) isEdible:(NSString *)mushroom +{ + /* With a solid guide book (such as Phillips 2006) assume we can't eat + * the fungus */ + return FALSE; +} diff --git a/usr.bin/diff/tests/functionname_objcclassm.out b/usr.bin/diff/tests/functionname_objcclassm.out new file mode 100644 index 000000000000..b68b732fb7c3 --- /dev/null +++ b/usr.bin/diff/tests/functionname_objcclassm.out @@ -0,0 +1,11 @@ +--- functionname.in ++++ functionname_objcclassm.in +@@ -25,5 +25,7 @@ + (BOOL) isEdible:(NSString *)mushroom + + + (BOOL) isEdible:(NSString *)mushroom + { +- return TRUE; ++ /* With a solid guide book (such as Phillips 2006) assume we can't eat ++ * the fungus */ ++ return FALSE; + } diff --git a/usr.bin/diff/tests/functionname_objcm.in 
b/usr.bin/diff/tests/functionname_objcm.in new file mode 100644 index 000000000000..06c3e9b2722d --- /dev/null +++ b/usr.bin/diff/tests/functionname_objcm.in @@ -0,0 +1,29 @@ +static void +doSomethingThenPrintHello(int test) +{ + test = test << 4; + if (test % 8 == 6) { + return; + } + + print("goodbye\n"); +} + + +- (long) readOffset:(FILE*)file +{ + if( version >= 11){ + long offset; + fread(&offset, sizeof(long), 1, file); + return offset; + } else { + int offset; + fread(&offset-1, sizeof(int), 1, file); + return offset; + } +} + ++ (BOOL) isEdible:(NSString *)mushroom +{ + return TRUE; +} diff --git a/usr.bin/diff/tests/functionname_objcm.out b/usr.bin/diff/tests/functionname_objcm.out new file mode 100644 index 000000000000..cb29b0892115 --- /dev/null +++ b/usr.bin/diff/tests/functionname_objcm.out @@ -0,0 +1,11 @@ +--- functionname.in ++++ functionname_objcm.in +@@ -18,7 +18,7 @@ - (long) readOffset:(FILE*)file + return offset; + } else { + int offset; +- fread(&offset, sizeof(int), 1, file); ++ fread(&offset-1, sizeof(int), 1, file); + return offset; + } + } diff --git a/usr.bin/diff/tests/group-format.out b/usr.bin/diff/tests/group-format.out new file mode 100644 index 000000000000..4e1bf85fbba7 --- /dev/null +++ b/usr.bin/diff/tests/group-format.out @@ -0,0 +1,27 @@ +/* + * A comment + * +<<<<<<< (local) + * And another bla +======= + * And another bla +>>>>>>> (stock) + * +<<<<<<< (local) + * And yet another +======= + * and yet another +>>>>>>> (stock) + */ + +int +main(void) +{ +<<<<<<< (local) +======= + +>>>>>>> (stock) + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/header.out b/usr.bin/diff/tests/header.out new file mode 100644 index 000000000000..2e1665a30e6d --- /dev/null +++ b/usr.bin/diff/tests/header.out @@ -0,0 +1,4 @@ +--- empty 2015-04-03 01:02:03.000000000 +0000 ++++ hello 2016-12-22 11:22:33.000000000 +0000 +@@ -0,0 +1 @@ ++hello diff --git a/usr.bin/diff/tests/header_ns.out b/usr.bin/diff/tests/header_ns.out new 
file mode 100644 index 000000000000..b1316dfc12b9 --- /dev/null +++ b/usr.bin/diff/tests/header_ns.out @@ -0,0 +1,4 @@ +--- empty 2015-04-03 01:02:03.123456789 +0000 ++++ hello 2016-12-22 11:22:33.987654321 +0000 +@@ -0,0 +1 @@ ++hello diff --git a/usr.bin/diff/tests/ifdef.out b/usr.bin/diff/tests/ifdef.out new file mode 100644 index 000000000000..cc72cac08635 --- /dev/null +++ b/usr.bin/diff/tests/ifdef.out @@ -0,0 +1,26 @@ +/* + * A comment + * +#ifndef PLOP + * And another bla +#else /* PLOP */ + * And another bla +#endif /* PLOP */ + * +#ifndef PLOP + * And yet another +#else /* PLOP */ + * and yet another +#endif /* PLOP */ + */ + +int +main(void) +{ +#ifdef PLOP + +#endif /* PLOP */ + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/input1.in b/usr.bin/diff/tests/input1.in new file mode 100644 index 000000000000..3892e8400f86 --- /dev/null +++ b/usr.bin/diff/tests/input1.in @@ -0,0 +1,2 @@ +Simple input file designed +to be able to test diff diff --git a/usr.bin/diff/tests/input2.in b/usr.bin/diff/tests/input2.in new file mode 100644 index 000000000000..c38b487353a7 --- /dev/null +++ b/usr.bin/diff/tests/input2.in @@ -0,0 +1,3 @@ +Simple input file designed +and written +to be able to test diff utility diff --git a/usr.bin/diff/tests/input_c1.in b/usr.bin/diff/tests/input_c1.in new file mode 100644 index 000000000000..d39dfbdc511b --- /dev/null +++ b/usr.bin/diff/tests/input_c1.in @@ -0,0 +1,15 @@ +/* + * A comment + * + * And another bla + * + * And yet another + */ + +int +main(void) +{ + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/input_c2.in b/usr.bin/diff/tests/input_c2.in new file mode 100644 index 000000000000..933ec67dc175 --- /dev/null +++ b/usr.bin/diff/tests/input_c2.in @@ -0,0 +1,16 @@ +/* + * A comment + * + * And another bla + * + * and yet another + */ + +int +main(void) +{ + + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/side_by_side_tabbed_a.in 
b/usr.bin/diff/tests/side_by_side_tabbed_a.in new file mode 100644 index 000000000000..3198d29f9203 --- /dev/null +++ b/usr.bin/diff/tests/side_by_side_tabbed_a.in @@ -0,0 +1,4 @@ +This + is my test file A + it has tabs +Thanks diff --git a/usr.bin/diff/tests/side_by_side_tabbed_b.in b/usr.bin/diff/tests/side_by_side_tabbed_b.in new file mode 100644 index 000000000000..6698182a8c5d --- /dev/null +++ b/usr.bin/diff/tests/side_by_side_tabbed_b.in @@ -0,0 +1,4 @@ +This + is my test file B + it has tabs +Thanks diff --git a/usr.bin/diff/tests/simple.out b/usr.bin/diff/tests/simple.out new file mode 100644 index 000000000000..fcbcaa041e8c --- /dev/null +++ b/usr.bin/diff/tests/simple.out @@ -0,0 +1,5 @@ +2c2,3 +< to be able to test diff +--- +> and written +> to be able to test diff utility diff --git a/usr.bin/diff/tests/simple_b.out b/usr.bin/diff/tests/simple_b.out new file mode 100644 index 000000000000..704be9d621a8 --- /dev/null +++ b/usr.bin/diff/tests/simple_b.out @@ -0,0 +1,6 @@ +6c6 +< * And yet another +--- +> * and yet another +11a12 +> diff --git a/usr.bin/diff/tests/simple_e.out b/usr.bin/diff/tests/simple_e.out new file mode 100644 index 000000000000..0c7e2b5c752b --- /dev/null +++ b/usr.bin/diff/tests/simple_e.out @@ -0,0 +1,4 @@ +2c +and written +to be able to test diff utility +. 
diff --git a/usr.bin/diff/tests/simple_i.out b/usr.bin/diff/tests/simple_i.out new file mode 100644 index 000000000000..9edc1f98d72d --- /dev/null +++ b/usr.bin/diff/tests/simple_i.out @@ -0,0 +1,6 @@ +4c4 +< * And another bla +--- +> * And another bla +11a12 +> diff --git a/usr.bin/diff/tests/simple_n.out b/usr.bin/diff/tests/simple_n.out new file mode 100644 index 000000000000..33ca7090cf97 --- /dev/null +++ b/usr.bin/diff/tests/simple_n.out @@ -0,0 +1,4 @@ +d2 1 +a2 2 +and written +to be able to test diff utility diff --git a/usr.bin/diff/tests/simple_p.out b/usr.bin/diff/tests/simple_p.out new file mode 100644 index 000000000000..f5aebb0d1199 --- /dev/null +++ b/usr.bin/diff/tests/simple_p.out @@ -0,0 +1,34 @@ +*** input_c1.in +--- input_c2.in +*************** +*** 1,14 **** + /* + * A comment + * +! * And another bla + * +! * And yet another + */ + + int + main(void) + { + printf("something"); + + return (0); +--- 1,15 ---- + /* + * A comment + * +! * And another bla + * +! * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); diff --git a/usr.bin/diff/tests/simple_u.out b/usr.bin/diff/tests/simple_u.out new file mode 100644 index 000000000000..f341987ebec6 --- /dev/null +++ b/usr.bin/diff/tests/simple_u.out @@ -0,0 +1,7 @@ +--- input1 ++++ input2 +@@ -1,2 +1,3 @@ + Simple input file designed +-to be able to test diff ++and written ++to be able to test diff utility diff --git a/usr.bin/diff/tests/simple_w.out b/usr.bin/diff/tests/simple_w.out new file mode 100644 index 000000000000..704be9d621a8 --- /dev/null +++ b/usr.bin/diff/tests/simple_w.out @@ -0,0 +1,6 @@ +6c6 +< * And yet another +--- +> * and yet another +11a12 +> diff --git a/usr.bin/diff/tests/strip_o.out b/usr.bin/diff/tests/strip_o.out new file mode 100644 index 000000000000..47bbfbd4e921 --- /dev/null +++ b/usr.bin/diff/tests/strip_o.out @@ -0,0 +1,7 @@ +--- 1 ++++ 2 +@@ -1,3 +1,3 @@ + a +-X ++Y + c diff --git a/usr.bin/diff/tests/unified_9999.out 
b/usr.bin/diff/tests/unified_9999.out new file mode 100644 index 000000000000..0f9303fbdc7c --- /dev/null +++ b/usr.bin/diff/tests/unified_9999.out @@ -0,0 +1,21 @@ +--- input_c1.in ++++ input_c2.in +@@ -1,15 +1,16 @@ + /* + * A comment + * +- * And another bla ++ * And another bla + * +- * And yet another ++ * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); + } diff --git a/usr.bin/diff/tests/unified_c9999.out b/usr.bin/diff/tests/unified_c9999.out new file mode 100644 index 000000000000..87f439463e5d --- /dev/null +++ b/usr.bin/diff/tests/unified_c9999.out @@ -0,0 +1,36 @@ +*** input_c1.in +--- input_c2.in +*************** +*** 1,15 **** + /* + * A comment + * +! * And another bla + * +! * And yet another + */ + + int + main(void) + { + printf("something"); + + return (0); + } +--- 1,16 ---- + /* + * A comment + * +! * And another bla + * +! * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); + } diff --git a/usr.bin/diff/tests/unified_p.out b/usr.bin/diff/tests/unified_p.out new file mode 100644 index 000000000000..938b07890fbc --- /dev/null +++ b/usr.bin/diff/tests/unified_p.out @@ -0,0 +1,20 @@ +--- input_c1.in ++++ input_c2.in +@@ -1,14 +1,15 @@ + /* + * A comment + * +- * And another bla ++ * And another bla + * +- * And yet another ++ * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); diff --git a/usr.bin/diff/xmalloc.c b/usr.bin/diff/xmalloc.c new file mode 100644 index 000000000000..ce0f4545aee8 --- /dev/null +++ b/usr.bin/diff/xmalloc.c @@ -0,0 +1,85 @@ +/* $OpenBSD: xmalloc.c,v 1.10 2019/06/28 05:44:09 deraadt Exp $ */ +/* + * Author: Tatu Ylonen <ylo@cs.hut.fi> + * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland + * All rights reserved + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). 
+ * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#include <err.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xmalloc.h" + +void * +xmalloc(size_t size) +{ + void *ptr; + + if (size == 0) + errx(2, "xmalloc: zero size"); + ptr = malloc(size); + if (ptr == NULL) + err(2, "xmalloc: allocating %zu bytes", size); + return ptr; +} + +void * +xcalloc(size_t nmemb, size_t size) +{ + void *ptr; + + ptr = calloc(nmemb, size); + if (ptr == NULL) + err(2, "xcalloc: allocating %zu * %zu bytes", nmemb, size); + return ptr; +} + +void * +xreallocarray(void *ptr, size_t nmemb, size_t size) +{ + void *new_ptr; + + new_ptr = reallocarray(ptr, nmemb, size); + if (new_ptr == NULL) + err(2, "xreallocarray: allocating %zu * %zu bytes", + nmemb, size); + return new_ptr; +} + +char * +xstrdup(const char *str) +{ + char *cp; + + if ((cp = strdup(str)) == NULL) + err(2, "xstrdup"); + return cp; +} + +int +xasprintf(char **ret, const char *fmt, ...) 
+{ + va_list ap; + int i; + + va_start(ap, fmt); + i = vasprintf(ret, fmt, ap); + va_end(ap); + + if (i == -1) + err(2, "xasprintf"); + + return i; +} diff --git a/usr.bin/diff/xmalloc.h b/usr.bin/diff/xmalloc.h new file mode 100644 index 000000000000..21396717fa48 --- /dev/null +++ b/usr.bin/diff/xmalloc.h @@ -0,0 +1,30 @@ +/* $OpenBSD: xmalloc.h,v 1.4 2015/11/12 16:30:30 mmcc Exp $ */ + +/* + * Author: Tatu Ylonen <ylo@cs.hut.fi> + * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland + * All rights reserved + * Created: Mon Mar 20 22:09:17 1995 ylo + * + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#ifndef XMALLOC_H +#define XMALLOC_H + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xreallocarray(void *, size_t, size_t); +char *xstrdup(const char *); +int xasprintf(char **, const char *, ...) + __attribute__((__format__ (printf, 2, 3))) + __attribute__((__nonnull__ (2))); + +#endif /* XMALLOC_H */ |