From f7da613142721e65628ed69f838ed0445f490c65 Mon Sep 17 00:00:00 2001 From: Kubilay Kocak Date: Sun, 31 Jan 2016 08:38:33 +0000 Subject: [NEW] sysutils/py-diffoscope: In-depth comparison of files, archives, and directories diffoscope will try to get to the bottom of what makes files or directories different. It will recursively unpack archives of many kinds and transform various binary formats into more human readable form to compare them. It can compare two tarballs, ISO images, or PDF just as easily. It can be scripted through error codes, and a report can be produced with the detected differences. The report can be text or HTML. When no type of report has been selected, diffoscope defaults to write a text report on the standard output. diffoscope is developed as part of the 'reproducible builds' Debian project. It is meant to be able to quickly understand why two builds of the same package produce different outputs. diffoscope was previously named debbindiff. WWW: https://diffoscope.org/ Thank you to bapt for providing the background context, information and the getfacl patch to get this port going. 
Requested by: emaste, bapt (for reproducible builds & FOSDEM talk) --- .../files/patch-diffoscope_____init____.py | 24 ++++++++ .../files/patch-diffoscope_comparators_deb.py | 36 ++++++++++++ .../patch-diffoscope_comparators_directory.py | 47 +++++++++++++++ .../files/patch-diffoscope_comparators_elf.py | 38 ++++++++++++ .../files/patch-diffoscope_comparators_utils.py | 28 +++++++++ .../files/patch-tests_comparators_test__deb.py | 67 ++++++++++++++++++++++ .../files/patch-tests_comparators_test__elf.py | 22 +++++++ .../patch-tests_data_deb__metadata__expected__diff | 26 +++++++++ ...h-tests_data_elf__lib__metadata__expected__diff | 22 +++++++ 9 files changed, 310 insertions(+) create mode 100644 sysutils/py-diffoscope/files/patch-diffoscope_____init____.py create mode 100644 sysutils/py-diffoscope/files/patch-diffoscope_comparators_deb.py create mode 100644 sysutils/py-diffoscope/files/patch-diffoscope_comparators_directory.py create mode 100644 sysutils/py-diffoscope/files/patch-diffoscope_comparators_elf.py create mode 100644 sysutils/py-diffoscope/files/patch-diffoscope_comparators_utils.py create mode 100644 sysutils/py-diffoscope/files/patch-tests_comparators_test__deb.py create mode 100644 sysutils/py-diffoscope/files/patch-tests_comparators_test__elf.py create mode 100644 sysutils/py-diffoscope/files/patch-tests_data_deb__metadata__expected__diff create mode 100644 sysutils/py-diffoscope/files/patch-tests_data_elf__lib__metadata__expected__diff (limited to 'sysutils/py-diffoscope/files') diff --git a/sysutils/py-diffoscope/files/patch-diffoscope_____init____.py b/sysutils/py-diffoscope/files/patch-diffoscope_____init____.py new file mode 100644 index 000000000000..9eb88c59aad0 --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-diffoscope_____init____.py @@ -0,0 +1,24 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is 
more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. + +--- diffoscope/__init__.py.orig 2016-01-31 06:30:01 UTC ++++ diffoscope/__init__.py +@@ -39,9 +39,7 @@ OS_NAMES = { 'arch': 'Arch Linux' + } + + class RequiredToolNotFound(Exception): +- PROVIDERS = { 'ar': { 'debian': 'binutils-multiarch', +- 'arch': 'binutils'} +- , 'bzip2': { 'debian': 'bzip2', ++ PROVIDERS = { 'bzip2': { 'debian': 'bzip2', + 'arch': 'bzip2'} + , 'cbfstool': {} + , 'cmp': { 'debian': 'diffutils', diff --git a/sysutils/py-diffoscope/files/patch-diffoscope_comparators_deb.py b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_deb.py new file mode 100644 index 000000000000..d4d906bcf6e6 --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_deb.py @@ -0,0 +1,36 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- diffoscope/comparators/deb.py.orig 2016-01-31 06:31:13 UTC ++++ diffoscope/comparators/deb.py +@@ -29,7 +29,7 @@ import diffoscope.comparators + from diffoscope.comparators.binary import File + from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive + from diffoscope.comparators.utils import \ +- Archive, ArchiveMember, get_ar_content ++ Archive, ArchiveMember + from diffoscope.comparators.tar import TarContainer + + +@@ -86,10 +86,9 @@ class DebFile(File): + return self._control + + def compare_details(self, other, source=None): +- my_content = get_ar_content(self.path) +- other_content = get_ar_content(other.path) +- return [Difference.from_text(my_content, other_content, self.path, other.path, source="metadata")] +- ++ return [Difference.from_text_readers(list_libarchive(self.path), ++ list_libarchive(other.path), ++ self.path, other.path, source="file list")] + + class Md5sumsFile(File): + @staticmethod diff --git a/sysutils/py-diffoscope/files/patch-diffoscope_comparators_directory.py b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_directory.py new file mode 100644 index 000000000000..94a3bf29a1bf --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_directory.py @@ -0,0 +1,47 @@ +--- diffoscope/comparators/directory.py.orig 2016-01-30 11:54:52 UTC ++++ diffoscope/comparators/directory.py +@@ -72,8 +72,11 @@ def lsattr(path): + class Getfacl(Command): + @tool_required('getfacl') + def cmdline(self): +- return ['getfacl', '-p', '-c', self.path] +- ++ osname = os.uname()[0] ++ if osname == 'Linux': ++ return ['getfacl', '-p', '-c', self.path] ++ else: # FreeBSD ++ return ['getfacl', '-q', '-h', self.path] + + def compare_meta(path1, path2): + logger.debug('compare_meta(%s, %s)', path1, path2) +@@ -84,17 +87,19 @@ def compare_meta(path1, path2): + logger.warn("'stat' not found! 
Is PATH wrong?") + if os.path.islink(path1) or os.path.islink(path2): + return [d for d in differences if d is not None] +- try: +- lsattr1 = lsattr(path1) +- lsattr2 = lsattr(path2) +- differences.append(Difference.from_text( +- lsattr1, lsattr2, path1, path2, source="lattr")) +- except RequiredToolNotFound: +- logger.info("Unable to find 'lsattr'.") +- try: +- differences.append(Difference.from_command(Getfacl, path1, path2)) +- except RequiredToolNotFound: +- logger.info("Unable to find 'getfacl'.") ++ osname = os.uname()[0] ++ if osname == "Linux" or osname == "FreeBSD": ++ try: ++ lsattr1 = lsattr(path1) ++ lsattr2 = lsattr(path2) ++ differences.append(Difference.from_text( ++ lsattr1, lsattr2, path1, path2, source="lattr")) ++ except RequiredToolNotFound: ++ logger.info("Unable to find 'lsattr'.") ++ try: ++ differences.append(Difference.from_command(Getfacl, path1, path2)) ++ except RequiredToolNotFound: ++ logger.info("Unable to find 'getfacl'.") + return [d for d in differences if d is not None] + + diff --git a/sysutils/py-diffoscope/files/patch-diffoscope_comparators_elf.py b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_elf.py new file mode 100644 index 000000000000..50ade789872b --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_elf.py @@ -0,0 +1,38 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- diffoscope/comparators/elf.py.orig 2016-01-31 06:32:02 UTC ++++ diffoscope/comparators/elf.py +@@ -24,8 +24,9 @@ import subprocess + from diffoscope import tool_required, OutputParsingError + from diffoscope import logger + from diffoscope.comparators.binary import File ++from diffoscope.comparators.libarchive import list_libarchive + from diffoscope.comparators.deb import DebFile, get_build_id_map +-from diffoscope.comparators.utils import get_ar_content, Command, Container ++from diffoscope.comparators.utils import Command, Container + from diffoscope.difference import Difference + + +@@ -415,10 +416,8 @@ class StaticLibFile(File): + + def compare_details(self, other, source=None): + differences = [] +- # look up differences in metadata +- content1 = get_ar_content(self.path) +- content2 = get_ar_content(other.path) +- differences.append(Difference.from_text( +- content1, content2, self.path, other.path, source="metadata")) ++ differences.append(Difference.from_text_readers(list_libarchive(self.path), ++ list_libarchive(other.path), ++ self.path, other.path, source="file list")) + differences.extend(_compare_elf_data(self.path, other.path)) + return differences diff --git a/sysutils/py-diffoscope/files/patch-diffoscope_comparators_utils.py b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_utils.py new file mode 100644 index 000000000000..e73b451a2126 --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-diffoscope_comparators_utils.py @@ -0,0 +1,28 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- diffoscope/comparators/utils.py.orig 2016-01-31 06:33:12 UTC ++++ diffoscope/comparators/utils.py +@@ -36,14 +36,6 @@ from diffoscope.difference import Differ + from diffoscope import logger, tool_required, get_temporary_directory + + +-@tool_required('ar') +-def get_ar_content(path): +- if path == '/dev/null': +- return '' +- return subprocess.check_output( +- ['ar', 'tv', path], stderr=subprocess.STDOUT, shell=False).decode('utf-8') +- +- + class Command(object, metaclass=ABCMeta): + def __init__(self, path): + self._path = path diff --git a/sysutils/py-diffoscope/files/patch-tests_comparators_test__deb.py b/sysutils/py-diffoscope/files/patch-tests_comparators_test__deb.py new file mode 100644 index 000000000000..ea75602352d5 --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-tests_comparators_test__deb.py @@ -0,0 +1,67 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- tests/comparators/test_deb.py.orig 2016-01-31 06:34:26 UTC ++++ tests/comparators/test_deb.py +@@ -48,12 +48,10 @@ def test_no_differences(deb1): + def differences(deb1, deb2): + return deb1.compare(deb2).details + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_metadata(differences): + expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/deb_metadata_expected_diff')).read() + assert differences[0].unified_diff == expected_diff + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_compressed_files(differences): + assert differences[1].source1 == 'control.tar.gz' + assert differences[2].source1 == 'data.tar.gz' +@@ -64,7 +62,6 @@ def test_identification_of_md5sums_outsi + f = specialize(FilesystemFile(path)) + assert type(f) is FilesystemFile + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_identification_of_md5sums_in_deb(deb1, deb2, monkeypatch): + orig_func = Md5sumsFile.recognizes + @staticmethod +@@ -78,16 +75,13 @@ def test_identification_of_md5sums_in_de + deb1.compare(deb2) + assert test_identification_of_md5sums_in_deb.found + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_md5sums(differences): + assert differences[1].details[0].details[1].comment == 'Files in package differs' + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_identical_files_in_md5sums(deb1, deb2): + for name in ['./usr/share/doc/test/README.Debian', './usr/share/doc/test/copyright']: + assert deb1.md5sums[name] == deb2.md5sums[name] + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_identification_of_data_tar(deb1, deb2, monkeypatch): + orig_func = DebDataTarFile.recognizes + @staticmethod +@@ -101,7 +95,6 @@ def test_identification_of_data_tar(deb1 + deb1.compare(deb2) + assert test_identification_of_data_tar.found + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def 
test_skip_comparison_of_known_identical_files(deb1, deb2, monkeypatch): + compared = set() + orig_func = diffoscope.comparators.compare_files +@@ -112,7 +105,6 @@ def test_skip_comparison_of_known_identi + deb1.compare(deb2) + assert './usr/share/doc/test/README.Debian' not in compared + +-@pytest.mark.skipif(tool_missing('ar'), reason='missing ar') + def test_compare_non_existing(monkeypatch, deb1): + monkeypatch.setattr(Config.general, 'new_file', True) + difference = deb1.compare(NonExistingFile('/nonexisting', deb1)) diff --git a/sysutils/py-diffoscope/files/patch-tests_comparators_test__elf.py b/sysutils/py-diffoscope/files/patch-tests_comparators_test__elf.py new file mode 100644 index 000000000000..b1656d2058cc --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-tests_comparators_test__elf.py @@ -0,0 +1,22 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- tests/comparators/test_elf.py.orig 2016-01-31 06:35:23 UTC ++++ tests/comparators/test_elf.py +@@ -92,7 +92,7 @@ def lib_differences(lib1, lib2): + @pytest.mark.skipif(tool_missing('readelf') or tool_missing('objdump'), reason='missing readelf or objdump') + def test_lib_differences(lib_differences): + assert len(lib_differences) == 2 +- assert lib_differences[0].source1 == 'metadata' ++ assert lib_differences[0].source1 == 'file list' + expected_metadata_diff = open(os.path.join(os.path.dirname(__file__), '../data/elf_lib_metadata_expected_diff')).read() + assert lib_differences[0].unified_diff == expected_metadata_diff + assert 'objdump' in lib_differences[1].source1 diff --git a/sysutils/py-diffoscope/files/patch-tests_data_deb__metadata__expected__diff b/sysutils/py-diffoscope/files/patch-tests_data_deb__metadata__expected__diff new file mode 100644 index 000000000000..242fd1c1aa14 --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-tests_data_deb__metadata__expected__diff @@ -0,0 +1,26 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- tests/data/deb_metadata_expected_diff.orig 2016-01-31 06:42:09 UTC ++++ tests/data/deb_metadata_expected_diff +@@ -1,6 +1,7 @@ + @@ -1,3 +1,3 @@ +- rw-r--r-- 0/0 4 Jun 24 17:40 2015 debian-binary +--rw-r--r-- 0/0 444 Jun 24 17:40 2015 control.tar.gz +--rw-r--r-- 0/0 1626 Jun 24 17:40 2015 data.tar.gz +-+rw-r--r-- 0/0 442 Jun 24 17:40 2015 control.tar.gz +-+rw-r--r-- 0/0 1754 Jun 24 17:40 2015 data.tar.gz ++--rw-r--r-- 0 0 0 4 2015-06-24 17:40:03.000000 debian-binary ++--rw-r--r-- 0 0 0 444 2015-06-24 17:40:03.000000 control.tar.gz ++--rw-r--r-- 0 0 0 1626 2015-06-24 17:40:03.000000 data.tar.gz +++-rw-r--r-- 0 0 0 4 2015-06-24 17:40:26.000000 debian-binary +++-rw-r--r-- 0 0 0 442 2015-06-24 17:40:26.000000 control.tar.gz +++-rw-r--r-- 0 0 0 1754 2015-06-24 17:40:26.000000 data.tar.gz diff --git a/sysutils/py-diffoscope/files/patch-tests_data_elf__lib__metadata__expected__diff b/sysutils/py-diffoscope/files/patch-tests_data_elf__lib__metadata__expected__diff new file mode 100644 index 000000000000..8015cb965a12 --- /dev/null +++ b/sysutils/py-diffoscope/files/patch-tests_data_elf__lib__metadata__expected__diff @@ -0,0 +1,22 @@ +commit 8df464ebf0db8b04d0baae6a14504f3796e355a0 +Author: Jeremy Bobbio +Date: Sat Jan 30 13:47:47 2016 +0000 + + Also use libarchive to read metadata from ar archives + + The output is more precise and less dependent on binutils version. + + The command line `ar` tool is not used any more so remove it from the + required tools. 
+ +--- tests/data/elf_lib_metadata_expected_diff.orig 2016-01-31 06:43:41 UTC ++++ tests/data/elf_lib_metadata_expected_diff +@@ -1,3 +1,5 @@ +-@@ -1 +1 @@ +--rw-r--r-- 1000/1000 1216 Jun 24 12:13 2015 test.o +-+rw-r--r-- 1000/1000 1216 Jun 24 12:14 2015 test.o ++@@ -1,2 +1,2 @@ ++----------- 0 0 0 10 2015-06-24 12:14:19.000000 / ++--rw-r--r-- 0 1000 1000 1216 2015-06-24 12:13:47.000000 test.o +++---------- 0 0 0 10 2015-06-24 12:14:29.000000 / +++-rw-r--r-- 0 1000 1000 1216 2015-06-24 12:14:27.000000 test.o -- cgit v1.2.3