summary | refs | log | tree | commit | diff
path: root/utils/docker/scripts/llvm_checksum/llvm_checksum.py
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:10:56 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:10:56 +0000
commit 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /utils/docker/scripts/llvm_checksum/llvm_checksum.py
parent eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Notes
Diffstat (limited to 'utils/docker/scripts/llvm_checksum/llvm_checksum.py')
-rwxr-xr-x utils/docker/scripts/llvm_checksum/llvm_checksum.py 198
1 files changed, 198 insertions, 0 deletions
diff --git a/utils/docker/scripts/llvm_checksum/llvm_checksum.py b/utils/docker/scripts/llvm_checksum/llvm_checksum.py
new file mode 100755
index 0000000000000..584efa2598bfd
--- /dev/null
+++ b/utils/docker/scripts/llvm_checksum/llvm_checksum.py
@@ -0,0 +1,198 @@
+#!/usr/bin/python
+""" A small program to compute checksums of LLVM checkout.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import logging
+import re
+import sys
+from argparse import ArgumentParser
+from project_tree import *
+
+SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")  # expanded svn keyword, e.g. "$Date: ... $"
+
+
def main():
    """Command-line entry point: print or verify checksums of an LLVM checkout.

    Parses -v/--verbose, -c/--check, --partial, --multi_dir and the positional
    llvm_path. Without --check the computed checksums are written to stdout
    and the process exits with status 0. With --check the computed checksums
    are compared against the reference file: on a match a short message is
    printed and the function returns normally; on a mismatch both sets of
    checksums are printed and the process exits with status 1.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging")
    arg_parser.add_argument(
        "-c",
        "--check",
        metavar="reference_file",
        help=("read checksums from reference_file and "
              "check they match checksums of llvm_path."))
    arg_parser.add_argument(
        "--partial",
        action="store_true",
        help=("ignore projects from reference_file "
              "that are not checked out in llvm_path."))
    arg_parser.add_argument(
        "--multi_dir",
        action="store_true",
        help=("indicates llvm_path contains llvm, checked out "
              "into multiple directories, as opposed to a "
              "typical single source tree checkout."))
    arg_parser.add_argument("llvm_path")
    options = arg_parser.parse_args()

    # The reference file (if any) is read up front, before logging is
    # configured and before any checksum is computed.
    reference_checksums = None
    if options.check is not None:
        with open(options.check, "r") as reference_file:
            reference_checksums = ReadLLVMChecksums(reference_file)

    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)

    checksums = ComputeLLVMChecksums(
        options.llvm_path, CreateLLVMProjects(not options.multi_dir))

    # No reference given: just report what was computed.
    if reference_checksums is None:
        WriteLLVMChecksums(checksums, sys.stdout)
        sys.exit(0)

    if ValidateChecksums(reference_checksums, checksums, options.partial):
        sys.stdout.write("Checksums match.")
        return

    # Mismatch: show both sides so the difference can be inspected.
    sys.stdout.write("Checksums differ.\nNew checksums:\n")
    WriteLLVMChecksums(checksums, sys.stdout)
    sys.stdout.write("Reference checksums:\n")
    WriteLLVMChecksums(reference_checksums, sys.stdout)
    sys.exit(1)
+
+
def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    Every file reported by WalkProjectFiles for a project is hashed with
    SHA-256 after expanded svn date keywords have been collapsed. A dangling
    symlink is hashed by its link target instead of by file contents. The
    project checksum is the SHA-256 over the path-sorted sequence of
    (path relative to the project root, file digest) pairs.

    Works on both Python 2 and Python 3: everything fed to the hashers is
    converted to bytes explicitly.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout paths,
        relative to root_path. Each must provide `relpath` and `name`.

    Returns:
      A dict mapping from project name to project checksum (hex string).
      Projects whose directory does not exist under root_path are skipped and
      are absent from the result.
    """
    hash_algo = hashlib.sha256

    def as_bytes(text):
        # Hash objects only accept bytes. A Python 2 native str already is
        # bytes; on Python 3, os.fsencode recovers the raw filesystem bytes
        # of a path (and is a plain ASCII encode for hex digests).
        if isinstance(text, bytes):
            return text
        if hasattr(os, "fsencode"):
            return os.fsencode(text)
        return text.encode("utf-8")

    def collapse_svn_substitutions(contents):
        # Replace svn substitutions for $Date$ and $LastChangedDate$.
        # Unfortunately, these are locale-specific.
        #
        # `contents` is bytes (files are read in "rb" mode), so the shared
        # module-level pattern is applied in its bytes form.
        pattern = SVN_DATES_REGEX.pattern
        if not isinstance(pattern, bytes):
            pattern = pattern.encode("ascii")
        # The replacement must be a raw literal: in a non-raw string "\1" is
        # the control character 0x01, not a backreference to the keyword name.
        # NOTE: this changes the checksum of files containing expanded
        # keywords relative to reference files produced before this fix.
        return re.sub(pattern, br"$\1$", contents)

    def read_and_collapse_svn_substitutions(file_path):
        with open(file_path, "rb") as f:
            contents = f.read()
        new_contents = collapse_svn_substitutions(contents)
        if contents != new_contents:
            logging.debug("Replaced svn keyword substitutions in %s", file_path)
            logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
        return new_contents

    project_checksums = dict()
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info("Folder %s doesn't exist, skipping project %s",
                         proj.relpath, proj.name)
            continue

        files = list()

        def add_file_hash(file_path):
            # Callback for WalkProjectFiles; it is only invoked during the
            # walk below, so it always appends to this iteration's `files`.
            if os.path.islink(file_path) and not os.path.exists(file_path):
                # Dangling symlink: there is no content to read, hash the
                # link target instead.
                content = as_bytes(os.readlink(file_path))
            else:
                content = read_and_collapse_svn_substitutions(file_path)
            file_digest = hash_algo(content).hexdigest()
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum. Sorting makes the result independent of the
        # order in which the walk reported the files.
        files.sort(key=lambda x: x[0])
        hasher = hash_algo()
        for file_path, file_digest in files:
            # Paths are hashed relative to the project root so the checksum
            # does not depend on where the checkout lives.
            file_path = os.path.relpath(file_path, project_root)
            hasher.update(as_bytes(file_path))
            hasher.update(as_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums
+
+
def WriteLLVMChecksums(checksums, f):
    """Write one "<checksum> <project>" line per project, sorted by project.

    Args:
      checksums: a dict mapping from project name to project checksum (result
        of ComputeLLVMChecksums).
      f: a file object to write into; only its write() method is used.
    """
    # Keys are unique, so sorting the (name, checksum) pairs orders by name.
    for project_name, digest in sorted(checksums.items()):
        f.write("{} {}\n".format(digest, project_name))
+
+
def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Each non-blank line must consist of exactly two whitespace-separated
    fields: the checksum followed by the project name. Blank and
    whitespace-only lines (for example a trailing empty line) are ignored.

    Args:
      f: a file object to read from; only its readline() method is used.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not contain exactly two fields.
    """
    checksums = {}
    while True:
        line = f.readline()
        if not line:
            # readline() returns an empty string only at end of file.
            break
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of failing to unpack them.
            continue
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums
+
+
def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
    """Validates that reference_checksums and new_checksums match.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a project
        name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, reference_checksums may contain more projects than
        new_checksums. Projects missing from new_checksums are ignored.
        When False, new_checksums and reference_checksums must contain checksums
        for the same set of projects. If there is a project in
        reference_checksums, missing from new_checksums, ValidateChecksums
        will return False.

    Returns:
      True, if checksums match with regards to allow_missing_projects flag value.
      False, otherwise.
    """
    if not allow_missing_projects:
        # Together with the per-project membership check below, equal sizes
        # imply both dicts cover exactly the same set of projects.
        if len(new_checksums) != len(reference_checksums):
            return False

    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() exists only on Python 2.
    for proj, checksum in new_checksums.items():
        # The reference has no checksum for this project.
        if proj not in reference_checksums:
            return False
        # Checksum did not match.
        if reference_checksums[proj] != checksum:
            return False

    return True
+
+
+if __name__ == "__main__":  # call main() only when run as a script, not when imported
+ main()