diff --git a/contrib/devtools/github-merge.py b/contrib/devtools/github-merge.py index f1b6a12fd..3fee39143 100755 --- a/contrib/devtools/github-merge.py +++ b/contrib/devtools/github-merge.py @@ -78,24 +78,53 @@ def get_symlink_files(): ret.append(f.decode('utf-8').split("\t")[1]) return ret -def tree_sha512sum(): - files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines()) +def tree_sha512sum(commit='HEAD'): + # request metadata for entire tree, recursively + files = [] + blob_by_name = {} + for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines(): + name_sep = line.index(b'\t') + metadata = line[:name_sep].split() # perms, 'blob', blobid + assert(metadata[1] == b'blob') + name = line[name_sep+1:] + files.append(name) + blob_by_name[name] = metadata[2] + + files.sort() + # open connection to git-cat-file in batch mode to request data for all blobs + # this is much faster than launching it per file + p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE) overall = hashlib.sha512() for f in files: + blob = blob_by_name[f] + # request blob + p.stdin.write(blob + b'\n') + p.stdin.flush() + # read header: blob, "blob", size + reply = p.stdout.readline().split() + assert(reply[0] == blob and reply[1] == b'blob') + size = int(reply[2]) + # hash the blob data intern = hashlib.sha512() - fi = open(f, 'rb') - while True: - piece = fi.read(65536) - if piece: + ptr = 0 + while ptr < size: + bs = min(65536, size - ptr) + piece = p.stdout.read(bs) + if len(piece) == bs: intern.update(piece) else: - break - fi.close() + raise IOError('Premature EOF reading git cat-file output') + ptr += bs dig = intern.hexdigest() + assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data + # update overall hash with file hash overall.update(dig.encode("utf-8")) overall.update(" ".encode("utf-8")) overall.update(f) overall.update("\n".encode("utf-8")) + p.stdin.close() + if p.wait(): + raise IOError('Non-zero return value executing git cat-file') return overall.hexdigest()