From e3524c90de918d0b4a74d70768175561758f0d70 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Mon, 28 Nov 2016 23:24:27 -0800
Subject: [PATCH] scripts: Make archive build reproducible

We used to use the current timestamp when building the archive; switch to using
the committer timestamp of the git tag for the version we're packaging.

This requires some monkey patching, since the tarfile module always uses the
current timestamp when writing the gzip header.

Also exclude archive.py from the archive and simplify the release file list in
the Makefile.
---
 Makefile           |  4 ++--
 scripts/archive.py | 29 +++++++++++++++++------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index 84290c70..13eb1cb4 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ SOURCES=src/pugixml.cpp $(filter-out tests/fuzz_%,$(wildcard tests/*.cpp))
 EXECUTABLE=$(BUILD)/test
 
 VERSION=$(shell sed -n 's/.*version \(.*\).*/\1/p' src/pugiconfig.hpp)
-RELEASE=$(shell git ls-files src docs/*.html docs/*.css docs/samples docs/images scripts contrib CMakeLists.txt readme.txt)
+RELEASE=$(filter-out scripts/archive.py docs/%.adoc,$(shell git ls-files contrib docs scripts src CMakeLists.txt readme.txt))
 
 CXXFLAGS=-g -Wall -Wextra -Werror -pedantic -Wundef -Wshadow -Wcast-align -Wcast-qual -Wold-style-cast
 LDFLAGS=
@@ -82,7 +82,7 @@ docs: docs/quickstart.html docs/manual.html
 
 build/pugixml-%: .FORCE | $(RELEASE)
 	@mkdir -p $(BUILD)
-	python scripts/archive.py $@ pugixml-$(VERSION) $|
+	TIMESTAMP=`git show v$(VERSION) -s --format=%ct` && python scripts/archive.py $@ pugixml-$(VERSION) $$TIMESTAMP $|
 
 $(EXECUTABLE): $(OBJECTS)
 	$(CXX) $(OBJECTS) $(LDFLAGS) -o $@
diff --git a/scripts/archive.py b/scripts/archive.py
index ac62dc2a..dbe54d9b 100644
--- a/scripts/archive.py
+++ b/scripts/archive.py
@@ -16,33 +16,38 @@ def read_file(path, use_crlf):
 
 	return data
 
-def write_zip(target, arcprefix, sources):
-	with zipfile.ZipFile(target, 'w', zipfile.ZIP_DEFLATED) as archive:
+def write_zip(target, arcprefix, timestamp, sources):
+	with zipfile.ZipFile(target, 'w') as archive:
 		for source in sorted(sources):
 			data = read_file(source, use_crlf = True)
 			path = os.path.join(arcprefix, source)
-			archive.writestr(path, data)
+			info = zipfile.ZipInfo(path)
+			info.date_time = time.localtime(timestamp)
+			info.compress_type = zipfile.ZIP_DEFLATED
+			archive.writestr(info, data)
 
-def write_tar(target, arcprefix, sources, compression):
+def write_tar(target, arcprefix, timestamp, sources, compression):
 	with tarfile.open(target, 'w:' + compression) as archive:
 		for source in sorted(sources):
 			data = read_file(source, use_crlf = False)
 			path = os.path.join(arcprefix, source)
 			info = tarfile.TarInfo(path)
 			info.size = len(data)
-			info.mtime = time.time()
+			info.mtime = timestamp
 			archive.addfile(info, StringIO.StringIO(data))
 
-if len(sys.argv) < 4:
-	raise RuntimeError('Usage: python archive.py <target> <archive prefix> <source files>')
+if len(sys.argv) < 5:
+	raise RuntimeError('Usage: python archive.py <target> <archive prefix> <timestamp> <source files>')
 
-target = sys.argv[1]
-arcprefix = sys.argv[2]
-sources = sys.argv[3:]
+target, arcprefix, timestamp = sys.argv[1:4]
+sources = sys.argv[4:]
+
+# tarfile._Stream._init_write_gz always stamps the gzip header with time.time(), so override it to return the requested timestamp
+time.time = lambda: timestamp
 
 if target.endswith('.zip'):
-	write_zip(target, arcprefix, sources)
+	write_zip(target, arcprefix, int(timestamp), sources)
 elif target.endswith('.tar.gz') or target.endswith('.tar.bz2'):
-	write_tar(target, arcprefix, sources, compression = os.path.splitext(target)[1][1:])
+	write_tar(target, arcprefix, int(timestamp), sources, compression = os.path.splitext(target)[1][1:])
 else:
 	raise NotImplementedError('File type not supported: ' + target)
-- 
GitLab