git » huecotes » commit f9f7c29

Initial commit

author Alberto Bertogli
2009-10-05 21:52:02 UTC
committer Alberto Bertogli
2009-10-05 21:52:02 UTC

Initial commit

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>

huecotes +180 -0

diff --git a/huecotes b/huecotes
new file mode 100755
index 0000000..96c212a
--- /dev/null
+++ b/huecotes
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+# encoding: utf8
+
+"""
+huecotes - Make a file sparse using a fixed amount of extra space
+Alberto Bertogli (albertito@blitiri.com.ar)
+-----------------------------------------------------------------------------
+
+
+I don't like licenses, because I don't like having to worry about all this
+legal stuff just for a simple piece of software I don't really mind anyone
+using. But I also believe that it's important that people share and give back;
+so I'm placing this work under the following license.
+
+
+BOLA - Buena Onda License Agreement (v1.1)
+------------------------------------------
+
+This work is provided 'as-is', without any express or implied warranty. In no
+event will the authors be held liable for any damages arising from the use of
+this work.
+
+To all effects and purposes, this work is to be considered Public Domain.
+
+
+However, if you want to be "buena onda", you should:
+
+1. Not take credit for it, and give proper recognition to the authors.
+2. Share your modifications, so everybody benefits from them.
+3. Do something nice for the authors.
+4. Help someone who needs it: sign up for some volunteer work or help your
+   neighbour paint the house.
+5. Don't waste. Anything, but specially energy that comes from natural
+   non-renewable resources. Extra points if you discover or invent something
+   to replace them.
+6. Be tolerant. Everything that's good in nature comes from cooperation.
+"""
+
+
+import sys
+import os
+import optparse
+
+# TODO:
+#  - Use mmap to avoid storing the buffer in memory.
+#  - If something goes wrong, try to recover the original file (tricky, as we
+#    could mess things up even more).
+#  - Better presets and defaults.
+
+
+def dig_hole(fname, bufsize, sparsesize):
+	"""Digs a hole in the given file, making it sparse in place.
+
+	fname is the path of the file to process. bufsize is the size, in
+	bytes, of additional space to use, and sparsesize is the block size,
+	in bytes, used to do hole detection (see pwrite_with_holes() below).
+
+	It works as follows:
+	 - Create a temporary file (fname + '.tmp'), truncate() it to the
+	   same size as the original one.
+	 - While the original file still has data in it:
+	    - Read the last bufsize bytes of the original file, write them to
+	      the temporary one performing hole detection.
+	    - Flush the temporary file, then remove the last bufsize bytes of
+	      the original file using truncate().
+	 - Rename the temporary file to the original one, which by now should
+	   have size 0.
+
+	Returns True when done. Raises ValueError if bufsize or sparsesize is
+	not positive. Errors from the file operations are not caught; if one
+	happens midway, the data is left split between the original and the
+	temporary file and no recovery is attempted.
+	"""
+	# A non-positive bufsize would never make progress (and a negative one
+	# would even grow the source through truncate()), so refuse up front,
+	# before any file is touched.
+	if bufsize <= 0 or sparsesize <= 0:
+		raise ValueError("bufsize and sparsesize must be positive")
+
+	dstfname = fname + '.tmp'
+
+	# Open the source first, so that a missing or read-only source does not
+	# leave an empty temporary file behind. Both files are opened in binary
+	# mode because we move raw bytes around and seek to byte offsets.
+	with open(fname, 'rb+') as srcfd:
+		with open(dstfname, 'wb') as dstfd:
+			# find out the size
+			srcfd.seek(0, os.SEEK_END)
+			fsize = srcfd.tell()
+
+			# resize dst to the same size as the original, so holes
+			# at the very end are still part of the file
+			dstfd.truncate(fsize)
+
+			lastpos = fsize
+			while lastpos > 0:
+				offset = max(0, lastpos - bufsize)
+				srcfd.seek(offset, os.SEEK_SET)
+				buf = srcfd.read(bufsize)
+
+				pwrite_with_holes(dstfd, buf, offset, sparsesize)
+
+				# hand the data over to the OS before dropping it
+				# from the source, so it does not live only in our
+				# own write buffer
+				dstfd.flush()
+
+				# shrink source file
+				srcfd.truncate(offset)
+
+				lastpos = offset
+
+	# both files are closed at this point, it is safe to rename
+	os.rename(dstfname, fname)
+
+	return True
+
+
+def pwrite_with_holes(fd, buf, offset, spsize):
+	"""Writes buf to fd at the given offset, while performing hole
+	detection. Tries to detect holes of spsize size, and seeks over the
+	file to make it sparse.
+
+	fd is a seekable file object opened for writing in binary mode, buf
+	is the data (bytes) to write, offset is the absolute position in fd
+	where buf starts, and spsize is the block size, in bytes, used to
+	look for zeros.
+
+	buf is processed in blocks of spsize bytes; a block made only of
+	zeros is skipped with a seek instead of being written. A trailing
+	block shorter than spsize is always written, even if it is all zeros.
+	Skipped regions are left untouched, so fd is expected to already read
+	as zeros there (dig_hole() truncate()s a fresh file to the final size
+	beforehand).
+
+	Raises ValueError if spsize is not positive.
+	"""
+	# With spsize <= 0 the position below would never advance.
+	if spsize <= 0:
+		raise ValueError("spsize must be positive")
+
+	# bytes literal: the same thing as a str on Python 2, and the type
+	# that binary reads return on Python 3
+	zeros = b'\0' * spsize
+
+	fd.seek(offset, os.SEEK_SET)
+
+	cp = 0
+	while cp < len(buf):
+		t = buf[cp : cp + spsize]
+		if t == zeros:
+			# jump over the block, leaving a hole behind
+			fd.seek(offset + cp + len(t), os.SEEK_SET)
+		else:
+			fd.write(t)
+
+		cp += spsize
+
+
+def main():
+	"""Command line entry point.
+
+	Parses the command line, converts the sizes to bytes (--bufsize is
+	given in megabytes and --sparsesize in kilobytes; a preset overrides
+	--sparsesize) and runs dig_hole() on the file named by the only
+	positional argument.
+
+	Returns 1 after printing the help if the number of arguments is
+	wrong, and 0 once dig_hole() is done. Sizes that come out as less
+	than one byte are rejected through parser.error(), which exits the
+	program instead of returning.
+	"""
+	usage = """%prog [options] <file name>
+
+This program will process the input file and make it sparse, using a fixed
+amount of extra space (defaults to 32Mb, see the --bufsize option below).
+
+It can be used instead of cp --sparse when there is not enough disk space to
+hold the two copies at the same time.
+
+It creates a temporary file but modifies the source while operating. The
+sparse detection can be tuned for better performance at the expense of
+aditional space.
+
+The default values are sane for big files like VM images. There are also
+aggressive and fast presets that you can use instead of the fine-grained
+options. In case both kinds are specified, the preset take precedence.
+
+Comments, bug reports and patches are welcome at albertito@blitiri.com.ar.
+"""
+
+	parser = optparse.OptionParser(usage = usage)
+	parser.add_option("-b", "--bufsize", dest = "bufsize", type = float,
+		default = 32,
+		help = "buffer size, in megabytes (defaults to %default)")
+	parser.add_option("-s", "--sparsesize", dest = "sparsesize", type = float,
+		default = 16,
+		help = "size used to look for sparse blocks, in kilobytes" +
+			" (defaults to %default)")
+
+	presets = optparse.OptionGroup(parser, "Presets")
+	presets.add_option("", "--aggressive", dest = "preset",
+		action = "store_const", const = "aggressive",
+		help = "aggressive preset (better hole detection, slower)")
+	presets.add_option("", "--fast", dest = "preset",
+		action = "store_const", const = "fast",
+		help = "fast preset (worse hole detection, faster)")
+	parser.add_option_group(presets)
+
+	options, args = parser.parse_args()
+
+	if len(args) != 1:
+		parser.print_help()
+		return 1
+
+	fname = args[0]
+	bufsize = int(options.bufsize * 1024 * 1024)
+
+	if options.preset == 'aggressive':
+		sparsesize = 512
+	elif options.preset == 'fast':
+		sparsesize = 128 * 1024
+	else:
+		sparsesize = int(options.sparsesize * 1024)
+
+	# Check the sizes after the conversion to bytes, so that zero, negative
+	# and tiny fractional values are all caught. The file is modified in
+	# place and the copy loops never finish with a size below one byte, so
+	# bail out before touching anything.
+	if bufsize <= 0:
+		parser.error("the buffer size must be at least one byte")
+	if sparsesize <= 0:
+		parser.error("the sparse block size must be at least one byte")
+
+	dig_hole(fname, bufsize, sparsesize)
+	return 0
+
+if '__main__' == __name__:
+	# Run only when executed as a script; main()'s return value becomes
+	# the exit status of the process.
+	raise SystemExit(main())
+
+