git » huecotes » master » tree

[master] / huecotes

#!/usr/bin/env python
# encoding: utf8

"""
huecotes - Make a file sparse using a fixed amount of extra space
Alberto Bertogli (albertito@blitiri.com.ar)
-----------------------------------------------------------------------------


I don't like licenses, because I don't like having to worry about all this
legal stuff just for a simple piece of software I don't really mind anyone
using. But I also believe that it's important that people share and give back;
so I'm placing this work under the following license.


BOLA - Buena Onda License Agreement (v1.1)
------------------------------------------

This work is provided 'as-is', without any express or implied warranty. In no
event will the authors be held liable for any damages arising from the use of
this work.

To all effects and purposes, this work is to be considered Public Domain.


However, if you want to be "buena onda", you should:

1. Not take credit for it, and give proper recognition to the authors.
2. Share your modifications, so everybody benefits from them.
3. Do something nice for the authors.
4. Help someone who needs it: sign up for some volunteer work or help your
   neighbour paint the house.
5. Don't waste. Anything, but specially energy that comes from natural
   non-renewable resources. Extra points if you discover or invent something
   to replace them.
6. Be tolerant. Everything that's good in nature comes from cooperation.
"""


import sys
import os
import optparse

# TODO:
#  - Use mmap to avoid storing the buffer in memory.
#  - If something goes wrong, try to recover the original file (tricky, as we
#    could mess things up even more).
#  - Better presets and defaults.


def dig_hole(fname, bufsize, sparsesize):
	"""Makes the given file sparse in place, using a bounded amount of
	additional disk space.

	Arguments:
	 - fname: path of the file to process. It is modified while
	   operating, and a temporary file named fname + '.tmp' is created
	   next to it (and overwritten if it already exists).
	 - bufsize: size, in bytes, of each chunk moved from the original
	   file to the temporary one. Must be positive.
	 - sparsesize: block size, in bytes, used to do hole detection (see
	   pwrite_with_holes()).

	Returns True on success. Raises ValueError if bufsize is not
	positive; errors from the underlying file operations (IOError /
	OSError) are propagated to the caller.

	It works as follows:
	 - Create a temporary file, truncate() it to the same size as the
	   original one.
	 - While the original file still has data in it:
	    - Read the last bufsize bytes of the original file, write them to
	      the temporary one performing hole detection.
	    - Remove the last bufsize bytes of the original file using
	      truncate().
	 - Rename the temporary file to the original one, which by now should
	   have size 0.

	If the operation fails half way, the data is split between the
	(shrunk) original file and the temporary one, so the temporary file
	is deliberately left in place.
	"""
	if bufsize <= 0:
		# A non-positive chunk size would never shrink the source, and
		# the copy loop below would spin forever.
		raise ValueError("bufsize must be positive, got %r" % (bufsize,))

	dstfname = fname + '.tmp'

	# The source is opened first, so a missing or read-only source fails
	# before a stray temporary file gets created. Both files are opened in
	# binary mode: offsets are byte offsets, and the contents must not go
	# through text decoding or newline translation.
	with open(fname, 'r+b') as srcfd:
		with open(dstfname, 'wb') as dstfd:
			# the temporary file ends up replacing the original, so
			# give it the same permission bits
			srcmode = os.fstat(srcfd.fileno()).st_mode & 0o7777
			os.chmod(dstfname, srcmode)

			# find out the size
			srcfd.seek(0, os.SEEK_END)
			fsize = srcfd.tell()

			# resize dst to the same size as the original, so holes
			# at the very end of the file are accounted for
			dstfd.truncate(fsize)

			lastpos = fsize
			while lastpos > 0:
				offset = max(0, lastpos - bufsize)
				srcfd.seek(offset, os.SEEK_SET)
				buf = srcfd.read(lastpos - offset)
				if len(buf) != lastpos - offset:
					# truncating now would lose the bytes we
					# failed to read
					raise IOError("short read from %s at offset %d"
						% (fname, offset))

				pwrite_with_holes(dstfd, buf, offset, sparsesize)

				# hand the chunk over to the operating system
				# before dropping it from the source, so it does
				# not live only in our own write buffer
				dstfd.flush()

				# shrink source file
				srcfd.truncate(offset)

				lastpos = offset

	# both files are closed at this point
	os.rename(dstfname, fname)

	return True


def pwrite_with_holes(fd, buf, offset, spsize):
	"""Writes buf to fd at the given offset, while performing hole
	detection.

	Arguments:
	 - fd: seekable file object opened for writing.
	 - buf: the data to write (bytes, or str for a text-mode file).
	 - offset: absolute position in fd where buf starts.
	 - spsize: hole detection granularity; must be positive.

	buf is processed in blocks of spsize. Blocks that consist only of
	zeros are not written: the file position is moved past them instead,
	which leaves a hole on filesystems that support sparse files. A
	trailing block shorter than spsize is always written, even if it is
	all zeros.

	Seeking past the end does not extend a file by itself, so the caller
	has to size fd beforehand if buf may end with a hole.

	Raises ValueError if spsize is not positive.
	"""
	if spsize <= 0:
		# With a non-positive block size the loop below would never
		# advance.
		raise ValueError("spsize must be positive, got %r" % (spsize,))

	# The reference block must have the same type as buf: on Python 3 a
	# str never compares equal to bytes, so a str reference would make
	# every block of a binary buffer look like data.
	if isinstance(buf, str):
		zeros = '\0' * spsize
	else:
		zeros = b'\0' * spsize

	fd.seek(offset, os.SEEK_SET)

	cp = 0
	while cp < len(buf):
		t = buf[cp : cp + spsize]
		if t == zeros:
			# skip over the block instead of writing it
			fd.seek(offset + cp + len(t), os.SEEK_SET)
		else:
			fd.write(t)

		cp += spsize


def main():
	"""Command line entry point: parses the options and processes the
	file given as the only positional argument.

	Returns the exit status for the process: 0 on success, 1 if the
	number of positional arguments is wrong (the help is printed in that
	case). Invalid sizes are reported through parser.error(), which
	prints a message and exits with status 2.
	"""
	usage = """%prog [options] <file name>

This program will process the input file and make it sparse, using a fixed
amount of extra space (defaults to 32Mb, see the --bufsize option below).

It can be used instead of cp --sparse when there is not enough disk space to
hold the two copies at the same time.

It creates a temporary file but modifies the source while operating. The
sparse detection can be tuned for better performance at the expense of
additional space.

The default values are sane for big files like VM images. There are also
aggressive and fast presets that you can use instead of the fine-grained
options. In case both kinds are specified, the preset takes precedence.

Comments, bug reports and patches are welcome at albertito@blitiri.com.ar.
"""

	parser = optparse.OptionParser(usage = usage)
	parser.add_option("-b", "--bufsize", dest = "bufsize", type = "float",
		default = 32,
		help = "buffer size, in megabytes (defaults to %default)")
	parser.add_option("-s", "--sparsesize", dest = "sparsesize",
		type = "float", default = 16,
		help = "size used to look for sparse blocks, in kilobytes"
			" (defaults to %default)")

	presets = optparse.OptionGroup(parser, "Presets")
	presets.add_option("", "--aggressive", dest = "preset",
		action = "store_const", const = "aggressive",
		help = "aggressive preset (better hole detection, slower)")
	presets.add_option("", "--fast", dest = "preset",
		action = "store_const", const = "fast",
		help = "fast preset (worse hole detection, faster)")
	parser.add_option_group(presets)

	options, args = parser.parse_args()

	if len(args) != 1:
		parser.print_help()
		return 1

	def size_in_bytes(optname, value, unit):
		"""Converts an option value to a whole number of bytes, or
		bails out through parser.error() if the result is unusable."""
		try:
			size = int(value * unit)
		except (ValueError, OverflowError):
			# "nan" and "inf" are accepted by the float parser
			size = 0
		if size <= 0:
			# a zero or negative size would make the copy loops
			# spin forever on a file that is being modified
			parser.error("%s must be a finite size of at least"
				" one byte" % optname)
		return size

	fname = args[0]
	bufsize = size_in_bytes("--bufsize", options.bufsize, 1024 * 1024)

	# presets take precedence over --sparsesize
	if options.preset == 'aggressive':
		sparsesize = 512
	elif options.preset == 'fast':
		sparsesize = 128 * 1024
	else:
		sparsesize = size_in_bytes("--sparsesize",
			options.sparsesize, 1024)

	dig_hole(fname, bufsize, sparsesize)
	return 0

# Run main() only when executed as a script, and use its return value as the
# exit status of the process.
if __name__ == '__main__':
	exit_status = main()
	sys.exit(exit_status)