author | Alberto Bertogli
<albertito@blitiri.com.ar> 2009-10-05 21:52:02 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2009-10-05 21:52:02 UTC |
huecotes | +180 | -0 |
#!/usr/bin/env python
# encoding: utf8
# Origin: diff --git a/huecotes b/huecotes, new file mode 100755,
# index 0000000..96c212a

"""
huecotes - Make a file sparse using a fixed amount of extra space
Alberto Bertogli (albertito@blitiri.com.ar)
-----------------------------------------------------------------------------


I don't like licenses, because I don't like having to worry about all this
legal stuff just for a simple piece of software I don't really mind anyone
using. But I also believe that it's important that people share and give back;
so I'm placing this work under the following license.


BOLA - Buena Onda License Agreement (v1.1)
------------------------------------------

This work is provided 'as-is', without any express or implied warranty. In no
event will the authors be held liable for any damages arising from the use of
this work.

To all effects and purposes, this work is to be considered Public Domain.


However, if you want to be "buena onda", you should:

1. Not take credit for it, and give proper recognition to the authors.
2. Share your modifications, so everybody benefits from them.
3. Do something nice for the authors.
4. Help someone who needs it: sign up for some volunteer work or help your
   neighbour paint the house.
5. Don't waste. Anything, but specially energy that comes from natural
   non-renewable resources. Extra points if you discover or invent something
   to replace them.
6. Be tolerant. Everything that's good in nature comes from cooperation.
"""


import sys
import os
import optparse

# TODO:
#  - Use mmap to avoid storing the buffer in memory.
#  - If something goes wrong, try to recover the original file (tricky, as we
#    could mess things up even more).
#  - Better presets and defaults.


def dig_hole(fname, bufsize, sparsesize):
    """Rewrite the file fname as a sparse file, using bounded extra space.

    bufsize is the amount of additional space to use, in bytes, and
    sparsesize is the block size, in bytes, used for hole detection (see
    pwrite_with_holes() below).

    It works as follows:
     - Create a temporary file (fname + '.tmp'; an existing file with that
       name is overwritten) and truncate() it to the same size as the
       original one.
     - While the original file still has data in it:
       - Read the last bufsize bytes of the original file, write them to
         the temporary one performing hole detection.
       - Remove the last bufsize bytes of the original file using
         truncate().
     - Rename the temporary file to the original one, which by now should
       have size 0.

    The operation is destructive: if it is interrupted, the data is split
    between the (shortened) original file and the temporary one.

    Returns True on success. Raises ValueError if bufsize or sparsesize is
    not positive, and IOError/OSError if a file operation fails.
    """
    # Reject sizes that would keep the loops below from making progress,
    # and do it before any file is opened, created or modified.
    if bufsize <= 0:
        raise ValueError("bufsize must be positive, got %r" % (bufsize,))
    if sparsesize <= 0:
        raise ValueError(
            "sparsesize must be positive, got %r" % (sparsesize,))

    dstfname = fname + '.tmp'

    # Open the source first, so a missing or unreadable source does not
    # leave an empty temporary file behind. Both files are opened in binary
    # mode: the contents are arbitrary bytes and must not be decoded or go
    # through newline translation.
    with open(fname, 'rb+') as srcfd:
        with open(dstfname, 'wb') as dstfd:
            # find out the size
            srcfd.seek(0, os.SEEK_END)
            fsize = srcfd.tell()

            # resize dst to the same size as the original; this also keeps
            # the final size right when the file ends in a hole
            dstfd.truncate(fsize)

            lastpos = fsize
            while lastpos > 0:
                offset = max(0, lastpos - bufsize)
                wanted = lastpos - offset

                srcfd.seek(offset, os.SEEK_SET)
                buf = srcfd.read(wanted)
                if len(buf) != wanted:
                    # Truncating after a short read would throw away the
                    # bytes we failed to read.
                    raise IOError(
                        "short read from %r: expected %d bytes, got %d"
                        % (fname, wanted, len(buf)))

                pwrite_with_holes(dstfd, buf, offset, sparsesize)

                # Hand the data over to the operating system before the
                # only other copy of it is discarded.
                dstfd.flush()

                # shrink source file
                srcfd.truncate(offset)

                lastpos = offset

    # Both files are closed at this point.
    os.rename(dstfname, fname)

    return True


def pwrite_with_holes(fd, buf, offset, spsize):
    """Write buf to fd at the given offset, performing hole detection.

    buf is split in blocks of spsize bytes. Blocks made entirely of NUL
    bytes are not written: the file position is moved past them instead, so
    that region can be left as a hole in the file. Every other block
    (including a final block shorter than spsize) is written as is.

    fd must be a seekable file object. Raises ValueError if spsize is not
    positive.
    """
    if spsize <= 0:
        raise ValueError("spsize must be positive, got %r" % (spsize,))

    # Build the all-zeros block with the same string type as buf, so the
    # comparison below works both for binary and for text buffers.
    if isinstance(buf, (bytes, bytearray)):
        zeros = b'\0' * spsize
    else:
        zeros = u'\0' * spsize

    fd.seek(offset, os.SEEK_SET)

    for cp in range(0, len(buf), spsize):
        t = buf[cp:cp + spsize]
        if t == zeros:
            # Skip over the block instead of writing it.
            fd.seek(offset + cp + len(t), os.SEEK_SET)
        else:
            fd.write(t)


def main():
    """Command line entry point.

    Parses the options from sys.argv, makes the given file sparse, and
    returns the process exit status: 0 on success, 1 on wrong usage or on
    an I/O error. Invalid sizes are reported through the option parser,
    which exits the program.
    """
    usage = """%prog [options] <file name>

This program will process the input file and make it sparse, using a fixed
amount of extra space (defaults to 32Mb, see the --bufsize option below).

It can be used instead of cp --sparse when there is not enough disk space to
hold the two copies at the same time.

It creates a temporary file but modifies the source while operating. The
sparse detection can be tuned for better performance at the expense of
aditional space.

The default values are sane for big files like VM images. There are also
aggressive and fast presets that you can use instead of the fine-grained
options. In case both kinds are specified, the preset take precedence.

Comments, bug reports and patches are welcome at albertito@blitiri.com.ar.
"""

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-b", "--bufsize", dest="bufsize", type="float",
            default=32,
            help="buffer size, in megabytes (defaults to %default)")
    parser.add_option("-s", "--sparsesize", dest="sparsesize", type="float",
            default=16,
            help="size used to look for sparse blocks, in kilobytes" +
                " (defaults to %default)")

    presets = optparse.OptionGroup(parser, "Presets")
    presets.add_option("--aggressive", dest="preset",
            action="store_const", const="aggressive",
            help="aggressive preset (better hole detection, slower)")
    presets.add_option("--fast", dest="preset",
            action="store_const", const="fast",
            help="fast preset (worse hole detection, faster)")
    parser.add_option_group(presets)

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        return 1

    fname = args[0]
    bufsize = int(options.bufsize * 1024 * 1024)

    # The presets take precedence over --sparsesize.
    if options.preset == 'aggressive':
        sparsesize = 512
    elif options.preset == 'fast':
        sparsesize = 128 * 1024
    else:
        sparsesize = int(options.sparsesize * 1024)

    # Checked after the conversion to bytes: a tiny positive value can
    # still round down to zero.
    if bufsize <= 0:
        parser.error("the buffer size must be at least one byte")
    if sparsesize <= 0:
        parser.error("the sparse block size must be at least one byte")

    try:
        dig_hole(fname, bufsize, sparsesize)
    except (IOError, OSError) as e:
        sys.stderr.write("huecotes: error: %s\n" % e)
        sys.stderr.write("huecotes: if processing had already started, "
                "part of the data may now be in '%s.tmp'\n" % fname)
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())