#!/usr/bin/env python
# encoding: utf8
"""
huecotes - Make a file sparse using a fixed amount of extra space
Alberto Bertogli (albertito@blitiri.com.ar)
-----------------------------------------------------------------------------
I don't like licenses, because I don't like having to worry about all this
legal stuff just for a simple piece of software I don't really mind anyone
using. But I also believe that it's important that people share and give back;
so I'm placing this work under the following license.
BOLA - Buena Onda License Agreement (v1.1)
------------------------------------------
This work is provided 'as-is', without any express or implied warranty. In no
event will the authors be held liable for any damages arising from the use of
this work.
To all effects and purposes, this work is to be considered Public Domain.
However, if you want to be "buena onda", you should:
1. Not take credit for it, and give proper recognition to the authors.
2. Share your modifications, so everybody benefits from them.
3. Do something nice for the authors.
4. Help someone who needs it: sign up for some volunteer work or help your
neighbour paint the house.
5. Don't waste. Anything, but specially energy that comes from natural
non-renewable resources. Extra points if you discover or invent something
to replace them.
6. Be tolerant. Everything that's good in nature comes from cooperation.
"""
import sys
import os
import optparse
# TODO:
# - Use mmap to avoid storing the buffer in memory.
# - If something goes wrong, try to recover the original file (tricky, as we
# could mess things up even more).
# - Better presets and defaults.
def dig_hole(fname, bufsize, sparsesize):
    """Make the given file sparse in place, using a bounded amount of space.

    bufsize is the number of bytes moved per step, and therefore the amount
    of additional space used; sparsesize is the block size, in bytes, used
    for hole detection (see pwrite_with_holes()).

    It works as follows:
     - Create a temporary file (fname + '.tmp') and truncate() it to the
       same size as the original one.
     - While the original file still has data in it:
        - Read the last bufsize bytes of the original file and write them
          to the temporary one at the same offset, performing hole
          detection.
        - Remove those bytes from the original file using truncate().
     - Rename the temporary file over the original one, which by now has
       size 0.

    The source file is consumed while operating. If an error interrupts the
    process, both files are closed and left in place: the data is then split
    between fname (the head) and fname + '.tmp' (the tail).

    Raises ValueError if bufsize is not positive, and IOError if fewer bytes
    than expected could be read from the source. Returns True on success.
    """
    if bufsize <= 0:
        # With a non-positive step the loop below would never make progress.
        raise ValueError("bufsize must be a positive number of bytes")

    dstfname = fname + '.tmp'

    # Both files are opened in binary mode: offsets are byte offsets and the
    # data must be copied verbatim. The source is opened first so that a
    # missing or unreadable source does not leave a stray temporary file.
    with open(fname, 'r+b') as srcfd:
        with open(dstfname, 'wb') as dstfd:
            # find out the size
            srcfd.seek(0, os.SEEK_END)
            fsize = srcfd.tell()

            # resize dst to the same size as the original
            dstfd.truncate(fsize)

            lastpos = fsize
            while lastpos > 0:
                offset = max(0, lastpos - bufsize)
                wanted = lastpos - offset

                srcfd.seek(offset, os.SEEK_SET)
                buf = srcfd.read(wanted)
                if len(buf) != wanted:
                    # Truncating now would throw away data we never copied.
                    raise IOError("short read from %r at offset %d"
                                  % (fname, offset))

                pwrite_with_holes(dstfd, buf, offset, sparsesize)

                # Push the chunk out of our buffers before destroying the
                # only other copy, so write errors (e.g. a full disk) show
                # up while the data is still in the source file.
                dstfd.flush()
                os.fsync(dstfd.fileno())

                # shrink source file
                srcfd.truncate(offset)
                lastpos = offset

    os.rename(dstfname, fname)
    return True
def pwrite_with_holes(fd, buf, offset, spsize):
    """Write buf to fd at the given offset, while performing hole detection.

    buf is examined in consecutive blocks of spsize items. A full block
    that contains only zeros is not written; the file position is moved
    past it with seek() instead, which gives the file system the chance to
    leave a hole there. Any other block, including a shorter final one, is
    written as is.

    buf may be a bytes object (file opened in binary mode) or a text
    string; the zero block used for the comparison is built with the
    matching type.

    Raises ValueError if spsize is not positive.
    """
    if spsize <= 0:
        # A non-positive block size would never advance through buf.
        raise ValueError("spsize must be a positive number of bytes")

    # Comparing bytes against a text string is never equal on Python 3, so
    # the reference block has to have the same type as the data.
    if isinstance(buf, bytes):
        zeros = b'\0' * spsize
    else:
        zeros = '\0' * spsize

    fd.seek(offset, os.SEEK_SET)

    cp = 0
    while cp < len(buf):
        t = buf[cp:cp + spsize]
        if t == zeros:
            # Skip the block instead of writing it.
            fd.seek(offset + cp + len(t), os.SEEK_SET)
        else:
            fd.write(t)
        cp += spsize
def main():
    """Command-line entry point: parse the options and process the file.

    Returns the exit status: 0 on success, 1 (after printing the help) when
    the number of positional arguments is not exactly one. Sizes that are
    not finite, or that amount to less than one byte, are reported through
    parser.error(), which prints a message and exits with status 2.
    """
    usage = """%prog [options] <file name>
This program will process the input file and make it sparse, using a fixed
amount of extra space (defaults to 32Mb, see the --bufsize option below).
It can be used instead of cp --sparse when there is not enough disk space to
hold the two copies at the same time.
It creates a temporary file but modifies the source while operating. The
sparse detection can be tuned for better performance at the expense of
aditional space.
The default values are sane for big files like VM images. There are also
aggressive and fast presets that you can use instead of the fine-grained
options. In case both kinds are specified, the preset take precedence.
Comments, bug reports and patches are welcome at albertito@blitiri.com.ar.
"""

    parser = optparse.OptionParser(usage = usage)
    parser.add_option("-b", "--bufsize", dest = "bufsize", type = float,
        default = 32,
        help = "buffer size, in megabytes (defaults to %default)")
    parser.add_option("-s", "--sparsesize", dest = "sparsesize", type = float,
        default = 16,
        help = "size used to look for sparse blocks, in kilobytes" + \
            " (defaults to %default)")

    presets = optparse.OptionGroup(parser, "Presets")
    presets.add_option("", "--aggressive", dest = "preset",
        action = "store_const", const = "aggressive",
        help = "aggressive preset (better hole detection, slower)")
    presets.add_option("", "--fast", dest = "preset",
        action = "store_const", const = "fast",
        help = "fast preset (worse hole detection, faster)")
    parser.add_option_group(presets)

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        return 1

    fname = args[0]

    # Convert the user-facing units (megabytes, kilobytes) to bytes. A preset
    # overrides --sparsesize. int() raises on nan/inf, which is turned into a
    # regular usage error instead of a traceback.
    try:
        bufsize = int(options.bufsize * 1024 * 1024)
        if options.preset == 'aggressive':
            sparsesize = 512
        elif options.preset == 'fast':
            sparsesize = 128 * 1024
        else:
            sparsesize = int(options.sparsesize * 1024)
    except (ValueError, OverflowError):
        parser.error("--bufsize and --sparsesize must be finite numbers")

    # Sizes below one byte would make the copy loops spin forever without
    # making progress, so refuse them up front.
    if bufsize <= 0:
        parser.error("--bufsize must amount to at least one byte")
    if sparsesize <= 0:
        parser.error("--sparsesize must amount to at least one byte")

    dig_hole(fname, bufsize, sparsesize)
    return 0
if __name__ == '__main__':
    # Run the command-line entry point and hand its return value to the
    # interpreter as the process exit status.
    status = main()
    sys.exit(status)