#!/usr/bin/env python3

"""
This application is a stress tester for libjio. It's not a traditional stress
test like fsx (which can be used to test libjio using the preloading library),
but uses fault injection to check how the library behaves under random
failures.
"""

import sys
import os
import random
import traceback
import libjio

try:
    import fiu
except ImportError:
    print()
    print("Error: unable to load fiu module. This test needs libfiu")
    print("support. Please install libfiu and recompile libjio with FI=1.")
    print()
    raise


#
# Auxiliary stuff
#

# Rolling digit (0-9) used by getbytes() so consecutive writes differ.
gbcount = 0


def getbytes(n):
    """Return n bytes, all equal to the ASCII digit of a rolling counter.

    The module-level counter advances (modulo 10) on every call, so two
    consecutive calls never return the same digit.
    """
    global gbcount
    gbcount = (gbcount + 1) % 10
    return bytes(str(gbcount) * n, 'ascii')


def randfrange(maxend, maxsize):
    """Return a random (start, end) pair with 0 <= start <= end < maxend.

    The length (end - start) is always smaller than maxsize. Both maxend
    and maxsize must be positive.
    """
    start = random.randint(0, maxend - 1)
    size = random.randint(0, (maxend - 1) - start) % maxsize
    return start, start + size


class ConsistencyError (Exception):
    """Raised when the file contents do not match any expected state."""
    pass


#
# The test itself
#

class Stresser:
    """Perform random journaled writes on a file and verify the results.

    After every operation the affected region (plus some surrounding bytes)
    must contain either the data from before the write or the data from
    after it; anything else raises ConsistencyError.
    """

    def __init__(self, fname, fsize, nops, use_fi, use_as):
        """Open (creating if needed) fname and truncate it to fsize bytes.

        fname  -- path of the file to operate on
        fsize  -- file size, in bytes
        nops   -- number of write operations run() will perform
        use_fi -- enable fault injection (each write runs in a forked child)
        use_as -- open with J_LINGER and start libjio's autosync
        """
        self.fname = fname
        self.fsize = fsize
        self.nops = nops
        self.use_fi = use_fi
        self.use_as = use_as
        self.maxoplen = min(self.fsize // 256, 64 * 1024)

        jflags = 0
        if use_as:
            jflags = libjio.J_LINGER

        self.jf = libjio.open(fname,
                libjio.O_RDWR | libjio.O_CREAT, 0o600, jflags)

        # independent read-only handle, used to check the real file contents
        # without going through libjio
        self.f = open(fname, mode = 'rb')

        self.jf.truncate(fsize)

        if use_as:
            # NOTE(review): arguments are presumably (max seconds, max
            # bytes) between syncs - confirm against the libjio bindings
            self.jf.autosync_start(5, 2 * 1024 * 1024)

        # data used for consistency checks
        self.current_range = (0, 0)
        self.prev_data = b""
        self.new_data = b""

    def close(self):
        """Drop the libjio handle and close the read-only file handle."""
        self.jf = None
        if self.f is not None:
            self.f.close()
            self.f = None

    def pread(self, start, end):
        """Return the bytes in [start, end) read directly from the file.

        Uses os.pread() on the raw descriptor so the result never comes
        from a stale user-space buffer and the file position is untouched.
        """
        return os.pread(self.f.fileno(), end - start, start)

    def _prep_randwrite(self):
        """Choose a random write and record the data verify() will expect.

        Returns (new_bytes, offset) for the write. Must run in the process
        that will later call verify(), i.e. before any fork.
        """
        start, end = randfrange(self.fsize, self.maxoplen)

        # read an extended range so we can check we
        # only wrote what we were supposed to
        estart = max(0, start - 32)
        eend = min(self.fsize, end + 32)
        self.current_range = (estart, eend)
        self.prev_data = self.pread(estart, eend)

        nd = getbytes(end - start)
        # positive offsets on both sides, so an empty tail stays empty
        self.new_data = self.prev_data[:start - estart] \
                + nd + self.prev_data[end - estart:]
        return nd, start

    def randwrite(self):
        """Perform one random write through libjio. Always returns True."""
        nd, start = self._prep_randwrite()
        self.jf.pwrite(nd, start)
        return True

    def randwrite_fork(self):
        """Perform one random write in a forked child process.

        Returns True if the child exited with status 0, False if it exited
        with a non-zero status (a simulated failure). Raises RuntimeError
        if the child did not exit normally.
        """
        # do the preparation before forking, otherwise the expected data
        # would only exist in the child and verify() would check nothing
        nd, start = self._prep_randwrite()

        pid = os.fork()
        if pid == 0:
            # child
            try:
                self.jf.pwrite(nd, start)
            except IOError:
                sys.exit(1)
            except BaseException:
                # the child must never unwind into the parent's loop
                traceback.print_exc()
                sys.exit(1)
            sys.exit(0)
        else:
            # parent
            _, status = os.waitpid(pid, 0)
            if not os.WIFEXITED(status):
                raise RuntimeError(status)

            if os.WEXITSTATUS(status) != 0:
                return False
            return True

    def verify(self):
        """Check the last written range holds either the old or new data.

        Raises ConsistencyError otherwise.
        """
        # NOTE: must not use self.jf
        real_data = self.pread(self.current_range[0],
                self.current_range[1])
        if real_data not in (self.prev_data, self.new_data):
            raise ConsistencyError

    def reopen(self):
        """Drop the libjio handle, run jfsck, verify, and open it again.

        Returns the result of libjio.jfsck().
        """
        self.jf = None
        r = libjio.jfsck(self.fname)

        self.verify()

        self.jf = libjio.open(self.fname,
                libjio.O_RDWR | libjio.O_CREAT, 0o600)
        return r

    def fiu_enable(self):
        """Enable random fault injection on 'jio/*' (only if use_fi)."""
        if self.use_fi:
            fiu.enable_random('jio/*', probability = 0.02)

    def fiu_disable(self):
        """Disable fault injection on 'jio/*' (only if use_fi)."""
        if self.use_fi:
            fiu.disable('jio/*')

    def run(self):
        """Run nops write operations; return the number of failed ones."""
        self.fiu_enable()
        nfailures = 0
        sys.stdout.write(" ")

        for i in range(1, self.nops + 1):
            sys.stdout.write(".")
            if i % 10 == 0:
                sys.stdout.write(" ")
            if i % 50 == 0:
                sys.stdout.write(" %d\n" % i)
                sys.stdout.write(" ")
            # flush before a possible fork so the child does not inherit
            # (and later re-emit) buffered output
            sys.stdout.flush()

            if self.use_fi:
                r = self.randwrite_fork()
            else:
                r = self.randwrite()

            if not r:
                nfailures += 1
                # recover with fault injection off
                self.fiu_disable()
                r = self.reopen()
                if r['total'] > 1:
                    raise ConsistencyError(
                        "jfsck found %d transactions after a single "
                        "failed write" % r['total'])
                self.fiu_enable()

            self.verify()

        sys.stdout.write("\n")
        sys.stdout.flush()

        self.fiu_disable()
        return nfailures


#
# Main
#

def usage():
    """Print the command line help."""
    print("""
Use: jiostress <file name> <file size in Mb> [<number of operations>]
        [--fi] [--as]

If the number of operations is not provided, the default (1000) will be
used.

If the "--fi" option is passed, the test will perform fault injection. This
option conflicts with "--as".

If the "--as" option is passed, lingering transactions will be used, along
with the automatic syncing thread. This option conflicts with "--fi".
""")


def main():
    """Parse the command line, run the stress test and the final check."""
    try:
        fname = sys.argv[1]
        fsize = int(sys.argv[2]) * 1024 * 1024
        if fsize <= 0:
            raise ValueError("file size must be positive")

        nops = 1000
        if len(sys.argv) >= 4 and sys.argv[3].isdecimal():
            nops = int(sys.argv[3])

        use_fi = '--fi' in sys.argv
        use_as = '--as' in sys.argv
    except (IndexError, ValueError):
        usage()
        sys.exit(1)

    if use_fi and use_as:
        print("Error: --fi and --as cannot be used together")
        sys.exit(1)

    s = Stresser(fname, fsize, nops, use_fi, use_as)
    print("Running stress test")
    nfailures = s.run()
    # release every handle before the final check
    s.close()
    del s
    print("Stress test completed")
    print(" %d operations" % nops)
    print(" %d simulated failures" % nfailures)

    r = libjio.jfsck(fname)
    if r['total'] != 0:
        raise ConsistencyError(
            "final jfsck found %d pending transactions" % r['total'])
    print("Final check completed")
    #os.unlink(fname)


if __name__ == '__main__':
    main()