author | Alberto Bertogli
<albertito@blitiri.com.ar> 2009-07-10 09:40:57 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2009-07-11 03:33:41 UTC |
parent | f30c385b5288a682f0a16cf36d81a1c0fc23476f |
UPGRADING | +5 | -0 |
doc/libjio.rst | +10 | -10 |
libjio/check.c | +5 | -91 |
libjio/checksum.c | +84 | -22 |
libjio/common.c | +54 | -0 |
libjio/common.h | +4 | -2 |
libjio/journal.c | +279 | -74 |
libjio/journal.h | +4 | -0 |
libjio/trans.c | +6 | -0 |
libjio/trans.h | +0 | -22 |
tests/behaviour/t_corruption.py | +5 | -2 |
tests/behaviour/tf.py | +24 | -12 |
diff --git a/UPGRADING b/UPGRADING index dc7ded7..759a658 100644 --- a/UPGRADING +++ b/UPGRADING @@ -6,6 +6,11 @@ You should always clean all your files before upgrading. While I don't expect the transaction on-disk format to change, it's a good practise and it doesn't take much effort. When it's mandatory, it will be noted. +-> 0.51 (On-disk format change) + - The way transactions are stored on disk has changed. It is mandatory that + you jfsck all your files before upgrading. Hopefully this will be the last + backwards-incompatible format change before 1.0. + -> 0.50 (Big API change) - Structures are now opaque types: struct jfs -> jfs_t; jopen() returns a pointer to one, jclose() frees it. diff --git a/doc/libjio.rst b/doc/libjio.rst index 6bc4f3c..c166bf4 100644 --- a/doc/libjio.rst +++ b/doc/libjio.rst @@ -59,20 +59,20 @@ careful when doing strange things with files while working on them. The transaction file ~~~~~~~~~~~~~~~~~~~~ -The transaction file is composed of two main parts: the header and the -payload. +The transaction file is composed of three main parts: the header, the +operations, and the trailer. The header holds basic information about the transaction itself, including the -ID, some flags, and the amount of operations it includes. Then the payload has -all the operations one after the other, divided in two parts: the first one -includes static information about the operation (the length of the data, the -offset of the file where it should be applied, etc.) and the data itself, -which is saved by the library prior applying the commit, so transactions can -be rollbacked. +version, the transaction ID, and its flags. -At the end of the transaction file, a checksum is stored, to detect journal -corruption. +Then the operation part has all the operations one after the other, prepending +the operation data with a per-operation header that includes the length of the +data and the offset of the file where it should be applied, and then the data +itself. +Finally, the trailer contains the number of operations included in it and a +checksum of the whole file. Both fields are used to detect broken or corrupted +transactions. The commit procedure -------------------- diff --git a/libjio/check.c b/libjio/check.c index 03458ab..c39ce49 100644 --- a/libjio/check.c +++ b/libjio/check.c @@ -17,84 +17,10 @@ #include "libjio.h" #include "common.h" +#include "journal.h" #include "trans.h" -/** Fill a transaction structure from a mmapped transaction file */ -static off_t fill_trans(unsigned char *map, off_t len, struct jtrans *ts) -{ - int i; - unsigned char *p; - struct joper *op, *tmp; - off_t translen; - - if (len < J_DISKHEADSIZE) - return 0; - - p = map; - - ts->id = *( (uint32_t *) p); - p += 4; - - ts->flags = *( (uint32_t *) p); - p += 4; - - ts->numops = *( (uint32_t *) p); - p += 4; - - translen = J_DISKHEADSIZE; - - for (i = 0; i < ts->numops; i++) { - if (p + J_DISKOPHEADSIZE > map + len) - goto error; - - op = malloc(sizeof(struct joper)); - if (op == NULL) - goto error; - - op->len = *( (uint32_t *) p); - p += 4; - - op->plen = *( (uint32_t *) p); - p += 4; - - op->offset = *( (uint64_t *) p); - p += 8; - - if (p + op->len > map + len) - goto error; - - op->buf = (void *) p; - p += op->len; - - op->pdata = NULL; - - if (ts->op == NULL) { - ts->op = op; - op->prev = NULL; - op->next = NULL; - } else { - for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next) - ; - tmp->next = op; - op->prev = tmp; - op->next = NULL; - } - - translen += J_DISKOPHEADSIZE + op->len; - } - - return translen; - -error: - while (ts->op != NULL) { - tmp = ts->op->next; - free(ts->op); - ts->op = tmp; - } - return 0; -} - /** Remove all the files in the journal directory (if any). * * @param name path to the file @@ -158,7 +84,6 @@ enum jfsck_return jfsck(const char *name, const char *jdir, { int tfd, rv, i, ret; unsigned int maxtid; - uint32_t csum1, csum2; char jlockfile[PATH_MAX], tname[PATH_MAX]; struct stat sinfo; struct jfs fs; @@ -167,7 +92,7 @@ enum jfsck_return jfsck(const char *name, const char *jdir, DIR *dir; struct dirent *dent; unsigned char *map; - off_t filelen, translen, lr; + off_t filelen, lr; tfd = -1; filelen = 0; @@ -310,23 +235,12 @@ enum jfsck_return jfsck(const char *name, const char *jdir, map = NULL; goto loop; } - translen = fill_trans(map, filelen, curts); - if (translen == 0) { - res->broken++; - goto loop; - } - /* see if there's enough room for the checksum after the - * transaction information */ - if (filelen != translen + sizeof(uint32_t)) { + rv = fill_trans(map, filelen, curts); + if (rv == -1) { res->broken++; goto loop; - } - - /* verify the checksum */ - csum1 = checksum_map(map, filelen - (sizeof(uint32_t))); - csum2 = * (uint32_t *) (map + filelen - (sizeof(uint32_t))); - if (csum1 != csum2) { + } else if (rv == -2) { res->corrupt++; goto loop; } diff --git a/libjio/checksum.c b/libjio/checksum.c index 250a96d..225a8af 100644 --- a/libjio/checksum.c +++ b/libjio/checksum.c @@ -1,7 +1,7 @@ /* * Checksum functions - * Based on RFC 1071, "Computing the Internet Checksum" + * Uses CRC32c, just because it's decent enough. As defined in RFC 3309. */ #include <stddef.h> @@ -9,10 +9,91 @@ #include <sys/mman.h> #include "common.h" +static uint32_t table[256] = +{ + 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, + 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, + 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, + 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, + 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, + 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, + 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, + 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, + 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, + 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, + 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, + 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, + 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, + 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, + 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, + 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, + 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, + 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, + 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, + 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, + 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, + 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, + 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, + 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, + 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, + 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, + 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, + 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, + 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, + 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, + 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, + 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, + 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, + 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, + 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, + 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, + 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, + 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, + 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, + 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, + 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, + 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, + 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, + 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, + 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, + 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, + 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, + 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, + 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, + 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, + 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, + 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, + 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, + 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, + 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, + 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, + 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, + 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, + 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, + 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, + 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, + 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, + 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, + 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L, +}; + +/** Calculates the checksum of the given buffer, up to count bytes. Returns the + * checksum. The initial crc32 must be 0. */ +uint32_t checksum_buf(uint32_t crc32, const unsigned char *buf, size_t count) +{ + crc32 = ~crc32; + + while (count--) { + crc32 = (crc32 >> 8) ^ table[(crc32 ^ *buf) & 0xFFL]; + buf++; + } + + return ~crc32; +} /** Reads the contents of the given fd, up to len bytes, and stores the * checksum in csum. Returns 1 on success, 0 on error. */ -int checksum(int fd, size_t len, uint32_t *csum) +int checksum_fd(int fd, size_t len, uint32_t *csum) { uint8_t *map; @@ -20,29 +101,10 @@ int checksum(int fd, size_t len, uint32_t *csum) if (map == MAP_FAILED) return 0; - *csum = checksum_map(map, len); + *csum = checksum_buf(0, map, len); munmap(map, len); return 1; } -/** Calculates the checksum of the given buffer, up to count bytes. Returns - * the checksum. */ -uint32_t checksum_map(uint8_t *map, size_t count) -{ - uint32_t sum = 0; - - while( count > 1 ) { - sum += * (uint16_t *) map++; - count -= 2; - } - - if( count > 0 ) - sum += * (uint8_t *) map; - - while (sum >> 16) - sum = (sum & 0xffff) + (sum >> 16); - - return ~sum; -} diff --git a/libjio/common.c b/libjio/common.c index 7a3cdf9..12c92ef 100644 --- a/libjio/common.c +++ b/libjio/common.c @@ -11,6 +11,7 @@ #include <limits.h> #include <stdio.h> #include <stdlib.h> +#include <arpa/inet.h> /* htonl() and friends */ #include "libjio.h" #include "common.h" @@ -133,3 +134,56 @@ void get_jtfile(struct jfs *fs, unsigned int tid, char *jtfile) } +/* The ntohll() and htonll() functions are not standard, so we define them + * using an UGLY trick because there is no standard way to check for + * endianness at runtime. */ + +/** Convert a 64-bit value between network byte order and host byte order. */ +uint64_t ntohll(uint64_t x) +{ + static int endianness = 0; + + /* determine the endianness by checking how htonl() behaves; use -1 + * for little endian and 1 for big endian */ + if (endianness == 0) { + if (htonl(1) == 1) + endianness = 1; + else + endianness = -1; + } + + if (endianness == 1) { + /* big endian */ + return x; + } + + /* little endian */ + return ( ntohl( (x >> 32) & 0xFFFFFFFF ) | \ + ( (uint64_t) ntohl(x & 0xFFFFFFFF) ) << 32 ); +} + +/** Convert a 64-bit value between host byte order and network byte order. */ +uint64_t htonll(uint64_t x) +{ + static int endianness = 0; + + /* determine the endianness by checking how htonl() behaves; use -1 + * for little endian and 1 for big endian */ + if (endianness == 0) { + if (htonl(1) == 1) + endianness = 1; + else + endianness = -1; + } + + if (endianness == 1) { + /* big endian */ + return x; + } + + /* little endian */ + return ( htonl( (x >> 32) & 0xFFFFFFFF ) | \ + ( (uint64_t) htonl(x & 0xFFFFFFFF) ) << 32 ); +} + + diff --git a/libjio/common.h b/libjio/common.h index 4879eb5..d0f3ad8 100644 --- a/libjio/common.h +++ b/libjio/common.h @@ -74,9 +74,11 @@ ssize_t spread(int fd, void *buf, size_t count, off_t offset); ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset); int get_jdir(const char *filename, char *jdir); void get_jtfile(struct jfs *fs, unsigned int tid, char *jtfile); +uint64_t ntohll(uint64_t x); +uint64_t htonll(uint64_t x); -int checksum(int fd, size_t len, uint32_t *csum); -uint32_t checksum_map(uint8_t *map, size_t count); +int checksum_fd(int fd, size_t len, uint32_t *csum); +uint32_t checksum_buf(uint32_t sum, const unsigned char *buf, size_t count); void autosync_check(struct jfs *fs); diff --git a/libjio/journal.c b/libjio/journal.c index 0666504..324a8f9 100644 --- a/libjio/journal.c +++ b/libjio/journal.c @@ -8,13 +8,13 @@ #include <fcntl.h> /* open() */ #include <unistd.h> /* f[data]sync(), close() */ #include <stdlib.h> /* malloc() and friends */ -#include <limits.h> /* MAX_PATH */ +#include <limits.h> /* PATH_MAX */ #include <string.h> /* memcpy() */ -#include <libgen.h> /* basename(), dirname() */ #include <stdio.h> /* fprintf() */ -#include <dirent.h> /* readdir() and friends */ #include <errno.h> /* errno */ -#include <sys/mman.h> /* mmap() */ +#include <stdint.h> /* uintX_t */ +#include <arpa/inet.h> /* htonl() and friends */ +#include <netinet/in.h> /* htonl() and friends (on some platforms) */ #include "libjio.h" #include "common.h" @@ -24,7 +24,87 @@ /* - * helper functions + * On-disk structures + * + * Each transaction will be stored on disk as a single file, composed of a + * header, operation information, and a trailer. The operation information is + * composed of repeated operation headers followed by their corresponding + * data, one for each operation. A special operation header containing all 0s + * marks the end of the operations. + * + * Visually, something like this: + * + * +--------+---------+----------+---------+----------+-----+-----+---------+ + * | header | op1 hdr | op1 data | op2 hdr | op2 data | ... | eoo | trailer | + * +--------+---------+----------+---------+----------+-----+-----+---------+ + * \ / + * +--------------- operations ----------------+ + * + * The details of each part can be seen on the following structures. All + * integers are stored in network byte order. + */ + +/** Transaction file header */ +struct on_disk_hdr { + uint16_t ver; + uint16_t flags; + uint32_t trans_id; +} __attribute__((packed)); + +/** Transaction file operation header */ +struct on_disk_ophdr { + uint32_t len; + uint64_t offset; +} __attribute__((packed)); + +/** Transaction file trailer */ +struct on_disk_trailer { + uint32_t numops; + uint32_t checksum; +} __attribute__((packed)); + + +/* Convert structs to/from host to network (disk) endian */ + +static void hdr_hton(struct on_disk_hdr *hdr) +{ + hdr->ver = htons(hdr->ver); + hdr->flags = htons(hdr->flags); + hdr->trans_id = htonl(hdr->trans_id); +} + +static void hdr_ntoh(struct on_disk_hdr *hdr) +{ + hdr->ver = ntohs(hdr->ver); + hdr->flags = ntohs(hdr->flags); + hdr->trans_id = ntohl(hdr->trans_id); +} + +static void ophdr_hton(struct on_disk_ophdr *ophdr) +{ + ophdr->len = htonl(ophdr->len); + ophdr->offset = htonll(ophdr->offset); +} + +static void ophdr_ntoh(struct on_disk_ophdr *ophdr) +{ + ophdr->len = ntohl(ophdr->len); + ophdr->offset = ntohll(ophdr->offset); +} + +static void trailer_hton(struct on_disk_trailer *trailer) { + trailer->numops = htonl(trailer->numops); + trailer->checksum = htonl(trailer->checksum); +} + +static void trailer_ntoh(struct on_disk_trailer *trailer) { + trailer->numops = ntohl(trailer->numops); + trailer->checksum = ntohl(trailer->checksum); +} + + +/* + * Helper functions */ /** Get a new transaction id */ @@ -130,9 +210,8 @@ struct journal_op *journal_new(struct jtrans *ts) int fd, id; ssize_t rv; char *name = NULL; - unsigned char buf_init[J_DISKHEADSIZE]; - unsigned char *bufp; - struct journal_op *jop = NULL; + struct journal_op *jop; + struct on_disk_hdr hdr; jop = malloc(sizeof(struct journal_op)); if (jop == NULL) @@ -156,6 +235,7 @@ struct journal_op *journal_new(struct jtrans *ts) jop->fd = fd; jop->name = name; jop->curpos = 0; + jop->csum = 0; jop->ts = ts; jop->fs = ts->fs; @@ -167,22 +247,18 @@ struct journal_op *journal_new(struct jtrans *ts) ts->id = id; /* save the header */ - bufp = buf_init; - - memcpy(bufp, (void *) &(ts->id), 4); - bufp += 4; + hdr.ver = 1; + hdr.trans_id = id; + hdr.flags = ts->flags; - memcpy(bufp, (void *) &(ts->flags), 4); - bufp += 4; - - memcpy(bufp, (void *) &(ts->numops), 4); - bufp += 4; - - rv = spwrite(fd, buf_init, J_DISKHEADSIZE, 0); - if (rv != J_DISKHEADSIZE) + hdr_hton(&hdr); + rv = spwrite(fd, &hdr, sizeof(hdr), 0); + if (rv != sizeof(hdr)) goto unlink_error; - jop->curpos = J_DISKHEADSIZE; + jop->curpos = sizeof(hdr); + jop->csum = checksum_buf(jop->csum, (unsigned char *) &hdr, + sizeof(hdr)); fiu_exit_on("jio/commit/tf_header"); @@ -200,81 +276,105 @@ error: return NULL; } -/** Save the given transaction in the journal */ -int journal_save(struct journal_op *jop) +/** Saves a single joper in the journal file */ +static int save_joper(struct journal_op *jop, const struct jtrans *ts, + struct joper *op) { ssize_t rv; - uint32_t csum; - struct joper *op; - unsigned char hdr[J_DISKOPHEADSIZE]; - unsigned char *hdrp; - const struct jtrans *ts = jop->ts; + struct on_disk_ophdr ophdr; + + /* read the current content only if the transaction is not + * marked as NOROLLBACK, and if the data is not there yet, + * which is the normal case, but for rollbacking we fill it + * ourselves */ + if (!(ts->flags & J_NOROLLBACK) && (op->pdata == NULL)) { + op->pdata = malloc(op->len); + if (op->pdata == NULL) + goto error; - /* save each transacion in the file */ - for (op = ts->op; op != NULL; op = op->next) { - /* read the current content only if the transaction is not - * marked as NOROLLBACK, and if the data is not there yet, - * which is the normal case, but for rollbacking we fill it - * ourselves */ - if (!(ts->flags & J_NOROLLBACK) && (op->pdata == NULL)) { - op->pdata = malloc(op->len); - if (op->pdata == NULL) - goto error; - - op->plen = op->len; - - rv = spread(ts->fs->fd, op->pdata, op->len, - op->offset); - if (rv < 0) - goto error; - if (rv < op->len) { - /* we are extending the file! */ - /* ftruncate(ts->fs->fd, op->offset + op->len); */ - op->plen = rv; - } + op->plen = op->len; + + rv = spread(ts->fs->fd, op->pdata, op->len, + op->offset); + if (rv < 0) + goto error; + if (rv < op->len) { + /* we are extending the file! */ + /* ftruncate(ts->fs->fd, op->offset + op->len); */ + op->plen = rv; } + } - /* save the operation's header */ - hdrp = hdr; + /* save the operation's header */ + ophdr.len = op->len; + ophdr.offset = op->offset; - memcpy(hdrp, (void *) &(op->len), 4); - hdrp += 4; + ophdr_hton(&ophdr); + rv = spwrite(jop->fd, &ophdr, sizeof(ophdr), jop->curpos); + if (rv != sizeof(ophdr)) + goto error; - memcpy(hdrp, (void *) &(op->plen), 4); - hdrp += 4; + fiu_exit_on("jio/commit/tf_ophdr"); - memcpy(hdrp, (void *) &(op->offset), 8); - hdrp += 8; + jop->curpos += sizeof(ophdr); + jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr, + sizeof(ophdr)); - rv = spwrite(jop->fd, hdr, J_DISKOPHEADSIZE, jop->curpos); - if (rv != J_DISKOPHEADSIZE) - goto error; + /* and save it to the disk */ + rv = spwrite(jop->fd, op->buf, op->len, jop->curpos); + if (rv != op->len) + goto error; - fiu_exit_on("jio/commit/tf_ophdr"); + jop->curpos += op->len; + jop->csum = checksum_buf(jop->csum, op->buf, op->len); - jop->curpos += J_DISKOPHEADSIZE; + return 0; - /* and save it to the disk */ - rv = spwrite(jop->fd, op->buf, op->len, jop->curpos); - if (rv != op->len) - goto error; +error: + return -1; +} + +/** Save the given transaction in the journal */ +int journal_save(struct journal_op *jop) +{ + ssize_t rv; + struct joper *op; + const struct jtrans *ts = jop->ts; + struct on_disk_ophdr ophdr; + struct on_disk_trailer trailer; - jop->curpos += op->len; + /* save each transacion in the file */ + for (op = ts->op; op != NULL; op = op->next) { + rv = save_joper(jop, ts, op); + if (rv != 0) + goto error; fiu_exit_on("jio/commit/tf_opdata"); } fiu_exit_on("jio/commit/tf_data"); - /* compute and save the checksum (curpos is always small, so there's - * no overflow possibility when we convert to size_t) */ - if (!checksum(jop->fd, jop->curpos, &csum)) + /* write the the empty ophdr to mark there are no more operations, and + * then the trailer */ + ophdr.len = 0; + ophdr.offset = 0; + + ophdr_hton(&ophdr); + rv = spwrite(jop->fd, &ophdr, sizeof(ophdr), jop->curpos); + if (rv != sizeof(ophdr)) goto error; - rv = spwrite(jop->fd, &csum, sizeof(uint32_t), jop->curpos); - if (rv != sizeof(uint32_t)) + jop->curpos += sizeof(ophdr); + jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr, + sizeof(ophdr)); + + trailer.checksum = jop->csum; + trailer.numops = ts->numops; + + trailer_hton(&trailer); + rv = spwrite(jop->fd, &trailer, sizeof(trailer), jop->curpos); + if (rv != sizeof(trailer)) goto error; - jop->curpos += sizeof(uint32_t); /* this is a simple but efficient optimization: instead of doing * everything O_SYNC, we sync at this point only, this way we avoid @@ -335,3 +435,108 @@ exit: } +/** Fill a transaction structure from a mmapped transaction file. Useful for + * checking purposes. + * @returns 0 on success, -1 if the file was broken, -2 if the checksums didn't + * match + */ +int fill_trans(unsigned char *map, off_t len, struct jtrans *ts) +{ + int rv, numops; + unsigned char *p; + struct joper *op, *tmp; + struct on_disk_hdr hdr; + struct on_disk_ophdr ophdr; + struct on_disk_trailer trailer; + + rv = -1; + + if (len < sizeof(hdr) + sizeof(ophdr) + sizeof(trailer)) + return -1; + + p = map; + + memcpy(&hdr, p, sizeof(hdr)); + p += sizeof(hdr); + + hdr_ntoh(&hdr); + if (hdr.ver != 1) + return -1; + + ts->id = hdr.trans_id; + ts->flags = hdr.flags; + + numops = 0; + for (;;) { + if (p + sizeof(ophdr) > map + len) + goto error; + + memcpy(&ophdr, p, sizeof(ophdr)); + p += sizeof(ophdr); + + ophdr_ntoh(&ophdr); + + if (ophdr.len == 0 && ophdr.offset == 0) { + /* This header marks the end of the operations */ + break; + } + + if (p + ophdr.len > map + len) + goto error; + + op = malloc(sizeof(struct joper)); + if (op == NULL) + goto error; + + op->len = ophdr.len; + op->offset = ophdr.offset; + + op->buf = (void *) p; + p += op->len; + + op->pdata = NULL; + + if (ts->op == NULL) { + ts->op = op; + op->prev = NULL; + op->next = NULL; + } else { + for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next) + ; + tmp->next = op; + op->prev = tmp; + op->next = NULL; + } + + numops++; + } + + if (p + sizeof(trailer) > map + len) + goto error; + + memcpy(&trailer, p, sizeof(trailer)); + p += sizeof(trailer); + + trailer_ntoh(&trailer); + + if (trailer.numops != numops) + goto error; + + ts->numops = numops; + + if (checksum_buf(0, map, len - sizeof(trailer)) != trailer.checksum) { + rv = -2; + goto error; + } + + return 0; + +error: + while (ts->op != NULL) { + tmp = ts->op->next; + free(ts->op); + ts->op = tmp; + } + return rv; +} + diff --git a/libjio/journal.h b/libjio/journal.h index 5a4666c..765571b 100644 --- a/libjio/journal.h +++ b/libjio/journal.h @@ -2,6 +2,7 @@ #ifndef _JOURNAL_H #define _JOURNAL_H +#include <stdint.h> #include "libjio.h" @@ -10,6 +11,7 @@ struct journal_op { int fd; char *name; off_t curpos; + uint32_t csum; struct jtrans *ts; struct jfs *fs; }; @@ -20,5 +22,7 @@ struct journal_op *journal_new(struct jtrans *ts); int journal_save(struct journal_op *jop); int journal_free(struct journal_op *jop); +int fill_trans(unsigned char *map, off_t len, struct jtrans *ts); + #endif diff --git a/libjio/trans.c b/libjio/trans.c index c6fe458..4a4474e 100644 --- a/libjio/trans.c +++ b/libjio/trans.c @@ -87,6 +87,12 @@ int jtrans_add(struct jtrans *ts, const void *buf, size_t count, off_t offset) return -1; } + /* fail for 0 length transactions */ + if (count == 0) { + pthread_mutex_unlock(&(ts->lock)); + return 1; + } + if ((long long) ts->len + count > MAX_TSIZE) { pthread_mutex_unlock(&(ts->lock)); return -1; diff --git a/libjio/trans.h b/libjio/trans.h index 366585e..e7fa689 100644 --- a/libjio/trans.h +++ b/libjio/trans.h @@ -49,27 +49,5 @@ struct jlinger { }; -/* on-disk structures */ - -/* header (fixed length, defined below) */ -struct disk_header { - uint32_t id; /* id */ - uint32_t flags; /* flags about this transaction */ - uint32_t numops; /* number of operations */ -}; - -/* operation */ -struct disk_operation { - uint32_t len; /* data length */ - uint32_t plen; /* previous data length */ - uint64_t offset; /* offset relative to the BOF */ - char *prevdata; /* previous data for rollback */ -}; - -/* disk constants */ -#define J_DISKHEADSIZE 12 /* length of disk_header */ -#define J_DISKOPHEADSIZE 16 /* length of disk_operation header */ - - #endif diff --git a/tests/behaviour/t_corruption.py b/tests/behaviour/t_corruption.py index 76a50a5..ed1bedf 100644 --- a/tests/behaviour/t_corruption.py +++ b/tests/behaviour/t_corruption.py @@ -27,8 +27,11 @@ def test_c01(): n = run_with_tmp(f1) assert content(n) == '' tc = open(transpath(n, 1)).read() - # flip just one bit of the first byte - tc = chr((ord(tc[0]) & 0xFE) | (~ ord(tc[0]) & 0x1) & 0xFF) + tc[1:] + # flip just one bit in the middle byte + pos = len(tc) / 2 + tc = tc[:pos] + \ + chr((ord(tc[pos]) & 0xFE) | (~ ord(tc[pos]) & 0x1) & 0xFF) + \ + tc[pos + 1:] open(transpath(n, 1), 'w').write(tc) fsck_verify(n, corrupt = 1) assert content(n) == '' diff --git a/tests/behaviour/tf.py b/tests/behaviour/tf.py index 786f24c..20450ee 100644 --- a/tests/behaviour/tf.py +++ b/tests/behaviour/tf.py @@ -17,9 +17,10 @@ import struct import libjio -# Useful constants, must match libjio.h -DHS = 12 # disk header size -DOHS = 16 # disk op header size +# Useful constants, must match journal.h +DHS = 8 # disk header size +DOHS = 12 # disk op header size +DTS = 8 # disk trailer size def tmppath(): @@ -159,9 +160,11 @@ class attrdict (dict): class TransFile (object): def __init__(self, path = ''): + self.ver = 0 self.id = -1 self.flags = 0 self.numops = -1 + self.checksum = -1 self.ops = [] self.path = path if path: @@ -171,30 +174,39 @@ class TransFile (object): fd = open(self.path) # header - hdrfmt = "III" - self.id, self.flags, self.numops = struct.unpack(hdrfmt, + hdrfmt = "!HHI" + self.ver, self.flags, self.id = struct.unpack(hdrfmt, fd.read(struct.calcsize(hdrfmt))) # operations (header only) - opfmt = "IIQ" + opfmt = "!IQ" self.ops = [] - for i in range(self.numops): - tlen, plen, offset = struct.unpack(opfmt, + while True: + tlen, offset = struct.unpack(opfmt, fd.read(struct.calcsize(opfmt))) + if tlen == offset == 0: + break payload = fd.read(tlen) assert len(payload) == tlen - self.ops.append(attrdict(tlen = tlen, plen = plen, - offset = offset, payload = payload)) + self.ops.append(attrdict(tlen = tlen, offset = offset, + payload = payload)) + + # trailer + trailerfmt = "!II" + self.numops, self.checksum = struct.unpack(trailerfmt, + fd.read(struct.calcsize(trailerfmt))) def save(self): # the lack of integrity checking in this function is # intentional, so we can write broken transactions and see how # jfsck() copes with them fd = open(self.path, 'w') - fd.write(struct.pack("III", self.id, self.flags, self.numops)) + fd.write(struct.pack("!HHI", 1, self.flags, self.id)) for o in self.ops: - fd.write(struct.pack("IIQs", o.tlen, o.plen, o.offset, + fd.write(struct.pack("!IQs", o.tlen, o.offset, o.payload)) + fd.write(struct.pack("!IQ", 0, 0)) + fd.write(struct.pack("!II", self.numops, self.checksum)) def __repr__(self): return '<TransFile %s: id:%d f:%s n:%d ops:%s>' % \