author | Alberto Bertogli <albertito@gmail.com> | 2004-06-20 02:57:36 UTC |
committer | Alberto Bertogli <albertito@gmail.com> | 2007-07-15 13:02:29 UTC |
parent | edc7dc01863a248e52f8e7f30fb9134ed48d915b |
Makefile | +1 | -1 |
check.c | +260 | -0 |
common.c | +54 | -0 |
common.h | +2 | -0 |
libjio.h | +39 | -25 |
samples/jio3.c | +16 | -10 |
trans.c | +211 | -380 |
unix.c | +19 | -39 |
diff --git a/Makefile b/Makefile index 8d068e6..a032e05 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ include Make.conf # objects to build -OBJS = common.o trans.o unix.o ansi.o +OBJS = common.o trans.o check.o unix.o ansi.o # rules default: all diff --git a/check.c b/check.c new file mode 100644 index 0000000..86c9f46 --- /dev/null +++ b/check.c @@ -0,0 +1,260 @@ + +/* + * libjio - A library for Journaled I/O + * Alberto Bertogli (albertogli@telpin.com.ar) + * + * Recovery functions + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <dirent.h> +#include <errno.h> +#include <sys/mman.h> + +#include "libjio.h" +#include "common.h" + + +/* fill a transaction structure from a mmapped transaction file */ +static int fill_trans(unsigned char *map, off_t len, struct jtrans *ts) +{ + int i; + unsigned char *p; + struct joper *op, *tmp; + + if (len < J_DISKHEADSIZE) + return 0; + + p = map; + + ts->id = *( (uint32_t *) p); + p += 4; + + ts->flags = *( (uint32_t *) p); + p += 4; + + ts->numops = *( (uint32_t *) p); + p += 4; + + for (i = 0; i < ts->numops; i++) { + if (len < (p - map) + J_DISKOPHEADSIZE) + goto error; + + op = malloc(sizeof(struct joper)); + if (op == NULL) + goto error; + + op->len = *( (uint32_t *) p); + p += 4; + + op->plen = *( (uint32_t *) p); + p += 4; + + op->offset = *( (uint64_t *) p); + p += 8; + + if (len < (p - map) + op->plen) + goto error; + + op->pdata = (void *) p; + p += op->plen; + + if (ts->op == NULL) { + ts->op = op; + op->prev = NULL; + op->next = NULL; + } else { + for(tmp = ts->op; tmp->next != NULL; tmp = tmp->next) + ; + tmp->next = op; + op->prev = tmp; + op->next = NULL; + } + } + + return 1; + +error: + while (ts->op != NULL) { + tmp = ts->op->next; + free(ts->op); + ts->op = tmp; + } + return 0; +} + +/* check the journal and rollback incomplete transactions */ +int jfsck(const char 
*name, struct jfsck_result *res) +{ + int fd, tfd, rv, i, maxtid; + char jdir[PATH_MAX], jlockfile[PATH_MAX], tname[PATH_MAX]; + struct stat sinfo; + struct jfs fs; + struct jtrans *curts; + DIR *dir; + struct dirent *dent; + void *map; + off_t filelen; + + + fd = open(name, O_RDWR | O_SYNC | O_LARGEFILE); + if (fd < 0) + return J_ENOENT; + + fs.fd = fd; + fs.name = (char *) name; + + if (!get_jdir(name, jdir)) + return J_ENOMEM; + rv = lstat(jdir, &sinfo); + if (rv < 0 || !S_ISDIR(sinfo.st_mode)) + return J_ENOJOURNAL; + + /* open the lock file, which is only used to complete the jfs + * structure */ + snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock"); + rv = open(jlockfile, O_RDWR | O_CREAT, 0600); + if (rv < 0) + return J_ENOJOURNAL; + fs.jfd = rv; + + dir = opendir(jdir); + if (dir == NULL) + return J_ENOJOURNAL; + + /* loop for each file in the journal directory to find out the greater + * transaction number */ + maxtid = 0; + for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) { + /* see if the file is named like a transaction, ignore + * otherwise; as transactions are named as numbers > 0, a + * simple atoi() is enough testing */ + rv = atoi(dent->d_name); + if (rv <= 0) + continue; + if (rv > maxtid) + maxtid = rv; + } + closedir(dir); + + /* rewrite the lockfile, writing the new maxtid on it, so that when we + * rollback a transaction it doesn't step over existing ones */ + rv = spwrite(fs.jfd, &maxtid, sizeof(maxtid), 0); + if (rv != sizeof(maxtid)) { + return J_ENOMEM; + } + + /* we loop all the way up to the max transaction id */ + for (i = 1; i <= maxtid; i++) { + curts = malloc(sizeof(struct jtrans)); + if (curts == NULL) + return J_ENOMEM; + + jtrans_init(&fs, curts); + curts->id = i; + + /* open the transaction file, using i as its name, so we are + * really looping in order (recovering transaction in a + * different order as they were applied means instant + * corruption) */ + if (!get_jtfile(name, i, tname)) + return J_ENOMEM; + tfd = 
open(tname, O_RDWR | O_SYNC | O_LARGEFILE, 0600); + if (tfd < 0) { + res->invalid++; + goto loop; + } + + /* try to lock the transaction file, if it's locked then it is + * currently being used so we skip it */ + rv = plockf(tfd, F_TLOCK, 0, 0); + if (rv == -1) { + res->in_progress++; + goto loop; + } + + filelen = lseek(tfd, 0, SEEK_END); + map = mmap(0, filelen, PROT_READ, MAP_SHARED, tfd, 0); + rv = fill_trans((unsigned char *) map, filelen, curts); + if (rv != 1) { + res->broken++; + goto loop; + } + + rv = jtrans_rollback(curts); + + munmap(map, filelen); + + if (rv < 0) { + res->apply_error++; + goto loop; + } + res->rollbacked++; + + +loop: + if (tfd >= 0) { + close(tfd); + tfd = -1; + } + + free(curts); + + res->total++; + } + + close(fs.fd); + close(fs.jfd); + + return 0; + +} + +/* remove all the files in the journal directory (if any) */ +int jfsck_cleanup(const char *name) +{ + char jdir[PATH_MAX], tfile[PATH_MAX*3]; + DIR *dir; + struct dirent *dent; + + if (!get_jdir(name, jdir)) + return 0; + + dir = opendir(jdir); + if (dir == NULL && errno == ENOENT) + /* it doesn't exist, so it's clean */ + return 1; + else if (dir == NULL) + return 0; + + for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) { + /* we only care about transactions (named as numbers > 0) and + * the lockfile (named "lock"); ignore everything else */ + if (strcmp(dent->d_name, "lock") && atoi(dent->d_name) <= 0) + continue; + + /* build the full path to the transaction file */ + memset(tfile, 0, PATH_MAX * 3); + strcat(tfile, jdir); + strcat(tfile, "/"); + strcat(tfile, dent->d_name); + + /* the full filename is too large */ + if (strlen(tfile) > PATH_MAX) + return 0; + + /* and remove it */ + unlink(tfile); + } + closedir(dir); + + return 1; +} + diff --git a/common.c b/common.c index e0abfd3..6e39520 100644 --- a/common.c +++ b/common.c @@ -9,6 +9,11 @@ #include <sys/types.h> #include <fcntl.h> #include <unistd.h> +#include <string.h> +#include <libgen.h> +#include 
<limits.h> +#include <stdio.h> +#include <stdlib.h> #include "common.h" @@ -90,3 +95,52 @@ ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset) return count; } +/* build the journal directory name out of the filename */ +int get_jdir(const char *filename, char *jdir) +{ + char *base, *baset; + char *dir, *dirt; + + baset = strdup(filename); + if (baset == NULL) + return 0; + base = basename(baset); + + dirt = strdup(filename); + if (dirt == NULL) + return 0; + dir = dirname(dirt); + + snprintf(jdir, PATH_MAX, "%s/.%s.jio", dir, base); + + free(baset); + free(dirt); + + return 1; +} + +/* build the filename of a given transaction */ +int get_jtfile(const char *filename, int tid, char *jtfile) +{ + char *base, *baset; + char *dir, *dirt; + + baset = strdup(filename); + if (baset == NULL) + return 0; + base = basename(baset); + + dirt = strdup(filename); + if (dirt == NULL) + return 0; + dir = dirname(dirt); + + snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%d", dir, base, tid); + + free(baset); + free(dirt); + + return 1; +} + + diff --git a/common.h b/common.h index a048f91..adcfcfc 100644 --- a/common.h +++ b/common.h @@ -12,6 +12,8 @@ off_t plockf(int fd, int cmd, off_t offset, off_t len); ssize_t spread(int fd, void *buf, size_t count, off_t offset); ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset); +int get_jdir(const char *filename, char *jdir); +int get_jtfile(const char *filename, int tid, char *jtfile); #endif diff --git a/libjio.h b/libjio.h index 07d54c4..8204f46 100644 --- a/libjio.h +++ b/libjio.h @@ -19,64 +19,77 @@ extern "C" { /* logical structures */ + +/* the main file structure */ struct jfs { int fd; /* main file descriptor */ char *name; /* and its name */ int jfd; /* journal's lock file descriptor */ - int flags; /* journal mode options used in jopen() */ + int flags; /* journal flags */ pthread_mutex_t lock; /* a soft lock used in some operations */ }; +/* a single operation */ +struct joper { + int locked; /* is 
the region is locked? */ + off_t offset; /* operation's offset */ + size_t len; /* data length */ + void *buf; /* data */ + size_t plen; /* previous data length */ + void *pdata; /* previous data */ + struct joper *prev; + struct joper *next; +}; + +/* a transaction */ struct jtrans { struct jfs *fs; /* journal file structure to operate on */ char *name; /* name of the transaction file */ int id; /* transaction id */ int flags; /* misc flags */ - const void *buf; /* buffer */ - size_t len; /* buffer lenght */ - off_t offset; /* file offset to operate on */ - void *udata; /* user-supplied data */ - size_t ulen; /* udata lenght */ - void *pdata; /* previous data, for rollback */ - size_t plen; /* pdata lenght */ + unsigned int numops; /* quantity of operations in the list */ + pthread_mutex_t lock; /* used to modify the operation list */ + struct joper *op; /* list of operations */ }; struct jfsck_result { int total; /* total transactions files we looked at */ int invalid; /* invalid files in the journal directory */ int in_progress; /* transactions in progress */ - int broken_head; /* transactions broken (header missing) */ - int broken_body; /* transactions broken (body missing) */ - int load_error; /* errors loading the transaction */ + int broken; /* transactions broken */ int apply_error; /* errors applying the transaction */ int rollbacked; /* transactions that were rollbacked */ }; -/* on-disk structure */ -struct disk_trans { - - /* header (fixed lenght, defined below) */ + +/* on-disk structures */ + +/* header (fixed length, defined below) */ +struct disk_header { uint32_t id; /* id */ uint32_t flags; /* flags about this transaction */ - uint32_t len; /* data lenght */ - uint32_t plen; /* previous data lenght */ - uint32_t ulen; /* user-supplied information lenght */ + uint32_t numops; /* number of operations */ +}; + +/* operation */ +struct disk_operation { + uint32_t len; /* data length */ + uint32_t plen; /* previous data length */ uint64_t offset; /* 
offset relative to the BOF */ - - /* payload (variable lenght) */ - char *udata; /* user-supplied data */ char *prevdata; /* previous data for rollback */ }; -/* core operations */ +/* core functions */ int jopen(struct jfs *fs, const char *name, int flags, int mode, int jflags); void jtrans_init(struct jfs *fs, struct jtrans *ts); +int jtrans_add(struct jtrans *ts, const void *buf, size_t count, off_t offset); int jtrans_commit(struct jtrans *ts); int jtrans_rollback(struct jtrans *ts); void jtrans_free(struct jtrans *ts); int jclose(struct jfs *fs); + /* journal checker */ int jfsck(const char *name, struct jfsck_result *res); int jfsck_cleanup(const char *name); @@ -88,7 +101,7 @@ ssize_t jreadv(struct jfs *fs, struct iovec *vector, int count); ssize_t jwrite(struct jfs *fs, const void *buf, size_t count); ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset); ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count); -int jtruncate(struct jfs *fs, off_t lenght); +int jtruncate(struct jfs *fs, off_t length); /* ANSI C stdio wrappers */ struct jfs *jfopen(const char *path, const char *mode); @@ -113,8 +126,9 @@ FILE *jfsopen(struct jfs *stream, const char *mode); #define J_COMMITED 1 /* mark a transaction as commited */ #define J_ROLLBACKED 2 /* mark a transaction as rollbacked */ -/* disk_trans constants */ -#define J_DISKTFIXSIZE 28 /* lenght of disk_trans' header */ +/* disk constants */ +#define J_DISKHEADSIZE 12 /* length of disk_header */ +#define J_DISKOPHEADSIZE 16 /* length of disk_operation header */ /* jfsck constants (return values) */ #define J_ESUCCESS 0 /* success - shouldn't be used */ diff --git a/samples/jio3.c b/samples/jio3.c index f239020..b5a9f5d 100644 --- a/samples/jio3.c +++ b/samples/jio3.c @@ -16,28 +16,34 @@ int main(int argc, char **argv) struct jfs fs; struct jtrans ts; - fd = jopen(&fs, "test1", O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0660, 0); + fd = jopen(&fs, "test1", O_RDWR | O_CREAT | O_SYNC, 
0660, 0); if (fd < 0) perror("OPEN"); -#define str "ROLLBACKTEST!\n" - jtrans_init(&fs, &ts); - ts.offset = 0; - ts.buf = str; - ts.len = strlen(str); - +#define str1 "1ROLLBACKTEST1!\n" + jtrans_add(&ts, str1, strlen(str1), 0); + +#define str2 "2ROLLBACKTEST2!\n" + jtrans_add(&ts, str2, strlen(str2), strlen(str1)); + +#define str3 "3ROLLBACKTEST3!\n" + jtrans_add(&ts, str3, strlen(str3), strlen(str1) + strlen(str2)); + + rv = jtrans_commit(&ts); - if (rv != strlen(str)) + if (rv != strlen(str1) + strlen(str2) + strlen(str3)) perror("COMMIT"); + printf("COMMIT OK: %d\n", rv); + rv = jtrans_rollback(&ts); - if (rv != 0) + if (rv < 0) perror("ROLLBACK"); + printf("ROLLBACK OK: %d\n", rv); return 0; } - diff --git a/trans.c b/trans.c index 870c01a..1867a58 100644 --- a/trans.c +++ b/trans.c @@ -22,53 +22,9 @@ #include "common.h" -/* build the journal directory name out of the filename */ -static int get_jdir(const char *filename, char *jdir) -{ - char *base, *baset; - char *dir, *dirt; - - baset = strdup(filename); - if (baset == NULL) - return 0; - base = basename(baset); - - dirt = strdup(filename); - if (dirt == NULL) - return 0; - dir = dirname(dirt); - - snprintf(jdir, PATH_MAX, "%s/.%s.jio", dir, base); - - free(baset); - free(dirt); - - return 1; -} - -/* build the filename of a given transaction */ -static int get_jtfile(const char *filename, int tid, char *jtfile) -{ - char *base, *baset; - char *dir, *dirt; - - baset = strdup(filename); - if (baset == NULL) - return 0; - base = basename(baset); - - dirt = strdup(filename); - if (dirt == NULL) - return 0; - dir = dirname(dirt); - - snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%d", dir, base, tid); - - free(baset); - free(dirt); - - return 1; -} +/* + * helper functions + */ /* gets a new transaction id */ static unsigned int get_tid(struct jfs *fs) @@ -158,53 +114,108 @@ void jtrans_init(struct jfs *fs, struct jtrans *ts) ts->fs = fs; ts->name = NULL; ts->id = 0; - ts->flags = 0; - ts->buf = NULL; - ts->len = 0; - 
ts->offset = 0; - ts->udata = NULL; - ts->ulen = 0; - ts->pdata = NULL; - ts->plen = 0; + ts->flags = fs->flags; + ts->op = NULL; + ts->numops = 0; + pthread_mutex_init( &(ts->lock), NULL); } + /* free the contents of a transaction structure */ void jtrans_free(struct jtrans *ts) { - /* NOTE: we only really free the name and previous data, which are the - * things _we_ allocate; the user data is caller stuff */ + struct joper *tmpop; + ts->fs = NULL; + if (ts->name) free(ts->name); - if (ts->pdata) - free(ts->pdata); - /* don't free ts itself, it's very common to allocate it in the stack, - * so let the caller take care of it; and, after all, he was the one - * doing the alloc in the first place */ + while (ts->op != NULL) { + tmpop = ts->op->next; + + if (ts->op->buf) + free(ts->op->buf); + if (ts->op->pdata) + free(ts->op->pdata); + free(ts->op); + + ts->op = tmpop; + } +} + + +int jtrans_add(struct jtrans *ts, const void *buf, size_t count, off_t offset) +{ + struct joper *jop, *tmpop; + + /* find the last operation in the transaction and create a new one at + * the end */ + pthread_mutex_lock(&(ts->lock)); + if (ts->op == NULL) { + ts->op = malloc(sizeof(struct joper)); + jop = ts->op; + jop->prev = NULL; + } else { + for (tmpop = ts->op; tmpop->next != NULL; tmpop = tmpop->next) + ; + tmpop->next = malloc(sizeof(struct joper)); + tmpop->next->prev = tmpop; + jop = tmpop->next; + } + pthread_mutex_unlock(&(ts->lock)); + + if (jop == NULL) { + /* malloc() failed */ + return 0; + } + + jop->buf = malloc(count); + if (jop->buf == NULL) { + free(jop); + return 0; + } + + /* we copy the buffer because then the caller can reuse it */ + memcpy(jop->buf, buf, count); + jop->len = count; + jop->offset = offset; + jop->next = NULL; + jop->plen = 0; + jop->pdata = NULL; + jop->locked = 0; + + ts->numops++; + + return 1; } /* commit a transaction */ int jtrans_commit(struct jtrans *ts) { - int id, fd, rv, t; + int id, rv, fd = -1; char *name; unsigned char *buf_init, 
*bufp; + struct joper *op; + off_t curpos = 0; + size_t written = 0; + + pthread_mutex_lock(&(ts->lock)); name = (char *) malloc(PATH_MAX); if (name == NULL) - return -1; + goto exit; id = get_tid(ts->fs); if (id == 0) - return -1; + goto exit; /* open the transaction file */ if (!get_jtfile(ts->fs->name, id, name)) - return -1; + goto exit; fd = open(name, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0600); if (fd < 0) - return -1; + goto exit; /* and lock it */ plockf(fd, F_LOCK, 0, 0); @@ -212,73 +223,102 @@ int jtrans_commit(struct jtrans *ts) ts->id = id; ts->name = name; - /* lock the file region to work on */ - if (!(ts->fs->flags & J_NOLOCK)) - plockf(ts->fs->fd, F_LOCK, ts->offset, ts->len); - - /* read the current content and fill in the transaction structure */ - ts->pdata = malloc(ts->len); - if (ts->pdata == NULL) - goto exit; - - ts->plen = ts->len; - - rv = spread(ts->fs->fd, ts->pdata, ts->len, ts->offset); - if (rv < 0) - goto exit; - if (rv < ts->len) { - /* we are extending the file! 
use ftruncate() to do it */ - ftruncate(ts->fs->fd, ts->offset + ts->len); - ts->plen = rv; - } - - /* now save the transaction to the file, static data first */ - - buf_init = malloc(J_DISKTFIXSIZE); + /* save the header */ + buf_init = malloc(J_DISKHEADSIZE); if (buf_init == NULL) - return -1; + goto exit; bufp = buf_init; - /* the sizes are put explicitly (instead of using sizeof()) because - * they're really fixed and defined in the on-disk format */ memcpy(bufp, (void *) &(ts->id), 4); bufp += 4; memcpy(bufp, (void *) &(ts->flags), 4); bufp += 4; - memcpy(bufp, (void *) &(ts->len), 4); + memcpy(bufp, (void *) &(ts->numops), 4); bufp += 4; - memcpy(bufp, (void *) &(ts->plen), 4); - bufp += 4; + rv = spwrite(fd, buf_init, J_DISKHEADSIZE, 0); + if (rv != J_DISKHEADSIZE) { + free(buf_init); + goto exit; + } - memcpy(bufp, (void *) &(ts->ulen), 4); - bufp += 4; + free(buf_init); - memcpy(bufp, (void *) &(ts->offset), 8); - bufp += 8; + curpos = J_DISKHEADSIZE; + + /* first of all lock all the regions we're going to work with; + * otherwise there could be another transaction trying to write the + * same spots and we could end up with interleaved writes, that could + * break atomicity warantees if we need to rollback */ + if (!(ts->flags & J_NOLOCK)) { + for (op = ts->op; op != NULL; op = op->next) { + rv = plockf(ts->fs->fd, F_LOCK, op->offset, op->len); + if (rv == -1) + /* note it can fail with EDEADLK */ + goto exit; + op->locked = 1; + } + } - rv = spwrite(fd, buf_init, J_DISKTFIXSIZE, 0); - if (rv != J_DISKTFIXSIZE) - goto exit; + /* save each transacion in the file */ + for (op = ts->op; op != NULL; op = op->next) { + /* read the current content only if it's not there yet, which + * is the normal case, but for rollbacking we fill it + * ourselves */ + if (op->pdata == NULL) { + op->pdata = malloc(op->len); + if (op->pdata == NULL) + goto exit; + + op->plen = op->len; + + rv = spread(ts->fs->fd, op->pdata, op->len, + op->offset); + if (rv < 0) + goto exit; + if 
(rv < op->len) { + /* we are extending the file! */ + /* ftruncate(ts->fs->fd, op->offset + op->len); */ + op->plen = rv; + } + } - free(buf_init); + /* save the operation's header */ + buf_init = malloc(J_DISKOPHEADSIZE); + if (buf_init == NULL) + goto exit; + bufp = buf_init; - /* and now the variable data */ + memcpy(bufp, (void *) &(op->len), 4); + bufp += 4; - if (ts->udata) { - rv = spwrite(fd, ts->udata, ts->ulen, J_DISKTFIXSIZE); - if (rv != ts->ulen) + memcpy(bufp, (void *) &(op->plen), 4); + bufp += 4; + + memcpy(bufp, (void *) &(op->offset), 8); + bufp += 8; + + rv = spwrite(fd, buf_init, J_DISKOPHEADSIZE, curpos); + if (rv != J_DISKOPHEADSIZE) { + free(buf_init); goto exit; - } + } - t = J_DISKTFIXSIZE + ts->ulen; - rv = spwrite(fd, ts->pdata, ts->plen, t); - if (rv != ts->plen) - goto exit; + free(buf_init); + + curpos += J_DISKOPHEADSIZE; + + /* and save it to the disk */ + rv = spwrite(fd, op->pdata, op->plen, curpos); + if (rv != op->plen) + goto exit; + + curpos += op->plen; + } /* this is a simple but efficient optimization: instead of doing * everything O_SYNC, we sync at this point only, this way we avoid @@ -288,9 +328,18 @@ int jtrans_commit(struct jtrans *ts) fsync(fd); /* now that we have a safe transaction file, let's apply it */ - rv = spwrite(ts->fs->fd, ts->buf, ts->len, ts->offset); - if (rv != ts->len) - goto exit; + written = 0; + for (op = ts->op; op != NULL; op = op->next) { + rv = spwrite(ts->fs->fd, op->buf, op->len, op->offset); + + plockf(ts->fs->fd, F_ULOCK, op->offset, op->len); + op->locked = 0; + + if (rv != op->len) + goto exit; + + written += rv; + } /* the transaction has been applied, so we cleanup and remove it from * the disk */ @@ -303,13 +352,16 @@ int jtrans_commit(struct jtrans *ts) exit: close(fd); + for (op = ts->op; op != NULL; op = op->next) { + if (op->locked) + plockf(ts->fs->fd, F_ULOCK, op->offset, op->len); + } - if (!(ts->fs->flags & J_NOLOCK)) - plockf(ts->fs->fd, F_ULOCK, ts->offset, ts->len); + 
pthread_mutex_unlock(&(ts->lock)); - /* return the lenght only if it was properly commited */ + /* return the length only if it was properly commited */ if (ts->flags & J_COMMITED) - return ts->len; + return written; else return -1; @@ -318,41 +370,61 @@ exit: /* rollback a transaction */ int jtrans_rollback(struct jtrans *ts) { - int rv; struct jtrans newts; + struct joper *op, *curop, *lop; - /* copy the old transaction to the new one */ - jtrans_init(ts->fs, &newts); + /* FIXME: this looks like a mess! */ + + if (ts->op == NULL) { + /* we're trying to rollback an empty transaction */ + return 0; + } + jtrans_init(ts->fs, &newts); newts.flags = ts->flags; - newts.offset = ts->offset; - newts.buf = ts->pdata; - newts.len = ts->plen; + /* find the last operation */ + for (op = ts->op; op->next != NULL; op = op->next) + ; - if (ts->plen < ts->len) { - /* we extended the data in the previous transaction, so we + /* and traverse the list backwards */ + for ( ; op != NULL; op = op->prev) { + /* if we extended the data in the previous transaction, we * should truncate it back */ /* DANGEROUS: this is one of the main reasons why rollbacking * is dangerous and should only be done with extreme caution: * if for some reason, after the previous transacton, we have * extended the file further, this will cut it back to what it * was; read the docs for more detail */ - ftruncate(ts->fs->fd, ts->offset + ts->plen); - + if (op->plen < op->len) + ftruncate(ts->fs->fd, op->offset + op->plen); + + /* manually add the operation to the new transaction */ + curop = malloc(sizeof(struct joper)); + curop->offset = op->offset; + curop->len = op->plen; + curop->buf = op->pdata; + curop->plen = op->plen; + curop->pdata = op->pdata; + curop->locked = 0; + + /* add the new transaction to the list */ + if (newts.op == NULL) { + newts.op = curop; + curop->prev = NULL; + curop->next = NULL; + } else { + for (lop = newts.op; lop->next != NULL; lop = lop->next) + ; + lop->next = curop; + 
curop->prev = lop; + curop->next = NULL; + } } - newts.pdata = ts->pdata; - newts.plen = ts->plen; - - newts.udata = ts->udata; - newts.ulen = ts->ulen; - - rv = jtrans_commit(&newts); - return rv; + return jtrans_commit(&newts); } - /* * basic operations */ @@ -392,7 +464,6 @@ int jopen(struct jfs *fs, const char *name, int flags, int mode, int jflags) * make it easier for them by taking care of it here. If performance * is essential, the jpread/jpwrite functions should be used, just as * real life. */ - pthread_mutex_init( &(fs->lock), NULL); if (!get_jdir(name, jdir)) @@ -440,243 +511,3 @@ int jclose(struct jfs *fs) return 0; } - -/* - * journal recovery - */ - -/* check the journal and replay the incomplete transactions */ -int jfsck(const char *name, struct jfsck_result *res) -{ - int fd, tfd, rv, i, maxtid; - char jdir[PATH_MAX], jlockfile[PATH_MAX], tname[PATH_MAX]; - unsigned char *buf = NULL; - struct stat sinfo; - struct jfs fs; - struct jtrans *curts; - DIR *dir; - struct dirent *dent; - off_t offset; - - fd = open(name, O_RDWR | O_SYNC | O_LARGEFILE); - if (fd < 0) - return J_ENOENT; - - fs.fd = fd; - fs.name = (char *) name; - - if (!get_jdir(name, jdir)) - return J_ENOMEM; - rv = lstat(jdir, &sinfo); - if (rv < 0 || !S_ISDIR(sinfo.st_mode)) - return J_ENOJOURNAL; - - /* open the lock file, which is only used to complete the jfs - * structure */ - snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock"); - rv = open(jlockfile, O_RDWR | O_CREAT, 0600); - if (rv < 0) - return J_ENOJOURNAL; - fs.jfd = rv; - - dir = opendir(jdir); - if (dir == NULL) - return J_ENOJOURNAL; - - /* loop for each file in the journal directory to find out the greater - * transaction number */ - maxtid = 0; - for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) { - /* see if the file is named like a transaction, ignore - * otherwise; as transactions are named as numbers > 0, a - * simple atoi() is enough testing */ - rv = atoi(dent->d_name); - if (rv <= 0) - continue; - if 
(rv > maxtid) - maxtid = rv; - } - closedir(dir); - - /* rewrite the lockfile, writing the new maxtid on it, so that when we - * rollback a transaction it doesn't step over existing ones */ - rv = spwrite(fs.jfd, &maxtid, sizeof(maxtid), 0); - if (rv != sizeof(maxtid)) { - return J_ENOMEM; - } - - /* we loop all the way up to the max transaction id */ - for (i = 1; i <= maxtid; i++) { - curts = malloc(sizeof(struct jtrans)); - if (curts == NULL) - return J_ENOMEM; - - jtrans_init(&fs, curts); - curts->id = i; - - /* open the transaction file, using i as its name, so we are - * really looping in order (recovering transaction in a - * different order as they were applied means instant - * corruption) */ - if (!get_jtfile(name, i, tname)) - return J_ENOMEM; - tfd = open(tname, O_RDWR | O_SYNC | O_LARGEFILE, 0600); - if (tfd < 0) { - res->invalid++; - goto loop; - } - - /* try to lock the transaction file, if it's locked then it is - * currently being used so we skip it */ - rv = plockf(tfd, F_TLOCK, 0, 0); - if (rv == -1) { - res->in_progress++; - goto loop; - } - - /* load from disk, header first */ - buf = (unsigned char *) malloc(J_DISKTFIXSIZE); - if (buf == NULL) { - res->load_error++; - goto loop; - } - - rv = read(tfd, buf, J_DISKTFIXSIZE); - if (rv != J_DISKTFIXSIZE) { - res->broken_head++; - free(buf); - goto loop; - } - - curts->flags = *( (uint32_t *) (buf + 4)); - curts->len = *( (uint32_t *) (buf + 8)); - curts->plen = *( (uint32_t *) (buf + 12)); - curts->ulen = *( (uint32_t *) (buf + 16)); - curts->offset = *( (uint64_t *) (buf + 20)); - - free(buf); - - /* if we got here, the transaction was not applied, so we - * check if the transaction file is complete (we only need to - * rollback it) or not (so we can't do anything but ignore it) - */ - - lstat(tname, &sinfo); - rv = J_DISKTFIXSIZE + curts->ulen + curts->plen; - if (sinfo.st_size != rv) { - /* the transaction file is incomplete, some of the - * body is missing */ - res->broken_body++; - goto loop; 
- } - - /* we have a complete transaction file which commit was not - * successful, so we read it to complete the transaction - * structure and rollback it */ - curts->pdata = malloc(curts->plen); - if (curts->pdata == NULL) { - res->load_error++; - goto loop; - } - - curts->udata = malloc(curts->ulen); - if (curts->udata == NULL) { - res->load_error++; - goto loop; - } - - /* user data */ - offset = J_DISKTFIXSIZE; - rv = spread(tfd, curts->udata, curts->ulen, offset); - if (rv != curts->ulen) { - res->load_error++; - goto loop; - } - - /* previous data */ - offset = J_DISKTFIXSIZE + curts->ulen; - rv = spread(tfd, curts->pdata, curts->plen, offset); - if (rv != curts->plen) { - res->load_error++; - goto loop; - } - - /* rollback */ - rv = jtrans_rollback(curts); - if (rv < 0) { - res->apply_error++; - goto loop; - } - res->rollbacked++; - - /* free the data we just allocated */ - if (curts->plen) { - free(curts->pdata); - curts->pdata = NULL; - } - if (curts->ulen) { - free(curts->udata); - curts->udata = NULL; - } - if (curts->name) { - free(curts->name); - curts->name = NULL; - } - -loop: - if (tfd > 0) - close(tfd); - - free(curts); - - res->total++; - } - - close(fs.fd); - close(fs.jfd); - - return 0; - -} - -/* remove all the files in the journal directory (if any) */ -int jfsck_cleanup(const char *name) -{ - char jdir[PATH_MAX], tfile[PATH_MAX*3]; - DIR *dir; - struct dirent *dent; - - if (!get_jdir(name, jdir)) - return 0; - - dir = opendir(jdir); - if (dir == NULL && errno == ENOENT) - /* it doesn't exist, so it's clean */ - return 1; - else if (dir == NULL) - return 0; - - for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) { - /* we only care about transactions (named as numbers > 0) and - * the lockfile (named "lock"); ignore everything else */ - if (strcmp(dent->d_name, "lock") && atoi(dent->d_name) <= 0) - continue; - - /* build the full path to the transaction file */ - memset(tfile, 0, PATH_MAX * 3); - strcat(tfile, jdir); - strcat(tfile, 
"/"); - strcat(tfile, dent->d_name); - - /* the full filename is too large */ - if (strlen(tfile) > PATH_MAX) - return 0; - - /* and remove it */ - unlink(tfile); - } - closedir(dir); - - return 1; -} - diff --git a/unix.c b/unix.c index 2d82655..bce8647 100644 --- a/unix.c +++ b/unix.c @@ -88,10 +88,7 @@ ssize_t jwrite(struct jfs *fs, const void *buf, size_t count) jtrans_init(fs, &ts); pos = lseek(fs->fd, 0, SEEK_CUR); - ts.offset = pos; - - ts.buf = buf; - ts.len = count; + jtrans_add(&ts, buf, count, pos); rv = jtrans_commit(&ts); @@ -114,10 +111,7 @@ ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset) struct jtrans ts; jtrans_init(fs, &ts); - ts.offset = offset; - - ts.buf = buf; - ts.len = count; + jtrans_add(&ts, buf, count, offset); rv = jtrans_commit(&ts); @@ -129,43 +123,29 @@ ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset) /* writev wrapper */ ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count) { - int rv, i, bufp; - ssize_t sum; - char *buf; - off_t pos; + int rv, i; + size_t sum; + off_t ipos, t; struct jtrans ts; - sum = 0; - for (i = 0; i < count; i++) - sum += vector[i].iov_len; - - /* unify the buffers into one big chunk to commit */ - /* FIXME: can't we do this more efficient? It ruins the whole purpose - * of using writev()! 
maybe we should do one transaction per vector */ - buf = malloc(sum); - if (buf == NULL) - return -1; - bufp = 0; - - for (i = 0; i < count; i++) { - memcpy(buf + bufp, vector[i].iov_base, vector[i].iov_len); - bufp += vector[i].iov_len; - } - pthread_mutex_lock(&(fs->lock)); jtrans_init(fs, &ts); - pos = lseek(fs->fd, 0, SEEK_CUR); - ts.offset = pos; + ipos = lseek(fs->fd, 0, SEEK_CUR); + t = ipos; - ts.buf = buf; - ts.len = sum; + sum = 0; + for (i = 0; i < count; i++) { + jtrans_add(&ts, vector[i].iov_base, vector[i].iov_len, t); + sum += vector[i].iov_len; + t += vector[i].iov_len; + } rv = jtrans_commit(&ts); if (rv >= 0) { /* if success, advance the file pointer */ - lseek(fs->fd, count, SEEK_CUR); + lseek(fs->fd, sum, SEEK_CUR); } pthread_mutex_unlock(&(fs->lock)); @@ -177,14 +157,14 @@ ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count) } /* truncate a file - be careful with this */ -int jtruncate(struct jfs *fs, off_t lenght) +int jtruncate(struct jfs *fs, off_t length) { int rv; - /* lock from lenght to the end of file */ - plockf(fs->fd, F_LOCK, lenght, 0); - rv = ftruncate(fs->fd, lenght); - plockf(fs->fd, F_ULOCK, lenght, 0); + /* lock from length to the end of file */ + plockf(fs->fd, F_LOCK, length, 0); + rv = ftruncate(fs->fd, length); + plockf(fs->fd, F_ULOCK, length, 0); return rv; }