

This is a big rework of the commit procedure, which makes transactions really
useful by bundling operations up and applying all of them atomically, which is
what one would expect.

So now you create a transaction, add some operations to it (writes to
different offsets) and then commit it as a whole.

It's a big patch; it could have been split into more parts (e.g. the move of
jfsck() is pretty much orthogonal to this), but as I'm changing code all over
the place this is easier to work with.



---

 cur-root/Makefile       |    2 
 cur-root/check.c        |  260 +++++++++++++++++++++
 cur-root/common.c       |   54 ++++
 cur-root/common.h       |    2 
 cur-root/libjio.h       |   64 +++--
 cur-root/samples/jio3.c |   26 +-
 cur-root/trans.c        |  591 +++++++++++++++++-------------------------------
 cur-root/unix.c         |   58 +---
 8 files changed, 602 insertions(+), 455 deletions(-)

diff -puN trans.c~new_commit trans.c
--- cur/trans.c~new_commit	2004-06-14 12:10:06.000000000 -0300
+++ cur-root/trans.c	2004-06-19 23:57:20.814099768 -0300
@@ -22,53 +22,9 @@
 #include "common.h"
 
 
-/* build the journal directory name out of the filename */
-static int get_jdir(const char *filename, char *jdir)
-{
-	char *base, *baset;
-	char *dir, *dirt;
-
-	baset = strdup(filename);
-	if (baset == NULL)
-		return 0;
-	base = basename(baset);
-
-	dirt = strdup(filename);
-	if (dirt == NULL)
-		return 0;
-	dir = dirname(dirt);
-
-	snprintf(jdir, PATH_MAX, "%s/.%s.jio", dir, base);
-
-	free(baset);
-	free(dirt);
-
-	return 1;
-}
-
-/* build the filename of a given transaction */
-static int get_jtfile(const char *filename, int tid, char *jtfile)
-{
-	char *base, *baset;
-	char *dir, *dirt;
-
-	baset = strdup(filename);
-	if (baset == NULL)
-		return 0;
-	base = basename(baset);
-
-	dirt = strdup(filename);
-	if (dirt == NULL)
-		return 0;
-	dir = dirname(dirt);
-
-	snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%d", dir, base, tid);
-
-	free(baset);
-	free(dirt);
-
-	return 1;
-}
+/*
+ * helper functions
+ */
 
 /* gets a new transaction id */
 static unsigned int get_tid(struct jfs *fs)
@@ -158,53 +114,108 @@ void jtrans_init(struct jfs *fs, struct 
 	ts->fs = fs;
 	ts->name = NULL;
 	ts->id = 0;
-	ts->flags = 0;
-	ts->buf = NULL;
-	ts->len = 0;
-	ts->offset = 0;
-	ts->udata = NULL;
-	ts->ulen = 0;
-	ts->pdata = NULL;
-	ts->plen = 0;
+	ts->flags = fs->flags;
+	ts->op = NULL;
+	ts->numops = 0;
+	pthread_mutex_init( &(ts->lock), NULL);
 }
 
+
 /* free the contents of a transaction structure */
 void jtrans_free(struct jtrans *ts)
 {
-	/* NOTE: we only really free the name and previous data, which are the
-	 * things _we_ allocate; the user data is caller stuff */
+	struct joper *tmpop;
+
 	ts->fs = NULL;
+
 	if (ts->name)
 		free(ts->name);
-	if (ts->pdata)
-		free(ts->pdata);
 
-	/* don't free ts itself, it's very common to allocate it in the stack,
-	 * so let the caller take care of it; and, after all, he was the one
-	 * doing the alloc in the first place */
+	while (ts->op != NULL) {
+		tmpop = ts->op->next;
+
+		if (ts->op->buf)
+			free(ts->op->buf);
+		if (ts->op->pdata)
+			free(ts->op->pdata);
+		free(ts->op);
+
+		ts->op = tmpop;
+	}
+}
+
+
+/* add a write operation (count bytes from buf, to be written at offset) to
+ * the end of the transaction's list. The data is copied, so the caller can
+ * reuse buf right away. Returns 1 on success, 0 if malloc() failed, in which
+ * case the transaction is left untouched */
+int jtrans_add(struct jtrans *ts, const void *buf, size_t count, off_t offset)
+{
+	struct joper *jop, *tmpop;
+
+	/* build the whole operation before touching the list, so a failed
+	 * malloc() can never leave a NULL or half-initialized node linked */
+	jop = malloc(sizeof(struct joper));
+	if (jop == NULL)
+		return 0;
+
+	jop->buf = malloc(count);
+	if (jop->buf == NULL) {
+		free(jop);
+		return 0;
+	}
+
+	memcpy(jop->buf, buf, count);
+	jop->len = count;
+	jop->offset = offset;
+	jop->plen = 0;
+	jop->pdata = NULL;
+	jop->locked = 0;
+	jop->prev = NULL;
+	jop->next = NULL;
+
+	/* append it at the end of the list and count it, all under the lock */
+	pthread_mutex_lock(&(ts->lock));
+	if (ts->op == NULL) {
+		ts->op = jop;
+	} else {
+		for (tmpop = ts->op; tmpop->next != NULL; tmpop = tmpop->next)
+			;
+		tmpop->next = jop;
+		jop->prev = tmpop;
+	}
+	ts->numops++;
+	pthread_mutex_unlock(&(ts->lock));
+
+	return 1;
+}
 
 /* commit a transaction */
 int jtrans_commit(struct jtrans *ts)
 {
-	int id, fd, rv, t;
+	int id, rv, fd = -1;
 	char *name;
 	unsigned char *buf_init, *bufp;
+	struct joper *op;
+	off_t curpos = 0;
+	size_t written = 0;
+
+	pthread_mutex_lock(&(ts->lock));
 
 	name = (char *) malloc(PATH_MAX);
 	if (name == NULL)
-		return -1;
+		goto exit;
 
 	id = get_tid(ts->fs);
 	if (id == 0)
-		return -1;
+		goto exit;
 
 	/* open the transaction file */
 	if (!get_jtfile(ts->fs->name, id, name))
-		return -1;
+		goto exit;
 	fd = open(name, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0600);
 	if (fd < 0)
-		return -1;
+		goto exit;
 
 	/* and lock it */
 	plockf(fd, F_LOCK, 0, 0);
@@ -212,73 +223,102 @@ int jtrans_commit(struct jtrans *ts)
 	ts->id = id;
 	ts->name = name;
 
-	/* lock the file region to work on */
-	if (!(ts->fs->flags & J_NOLOCK))
-		plockf(ts->fs->fd, F_LOCK, ts->offset, ts->len);
-
-	/* read the current content and fill in the transaction structure */
-	ts->pdata = malloc(ts->len);
-	if (ts->pdata == NULL)
-		goto exit;
-
-	ts->plen = ts->len;
-
-	rv = spread(ts->fs->fd, ts->pdata, ts->len, ts->offset);
-	if (rv < 0)
-		goto exit;
-	if (rv < ts->len) {
-		/* we are extending the file! use ftruncate() to do it */
-		ftruncate(ts->fs->fd, ts->offset + ts->len);
-		ts->plen = rv;
-	}
-
-	/* now save the transaction to the file, static data first */
-
-	buf_init = malloc(J_DISKTFIXSIZE);
+	/* save the header */
+	buf_init = malloc(J_DISKHEADSIZE);
 	if (buf_init == NULL)
-		return -1;
+		goto exit;
 
 	bufp = buf_init;
 
-	/* the sizes are put explicitly (instead of using sizeof()) because
-	 * they're really fixed and defined in the on-disk format */
 	memcpy(bufp, (void *) &(ts->id), 4);
 	bufp += 4;
 
 	memcpy(bufp, (void *) &(ts->flags), 4);
 	bufp += 4;
 
-	memcpy(bufp, (void *) &(ts->len), 4);
+	memcpy(bufp, (void *) &(ts->numops), 4);
 	bufp += 4;
 
-	memcpy(bufp, (void *) &(ts->plen), 4);
-	bufp += 4;
+	rv = spwrite(fd, buf_init, J_DISKHEADSIZE, 0);
+	if (rv != J_DISKHEADSIZE) {
+		free(buf_init);
+		goto exit;
+	}
 
-	memcpy(bufp, (void *) &(ts->ulen), 4);
-	bufp += 4;
+	free(buf_init);
 
-	memcpy(bufp, (void *) &(ts->offset), 8);
-	bufp += 8;
+	curpos = J_DISKHEADSIZE;
 
-	rv = spwrite(fd, buf_init, J_DISKTFIXSIZE, 0);
-	if (rv != J_DISKTFIXSIZE)
-		goto exit;
+	/* first of all lock all the regions we're going to work with;
+	 * otherwise there could be another transaction trying to write the
+	 * same spots and we could end up with interleaved writes, that could
+	 * break atomicity guarantees if we need to rollback */
+	if (!(ts->flags & J_NOLOCK)) {
+		for (op = ts->op; op != NULL; op = op->next) {
+			rv = plockf(ts->fs->fd, F_LOCK, op->offset, op->len);
+			if (rv == -1)
+				/* note it can fail with EDEADLK */
+				goto exit;
+			op->locked = 1;
+		}
+	}
 
-	free(buf_init);
+	/* save each operation of the transaction in the file */
+	for (op = ts->op; op != NULL; op = op->next) {
+		/* read the current content only if it's not there yet, which
+		 * is the normal case, but for rollbacking we fill it
+		 * ourselves */
+		if (op->pdata == NULL) {
+			op->pdata = malloc(op->len);
+			if (op->pdata == NULL)
+				goto exit;
+
+			op->plen = op->len;
+
+			rv = spread(ts->fs->fd, op->pdata, op->len,
+					op->offset);
+			if (rv < 0)
+				goto exit;
+			if (rv < op->len) {
+				/* we are extending the file! */
+				/* ftruncate(ts->fs->fd, op->offset + op->len); */
+				op->plen = rv;
+			}
+		}
+
+		/* save the operation's header */
+		buf_init = malloc(J_DISKOPHEADSIZE);
+		if (buf_init == NULL)
+			goto exit;
+
+		bufp = buf_init;
+
+		memcpy(bufp, (void *) &(op->len), 4);
+		bufp += 4;
 
+		memcpy(bufp, (void *) &(op->plen), 4);
+		bufp += 4;
 
-	/* and now the variable data */
+		memcpy(bufp, (void *) &(op->offset), 8);
+		bufp += 8;
 
-	if (ts->udata) {
-		rv = spwrite(fd, ts->udata, ts->ulen, J_DISKTFIXSIZE);
-		if (rv != ts->ulen)
+		rv = spwrite(fd, buf_init, J_DISKOPHEADSIZE, curpos);
+		if (rv != J_DISKOPHEADSIZE) {
+			free(buf_init);
 			goto exit;
-	}
+		}
 
-	t = J_DISKTFIXSIZE + ts->ulen;
-	rv = spwrite(fd, ts->pdata, ts->plen, t);
-	if (rv != ts->plen)
-		goto exit;
+		free(buf_init);
+
+		curpos += J_DISKOPHEADSIZE;
+
+		/* and save it to the disk */
+		rv = spwrite(fd, op->pdata, op->plen, curpos);
+		if (rv != op->plen)
+			goto exit;
+
+		curpos += op->plen;
+	}
 
 	/* this is a simple but efficient optimization: instead of doing
 	 * everything O_SYNC, we sync at this point only, this way we avoid
@@ -288,9 +328,18 @@ int jtrans_commit(struct jtrans *ts)
 	fsync(fd);
 
 	/* now that we have a safe transaction file, let's apply it */
-	rv = spwrite(ts->fs->fd, ts->buf, ts->len, ts->offset);
-	if (rv != ts->len)
-		goto exit;
+	written = 0;
+	for (op = ts->op; op != NULL; op = op->next) {
+		rv = spwrite(ts->fs->fd, op->buf, op->len, op->offset);
+
+		plockf(ts->fs->fd, F_ULOCK, op->offset, op->len);
+		op->locked = 0;
+
+		if (rv != op->len)
+			goto exit;
+
+		written += rv;
+	}
 
 	/* the transaction has been applied, so we cleanup and remove it from
 	 * the disk */
@@ -303,13 +352,16 @@ int jtrans_commit(struct jtrans *ts)
 
 exit:
 	close(fd);
+	for (op = ts->op; op != NULL; op = op->next) {
+		if (op->locked)
+			plockf(ts->fs->fd, F_ULOCK, op->offset, op->len);
+	}
 
-	if (!(ts->fs->flags & J_NOLOCK))
-		plockf(ts->fs->fd, F_ULOCK, ts->offset, ts->len);
+	pthread_mutex_unlock(&(ts->lock));
 
-	/* return the lenght only if it was properly commited */
+	/* return the length only if it was properly commited */
 	if (ts->flags & J_COMMITED)
-		return ts->len;
+		return written;
 	else
 		return -1;
 
@@ -318,41 +370,61 @@ exit:
 /* rollback a transaction */
 int jtrans_rollback(struct jtrans *ts)
 {
-	int rv;
 	struct jtrans newts;
+	struct joper *op, *curop, *lop;
 
-	/* copy the old transaction to the new one */
-	jtrans_init(ts->fs, &newts);
+	/* FIXME: this looks like a mess! */
 
-	newts.flags = ts->flags;
-	newts.offset = ts->offset;
+	if (ts->op == NULL) {
+		/* we're trying to rollback an empty transaction */
+		return 0;
+	}
 
-	newts.buf = ts->pdata;
-	newts.len = ts->plen;
+	jtrans_init(ts->fs, &newts);
+	newts.flags = ts->flags;
 
-	if (ts->plen < ts->len) {
-		/* we extended the data in the previous transaction, so we
+	/* find the last operation */
+	for (op = ts->op; op->next != NULL; op = op->next)
+		;
+
+	/* and traverse the list backwards */
+	for ( ; op != NULL; op = op->prev) {
+		/* if we extended the data in the previous transaction, we
 		 * should truncate it back */
 		/* DANGEROUS: this is one of the main reasons why rollbacking
 		 * is dangerous and should only be done with extreme caution:
 		 * if for some reason, after the previous transacton, we have
 		 * extended the file further, this will cut it back to what it
 		 * was; read the docs for more detail */
-		ftruncate(ts->fs->fd, ts->offset + ts->plen);
+		if (op->plen < op->len)
+			ftruncate(ts->fs->fd, op->offset + op->plen);
 
+		/* manually add the operation to the new transaction */
+		curop = malloc(sizeof(struct joper));
+		curop->offset = op->offset;
+		curop->len = op->plen;
+		curop->buf = op->pdata;
+		curop->plen = op->plen;
+		curop->pdata = op->pdata;
+		curop->locked = 0;
+
+		/* add the new transaction to the list */
+		if (newts.op == NULL) {
+			newts.op = curop;
+			curop->prev = NULL;
+			curop->next = NULL;
+		} else {
+			for (lop = newts.op; lop->next != NULL; lop = lop->next)
+				;
+			lop->next = curop;
+			curop->prev = lop;
+			curop->next = NULL;
+		}
 	}
 
-	newts.pdata = ts->pdata;
-	newts.plen = ts->plen;
-
-	newts.udata = ts->udata;
-	newts.ulen = ts->ulen;
-
-	rv = jtrans_commit(&newts);
-	return rv;
+	return jtrans_commit(&newts);
 }
 
-
 /*
  * basic operations
  */
@@ -392,7 +464,6 @@ int jopen(struct jfs *fs, const char *na
 	 * make it easier for them by taking care of it here. If performance
 	 * is essential, the jpread/jpwrite functions should be used, just as
 	 * real life. */
-
 	pthread_mutex_init( &(fs->lock), NULL);
 
 	if (!get_jdir(name, jdir))
@@ -440,243 +511,3 @@ int jclose(struct jfs *fs)
 	return 0;
 }
 
-
-/*
- * journal recovery
- */
-
-/* check the journal and replay the incomplete transactions */
-int jfsck(const char *name, struct jfsck_result *res)
-{
-	int fd, tfd, rv, i, maxtid;
-	char jdir[PATH_MAX], jlockfile[PATH_MAX], tname[PATH_MAX];
-	unsigned char *buf = NULL;
-	struct stat sinfo;
-	struct jfs fs;
-	struct jtrans *curts;
-	DIR *dir;
-	struct dirent *dent;
-	off_t offset;
-
-	fd = open(name, O_RDWR | O_SYNC | O_LARGEFILE);
-	if (fd < 0)
-		return J_ENOENT;
-
-	fs.fd = fd;
-	fs.name = (char *) name;
-
-	if (!get_jdir(name, jdir))
-		return J_ENOMEM;
-	rv = lstat(jdir, &sinfo);
-	if (rv < 0 || !S_ISDIR(sinfo.st_mode))
-		return J_ENOJOURNAL;
-
-	/* open the lock file, which is only used to complete the jfs
-	 * structure */
-	snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
-	rv = open(jlockfile, O_RDWR | O_CREAT, 0600);
-	if (rv < 0)
-		return J_ENOJOURNAL;
-	fs.jfd = rv;
-
-	dir = opendir(jdir);
-	if (dir == NULL)
-		return J_ENOJOURNAL;
-
-	/* loop for each file in the journal directory to find out the greater
-	 * transaction number */
-	maxtid = 0;
-	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
-		/* see if the file is named like a transaction, ignore
-		 * otherwise; as transactions are named as numbers > 0, a
-		 * simple atoi() is enough testing */
-		rv = atoi(dent->d_name);
-		if (rv <= 0)
-			continue;
-		if (rv > maxtid)
-			maxtid = rv;
-	}
-	closedir(dir);
-
-	/* rewrite the lockfile, writing the new maxtid on it, so that when we
-	 * rollback a transaction it doesn't step over existing ones */
-	rv = spwrite(fs.jfd, &maxtid, sizeof(maxtid), 0);
-	if (rv != sizeof(maxtid)) {
-		return J_ENOMEM;
-	}
-
-	/* we loop all the way up to the max transaction id */
-	for (i = 1; i <= maxtid; i++) {
-		curts = malloc(sizeof(struct jtrans));
-		if (curts == NULL)
-			return J_ENOMEM;
-
-		jtrans_init(&fs, curts);
-		curts->id = i;
-
-		/* open the transaction file, using i as its name, so we are
-		 * really looping in order (recovering transaction in a
-		 * different order as they were applied means instant
-		 * corruption) */
-		if (!get_jtfile(name, i, tname))
-			return J_ENOMEM;
-		tfd = open(tname, O_RDWR | O_SYNC | O_LARGEFILE, 0600);
-		if (tfd < 0) {
-			res->invalid++;
-			goto loop;
-		}
-
-		/* try to lock the transaction file, if it's locked then it is
-		 * currently being used so we skip it */
-		rv = plockf(tfd, F_TLOCK, 0, 0);
-		if (rv == -1) {
-			res->in_progress++;
-			goto loop;
-		}
-
-		/* load from disk, header first */
-		buf = (unsigned char *) malloc(J_DISKTFIXSIZE);
-		if (buf == NULL) {
-			res->load_error++;
-			goto loop;
-		}
-
-		rv = read(tfd, buf, J_DISKTFIXSIZE);
-		if (rv != J_DISKTFIXSIZE) {
-			res->broken_head++;
-			free(buf);
-			goto loop;
-		}
-
-		curts->flags = *( (uint32_t *) (buf + 4));
-		curts->len = *( (uint32_t *) (buf + 8));
-		curts->plen = *( (uint32_t *) (buf + 12));
-		curts->ulen = *( (uint32_t *) (buf + 16));
-		curts->offset = *( (uint64_t *) (buf + 20));
-
-		free(buf);
-
-		/* if we got here, the transaction was not applied, so we
-		 * check if the transaction file is complete (we only need to
-		 * rollback it) or not (so we can't do anything but ignore it)
-		 */
-
-		lstat(tname, &sinfo);
-		rv = J_DISKTFIXSIZE + curts->ulen + curts->plen;
-		if (sinfo.st_size != rv) {
-			/* the transaction file is incomplete, some of the
-			 * body is missing */
-			res->broken_body++;
-			goto loop;
-		}
-
-		/* we have a complete transaction file which commit was not
-		 * successful, so we read it to complete the transaction
-		 * structure and rollback it */
-		curts->pdata = malloc(curts->plen);
-		if (curts->pdata == NULL) {
-			res->load_error++;
-			goto loop;
-		}
-
-		curts->udata = malloc(curts->ulen);
-		if (curts->udata == NULL) {
-			res->load_error++;
-			goto loop;
-		}
-
-		/* user data */
-		offset = J_DISKTFIXSIZE;
-		rv = spread(tfd, curts->udata, curts->ulen, offset);
-		if (rv != curts->ulen) {
-			res->load_error++;
-			goto loop;
-		}
-
-		/* previous data */
-		offset = J_DISKTFIXSIZE + curts->ulen;
-		rv = spread(tfd, curts->pdata, curts->plen, offset);
-		if (rv != curts->plen) {
-			res->load_error++;
-			goto loop;
-		}
-
-		/* rollback */
-		rv = jtrans_rollback(curts);
-		if (rv < 0) {
-			res->apply_error++;
-			goto loop;
-		}
-		res->rollbacked++;
-
-		/* free the data we just allocated */
-		if (curts->plen) {
-			free(curts->pdata);
-			curts->pdata = NULL;
-		}
-		if (curts->ulen) {
-			free(curts->udata);
-			curts->udata = NULL;
-		}
-		if (curts->name) {
-			free(curts->name);
-			curts->name = NULL;
-		}
-
-loop:
-		if (tfd > 0)
-			close(tfd);
-
-		free(curts);
-
-		res->total++;
-	}
-
-	close(fs.fd);
-	close(fs.jfd);
-
-	return 0;
-
-}
-
-/* remove all the files in the journal directory (if any) */
-int jfsck_cleanup(const char *name)
-{
-	char jdir[PATH_MAX], tfile[PATH_MAX*3];
-	DIR *dir;
-	struct dirent *dent;
-
-	if (!get_jdir(name, jdir))
-		return 0;
-
-	dir = opendir(jdir);
-	if (dir == NULL && errno == ENOENT)
-		/* it doesn't exist, so it's clean */
-		return 1;
-	else if (dir == NULL)
-		return 0;
-
-	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
-		/* we only care about transactions (named as numbers > 0) and
-		 * the lockfile (named "lock"); ignore everything else */
-		if (strcmp(dent->d_name, "lock") && atoi(dent->d_name) <= 0)
-			continue;
-
-		/* build the full path to the transaction file */
-		memset(tfile, 0, PATH_MAX * 3);
-		strcat(tfile, jdir);
-		strcat(tfile, "/");
-		strcat(tfile, dent->d_name);
-
-		/* the full filename is too large */
-		if (strlen(tfile) > PATH_MAX)
-			return 0;
-
-		/* and remove it */
-		unlink(tfile);
-	}
-	closedir(dir);
-
-	return 1;
-}
-
diff -puN libjio.h~new_commit libjio.h
--- cur/libjio.h~new_commit	2004-06-14 12:10:21.000000000 -0300
+++ cur-root/libjio.h	2004-06-16 03:22:20.000000000 -0300
@@ -19,64 +19,77 @@ extern "C" {
 
 
 /* logical structures */
+
+/* the main file structure */
 struct jfs {
 	int fd;			/* main file descriptor */
 	char *name;		/* and its name */
 	int jfd;		/* journal's lock file descriptor */
-	int flags;		/* journal mode options used in jopen() */
+	int flags;		/* journal flags */
 	pthread_mutex_t lock;	/* a soft lock used in some operations */
 };
 
+/* a single operation: one write inside a transaction */
+struct joper {
+	int locked;		/* is the region locked? */
+	off_t offset;		/* operation's offset */
+	size_t len;		/* data length */
+	void *buf;		/* data */
+	size_t plen;		/* previous data length */
+	void *pdata;		/* previous data */
+	struct joper *prev;	/* previous operation in the list */
+	struct joper *next;	/* next operation in the list */
+};
+
+/* a transaction */
 struct jtrans {
 	struct jfs *fs;		/* journal file structure to operate on */
 	char *name;		/* name of the transaction file */
 	int id;			/* transaction id */
 	int flags;		/* misc flags */
-	const void *buf;	/* buffer */
-	size_t len;		/* buffer lenght */
-	off_t offset;		/* file offset to operate on */
-	void *udata;		/* user-supplied data */
-	size_t ulen;		/* udata lenght */
-	void *pdata;		/* previous data, for rollback */
-	size_t plen;		/* pdata lenght */
+	unsigned int numops;	/* quantity of operations in the list */
+	pthread_mutex_t lock;	/* used to modify the operation list */
+	struct joper *op;	/* list of operations */
 };
 
 struct jfsck_result {
 	int total;		/* total transactions files we looked at */
 	int invalid;		/* invalid files in the journal directory */
 	int in_progress;	/* transactions in progress */
-	int broken_head;	/* transactions broken (header missing) */
-	int broken_body;	/* transactions broken (body missing) */
-	int load_error;		/* errors loading the transaction */
+	int broken;		/* transactions broken */
 	int apply_error;	/* errors applying the transaction */
 	int rollbacked;		/* transactions that were rollbacked */
 };
 
-/* on-disk structure */
-struct disk_trans {
-	
-	/* header (fixed lenght, defined below) */
+
+/* on-disk structures */
+
+/* header (fixed length, defined below) */
+struct disk_header {
 	uint32_t id;		/* id */
 	uint32_t flags;		/* flags about this transaction */
-	uint32_t len;		/* data lenght */
-	uint32_t plen;		/* previous data lenght */
-	uint32_t ulen;		/* user-supplied information lenght */
+	uint32_t numops;	/* number of operations */
+};
+
+/* operation */
+struct disk_operation {
+	uint32_t len;		/* data length */
+	uint32_t plen;		/* previous data length */
 	uint64_t offset;	/* offset relative to the BOF */
-	
-	/* payload (variable lenght) */
-	char *udata;		/* user-supplied data */
 	char *prevdata;		/* previous data for rollback */
 };
 
 
-/* core operations */
+/* core functions */
 int jopen(struct jfs *fs, const char *name, int flags, int mode, int jflags);
 void jtrans_init(struct jfs *fs, struct jtrans *ts);
+int jtrans_add(struct jtrans *ts, const void *buf, size_t count, off_t offset);
 int jtrans_commit(struct jtrans *ts);
 int jtrans_rollback(struct jtrans *ts);
 void jtrans_free(struct jtrans *ts);
 int jclose(struct jfs *fs);
 
+
 /* journal checker */
 int jfsck(const char *name, struct jfsck_result *res);
 int jfsck_cleanup(const char *name);
@@ -88,7 +101,7 @@ ssize_t jreadv(struct jfs *fs, struct io
 ssize_t jwrite(struct jfs *fs, const void *buf, size_t count);
 ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset);
 ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count);
-int jtruncate(struct jfs *fs, off_t lenght);
+int jtruncate(struct jfs *fs, off_t length);
 
 /* ANSI C stdio wrappers */
 struct jfs *jfopen(const char *path, const char *mode);
@@ -113,8 +126,9 @@ FILE *jfsopen(struct jfs *stream, const 
 #define J_COMMITED	1	/* mark a transaction as commited */
 #define J_ROLLBACKED	2	/* mark a transaction as rollbacked */
 
-/* disk_trans constants */
-#define J_DISKTFIXSIZE	 28	/* lenght of disk_trans' header */ 
+/* disk constants */
+#define J_DISKHEADSIZE	 12	/* length of disk_header */
+#define J_DISKOPHEADSIZE 16	/* length of disk_operation header */
 
 /* jfsck constants (return values) */
 #define J_ESUCCESS	0	/* success - shouldn't be used */
diff -puN /dev/null check.c
--- /dev/null	2004-04-13 23:59:22.000000000 -0300
+++ cur-root/check.c	2004-06-15 03:03:38.000000000 -0300
@@ -0,0 +1,260 @@
+
+/*
+ * libjio - A library for Journaled I/O
+ * Alberto Bertogli (albertogli@telpin.com.ar)
+ *
+ * Recovery functions
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/mman.h>
+
+#include "libjio.h"
+#include "common.h"
+
+
+/* fill a transaction structure from a mmapped transaction file; returns 1 on
+ * success, 0 if it's too short or malloc() fails. pdata points inside map */
+static int fill_trans(unsigned char *map, off_t len, struct jtrans *ts)
+{
+	unsigned int i;
+	unsigned char *p;
+	struct joper *op, *tmp;
+	uint32_t head[3], oplens[2];
+	uint64_t opoffset;
+
+	if (len < J_DISKHEADSIZE)
+		return 0;
+
+	/* fields are copied with memcpy() as they may not be aligned */
+	p = map;
+	memcpy(head, p, sizeof(head));
+	ts->id = head[0];
+	ts->flags = head[1];
+	ts->numops = head[2];
+	p += J_DISKHEADSIZE;
+
+	for (i = 0; i < ts->numops; i++) {
+		if (len < (p - map) + J_DISKOPHEADSIZE)
+			goto error;
+
+		op = malloc(sizeof(struct joper));
+		if (op == NULL)
+			goto error;
+
+		memcpy(oplens, p, sizeof(oplens));
+		memcpy(&opoffset, p + sizeof(oplens), sizeof(opoffset));
+		op->len = oplens[0];
+		op->plen = oplens[1];
+		op->offset = opoffset;
+		op->buf = NULL;
+		op->locked = 0;
+		p += J_DISKOPHEADSIZE;
+
+		if (len < (p - map) + op->plen) {
+			/* not linked yet, so the error path would miss it */
+			free(op);
+			goto error;
+		}
+		op->pdata = (void *) p;
+		p += op->plen;
+		op->prev = NULL;
+		op->next = NULL;
+		if (ts->op == NULL) {
+			ts->op = op;
+		} else {
+			for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next)
+				;
+			tmp->next = op;
+			op->prev = tmp;
+		}
+	}
+
+	return 1;
+
+error:
+	while (ts->op != NULL) {
+		tmp = ts->op->next;
+		free(ts->op);
+		ts->op = tmp;
+	}
+	return 0;
+}
+
+/* check the journal and rollback incomplete transactions; fills the res
+ * counters and returns 0 on success or one of the J_E* error codes */
+int jfsck(const char *name, struct jfsck_result *res)
+{
+	int tfd, rv, i, maxtid, ret = J_ENOJOURNAL;
+	char jdir[PATH_MAX], jlockfile[PATH_MAX], tname[PATH_MAX];
+	struct stat sinfo;
+	struct jfs fs;
+	struct jtrans *curts;
+	struct joper *tmpop;
+	DIR *dir;
+	struct dirent *dent;
+	void *map;
+	off_t filelen;
+
+	fs.fd = open(name, O_RDWR | O_SYNC | O_LARGEFILE);
+	if (fs.fd < 0)
+		return J_ENOENT;
+	fs.name = (char *) name;
+	fs.jfd = -1;
+	fs.flags = 0;		/* jtrans_init() reads it */
+
+	if (!get_jdir(name, jdir)) {
+		ret = J_ENOMEM;
+		goto exit;
+	}
+	rv = lstat(jdir, &sinfo);
+	if (rv < 0 || !S_ISDIR(sinfo.st_mode))
+		goto exit;
+
+	/* the lock file is only used to complete the jfs structure */
+	snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
+	fs.jfd = open(jlockfile, O_RDWR | O_CREAT, 0600);
+	if (fs.jfd < 0)
+		goto exit;
+
+	dir = opendir(jdir);
+	if (dir == NULL)
+		goto exit;
+
+	/* find the greatest transaction number in the journal directory */
+	maxtid = 0;
+	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
+		/* transactions are named as numbers > 0, so a simple atoi()
+		 * is enough to tell them from any other file */
+		rv = atoi(dent->d_name);
+		if (rv > maxtid)
+			maxtid = rv;
+	}
+	closedir(dir);
+
+	/* rewrite the lockfile, writing the new maxtid on it, so that when we
+	 * rollback a transaction it doesn't step over existing ones */
+	ret = J_ENOMEM;
+	rv = spwrite(fs.jfd, &maxtid, sizeof(maxtid), 0);
+	if (rv != sizeof(maxtid))
+		goto exit;
+
+	/* we loop all the way up to the max transaction id */
+	for (i = 1; i <= maxtid; i++) {
+		tfd = -1;
+		map = MAP_FAILED;
+		filelen = 0;
+		curts = malloc(sizeof(struct jtrans));
+		if (curts == NULL)
+			goto exit;
+
+		jtrans_init(&fs, curts);
+		curts->id = i;
+
+		/* use i as the file name so we really loop in order: recovering
+		 * transactions out of order means instant corruption */
+		if (!get_jtfile(name, i, tname)) {
+			free(curts);
+			goto exit;
+		}
+		tfd = open(tname, O_RDWR | O_SYNC | O_LARGEFILE, 0600);
+		if (tfd < 0) {
+			res->invalid++;
+			goto loop;
+		}
+
+		/* if we can't lock it, it is currently in use so we skip it */
+		rv = plockf(tfd, F_TLOCK, 0, 0);
+		if (rv == -1) {
+			res->in_progress++;
+			goto loop;
+		}
+
+		/* an empty or unmappable file is reported as broken */
+		filelen = lseek(tfd, 0, SEEK_END);
+		if (filelen > 0)
+			map = mmap(0, filelen, PROT_READ, MAP_SHARED, tfd, 0);
+		if (map == MAP_FAILED || fill_trans(map, filelen, curts) != 1) {
+			res->broken++;
+			goto loop;
+		}
+
+		rv = jtrans_rollback(curts);
+		if (rv < 0) {
+			res->apply_error++;
+			goto loop;
+		}
+		res->rollbacked++;
+
+loop:
+		/* free only the list nodes: their pdata lives inside the map */
+		while (curts->op != NULL) {
+			tmpop = curts->op->next;
+			free(curts->op);
+			curts->op = tmpop;
+		}
+		if (map != MAP_FAILED)
+			munmap(map, filelen);
+		if (tfd >= 0)
+			close(tfd);
+		free(curts);
+		res->total++;
+	}
+	ret = 0;
+exit:
+	if (fs.jfd >= 0)
+		close(fs.jfd);
+	close(fs.fd);
+	return ret;
+}
+
+/* remove all the files in the journal directory (if any); returns 1 on
+ * success (also when the directory does not exist), 0 on error */
+int jfsck_cleanup(const char *name)
+{
+	char jdir[PATH_MAX], tfile[PATH_MAX*3];
+	DIR *dir;
+	struct dirent *dent;
+	int len;
+
+	if (!get_jdir(name, jdir))
+		return 0;
+
+	dir = opendir(jdir);
+	if (dir == NULL && errno == ENOENT)
+		/* it doesn't exist, so it's clean */
+		return 1;
+	else if (dir == NULL)
+		return 0;
+
+	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
+		/* we only care about transactions (named as numbers > 0) and
+		 * the lockfile (named "lock"); ignore everything else */
+		if (strcmp(dent->d_name, "lock") && atoi(dent->d_name) <= 0)
+			continue;
+
+		/* build the full path to the transaction file */
+		len = snprintf(tfile, sizeof(tfile), "%s/%s", jdir, dent->d_name);
+
+		/* the full filename is too large; don't leak the DIR handle */
+		if (len < 0 || len > PATH_MAX) {
+			closedir(dir);
+			return 0;
+		}
+
+		unlink(tfile);
+	}
+	closedir(dir);
+
+	return 1;
+}
+
diff -puN common.c~new_commit common.c
--- cur/common.c~new_commit	2004-06-14 16:40:42.000000000 -0300
+++ cur-root/common.c	2004-06-14 17:41:24.000000000 -0300
@@ -9,6 +9,11 @@
 #include <sys/types.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <string.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
 
 #include "common.h"
 
@@ -90,3 +95,52 @@ ssize_t spwrite(int fd, const void *buf,
 	return count;
 }
 
+/* build the journal directory name ("<dir>/.<base>.jio") out of the filename
+ * into jdir (PATH_MAX bytes); returns 1 on success, 0 if out of memory */
+int get_jdir(const char *filename, char *jdir)
+{
+	char *base, *baset;
+	char *dir, *dirt;
+
+	/* basename() and dirname() may modify their argument, so each one
+	 * gets its own private copy of the filename */
+	baset = strdup(filename);
+	dirt = strdup(filename);
+	if (baset == NULL || dirt == NULL) {
+		free(baset);
+		free(dirt);
+		return 0;
+	}
+	base = basename(baset);
+	dir = dirname(dirt);
+	snprintf(jdir, PATH_MAX, "%s/.%s.jio", dir, base);
+	free(baset);
+	free(dirt);
+	return 1;
+}
+
+/* build the filename of a given transaction ("<dir>/.<base>.jio/<tid>")
+ * into jtfile (PATH_MAX bytes); returns 1 on success, 0 if out of memory */
+int get_jtfile(const char *filename, int tid, char *jtfile)
+{
+	char *base, *baset;
+	char *dir, *dirt;
+
+	/* basename() and dirname() may modify their argument, so each one
+	 * gets its own private copy of the filename */
+	baset = strdup(filename);
+	dirt = strdup(filename);
+	if (baset == NULL || dirt == NULL) {
+		free(baset);
+		free(dirt);
+		return 0;
+	}
+	base = basename(baset);
+	dir = dirname(dirt);
+	snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%d", dir, base, tid);
+	free(baset);
+	free(dirt);
+	return 1;
+}
+
+
diff -puN Makefile~new_commit Makefile
--- cur/Makefile~new_commit	2004-06-14 16:42:47.000000000 -0300
+++ cur-root/Makefile	2004-06-14 16:43:00.000000000 -0300
@@ -3,7 +3,7 @@ include Make.conf
 
 
 # objects to build
-OBJS = common.o trans.o unix.o ansi.o
+OBJS = common.o trans.o check.o unix.o ansi.o
 
 # rules
 default: all
diff -puN common.h~new_commit common.h
--- cur/common.h~new_commit	2004-06-14 16:48:07.000000000 -0300
+++ cur-root/common.h	2004-06-14 16:48:11.000000000 -0300
@@ -12,6 +12,8 @@
 off_t plockf(int fd, int cmd, off_t offset, off_t len);
 ssize_t spread(int fd, void *buf, size_t count, off_t offset);
 ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset);
+int get_jdir(const char *filename, char *jdir);
+int get_jtfile(const char *filename, int tid, char *jtfile);
 
 #endif
 
diff -puN unix.c~new_commit unix.c
--- cur/unix.c~new_commit	2004-06-14 17:49:33.000000000 -0300
+++ cur-root/unix.c	2004-06-16 03:22:44.000000000 -0300
@@ -88,10 +88,7 @@ ssize_t jwrite(struct jfs *fs, const voi
 
 	jtrans_init(fs, &ts);
 	pos = lseek(fs->fd, 0, SEEK_CUR);
-	ts.offset = pos;
-
-	ts.buf = buf;
-	ts.len = count;
+	jtrans_add(&ts, buf, count, pos);
 
 	rv = jtrans_commit(&ts);
 
@@ -114,10 +111,7 @@ ssize_t jpwrite(struct jfs *fs, const vo
 	struct jtrans ts;
 
 	jtrans_init(fs, &ts);
-	ts.offset = offset;
-
-	ts.buf = buf;
-	ts.len = count;
+	jtrans_add(&ts, buf, count, offset);
 
 	rv = jtrans_commit(&ts);
 
@@ -129,43 +123,29 @@ ssize_t jpwrite(struct jfs *fs, const vo
 /* writev wrapper */
 ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count)
 {
-	int rv, i, bufp;
-	ssize_t sum;
-	char *buf;
-	off_t pos;
+	int rv, i;
+	size_t sum;
+	off_t ipos, t;
 	struct jtrans ts;
 
-	sum = 0;
-	for (i = 0; i < count; i++)
-		sum += vector[i].iov_len;
-
-	/* unify the buffers into one big chunk to commit */
-	/* FIXME: can't we do this more efficient? It ruins the whole purpose
-	 * of using writev()! maybe we should do one transaction per vector */
-	buf = malloc(sum);
-	if (buf == NULL)
-		return -1;
-	bufp = 0;
-
-	for (i = 0; i < count; i++) {
-		memcpy(buf + bufp, vector[i].iov_base, vector[i].iov_len);
-		bufp += vector[i].iov_len;
-	}
-
 	pthread_mutex_lock(&(fs->lock));
 
 	jtrans_init(fs, &ts);
-	pos = lseek(fs->fd, 0, SEEK_CUR);
-	ts.offset = pos;
+	ipos = lseek(fs->fd, 0, SEEK_CUR);
+	t = ipos;
 
-	ts.buf = buf;
-	ts.len = sum;
+	sum = 0;
+	for (i = 0; i < count; i++) {
+		jtrans_add(&ts, vector[i].iov_base, vector[i].iov_len, t);
+		sum += vector[i].iov_len;
+		t += vector[i].iov_len;
+	}
 
 	rv = jtrans_commit(&ts);
 
 	if (rv >= 0) {
 		/* if success, advance the file pointer */
-		lseek(fs->fd, count, SEEK_CUR);
+		lseek(fs->fd, sum, SEEK_CUR);
 	}
 
 	pthread_mutex_unlock(&(fs->lock));
@@ -177,14 +157,14 @@ ssize_t jwritev(struct jfs *fs, const st
 }
 
 /* truncate a file - be careful with this */
-int jtruncate(struct jfs *fs, off_t lenght)
+int jtruncate(struct jfs *fs, off_t length)
 {
 	int rv;
 
-	/* lock from lenght to the end of file */
-	plockf(fs->fd, F_LOCK, lenght, 0);
-	rv = ftruncate(fs->fd, lenght);
-	plockf(fs->fd, F_ULOCK, lenght, 0);
+	/* lock from length to the end of file */
+	plockf(fs->fd, F_LOCK, length, 0);
+	rv = ftruncate(fs->fd, length);
+	plockf(fs->fd, F_ULOCK, length, 0);
 
 	return rv;
 }
diff -puN samples/jio3.c~new_commit samples/jio3.c
--- cur/samples/jio3.c~new_commit	2004-06-14 17:54:09.000000000 -0300
+++ cur-root/samples/jio3.c	2004-06-15 02:28:22.000000000 -0300
@@ -16,28 +16,34 @@ int main(int argc, char **argv)
 	struct jfs fs;
 	struct jtrans ts;
 
-	fd = jopen(&fs, "test1", O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0660, 0);
+	fd = jopen(&fs, "test1", O_RDWR | O_CREAT | O_SYNC, 0660, 0);
 	if (fd < 0)
 		perror("OPEN");
 
-#define str "ROLLBACKTEST!\n"
-
 	jtrans_init(&fs, &ts);
 
-	ts.offset = 0;
-	ts.buf = str;
-	ts.len = strlen(str);
-		
+#define str1 "1ROLLBACKTEST1!\n"
+	jtrans_add(&ts, str1, strlen(str1), 0);
+
+#define str2 "2ROLLBACKTEST2!\n"
+	jtrans_add(&ts, str2, strlen(str2), strlen(str1));
+
+#define str3 "3ROLLBACKTEST3!\n"
+	jtrans_add(&ts, str3, strlen(str3), strlen(str1) + strlen(str2));
+
+
 	rv = jtrans_commit(&ts);
-	if (rv != strlen(str))
+	if (rv != strlen(str1) + strlen(str2) + strlen(str3))
 		perror("COMMIT");
+	printf("COMMIT OK: %d\n", rv);
+
 
 	rv = jtrans_rollback(&ts);
-	if (rv != 0)
+	if (rv < 0)
 		perror("ROLLBACK");
+	printf("ROLLBACK OK: %d\n", rv);
 
 	return 0;
 
 }
 
-

_
