git » libjio » commit 4d14be7

libjio: Use writev() to write journal files

author Alberto Bertogli
2009-07-15 18:45:54 UTC
committer Alberto Bertogli
2009-07-15 18:45:54 UTC
parent d2d9ad3804e85cd6adf1f9ef42a2d933be633b03

libjio: Use writev() to write journal files

This patch makes libjio use writev() to write journal files. While at this
point doesn't make much difference, in the future we will be able to
submit all ops at once, and it will help to implement eager operation
writing.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>

libjio/common.c +43 -0
libjio/common.h +2 -0
libjio/journal.c +27 -29
libjio/journal.h +0 -1
tests/behaviour/t_corruption.py +2 -2
tests/behaviour/t_fi.py +4 -4
tests/behaviour/tf.py +14 -0

diff --git a/libjio/common.c b/libjio/common.c
index c85278f..02f5f4a 100644
--- a/libjio/common.c
+++ b/libjio/common.c
@@ -98,6 +98,49 @@ ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset)
 	return count;
 }
 
+/** Like writev() but either fails, or return a complete write.
+ * Note that, as opposed to writev() it WILL MODIFY iov, in particular the
+ * iov_len fields. */
+ssize_t swritev(int fd, struct iovec *iov, int iovcnt)
+{
+	int i;
+	ssize_t rv, c, t;
+	size_t total;
+
+	total = 0;
+	for (i = 0; i < iovcnt; i++)
+		total += iov[i].iov_len;
+
+	c = 0;
+	while (c < total) {
+		rv = writev(fd, iov, iovcnt);
+
+		if (rv == total)
+			return total;
+		else if (rv < 0)
+			return rv;
+
+		/* incomplete write, advance iov and try again */
+		c += rv;
+		t = 0;
+		for (i = 0; i < iovcnt; i++) {
+			if (t + iov[i].iov_len > rv) {
+				iov[i].iov_base = (unsigned char *)
+					iov[i].iov_base + rv - t;
+				iov[i].iov_len -= rv - t;
+				break;
+			} else {
+				t += iov[i].iov_len;
+			}
+		}
+
+		iovcnt -= i;
+		iov = iov + i;
+	}
+
+	return c;
+}
+
 /** Store in jdir the default journal directory path of the given filename */
 int get_jdir(const char *filename, char *jdir)
 {
diff --git a/libjio/common.h b/libjio/common.h
index 512355a..71ccc49 100644
--- a/libjio/common.h
+++ b/libjio/common.h
@@ -8,6 +8,7 @@
 
 #include <sys/types.h>	/* for ssize_t and off_t */
 #include <stdint.h>	/* for uint*_t */
+#include <sys/uio.h>	/* for struct iovec */
 
 #include "libjio.h"	/* for struct jfs */
 #include "fiu-local.h"	/* for fault injection functions */
@@ -72,6 +73,7 @@ struct jfs {
 off_t plockf(int fd, int cmd, off_t offset, off_t len);
 ssize_t spread(int fd, void *buf, size_t count, off_t offset);
 ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset);
+ssize_t swritev(int fd, struct iovec *iov, int iovcnt);
 int get_jdir(const char *filename, char *jdir);
 void get_jtfile(struct jfs *fs, unsigned int tid, char *jtfile);
 uint64_t ntohll(uint64_t x);
diff --git a/libjio/journal.c b/libjio/journal.c
index 353d848..51c63ea 100644
--- a/libjio/journal.c
+++ b/libjio/journal.c
@@ -212,6 +212,7 @@ struct journal_op *journal_new(struct jfs *fs, unsigned int flags)
 	char *name = NULL;
 	struct journal_op *jop;
 	struct on_disk_hdr hdr;
+	struct iovec iov[1];
 
 	jop = malloc(sizeof(struct journal_op));
 	if (jop == NULL)
@@ -235,7 +236,6 @@ struct journal_op *journal_new(struct jfs *fs, unsigned int flags)
 	jop->fd = fd;
 	jop->numops = 0;
 	jop->name = name;
-	jop->curpos = 0;
 	jop->csum = 0;
 	jop->fs = fs;
 
@@ -248,13 +248,14 @@ struct journal_op *journal_new(struct jfs *fs, unsigned int flags)
 	hdr.ver = 1;
 	hdr.trans_id = id;
 	hdr.flags = flags;
-
 	hdr_hton(&hdr);
-	rv = spwrite(fd, &hdr, sizeof(hdr), 0);
+
+	iov[0].iov_base = (unsigned char *) &hdr;
+	iov[0].iov_len = sizeof(hdr);
+	rv = swritev(fd, iov, 1);
 	if (rv != sizeof(hdr))
 		goto unlink_error;
 
-	jop->curpos = sizeof(hdr);
 	jop->csum = checksum_buf(jop->csum, (unsigned char *) &hdr,
 			sizeof(hdr));
 
@@ -280,29 +281,28 @@ int journal_add_op(struct journal_op *jop, unsigned char *buf, size_t len,
 {
 	ssize_t rv;
 	struct on_disk_ophdr ophdr;
+	struct iovec iov[2];
 
-	/* save the operation's header, */
 	ophdr.len = len;
 	ophdr.offset = offset;
-
 	ophdr_hton(&ophdr);
-	rv = spwrite(jop->fd, &ophdr, sizeof(ophdr), jop->curpos);
-	if (rv != sizeof(ophdr))
-		goto error;
 
-	fiu_exit_on("jio/commit/tf_ophdr");
-
-	jop->curpos += sizeof(ophdr);
+	iov[0].iov_base = (unsigned char *) &ophdr;
+	iov[0].iov_len = sizeof(ophdr);
 	jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
 			sizeof(ophdr));
 
-	/* and save it to the disk */
-	rv = spwrite(jop->fd, buf, len, jop->curpos);
-	if (rv != len)
+	iov[1].iov_base = buf;
+	iov[1].iov_len = len;
+	jop->csum = checksum_buf(jop->csum, buf, len);
+
+	fiu_exit_on("jio/commit/tf_pre_addop");
+
+	rv = swritev(jop->fd, iov, 2);
+	if (rv != sizeof(ophdr) + len)
 		goto error;
 
-	jop->curpos += len;
-	jop->csum = checksum_buf(jop->csum, buf, len);
+	fiu_exit_on("jio/commit/tf_addop");
 
 	jop->numops++;
 
@@ -327,27 +327,26 @@ int journal_commit(struct journal_op *jop)
 	ssize_t rv;
 	struct on_disk_ophdr ophdr;
 	struct on_disk_trailer trailer;
+	struct iovec iov[2];
 
 	/* write the empty ophdr to mark there are no more operations, and
 	 * then the trailer */
 	ophdr.len = 0;
 	ophdr.offset = 0;
-
 	ophdr_hton(&ophdr);
-	rv = spwrite(jop->fd, &ophdr, sizeof(ophdr), jop->curpos);
-	if (rv != sizeof(ophdr))
-		goto error;
-
-	jop->curpos += sizeof(ophdr);
+	iov[0].iov_base = (unsigned char *) &ophdr;
+	iov[0].iov_len = sizeof(ophdr);
 	jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
 			sizeof(ophdr));
 
 	trailer.checksum = jop->csum;
 	trailer.numops = jop->numops;
-
 	trailer_hton(&trailer);
-	rv = spwrite(jop->fd, &trailer, sizeof(trailer), jop->curpos);
-	if (rv != sizeof(trailer))
+	iov[1].iov_base = (unsigned char *) &trailer;
+	iov[1].iov_len = sizeof(trailer);
+
+	rv = swritev(jop->fd, iov, 2);
+	if (rv != sizeof(ophdr) + sizeof(trailer))
 		goto error;
 
 	/* this is a simple but efficient optimization: instead of doing
@@ -381,10 +380,9 @@ int journal_free(struct journal_op *jop)
 		/* we do not want to leave a possibly complete transaction
 		 * file around when the transaction was not commited and the
 		 * unlink failed, so we attempt to truncate it, and if that
-		 * fails we corrupt the checksum as a last resort */
+		 * fails we corrupt the header as a last resort */
 		if (ftruncate(jop->fd, 0) != 0) {
-			if (pwrite(jop->fd, "\0\0\0\0", 4, jop->curpos - 4)
-					!= 4)
+			if (pwrite(jop->fd, "\0\0\0\0", 4, 0) != 4)
 				goto exit;
 			if (fdatasync(jop->fd) != 0)
 				goto exit;
diff --git a/libjio/journal.h b/libjio/journal.h
index 80060c2..5f7acd6 100644
--- a/libjio/journal.h
+++ b/libjio/journal.h
@@ -11,7 +11,6 @@ struct journal_op {
 	int fd;
 	int numops;
 	char *name;
-	off_t curpos;
 	uint32_t csum;
 	struct jfs *fs;
 };
diff --git a/tests/behaviour/t_corruption.py b/tests/behaviour/t_corruption.py
index 1524d36..4d228ab 100644
--- a/tests/behaviour/t_corruption.py
+++ b/tests/behaviour/t_corruption.py
@@ -27,8 +27,8 @@ def test_c01():
 	n = run_with_tmp(f1)
 	assert content(n) == ''
 	tc = open(transpath(n, 1)).read()
-	# flip just one bit in the middle byte
-	pos = len(tc) / 2
+	# flip just one bit in the transaction data
+	pos = DHS + DOHS + len(c) / 2
 	tc = tc[:pos] + \
 		chr((ord(tc[pos]) & 0xFE) | (~ ord(tc[pos]) & 0x1) & 0xFF) + \
 		tc[pos + 1:]
diff --git a/tests/behaviour/t_fi.py b/tests/behaviour/t_fi.py
index 7490a08..6c36d40 100644
--- a/tests/behaviour/t_fi.py
+++ b/tests/behaviour/t_fi.py
@@ -65,12 +65,12 @@ def test_f03():
 	cleanup(n)
 
 def test_f04():
-	"fail jio/commit/tf_ophdr"
+	"fail jio/commit/tf_pre_addop"
 	c = gencontent()
 
 	def f1(f, jf):
-		fiu.enable_external("jio/commit/tf_ophdr",
-				gen_ret_after(1, 0, 1))
+		fiu.enable_external("jio/commit/tf_pre_addop",
+				gen_ret_seq((0, 1)))
 		t = jf.new_trans()
 		t.add(c, 0)
 		t.add(c, len(c) + 200)
@@ -78,7 +78,7 @@ def test_f04():
 
 	n = run_with_tmp(f1)
 
-	assert len(content(transpath(n, 1))) == DHS + DOHS + len(c) + DOHS
+	assert len(content(transpath(n, 1))) == DHS + DOHS + len(c)
 	assert content(n) == ''
 	fsck_verify(n, broken = 1)
 	assert content(n) == ''
diff --git a/tests/behaviour/tf.py b/tests/behaviour/tf.py
index 7cf4d61..ad793ea 100644
--- a/tests/behaviour/tf.py
+++ b/tests/behaviour/tf.py
@@ -226,6 +226,20 @@ def gen_ret_after(n, notyet, itstime):
 		return itstime
 	return newf
 
+def gen_ret_seq(seq):
+	"""Returns a function that each time it is called returns a value of
+	the given sequence, in order. When the sequence is exhausted, returns
+	the last value."""
+	it = iter(seq)
+	last = [0]
+	def newf(*args, **kwargs):
+		try:
+			r = it.next()
+			last[0] = r
+			return r
+		except StopIteration:
+			return last[0]
+	return newf
 
 def autorun(module, specific_test = None):
 	"Runs all the functions in the given module that begin with 'test'."