git » libfiu » commit 252f8e7

Add a preloader library

author Alberto Bertogli
2009-05-22 00:48:18 UTC
committer Alberto Bertogli
2009-05-22 02:06:20 UTC
parent 314fda6e2d8e1fb38f9fe176a456d85365d8a286

Add a preloader library

It loads function definitions from files, and generates code that is then
built into a preloader library.

Useful for simulating faults from external libraries like libc.

At the moment, only a couple of I/O related functions are wrapped.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>

.gitignore +5 -0
Makefile +9 -1
preload/Makefile +51 -0
preload/codegen.c +24 -0
preload/codegen.h +131 -0
preload/generate +262 -0
preload/modules/linux.io.mod +12 -0
preload/modules/posix.custom.c +64 -0
preload/modules/posix.io.mod +133 -0
preload/utils/extract_from_man +164 -0

diff --git a/.gitignore b/.gitignore
index 7e4eb4e..6680650 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,8 @@ libfiu/*.o
 libfiu/libfiu.a
 libfiu/libfiu.pc
 libfiu/libfiu.so
+preload/*.o
+preload/*.so
+preload/modules/*.o
+preload/modules/*.mod.c
+
diff --git a/Makefile b/Makefile
index 31ae8e0..f42608a 100644
--- a/Makefile
+++ b/Makefile
@@ -28,12 +28,20 @@ python3_install:
 python3_clean:
 	cd bindings/python3 && rm -rf build/
 
+
+preload:
+	$(MAKE) -C preload
+
+preload_clean:
+	$(MAKE) -C preload clean
+
 clean: python2_clean python3_clean
 	$(MAKE) -C libfiu clean
 
 
 .PHONY: default all clean libfiu utils \
 	python2 python2_install python2_clean \
-	python3 python3_install python3_clean
+	python3 python3_install python3_clean \
+	preload preload_clean
 
 
diff --git a/preload/Makefile b/preload/Makefile
new file mode 100644
index 0000000..ec22c67
--- /dev/null
+++ b/preload/Makefile
@@ -0,0 +1,51 @@
+
+CFLAGS += -std=c99 -Wall -O3
+ALL_CFLAGS = -D_XOPEN_SOURCE=500 -fPIC -DENABLE_FIU=1 -I. $(CFLAGS)
+
+ifdef DEBUG
+ALL_CFLAGS += -g
+endif
+
+ifdef PROFILE
+ALL_CFLAGS += -g -pg -fprofile-arcs -ftest-coverage
+endif
+
+
+GEN_C = $(addsuffix .c,$(wildcard modules/*.mod))
+GEN_OBJS = $(addsuffix .o,$(wildcard modules/*.mod))
+CUSTOM_OBJS = $(patsubst %.c,%.o,$(wildcard modules/*.custom.c))
+OBJS = codegen.o $(GEN_OBJS) $(CUSTOM_OBJS)
+
+
+ifneq ($(V), 1)
+	NICE_CC = @echo "  CC  $@"; $(CC)
+	NICE_GEN = @echo "  GEN $@"; ./generate
+else
+	NICE_CC = $(CC)
+	NICE_GEN = ./generate
+endif
+
+
+default: all
+	
+all: fiu_preload.so
+
+$(GEN_OBJS): $(GEN_C)
+
+%.mod.c: %.mod
+	$(NICE_GEN) $< $@
+
+.c.o:
+	$(NICE_CC) $(ALL_CFLAGS) -c $< -o $@
+
+fiu_preload.so: $(OBJS)
+	$(NICE_CC) $(ALL_CFLAGS) -shared -fPIC $(OBJS) -lfiu -ldl \
+		-o fiu_preload.so
+
+clean:
+	rm -f $(OBJS) $(GEN_OBJS:.o=.c) fiu_preload.so
+	rm -f *.bb *.bbg *.da *.gcov *.gcda *.gcno gmon.out
+
+.PHONY: default all clean
+
+
diff --git a/preload/codegen.c b/preload/codegen.c
new file mode 100644
index 0000000..33dbee6
--- /dev/null
+++ b/preload/codegen.c
@@ -0,0 +1,24 @@
+
+#include <dlfcn.h>
+#include "codegen.h"
+
+/* Dynamically load libc */
+void *_fiu_libc;
+
+/* Recursion counter, per-thread */
+int __thread _fiu_called;
+
+static int __attribute__((constructor)) init(void)
+{
+	_fiu_called = 0;
+
+	_fiu_libc = dlopen("libc.so.6", RTLD_NOW);
+	if (_fiu_libc == NULL) {
+		printd("Error loading libc: %s\n", dlerror());
+		return 0;
+	}
+
+	printd("done\n");
+	return 1;
+}
+
diff --git a/preload/codegen.h b/preload/codegen.h
new file mode 100644
index 0000000..44923e7
--- /dev/null
+++ b/preload/codegen.h
@@ -0,0 +1,131 @@
+
+#ifndef _FIU_CODEGEN
+#define _FIU_CODEGEN
+
+#include <dlfcn.h>		/* dlsym() */
+#include <fiu.h>		/* fiu_* */
+#include <stdlib.h>		/* NULL, random() */
+
+/* Pointer to the dynamically loaded library */
+extern void *_fiu_libc;
+
+/* Recursion counter, per-thread */
+extern int __thread _fiu_called;
+
+/* Useful macros for recursion and debugging */
+#if 0
+	#define rec_inc() do { _fiu_called++; } while(0)
+	#define rec_dec() do { _fiu_called--; } while(0)
+	#define printd(...) do { } while(0)
+
+#else
+	/* debug variants */
+	#include <stdio.h>
+	#include <unistd.h>
+
+	#define rec_inc()				\
+		do {					\
+			_fiu_called++;			\
+			fprintf(stderr, "I: %d\n", _fiu_called); \
+			fflush(stderr);			\
+		} while (0)
+
+	#define rec_dec()				\
+		do {					\
+			_fiu_called--;			\
+			fprintf(stderr, "D: %d\n", _fiu_called); \
+			fflush(stderr);			\
+		} while (0)
+
+	#define printd(...)				\
+		do {					\
+			if (_fiu_called)		\
+				fprintf(stderr, "\t");	\
+			_fiu_called++;			\
+			fprintf(stderr, "%5.5d ", getpid()); \
+			fprintf(stderr, "%s(): ", __FUNCTION__ ); \
+			fprintf(stderr, __VA_ARGS__);	\
+			fflush(stderr);			\
+			_fiu_called--;			\
+		} while(0)
+#endif
+
+
+/*
+ * Wrapper generator macros
+ */
+
+/* Generates the common top of the wrapped function */
+#define mkwrap_top(RTYPE, NAME, PARAMS, PARAMSN, PARAMST)	\
+	static RTYPE (*_fiu_orig_##NAME) PARAMS = NULL;		\
+	RTYPE NAME PARAMS					\
+	{ 							\
+		RTYPE r;					\
+		int fstatus;					\
+		void *finfo;					\
+								\
+		/* cast it just to be sure */			\
+		if (_fiu_orig_##NAME == NULL)			\
+			_fiu_orig_##NAME = (RTYPE (*) PARAMST) dlsym(_fiu_libc, #NAME); \
+								\
+		if (_fiu_called) {				\
+			printd("orig\n");			\
+			return (*_fiu_orig_##NAME) PARAMSN;	\
+		}						\
+								\
+		printd("fiu\n");				\
+								\
+		/* fiu_fail() may call anything */		\
+		rec_inc();
+
+
+/* Generates the body of the function for normal, non-errno usage. The return
+ * value is taken from failinfo. */
+#define mkwrap_body_failinfo(FIU_NAME, RTYPE)			\
+								\
+		fstatus = fiu_fail(FIU_NAME);			\
+		if (fstatus != 0) {				\
+			r = (RTYPE) fiu_failinfo();		\
+			goto exit;				\
+		}
+
+/* Generates the body of the function for normal, non-errno usage. The return
+ * value is hardcoded. */
+#define mkwrap_body_hardcoded(FIU_NAME, FAIL_RET)		\
+								\
+		fstatus = fiu_fail(FIU_NAME);			\
+		if (fstatus != 0) {				\
+			r = FAIL_RET;				\
+			goto exit;				\
+		}
+
+/* Generates the body of the function for functions that affect errno. The
+ * return value is hardcoded. Assumes int valid_errnos[] exists and was
+ * properly defined. */
+#define mkwrap_body_errno(FIU_NAME, FAIL_RET, NVERRNOS) \
+								\
+		fstatus = fiu_fail(FIU_NAME);			\
+		if (fstatus != 0) {				\
+			finfo = fiu_failinfo();			\
+			if (finfo == NULL) {			\
+				errno = valid_errnos[random() % NVERRNOS]; \
+			} else {				\
+				errno = (long) finfo;		\
+			}					\
+			r = FAIL_RET;				\
+			goto exit;				\
+		}
+
+
+#define mkwrap_bottom(NAME, PARAMSN)				\
+								\
+		r = (*_fiu_orig_##NAME) PARAMSN;		\
+								\
+	exit:							\
+		rec_dec();					\
+		return r;					\
+	}
+
+
+#endif /* _FIU_CODEGEN */
+
diff --git a/preload/generate b/preload/generate
new file mode 100755
index 0000000..a2e293a
--- /dev/null
+++ b/preload/generate
@@ -0,0 +1,262 @@
+#!/usr/bin/env python
+
+"""
+Reads function information and generates code for the preloader library.
+
+The code is NOT nice. It just does the trick.
+"""
+
+import sys
+import re
+
+
+# Function definition regular expression
+func_def_re = re.compile(
+		r'(?P<ret_type>.*)\s+(?P<name>\w+).*\((?P<params>.*)\).*;')
+
+# Regular expression to extract the types and names of the parameters from a
+# string containing the definition parameters (e.g. from
+# "int a, const char *b" extracts [('int ', 'a'), ('const char *', 'b')]
+params_info_re = \
+	re.compile(r"(?:(?P<type>(?:[\w\*]+\s+\**)+)+(?P<name>\w+),?\s*)+?")
+
+
+class Context:
+	"""Represents the current context information within a module
+	definition file."""
+	def __init__(self):
+		self.fiu_name_base = 'UNKNOWN'
+
+
+class Function:
+	"Represents a function to be wrapped"
+
+	def __init__(self, definition, ctx):
+		"Constructor, takes the C definition as a string"
+		self.definition = definition
+		self.load_from_definition(definition)
+
+		# fiu name, constructed by default from the context but can be
+		# overridden by info
+		self.fiu_name = ctx.fiu_name_base + '/' + self.name
+
+		# what to return on error, by default set to None, which means
+		# "take it from failinfo"
+		self.on_error = None
+
+		# whether to set errno or not, and the list of valid errnos;
+		# in any case if failinfo is set we take the errno value from
+		# there
+		self.use_errno = False
+		self.valid_errnos = []
+
+	def load_from_definition(self, definition):
+		m = func_def_re.match(definition)
+		self.name = m.group("name")
+		self.ret_type = m.group("ret_type")
+		self.params =  m.group("params")
+		self.params_info = params_info_re.findall(self.params)
+
+	def load_info(self, info):
+		"Loads additional information from the given string"
+		if ':' in info:
+			s = info.split(':', 1)
+			k, v = s[0].strip(), s[1].strip()
+
+			if k == 'fiu name':
+				self.fiu_name = v
+			elif k == 'on error':
+				self.on_error = v
+			elif k == 'valid errnos':
+				self.use_errno = True
+				self.valid_errnos = v.split()
+			else:
+				raise SyntaxError, \
+					"Unknown information: " + k
+
+	def __repr__(self):
+		s = '<F %(rt)s %(n)s ( %(p)s ) -- %(fn)s %(oe)s %(ve)s>' % \
+			{
+				'rt': self.ret_type,
+				'n': self.name,
+				'p': self.params,
+				'fn': self.fiu_name,
+				'oe': self.on_error,
+				've': str(self.valid_errnos),
+			}
+		return s
+
+	def generate_to(self, f):
+		"""Generates code to the given file. Strongly related to
+		codegen.h."""
+		f.write('/* Wrapper for %s() */\n' % self.name)
+
+		# extract params names and types
+		paramst = ', '.join(i[0] for i in self.params_info)
+		paramsn = ', '.join(i[1] for i in self.params_info)
+
+		f.write('mkwrap_top(%s, %s, (%s), (%s), (%s))\n' % \
+				(self.ret_type, self.name, self.params,
+					paramsn, paramst) )
+
+		if self.use_errno:
+			if self.on_error is None:
+				desc = "%s uses errno but has no on_error" % \
+					self.name
+				raise RuntimeError, desc
+
+			# We can't put this as a macro parameter, so it has to
+			# be explicit 
+			valid_errnos = '{ ' + ', '.join(self.valid_errnos) \
+				+ ' }'
+			f.write("\tint valid_errnos[] = %s;\n" % valid_errnos)
+
+			f.write('mkwrap_body_errno("%s", %s, %d)\n' % \
+					(self.fiu_name, self.on_error,
+						len(self.valid_errnos)) )
+		elif self.on_error is not None:
+			f.write('mkwrap_body_hardcoded("%s", %s)\n' % \
+					(self.fiu_name, self.on_error) )
+		else:
+			f.write('mkwrap_body_failinfo("%s", %s)\n' % \
+					(self.fiu_name, self.ret_type) )
+
+		f.write('mkwrap_bottom(%s, (%s))\n' % (self.name, paramsn))
+		f.write('\n\n')
+
+
+class Include:
+	"Represents an include directive"
+	def __init__(self, path):
+		self.path = path
+
+	def __repr__(self):
+		return '<I %s>' % self.path
+
+	def generate_to(self, f):
+		f.write("#include %s\n" % self.path)
+
+
+class EmptyLine:
+	"Represents an empty line"
+	def __repr__(self):
+		return '<E>'
+
+	def generate_to(self, f):
+		f.write('\n')
+
+class Comment:
+	"Represents a full-line comment"
+	def __init__(self, line):
+		self.body = line.strip()[1:].strip()
+
+	def __repr__(self):
+		return '<C %s>' % self.body
+
+	def generate_to(self, f):
+		f.write("// %s \n" % self.body)
+
+
+def parse_module(path):
+	"Parses a module definition"
+
+	f = open(path)
+
+	directives = []
+	ctx = Context()
+	current_func = None
+
+	while True:
+		l = f.readline()
+
+		# handle EOF
+		if not l:
+			break
+
+		# handle \ at the end of the line
+		while l.endswith("\\\n"):
+			nl = f.readline()
+			l = l[:-2] + nl
+
+		if not l.strip():
+			directives.append(EmptyLine())
+			continue
+
+		if l.strip().startswith("#"):
+			directives.append(Comment(l))
+			continue
+
+
+		if not l.startswith(" ") and not l.startswith("\t"):
+			# either a new function or a directive, but in either
+			# case the current function is done
+			if current_func:
+				directives.append(current_func)
+			current_func = None
+
+			l = l.strip()
+
+			if ':' in l:
+				# directive
+				s = l.split(':', 1)
+				k, v = s[0].strip(), s[1].strip()
+				if k == 'fiu name base':
+					v = v.strip().strip('/')
+					ctx.fiu_name_base = v
+				elif k == 'include':
+					directives.append(Include(v))
+				else:
+					raise SyntaxError, \
+						("Unknown directive", l)
+			else:
+				current_func = Function(l, ctx)
+		else:
+			# function information
+			current_func.load_info(l.strip())
+
+	if current_func:
+		directives.append(current_func)
+
+	return directives
+
+
+#
+# Code generation
+#
+
+# Templates
+
+gen_header = """
+/*
+ * AUTOGENERATED FILE - DO NOT EDIT
+ *
+ * This file was automatically generated by libfiu, do not edit it directly,
+ * but see libfiu's "preload" directory.
+ */
+
+#include "codegen.h"
+
+"""
+
+
+def generate_code(directives, path):
+	"""Generates code to the file in the given path"""
+	f = open(path, 'w')
+
+	f.write(gen_header)
+
+	for directive in directives:
+		directive.generate_to(f)
+
+
+def main():
+	input_name = sys.argv[1]
+	output_name = sys.argv[2]
+	directives = parse_module(input_name)
+	#import pprint
+	#pprint.pprint(directives)
+	generate_code(directives, output_name)
+
+if __name__ == '__main__':
+	main()
+
diff --git a/preload/modules/linux.io.mod b/preload/modules/linux.io.mod
new file mode 100644
index 0000000..e3f2582
--- /dev/null
+++ b/preload/modules/linux.io.mod
@@ -0,0 +1,12 @@
+
+include: <fcntl.h>
+include: <errno.h>
+
+fiu name base: linux/io/
+
+int sync_file_range(int fd, off_t offset, off_t nbytes, \
+		unsigned int flags);
+	on error: -1
+	valid errnos: EBADF EINVAL EIO ENOMEM ENOSPC
+
+
diff --git a/preload/modules/posix.custom.c b/preload/modules/posix.custom.c
new file mode 100644
index 0000000..30cb17e
--- /dev/null
+++ b/preload/modules/posix.custom.c
@@ -0,0 +1,64 @@
+
+/*
+ * Custom-made wrappers for some special POSIX functions.
+ */
+
+#include "codegen.h"
+
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+
+
+/* Wrapper for open(), we can't generate it because it has a variable number
+ * of arguments */
+static int (*_fiu_orig_open) (const char *pathname, int flags, ...) = NULL;
+int open(const char *pathname, int flags, ...)
+{
+	int r;
+	int fstatus;
+	void *finfo;
+
+	/* Differences from the generated code begin here */
+
+	int mode;
+	va_list l;
+
+	if (flags & O_CREAT) {
+		va_start(l, flags);
+		mode = va_arg(l, mode_t);
+		va_end(l);
+	} else {
+		/* set it to 0, it's ignored anyway */
+		mode = 0;
+	}
+
+	/* cast it just to be sure */
+	if (_fiu_orig_open == NULL)
+		_fiu_orig_open = (int (*) (const char *, int, ...)) \
+				dlsym(_fiu_libc, "open");
+
+	if (_fiu_called) {
+		printd("orig\n");
+		return (*_fiu_orig_open) (pathname, flags, mode);
+	}
+
+	/* Differences from the generated code end here */
+
+	printd("fiu\n");
+
+	/* fiu_fail() may call anything */
+	rec_inc();
+
+	/* Use the normal macros to complete the function, now that we have
+	 * set mode to something */
+
+	int valid_errnos[] = { EACCES, EFAULT, EFBIG, EOVERFLOW, ELOOP, EMFILE, ENAMETOOLONG, ENFILE, ENOENT, ENOMEM, ENOSPC, ENOTDIR, EROFS };
+mkwrap_body_errno("posix/io/oc/open", -1, 13)
+mkwrap_bottom(open, (pathname, flags, mode))
+
+
diff --git a/preload/modules/posix.io.mod b/preload/modules/posix.io.mod
new file mode 100644
index 0000000..17bcb8c
--- /dev/null
+++ b/preload/modules/posix.io.mod
@@ -0,0 +1,133 @@
+
+# Posix I/O
+
+include: <sys/types.h>
+include: <sys/uio.h>
+include: <unistd.h>
+include: <sys/socket.h>
+include: <sys/stat.h>
+include: <fcntl.h>
+include: <errno.h>
+
+fiu name base: posix/io/oc/
+
+# open() has its own custom wrapper
+
+int close(int fd);
+	on error: -1
+	valid errnos: EBADF EINTR EIO
+
+
+fiu name base: posix/io/sync/
+
+int fsync(int fd);
+	on error: -1
+	valid errnos: EBADF EIO EROFS EINVAL
+
+int fdatasync(int fd);
+	on error: -1
+	valid errnos: EBADF EIO EROFS EINVAL
+
+
+fiu name base: posix/io/rw/
+
+ssize_t read(int fd, void *buf, size_t count);
+	on error: -1
+	valid errnos: EBADF EFAULT EINTR EINVAL EIO EISDIR EOVERFLOW
+
+ssize_t pread(int fd, void *buf, size_t count, off_t offset);
+	on error: -1
+	valid errnos: EBADF EFAULT EINTR EINVAL EIO EISDIR EOVERFLOW
+
+ssize_t readv(int fd, const struct iovec *iov, int iovcnt);
+	on error: -1
+	valid errnos: EBADF EFAULT EINTR EINVAL EIO EISDIR EOVERFLOW
+
+
+ssize_t write(int fd, const void *buf, size_t count);
+	on error: -1
+	valid errnos: EBADF EFAULT EFBIG EINTR EINVAL EIO ENOSPC
+
+ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset);
+	on error: -1
+	valid errnos: EBADF EFAULT EFBIG EINTR EINVAL EIO ENOSPC \
+		EOVERFLOW
+
+ssize_t writev(int fd, const struct iovec *iov, int iovcnt);
+	on error: -1
+	valid errnos: EBADF EFAULT EFBIG EINTR EINVAL EIO ENOSPC
+
+
+fiu name base: posix/io/net/
+
+int socket(int domain, int type, int protocol);
+	on error: -1
+	valid errnos: EAFNOSUPPORT EMFILE ENFILE EPROTONOSUPPORT EPROTOTYPE \
+		EACCES ENOBUFS ENOMEM
+
+int bind(int socket, const struct sockaddr *address, socklen_t address_len);
+	on error: -1
+	valid errnos: EADDRINUSE EADDRNOTAVAIL EAFNOSUPPORT EBADF EINVAL ENOTSOCK \
+		EOPNOTSUPP EACCES EDESTADDRREQ EIO ELOOP ENAMETOOLONG ENOENT \
+		ENOTDIR EROFS EACCES EINVAL EISCONN ELOOP ENAMETOOLONG \
+		ENOBUFS
+
+int listen(int socket, int backlog);
+	on error: -1
+	valid errnos: EBADF EDESTADDRREQ EINVAL ENOTSOCK EOPNOTSUPP EACCES EINVAL \
+		ENOBUFS
+
+int accept(int socket, struct sockaddr *restrict address, socklen_t *restrict address_len);
+	on error: -1
+	valid errnos:  EAGAIN EBADF ECONNABORTED EINTR EINVAL EMFILE ENFILE \
+		ENOTSOCK EOPNOTSUPP ENOBUFS ENOMEM EPROTO
+
+int connect(int socket, const struct sockaddr *address, socklen_t address_len);
+	on error: -1
+	valid errnos:  EADDRNOTAVAIL EAFNOSUPPORT EALREADY EBADF ECONNREFUSED \
+		EINPROGRESS EINTR EISCONN ENETUNREACH ENOTSOCK EPROTOTYPE \
+		ETIMEDOUT EIO ELOOP ENAMETOOLONG ENOENT ENOTDIR EACCES \
+		EADDRINUSE ECONNRESET EHOSTUNREACH EINVAL ELOOP ENAMETOOLONG \
+		ENETDOWN ENOBUFS EOPNOTSUPP
+
+ssize_t recv(int socket, void *buffer, size_t length, int flags);
+	on error: -1
+	valid errnos:  EAGAIN EBADF ECONNRESET EINTR EINVAL ENOTCONN ENOTSOCK \
+		EOPNOTSUPP ETIMEDOUT EIO ENOBUFS ENOMEM
+
+ssize_t recvfrom(int socket, void *restrict buffer, size_t length, int flags, struct sockaddr *restrict address, socklen_t *restrict address_len);
+	on error: -1
+	valid errnos:  EAGAIN EBADF ECONNRESET EINTR EINVAL ENOTCONN ENOTSOCK \
+		EOPNOTSUPP ETIMEDOUT EIO ENOBUFS ENOMEM
+
+ssize_t recvmsg(int socket, struct msghdr *message, int flags);
+	on error: -1
+	valid errnos:  EAGAIN EBADF ECONNRESET EINTR EINVAL EMSGSIZE ENOTCONN \
+		ENOTSOCK EOPNOTSUPP ETIMEDOUT EIO ENOBUFS ENOMEM
+
+ssize_t send(int socket, const void *buffer, size_t length, int flags);
+	on error: -1
+	valid errnos:  EAGAIN EBADF ECONNRESET EDESTADDRREQ EINTR EMSGSIZE \
+		ENOTCONN ENOTSOCK EOPNOTSUPP EPIPE EACCES EIO ENETDOWN \
+		ENETUNREACH ENOBUFS
+
+ssize_t sendto(int socket, const void *message, size_t length, int flags, const struct sockaddr *dest_addr, socklen_t dest_len);
+	on error: -1
+	valid errnos:  EAFNOSUPPORT EAGAIN EBADF ECONNRESET EINTR EMSGSIZE \
+		ENOTCONN ENOTSOCK EOPNOTSUPP EPIPE EIO ELOOP ENAMETOOLONG \
+		ENOENT ENOTDIR EACCES EDESTADDRREQ EHOSTUNREACH EINVAL EIO \
+		EISCONN ENETDOWN ENETUNREACH ENOBUFS ENOMEM ELOOP \
+		ENAMETOOLONG
+
+ssize_t sendmsg(int socket, const struct msghdr *message, int flags);
+	on error: -1
+	valid errnos:  EAGAIN EAFNOSUPPORT EBADF ECONNRESET EINTR EINVAL EMSGSIZE \
+		ENOTCONN ENOTSOCK EOPNOTSUPP EPIPE EIO ELOOP ENAMETOOLONG \
+		ENOENT ENOTDIR EACCES EDESTADDRREQ EHOSTUNREACH EIO EISCONN \
+		ENETDOWN ENETUNREACH ENOBUFS ENOMEM ELOOP ENAMETOOLONG
+
+int shutdown(int socket, int how);
+	on error: -1
+	valid errnos:  EBADF EINVAL ENOTCONN ENOTSOCK ENOBUFS
+
+
diff --git a/preload/utils/extract_from_man b/preload/utils/extract_from_man
new file mode 100755
index 0000000..00b65f0
--- /dev/null
+++ b/preload/utils/extract_from_man
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# encoding: utf8
+
+"""
+Extracts information from a manpage (read from stdin) that can be useful to
+create modules for the code generator.
+
+Example usage:
+	man 3posix chmod | extract_from_man
+
+Or, in a loop:
+
+	rm -f gen.mod;
+	for f in chmod chown chdir; do
+		man 3posix $f | extract_from_man >> gen.mod;
+	done
+
+"""
+
+import sys
+import re
+
+
+def wrap(s, cols, indent = 1):
+	ns = ''
+	line = ''
+	for w in s.split():
+		if len(line + ' ' + w) > cols:
+			ns += line + ' \\\n' + '\t' * indent
+			line = w
+		else:
+			if line:
+				line += ' ' + w
+			else:
+				line = w
+
+	ns += line
+
+	return ns.rstrip()
+
+
+def extract_sections(f):
+	"Reads a manpage from the file, returns a dictionary of sections."
+	sec_name = ''
+	sec_data = ''
+	sections = {}
+
+	for l in f:
+		if not l.strip():
+			continue
+
+		if l.startswith((' ', '\t')):
+			sec_data += l
+		else:
+			sections[sec_name] = sec_data
+			sec_name = l.strip()
+			sec_data = ''
+
+	sections[sec_name] = sec_data
+
+	return sections
+
+def get_ret_on_error(sections):
+	"Tries to find out what the function returns on error."
+	if 'RETURN VALUE' not in sections:
+		return None
+
+	# remove spaces and newlines to make it easier to detect the patterns
+	s = ' '.join(sections['RETURN VALUE'].split())
+	print s
+
+	# Note: the '(-|‐)' regexp matches both the normal minus sign ('-')
+	# and the UTF-8 hyphen sign ('‐', or \xe2\x80\x90); sadly both usually
+	# look the same
+	regexps = [
+		r'On error,? (?P<ev>[-\w]+) is returned',
+		r'On error,? .* returns? (?P<ev>[-\w]+).',
+		r'some error occurs,? (?P<ev>[-\w]+) is returned',
+		r'and (?P<ev>[-\w]+) if an error occurr(s|ed)',
+		r'[Oo]ther((-|‐) )?wise, (?P<ev>[-\w]+) shall be returned',
+		r'Other((-|‐) )?wise, the functions shall return (?P<ev>[-\w]+) and'
+	]
+	regexps = map(re.compile, regexps)
+
+	possible_errors = []
+	for regexp in regexps:
+		m = regexp.search(s)
+		if m:
+			possible_errors.append(m.group('ev'))
+	return possible_errors
+
+def get_possible_errnos(sections):
+	"""Tries to find out the possible valid errno values after the
+	function has failed."""
+	if 'ERRORS' not in sections:
+		return None
+
+	errnos = []
+
+	for l in sections['ERRORS'].split('\n'):
+		m = re.match(r'\s+(?P<e>([A-Z]{3,},? *)+)\s*', l)
+		if m:
+			s = m.group('e').strip()
+			if not s:
+				continue
+
+			s = [ x.strip() for x in s.split(',') ]
+			errnos.extend(s)
+
+	return errnos
+
+def get_defs(sections):
+	"Returns (includes, funcs) found in SYNOPSIS; two empty lists if absent."
+	if 'SYNOPSIS' not in sections:
+		return ([], [])  # the caller unpacks two values, never return None
+
+	includes = []
+	funcs = []
+
+	fre = re.compile(r'\s+(?P<f>[\w,\*\s]+\(?(\w|,|\*|\s|\.\.\.)*\)?[,;])$')
+
+	for l in sections['SYNOPSIS'].split('\n'):
+		sl = l.strip()
+		if sl.startswith('#include'):
+			includes.append(sl[len('#include'):].strip())
+
+		m = fre.match(l.rstrip())
+		if m:
+			f = m.group('f')
+
+			# long prototypes are split across several lines: a
+			# previous entry not yet closed by ';' is still open,
+			# so this piece is appended to it
+			if funcs and not funcs[-1].endswith(';'):
+				funcs[-1] += ' ' + f
+			else:
+				funcs.append(f)
+	return (includes, funcs)
+
+
+if __name__ == '__main__':
+
+	if len(sys.argv) > 1:
+		print __doc__
+		sys.exit(1)
+
+	s = extract_sections(sys.stdin)
+	on_error = get_ret_on_error(s)
+	errnos = get_possible_errnos(s)
+	incs, funcs = get_defs(s)
+
+	print '\n'.join( 'include: ' + i for i in incs)
+	print
+
+	print '\n'.join(funcs)
+
+	if on_error:
+		print '\ton error:', ' || '.join(on_error)
+
+	if errnos:
+		print '\tvalid errnos:', wrap(' '.join(errnos), 60,
+				indent = 2)
+
+