/*
 * libjio C preloader
 * Alberto Bertogli (albertito@blitiri.com.ar)
 *
 * This generates a shared object that, when prelinked, can be used to make an
 * existing application to use libjio for UNIX I/O.
 * It's not nice or pretty, and does some nasty tricks to work both with and
 * without LFS. I don't think it builds or works without glibc.
 */
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <dlfcn.h>
/* we don't build this with LFS, however, it's essential that the proper
 * environment is set for libjio's loading; otherwise we would mess the ABI
 * up */
typedef long long off64_t;
#define _FILE_OFFSET_BITS 64
#define off_t off64_t
#include <libjio.h>
#undef off_t
#undef _FILE_OFFSET_BITS
/* maximum number of simultaneous open file descriptors we support */
#define MAXFD (4096 * 2)
/* recursion counter, per-thread */
static int __thread called = 0;
/* C library functions, filled via the dynamic loader */
static void *libc;
static int (*c_open)(const char *pathname, int flags, mode_t mode);
static int (*c_open64)(const char *pathname, int flags, mode_t mode);
static int (*c_close)(int fd);
static int (*c_unlink)(const char *pathname);
static ssize_t (*c_read)(int fd, void *buf, size_t count);
static ssize_t (*c_pread)(int fd, void *buf, size_t count, off_t offset);
static ssize_t (*c_pread64)(int fd, void *buf, size_t count, off64_t offset);
static ssize_t (*c_readv)(int fd, const struct iovec *vector, int count);
static ssize_t (*c_write)(int fd, const void *buf, size_t count);
static ssize_t (*c_pwrite)(int fd, const void *buf, size_t count, off_t offset);
static ssize_t (*c_pwrite64)(int fd, const void *buf, size_t count, off64_t offset);
static ssize_t (*c_writev)(int fd, const struct iovec *vector, int count);
static int (*c_ftruncate)(int fd, off_t length);
static int (*c_ftruncate64)(int fd, off64_t length);
static off_t (*c_lseek)(int fd, off_t offset, int whence);
static off64_t (*c_lseek64)(int fd, off64_t offset, int whence);
static int (*c_fsync)(int fd);
static int (*c_dup)(int oldfd);
static int (*c_dup2)(int oldfd, int newfd);
/* file descriptor table, to translate fds to jfs */
struct fd_entry {
	int fd;
	unsigned int *refcount;
	jfs_t *fs;
	pthread_mutex_t lock;
};
static struct fd_entry fd_table[MAXFD];
/* useful macros, mostly for debugging purposes */
#if 1
	#define rec_inc() do { called++; } while(0)
	#define rec_dec() do { called--; } while(0)
	#define printd(...) do { } while(0)
#else
	/* debug variants */
	#define rec_inc()				\
		do {					\
			called++;			\
			fprintf(stderr, "I: %d\n", called); \
			fflush(stderr);			\
		} while (0)
	#define rec_dec()				\
		do {					\
			called--;			\
			fprintf(stderr, "D: %d\n", called); \
			fflush(stderr);			\
		} while (0)
	#define printd(...)				\
		do {					\
			if (called)			\
				fprintf(stderr, "\t");	\
			called++;			\
			fprintf(stderr, "%5.5d ", getpid()); \
			fprintf(stderr, "%s(): ", __FUNCTION__ ); \
			fprintf(stderr, __VA_ARGS__);	\
			fflush(stderr);			\
			called--;			\
		} while(0)
#endif
/* functions used to lock fds from the table; they do boundary checks so we
 * catch out of bounds accesses */
static inline int fd_lock(int fd)
{
	int r;
	if (fd < 0 || fd >= MAXFD) {
		printd("locking out of bounds fd %d\n", fd);
		return 0;
	}
	//printd("L %d\n", fd);
	r = pthread_mutex_lock(&(fd_table[fd].lock));
	//printd("OK %d\n", fd);
	return !r;
}
static inline int fd_unlock(int fd)
{
	int r;
	if (fd < 0 || fd >= MAXFD) {
		printd("unlocking out of bounds fd %d\n", fd);
		return 0;
	}
	//printd("U %d\n", fd);
	r = pthread_mutex_unlock(&(fd_table[fd].lock));
	//printd("OK %d\n", fd);
	return !r;
}
/*
 * library intialization
 */
static int __attribute__((constructor)) init(void)
{
	int i;
	pthread_mutexattr_t attr;
	printd("starting\n");
	/* initialize fd_table */
	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
	for (i = 0; i < MAXFD; i++) {
		fd_table[i].fd = -1;
		fd_table[i].fs = NULL;
		pthread_mutex_init(&(fd_table[i].lock), &attr);
	}
	pthread_mutexattr_destroy(&attr);
	/* dynamically load the C library */
	libc = dlopen("libc.so.6", RTLD_NOW);
	if (libc == NULL) {
		printd("Error loading libc: %s\n", dlerror());
		return 0;
	}
	/* load symbols from the C library */
	#define libc_load(F) c_##F = dlsym(libc, #F)
	libc_load(open);
	libc_load(open64);
	libc_load(close);
	libc_load(unlink);
	libc_load(read);
	libc_load(pread);
	libc_load(pread64);
	libc_load(readv);
	libc_load(write);
	libc_load(pwrite);
	libc_load(pwrite64);
	libc_load(writev);
	libc_load(ftruncate);
	libc_load(ftruncate64);
	libc_load(lseek);
	libc_load(lseek64);
	libc_load(fsync);
	libc_load(dup);
	libc_load(dup2);
	printd("done\n");
	return 1;
}
/*
 * wrappers
 */
int open(const char *pathname, int flags, ...)
{
	int r, fd;
	jfs_t *fs;
	mode_t mode;
	struct stat st;
	va_list l;
	if (flags & O_CREAT) {
		va_start(l, flags);
		mode = va_arg(l, mode_t);
		va_end(l);
	} else {
		/* set it to 0, it's ignored anyway */
		mode = 0;
	}
	if (called) {
		printd("orig (r)\n");
		return (*c_open)(pathname, flags, mode);
	}
	printd("libjio\n");
	/* skip special files */
	r = stat(pathname, &st);
	if (r == 0 && ( S_ISDIR(st.st_mode) \
			|| S_ISCHR(st.st_mode) \
			|| S_ISFIFO(st.st_mode) ) ) {
		printd("orig (s)\n");
		return (*c_open)(pathname, flags, mode);
	}
	/* skip /proc and /sys (not /dev, the problematic files are taken care
	 * of with the stat test above */
	/* FIXME: this breaks with relative paths */
	if ( (strncmp("/proc", pathname, 5) == 0) ||
			(strncmp("/sys", pathname, 4) == 0) ) {
		printd("orig (f)\n");
		return (*c_open)(pathname, flags, mode);
	}
	rec_inc();
	fs = jopen(pathname, flags, mode, 0);
	if (fs == NULL) {
		rec_dec();
		return -1;
	}
	rec_dec();
	fd = jfileno(fs);
	fd_lock(fd);
	fd_table[fd].fd = fd;
	fd_table[fd].refcount = malloc(sizeof(unsigned int));
	*fd_table[fd].refcount = 1;
	fd_table[fd].fs = fs;
	fd_unlock(fd);
	printd("return %d\n", fd);
	return fd;
}
/* exact copy of open(), but call c_open64 instead of c_open */
int open64(const char *pathname, int flags, ...)
{
	int r, fd;
	jfs_t *fs;
	mode_t mode;
	struct stat st;
	va_list l;
	if (flags & O_CREAT) {
		va_start(l, flags);
		mode = va_arg(l, mode_t);
		va_end(l);
	} else {
		/* set it to 0, it's ignored anyway */
		mode = 0;
	}
	if (called) {
		printd("orig (r)\n");
		return (*c_open64)(pathname, flags, mode);
	}
	printd("libjio\n");
	/* skip special files */
	r = stat(pathname, &st);
	if (r == 0 && ( S_ISDIR(st.st_mode) \
			|| S_ISCHR(st.st_mode) \
			|| S_ISFIFO(st.st_mode) ) ) {
		printd("orig (s)\n");
		return (*c_open64)(pathname, flags, mode);
	}
	/* skip /proc and /sys (not /dev, the problematic files are taken care
	 * of with the stat test above */
	/* FIXME: this breaks with relative paths */
	if ( (strncmp("/proc", pathname, 5) == 0) ||
			(strncmp("/sys", pathname, 4) == 0) ) {
		printd("orig (f)\n");
		return (*c_open64)(pathname, flags, mode);
	}
	rec_inc();
	fs = jopen(pathname, flags, mode, 0);
	if (fs == NULL) {
		rec_dec();
		return -1;
	}
	rec_dec();
	fd = jfileno(fs);
	fd_lock(fd);
	fd_table[fd].fd = fd;
	fd_table[fd].refcount = malloc(sizeof(unsigned int));
	*fd_table[fd].refcount = 1;
	fd_table[fd].fs = fs;
	fd_unlock(fd);
	printd("return %d\n", fd);
	return fd;
}
/* close() is split in two functions: unlocked_close() that performs the real
 * actual close and cleanup, and close() which takes care of the locking and
 * calls unlocked_close(); this is because in dup*() we need to close with
 * locks already held to avoid races. */
int unlocked_close(int fd)
{
	int r;
	if (*fd_table[fd].refcount > 1) {
		/* we still have references, don't really close */
		printd("not closing, refcount: %d\n", *fd_table[fd].refcount);
		(*fd_table[fd].refcount)--;
		fd_table[fd].fd = -1;
		fd_table[fd].refcount = NULL;
		fd_table[fd].fs = NULL;
		return 0;
	}
	rec_inc();
	r = jclose(fd_table[fd].fs);
	rec_dec();
	fd_table[fd].fd = -1;
	free(fd_table[fd].refcount);
	fd_table[fd].refcount = NULL;
	fd_table[fd].fs = NULL;
	return r;
}
int close(int fd)
{
	int r;
	jfs_t *fs;
	if (called) {
		printd("orig\n");
		return (*c_close)(fd);
	}
	if (!fd_lock(fd)) {
		printd("out of bounds fd: %d\n", fd);
		return -1;
	}
	fs = fd_table[fd].fs;
	if (fs == NULL) {
		printd("NULL fs, fd %d\n", fd);
		fd_unlock(fd);
		return (*c_close)(fd);
	}
	printd("libjio\n");
	r = unlocked_close(fd);
	fd_unlock(fd);
	printd("return %d\n", r);
	return r;
}
int unlink(const char *pathname)
{
	int r;
	struct jfsck_result res;
	if (called) {
		printd("orig\n");
		return (*c_unlink)(pathname);
	}
	printd("libjio\n");
	rec_inc();
	r = jfsck(pathname, NULL, &res, 0);
	rec_dec();
	r = (*c_unlink)(pathname);
	printd("return %d\n", r);
	return r;
}
int dup(int oldfd)
{
	int r;
	if (called) {
		printd("orig\n");
		return (*c_dup)(oldfd);
	}
	if (fd_table[oldfd].fs == NULL) {
		printd("NULL fs, fd %d\n", oldfd);
		fd_unlock(oldfd);
		return (*c_dup)(oldfd);
	}
	if (!fd_lock(oldfd)) {
		printd("out of bounds fd: %d\n", oldfd);
		return -1;
	}
	printd("libjio\n");
	rec_inc();
	r = (*c_dup)(oldfd);
	rec_dec();
	if (r >= 0) {
		fd_lock(r);
		fd_table[r].fd = r;
		fd_table[r].refcount = fd_table[oldfd].refcount;
		(*fd_table[r].refcount)++;
		fd_table[r].fs = fd_table[oldfd].fs;
		fd_unlock(r);
	}
	fd_unlock(oldfd);
	printd("return %d\n", r);
	return r;
}
int dup2(int oldfd, int newfd)
{
	int r;
	if (called) {
		printd("orig\n");
		return (*c_dup2)(oldfd, newfd);
	}
	if (!fd_lock(oldfd)) {
		printd("out of bounds fd: %d\n", oldfd);
		return -1;
	}
	if (fd_table[oldfd].fs == NULL) {
		printd("NULL fs, fd %d\n", oldfd);
		fd_unlock(oldfd);
		return (*c_dup2)(oldfd, newfd);
	}
	printd("libjio\n");
	rec_inc();
	r = (*c_dup2)(oldfd, newfd);
	rec_dec();
	if (r >= 0) {
		fd_lock(newfd);
		if (fd_table[newfd].fs != NULL) {
			unlocked_close(newfd);
		}
		fd_table[newfd].fd = newfd;
		fd_table[newfd].refcount = fd_table[oldfd].refcount;
		(*fd_table[newfd].refcount)++;
		fd_table[newfd].fs = fd_table[oldfd].fs;
		fd_unlock(newfd);
	}
	fd_unlock(oldfd);
	printd("return %d\n", r);
	return r;
}
/* the rest of the functions are automagically generated from the following
 * macro. The ugliest. I'm so proud. */
#define mkwrapper(rtype, name, DEF, INVR, INVM)			\
	rtype name DEF						\
	{ 							\
		rtype r;					\
		jfs_t *fs;					\
								\
		if (called) {					\
			printd("orig\n");			\
			return (*c_##name) INVR;		\
		}						\
								\
		if (!fd_lock(fd)) {				\
			printd("out of bounds fd: %d\n", fd);	\
			return -1;				\
		}						\
		fs = fd_table[fd].fs;				\
		if (fs == NULL) {				\
			printd("(): NULL fs, fd %d\n", fd); 	\
			fd_unlock(fd);				\
			return (*c_##name) INVR;		\
		}						\
		printd("libjio\n");				\
								\
		rec_inc();					\
		r = j##name INVM;				\
		rec_dec();					\
		fd_unlock(fd);					\
								\
		printd("return %lld\n", (long long) r); 	\
		return r;					\
	}
/* 32-bit versions */
mkwrapper(ssize_t, read, (int fd, void *buf, size_t count),
		(fd, buf, count), (fs, buf, count) );
mkwrapper(ssize_t, pread, (int fd, void *buf, size_t count, off_t offset),
		(fd, buf, count, offset), (fs, buf, count, offset) );
mkwrapper(ssize_t, readv, (int fd, const struct iovec *vector, int count),
		(fd, vector, count), (fs, vector, count) );
mkwrapper(ssize_t, write, (int fd, const void *buf, size_t count),
		(fd, buf, count), (fs, buf, count) );
mkwrapper(ssize_t, pwrite,
		(int fd, const void *buf, size_t count, off_t offset),
		(fd, buf, count, offset), (fs, buf, count, offset) );
mkwrapper(ssize_t, writev, (int fd, const struct iovec *vector, int count),
		(fd, vector, count), (fs, vector, count) );
mkwrapper(off_t, lseek, (int fd, off_t offset, int whence),
		(fd, offset, whence), (fs, offset, whence) );
/* libjio defines jtruncate and jsync, not jftruncate and jfsync, which breaks
 * the macro; so we add a nice #define to unbreak it */
#define jftruncate jtruncate
mkwrapper(int, ftruncate, (int fd, off_t length),
		(fd, length), (fs, length) );
#define jfsync jsync
mkwrapper(int, fsync, (int fd), (fd), (fs) );
/* 64-bit versions */
#define jpread64 jpread
mkwrapper(ssize_t, pread64, (int fd, void *buf, size_t count, off64_t offset),
		(fd, buf, count, offset), (fs, buf, count, offset) );
#define jpwrite64 jpwrite
mkwrapper(ssize_t, pwrite64,
		(int fd, const void *buf, size_t count, off64_t offset),
		(fd, buf, count, offset), (fs, buf, count, offset) );
#define jlseek64 jlseek
mkwrapper(off64_t, lseek64, (int fd, off64_t offset, int whence),
		(fd, offset, whence), (fs, offset, whence) );
#define jftruncate64 jtruncate
mkwrapper(int, ftruncate64, (int fd, off64_t length),
		(fd, length), (fs, length) );