libfiu (internal) 0.90
fiu.c
Go to the documentation of this file.
00001 
00002 #include <stdlib.h>             /* malloc() and friends */
00003 #include <string.h>             /* strcmp() and friends */
00004 #include <pthread.h>            /* mutexes */
00005 #include <sys/time.h>           /* gettimeofday() */
00006 #include <time.h>               /* gettimeofday() */
00007 #include <limits.h>             /* UINT_MAX */
00008 
00009 /* Enable us, so we get the real prototypes from the headers */
00010 #define FIU_ENABLE 1
00011 
00012 #include "fiu.h"
00013 #include "fiu-control.h"
00014 #include "internal.h"
00015 
00016 
/* Strategies a point of failure can use to decide whether it fails.
 * Numbering starts at 1, so a zero-filled pf_info selects none of them. */
enum pf_method {
        PF_ALWAYS   = 1,        /* fail on every hit */
        PF_PROB     = 2,        /* fail with a configured probability */
        PF_EXTERNAL = 3,        /* let a user-supplied callback decide */
        PF_STACK    = 4,        /* fail when a given function is in the stack */
};
00024 
00025 /* Point of failure information */
00026 struct pf_info {
00027         char *name;
00028         unsigned int namelen;
00029         int failnum;
00030         void *failinfo;
00031         unsigned int flags;
00032 
00033         /* How to decide when this point of failure fails, and the information
00034          * needed to take the decision */
00035         enum pf_method method;
00036         union {
00037                 /* To use when method == PF_PROB */
00038                 float probability;
00039 
00040                 /* To use when method == PF_EXTERNAL */
00041                 external_cb_t *external_cb;
00042 
00043                 /* To use when method == PF_STACK */
00044                 struct stack {
00045                         void *func_start;
00046                         void *func_end;
00047                         int func_pos_in_stack;
00048                 } stack;
00049         } minfo;
00050 };
00051 
00052 
00053 /* Array used to keep the information about the enabled points of failure.
00054  * It's an array because we assume it's going to be short enough for the
00055  * linear lookup not matter.
00056  * In the future, if it turns out it's normal that it grows large enough, we
00057  * may be interested in a more sophisticated structure like a hash table
00058  * and/or a bloom filter. */
00059 static struct pf_info *enabled_fails = NULL;
00060 static struct pf_info *enabled_fails_last = NULL;
00061 static size_t enabled_fails_len = 0;
00062 static size_t enabled_fails_nfree = 0;
00063 static pthread_rwlock_t enabled_fails_lock = PTHREAD_RWLOCK_INITIALIZER;
00064 
00065 #define ef_rlock() do { pthread_rwlock_rdlock(&enabled_fails_lock); } while (0)
00066 #define ef_wlock() do { pthread_rwlock_wrlock(&enabled_fails_lock); } while (0)
00067 #define ef_runlock() do { pthread_rwlock_unlock(&enabled_fails_lock); } while (0)
00068 #define ef_wunlock() do { pthread_rwlock_unlock(&enabled_fails_lock); } while (0)
00069 
00070 /* To prevent unwanted recursive calls that would deadlock, we use a
00071  * thread-local recursion count. Unwanted recursive calls can result from
00072  * using functions that have been modified to call fiu_fail(), which can
00073  * happen when using the POSIX preloader library: fiu_enable() takes the lock
00074  * for writing, and can call malloc() (for example), which can in turn call
00075  * fiu_fail() which can take the lock for reading.
00076  *
00077  * It is also modified at fiu-rc.c, to prevent failing within the remote
00078  * control thread.
00079  *
00080  * Sadly, we have to use the GNU extension for TLS, so we do not resort to
00081  * pthread_[get|set]specific() which could be wrapped. Luckily it's available
00082  * almost everywhere. */
00083 __thread int rec_count = 0;
00084 
00085 
00086 /* Maximum number of free elements in enabled_fails (used to decide when to
00087  * shrink). */
00088 #define EF_MAX_FREE 3
00089 
00090 /* How much to grow enabled_fails by each time, it's recommended that this is
00091  * less than EF_MAX_FREE. */
00092 #define EF_GROW 2
00093 
00094 
00095 /* Used to keep the last failinfo via TLS */
00096 static pthread_key_t last_failinfo_key;
00097 
00098 
00099 /*
00100  * Miscellaneous internal functions
00101  */
00102 
00103 /* Disables the given pf_info, assuming it's inside enabled_fails. Must be
00104  * called with enabled_fails_lock acquired. */
00105 static void disable_pf(struct pf_info *pf)
00106 {
00107         /* free the name we've allocated in setup_fail() via strdup() */
00108         free(pf->name);
00109         pf->name = NULL;
00110         pf->namelen = 0;
00111         pf->failnum = 0;
00112         pf->failinfo = NULL;
00113         pf->flags = 0;
00114 }
00115 
/* Returns the length of the common prefix of s1 and s2, which is also the
 * index of the first position where they differ (or where either ends). */
static unsigned int strlast(const char *s1, const char *s2)
{
        unsigned int n;

        /* s1[n] being non-NUL and equal to s2[n] implies s2[n] is non-NUL
         * too, so a single termination check is enough. */
        for (n = 0; s1[n] != '\0' && s1[n] == s2[n]; n++)
                ;

        return n;
}
00129 
00130 /* Checks if pf's name matches the one given. pf->name can be NULL. */
00131 static int name_matches(const struct pf_info *pf, const char *name, int exact)
00132 {
00133         if (pf->name == NULL || name == NULL)
00134                 return 0;
00135 
00136         if (exact || pf->name[pf->namelen - 1] != '*')
00137                 return strcmp(pf->name, name) == 0;
00138 
00139         /* Inexact match */
00140         return strlast(pf->name, name) >= pf->namelen - 1;
00141 }
00142 
00143 /* Shrink enabled_fails, used when it has too many free elements. Must be
00144  * called with enabled_fails_lock acquired. */
00145 static int shrink_enabled_fails(void)
00146 {
00147         int i;
00148         size_t new_len;
00149         struct pf_info *new, *pf;
00150 
00151         new_len = enabled_fails_len - enabled_fails_nfree + EF_GROW;
00152 
00153         new = malloc(new_len * sizeof(struct pf_info));
00154         if (new == NULL)
00155                 return -1;
00156 
00157         i = 0;
00158         for (pf = enabled_fails; pf <= enabled_fails_last; pf++) {
00159                 if (pf->name == NULL)
00160                         continue;
00161 
00162                 memcpy(new + i, pf, sizeof(struct pf_info));
00163                 i++;
00164         }
00165 
00166         memset(new + i, 0, (new_len - i) * sizeof(struct pf_info));
00167 
00168         free(enabled_fails);
00169         enabled_fails = new;
00170         enabled_fails_len = new_len;
00171         enabled_fails_last = new + new_len - 1;
00172         enabled_fails_nfree = EF_GROW;
00173 
00174         return 0;
00175 }
00176 
00177 /* Determines if the given address is within the function code. */
00178 static int pc_in_func(struct pf_info *pf, void *pc)
00179 {
00180         /* We don't know if the platform allows us to know func_end,
00181          * so we use different methods depending on its availability. */
00182         if (pf->minfo.stack.func_end) {
00183                 return (pc > pf->minfo.stack.func_start &&
00184                                 pc < pf->minfo.stack.func_end);
00185         } else {
00186                 return pf->minfo.stack.func_start == get_func_start(pc);
00187         }
00188 }
00189 
00190 /* Determines wether to fail or not the given failure point, which is of type
00191  * PF_STACK. Returns 1 if it should fail, or 0 if it should not. */
00192 static int should_stack_fail(struct pf_info *pf)
00193 {
00194         // TODO: Find the right offset for pos_in_stack: we should look for
00195         // fiu_fail(), and start counting from there.
00196         int nptrs, i;
00197         void *buffer[100];
00198 
00199         nptrs = get_backtrace(buffer, 100);
00200 
00201         for (i = 0; i < nptrs; i++) {
00202                 if (pc_in_func(pf, buffer[i]) &&
00203                                 (pf->minfo.stack.func_pos_in_stack == -1 ||
00204                                  i == pf->minfo.stack.func_pos_in_stack)) {
00205                         return 1;
00206                 }
00207         }
00208 
00209         return 0;
00210 }
00211 
/* Pseudorandom number generator.
 *
 * The performance of the PRNG matters a lot to us, so we implement our own
 * instead of just using drand48() or similar.
 *
 * As we don't really need a very good, thread-safe or secure random source,
 * we use an algorithm similar to the one used in rand() and drand48() (a
 * linear congruential generator, see
 * http://en.wikipedia.org/wiki/Linear_congruential_generator for more
 * information). Coefficients are the ones used in rand(), so we assume
 * sizeof(int) >= 4.
 *
 * To seed it, we use the current microseconds. To prevent seed reuse, we
 * re-seed after each fork (see atfork_child()). */
static unsigned int randd_xn = 0xA673F42D;

/* Seeds the generator with the microseconds part of the current time. If the
 * time cannot be obtained, the current state is kept rather than seeding
 * from an uninitialized value. */
static void prng_seed(void)
{
        struct timeval tv;

        if (gettimeofday(&tv, NULL) != 0)
                return;

        randd_xn = (unsigned int) tv.tv_usec;
}

/* Advances the generator and returns the new value scaled to [0, 1).
 *
 * The upper bound is exclusive on purpose: callers test
 * "probability > randd()", so a probability of 1 must always win, which
 * would not hold if 1.0 could be returned. */
static double randd(void)
{
        /* Unsigned arithmetic, so wrapping around is well defined */
        randd_xn = 1103515245 * randd_xn + 12345;

        return (double) randd_xn / ((double) UINT_MAX + 1.0);
}

/* Function that runs after the process has been forked, at the child. It's
 * registered via pthread_atfork() in fiu_init(). Re-seeding keeps the child
 * from replaying the parent's sequence. */
static void atfork_child(void)
{
        prng_seed();
}
00250 
00251 
00252 /*
00253  * Core API
00254  */
00255 
00256 /* Initializes the library. It should be safe to call this more than once at
00257  * any time, to allow several independant libraries to use fiu at the same
00258  * time without clashes. */
00259 int fiu_init(unsigned int flags)
00260 {
00261         /* Used to avoid re-initialization, protected by enabled_fails_lock */
00262         static int initialized = 0;
00263 
00264         rec_count++;
00265         ef_wlock();
00266         if (initialized) {
00267                 ef_wunlock();
00268                 rec_count--;
00269                 return 0;
00270         }
00271 
00272         pthread_key_create(&last_failinfo_key, NULL);
00273 
00274         enabled_fails = NULL;
00275         enabled_fails_last = NULL;
00276         enabled_fails_len = 0;
00277         enabled_fails_nfree = 0;
00278 
00279         if (pthread_atfork(NULL, NULL, atfork_child) != 0) {
00280                 ef_wunlock();
00281                 rec_count--;
00282                 return -1;
00283         }
00284 
00285         prng_seed();
00286 
00287         initialized = 1;
00288 
00289         ef_wunlock();
00290         rec_count--;
00291         return 0;
00292 }
00293 
00294 /* Returns the failure status of the given name. Must work well even before
00295  * fiu_init() is called assuming no points of failure are enabled; although it
00296  * can (and does) assume fiu_init() will be called before enabling any. */
00297 int fiu_fail(const char *name)
00298 {
00299         struct pf_info *pf;
00300         int failnum;
00301 
00302         rec_count++;
00303 
00304         /* We must do this before acquiring the lock and calling any
00305          * (potentially wrapped) functions. */
00306         if (rec_count > 1) {
00307                 rec_count--;
00308                 return 0;
00309         }
00310 
00311         ef_rlock();
00312 
00313         if (enabled_fails == NULL) {
00314                 ef_runlock();
00315                 rec_count--;
00316                 return 0;
00317         }
00318 
00319         for (pf = enabled_fails; pf <= enabled_fails_last; pf++) {
00320                 if (name_matches(pf, name, 0)) {
00321                         switch (pf->method) {
00322                         case PF_ALWAYS:
00323                                 goto exit_fail;
00324                                 break;
00325                         case PF_PROB:
00326                                 if (pf->minfo.probability > randd())
00327                                         goto exit_fail;
00328                                 break;
00329                         case PF_EXTERNAL:
00330                                 if (pf->minfo.external_cb(pf->name,
00331                                                 &(pf->failnum),
00332                                                 &(pf->failinfo),
00333                                                 &(pf->flags)))
00334                                         goto exit_fail;
00335                                 break;
00336                         case PF_STACK:
00337                                 if (should_stack_fail(pf))
00338                                         goto exit_fail;
00339                                 break;
00340                         default:
00341                                 break;
00342                         }
00343 
00344                         break;
00345                 }
00346         }
00347 
00348         ef_runlock();
00349         rec_count--;
00350         return 0;
00351 
00352 exit_fail:
00353         pthread_setspecific(last_failinfo_key,
00354                         pf->failinfo);
00355         failnum = pf->failnum;
00356 
00357         if (pf->flags & FIU_ONETIME) {
00358                 disable_pf(pf);
00359                 enabled_fails_nfree++;
00360         }
00361 
00362         ef_runlock();
00363         rec_count--;
00364         return failnum;
00365 }
00366 
00367 /* Returns the information associated with the last fail. */
00368 void *fiu_failinfo(void)
00369 {
00370         return pthread_getspecific(last_failinfo_key);
00371 }
00372 
00373 
00374 /*
00375  * Control API
00376  */
00377 
00378 /* Sets up the given pf.
00379  * Only the common fields are filled, the caller should take care of the
00380  * method-specific ones. For internal use only. */
00381 static int setup_fail(struct pf_info *pf, const char *name, int failnum,
00382                 void *failinfo, unsigned int flags, enum pf_method method)
00383 {
00384         pf->name = strdup(name);
00385         if (pf->name == NULL)
00386                 return -1;
00387 
00388         pf->namelen = strlen(name);
00389         pf->failnum = failnum;
00390         pf->failinfo = failinfo;
00391         pf->flags = flags;
00392         pf->method = method;
00393 
00394         return 0;
00395 }
00396 
00397 /* Creates a new pf in the enabled_fails table.
00398  * Only the common fields are filled, the caller should take care of the
00399  * method-specific ones. For internal use only. */
00400 static struct pf_info *insert_new_fail(const char *name, int failnum,
00401                 void *failinfo, unsigned int flags, enum pf_method method)
00402 {
00403         struct pf_info *pf = NULL;
00404         int rv = -1;
00405         size_t prev_len;
00406 
00407         rec_count++;
00408 
00409         /* See if it's already there and update the data if so, or if we have
00410          * a free spot where to put it */
00411         ef_wlock();
00412         if (enabled_fails != NULL && enabled_fails_nfree > 0) {
00413                 for (pf = enabled_fails; pf <= enabled_fails_last; pf++) {
00414                         if (pf->name == NULL || strcmp(pf->name, name) == 0) {
00415                                 rv = setup_fail(pf, name, failnum, failinfo,
00416                                                 flags, method);
00417                                 if (rv != 0) {
00418                                         pf = NULL;
00419                                         goto exit;
00420                                 }
00421 
00422                                 enabled_fails_nfree--;
00423                                 goto exit;
00424                         }
00425                 }
00426 
00427                 /* There should be a free slot, but couldn't find one! This
00428                  * shouldn't happen */
00429                 pf = NULL;
00430                 goto exit;
00431         }
00432 
00433         /* There are no free slots in enabled_fails, so we must grow it */
00434         enabled_fails = realloc(enabled_fails,
00435                         (enabled_fails_len + EF_GROW) * sizeof(struct pf_info));
00436         if (enabled_fails == NULL) {
00437                 enabled_fails_last = NULL;
00438                 enabled_fails_len = 0;
00439                 enabled_fails_nfree = 0;
00440                 pf = NULL;
00441                 goto exit;
00442         }
00443 
00444         prev_len = enabled_fails_len;
00445         enabled_fails_len += EF_GROW;
00446         enabled_fails_nfree = EF_GROW;
00447 
00448         memset(enabled_fails + prev_len, 0,
00449                         EF_GROW * sizeof(struct pf_info));
00450 
00451         enabled_fails_last = enabled_fails + enabled_fails_len - 1;
00452 
00453         pf = enabled_fails + prev_len;
00454         rv = setup_fail(pf, name, failnum, failinfo, flags, method);
00455         if (rv != 0) {
00456                 pf = NULL;
00457                 goto exit;
00458         }
00459 
00460         enabled_fails_nfree--;
00461 
00462 exit:
00463         ef_wunlock();
00464         rec_count--;
00465         return pf;
00466 }
00467 
00468 /* Makes the given name fail. */
00469 int fiu_enable(const char *name, int failnum, void *failinfo,
00470                 unsigned int flags)
00471 {
00472         struct pf_info *pf;
00473 
00474         pf = insert_new_fail(name, failnum, failinfo, flags, PF_ALWAYS);
00475         if (pf == NULL)
00476                 return -1;
00477 
00478         return 0;
00479 }
00480 
00481 /* Makes the given name fail with the given probability. */
00482 int fiu_enable_random(const char *name, int failnum, void *failinfo,
00483                 unsigned int flags, float probability)
00484 {
00485         struct pf_info *pf;
00486 
00487         pf = insert_new_fail(name, failnum, failinfo, flags, PF_PROB);
00488         if (pf == NULL)
00489                 return -1;
00490 
00491         pf->minfo.probability = probability;
00492         return 0;
00493 }
00494 
00495 /* Makes the given name fail when the external function returns != 0. */
00496 int fiu_enable_external(const char *name, int failnum, void *failinfo,
00497                 unsigned int flags, external_cb_t *external_cb)
00498 {
00499         struct pf_info *pf;
00500 
00501         pf = insert_new_fail(name, failnum, failinfo, flags, PF_EXTERNAL);
00502         if (pf == NULL)
00503                 return -1;
00504 
00505         pf->minfo.external_cb = external_cb;
00506         return 0;
00507 }
00508 
00509 /* Makes the given name fail when func is in the stack at func_pos.
00510  * If func_pos is -1, then any position will match. */
00511 int fiu_enable_stack(const char *name, int failnum, void *failinfo,
00512                 unsigned int flags, void *func, int func_pos_in_stack)
00513 {
00514         struct pf_info *pf;
00515 
00516         /* Specifying the stack position is unsupported for now */
00517         if (func_pos_in_stack != -1)
00518                 return -1;
00519 
00520         pf = insert_new_fail(name, failnum, failinfo, flags, PF_STACK);
00521         if (pf == NULL)
00522                 return -1;
00523 
00524         pf->minfo.stack.func_start = func;
00525         pf->minfo.stack.func_end = get_func_end(func);
00526         pf->minfo.stack.func_pos_in_stack = func_pos_in_stack;
00527         return 0;
00528 }
00529 
00530 /* Same as fiu_enable_stack(), but takes a function name. */
00531 int fiu_enable_stack_by_name(const char *name, int failnum, void *failinfo,
00532                 unsigned int flags, const char *func_name,
00533                 int func_pos_in_stack)
00534 {
00535         void *fp;
00536 
00537         fp = get_func_addr(func_name);
00538         if (fp == NULL)
00539                 return -1;
00540 
00541         return fiu_enable_stack(name, failnum, failinfo, flags, fp,
00542                         func_pos_in_stack);
00543 }
00544 
00545 /* Makes the given name NOT fail. */
00546 int fiu_disable(const char *name)
00547 {
00548         struct pf_info *pf;
00549 
00550         rec_count++;
00551 
00552         /* just find the point of failure and mark it as free by setting its
00553          * name to NULL */
00554         ef_wlock();
00555 
00556         if (enabled_fails == NULL) {
00557                 ef_wunlock();
00558                 rec_count--;
00559                 return -1;
00560         }
00561 
00562         for (pf = enabled_fails; pf <= enabled_fails_last; pf++) {
00563                 if (name_matches(pf, name, 1)) {
00564                         disable_pf(pf);
00565                         enabled_fails_nfree++;
00566                         if (enabled_fails_nfree > EF_MAX_FREE)
00567                                 shrink_enabled_fails();
00568                         ef_wunlock();
00569                         rec_count--;
00570                         return 0;
00571                 }
00572         }
00573 
00574         ef_wunlock();
00575         rec_count--;
00576         return -1;
00577 }
00578 
00579