00001
00002
00003
00004
00005
00006 #include <sys/types.h>
00007 #include <sys/stat.h>
00008 #include <fcntl.h>
00009 #include <unistd.h>
00010 #include <stdlib.h>
00011 #include <limits.h>
00012 #include <string.h>
00013 #include <stdio.h>
00014 #include <errno.h>
00015 #include <stdint.h>
00016 #include <arpa/inet.h>
00017 #include <netinet/in.h>
00018
00019 #include "libjio.h"
00020 #include "common.h"
00021 #include "compat.h"
00022 #include "journal.h"
00023 #include "trans.h"
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
/**
 * Header record found at the very beginning of a transaction file.
 *
 * The structure is packed because it is written to and read from disk
 * verbatim; the fields are kept in network byte order while on disk (see
 * hdr_hton() and hdr_ntoh()).
 */
struct __attribute__((packed)) on_disk_hdr {
	/* On-disk format version; this file writes and accepts only 1. */
	uint16_t ver;

	/* Transaction flags, as passed to journal_new(). */
	uint16_t flags;

	/* Transaction id. */
	uint32_t trans_id;
};
00053
/**
 * Per-operation header; on disk it is immediately followed by `len` bytes
 * of operation data.
 *
 * A header whose len and offset are both 0 is used as the end-of-operations
 * marker (written by journal_commit(), recognised by fill_trans()).
 *
 * Packed because it is written to disk verbatim; fields are in network byte
 * order while on disk (see ophdr_hton() and ophdr_ntoh()).
 */
struct __attribute__((packed)) on_disk_ophdr {
	/* Number of data bytes following this header. */
	uint32_t len;

	/* Offset given to journal_add_op() for this operation. */
	uint64_t offset;
};
00059
/**
 * Trailer record that closes a transaction file.
 *
 * Packed because it is written to disk verbatim; journal_commit() stores the
 * fields in network byte order (see trailer_hton() and trailer_ntoh()).
 */
struct __attribute__((packed)) on_disk_trailer {
	/* Number of operations recorded in the transaction file. */
	uint32_t numops;

	/* Checksum of everything in the file that precedes the trailer. */
	uint32_t checksum;
};
00065
00066
00067
00068
00069 static void hdr_hton(struct on_disk_hdr *hdr)
00070 {
00071 hdr->ver = htons(hdr->ver);
00072 hdr->flags = htons(hdr->flags);
00073 hdr->trans_id = htonl(hdr->trans_id);
00074 }
00075
00076 static void hdr_ntoh(struct on_disk_hdr *hdr)
00077 {
00078 hdr->ver = ntohs(hdr->ver);
00079 hdr->flags = ntohs(hdr->flags);
00080 hdr->trans_id = ntohl(hdr->trans_id);
00081 }
00082
00083 static void ophdr_hton(struct on_disk_ophdr *ophdr)
00084 {
00085 ophdr->len = htonl(ophdr->len);
00086 ophdr->offset = htonll(ophdr->offset);
00087 }
00088
00089 static void ophdr_ntoh(struct on_disk_ophdr *ophdr)
00090 {
00091 ophdr->len = ntohl(ophdr->len);
00092 ophdr->offset = ntohll(ophdr->offset);
00093 }
00094
00095 static void trailer_hton(struct on_disk_trailer *trailer) {
00096 trailer->numops = htonl(trailer->numops);
00097 trailer->checksum = htonl(trailer->checksum);
00098 }
00099
00100 static void trailer_ntoh(struct on_disk_trailer *trailer) {
00101 trailer->numops = ntohl(trailer->numops);
00102 trailer->checksum = ntohl(trailer->checksum);
00103 }
00104
00105
00106
00107
00108
00109
00111 static unsigned int get_tid(struct jfs *fs)
00112 {
00113 unsigned int curid, rv;
00114
00115
00116 plockf(fs->jfd, F_LOCKW, 0, 0);
00117
00118
00119 curid = *(fs->jmap);
00120
00121 fiu_do_on("jio/get_tid/overflow", curid = -1);
00122
00123
00124 rv = curid + 1;
00125 if (rv == 0)
00126 goto exit;
00127
00128
00129 *(fs->jmap) = rv;
00130
00131 exit:
00132 plockf(fs->jfd, F_UNLOCK, 0, 0);
00133 return rv;
00134 }
00135
00137 static void free_tid(struct jfs *fs, unsigned int tid)
00138 {
00139 unsigned int curid, i;
00140 char name[PATH_MAX];
00141
00142
00143 plockf(fs->jfd, F_LOCKW, 0, 0);
00144
00145
00146 curid = *(fs->jmap);
00147
00148
00149
00150 if (tid == curid) {
00151
00152 for (i = curid - 1; i > 0; i--) {
00153 get_jtfile(fs, i, name);
00154 if (access(name, R_OK | W_OK) == 0) {
00155 break;
00156 } else if (errno != EACCES) {
00157
00158
00159
00160 break;
00161 }
00162 }
00163
00164
00165 *(fs->jmap) = i;
00166 }
00167
00168 plockf(fs->jfd, F_UNLOCK, 0, 0);
00169 return;
00170 }
00171
00172
/* Set once the sync() fallback warning has been printed, so that fsync_dir()
 * emits it at most one time. */
static int already_warned_about_sync = 0;

/**
 * Flush a directory to disk given its open file descriptor.
 *
 * fsync() is tried first. If it fails with EINVAL or EBADF, the function
 * falls back to a global sync(), reports success, and prints a warning to
 * stderr the first time this happens.
 *
 * @param fd file descriptor of the directory
 * @return 0 on success (including the sync() fallback); otherwise the
 *	non-zero result of fsync(), with errno as set by it
 */
static int fsync_dir(int fd)
{
	int rv = fsync(fd);

	if (rv == 0)
		return 0;

	/* Failures other than EINVAL/EBADF are reported to the caller. */
	if (errno != EINVAL && errno != EBADF)
		return rv;

	sync();

	if (!already_warned_about_sync) {
		fprintf(stderr, "libjio warning: falling back on "
				"sync() for directory syncing\n");
		already_warned_about_sync = 1;
	}

	return 0;
}
00200
00203 static int corrupt_journal_file(struct journal_op *jop)
00204 {
00205 off_t pos;
00206 struct on_disk_trailer trailer;
00207
00208
00209
00210
00211 trailer.numops = 0;
00212 trailer.checksum = 0xffffffff;
00213
00214 pos = lseek(jop->fd, 0, SEEK_END);
00215 if (pos == (off_t) -1)
00216 return -1;
00217
00218 if (pwrite(jop->fd, (void *) &trailer, sizeof(trailer), pos)
00219 != sizeof(trailer))
00220 return -1;
00221
00222 if (fdatasync(jop->fd) != 0)
00223 return -1;
00224
00225 return 0;
00226 }
00227
00232 static int mark_broken(struct jfs *fs)
00233 {
00234 char broken_path[PATH_MAX];
00235 int fd;
00236
00237 snprintf(broken_path, PATH_MAX, "%s/broken", fs->jdir);
00238 fd = creat(broken_path, 0600);
00239 close(fd);
00240
00241 return fd >= 0;
00242 }
00243
00245 static int is_broken(struct jfs *fs)
00246 {
00247 char broken_path[PATH_MAX];
00248
00249 snprintf(broken_path, PATH_MAX, "%s/broken", fs->jdir);
00250 return access(broken_path, F_OK) == 0;
00251 }
00252
00253
00254
00255
00256
00257
00260 struct journal_op *journal_new(struct jfs *fs, unsigned int flags)
00261 {
00262 int fd, id;
00263 ssize_t rv;
00264 char *name = NULL;
00265 struct journal_op *jop = NULL;
00266 struct on_disk_hdr hdr;
00267 struct iovec iov[1];
00268
00269 if (is_broken(fs))
00270 goto error;
00271
00272 jop = malloc(sizeof(struct journal_op));
00273 if (jop == NULL)
00274 goto error;
00275
00276 name = (char *) malloc(PATH_MAX);
00277 if (name == NULL)
00278 goto error;
00279
00280 id = get_tid(fs);
00281 if (id == 0)
00282 goto error;
00283
00284
00285 get_jtfile(fs, id, name);
00286 fd = open(name, O_RDWR | O_CREAT | O_TRUNC, 0600);
00287 if (fd < 0)
00288 goto error;
00289
00290 if (plockf(fd, F_LOCKW, 0, 0) != 0)
00291 goto unlink_error;
00292
00293 jop->id = id;
00294 jop->fd = fd;
00295 jop->numops = 0;
00296 jop->name = name;
00297 jop->csum = 0;
00298 jop->fs = fs;
00299
00300 fiu_exit_on("jio/commit/created_tf");
00301
00302
00303 hdr.ver = 1;
00304 hdr.trans_id = id;
00305 hdr.flags = flags;
00306 hdr_hton(&hdr);
00307
00308 iov[0].iov_base = (void *) &hdr;
00309 iov[0].iov_len = sizeof(hdr);
00310 rv = swritev(fd, iov, 1);
00311 if (rv != sizeof(hdr))
00312 goto unlink_error;
00313
00314 jop->csum = checksum_buf(jop->csum, (unsigned char *) &hdr,
00315 sizeof(hdr));
00316
00317 fiu_exit_on("jio/commit/tf_header");
00318
00319 return jop;
00320
00321 unlink_error:
00322 unlink(name);
00323 free_tid(fs, id);
00324 close(fd);
00325
00326 error:
00327 free(name);
00328 free(jop);
00329
00330 return NULL;
00331 }
00332
00334 int journal_add_op(struct journal_op *jop, unsigned char *buf, size_t len,
00335 off_t offset)
00336 {
00337 ssize_t rv;
00338 struct on_disk_ophdr ophdr;
00339 struct iovec iov[2];
00340
00341 ophdr.len = len;
00342 ophdr.offset = offset;
00343 ophdr_hton(&ophdr);
00344
00345 iov[0].iov_base = (void *) &ophdr;
00346 iov[0].iov_len = sizeof(ophdr);
00347 jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
00348 sizeof(ophdr));
00349
00350 iov[1].iov_base = (void *) buf;
00351 iov[1].iov_len = len;
00352 jop->csum = checksum_buf(jop->csum, buf, len);
00353
00354 fiu_exit_on("jio/commit/tf_pre_addop");
00355
00356 rv = swritev(jop->fd, iov, 2);
00357 if (rv != sizeof(ophdr) + len)
00358 goto error;
00359
00360 fiu_exit_on("jio/commit/tf_addop");
00361
00362 jop->numops++;
00363
00364 return 0;
00365
00366 error:
00367 return -1;
00368 }
00369
00371 void journal_pre_commit(struct journal_op *jop)
00372 {
00373
00374
00375
00376 sync_range_submit(jop->fd, 0, 0);
00377 }
00378
00380 int journal_commit(struct journal_op *jop)
00381 {
00382 ssize_t rv;
00383 struct on_disk_ophdr ophdr;
00384 struct on_disk_trailer trailer;
00385 struct iovec iov[2];
00386
00387
00388
00389 ophdr.len = 0;
00390 ophdr.offset = 0;
00391 ophdr_hton(&ophdr);
00392 iov[0].iov_base = (void *) &ophdr;
00393 iov[0].iov_len = sizeof(ophdr);
00394 jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
00395 sizeof(ophdr));
00396
00397 trailer.checksum = jop->csum;
00398 trailer.numops = jop->numops;
00399 trailer_hton(&trailer);
00400 iov[1].iov_base = (void *) &trailer;
00401 iov[1].iov_len = sizeof(trailer);
00402
00403 rv = swritev(jop->fd, iov, 2);
00404 if (rv != sizeof(ophdr) + sizeof(trailer))
00405 goto error;
00406
00407
00408
00409
00410
00411
00412 if (fsync(jop->fd) != 0)
00413 goto error;
00414 if (fsync_dir(jop->fs->jdirfd) != 0)
00415 goto error;
00416
00417 fiu_exit_on("jio/commit/tf_sync");
00418
00419 return 0;
00420
00421 error:
00422 return -1;
00423 }
00424
00428 int journal_free(struct journal_op *jop, int do_unlink)
00429 {
00430 int rv;
00431
00432 if (!do_unlink) {
00433 rv = 0;
00434 goto exit;
00435 }
00436
00437 rv = -1;
00438
00439 if (unlink(jop->name)) {
00440
00441
00442
00443
00444 if (ftruncate(jop->fd, 0) != 0) {
00445 if (corrupt_journal_file(jop) != 0) {
00446 mark_broken(jop->fs);
00447 goto exit;
00448 }
00449 }
00450 }
00451
00452 if (fsync_dir(jop->fs->jdirfd) != 0) {
00453 mark_broken(jop->fs);
00454 goto exit;
00455 }
00456
00457 fiu_exit_on("jio/commit/pre_ok_free_tid");
00458 free_tid(jop->fs, jop->id);
00459
00460 rv = 0;
00461
00462 exit:
00463 close(jop->fd);
00464
00465 free(jop->name);
00466 free(jop);
00467
00468 return rv;
00469 }
00470
00476 int fill_trans(unsigned char *map, off_t len, struct jtrans *ts)
00477 {
00478 int rv;
00479 unsigned char *p;
00480 struct operation *op, *tmp;
00481 struct on_disk_hdr hdr;
00482 struct on_disk_ophdr ophdr;
00483 struct on_disk_trailer trailer;
00484
00485 rv = -1;
00486
00487 if (len < sizeof(hdr) + sizeof(ophdr) + sizeof(trailer))
00488 return -1;
00489
00490 p = map;
00491
00492 memcpy(&hdr, p, sizeof(hdr));
00493 p += sizeof(hdr);
00494
00495 hdr_ntoh(&hdr);
00496 if (hdr.ver != 1)
00497 return -1;
00498
00499 ts->id = hdr.trans_id;
00500 ts->flags = hdr.flags;
00501 ts->numops_r = 0;
00502 ts->numops_w = 0;
00503 ts->len_w = 0;
00504
00505 for (;;) {
00506 if (p + sizeof(ophdr) > map + len)
00507 goto error;
00508
00509 memcpy(&ophdr, p, sizeof(ophdr));
00510 p += sizeof(ophdr);
00511
00512 ophdr_ntoh(&ophdr);
00513
00514 if (ophdr.len == 0 && ophdr.offset == 0) {
00515
00516 break;
00517 }
00518
00519 if (p + ophdr.len > map + len)
00520 goto error;
00521
00522 op = malloc(sizeof(struct operation));
00523 if (op == NULL)
00524 goto error;
00525
00526 op->len = ophdr.len;
00527 op->offset = ophdr.offset;
00528 op->direction = D_WRITE;
00529
00530 op->buf = (void *) p;
00531 p += op->len;
00532
00533 op->pdata = NULL;
00534
00535 if (ts->op == NULL) {
00536 ts->op = op;
00537 op->prev = NULL;
00538 op->next = NULL;
00539 } else {
00540 for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next)
00541 ;
00542 tmp->next = op;
00543 op->prev = tmp;
00544 op->next = NULL;
00545 }
00546
00547 ts->numops_w++;
00548 ts->len_w += op->len;
00549 }
00550
00551 if (p + sizeof(trailer) > map + len)
00552 goto error;
00553
00554 memcpy(&trailer, p, sizeof(trailer));
00555 p += sizeof(trailer);
00556
00557 trailer_ntoh(&trailer);
00558
00559 if (trailer.numops != ts->numops_w)
00560 goto error;
00561
00562 if (checksum_buf(0, map, len - sizeof(trailer)) != trailer.checksum) {
00563 rv = -2;
00564 goto error;
00565 }
00566
00567 return 0;
00568
00569 error:
00570 while (ts->op != NULL) {
00571 tmp = ts->op->next;
00572 free(ts->op);
00573 ts->op = tmp;
00574 }
00575 return rv;
00576 }
00577