/* * LIB/HISTORY.C * * (c)Copyright 1997, Matthew Dillon, All Rights Reserved. Refer to * the COPYRIGHT file in the base directory of this distribution * for specific rights granted. * * Generally speaking, the history mechanism can be pictured from the * point of view of a reader or from the point of view of a writer. From * the point of view of a reader, A base table lookup begins a hash chain * of history records that runs backwards through the history file. More * recent entries are encountered first, resulting in a certain degree of * locality of reference based on age. * * A writer must scan the chain to ensure that the message-id does not * already exist. The new history record is appended to the history file * and inserted at the base of the hash chain. The file append requires * an exclusive lock to ensure atomic operation (O_APPEND is often not atomic * on heavily loaded systems, the exclusive lock is required). * * In a heavily loaded system, the exclusive lock and append may become a * bottleneck. * * WARNING! offsets stored in history records / hash table index are signed * 32 bits but cast to unsigned in any lseek() operations. The history file * is thus currently limited to 4GB even with 64 bit capable filesystems. * 2GB on linux, 4GB under FreeBSD (which is 64 bit capable). */ #include "defs.h" Prototype int HistoryOpen(const char *fileName, int fastMode); Prototype int HistoryClose(void); Prototype int HistoryLookup(const char *msgid, History *h); Prototype int HistoryLookupByHash(hash_t hv, History *h); Prototype int32 HistoryPosLookupByHash(hash_t hv, History *h); Prototype int HistoryAdd(const char *msgid, History *h); Prototype int NewHSize; #define HBLKINCR 16 #define HBLKSIZE 256 HistHead HHead; HistHead *HHeadMap; uint32 *HAry; int HFd = -1; int LoggedDHistCorrupt; int HFlags; int HSize; int HMask; off_t HScanOff; int NewHSize = 0; int HBlkIncr = HBLKINCR; int HBlkGood; char HistoryFileName[PATH_MAX]; int DoingReOpen = 0; int HistoryOpen(const char *fileName, int hflags) { int fd; struct stat st; int openflags; if (fileName == NULL) fileName = strdup(PatDbExpand(DHistoryPat)); strcpy(HistoryFileName, fileName); HFlags = hflags; if (NewHSize == 0) NewHSize = DOpts.HashSize; /* which may also be 0 */ /* * open the history file */ if (HFlags & HGF_READONLY) openflags = O_RDONLY; else openflags = O_RDWR|O_CREAT; if (HFlags & HGF_EXCHECK && stat(fileName, &st) == 0) { fprintf(stderr, "%s in fast mode already exists - exiting\n", fileName); exit(1); } fd = open(fileName, openflags, 0644); if (fd < 0) { if (DoingReOpen) return(-2); logit(LOG_ERR, "open %s failed (%s)", fileName, strerror(errno)); fprintf(stderr, "open %s failed (%s)", fileName, strerror(errno)); exit(1); } if (fstat(fd, &st) < 0) { logit(LOG_ERR, "fstat %s failed", fileName); exit(1); } /* * initial history file creation, if necessary */ bzero(&HHead, sizeof(HHead)); if (st.st_size == 0 || read(fd, &HHead, sizeof(HHead)) != sizeof(HHead) || HHead.hmagic != HMAGIC ) { /* * Is this history an old one and we need to wait for a new one? */ if (DoingReOpen && HHead.hmagic == HDEADMAGIC) { close(fd); return(-2); } /* * lock after finding the history file to be invalid and recheck. */ hflock(fd, 0, XLOCK_EX); fstat(fd, &st); lseek(fd, 0L, 0); if (st.st_size == 0 || read(fd, &HHead, sizeof(HHead)) != sizeof(HHead) || HHead.hmagic != HMAGIC ) { int n; int b; char *z = calloc(4096, 1); /* * check for old version of history file */ if (st.st_size) { logit(LOG_ERR, "Incompatible history file version or corrupted history file\n"); exit(1); } if (HFlags & HGF_READONLY) { logit(LOG_ERR, "Unable to create %s - read-only mode\n", fileName); fprintf(stderr, "Unable to create %s - read-only mode\n", fileName); exit(1); } /* * create new history file */ logit(LOG_INFO, "Creating history file"); lseek(fd, 0L, 0); ftruncate(fd, 0); bzero(&HHead, sizeof(HHead)); HHead.hashSize = NewHSize; HHead.version = HVERSION; HHead.henSize = sizeof(History); HHead.headSize = sizeof(HHead); write(fd, &HHead, sizeof(HHead)); /* * write out the hash table */ n = 0; b = HHead.hashSize * sizeof(int32); while (n < b) { int r = (b - n > 4096) ? 4096 : b - n; write(fd, z, r); n += r; } fsync(fd); /* * rewrite header with magic number */ lseek(fd, 0L, 0); HHead.hmagic = HMAGIC; write(fd, &HHead, sizeof(HHead)); free(z); logit(LOG_INFO, "History file creation complete"); } hflock(fd, 0, XLOCK_UN); } if (HHead.version < HVERSION) { logit(LOG_ERR, "DHistory version %d, expected %d, use the biweekly adm script to regenerate the history file", HHead.version, HVERSION ); fprintf(stderr, "DHistory version %d, expected %d, use the biweekly adm script to regenerate the history file\n", HHead.version, HVERSION ); exit(1); } if (HHead.version > HVERSION) { logit(LOG_ERR, "DHistory version %d, expected %d, YOU ARE RUNNING AN OLD DIABLO ON A NEW HISTORY FILE!", HHead.version, HVERSION ); exit(1); } /* * Map the history header so that we can check it quickly */ HHeadMap = xmap(NULL, HHead.headSize, PROT_READ, MAP_SHARED, fd, 0); if (HHeadMap == 0) { if (fd >= 0) close(fd); logit(LOG_CRIT, "dhistory header mmap error: %s", strerror(errno)); exit(1); } /* * Map history file */ HSize = HHead.hashSize; HMask = HSize - 1; /* * In FAST mode we leave the history file locked in order to * cache the hash table array at the beginning, which in turn * allows us to essentially append new entries to the end of * the file without having to seek back and forth updating * the hash table. * * When we aren't in FAST mode, we memory-map the hash table * portion of the history file. */ if (HFlags & HGF_FAST && hflock(fd, 0, XLOCK_EX|XLOCK_NB) == -1) { fprintf(stderr, "Unable to lock history file in fast mode (%s)\n", strerror(errno)); exit(1); } if (HFlags & HGF_FAST) { HAry = calloc(HSize, sizeof(int32)); if (HAry == NULL) { perror("calloc"); exit(1); } lseek(fd, HHead.headSize, 0); if (read(fd, HAry, HSize * sizeof(int32)) != HSize * sizeof(int32)) { perror("read"); exit(1); } } else { HAry = xmap(NULL, HSize * sizeof(int32), PROT_READ, MAP_SHARED, fd, HHead.headSize); if (HFlags & HGF_MLOCK) mlock(HAry, HSize * sizeof(int32)); } if (HAry == NULL || HAry == (uint32 *)-1) { if (fd >= 0) close(fd); logit(LOG_CRIT, "dhistory mmap error: %s", strerror(errno)); exit(1); } HFd = fd; HScanOff = HHead.headSize + HHead.hashSize * sizeof(int32); return(0); } /* * On close, we have to commit the hash table if we were in * FAST mode, otherwise we need only unmap the file before * closing it. */ int HistoryClose(void) { int r = RCOK; if (HFd >= 0 && !(HFlags & HGF_READONLY)) { if (HFlags & HGF_FAST) { lseek(HFd, HHead.headSize, 0); if (write(HFd, HAry, HSize * sizeof(int32)) != HSize * sizeof(int32)) r = RCTRYAGAIN; else free(HAry); } else { if (HAry && HAry != (uint32 *)-1) { if (HFlags & HGF_MLOCK) munlock(HAry, HSize * sizeof(int32)); xunmap((void *)HAry, HSize * sizeof(int32)); } } } if (r == RCOK) { if (HFd >= 0) { if (HFlags & HGF_FAST) hflock(HFd, 0, XLOCK_UN); close(HFd); } HFd = -1; HAry = NULL; } return(r); } void historyReOpen(void) { int count = 0; DoingReOpen = 1; HistoryClose(); while (HistoryOpen(HistoryFileName, HFlags) == -2 && count++ < 360) sleep(1); if (count++ >= 360) { fprintf(stderr, "Unable to reopen history file due to old magic\n"); logit(LOG_ERR, "Unable to reopen history file due to old magic"); exit(1); } DoingReOpen = 0; } void printHistory(History *h) { printf(" [hv=%08x.%08x gm=%d ex=%d off=%d len=%d F=%s]\n", h->hv.h1, h->hv.h2, (int)h->gmt, (int)h->exp, (int)h->boffset, (int)h->bsize, ((h->exp & EXPF_HEADONLY) ? "H" : "") ); } int HistoryLookup(const char *msgid, History *nh) { hash_t hv = hhash(msgid); int32 poff = HHead.headSize + ((hv.h1 ^ hv.h2) & HMask) * sizeof(int32); int32 off = HAry[(hv.h1 ^ hv.h2) & HMask]; History h = { 0 }; static int HLAlt; int r = -1; int counter = 0; int statfailed = 0; if (HHeadMap->hmagic != HMAGIC) historyReOpen(); while (off) { lseek(HFd, (off_t)(uint32)off, 0); if (read(HFd, &h, sizeof(h)) != sizeof(h)) { if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) { LoggedDHistCorrupt |= 1; logit(LOG_ERR, "dhistory file corrupted on lookup @ %d->%d chain %d", (int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask)); sleep(1); } break; } if (h.hv.h1 == hv.h1 && h.hv.h2 == hv.h2) break; poff = off; off = h.next; if (counter++ > 5000) { logit(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d (%s)", (int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask), msgid); off = 0; } } if (off != 0) r = 0; /* * On failure, try alternate hash method (for lookup only) */ if (r < 0 && DOpts.CompatHashMethod >= 0 && HLAlt == 0 && !statfailed) { int save = DOpts.HashMethod; DOpts.HashMethod = DOpts.CompatHashMethod; HLAlt = 1; r = HistoryLookup(msgid, nh); DOpts.HashMethod = save; HLAlt = 0; } if (r == 0 && nh != NULL) memcpy(nh, &h, sizeof(h)); return(r); } int HistoryLookupByHash(hash_t hv, History *h) { int32 poff = HHead.headSize + ((hv.h1 ^ hv.h2) & HMask) * sizeof(int32); int32 off = HAry[(hv.h1 ^ hv.h2) & HMask]; int counter = 0; if (HHeadMap->hmagic != HMAGIC) historyReOpen(); while (off) { lseek(HFd, (off_t)(uint32)off, 0); if (read(HFd, h, sizeof(*h)) != sizeof(*h)) { if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) { LoggedDHistCorrupt |= 1; logit(LOG_ERR, "dhistory file corrupted on lookup"); sleep(1); } break; } if (h->hv.h1 == hv.h1 && h->hv.h2 == hv.h2) break; poff = off; off = h->next; if (counter++ > 5000) { logit(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d", (int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask)); off = 0; } } if (off != 0) { return(0); } return(-1); } int32 HistoryPosLookupByHash(hash_t hv, History *h) { int32 poff = HHead.headSize + ((hv.h1 ^ hv.h2) & HMask) * sizeof(int32); int32 off = HAry[(hv.h1 ^ hv.h2) & HMask]; int counter = 0; while (off) { lseek(HFd, (off_t)(uint32)off, 0); if (read(HFd, h, sizeof(*h)) != sizeof(*h)) { if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) { LoggedDHistCorrupt |= 1; logit(LOG_ERR, "dhistory file corrupted on lookup"); sleep(1); } break; } if (h->hv.h1 == hv.h1 && h->hv.h2 == hv.h2) break; poff = off; off = h->next; if (counter++ > 5000) { logit(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d", (int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask)); off = 0; } } if (off != 0) { return(off); } return(-1); } int HistoryAddOld(const char *msgid, History *h) { int32 hi = (h->hv.h1 ^ h->hv.h2) & HMask; int32 poff = HHead.headSize + hi * sizeof(int32); int32 boff = poff; int r = RCOK; if (HHeadMap->hmagic != HMAGIC) historyReOpen(); /* * This should not happen, but if it does, log it and pretend we did it */ if (h->gmt == 0) { logit(LOG_ERR, "Not adding history entry with gmt=0"); return(RCOK); } if (HFlags & HGF_READONLY) return(RCOK); /* * record lock, search for message id * */ if ((HFlags & HGF_FAST) == 0) /* lock hash chain */ hflock(HFd, boff, XLOCK_EX); /* * make sure message is not already in hash table */ { int32 off; off = HAry[hi]; if ((HFlags & HGF_NOSEARCH) == 0) { int counter = 0; while (off) { History ht; lseek(HFd, (off_t)(uint32)off, 0); if (read(HFd, &ht, sizeof(ht)) != sizeof(ht)) { if ((LoggedDHistCorrupt & 2) == 0 || DebugOpt) { LoggedDHistCorrupt |= 2; logit(LOG_ERR, "dhistory file corrupted @ %d", off); sleep(1); } break; } if (ht.hv.h1 == h->hv.h1 && ht.hv.h2 == h->hv.h2) { r = RCALREADY; break; } poff = off; off = ht.next; if (counter++ > 5000) { logit(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d (%s)", (int)poff, (int)off, (int)((ht.hv.h1 ^ ht.hv.h2) & HMask), msgid); off = 0; } } } } /* * If we are ok, reserve space in the history file for the record * and insert the record. This may loop if intermediate scans determine * that we must append a new block to the file. */ while (r == RCOK) { off_t offBeg; off_t offEnd; History hary[HBLKSIZE]; /* * append/scan lock */ if ((HFlags & HGF_FAST) == 0) /* append/scan lock */ hflock(HFd, 4, XLOCK_EX); /* * Figure out scan start & end point. We only look at the last * HBLKSIZE*2 records worth of entries in the worst case. */ offEnd = lseek(HFd, 0L, 2); { off_t n; n = offEnd - HHead.headSize - HHead.hashSize * sizeof(int32); n = n % HHead.henSize; offEnd -= n; } offBeg = HScanOff; if (offBeg > offEnd) offBeg = offEnd; if (offBeg < offEnd - HBLKSIZE * sizeof(History)) { off_t n; offBeg = offEnd - HBLKSIZE * sizeof(History); n = offBeg - HHead.headSize - HHead.hashSize * sizeof(int32); n = n % HHead.henSize; offBeg -= n; } /* * Look for a blank record (gmt value is 0) */ lseek(HFd, offBeg, 0); while (offBeg < offEnd) { int q = (offEnd - offBeg) / sizeof(History); int i; int n; if (q > arysize(hary)) q = arysize(hary); if (q > HBlkIncr) q = HBlkIncr; n = read(HFd, hary, q * sizeof(History)); if (n != q * sizeof(History)) { r = RCTRYAGAIN; break; } for (i = 0; r == RCOK && i < q; ++i) { if (hary[i].gmt == 0 && ((HFlags & HGF_FAST) || hflock(HFd, offBeg, XLOCK_NB|XLOCK_EX) >= 0) ) { break; } offBeg += sizeof(History); } /* * Found, break, or not found, continue. Adjust HBlkIncr * appropriately. */ if (i != q) { if (++HBlkGood == 4) { HBlkGood = 0; if ((HBlkIncr -= HBLKINCR) < HBLKINCR) HBlkIncr = HBLKINCR; } break; } if (offBeg != offEnd) { HBlkGood = 0; if ((HBlkIncr += HBLKINCR) > HBLKSIZE) { HBlkIncr = HBLKSIZE; } } } HScanOff = offBeg; /* * break if something went wrong or if we found the record. With * the proper record explicitly locked, we can release the append * lock. */ if (r != RCOK || offBeg != offEnd) { if ((HFlags & HGF_FAST) == 0) /* append/scan unlock */ hflock(HFd, 4, XLOCK_UN); break; } /* * Otherwise append a new block and retry. XXX put on filesystem * block boundry. */ lseek(HFd, 0L, 2); bzero(hary, sizeof(hary)); write(HFd, hary, sizeof(hary)); if ((HFlags & HGF_FAST) == 0) hflock(HFd, 4, XLOCK_UN); } /* WHILE */ /* * If we did find a blank record, HScanOff is set to it. The record * is locked at this point. * * We insert the record into the chain at the beginning of the * chain, even though we are appending the record. This is for * time-locality of reference and also reduces 'append-to-history' * file overhead by localizing disk writes a bit better. * * XXX next/linkages are 32 bits. At the moment we can go to 4G even * though we are using a signed 32 bit int because we cast seeks to * unsigned 32 bits. */ if (r == RCOK) { int n; h->next = HAry[hi]; lseek(HFd, HScanOff, 0); n = write(HFd, h, sizeof(History)); if ((HFlags & HGF_FAST) == 0) hflock(HFd, HScanOff, XLOCK_UN); if (n == sizeof(History)) { if ((HFlags & HGF_FAST) == 0) { int32 off = (int32)HScanOff; /* for write XXX */ lseek(HFd, HHead.headSize + hi * sizeof(int32), 0); if (write(HFd, &off, sizeof(int32)) != sizeof(int32)) { r = RCTRYAGAIN; } } else { HAry[hi] = (int)HScanOff; } } else { r = RCTRYAGAIN; } } if ((HFlags & HGF_FAST) == 0) /* unlock hash chain */ hflock(HFd, boff, XLOCK_UN); return(r); } /* * This is a new and simplifed HistoryAdd that locks an append lock * (pos 4) and just appends the single history record and unlocks. * It then updates the hash chain (already locked) by adding the * new history entry at the beginning as usual. */ int HistoryAdd(const char *msgid, History *h) { int32 hi = (h->hv.h1 ^ h->hv.h2) & HMask; int32 poff = HHead.headSize + hi * sizeof(int32); int32 boff = poff; int r = RCOK; if (HHeadMap->hmagic != HMAGIC) historyReOpen(); /* * This should not happen, but if it does, log it and pretend we did it */ if (h->gmt == 0) { logit(LOG_ERR, "Not adding history entry with gmt=0"); return(RCOK); } /* * record lock, search for message id * */ if ((HFlags & HGF_FAST) == 0) /* lock hash chain */ hflock(HFd, boff, XLOCK_EX); /* * make sure message is not already in hash table */ { int32 off; off = HAry[hi]; if ((HFlags & HGF_NOSEARCH) == 0) { int counter = 0; while (off) { History ht; lseek(HFd, (off_t)(uint32)off, 0); if (read(HFd, &ht, sizeof(ht)) != sizeof(ht)) { if ((LoggedDHistCorrupt & 2) == 0 || DebugOpt) { LoggedDHistCorrupt |= 2; logit(LOG_ERR, "dhistory file corrupted @ %d", off); sleep(1); } break; } if (ht.hv.h1 == h->hv.h1 && ht.hv.h2 == h->hv.h2) { r = RCALREADY; break; } poff = off; off = ht.next; if (counter++ > 5000) { logit(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d (%s)", (int)poff, (int)off, (int)((ht.hv.h1 ^ ht.hv.h2) & HMask), msgid ? msgid : "no message-id"); off = 0; } } } } while (r == RCOK) { off_t writePos; int n = 0; h->next = HAry[hi]; if ((HFlags & HGF_FAST) == 0) /* append/scan lock */ hflock(HFd, 4, XLOCK_EX); if ((writePos = lseek(HFd, 0L, 2)) == -1) { r = RCTRYAGAIN; break; } HScanOff = writePos; n = write(HFd, h, sizeof(History)); if (n != sizeof(History)) { lseek(HFd, writePos, 0); ftruncate(HFd, writePos); } if ((HFlags & HGF_FAST) == 0) /* append/scan lock */ hflock(HFd, 4, XLOCK_UN); if (n == sizeof(History)) { if ((HFlags & HGF_FAST) == 0) { int32 off = (int32)HScanOff; /* for write XXX */ lseek(HFd, HHead.headSize + hi * sizeof(int32), 0); if (write(HFd, &off, sizeof(int32)) != sizeof(int32)) { r = RCTRYAGAIN; } } else { HAry[hi] = (int)HScanOff; } } else { r = RCTRYAGAIN; } break; } if ((HFlags & HGF_FAST) == 0) /* unlock hash chain */ hflock(HFd, boff, XLOCK_UN); return(r); }