*** lib/global.c.old Fri May 14 16:31:41 1999 --- lib/global.c Fri May 14 16:22:08 1999 *************** *** 39,44 **** --- 39,45 ---- Prototype const char *DHistoryPat; /* db relative */ Prototype const char *SpamCachePat; /* db relative */ Prototype const char *PCommitCachePat; /* db relative */ + Prototype const char *DExpireOverListPat; /* db relative */ Prototype volatile int DebugOpt; Prototype MemPool *SysMemPool; *************** *** 67,72 **** --- 68,74 ---- const char *ModeratorsPat = "%s/moderators"; const char *SpamCachePat = "%s/spam.cache"; const char *PCommitCachePat = "%s/pcommit.cache"; + const char *DExpireOverListPat = "%s/dexpover.dat"; const char *DNewsfeedsPat = "%s/dnewsfeeds"; const char *DNNTPSpoolCtlPat = "%s/dnntpspool.ctl"; const char *DistribDotPatsPat = "%s/distrib.pats"; *** lib/config.c.old Fri May 14 16:32:20 1999 --- lib/config.c Fri May 14 16:26:51 1999 *************** *** 280,285 **** --- 280,287 ---- pptr = &SpamCachePat; else if (strcasecmp(cmd + 5, "pcommit_cache") == 0) pptr = &PCommitCachePat; + else if (strcasecmp(cmd + 5, "dexpover_list") == 0) + pptr = &DExpireOverListPat; else if (strcasecmp(cmd + 5, "dnewsfeeds") == 0) pptr = &DNewsfeedsPat; else if (strcasecmp(cmd + 5, "dnntpspool") == 0) *** util/dexpire.c.old Mon Apr 26 18:59:39 1999 --- util/dexpire.c Fri May 14 17:01:10 1999 *************** *** 14,19 **** --- 14,22 ---- * (c)Copyright 1997, Matthew Dillon, All Rights Reserved. Refer to * the COPYRIGHT file in the base directory of this distribution * for specific rights granted. + * + * Modification by Nickolai Zeldovich to store msgid hashes when + * expiring articles to allow for better overview expiration. 
*/ #include "defs.h" *************** *** 53,58 **** --- 56,63 ---- int IndexOpt = 0; int HistoryUpdateOpt = 1; + int WriteHashesToFileOpt = 0; + int NotEnoughFreeSpace(void); int FindNode(const char *path, int createMe); int UpdateHistory(void); *************** *** 120,125 **** --- 125,133 ---- ptr - 2 ); break; + case 'o': + WriteHashesToFileOpt = 1; + break; case 'h': if (*ptr) UpdateHistoryOpt = strtol(ptr, NULL, 0); *************** *** 344,350 **** --- 352,367 ---- uint32 startGmt = time(NULL) / 60; /* time_t in minutes */ uint32 countExp = 0; + FILE *DExpOverList; + /* + * Write expired article msgid hashes to a file if requested. + */ + + if (WriteHashesToFileOpt == 1) + DExpOverList = fopen(PatDbExpand(DExpireOverListPat), "a"); + + /* * scan all directories in the spool. Expire history records by * directory. We can't expire history records by file anymore * because 'reader mode' expire may create new files with 'old' gmt *************** *** 458,463 **** --- 475,484 ---- 0 ); write(fd, &h->exp, sizeof(h->exp)); + + if (WriteHashesToFileOpt == 1) + fwrite(&h->hv, sizeof(hash_t), 1, DExpOverList); + changed = 1; } ++countExp; *************** *** 469,474 **** --- 490,499 ---- } close(fd); } + + if (WriteHashesToFileOpt == 1) + fclose(DExpOverList); + return(countExp); } *** util/dexpireover.c.old Sun May 9 10:50:04 1999 --- util/dexpireover.c Fri May 21 10:16:19 1999 *************** *** 7,13 **** * for specific rights granted. * * dexpireover [-a] [-v[N]] [-w grpwildcard] [-f dactive.kp] [-NB] [-n] [-O[n]] ! * [-s] [-R] * * In this incarnation, dexpireover cleans up overview information as * specified in the 'x' fields in dexpire.ctl (see the sample dexpire.ctl) --- 7,13 ---- * for specific rights granted. * * dexpireover [-a] [-v[N]] [-w grpwildcard] [-f dactive.kp] [-NB] [-n] [-O[n]] ! 
* [-s] [-R] [-e] [-o] * * In this incarnation, dexpireover cleans up overview information as * specified in the 'x' fields in dexpire.ctl (see the sample dexpire.ctl) *************** *** 20,25 **** --- 20,28 ---- * I also need to have a remote-server scanning option to allow * dexpireover to adjust expirations based on remote server retentions. * It does not do this yet either. + * + * Modifications by Nickolai Zeldovich to allow spool-based expiration + * (ExpireBySpool and ExpireFromFile) */ #include *************** *** 56,61 **** --- 59,77 ---- int rf_ArtBase; } ReplaceFile; + #define DEXPOVER_READ_BUFFER_SIZE 4096 + #define DEXPOVER_HASH_SIZE 32768 + + /* + * These aren't really buckets, they're parts of a bucket + */ + + typedef struct bucket_t { + struct bucket_t *next; + hash_t hash_item; + short valid; + } bucket_t; + KPDB *KDBActive; Group *GHash[GHSIZE]; *************** *** 66,75 **** --- 82,95 ---- Group *FindGroupByHash(hash_t *hv); int SetField(char **pptr, const char *str); void ExpireByDays(Group *group, int fd, OverHead *oh); + void ExpireBySpool(Group *group, int fd, OverHead *oh); + void ExpireFromFile(Group *group, int fd, OverHead *oh); void RewriteData(Group *group, int fd, OverHead *oh, const char *dirPath, int h1, int h2); void rewriteDataFile(Group *group, ReplaceFile **prf, const char *cacheBase, int cacheSize, const OverArt *oa, OverArt *ob, const char *dirPath, int h1, int h2); void ResizeGroup(Group *group, int fd, OverHead *oh, int maxArts); int nearestPower(int n); + void ReadDExpOverList(void); + int expOverListCheckExpired(hash_t *hv); int UpdateBegArtNoOpt = 0; int UpdateCTSOpt = 0; *************** *** 83,90 **** --- 103,114 ---- int ResizeOpt = -1; int ForReal = 1; int OldGroups = 0; + int UseExpireBySpool = 0; + int UseExpireFromFile = 0; char *Wild; + bucket_t *dexpover_msgid_hash; + int main(int ac, char **av) { *************** *** 134,139 **** --- 158,169 ---- case 'f': dbfile = (*ptr) ? 
ptr : av[++i]; break; + case 'e': + UseExpireBySpool = 1; + break; + case 'o': + UseExpireFromFile = 1; + break; case 'R': RewriteDataOpt = 1; break; *************** *** 185,190 **** --- 215,234 ---- LoadExpireCtl(1); /* + * Open the history file if we are going to expire based on local spool + */ + + if (UseExpireBySpool) + HistoryOpen(NULL, 0); + + /* + * Read in the list of expired msgid hashes, if we are using it + */ + + if (UseExpireFromFile) + ReadDExpOverList(); + + /* * scan dactive.kp */ *************** *** 297,303 **** ); /* ! * Writeback history file */ if (UpdateBegArtNoOpt || OldGroups || UpdateCTSOpt) { --- 341,347 ---- ); /* ! * Writeback active file */ if (UpdateBegArtNoOpt || OldGroups || UpdateCTSOpt) { *************** *** 374,379 **** --- 418,430 ---- } if (KDBActive) KPDBClose(KDBActive); + + /* + * Close history if we had it open + */ + if (UseExpireBySpool) + HistoryClose(); + return(0); } *************** *** 489,496 **** if (numArts != maxArts || RewriteDataOpt > 0) ResizeGroup(group, fd, &oh, numArts); } - ExpireByDays(group, fd, &oh); /* * Rewrite data files associated with over. file if -R. */ --- 540,553 ---- if (numArts != maxArts || RewriteDataOpt > 0) ResizeGroup(group, fd, &oh, numArts); } + if (UseExpireBySpool) + ExpireBySpool(group, fd, &oh); + else if (UseExpireFromFile) + ExpireFromFile(group, fd, &oh); + else + ExpireByDays(group, fd, &oh); + /* * Rewrite data files associated with over. file if -R. 
*/
***************
*** 733,738 ****
--- 790,1029 ----
  }

  /*
+  * ExpireBySpool() - expire overview records by consulting the history file
+  *
+  * A live record is expired when its msgid hash is missing from, or expired in, history
+  */
+
+ void
+ ExpireBySpool(Group *group, int fd, OverHead *oh)
+ {
+     const OverArt *oaBase;
+     struct stat st;
+     int count = 0;
+     int jumped = 0;
+     int expired = 0;
+     int canceled = 0;
+     int stale = 0;
+     int n;
+
+     if (fstat(fd, &st) != 0)
+         return;
+
+     /*
+      * Calculate number of overview records that follow the file header
+      */
+
+     n = (st.st_size - oh->oh_HeadSize) / sizeof(OverArt);
+
+     oaBase = xmap(NULL, n * sizeof(OverArt), PROT_READ, MAP_SHARED, fd, oh->oh_HeadSize);
+     if (oaBase == NULL) {
+         fprintf(stderr, "Unable to xmap over.* file for group %s\n", group->gr_GroupName);
+         return;
+     }
+
+     /*
+      * Mark expired overview records (written back only when ForReal is set)
+      */
+
+     {
+         int i;
+
+         for (i = 0; i < n; ++i) {
+             const OverArt *oa = &oaBase[i];
+
+             if (oa->oa_ArtNo > 0) {
+                 hash_t dh = oa->oa_MsgHash;
+                 History dh_lookup;
+
+                 /*
+                  * Make sure that the history entry exists. It's possible
+                  * that dexpire already removed the article, and dhistory
+                  * was cleaned, so the msgID doesn't exist.
+                  *
+                  * If the article does not exist or is expired, then expire
+                  * its overview entry as well.
+                  */
+                 if ((HistoryLookupByHash(dh, &dh_lookup) == -1) ||
+                     (H_EXP(dh_lookup.exp) == H_EXP((unsigned short)-1))
+                 ) {
+                     OverArt copy = *oa;
+
+                     copy.oa_ArtNo = -2;     /* EXPIRED */
+                     if (ForReal) {
+                         lseek(fd, oh->oh_HeadSize + i * sizeof(OverArt), 0);
+                         write(fd, &copy, sizeof(OverArt));
+                     }
+                     ++count;
+                 }
+             }
+         }
+     }
+
+     {
+         /*
+          * Advance the group's begin article number (gr_StartNo) past dead or reused slots
+          */
+         while (n > 0 && group->gr_StartNo < group->gr_EndNo) {  /* n > 0: never compute % 0 */
+             int i = (group->gr_StartNo & 0x7FFFFFFF) % n;
+             const OverArt *oa = &oaBase[i];
+
+             if (VerboseOpt > 2)
+                 printf("test %d vs %d (i = %d)\n", oa->oa_ArtNo, group->gr_StartNo, i);
+             if (oa->oa_ArtNo == group->gr_StartNo)
+                 break;
+             ++group->gr_StartNo;
+             switch(oa->oa_ArtNo) {
+             case -2:
+                 ++expired;
+                 break;
+             case -1:
+                 ++canceled;
+                 break;
+             default:
+                 ++stale;
+                 break;
+             }
+             ++jumped;
+         }
+         if (jumped)
+             group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
+     }
+     if (VerboseOpt && (jumped || count)) {
+         printf("expired %-4d NB += %-4d (%3d can, %3d stale, %3d exprd) left %-4d expires by spool, grp=%s\n",
+             count,
+             jumped,
+             canceled, stale, expired,
+             group->gr_EndNo - group->gr_StartNo,
+             group->gr_GroupName
+         );
+     }
+     xunmap((void *)oaBase, n * sizeof(OverArt));
+ }
+
+ /*
+  * Similar to ExpireBySpool but uses a file generated by dexpire as a list
+  * of msgid hashes of messages which are expired.
+  *
+  * The 'x' parameter in dexpire.ctl is also checked, to punt articles which
+  * have been lying in overview for a long time and somehow escaped being
+  * written to the dexpover.dat file.
+  */
+
+ void
+ ExpireFromFile(Group *group, int fd, OverHead *oh)
+ {
+     const OverArt *oaBase;
+     struct stat st;
+     int count = 0;
+     int jumped = 0;
+     int expired = 0;
+     int canceled = 0;
+     int stale = 0;
+     int n;
+     int expireSecs = -1;
+     time_t t = time(NULL);
+
+     if (fstat(fd, &st) != 0)
+         return;
+
+     /*
+      * Figure out expireSecs: a 0 from GetOverExpire() becomes -1, which disables the age test
+      */
+     {
+         if ((expireSecs = GetOverExpire(group->gr_GroupName)) == 0)
+             expireSecs = -1;
+     }
+
+     /*
+      * Calculate number of overview records that follow the file header
+      */
+
+     n = (st.st_size - oh->oh_HeadSize) / sizeof(OverArt);
+
+     oaBase = xmap(NULL, n * sizeof(OverArt), PROT_READ, MAP_SHARED, fd, oh->oh_HeadSize);
+     if (oaBase == NULL) {
+         fprintf(stderr, "Unable to xmap over.* file for group %s\n", group->gr_GroupName);
+         return;
+     }
+
+     /*
+      * Mark expired overview records (written back only when ForReal is set)
+      */
+
+     {
+         int i;
+
+         for (i = 0; i < n; ++i) {
+             const OverArt *oa = &oaBase[i];
+
+             if (oa->oa_ArtNo > 0) {
+                 int dt = (int)(t - oa->oa_TimeRcvd);
+                 hash_t *hv = (hash_t *)(&(oa->oa_MsgHash));
+
+                 if (VerboseOpt > 2)
+                     printf("DT %d/%d %08lx %08lx\n", dt, expireSecs, (long)t, (long)oa->oa_TimeRcvd);
+
+                 if ((expOverListCheckExpired(hv) == 0) ||
+                     ((expireSecs > 0) &&
+                      (dt > expireSecs || dt < -(60 * 60 * 24)))
+                 ) {
+                     OverArt copy = *oa;
+
+                     copy.oa_ArtNo = -2;     /* EXPIRED */
+                     if (ForReal) {
+                         lseek(fd, oh->oh_HeadSize + i * sizeof(OverArt), 0);
+                         write(fd, &copy, sizeof(OverArt));
+                     }
+                     ++count;
+                 }
+             }
+         }
+     }
+
+     {
+         /*
+          * Advance the group's begin article number (gr_StartNo) past dead or reused slots
+          */
+         while (n > 0 && group->gr_StartNo < group->gr_EndNo) {  /* n > 0: never compute % 0 */
+             int i = (group->gr_StartNo & 0x7FFFFFFF) % n;
+             const OverArt *oa = &oaBase[i];
+
+             if (VerboseOpt > 2)
+                 printf("test %d vs %d (i = %d)\n", oa->oa_ArtNo, group->gr_StartNo, i);
+             if (oa->oa_ArtNo == group->gr_StartNo)
+                 break;
+             ++group->gr_StartNo;
+             switch(oa->oa_ArtNo) {
+             case -2:
+                 ++expired;
+                 break;
+             case -1:
+                 ++canceled;
+                 break;
+             default:
+                 ++stale;
+                 break;
+             }
+             ++jumped;
+         }
+         if (jumped)
+             group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
+     }
+     if (VerboseOpt && (jumped || count)) {
+         printf("expired %-4d NB += %-4d (%3d can, %3d stale, %3d exprd) left %-4d expires in %6.2f days, grp=%s\n",
+             count,
+             jumped,
+             canceled, stale, expired,
+             group->gr_EndNo - group->gr_StartNo,
+             ((expireSecs>0) ? (double)expireSecs / (60.0 * 60.0 * 24.0) :-1.0),
+             group->gr_GroupName
+         );
+     }
+     xunmap((void *)oaBase, n * sizeof(OverArt));
+ }
+
+ /*
   * Rewrite the data.* files associated with an overview file
   */

***************
*** 1082,1084 ****
--- 1373,1443 ----
      return(i);
  }

+ void
+ ReadDExpOverList()
+ {
+     FILE *DExpOverList;
+     hash_t read_buffer[DEXPOVER_READ_BUFFER_SIZE];
+     int i, n;
+     char path[128];
+
+     dexpover_msgid_hash =
+         (struct bucket_t *)malloc(DEXPOVER_HASH_SIZE * sizeof(struct bucket_t));
+     /* NOTE(review): the next line is damaged in this copy of the patch -- the text between "i" and "valid" (about 20 lines, judging by the hunk header) is missing; restore it from the original patch before applying */
+     for(i=0; ivalid == 1) && (chain->next != NULL))
+                 chain = chain->next;
+
+             if(chain->valid == 1) {
+                 chain->next = (struct bucket_t *)malloc(sizeof(struct bucket_t));
+                 chain = chain->next;
+             }
+
+             chain->valid = 1;
+             chain->hash_item = read_buffer[i];
+             chain->next = NULL;
+         }
+     }
+
+     fclose(DExpOverList);
+     remove(path);
+ }
+
+ int
+ expOverListCheckExpired(hash_t *hv)
+ {
+     int hashval;
+     bucket_t *chain;
+
+     hashval = (hv->h1)&(DEXPOVER_HASH_SIZE-1);
+     chain = &dexpover_msgid_hash[hashval];
+
+     while(chain && chain->valid) {
+         if((chain->hash_item.h1 == hv->h1) &&
+            (chain->hash_item.h2 == hv->h2)) {
+             return 0;
+         }
+         chain = chain->next;
+     }
+
+     return -1;
+ }
+
*** man/dexpire.8.old Fri May 14 16:30:56 1999
--- man/dexpire.8 Fri May 14 16:29:06 1999
***************
*** 20,25 ****
--- 20,28 ----
  [
  .B \-n
  ]
+ [
+ .B \-o
+ ]

  .SH DESCRIPTION
  .IR DExpire
***************
*** 68,73 ****
--- 71,83 ----
  Tell dexpire to fake it... don't actually do anything destructive. Tell
  the user what dexpire would have done.
  .PP
+ .B \-o
+ .PP
+ Tell dexpire to write the hashes of expired message IDs to a file for later
+ use by dexpireover when expiring overview information.
Writes to + file specified by path_dexpover_list in diablo.config, defaults to + %s/dexpover.dat, path_db based. + .PP .SH "SEE ALSO" diablo(8), *** man/dexpireover.8.old Sun May 9 11:07:05 1999 --- man/dexpireover.8 Fri May 14 19:39:26 1999 *************** *** 33,38 **** --- 33,44 ---- .B \-s ] [ + .B \-e + ] + [ + .B \-o + ] + [ .B \-O[n] ] *************** *** 49,54 **** --- 55,63 ---- in the dexpire.ctl file. If the -a or -NB options are given, it will update the active file. dexpireover is usually run with just the -a option. .PP + If the -e option is given, dexpireover will expire articles based on what + articles are left in your local spool. This requires a local spool. + .PP .B \-a .PP This option turns on -NB, -U, -s, and, in the future, will also turn on *************** *** 116,121 **** --- 125,156 ---- suffices for a daily expiration, but 'dexpireover -R -a' should be used once a week to clean up the tailings. See samples/adm/weekly.reader .PP + .B \-e + .PP + Tell dexpireover that spool-based expiration should be used. This option + will cause dexpireover to look up every message stored in the overview database + in the history file, and expire the article if the history entry is marked + expired. This requires that your reader machine carries a local spool, + and only articles in your local spool will be left in overview. You should + not specify this option and -o at the same time. + .PP + .B \-o + .PP + Tell dexpireover that it should use the file specified by path_dexpover_list + in diablo.config (path_db based) to decide which articles should be removed + from overview. This is similar to the spool-based expiration method, but does + not perform history lookups on every article, and can be faster. This also + requires that you run dexpire with the -o option. + .PP + The 'x' option in dexpire.ctl will also be used to safeguard against stale + articles which somehow avoided being written to dexpover.dat and remain in + overview. 
You should probably set your 'x' value high in order to avoid + removing valid articles. + .PP + The dexpover.dat file will be removed after being read in by dexpireover. + .PP + You should not specify this option and -e at the same time. + .PP .B \-O[n] .PP Request that dexpireover delete groups which have not had new articles in 'n'