--- src/afs/afs.h 2001/11/13 17:24:05 1.17 +++ src/afs/afs.h 2001/11/16 05:13:53 @@ -505,7 +505,6 @@ #endif #define CUnique 0x00001000 /* vc's uniquifier - latest unifiquier for fid */ #define CForeign 0x00002000 /* this is a non-afs vcache */ -#define CHasPages 0x00004000 #define CUnlinked 0x00010000 #define CBulkStat 0x00020000 /* loaded by a bulk stat, and not ref'd since */ #define CUnlinkedDel 0x00040000 @@ -610,19 +609,19 @@ #ifdef AFS_DARWIN_ENV struct lock__bsd__ rwlock; #endif - afs_int32 parentVnode; /* Parent dir, if a file. */ + afs_int32 parentVnode; /* Parent dir, if a file. */ afs_int32 parentUnique; struct VenusFid *mvid; /* Either parent dir (if root) or root (if mt pt) */ char *linkData; /* Link data if a symlink. */ - afs_hyper_t flushDV; /* data version last flushed from text */ + afs_hyper_t flushDV; /* data version last flushed from text */ afs_hyper_t mapDV; /* data version last flushed from map */ - afs_size_t truncPos; /* truncate file to this position at next store */ + afs_size_t truncPos; /* truncate file to this position at next store */ struct server *callback; /* The callback host, if any */ - afs_uint32 cbExpires; /* time the callback expires */ + afs_uint32 cbExpires; /* time the callback expires */ struct afs_q callsort; /* queue in expiry order, sort of */ struct axscache *Access; /* a list of cached access bits */ - afs_int32 anyAccess; /* System:AnyUser's access to this. */ - afs_int32 last_looker; /* pag/uid from last lookup here */ + afs_int32 anyAccess; /* System:AnyUser's access to this. */ + afs_int32 last_looker; /* pag/uid from last lookup here */ #if defined(AFS_SUN5_ENV) afs_int32 activeV; #endif /* defined(AFS_SUN5_ENV) */ @@ -778,11 +777,12 @@ /* struct dcache states bits */ #define DWriting 8 /* file being written (used for cache validation) */ -/* dcache flags */ -#define DFNextStarted 1 /* next chunk has been prefetched already */ -#define DFEntryMod 2 /* has entry itself been modified? 
*/ -#define DFFetching 4 /* file is currently being fetched */ -#define DFWaiting 8 /* someone waiting for file */ +/* dcache data flags */ +#define DFEntryMod 0x02 /* has entry itself been modified? */ +#define DFFetching 0x04 /* file is currently being fetched */ + +/* dcache meta flags */ +#define DFNextStarted 0x01 /* next chunk has been prefetched already */ #define DFFetchReq 0x10 /* someone is waiting for DFFetching to go on */ @@ -866,13 +866,47 @@ /* kept in memory */ struct dcache { struct afs_q lruq; /* Free queue for in-memory images */ - afs_rwlock_t lock; /* XXX */ + afs_rwlock_t lock; /* Protects validPos, dflags, parts of f */ + afs_rwlock_t tlock; /* Atomizes updates to refCount */ + afs_rwlock_t mflock; /* Atomizes accesses/updates to mflags */ afs_size_t validPos; /* number of valid bytes during fetch */ - afs_int32 index; /* The index in the CacheInfo file*/ + afs_int32 index; /* The index in the CacheInfo file*/ short refCount; /* Associated reference count. */ - short flags; /* more flags bits */ + char dflags; /* Data flags */ + char mflags; /* Meta flags */ struct fcache f; /* disk image */ - afs_int32 stamp; /* used with vtodc struct for hints */ + afs_int32 stamp; /* used with vtodc struct for hints */ + + /* + * Locking rules: + * + * dcache.lock protects the actual contents of the cache file (in + * f.inode), subfields of f except those noted below, dflags and + * validPos. + * + * dcache.tlock is used to make atomic updates to refCount. Zero + * refCount dcache entries are protected by afs_xdcache instead of + * tlock. + * + * dcache.mflock is used to access and update mflags. It cannot be + * held without holding the corresponding dcache.lock. Updating + * mflags requires holding dcache.lock(R) and dcache.mflock(W), and + * checking for mflags requires dcache.lock(R) and dcache.mflock(R). + * Note that dcache.lock(W) gives you the right to update mflags, + * as dcache.mflock(W) can only be held with dcache.lock(R). 
+ * + * dcache.stamp is protected by the associated vcache lock, because + * its only purpose is to establish correspondence between vcache + * and dcache entries. + * + * dcache.index, dcache.f.fid, dcache.f.chunk and dcache.f.inode are + * write-protected by afs_xdcache and read-protected by refCount. + * Once an entry is referenced, these values cannot change, and if + * it's on the free list (with refCount=0), it can be reused for a + * different file/chunk. These values can only be written while + * holding afs_xdcache(W) and allocating this dcache entry (thereby + * ensuring no one else has a refCount on it). + */ }; /* this is obsolete and should be removed */ #define ihint stamp @@ -882,15 +916,9 @@ do { \ (x)->f.fid.Fid.Unique = 0; \ afs_indexUnique[(x)->index] = 0; \ - (x)->flags |= DFEntryMod; \ + (x)->dflags |= DFEntryMod; \ } while(0) -/* - * Convenient release macro for use when afs_PutDCache would cause - * deadlock on afs_xdcache lock - */ -#define lockedPutDCache(ad) ((ad)->refCount--) - /* FakeOpen and Fake Close used to be real subroutines. They're only used in * sun_subr and afs_vnodeops, and they're very frequently called, so I made * them into macros. 
They do: @@ -978,7 +1006,7 @@ extern struct conn *afs_Conn(); extern struct conn *afs_ConnByHost(); extern struct conn *afs_ConnByMHosts(); -extern afs_int32 afs_NewCell(); +extern afs_int32 afs_NewCell(); extern struct dcache *afs_GetDCache(); extern struct dcache *afs_FindDCache(); extern struct dcache *afs_NewDCache(); --- src/afs/afs_cbqueue.c 2001/08/08 00:03:28 1.5 +++ src/afs/afs_cbqueue.c 2001/11/16 05:13:53 @@ -289,10 +289,14 @@ { struct dcache * adc; + ObtainReadLock(&adc->tlock); + ObtainReadLock(&afs_xdcache); if ((adc = tvc->quick.dc) && (adc->stamp == tvc->quick.stamp) && (afs_indexTimes[adc->index] > afs_indexCounter - 20)) { queue up the stat request } + ReleaseReadLock(&afs_xdcache); + ReleaseReadLock(&adc->tlock); } */ --- src/afs/afs_cell.c 2001/11/13 20:31:31 1.14 +++ src/afs/afs_cell.c 2001/11/16 05:13:53 @@ -79,6 +79,7 @@ struct cell *afs_GetCellByName2(); +struct cell *afs_GetCellInternal(); int afs_strcasecmp(s1, s2) register char *s1, *s2; --- src/afs/afs_daemons.c 2001/11/10 23:00:55 1.12 +++ src/afs/afs_daemons.c 2001/11/16 05:13:53 @@ -433,16 +433,17 @@ * be waiting for our wakeup anyway. 
*/ tdc = (struct dcache *) (ab->ptr_parm[0]); - tdc->flags &= ~DFFetchReq; + ObtainSharedLock(&tdc->lock, 640); + if (tdc->mflags & DFFetchReq) { + UpgradeSToWLock(&tdc->lock, 641); + tdc->mflags &= ~DFFetchReq; + ReleaseWriteLock(&tdc->lock); + } else { + ReleaseSharedLock(&tdc->lock); + } afs_osi_Wakeup(&tdc->validPos); if (ab->size_parm[1]) { -#ifdef AFS_SUN5_ENVX - mutex_enter(&tdc->lock); - tdc->refCount--; - mutex_exit(&tdc->lock); -#else afs_PutDCache(tdc); /* put this one back, too */ -#endif } } @@ -1348,7 +1349,7 @@ afs_sgibklist = NULL; SPUNLOCK(afs_sgibklock, s); AFS_GLOCK(); - tdc->flags &= ~DFEntryMod; + tdc->dflags &= ~DFEntryMod; afs_WriteDCache(tdc, 1); AFS_GUNLOCK(); s = SPLOCK(afs_sgibklock); --- src/afs/afs_dcache.c 2001/11/13 14:47:11 1.14 +++ src/afs/afs_dcache.c 2001/11/16 05:13:53 @@ -25,6 +25,7 @@ static void afs_GetDownD(int anumber, int *aneedSpace); static void afs_FreeDiscardedDCache(void); static void afs_DiscardDCache(struct dcache *); +static void afs_FreeDCache(struct dcache *); /* Imported variables */ extern afs_rwlock_t afs_xvcache; @@ -85,7 +86,7 @@ int afs_TruncateDaemonRunning = 0; int afs_CacheTooFull = 0; -afs_int32 afs_dcentries; /* In-memory dcache entries */ +afs_int32 afs_dcentries; /* In-memory dcache entries */ int dcacheDisabled = 0; @@ -355,7 +356,7 @@ AFS_STATCNT(afs_AdjustSize); - adc->flags |= DFEntryMod; + adc->dflags |= DFEntryMod; oldSize = ((adc->f.chunkBytes + afs_fsfragsize)^afs_fsfragsize)>>10;/* round up */ adc->f.chunkBytes = newSize; newSize = ((newSize + afs_fsfragsize)^afs_fsfragsize)>>10;/* round up */ @@ -494,8 +495,16 @@ /* first, hold all the victims, since we're going to release the lock * during the truncate operation. 
*/ - for(i=0; i < victimPtr; i++) - victimDCs[i] = afs_GetDSlot(victims[i], 0); + for(i=0; i < victimPtr; i++) { + tdc = afs_GetDSlot(victims[i], 0); + /* We got tdc->tlock(R) here */ + if (tdc->refCount == 1) + victimDCs[i] = tdc; + else + victimDCs[i] = 0; + ReleaseReadLock(&tdc->tlock); + if (!victimDCs[i]) afs_PutDCache(tdc); + } for(i = 0; i < victimPtr; i++) { /* q is first elt in dcache entry */ tdc = victimDCs[i]; @@ -505,7 +514,7 @@ * compare with 1, since we bumped it above when we called * afs_GetDSlot to preserve the entry's identity. */ - if (tdc->refCount == 1) { + if (tdc && tdc->refCount == 1) { unsigned char chunkFlags; afs_size_t tchunkoffset; afid = &tdc->f.fid; @@ -651,11 +660,7 @@ j = 1; /* we reclaimed at least one victim */ } } -#ifdef AFS_SUN5_ENVX afs_PutDCache(tdc); -#else - tdc->refCount--; /* put it back */ -#endif } if (phase == 0) { @@ -747,7 +752,7 @@ adc->f.fid.Fid.Volume = 0; /* invalid */ /* mark entry as modified */ - adc->flags |= DFEntryMod; + adc->dflags |= DFEntryMod; /* all done */ return 0; @@ -814,8 +819,9 @@ * * Environment: called with afs_xdcache lock write-locked. */ -afs_FreeDCache(adc) -register struct dcache *adc; { +static void afs_FreeDCache(adc) + register struct dcache *adc; +{ /* Thread on free list, update free list count and mark entry as * freed in its indexFlags element. Also, ensure DCache entry gets * written out (set DFEntryMod). @@ -825,7 +831,7 @@ afs_freeDCList = adc->index; afs_freeDCCount++; afs_indexFlags[adc->index] |= IFFree; - adc->flags |= DFEntryMod; + adc->dflags |= DFEntryMod; if (afs_WaitForCacheDrain) { if ((afs_blocksUsed - afs_blocksDiscarded) <= @@ -834,7 +840,6 @@ afs_osi_Wakeup(&afs_WaitForCacheDrain); } } - return 0; } /* @@ -852,6 +857,9 @@ * * Parameters: * adc : Ptr to dcache entry. + * + * Environment: + * Must be called with afs_xdcache write-locked. 
*/ static void @@ -863,6 +871,9 @@ register afs_int32 size; AFS_STATCNT(afs_DiscardDCache); + + osi_Assert(adc->refCount == 1); + size = ((adc->f.chunkBytes + afs_fsfragsize)^afs_fsfragsize)>>10;/* round up */ afs_blocksDiscarded += size; afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded; @@ -872,7 +883,7 @@ afs_discardDCCount++; adc->f.fid.Fid.Volume = 0; - adc->flags |= DFEntryMod; + adc->dflags |= DFEntryMod; afs_indexFlags[adc->index] |= IFDiscarded; if (afs_WaitForCacheDrain) { @@ -910,12 +921,17 @@ * Get an entry from the list of discarded cache elements */ tdc = afs_GetDSlot(afs_discardDCList, 0); + osi_Assert(tdc->refCount == 1); + ReleaseReadLock(&tdc->tlock); + afs_discardDCList = afs_dvnextTbl[tdc->index]; afs_dvnextTbl[tdc->index] = NULLIDX; afs_discardDCCount--; size = ((tdc->f.chunkBytes + afs_fsfragsize)^afs_fsfragsize)>>10;/* round up */ afs_blocksDiscarded -= size; afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded; + /* We can lock because we just took it off the free list */ + ObtainWriteLock(&tdc->lock, 626); MReleaseWriteLock(&afs_xdcache); /* @@ -932,7 +948,8 @@ MObtainWriteLock(&afs_xdcache,511); afs_indexFlags[tdc->index] &= ~IFDiscarded; afs_FreeDCache(tdc); - tdc->refCount--; + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); MReleaseWriteLock(&afs_xdcache); } @@ -966,6 +983,9 @@ * * Parameters: * anumber : Targeted number of disk slots to free up. + * + * Environment: + * Must be called with afs_xdcache write-locked. 
*/ #if defined(AFS_SGI_ENV) && defined(AFS_SGI_SHORTSTACK) extern SV_TYPE afs_sgibksync; @@ -1009,12 +1029,12 @@ QRemove(&tdc->lruq); /* write-through if modified */ - if (tdc->flags & DFEntryMod) { + if (tdc->dflags & DFEntryMod) { #if defined(AFS_SGI_ENV) && defined(AFS_SGI_SHORTSTACK) /* * ask proxy to do this for us - we don't have the stack space */ - while (tdc->flags & DFEntryMod) { + while (tdc->dflags & DFEntryMod) { int s; AFS_GUNLOCK(); s = SPLOCK(afs_sgibklock); @@ -1028,7 +1048,7 @@ AFS_GLOCK(); } #else - tdc->flags &= ~DFEntryMod; + tdc->dflags &= ~DFEntryMod; afs_WriteDCache(tdc, 1); #endif } @@ -1056,6 +1076,32 @@ } /*afs_GetDownDSlot*/ +/* + * afs_RefDCache + * + * Description: + * Increment the reference count on a disk cache entry, + * which already has a non-zero refcount. In order to + * increment the refcount of a zero-reference entry, you + * have to hold afs_xdcache. + * + * Parameters: + * adc : Pointer to the dcache entry to increment. + * + * Environment: + * Nothing interesting. + */ +afs_RefDCache(adc) + struct dcache *adc; +{ + ObtainWriteLock(&adc->tlock, 627); + if (adc->refCount < 0) + osi_Panic("RefDCache: negative refcount"); + adc->refCount++; + ReleaseWriteLock(&adc->tlock); + return 0; +} + /* * afs_PutDCache @@ -1069,22 +1115,16 @@ * Environment: * Nothing interesting. 
*/ -afs_PutDCache(ad) - register struct dcache *ad; +afs_PutDCache(adc) + register struct dcache *adc; { /*afs_PutDCache*/ AFS_STATCNT(afs_PutDCache); -#ifndef AFS_SUN5_ENVX - MObtainWriteLock(&afs_xdcache,276); -#endif - if (ad->refCount <= 0) + ObtainWriteLock(&adc->tlock, 276); + if (adc->refCount <= 0) osi_Panic("putdcache"); - --ad->refCount; -#ifdef AFS_SUN5_ENVX - MReleaseWriteLock(&ad->lock); -#else - MReleaseWriteLock(&afs_xdcache); -#endif + --adc->refCount; + ReleaseWriteLock(&adc->tlock); return 0; } /*afs_PutDCache*/ @@ -1135,17 +1175,21 @@ for(index = afs_dvhashTbl[i]; index != NULLIDX; index=i) { i = afs_dvnextTbl[index]; /* next pointer this hash table */ if (afs_indexUnique[index] == avc->fid.Fid.Unique) { + int releaseTlock = 1; tdc = afs_GetDSlot(index, (struct dcache *)0); if (!FidCmp(&tdc->f.fid, &avc->fid)) { if (sync) { if ((afs_indexFlags[index] & IFDataMod) == 0 && tdc->refCount == 1) { + ReleaseReadLock(&tdc->tlock); + releaseTlock = 0; afs_FlushDCache(tdc); } } else afs_indexTable[index] = 0; } - lockedPutDCache(tdc); + if (releaseTlock) ReleaseReadLock(&tdc->tlock); + afs_PutDCache(tdc); } } #if defined(AFS_SUN5_ENV) @@ -1206,10 +1250,11 @@ for(index = afs_dchashTbl[i]; index != NULLIDX;) { if (afs_indexUnique[index] == avc->fid.Fid.Unique) { tdc = afs_GetDSlot(index, (struct dcache *)0); + ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->fid) && chunk == tdc->f.chunk) { break; /* leaving refCount high for caller */ } - lockedPutDCache(tdc); + afs_PutDCache(tdc); } index = afs_dcnextTbl[index]; } @@ -1289,17 +1334,12 @@ osi_FreeLargeSpace(tbuffer); return EIO; } - afs_Trace1(afs_iclSetp, CM_TRACE_STOREPROC2, ICL_TYPE_INT32, got); -if (got == 0) printf("StoreProc: got == 0\n"); -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + afs_Trace1(afs_iclSetp, CM_TRACE_STOREPROC2, ICL_TYPE_INT32, got); + RX_AFS_GUNLOCK(); code = rx_Write(acall, tbuffer, got); /* writing 0 bytes will * push a short packet. 
Is that really what we want, just because the * data didn't come back from the disk yet? Let's try it and see. */ -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); #ifndef AFS_NOSTATS (*abytesXferredP) += code; #endif /* AFS_NOSTATS */ @@ -1338,7 +1378,7 @@ * acall : Ptr to the Rx call structure. * afile : File descriptor for the cache file. * abase : Base offset to fetch. - * adc : Ptr to the dcache entry for the file. + * adc : Ptr to the dcache entry for the file, write-locked. * avc : Ptr to the vcache entry for the file. * abytesToXferP : Set to the number of bytes to xfer. * NOTE: This parameter is only used if AFS_NOSTATS @@ -1369,6 +1409,7 @@ int moredata = 0; AFS_STATCNT(UFS_CacheFetchProc); + osi_Assert(WriteLocked(&adc->lock)); afile->offset = 0; /* Each time start from the beginning */ length = lengthFound; #ifndef AFS_NOSTATS @@ -1378,14 +1419,10 @@ tbuffer = osi_AllocLargeSpace(AFS_LRALLOCSIZ); do { if (moredata) { -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code = rx_Read(acall, (char *)&length, sizeof(afs_int32)); + RX_AFS_GLOCK(); length = ntohl(length); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ if (code != sizeof(afs_int32)) { osi_FreeLargeSpace(tbuffer); code = rx_Error(acall); @@ -1412,13 +1449,9 @@ #endif /* AFS_NOSTATS */ while (length > 0) { tlen = (length > AFS_LRALLOCSIZ ? 
AFS_LRALLOCSIZ : length); -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code = rx_Read(acall, tbuffer, tlen); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); #ifndef AFS_NOSTATS (*abytesXferredP) += code; #endif /* AFS_NOSTATS */ @@ -1437,15 +1470,12 @@ abase += tlen; length -= tlen; adc->validPos = abase; - if (adc->flags & DFWaiting) { - afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, - ICL_TYPE_STRING, __FILE__, - ICL_TYPE_INT32, __LINE__, - ICL_TYPE_POINTER, adc, - ICL_TYPE_INT32, adc->flags); - adc->flags &= ~DFWaiting; - afs_osi_Wakeup(&adc->validPos); - } + afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, + ICL_TYPE_STRING, __FILE__, + ICL_TYPE_INT32, __LINE__, + ICL_TYPE_POINTER, adc, + ICL_TYPE_INT32, adc->dflags); + afs_osi_Wakeup(&adc->validPos); } } while (moredata); osi_FreeLargeSpace(tbuffer); @@ -1489,10 +1519,12 @@ struct AFSCallBack CallBack; }; -/* these fields are protected by the lock on the vcache and luck - * on the dcache */ -void updateV2DC(int l, struct vcache *v, struct dcache *d, int src) { - if (!l || 0 == NBObtainWriteLock(&(v->lock),src)) { +/* + * Update the vnode-to-dcache hint if we can get the vnode lock + * right away. Assumes dcache entry is at least read-locked. 
+ */ +void updateV2DC(int lockVc, struct vcache *v, struct dcache *d, int src) { + if (!lockVc || 0 == NBObtainWriteLock(&v->lock,src)) { if (hsame(v->m.DataVersion, d->f.versionNo) && v->callback) { v->quick.dc = d; v->quick.stamp = d->stamp = MakeStamp(); @@ -1501,12 +1533,12 @@ v->quick.len = d->f.chunkBytes; v->h1.dchint = d; } - if(l) ReleaseWriteLock(&((v)->lock)); + if (lockVc) ReleaseWriteLock(&v->lock); } } struct dcache *afs_GetDCache(avc, abyte, areq, aoffset, alen, aflags) - register struct vcache *avc; /*Held*/ + register struct vcache *avc; /* Write-locked unless aflags & 1 */ afs_size_t abyte; afs_size_t *aoffset, *alen; int aflags; @@ -1527,14 +1559,20 @@ #endif /* AFS_64BIT_CLIENT */ afs_int32 size, tlen; /* size of segment to transfer */ afs_size_t lengthFound; /* as returned from server */ - struct tlocal1 *tsmall; + struct tlocal1 *tsmall = 0; register struct dcache *tdc; register struct osi_file *file; register struct conn *tc; int downDCount = 0; + struct server *newCallback; + char setNewCallback; + char setVcacheStatus; + char doVcacheUpdate; + char slowPass = 0; int doAdjustSize = 0; int doReallyAdjustSize = 0; int overWriteWholeChunk = 0; + XSTATS_DECLS #ifndef AFS_NOSTATS struct afs_stats_xferData *xferP; /* Ptr to this op's xfer struct */ @@ -1552,6 +1590,8 @@ if (dcacheDisabled) return NULL; + setLocks = aflags & 1; + /* * Determine the chunk number and offset within the chunk corresponding * to the desired byte. @@ -1563,17 +1603,42 @@ chunk = AFS_CHUNK(abyte); } - setLocks = aflags & 1; - /* come back to here if we waited for the cache to drain. */ - RetryGetDCache: +RetryGetDCache: + + setNewCallback = setVcacheStatus = 0; + + if (setLocks) { + if (slowPass) + ObtainWriteLock(&avc->lock, 616); + else + ObtainReadLock(&avc->lock); + } + + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + */ + shortcut = 0; /* check hints first! (might could use bcmp or some such...) 
*/ - if (tdc = avc->h1.dchint) { + int dcLocked; + + /* + * The locking order between afs_xdcache and dcache lock matters. + * The hint dcache entry could be anywhere, even on the free list. + * Locking afs_xdcache ensures that no one is trying to pull dcache + * entries from the free list, and thereby assuming them to be not + * referenced and not locked. + */ MObtainReadLock(&afs_xdcache); - if ( (tdc->index != NULLIDX) && !FidCmp(&tdc->f.fid, &avc->fid) && + dcLocked = (0 == NBObtainSharedLock(&tdc->lock, 601)); + + if (dcLocked && + (tdc->index != NULLIDX) && !FidCmp(&tdc->f.fid, &avc->fid) && chunk == tdc->f.chunk && !(afs_indexFlags[tdc->index] & (IFFree|IFDiscarded))) { /* got the right one. It might not be the right version, and it @@ -1582,113 +1647,164 @@ /* All this code should be integrated better with what follows: * I can save a good bit more time under a write lock if I do.. */ - /* does avc need to be locked? */ - /* Note that the race labeled LOCKXXX is inconsequential: the xdcache - * lock protects both the dcache slots AND the DLRU list. While - * the slots and hash table and DLRU list all may change in the race, - * THIS particular dcache structure cannot be recycled and its LRU - * pointers must still be valid once we get the lock again. Still - * we should either create another lock or invent a new method of - * managing dcache structs -- CLOCK or something. 
*/ - shortcut = 1; -#ifdef AFS_SUN5_ENVX - MObtainWriteLock(&tdc->lock,279); -#endif + ObtainWriteLock(&tdc->tlock, 603); tdc->refCount++; - if (hsame(tdc->f.versionNo, avc->m.DataVersion) - && !(tdc->flags & DFFetching)) { - afs_stats_cmperf.dcacheHits++; - MReleaseReadLock(&afs_xdcache); + ReleaseWriteLock(&tdc->tlock); - MObtainWriteLock(&afs_xdcache, 559); /* LOCKXXX */ + MReleaseReadLock(&afs_xdcache); + shortcut = 1; + + if (hsame(tdc->f.versionNo, avc->m.DataVersion) && + !(tdc->dflags & DFFetching)) { + + afs_stats_cmperf.dcacheHits++; + MObtainWriteLock(&afs_xdcache, 559); QRemove(&tdc->lruq); QAdd(&afs_DLRU, &tdc->lruq); MReleaseWriteLock(&afs_xdcache); + + /* Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(S) + */ goto done; } -#ifdef AFS_SUN5_ENVX - MReleaseWriteLock(&tdc->lock); -#endif + } else { + if (dcLocked) ReleaseSharedLock(&tdc->lock); + MReleaseReadLock(&afs_xdcache); } - MReleaseReadLock(&afs_xdcache); + if (!shortcut) tdc = 0; } - if (!tdc) { - /* - * Hash on the [fid, chunk] and get the corresponding dcache index - * after write-locking the dcache. 
- */ - RetryLookup: - i = DCHash(&avc->fid, chunk); - afs_MaybeWakeupTruncateDaemon(); /* check to make sure our space is fine */ - MObtainWriteLock(&afs_xdcache,280); - us = NULLIDX; - for(index = afs_dchashTbl[i]; index != NULLIDX;) { - if (afs_indexUnique[index] == avc->fid.Fid.Unique) { - tdc = afs_GetDSlot(index, (struct dcache *)0); - if (!FidCmp(&tdc->f.fid, &avc->fid) && chunk == tdc->f.chunk) { - /* Move it up in the beginning of the list */ - if (afs_dchashTbl[i] != index) { - afs_dcnextTbl[us] = afs_dcnextTbl[index]; - afs_dcnextTbl[index] = afs_dchashTbl[i]; - afs_dchashTbl[i] = index; - } - MReleaseWriteLock(&afs_xdcache); - break; /* leaving refCount high for caller */ + + /* Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(S) if tdc + */ + + if (!tdc) { /* If the hint wasn't the right dcache entry */ + /* + * Hash on the [fid, chunk] and get the corresponding dcache index + * after write-locking the dcache. + */ +RetryLookup: + + /* Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + */ + + i = DCHash(&avc->fid, chunk); + /* check to make sure our space is fine */ + afs_MaybeWakeupTruncateDaemon(); + + MObtainWriteLock(&afs_xdcache,280); + us = NULLIDX; + for (index = afs_dchashTbl[i]; index != NULLIDX; ) { + if (afs_indexUnique[index] == avc->fid.Fid.Unique) { + tdc = afs_GetDSlot(index, (struct dcache *)0); + ReleaseReadLock(&tdc->tlock); + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * afs_xdcache(W) + */ + if (!FidCmp(&tdc->f.fid, &avc->fid) && chunk == tdc->f.chunk) { + /* Move it up in the beginning of the list */ + if (afs_dchashTbl[i] != index) { + afs_dcnextTbl[us] = afs_dcnextTbl[index]; + afs_dcnextTbl[index] = afs_dchashTbl[i]; + afs_dchashTbl[i] = index; + } + MReleaseWriteLock(&afs_xdcache); + ObtainSharedLock(&tdc->lock, 606); + break; /* leaving refCount high for caller 
*/ + } + afs_PutDCache(tdc); + tdc = 0; } - tdc->refCount--; /* was incremented by afs_GetDSlot */ - tdc = 0; - } - us = index; - index = afs_dcnextTbl[index]; - } - /* - * If we didn't find the entry, we'll create one. - */ - if (index == NULLIDX) { - afs_Trace2(afs_iclSetp, CM_TRACE_GETDCACHE1, ICL_TYPE_POINTER, avc, - ICL_TYPE_INT32, chunk); + us = index; + index = afs_dcnextTbl[index]; + } + /* + * If we didn't find the entry, we'll create one. + */ + if (index == NULLIDX) { + /* + * Locks held: + * avc->lock(R) if setLocks + * avc->lock(W) if !setLocks + * afs_xdcache(W) + */ + afs_Trace2(afs_iclSetp, CM_TRACE_GETDCACHE1, ICL_TYPE_POINTER, + avc, ICL_TYPE_INT32, chunk); + + /* Make sure there is a free dcache entry for us to use */ if (afs_discardDCList == NULLIDX && afs_freeDCList == NULLIDX) { - while (1) { + while (1) { if (!setLocks) avc->states |= CDCLock; afs_GetDownD(5, (int*)0); /* just need slots */ - if (!setLocks) avc->states &= (~CDCLock); + if (!setLocks) avc->states &= ~CDCLock; if (afs_discardDCList != NULLIDX || afs_freeDCList != NULLIDX) - break; + break; /* If we can't get space for 5 mins we give up and panic */ if (++downDCount > 300) - osi_Panic("getdcache"); - MReleaseWriteLock(&afs_xdcache); + osi_Panic("getdcache"); + MReleaseWriteLock(&afs_xdcache); + /* + * Locks held: + * avc->lock(R) if setLocks + * avc->lock(W) if !setLocks + */ afs_osi_Wait(1000, 0, 0); goto RetryLookup; - } + } } + if (afs_discardDCList == NULLIDX || - ((aflags & 2) && afs_freeDCList != NULLIDX)) { - afs_indexFlags[afs_freeDCList] &= ~IFFree; - tdc = afs_GetDSlot(afs_freeDCList, 0); - afs_freeDCList = afs_dvnextTbl[tdc->index]; - afs_freeDCCount--; + ((aflags & 2) && afs_freeDCList != NULLIDX)) { + + afs_indexFlags[afs_freeDCList] &= ~IFFree; + tdc = afs_GetDSlot(afs_freeDCList, 0); + osi_Assert(tdc->refCount == 1); + ReleaseReadLock(&tdc->tlock); + ObtainWriteLock(&tdc->lock, 604); + afs_freeDCList = afs_dvnextTbl[tdc->index]; + afs_freeDCCount--; } else { - 
afs_indexFlags[afs_discardDCList] &= ~IFDiscarded; - tdc = afs_GetDSlot(afs_discardDCList, 0); - afs_discardDCList = afs_dvnextTbl[tdc->index]; - afs_discardDCCount--; - size = ((tdc->f.chunkBytes + afs_fsfragsize)^afs_fsfragsize)>>10; - afs_blocksDiscarded -= size; - afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded; - if (aflags & 2) { + afs_indexFlags[afs_discardDCList] &= ~IFDiscarded; + tdc = afs_GetDSlot(afs_discardDCList, 0); + osi_Assert(tdc->refCount == 1); + ReleaseReadLock(&tdc->tlock); + ObtainWriteLock(&tdc->lock, 605); + afs_discardDCList = afs_dvnextTbl[tdc->index]; + afs_discardDCCount--; + size = ((tdc->f.chunkBytes + afs_fsfragsize)^afs_fsfragsize)>>10; + afs_blocksDiscarded -= size; + afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded; + if (aflags & 2) { /* Truncate the chunk so zeroes get filled properly */ file = afs_CFileOpen(tdc->f.inode); afs_CFileTruncate(file, 0); afs_CFileClose(file); afs_AdjustSize(tdc, 0); - } + } } - + /* + * Locks held: + * avc->lock(R) if setLocks + * avc->lock(W) if !setLocks + * tdc->lock(W) + * afs_xdcache(W) + */ + + /* * Fill in the newly-allocated dcache record. */ afs_indexFlags[tdc->index] &= ~(IFDirtyPages | IFAnyPages); @@ -1699,6 +1815,7 @@ tdc->validPos = AFS_CHUNKTOBASE(chunk); /* XXX */ if (tdc->lruq.prev == &tdc->lruq) osi_Panic("lruq 1"); + /* * Now add to the two hash chains - note that i is still set * from the above DCHash call. @@ -1708,23 +1825,31 @@ i = DVHash(&avc->fid); afs_dvnextTbl[tdc->index] = afs_dvhashTbl[i]; afs_dvhashTbl[i] = tdc->index; - tdc->flags = DFEntryMod; + tdc->dflags = DFEntryMod; + tdc->mflags = 0; tdc->f.states = 0; afs_MaybeWakeupTruncateDaemon(); MReleaseWriteLock(&afs_xdcache); - } - } /* else hint failed... 
*/ + ConvertWToSLock(&tdc->lock); + } + } /* vcache->dcache hint failed */ + + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(S) + */ afs_Trace4(afs_iclSetp, CM_TRACE_GETDCACHE2, ICL_TYPE_POINTER, avc, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, hgetlo(tdc->f.versionNo), ICL_TYPE_INT32, hgetlo(avc->m.DataVersion)); /* - * Here we have the unlocked entry in tdc, with its refCount - * incremented. Note: we don't use the S-lock; it costs concurrency - * when storing a file back to the server. + * Here we have the entry in tdc, with its refCount incremented. + * Note: we don't use the S-lock on avc; it costs concurrency when + * storing a file back to the server. */ - if (setLocks) ObtainReadLock(&avc->lock); /* * Not a newly created file so we need to check the file's length and @@ -1735,7 +1860,7 @@ */ size = AFS_CHUNKSIZE(abyte); if (aflags & 4) /* called from write */ - tlen = *alen; + tlen = *alen; else /* called from read */ tlen = tdc->validPos - abyte; Position = AFS_CHUNKTOBASE(chunk); @@ -1768,41 +1893,22 @@ #endif #endif /* defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV) */ !hsame(avc->m.DataVersion, tdc->f.versionNo)) - doReallyAdjustSize = 1; + doReallyAdjustSize = 1; + if (doReallyAdjustSize || overWriteWholeChunk) { - doReallyAdjustSize = 0; /* no data in file to read at this position */ - if (setLocks) { - ReleaseReadLock(&avc->lock); - ObtainWriteLock(&avc->lock,64); - } - /* check again, now that we have a write lock */ -#if defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV) - if (AFS_CHUNKTOBASE(chunk)+adjustsize >= avc->m.Length && -#else -#if defined(AFS_SUN_ENV) || defined(AFS_OSF_ENV) - if ((doAdjustSize || (AFS_CHUNKTOBASE(chunk) >= avc->m.Length)) && -#else - if (AFS_CHUNKTOBASE(chunk) >= avc->m.Length && -#endif -#endif - !hsame(avc->m.DataVersion, tdc->f.versionNo)) - doReallyAdjustSize = 1; - if (doReallyAdjustSize || overWriteWholeChunk) { - file = afs_CFileOpen(tdc->f.inode); - 
afs_CFileTruncate(file, 0); - afs_CFileClose(file); - afs_AdjustSize(tdc, 0); - hset(tdc->f.versionNo, avc->m.DataVersion); - tdc->flags |= DFEntryMod; - } - if (setLocks) { - ReleaseWriteLock(&avc->lock); - ObtainReadLock(&avc->lock); - } + UpgradeSToWLock(&tdc->lock, 607); + + file = afs_CFileOpen(tdc->f.inode); + afs_CFileTruncate(file, 0); + afs_CFileClose(file); + afs_AdjustSize(tdc, 0); + hset(tdc->f.versionNo, avc->m.DataVersion); + tdc->dflags |= DFEntryMod; + + ConvertWToSLock(&tdc->lock); } } - if (setLocks) ReleaseReadLock(&avc->lock); /* * We must read in the whole chunk if the version number doesn't @@ -1810,9 +1916,12 @@ */ if (aflags & 2) { /* don't need data, just a unique dcache entry */ + ObtainWriteLock(&afs_xdcache, 608); hset(afs_indexTimes[tdc->index], afs_indexCounter); hadd32(afs_indexCounter, 1); - updateV2DC(setLocks,avc,tdc,567); + ReleaseWriteLock(&afs_xdcache); + + updateV2DC(setLocks, avc, tdc, 553); if (vType(avc) == VDIR) *aoffset = abyte; else @@ -1821,19 +1930,37 @@ *alen = (afs_size_t) 0; else *alen = tdc->validPos - abyte; + ReleaseSharedLock(&tdc->lock); + if (setLocks) { + if (slowPass) + ReleaseWriteLock(&avc->lock); + else + ReleaseReadLock(&avc->lock); + } return tdc; /* check if we're done */ } - osi_Assert(setLocks || WriteLocked(&avc->lock)); - if (setLocks) ObtainReadLock(&avc->lock); + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(S) + */ + osi_Assert((setLocks && !slowPass) || WriteLocked(&avc->lock)); + + setNewCallback = setVcacheStatus = 0; + + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(S) + */ if (!hsame(avc->m.DataVersion, tdc->f.versionNo) && !overWriteWholeChunk) { /* * Version number mismatch. 
*/ - if (setLocks) { - ReleaseReadLock(&avc->lock); - ObtainWriteLock(&avc->lock,65); - } + UpgradeSToWLock(&tdc->lock, 609); /* * If data ever existed for this vnode, and this is a text object, @@ -1853,32 +1980,52 @@ /* * By here, the cache entry is always write-locked. We can * deadlock if we call osi_Flush with the cache entry locked... + * Unlock the dcache too. */ - ReleaseWriteLock(&avc->lock); + ReleaseWriteLock(&tdc->lock); + if (setLocks && !slowPass) + ReleaseReadLock(&avc->lock); + else + ReleaseWriteLock(&avc->lock); + osi_FlushText(avc); /* * Call osi_FlushPages in open, read/write, and map, since it * is too hard here to figure out if we should lock the * pvnLock. */ - ObtainWriteLock(&avc->lock,66); + if (setLocks && !slowPass) + ObtainReadLock(&avc->lock); + else + ObtainWriteLock(&avc->lock, 66); + ObtainWriteLock(&tdc->lock, 610); } + + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(W) + */ - /* Watch for standard race condition */ + /* Watch for standard race condition around osi_FlushText */ if (hsame(avc->m.DataVersion, tdc->f.versionNo)) { - updateV2DC(0,avc,tdc,569); /* set hint */ - if (setLocks) ReleaseWriteLock(&avc->lock); - afs_stats_cmperf.dcacheHits++; + updateV2DC(setLocks, avc, tdc, 569); /* set hint */ + afs_stats_cmperf.dcacheHits++; + ConvertWToSLock(&tdc->lock); goto done; } /* Sleep here when cache needs to be drained. */ - if (setLocks && + if (setLocks && !slowPass && (afs_blocksUsed > (CM_WAITFORDRAINPCT*afs_cacheBlocks)/100)) { /* Make sure truncate daemon is running */ afs_MaybeWakeupTruncateDaemon(); + ObtainWriteLock(&tdc->tlock, 614); tdc->refCount--; /* we'll re-obtain the dcache when we re-try. 
*/ - ReleaseWriteLock(&avc->lock); + ReleaseWriteLock(&tdc->tlock); + ReleaseWriteLock(&tdc->lock); + ReleaseReadLock(&avc->lock); while ((afs_blocksUsed-afs_blocksDiscarded) > (CM_WAITFORDRAINPCT*afs_cacheBlocks)/100) { afs_WaitForCacheDrain = 1; @@ -1887,7 +2034,7 @@ afs_MaybeFreeDiscardedDCache(); /* need to check if someone else got the chunk first. */ goto RetryGetDCache; - } + } /* Do not fetch data beyond truncPos. */ maxGoodLength = avc->m.Length; @@ -1937,13 +2084,14 @@ file = afs_CFileOpen(tdc->f.inode); afs_RemoveVCB(&avc->fid); tdc->f.states |= DWriting; - tdc->flags |= DFFetching; + tdc->dflags |= DFFetching; tdc->validPos = Position; /* which is AFS_CHUNKBASE(abyte) */ - if (tdc->flags & DFFetchReq) { - tdc->flags &= ~DFFetchReq; + if (tdc->mflags & DFFetchReq) { + tdc->mflags &= ~DFFetchReq; afs_osi_Wakeup(&tdc->validPos); } tsmall = (struct tlocal1 *) osi_AllocLargeSpace(sizeof(struct tlocal1)); + setVcacheStatus = 0; #ifndef AFS_NOSTATS /* * Remember if we are doing the reading from a replicated volume, @@ -1994,6 +2142,13 @@ * Not a dynamic vnode: do the real fetch. 
*/ if (size) do { + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(W) + */ + tc = afs_Conn(&avc->fid, areq, SHARED_LOCK); if (tc) { afs_int32 length_hi, length, bytes; @@ -2003,50 +2158,39 @@ (accP->numReplicasAccessed)++; #endif /* AFS_NOSTATS */ - avc->callback = tc->srvr->server; - ConvertWToSLock(&avc->lock); + if (!setLocks || slowPass) { + avc->callback = tc->srvr->server; + } else { + newCallback = tc->srvr->server; + setNewCallback = 1; + } i = osi_Time(); -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); tcall = rx_NewCall(tc->id); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ - + RX_AFS_GLOCK(); XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_FETCHDATA); #ifdef AFS_64BIT_CLIENT length_hi = code = 0; if (!afs_serverHasNo64Bit(tc)) { tsize = size; -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code = StartRXAFS_FetchData64(tcall, (struct AFSFid *) &avc->fid.Fid, Position, tsize); if (code != 0) { -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); } else { bytes = rx_Read(tcall, (char *)&length_hi, sizeof(afs_int32)); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); if (bytes == sizeof(afs_int32)) { length_hi = ntohl(length_hi); } else { length_hi = 0; code = rx_Error(tcall); -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code1 = rx_EndCall(tcall, code); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); afs_Trace2(afs_iclSetp, CM_TRACE_FETCH64CODE, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, code); tcall = (struct rx_call *) 0; @@ -2059,27 +2203,19 @@ } else { afs_int32 pos; pos = Position; -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); if (!tcall) tcall = rx_NewCall(tc->id); code = 
StartRXAFS_FetchData(tcall, (struct AFSFid *) &avc->fid.Fid, pos, size); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); } afs_serverSetNo64Bit(tc); } if (code == 0) { -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); bytes = rx_Read(tcall, (char *)&length, sizeof(afs_int32)); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); if (bytes == sizeof(afs_int32)) { length = ntohl(length); } else { @@ -2091,23 +2227,15 @@ ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, code, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(lengthFound)); #else /* AFS_64BIT_CLIENT */ -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code = StartRXAFS_FetchData(tcall, (struct AFSFid *) &avc->fid.Fid, Position, size); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); if (code == 0) { -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); bytes = rx_Read(tcall, (char *)&length, sizeof(afs_int32)); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); if (bytes == sizeof(afs_int32)) { length = ntohl(length); } else { @@ -2137,10 +2265,10 @@ if (bytesXferred > xferP->maxBytes) xferP->maxBytes = bytesXferred; - /* - * Tally the size of the object. Note: we tally the actual size, - * NOT the number of bytes that made it out over the wire. - */ + /* + * Tally the size of the object. Note: we tally the actual size, + * NOT the number of bytes that made it out over the wire. 
+ */ if (bytesToXfer <= AFS_STATS_MAXBYTES_BUCKET0) (xferP->count[0])++; else @@ -2176,33 +2304,24 @@ if (afs_stats_TimeGreaterThan(elapsedTime, (xferP->maxTime))) { afs_stats_TimeAssign((xferP->maxTime), elapsedTime); } - } + } #else code = afs_CacheFetchProc(tcall, file, Position, tdc, avc, 0, 0, length); #endif /* AFS_NOSTATS */ } if (code == 0) { -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code = EndRXAFS_FetchData(tcall, &tsmall->OutStatus, &tsmall->CallBack, &tsmall->tsync); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); } XSTATS_END_TIME; -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); if (tcall) code1 = rx_EndCall(tcall, code); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ - UpgradeSToWLock(&avc->lock,27); + RX_AFS_GLOCK(); } else { code = -1; @@ -2216,27 +2335,51 @@ * when we started. */ /* * validPos is updated by CacheFetchProc, and can only be - * modifed under an S or W lock, which we've blocked out + * modifed under a dcache write lock, which we've blocked out */ size = tdc->validPos - Position; /* actual segment size */ if (size < 0) size = 0; afs_CFileTruncate(file, size); /* prune it */ + } + else { + if (!setLocks || slowPass) { + ObtainWriteLock(&afs_xcbhash, 453); + afs_DequeueCallback(avc); + avc->states &= ~(CStatd | CUnique); + avc->callback = (struct server *)0; + ReleaseWriteLock(&afs_xcbhash); + if (avc->fid.Fid.Vnode & 1 || (vType(avc) == VDIR)) + osi_dnlc_purgedp(avc); + } else { + /* Something lost. Forget about performance, and go + * back with a vcache write lock. 
+ */ + afs_CFileTruncate(file, 0); + afs_AdjustSize(tdc, 0); + afs_CFileClose(file); + osi_FreeLargeSpace(tsmall); + tsmall = 0; + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); + tdc = 0; + ReleaseReadLock(&avc->lock); + slowPass = 1; + goto RetryGetDCache; + } } - else { - ObtainWriteLock(&afs_xcbhash, 453); - afs_DequeueCallback(avc); - avc->states &= ~(CStatd | CUnique); - avc->callback = (struct server *)0; - ReleaseWriteLock(&afs_xcbhash); - if (avc->fid.Fid.Vnode & 1 || (vType(avc) == VDIR)) - osi_dnlc_purgedp(avc); - } } while (afs_Analyze(tc, code, &avc->fid, areq, AFS_STATS_FS_RPCIDX_FETCHDATA, SHARED_LOCK, (struct cell *)0)); + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(W) + */ + #ifndef AFS_NOSTATS /* * In the case of replicated access, jot down info on the number of @@ -2250,19 +2393,19 @@ } #endif /* AFS_NOSTATS */ - tdc->flags &= ~DFFetching; - if (tdc->flags & DFWaiting) { - afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, - ICL_TYPE_STRING, __FILE__, - ICL_TYPE_INT32, __LINE__, - ICL_TYPE_POINTER, tdc, - ICL_TYPE_INT32, tdc->flags); - tdc->flags &= ~DFWaiting; - afs_osi_Wakeup(&tdc->validPos); - } + tdc->dflags &= ~DFFetching; + afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, + ICL_TYPE_STRING, __FILE__, + ICL_TYPE_INT32, __LINE__, + ICL_TYPE_POINTER, tdc, + ICL_TYPE_INT32, tdc->dflags); + afs_osi_Wakeup(&tdc->validPos); if (avc->execsOrWriters == 0) tdc->f.states &= ~DWriting; - /* now, if code != 0, we have an error and should punt */ + /* now, if code != 0, we have an error and should punt. + * note that we have the vcache write lock, either because + * !setLocks or slowPass. 
+ */ if (code) { afs_CFileTruncate(file, 0); afs_AdjustSize(tdc, 0); @@ -2271,19 +2414,19 @@ if (vType(avc) == VDIR) { DZap(&tdc->f.inode); } -#ifdef AFS_SUN5_ENVX + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); -#else - tdc->refCount--; -#endif ObtainWriteLock(&afs_xcbhash, 454); afs_DequeueCallback(avc); avc->states &= ~( CStatd | CUnique ); ReleaseWriteLock(&afs_xcbhash); if (avc->fid.Fid.Vnode & 1 || (vType(avc) == VDIR)) osi_dnlc_purgedp(avc); - if (setLocks) ReleaseWriteLock(&avc->lock); - osi_FreeLargeSpace(tsmall); + /* + * Locks held: + * avc->lock(W); assert(!setLocks || slowPass) + */ + osi_Assert(!setLocks || slowPass); tdc = (struct dcache *) 0; goto done; } @@ -2292,28 +2435,36 @@ afs_CFileClose(file); afs_AdjustSize(tdc, size); /* new size */ /* - * Copy appropriate fields into vcache + * Copy appropriate fields into vcache. Status is + * copied later where we selectively acquire the + * vcache write lock. */ - afs_ProcessFS(avc, &tsmall->OutStatus, areq); + if (slowPass) + afs_ProcessFS(avc, &tsmall->OutStatus, areq); + else + setVcacheStatus = 1; hset64(tdc->f.versionNo, tsmall->OutStatus.dataVersionHigh, tsmall->OutStatus.DataVersion); - tdc->flags |= DFEntryMod; + tdc->dflags |= DFEntryMod; afs_indexFlags[tdc->index] |= IFEverUsed; - if (setLocks) ReleaseWriteLock(&avc->lock); - osi_FreeLargeSpace(tsmall); + ConvertWToSLock(&tdc->lock); } /*Data version numbers don't match*/ else { /* - * Data version numbers match. Release locks if we locked - * them, and remember we've had a cache hit. + * Data version numbers match. */ - if (setLocks) - ReleaseReadLock(&avc->lock); afs_stats_cmperf.dcacheHits++; } /*Data version numbers match*/ - updateV2DC(setLocks,avc,tdc,332); /* set hint */ + updateV2DC(setLocks, avc, tdc, 335); /* set hint */ done: /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + * tdc->lock(S) if tdc + */ + + /* * See if this was a reference to a file in the local cell. 
*/ if (avc->fid.Cell == LOCALCELL) @@ -2324,19 +2475,84 @@ /* Fix up LRU info */ if (tdc) { - hset(afs_indexTimes[tdc->index], afs_indexCounter); - hadd32(afs_indexCounter, 1); + MObtainWriteLock(&afs_xdcache, 602); + hset(afs_indexTimes[tdc->index], afs_indexCounter); + hadd32(afs_indexCounter, 1); + MReleaseWriteLock(&afs_xdcache); - /* return the data */ - if (vType(avc) == VDIR) + /* return the data */ + if (vType(avc) == VDIR) *aoffset = abyte; - else + else *aoffset = AFS_CHUNKOFFSET(abyte); - *alen = *aoffset + tdc->f.chunkBytes - abyte; + *alen = *aoffset + tdc->f.chunkBytes - abyte; + ReleaseSharedLock(&tdc->lock); } - return tdc; + /* + * Locks held: + * avc->lock(R) if setLocks && !slowPass + * avc->lock(W) if !setLocks || slowPass + */ + + /* Fix up the callback and status values in the vcache */ + doVcacheUpdate = 0; + if (setLocks && !slowPass) { + /* DCLOCKXXX + * + * This is our dirty little secret to parallel fetches. + * We don't write-lock the vcache while doing the fetch, + * but potentially we'll need to update the vcache after + * the fetch is done. + * + * Drop the read lock and try to re-obtain the write + * lock. If the vcache still has the same DV, it's + * ok to go ahead and install the new data. + */ + afs_hyper_t currentDV, statusDV; + + hset(currentDV, avc->m.DataVersion); + + if (setNewCallback && avc->callback != newCallback) + doVcacheUpdate = 1; + + if (tsmall) { + hset64(statusDV, tsmall->OutStatus.dataVersionHigh, + tsmall->OutStatus.DataVersion); + + if (setVcacheStatus && avc->m.Length != tsmall->OutStatus.Length) + doVcacheUpdate = 1; + if (setVcacheStatus && !hsame(currentDV, statusDV)) + doVcacheUpdate = 1; + } + + ReleaseReadLock(&avc->lock); + + if (doVcacheUpdate) { + ObtainWriteLock(&avc->lock, 615); + if (!hsame(avc->m.DataVersion, currentDV)) { + /* We lose. Someone will beat us to it. 
*/ + doVcacheUpdate = 0; + ReleaseWriteLock(&avc->lock); + } + } + } + + /* With slow pass, we've already done all the updates */ + if (slowPass) { + ReleaseWriteLock(&avc->lock); + } + + /* Check if we need to perform any last-minute fixes with a write-lock */ + if (!setLocks || doVcacheUpdate) { + if (setNewCallback) avc->callback = newCallback; + if (tsmall) afs_ProcessFS(avc, &tsmall->OutStatus, areq); + if (setLocks) ReleaseWriteLock(&avc->lock); + } + if (tsmall) osi_FreeLargeSpace(tsmall); + + return tdc; } /*afs_GetDCache*/ @@ -2360,20 +2576,68 @@ register struct dcache *tdc; register afs_int32 i, touchedit=0; + struct dcache **ents; + int entmax, entcount; AFS_STATCNT(afs_WriteThroughDSlots); + + /* + * Because of lock ordering, we can't grab dcache locks while + * holding afs_xdcache. So we enter xdcache, get a reference + * for every dcache entry, and exit xdcache. + */ MObtainWriteLock(&afs_xdcache,283); + entmax = afs_cacheFiles; + ents = afs_osi_Alloc(entmax * sizeof(struct dcache *)); + entcount = 0; for(i = 0; i < afs_cacheFiles; i++) { tdc = afs_indexTable[i]; - if (tdc && (tdc->flags & DFEntryMod)) { - tdc->flags &= ~DFEntryMod; - afs_WriteDCache(tdc, 1); - touchedit = 1; + + /* Grab tlock in case the existing refcount isn't zero */ + if (tdc && !(afs_indexFlags[i] & (IFFree | IFDiscarded))) { + ObtainWriteLock(&tdc->tlock, 623); + tdc->refCount++; + ReleaseWriteLock(&tdc->tlock); + + ents[entcount++] = tdc; + } + } + MReleaseWriteLock(&afs_xdcache); + + /* + * Now, for each dcache entry we found, check if it's dirty. + * If so, get write-lock, get afs_xdcache, which protects + * afs_cacheInodep, and flush it. Don't forget to put back + * the refcounts. 
+ */ + for (i = 0; i < entcount; i++) { + tdc = ents[i]; + + if (tdc->dflags & DFEntryMod) { + int wrLock; + + wrLock = (0 == NBObtainWriteLock(&tdc->lock, 619)); + + /* Now that we have the write lock, double-check */ + if (wrLock && (tdc->dflags & DFEntryMod)) { + tdc->dflags &= ~DFEntryMod; + MObtainWriteLock(&afs_xdcache, 620); + afs_WriteDCache(tdc, 1); + MReleaseWriteLock(&afs_xdcache); + touchedit = 1; + } + if (wrLock) ReleaseWriteLock(&tdc->lock); } + + afs_PutDCache(tdc); } + afs_osi_Free(ents, entmax * sizeof(struct dcache *)); + + MObtainWriteLock(&afs_xdcache, 617); if (!touchedit && (cacheDiskType != AFS_FCACHE_TYPE_MEM)) { - /* Touch the file to make sure that the mtime on the file is kept up-to-date - * to avoid losing cached files on cold starts because their mtime seems old... + /* Touch the file to make sure that the mtime on the file is kept + * up-to-date to avoid losing cached files on cold starts because + * their mtime seems old... */ struct afs_fheader theader; @@ -2392,14 +2656,14 @@ * * Description: * Return a pointer to an freshly initialized dcache entry using - * a memory-based cache. + * a memory-based cache. The tlock will be read-locked. * * Parameters: * aslot : Dcache slot to look at. * tmpdc : Ptr to dcache entry. * * Environment: - * Nothing interesting. + * Must be called with afs_xdcache write-locked. 
*/ struct dcache *afs_MemGetDSlot(aslot, tmpdc) @@ -2411,6 +2675,7 @@ register afs_int32 code; register struct dcache *tdc; register char *tfile; + int existing = 0; AFS_STATCNT(afs_MemGetDSlot); if (CheckLock(&afs_xdcache) != -1) osi_Panic("getdslot nolock"); @@ -2419,7 +2684,10 @@ if (tdc) { QRemove(&tdc->lruq); /* move to queue head */ QAdd(&afs_DLRU, &tdc->lruq); + /* We're holding afs_xdcache, but get tlock in case refCount != 0 */ + ObtainWriteLock(&tdc->tlock, 624); tdc->refCount++; + ConvertWToRLock(&tdc->tlock); return tdc; } if (tmpdc == (struct dcache *)0) { @@ -2434,8 +2702,10 @@ } else { tdc = afs_freeDSList; afs_freeDSList = (struct dcache *) tdc->lruq.next; + existing = 1; } - tdc->flags = 0; /* up-to-date, not in free q */ + tdc->dflags = 0; /* up-to-date, not in free q */ + tdc->mflags = 0; QAdd(&afs_DLRU, &tdc->lruq); if (tdc->lruq.prev == &tdc->lruq) osi_Panic("lruq 3"); } @@ -2450,10 +2720,21 @@ tdc->f.chunk = -1; hones(tdc->f.versionNo); tdc->f.inode = aslot; - tdc->flags |= DFEntryMod; + tdc->dflags |= DFEntryMod; tdc->refCount = 1; tdc->index = aslot; afs_indexUnique[aslot] = tdc->f.fid.Fid.Unique; + + if (existing) { + osi_Assert(0 == NBObtainWriteLock(&tdc->lock, 674)); + osi_Assert(0 == NBObtainWriteLock(&tdc->mflock, 675)); + osi_Assert(0 == NBObtainWriteLock(&tdc->tlock, 676)); + } + + RWLOCK_INIT(&tdc->lock, "dcache lock"); + RWLOCK_INIT(&tdc->tlock, "dcache tlock"); + RWLOCK_INIT(&tdc->mflock, "dcache flock"); + ObtainReadLock(&tdc->tlock); if (tmpdc == (struct dcache *)0) afs_indexTable[aslot] = tdc; @@ -2468,7 +2749,7 @@ * * Description: * Return a pointer to an freshly initialized dcache entry using - * a UFS-based disk cache. + * a UFS-based disk cache. The dcache tlock will be read-locked. * * Parameters: * aslot : Dcache slot to look at. 
@@ -2485,18 +2766,19 @@ register afs_int32 code; register struct dcache *tdc; + int existing = 0; AFS_STATCNT(afs_UFSGetDSlot); if (CheckLock(&afs_xdcache) != -1) osi_Panic("getdslot nolock"); if (aslot < 0 || aslot >= afs_cacheFiles) osi_Panic("getdslot slot"); tdc = afs_indexTable[aslot]; if (tdc) { -#ifdef AFS_SUN5_ENVX - mutex_enter(&tdc->lock); -#endif QRemove(&tdc->lruq); /* move to queue head */ QAdd(&afs_DLRU, &tdc->lruq); + /* Grab tlock in case refCount != 0 */ + ObtainWriteLock(&tdc->tlock, 625); tdc->refCount++; + ConvertWToRLock(&tdc->tlock); return tdc; } /* otherwise we should read it in from the cache file */ @@ -2516,8 +2798,10 @@ } else { tdc = afs_freeDSList; afs_freeDSList = (struct dcache *) tdc->lruq.next; + existing = 1; } - tdc->flags = 0; /* up-to-date, not in free q */ + tdc->dflags = 0; /* up-to-date, not in free q */ + tdc->mflags = 0; QAdd(&afs_DLRU, &tdc->lruq); if (tdc->lruq.prev == &tdc->lruq) osi_Panic("lruq 3"); } @@ -2527,9 +2811,6 @@ tdc->ihint = 0; } -#ifdef AFS_SUN5_ENVX - mutex_enter(&tdc->lock); -#endif /* * Seek to the aslot'th entry and read it in. */ @@ -2540,7 +2821,7 @@ tdc->f.fid.Fid.Volume = 0; tdc->f.chunk = -1; hones(tdc->f.versionNo); - tdc->flags |= DFEntryMod; + tdc->dflags |= DFEntryMod; #if !defined(AFS_SUN5_ENV) && !defined(AFS_OSF_ENV) && !defined(AFS_SGI64_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_DARWIN_ENV) && !defined(AFS_FBSD_ENV) last_error = getuerror(); #endif @@ -2550,6 +2831,17 @@ tdc->refCount = 1; tdc->index = aslot; + if (existing) { + osi_Assert(0 == NBObtainWriteLock(&tdc->lock, 674)); + osi_Assert(0 == NBObtainWriteLock(&tdc->mflock, 675)); + osi_Assert(0 == NBObtainWriteLock(&tdc->tlock, 676)); + } + + RWLOCK_INIT(&tdc->lock, "dcache lock"); + RWLOCK_INIT(&tdc->tlock, "dcache tlock"); + RWLOCK_INIT(&tdc->mflock, "dcache flock"); + ObtainReadLock(&tdc->tlock); + /* * If we didn't read into a temporary dcache region, update the * slot pointer table. 
@@ -2574,7 +2866,8 @@ * atime : If true, set the modtime on the file to the current time. * * Environment: - * Must be called with the afs_xdcache lock at least read-locked. + * Must be called with the afs_xdcache lock at least read-locked, + * and dcache entry at least read-locked. * The reference count is not changed. */ @@ -2695,7 +2988,11 @@ MObtainWriteLock(&afs_xdcache,282); tdc = afs_GetDSlot(index, (struct dcache *)0); + ReleaseReadLock(&tdc->tlock); MReleaseWriteLock(&afs_xdcache); + + ObtainWriteLock(&tdc->lock, 621); + MObtainWriteLock(&afs_xdcache, 622); if (afile) { code = gop_lookupname(afile, AFS_UIOSYS, @@ -2703,6 +3000,8 @@ (struct vnode **) 0, &filevp); if (code) { + ReleaseWriteLock(&afs_xdcache); + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); return code; } @@ -2791,9 +3090,11 @@ osi_UFSClose(tfile); tdc->f.states &= ~DWriting; - tdc->flags &= ~DFEntryMod; + tdc->dflags &= ~DFEntryMod; /* don't set f.modTime; we're just cleaning up */ afs_WriteDCache(tdc, 0); + ReleaseWriteLock(&afs_xdcache); + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); afs_stats_cmperf.cacheNumEntries++; return 0; --- src/afs/afs_init.c 2001/11/01 04:01:22 1.12 +++ src/afs/afs_init.c 2001/11/16 05:13:53 @@ -867,8 +867,10 @@ RWLOCK_INIT(&afs_xconn, "afs_xconn"); memset((char *)&afs_rootFid, 0, sizeof(struct VenusFid)); RWLOCK_INIT(&afs_xuser, "afs_xuser"); - RWLOCK_INIT(&afs_xvolume, "afs_xvolume"), RWLOCK_INIT(&afs_xcell, "afs_xcell"); - RWLOCK_INIT(&afs_xserver, "afs_xserver"), LOCK_INIT(&afs_puttofileLock, "afs_puttofileLock"); + RWLOCK_INIT(&afs_xvolume, "afs_xvolume"); + RWLOCK_INIT(&afs_xcell, "afs_xcell"); + RWLOCK_INIT(&afs_xserver, "afs_xserver"); + LOCK_INIT(&afs_puttofileLock, "afs_puttofileLock"); } } /*shutdown_AFS*/ --- src/afs/afs_memcache.c 2001/11/01 04:01:22 1.6 +++ src/afs/afs_memcache.c 2001/11/16 05:13:53 @@ -532,10 +532,7 @@ abase += tlen; length -= tlen; adc->validPos = abase; - if (adc->flags & DFWaiting) { - adc->flags &= ~DFWaiting; - 
afs_osi_Wakeup(&adc->validPos); - } + afs_osi_Wakeup(&adc->validPos); } } while (moredata); /* max of two sizes */ --- src/afs/afs_osi.h 2001/07/05 15:20:00 1.7 +++ src/afs/afs_osi.h 2001/11/16 05:13:53 @@ -194,6 +194,14 @@ (ISAFS_RXGLOCK() || (osi_Panic("rx global lock not held"), 0)) #endif /* AFS_GLOBAL_SUNLOCK */ +#ifdef RX_ENABLE_LOCKS +#define RX_AFS_GLOCK() AFS_GLOCK() +#define RX_AFS_GUNLOCK() AFS_GUNLOCK() +#else +#define RX_AFS_GLOCK() +#define RX_AFS_GUNLOCK() +#endif + #ifndef KERNEL --- src/afs/afs_pioctl.c 2001/11/13 17:24:05 1.28 +++ src/afs/afs_pioctl.c 2001/11/16 05:13:53 @@ -1693,17 +1693,18 @@ tdc = afs_GetDCache(avc, (afs_size_t) 0, areq, &offset, &len, 1); if (!tdc) return ENOENT; Check_AtSys(avc, ain, &sysState, areq); + ObtainReadLock(&tdc->lock); do { code = afs_dir_Lookup(&tdc->f.inode, sysState.name, &tfid.Fid); } while (code == ENOENT && Next_AtSys(avc, areq, &sysState)); + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); /* we're done with the data */ bufp = sysState.name; if (code) { - afs_PutDCache(tdc); goto out; } tfid.Cell = avc->fid.Cell; tfid.Fid.Volume = avc->fid.Fid.Volume; - afs_PutDCache(tdc); /* we're done with the data */ if (!tfid.Fid.Unique && (avc->states & CForeign)) { tvc = afs_LookupVCache(&tfid, areq, (afs_int32 *)0, WRITE_LOCK, avc, bufp); } else { @@ -2480,9 +2481,11 @@ tdc = afs_GetDCache(avc, (afs_size_t) 0, areq, &offset, &len, 1); /* test for error below */ if (!tdc) return ENOENT; Check_AtSys(avc, ain, &sysState, areq); + ObtainReadLock(&tdc->lock); do { code = afs_dir_Lookup(&tdc->f.inode, sysState.name, &tfid.Fid); } while (code == ENOENT && Next_AtSys(avc, areq, &sysState)); + ReleaseReadLock(&tdc->lock); bufp = sysState.name; if (code) { afs_PutDCache(tdc); @@ -2529,14 +2532,10 @@ tc = afs_Conn(&avc->fid, areq, SHARED_LOCK); if (tc) { XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_REMOVEFILE); -#ifdef RX_ENABLE_LOCKS - AFS_GUNLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GUNLOCK(); code = RXAFS_RemoveFile(tc->id, 
(struct AFSFid *) &avc->fid.Fid, bufp, &OutDirStatus, &tsync); -#ifdef RX_ENABLE_LOCKS - AFS_GLOCK(); -#endif /* RX_ENABLE_LOCKS */ + RX_AFS_GLOCK(); XSTATS_END_TIME; } else code = -1; @@ -2552,6 +2551,7 @@ } if (tdc) { /* we have the thing in the cache */ + ObtainWriteLock(&tdc->lock, 661); if (afs_LocalHero(avc, tdc, &OutDirStatus, 1)) { /* we can do it locally */ code = afs_dir_Delete(&tdc->f.inode, bufp); @@ -2560,6 +2560,7 @@ DZap(&tdc->f.inode); } } + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); /* drop ref count */ } avc->states &= ~CUnique; /* For the dfs xlator */ @@ -2705,6 +2706,7 @@ if (!(afs_indexFlags[i] & IFEverUsed)) continue; /* never had any data */ tdc = afs_GetDSlot(i, (struct dcache *) 0); if (tdc->refCount <= 1) { /* too high, in use by running sys call */ + ReleaseReadLock(&tdc->tlock); if (tdc->f.fid.Fid.Volume == volume && tdc->f.fid.Cell == cell) { if (! (afs_indexFlags[i] & IFDataMod)) { /* if the file is modified, but has a ref cnt of only 1, then @@ -2717,8 +2719,10 @@ afs_FlushDCache(tdc); } } + } else { + ReleaseReadLock(&tdc->tlock); } - tdc->refCount--; /* bumped by getdslot */ + afs_PutDCache(tdc); /* bumped by getdslot */ } MReleaseWriteLock(&afs_xdcache); @@ -3677,17 +3681,18 @@ tdc = afs_GetDCache(avc, (afs_size_t) 0, areq, &offset, &len, 1); if (!tdc) return ENOENT; Check_AtSys(avc, ain, &sysState, areq); + ObtainReadLock(&tdc->lock); do { code = afs_dir_Lookup(&tdc->f.inode, sysState.name, &tfid.Fid); } while (code == ENOENT && Next_AtSys(avc, areq, &sysState)); + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); /* we're done with the data */ bufp = sysState.name; if (code) { - afs_PutDCache(tdc); goto out; } tfid.Cell = avc->fid.Cell; tfid.Fid.Volume = avc->fid.Fid.Volume; - afs_PutDCache(tdc); /* we're done with the data */ if (!tfid.Fid.Unique && (avc->states & CForeign)) { tvc = afs_LookupVCache(&tfid, areq, (afs_int32 *)0, WRITE_LOCK, avc, bufp); } else { @@ -3706,8 +3711,8 @@ #if defined(AFS_SUN_ENV) || 
defined(AFS_ALPHA_ENV) || defined(AFS_SUN5_ENV) afs_BozonLock(&tvc->pvnLock, tvc); /* Since afs_TryToSmush will do a pvn_vptrunc */ #endif - ObtainWriteLock(&tvc->lock,645); - ObtainWriteLock(&afs_xcbhash, 646); + ObtainWriteLock(&tvc->lock,649); + ObtainWriteLock(&afs_xcbhash, 650); afs_DequeueCallback(tvc); tvc->states &= ~(CStatd | CDirty); /* next reference will re-stat cache entry */ ReleaseWriteLock(&afs_xcbhash); --- src/afs/afs_segments.c 2001/11/10 23:00:55 1.8 +++ src/afs/afs_segments.c 2001/11/16 05:13:53 @@ -147,7 +147,7 @@ } else { /* blew it away */ - afs_InvalidateAllSegments(avc, 1); + afs_InvalidateAllSegments(avc); } return code; @@ -279,6 +279,7 @@ if ((afs_indexFlags[index] & IFDataMod) && (afs_indexUnique[index] == avc->fid.Fid.Unique)) { tdc = afs_GetDSlot(index, 0); /* refcount+1. */ + ReleaseReadLock(&tdc->tlock); if (!FidCmp( &tdc->f.fid, &avc->fid ) && tdc->f.chunk >= minj ) { off = tdc->f.chunk - minj; if (off < NCHUNKSATONCE) { @@ -287,24 +288,31 @@ dcList[ off ] = tdc; if (off > high) high = off; - tlen -= tdc->f.chunkBytes; /* shortcut: big win for little files */ j++; - if (tlen <= 0) - break; - } else { + /* DCLOCKXXX: chunkBytes is protected by tdc->lock which we + * can't grab here, due to lock ordering with afs_xdcache. + * So, disable this shortcut for now. -- kolya 2001-10-13 + */ + /* shortcut: big win for little files */ + /* tlen -= tdc->f.chunkBytes; + * if (tlen <= 0) + * break; + */ + } + else { moredata = TRUE; - lockedPutDCache(tdc); + afs_PutDCache(tdc); if (j == NCHUNKSATONCE) break; } } else { - lockedPutDCache(tdc); + afs_PutDCache(tdc); } } index = afs_dvnextTbl[index]; } - MReleaseWriteLock(&afs_xdcache); + /* this guy writes chunks, puts back dcache structs, and bumps newDV */ /* "moredata" just says "there are more dirty chunks yet to come". 
*/ @@ -324,6 +332,7 @@ for (bytes = 0, j = 0; !code && j<=high; j++) { if (dcList[j]) { + ObtainSharedLock(&(dcList[j]->lock), 629); if (!bytes) first = j; bytes += dcList[j]->f.chunkBytes; @@ -332,9 +341,6 @@ && dcList[j+1]) { int sbytes = afs_OtherCSize - dcList[j]->f.chunkBytes; bytes += sbytes; - - - } } if (bytes && (j==high || !dcList[j+1])) { @@ -583,6 +589,10 @@ tdc = dclist[i]; if (!code) { if (afs_indexFlags[tdc->index] & IFDataMod) { + /* + * LOCKXXX -- should hold afs_xdcache(W) when + * modifying afs_indexFlags. + */ afs_indexFlags[tdc->index] &= ~IFDataMod; afs_stats_cmperf.cacheCurrDirtyChunks--; afs_indexFlags[tdc->index] &= ~IFDirtyPages; @@ -598,15 +608,20 @@ } } } + UpgradeSToWLock(&tdc->lock, 628); tdc->f.states &= ~DWriting; /* correct?*/ - tdc->flags |= DFEntryMod; - lockedPutDCache(tdc); + tdc->dflags |= DFEntryMod; + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); } if (code) { - for (j++; j<=high; j++) - if ( dcList[j] ) - lockedPutDCache(dcList[j]); + for (j++; j<=high; j++) { + if ( dcList[j] ) { + ReleaseSharedLock(&(dcList[j]->lock)); + afs_PutDCache(dcList[j]); + } + } } afs_Trace2(afs_iclSetp, CM_TRACE_STOREALLDCDONE, @@ -642,18 +657,52 @@ * A lot of this could be integrated into the loop above */ if (!code) { - afs_hyper_t h_unset; - hones(h_unset); - MObtainWriteLock(&afs_xdcache,285); /* overkill, but it gets the - * lock in case GetDSlot needs it */ - for(safety = 0, index = afs_dvhashTbl[hash]; - index != NULLIDX && safety < afs_cacheFiles+2;) { + afs_hyper_t h_unset; + hones(h_unset); - if (afs_indexUnique[index] == avc->fid.Fid.Unique) { - tdc = afs_GetDSlot(index, 0); + minj = 0; - if (!FidCmp(&tdc->f.fid, &avc->fid)) { - /* this is the file */ + do { + moredata = FALSE; + memset((char *)dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *)); + + /* overkill, but it gets the lock in case GetDSlot needs it */ + MObtainWriteLock(&afs_xdcache,285); + + for(j = 0, safety = 0, index = afs_dvhashTbl[hash]; + index != NULLIDX && 
safety < afs_cacheFiles+2;) { + + if (afs_indexUnique[index] == avc->fid.Fid.Unique) { + tdc = afs_GetDSlot(index, 0); + ReleaseReadLock(&tdc->tlock); + + if (!FidCmp(&tdc->f.fid, &avc->fid) && tdc->f.chunk >= minj) { + off = tdc->f.chunk - minj; + if (off < NCHUNKSATONCE) { + /* this is the file, and the correct chunk range */ + if (j >= NCHUNKSATONCE) + osi_Panic("Too many dcache entries in range\n"); + dcList[j++] = tdc; + } else { + moredata = TRUE; + afs_PutDCache(tdc); + if (j == NCHUNKSATONCE) + break; + } + } else { + afs_PutDCache(tdc); + } + } + + index = afs_dvnextTbl[index]; + } + MReleaseWriteLock(&afs_xdcache); + + for (i=0; ilock, 677); + /* was code here to clear IFDataMod, but it should only be done * in storedcache and storealldcache. */ @@ -665,21 +714,27 @@ * for a chunk that we didn't store this time */ /* Don't update the version number if it's not yet set. */ - if (code == 0 && (!hsame(tdc->f.versionNo, h_unset)) - && (hcmp(tdc->f.versionNo, oldDV) >= 0)) { - if ((!(afs_dvhack || foreign) && hsame(avc->m.DataVersion, newDV)) - || ((afs_dvhack || foreign) && (origCBs == afs_allCBs)) ) { - /* no error, this is the DV */ - hset(tdc->f.versionNo, avc->m.DataVersion); - tdc->flags |= DFEntryMod; - } + if (!hsame(tdc->f.versionNo, h_unset) && + hcmp(tdc->f.versionNo, oldDV) >= 0) { + + if ((!(afs_dvhack || foreign) && hsame(avc->m.DataVersion, newDV)) + || ((afs_dvhack || foreign) && (origCBs == afs_allCBs)) ) { + /* no error, this is the DV */ + + UpgradeSToWLock(&tdc->lock, 678); + hset(tdc->f.versionNo, avc->m.DataVersion); + tdc->dflags |= DFEntryMod; + ConvertWToSLock(&tdc->lock); + } } - } - lockedPutDCache(tdc); + + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); } - index = afs_dvnextTbl[index]; - } - MReleaseWriteLock(&afs_xdcache); + + minj += NCHUNKSATONCE; + + } while (moredata); } if (code) { @@ -690,7 +745,7 @@ * fileserver. 
*/ if (areq->permWriteError || (avc->states & (CCore1 | CCore))) { - afs_InvalidateAllSegments(avc, 1); + afs_InvalidateAllSegments(avc); } } afs_Trace3(afs_iclSetp, CM_TRACE_STOREALLDONE, ICL_TYPE_POINTER, avc, @@ -737,23 +792,22 @@ * * Parameters: * avc : Pointer to vcache entry. - * asetLock : If true, we are to set the afs_xdcache lock; otherwise, - * the caller has already done it. * * Environment: * For example, called after an error has been detected. Called - * with avc write-locked. + * with avc write-locked, and afs_xdcache unheld. */ -afs_InvalidateAllSegments(avc, asetLock) +afs_InvalidateAllSegments(avc) struct vcache *avc; - int asetLock; { /*afs_InvalidateAllSegments*/ struct dcache *tdc; afs_int32 hash; afs_int32 index; + struct dcache **dcList; + int i, dcListMax, dcListCount; AFS_STATCNT(afs_InvalidateAllSegments); afs_Trace2(afs_iclSetp, CM_TRACE_INVALL, ICL_TYPE_POINTER, avc, @@ -763,7 +817,7 @@ avc->states &= ~CExtendedFile; /* not any more */ ObtainWriteLock(&afs_xcbhash, 459); afs_DequeueCallback(avc); - avc->states &= ~(CStatd|CDirty); /* mark status information as bad, too */ + avc->states &= ~(CStatd|CDirty); /* mark status information as bad, too */ ReleaseWriteLock(&afs_xcbhash); if (avc->fid.Fid.Vnode & 1 || (vType(avc) == VDIR)) osi_dnlc_purgedp(avc); @@ -776,28 +830,60 @@ * Block out others from screwing with this table; is a read lock * sufficient? */ - if (asetLock) MObtainWriteLock(&afs_xdcache,286); + MObtainWriteLock(&afs_xdcache,286); + dcListMax = 0; + for(index = afs_dvhashTbl[hash]; index != NULLIDX;) { - if (afs_indexUnique[index] == avc->fid.Fid.Unique) { - tdc = afs_GetDSlot(index, 0); - if (!FidCmp(&tdc->f.fid, &avc->fid)) { - /* same file? 
we'll zap it */ - if (afs_indexFlags[index] & IFDataMod) { - afs_stats_cmperf.cacheCurrDirtyChunks--; - /* don't write it back */ - afs_indexFlags[index] &= ~IFDataMod; - } - afs_indexFlags[index] &= ~IFAnyPages; - ZapDCE(tdc); - if (vType(avc) == VDIR) { - DZap(&tdc->f.inode); + if (afs_indexUnique[index] == avc->fid.Fid.Unique) { + tdc = afs_GetDSlot(index, 0); + ReleaseReadLock(&tdc->tlock); + if (!FidCmp(&tdc->f.fid, &avc->fid)) + dcListMax++; + afs_PutDCache(tdc); + } + index = afs_dvnextTbl[index]; + } + + dcList = osi_Alloc(dcListMax * sizeof(struct dcache *)); + dcListCount = 0; + + for(index = afs_dvhashTbl[hash]; index != NULLIDX;) { + if (afs_indexUnique[index] == avc->fid.Fid.Unique) { + tdc = afs_GetDSlot(index, 0); + ReleaseReadLock(&tdc->tlock); + if (!FidCmp(&tdc->f.fid, &avc->fid)) { + /* same file? we'll zap it */ + if (afs_indexFlags[index] & IFDataMod) { + afs_stats_cmperf.cacheCurrDirtyChunks--; + /* don't write it back */ + afs_indexFlags[index] &= ~IFDataMod; + } + afs_indexFlags[index] &= ~IFAnyPages; + if (dcListCount < dcListMax) + dcList[dcListCount++] = tdc; + else + afs_PutDCache(tdc); + } else { + afs_PutDCache(tdc); } } - lockedPutDCache(tdc); - } - index = afs_dvnextTbl[index]; + index = afs_dvnextTbl[index]; } - if (asetLock) MReleaseWriteLock(&afs_xdcache); + MReleaseWriteLock(&afs_xdcache); + + for (i=0; i < dcListCount; i++) { + tdc = dcList[i]; + ObtainWriteLock(&tdc->lock, 679); + ZapDCE(tdc); + if (vType(avc) == VDIR) + DZap(&tdc->f.inode); + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); + } + + osi_Free(dcList, dcListMax * sizeof(struct dcache *)); + return 0; } /*afs_InvalidateAllSegments*/ @@ -830,6 +916,9 @@ register afs_int32 index; afs_int32 newSize; + int dcCount, dcPos; + struct dcache **tdcArray; + AFS_STATCNT(afs_TruncateAllSegments); avc->m.Date = osi_Time(); afs_Trace3(afs_iclSetp, CM_TRACE_TRUNCALL, ICL_TYPE_POINTER, avc, @@ -876,27 +965,72 @@ if (alen < avc->truncPos) avc->truncPos = alen; code = DVHash(&avc->fid); + /* block out others from screwing with this table */
MObtainWriteLock(&afs_xdcache,287); + + dcCount = 0; for(index = afs_dvhashTbl[code]; index != NULLIDX;) { if (afs_indexUnique[index] == avc->fid.Fid.Unique) { tdc = afs_GetDSlot(index, 0); + ReleaseReadLock(&tdc->tlock); + if (!FidCmp(&tdc->f.fid, &avc->fid)) + dcCount++; + afs_PutDCache(tdc); + } + index = afs_dvnextTbl[index]; + } + + /* Now allocate space where we can save those dcache entries, and + * do a second pass over them.. Since we're holding xdcache, it + * shouldn't be changing. + */ + tdcArray = osi_Alloc(dcCount * sizeof(struct dcache *)); + dcPos = 0; + + for(index = afs_dvhashTbl[code]; index != NULLIDX;) { + if (afs_indexUnique[index] == avc->fid.Fid.Unique) { + tdc = afs_GetDSlot(index, 0); + ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->fid)) { /* same file, and modified, we'll store it back */ - newSize = alen - AFS_CHUNKTOBASE(tdc->f.chunk); - if (newSize < 0) newSize = 0; - if (newSize < tdc->f.chunkBytes) { - register struct osi_file *tfile; - tfile = afs_CFileOpen(tdc->f.inode); - afs_CFileTruncate(tfile, newSize); - afs_CFileClose(tfile); - afs_AdjustSize(tdc, newSize); + if (dcPos < dcCount) { + tdcArray[dcPos++] = tdc; + } else { + afs_PutDCache(tdc); } + } else { + afs_PutDCache(tdc); } - lockedPutDCache(tdc); } index = afs_dvnextTbl[index]; } + + MReleaseWriteLock(&afs_xdcache); + + /* Now we loop over the array of dcache entries and truncate them */ + for (index = 0; index < dcPos; index++) { + struct osi_file *tfile; + + tdc = tdcArray[index]; + + newSize = alen - AFS_CHUNKTOBASE(tdc->f.chunk); + if (newSize < 0) newSize = 0; + ObtainSharedLock(&tdc->lock, 672); + if (newSize < tdc->f.chunkBytes) { + UpgradeSToWLock(&tdc->lock, 673); + tfile = afs_CFileOpen(tdc->f.inode); + afs_CFileTruncate(tfile, newSize); + afs_CFileClose(tfile); + afs_AdjustSize(tdc, newSize); + ConvertWToSLock(&tdc->lock); + } + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); + } + + osi_Free(tdcArray, dcCount * sizeof(struct dcache *)); + 
#if (defined(AFS_SUN5_ENV)) ObtainWriteLock(&avc->vlock, 547); if (--avc->activeV == 0 && (avc->vstates & VRevokeWait)) { @@ -905,7 +1039,6 @@ } ReleaseWriteLock(&avc->vlock); #endif - MReleaseWriteLock(&afs_xdcache); return 0; } /*afs_TruncateAllSegments*/ --- src/afs/afs_vcache.c 2001/11/01 04:01:22 1.14 +++ src/afs/afs_vcache.c 2001/11/16 05:13:53 @@ -251,7 +251,7 @@ AFS_STATCNT(afs_inactive); if (avc->states & CDirty) { /* we can't keep trying to push back dirty data forever. Give up. */ - afs_InvalidateAllSegments(avc, 1/*set lock*/); /* turns off dirty bit */ + afs_InvalidateAllSegments(avc); /* turns off dirty bit */ } avc->states &= ~CMAPPED; /* mainly used by SunOS 4.0.x */ avc->states &= ~CDirty; /* Turn it off */ --- src/afs/afs_volume.c 2001/10/10 00:10:32 1.8 +++ src/afs/afs_volume.c 2001/11/16 05:13:53 @@ -316,6 +316,7 @@ ReleaseReadLock(&afs_xvcache); ObtainWriteLock(&afs_xcbhash, 485); + /* LOCKXXX: We aren't holding tvc write lock? */ afs_DequeueCallback(tvc); tvc->states &= ~CStatd; ReleaseWriteLock(&afs_xcbhash); --- src/afs/lock.h 2001/11/13 13:14:06 1.7 +++ src/afs/lock.h 2001/11/16 05:13:53 @@ -38,7 +38,7 @@ /* This is the max lock number in use. Please update it if you add any new * lock numbers. */ -#define MAX_LOCK_NUMBER 572 +#define MAX_LOCK_NUMBER 700 #endif struct afs_bozoLock { --- src/afs/AIX/osi_vnodeops.c 2001/11/01 04:01:26 1.6 +++ src/afs/AIX/osi_vnodeops.c 2001/11/16 05:13:53 @@ -1056,7 +1056,7 @@ ObtainWriteLock(&vcp->lock,407); tdc = afs_FindDCache(vcp, xfrOffset); if (tdc) { - if (!(tdc->flags & DFNextStarted)) + if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(vcp, tdc, credp, &treq); afs_PutDCache(tdc); } --- src/afs/DOC/afs_rwlocks 2000/11/04 10:03:18 1.2 +++ src/afs/DOC/afs_rwlocks 2001/11/16 05:13:53 @@ -14,44 +14,65 @@ they're locked in order of vnode within the same volume. afs_AccessOK is called before locking other entries. -3. 
afs_xvcache locked before afs_xdcache in afs_remove, afs_symlink, etc -in the file afs_vnodeops.c +3. VCache entry vlock (Solaris only). -4. afs_xvcache. Must be able to load new cache -entries while holding locks on others. Note this means you can't lock -a cache entry while holding either of this lock, unless, as in afs_create, -the cache entry is actually created while the afs_xvcache is held. +4. DCache entries. Tentatively, multiple ones can be locked now. +Locking order between dcache entries is in increasing offset order. +However, if it turns out we never need to lock multiple dcache's, +we should just say it's not allowed, and simplify things. -5. afs_xvcb. Volume callback lock. Locked before afs_xserver in afs_RemoveVCB. +5. afs_xdcache. Protects the dcache hash tables and afs_index* in +afs_dcache.c. As with afs_xvcache below, a newly created dcache +entries can be locked while holding afs_xdcache. -6. afs_xvolume -- allows low-level server etc stuff to happen while +Bugs: afs_xvcache locked before afs_xdcache in afs_remove, afs_symlink, +etc in the file afs_vnodeops.c + +6. afs_xvcache. Must be able to load new cache entries while holding +locks on others. Note this means you can't lock a cache entry while +holding either of this lock, unless, as in afs_create, the cache entry +is actually created while the afs_xvcache is held. + +7. afs_xvcb. Volume callback lock. Locked before afs_xserver in +afs_RemoveVCB. + +8. afs_xvolume -- allows low-level server etc stuff to happen while creating a volume? -7. afs_xuser -- afs_xuser locked before afs_xserver and afs_xconn in -PUnlog. +9. afs_xuser -- afs_xuser is locked before afs_xserver and afs_xconn +in PUnlog. -8. afs_xcell -- locked before server in afs_GetCell. +10. afs_xcell -- afs_xcell locked before afs_xserver in afs_GetCell. -9. afs_xserver -- afs_xserver locked before afs_xconn in -afs_ResetUserConns. +11. afs_xserver -- locked before afs_xconn in afs_ResetUserConns. -10. 
afs_xsrvAddr -- afs_xserver locked before afs_xsrvAddr in +12. afs_xsrvAddr -- afs_xserver locked before afs_xsrvAddr in afs_CheckServers. -11. afs_xconn -- see above +13. afs_xconn -- see above -12. Individual volume locks. Must be after afs_xvolume so we can +14. Individual volume locks. Must be after afs_xvolume so we can iterate over all volumes without others being inserted/deleted. Same hack doesn't work for cache entry locks since we need to be able to lock multiple cache entries (but not multiple volumes) simultaneously. -13. afs_xdnlc -- locked after afs_xvcache in afs_osidnlc.c. Shouldn't +15. afs_xdnlc -- locked after afs_xvcache in afs_osidnlc.c. Shouldn't interact with any of the other locks. + +16. afs_xcbhash -- No code which holds xcbhash (all of it is in +afs_cbqueue.c) (note: this doesn't seem to be true -- it's used +elsewhere too) attempts to get any other locks, so it should always +be obtained last. It is locked in afs_DequeueCallbacks which is +called from afs_FlushVCache with afs_xvcache write-locked. + +17. afs_dynrootDirLock -- afs_GetDynroot returns the lock held, +afs_PutDynroot releases it. + +18. Dcache entry mflock -- used to atomize accesses and updates to +dcache mflags. -14. afs_xcbhash -- No code which holds xcbhash (all of it is in -afs_cbqueue.c) attempts to get any other locks, so it should always be -obtained last. It is locked in afs_DequeueCallbacks which is called from -afs_FlushVCache with afs_xvcache write-locked. +19. DCache entry tlock -- used to make atomic reads or writes to +the dcache refcount. 
***** RX_ENABLE_LOCKS --- src/afs/DUX/osi_vnodeops.c 2001/07/12 19:58:19 1.5 +++ src/afs/DUX/osi_vnodeops.c 2001/11/16 05:13:53 @@ -625,7 +625,7 @@ && (counter == 0 || AFS_CHUNKOFFSET(fileBase) == 0)) { tdc = afs_FindDCache(avc, fileBase); if (tdc) { - if (!(tdc->flags & DFNextStarted)) + if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(avc, tdc, cred, &treq); afs_PutDCache(tdc); } --- src/afs/FBSD/osi_vnodeops.c 2001/07/12 19:58:19 1.3 +++ src/afs/FBSD/osi_vnodeops.c 2001/11/16 05:13:53 @@ -626,7 +626,7 @@ && (counter == 0 || AFS_CHUNKOFFSET(fileBase) == 0)) { tdc = afs_FindDCache(avc, fileBase); if (tdc) { - if (!(tdc->flags & DFNextStarted)) + if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(avc, tdc, cred, &treq); afs_PutDCache(tdc); } --- src/afs/IRIX/osi_vnodeops.c 2001/11/02 21:05:21 1.8 +++ src/afs/IRIX/osi_vnodeops.c 2001/11/16 05:13:54 @@ -549,7 +549,7 @@ ObtainWriteLock(&avc->lock,562); tdc = afs_FindDCache(avc, off); if (tdc) { - if (!(tdc->flags & DFNextStarted)) + if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(avc, tdc, cr, &treq); afs_PutDCache(tdc); } @@ -1142,7 +1142,7 @@ avc->fid.Fid.Vnode, avc->fid.Fid.Unique, code); } - afs_InvalidateAllSegments(avc, 1); + afs_InvalidateAllSegments(avc); } s = VN_LOCK(vp); vp->v_count --; --- src/afs/LINUX/osi_vnodeops.c 2001/11/01 04:01:54 1.31 +++ src/afs/LINUX/osi_vnodeops.c 2001/11/16 05:13:54 @@ -287,6 +287,7 @@ return -ENOENT; } ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); /* * Make sure that the data in the cache is current. There are two * cases we need to worry about: @@ -294,15 +295,17 @@ * 2. 
The cache data is no longer valid */ while ((avc->states & CStatd) - && (tdc->flags & DFFetching) + && (tdc->dflags & DFFetching) && hsame(avc->m.DataVersion, tdc->f.versionNo)) { - tdc->flags |= DFWaiting; + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); } if (!(avc->states & CStatd) || !hsame(avc->m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_PutDCache(tdc); goto tagain; @@ -373,6 +376,7 @@ */ fp->f_pos = (loff_t)offset; + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); ReleaseReadLock(&avc->lock); AFS_GUNLOCK(); --- src/afs/SOLARIS/osi_vnodeops.c 2001/11/10 23:00:59 1.10 +++ src/afs/SOLARIS/osi_vnodeops.c 2001/11/16 05:13:54 @@ -280,18 +280,55 @@ if (code = afs_InitReq(&treq, acred)) return code; if (!pl) { - /* - * This is a read-ahead request, e.g. due to madvise. - */ - tdc = afs_GetDCache(avc, (afs_int32)off, &treq, &offset, &nlen, 1); - if (!tdc) return 0; + /* This is a read-ahead request, e.g. due to madvise. */ +#ifdef AFS_SUN5_ENV + int plen = alen; +#else + int plen = PAGESIZE; +#endif + ObtainReadLock(&avc->lock); - if (!(tdc->flags & DFNextStarted)) { - ObtainReadLock(&avc->lock); - afs_PrefetchChunk(avc, tdc, acred, &treq); - ReleaseReadLock(&avc->lock); + while (plen > 0 && !afs_BBusy()) { + /* Obtain a dcache entry at off. 2 means don't fetch data. 
*/ + tdc = afs_GetDCache(avc, (afs_offs_t)off, &treq, &offset, &nlen, 2); + if (!tdc) break; + + /* Write-lock the dcache entry, if we don't succeed, just go on */ + if (0 != NBObtainWriteLock(&tdc->lock, 642)) { + afs_PutDCache(tdc); + goto next_prefetch; + } + + /* If we aren't already fetching this dcache entry, queue it */ + if (!(tdc->mflags & DFFetchReq)) { + struct brequest *bp; + + tdc->mflags |= DFFetchReq; + bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, + (afs_size_t) off, (afs_size_t) 1, tdc); + if (!bp) { + /* Unable to start background fetch; might as well stop */ + tdc->mflags &= ~DFFetchReq; + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); + break; + } + ReleaseWriteLock(&tdc->lock); + } else { + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); + } + +next_prefetch: + /* Adjust our offset and remaining length values */ + off += nlen; + plen -= nlen; + + /* If we aren't making progress for some reason, bail out */ + if (nlen <= 0) break; } - afs_PutDCache(tdc); + + ReleaseReadLock(&avc->lock); return 0; } @@ -321,6 +358,7 @@ } } #endif + retry: #ifdef AFS_SUN5_ENV if (rw == S_WRITE || rw == S_CREATE) @@ -372,9 +410,13 @@ ReleaseWriteLock(&avc->vlock); #endif + /* We're about to do stuff with our dcache entry.. Lock it. 
*/ + ObtainReadLock(&tdc->lock); + /* Check to see whether the cache entry is still valid */ if (!(avc->states & CStatd) || !hsame(avc->m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_BozonUnlock(&avc->pvnLock, avc); afs_PutDCache(tdc); @@ -453,6 +495,9 @@ buf->b_dev = 0; buf->b_blkno = btodb(toffset); bp_mapin(buf); /* map it in to our address space */ + + /* afs_ustrategy will want to lock the dcache entry */ + ReleaseReadLock(&tdc->lock); #ifndef AFS_SUN5_ENV ReleaseReadLock(&avc->lock); #endif @@ -466,6 +511,8 @@ #ifndef AFS_SUN5_ENV ObtainReadLock(&avc->lock); #endif + ObtainReadLock(&tdc->lock); + #ifdef AFS_SUN5_ENV /* Before freeing unmap the buffer */ bp_mapout(buf); @@ -509,12 +556,14 @@ AFS_GLOCK(); pl[slot] = (struct page *) 0; - /* - * XXX This seems kind-of wrong: we shouldn't be modifying - * avc->states while not holding the write lock (even - * though nothing really uses CHasPages..) - */ - avc->states |= CHasPages; + ReleaseReadLock(&tdc->lock); + + /* Prefetch next chunk if we're at a chunk boundary */ + if (AFS_CHUNKOFFSET(off) == 0) { + if (!(tdc->mflags & DFNextStarted)) + afs_PrefetchChunk(avc, tdc, acred, &treq); + } + ReleaseReadLock(&avc->lock); #ifdef AFS_SUN5_ENV ObtainWriteLock(&afs_xdcache,246); @@ -548,6 +597,7 @@ ReleaseReadLock(&avc->lock); afs_BozonUnlock(&avc->pvnLock, avc); #ifdef AFS_SUN5_ENV + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); #endif return code; @@ -576,6 +626,7 @@ #else int toff = (int)off; #endif + int didWriteLock; AFS_STATCNT(afs_putpage); if (vp->v_flag & VNOMAP) /* file doesn't allow mapping */ @@ -591,7 +642,8 @@ ICL_TYPE_LONG, (int) flags); avc = (struct vcache *) vp; afs_BozonLock(&avc->pvnLock, avc); - ObtainWriteLock(&avc->lock,247); + ObtainSharedLock(&avc->lock,247); + didWriteLock = 0; /* Get a list of modified (or whatever) pages */ if (len) { @@ -608,6 +660,12 @@ if (!pages || !pvn_getdirty(pages, flags)) tlen = PAGESIZE; else { + if 
(!didWriteLock) { + AFS_GLOCK(); + didWriteLock = 1; + UpgradeSToWLock(&avc->lock, 671); + AFS_GUNLOCK(); + } NPages++; code = afs_putapage(vp, pages, &toff, &tlen, flags, cred); if (code) { @@ -619,6 +677,11 @@ AFS_GLOCK(); } } else { + if (!didWriteLock) { + UpgradeSToWLock(&avc->lock, 670); + didWriteLock = 1; + } + AFS_GUNLOCK(); #if defined(AFS_SUN56_ENV) code = pvn_vplist_dirty(vp, toff, afs_putapage, flags, cred); @@ -628,10 +691,18 @@ AFS_GLOCK(); } - if (code && !avc->vc_error) + if (code && !avc->vc_error) { + if (!didWriteLock) { + UpgradeSToWLock(&avc->lock, 669); + didWriteLock = 1; + } avc->vc_error = code; + } - ReleaseWriteLock(&avc->lock); + if (didWriteLock) + ReleaseWriteLock(&avc->lock); + else + ReleaseSharedLock(&avc->lock); afs_BozonUnlock(&avc->pvnLock, avc); afs_Trace2(afs_iclSetp, CM_TRACE_PAGEOUTDONE, ICL_TYPE_LONG, code, ICL_TYPE_LONG, NPages); AFS_GUNLOCK(); @@ -1120,9 +1191,10 @@ && hsame(avc->m.DataVersion, dcp_newpage->f.versionNo)) { ObtainWriteLock(&avc->lock,251); ObtainWriteLock(&avc->vlock,576); + ObtainReadLock(&dcp_newpage->lock); if ((avc->activeV == 0) && hsame(avc->m.DataVersion, dcp_newpage->f.versionNo) - && !(dcp_newpage->flags & (DFFetching))) { + && !(dcp_newpage->dflags & (DFFetching))) { AFS_GUNLOCK(); segmap_pagecreate(segkmap, raddr, rsize, 1); AFS_GLOCK(); @@ -1131,9 +1203,9 @@ afs_indexFlags[dcp_newpage->index] |= (IFAnyPages | IFDirtyPages); ReleaseWriteLock(&afs_xdcache); - avc->states |= CHasPages; created = 1; } + ReleaseReadLock(&dcp_newpage->lock); afs_PutDCache(dcp_newpage); ReleaseWriteLock(&avc->vlock); ReleaseWriteLock(&avc->lock); @@ -1158,13 +1230,6 @@ AFS_GLOCK(); ObtainWriteLock(&avc->lock,253); #ifdef AFS_SUN5_ENV - /* - * If at a chunk boundary, start prefetch of next chunk. 
- */ - if (counter == 0 || AFS_CHUNKOFFSET(fileBase) == 0) { - if (!(dcp->flags & DFNextStarted)) - afs_PrefetchChunk(avc, dcp, acred, &treq); - } counter++; if (dcp) afs_PutDCache(dcp); --- src/afs/VNOPS/afs_vnop_create.c 2001/11/01 04:02:05 1.7 +++ src/afs/VNOPS/afs_vnop_create.c 2001/11/16 05:13:54 @@ -119,6 +119,7 @@ tdc = afs_GetDCache(adp, (afs_size_t) 0, &treq, &offset, &len, 1); ObtainWriteLock(&adp->lock,135); + if (tdc) ObtainSharedLock(&tdc->lock,630); /* * Make sure that the data in the cache is current. We may have @@ -127,8 +128,10 @@ if (!(adp->states & CStatd) || (tdc && !hsame(adp->m.DataVersion, tdc->f.versionNo))) { ReleaseWriteLock(&adp->lock); - if (tdc) + if (tdc) { + ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); + } goto tagain; } if (tdc) { @@ -136,6 +139,7 @@ * the size attributes (to handle O_TRUNC) */ code = afs_dir_Lookup(&tdc->f.inode, aname, &newFid.Fid); /* use dnlc first xxx */ if (code == 0) { + ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); ReleaseWriteLock(&adp->lock); #ifdef AFS_SGI64_ENV @@ -298,7 +302,10 @@ if (code == EEXIST && aexcl == NONEXCL) { /* This lookup was handled in the common vn_open code in the vnode layer */ - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); + } ReleaseWriteLock(&adp->lock); goto done; } @@ -309,7 +316,10 @@ if (code == EEXIST && aexcl == NONEXCL) { #endif /* AFS_SGI64_ENV */ /* if we get an EEXIST in nonexcl mode, just do a lookup */ - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); + } ReleaseWriteLock(&adp->lock); #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) #if defined(AFS_SGI64_ENV) @@ -335,10 +345,14 @@ osi_dnlc_purgedp(adp); } ReleaseWriteLock(&adp->lock); - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); + } goto done; } /* otherwise, we should see if we can make the change to the dir locally */ + if (tdc) 
UpgradeSToWLock(&tdc->lock, 631); if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) { /* we can do it locally */ code = afs_dir_Create(&tdc->f.inode, aname, &newFid.Fid); @@ -348,6 +362,7 @@ } } if (tdc) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } newFid.Cell = adp->fid.Cell; @@ -460,7 +475,8 @@ * data and stat cache entries. This routine returns 1 if we should * do the operation locally, and 0 otherwise. * - * This routine must be called with the stat cache entry write-locked. + * This routine must be called with the stat cache entry write-locked, + * and dcache entry write-locked. */ afs_LocalHero(avc, adc, astat, aincr) register struct vcache *avc; @@ -496,7 +512,7 @@ } if (ok) { /* we've been tracking things correctly */ - adc->flags |= DFEntryMod; + adc->dflags |= DFEntryMod; adc->f.versionNo = avers; return 1; } --- src/afs/VNOPS/afs_vnop_dirops.c 2001/11/01 04:02:05 1.5 +++ src/afs/VNOPS/afs_vnop_dirops.c 2001/11/16 05:13:54 @@ -128,6 +128,7 @@ goto done; } /* otherwise, we should see if we can make the change to the dir locally */ + if (tdc) ObtainWriteLock(&tdc->lock, 632); if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) { /* we can do it locally */ code = afs_dir_Create(&tdc->f.inode, aname, &newFid.Fid); @@ -137,6 +138,7 @@ } } if (tdc) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } adp->m.LinkCount = OutDirStatus.LinkCount; @@ -207,6 +209,7 @@ tdc = afs_GetDCache(adp, (afs_size_t) 0, &treq, &offset, &len, 1); /* test for error below */ ObtainWriteLock(&adp->lock,154); + if (tdc) ObtainSharedLock(&tdc->lock, 633); if (tdc && (adp->states & CForeign)) { struct VenusFid unlinkFid; @@ -249,7 +252,10 @@ AFS_STATS_FS_RPCIDX_REMOVEDIR, SHARED_LOCK, (struct cell *)0)); if (code) { - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); + } if (code < 0) { ObtainWriteLock(&afs_xcbhash, 491); afs_DequeueCallback(adp); @@ -262,6 +268,7 @@ } /* here if rpc worked; update the in-core link count */ 
adp->m.LinkCount = OutDirStatus.LinkCount; + if (tdc) UpgradeSToWLock(&tdc->lock, 634); if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) { /* we can do it locally */ code = afs_dir_Delete(&tdc->f.inode, aname); @@ -271,6 +278,7 @@ } } if (tdc) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); /* drop ref count */ } --- src/afs/VNOPS/afs_vnop_link.c 2001/11/01 04:02:05 1.5 +++ src/afs/VNOPS/afs_vnop_link.c 2001/11/16 05:13:54 @@ -114,6 +114,7 @@ ReleaseWriteLock(&adp->lock); goto done; } + if (tdc) ObtainWriteLock(&tdc->lock, 635); if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) { /* we can do it locally */ code = afs_dir_Create(&tdc->f.inode, aname, &avc->fid.Fid); @@ -123,6 +124,7 @@ } } if (tdc) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); /* drop ref count */ } ReleaseWriteLock(&adp->lock); --- src/afs/VNOPS/afs_vnop_lookup.c 2001/11/13 14:47:15 1.24 +++ src/afs/VNOPS/afs_vnop_lookup.c 2001/11/16 05:13:54 @@ -445,6 +445,7 @@ /* lock the directory cache entry */ ObtainReadLock(&adp->lock); + ObtainReadLock(&dcp->lock); /* * Make sure that the data in the cache is current. There are two @@ -453,20 +454,22 @@ * 2. 
The cache data is no longer valid */ while ((adp->states & CStatd) - && (dcp->flags & DFFetching) + && (dcp->dflags & DFFetching) && hsame(adp->m.DataVersion, dcp->f.versionNo)) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, dcp, - ICL_TYPE_INT32, dcp->flags); - dcp->flags |= DFWaiting; + ICL_TYPE_INT32, dcp->dflags); + ReleaseReadLock(&dcp->lock); ReleaseReadLock(&adp->lock); afs_osi_Sleep(&dcp->validPos); ObtainReadLock(&adp->lock); + ObtainReadLock(&dcp->lock); } if (!(adp->states & CStatd) || !hsame(adp->m.DataVersion, dcp->f.versionNo)) { + ReleaseReadLock(&dcp->lock); ReleaseReadLock(&adp->lock); afs_PutDCache(dcp); goto tagain; @@ -586,6 +589,7 @@ } /* while loop over all dir entries */ /* now release the dir lock and prepare to make the bulk RPC */ + ReleaseReadLock(&dcp->lock); ReleaseReadLock(&adp->lock); /* release the chunk */ @@ -1081,6 +1085,7 @@ /* now we will just call dir package with appropriate inode. Dirs are always fetched in their entirety for now */ ObtainReadLock(&adp->lock); + ObtainReadLock(&tdc->lock); /* * Make sure that the data in the cache is current. There are two @@ -1089,15 +1094,17 @@ * 2. 
The cache data is no longer valid */ while ((adp->states & CStatd) - && (tdc->flags & DFFetching) + && (tdc->dflags & DFFetching) && hsame(adp->m.DataVersion, tdc->f.versionNo)) { - tdc->flags |= DFWaiting; + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&adp->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&adp->lock); + ObtainReadLock(&tdc->lock); } if (!(adp->states & CStatd) || !hsame(adp->m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&adp->lock); afs_PutDCache(tdc); goto redo; @@ -1128,6 +1135,7 @@ } tname = sysState.name; + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); if (code == ENOENT && afs_IsDynroot(adp) && dynrootRetry) { --- src/afs/VNOPS/afs_vnop_read.c 2001/11/13 14:47:15 1.8 +++ src/afs/VNOPS/afs_vnop_read.c 2001/11/16 05:13:54 @@ -107,13 +107,22 @@ hset(avc->flushDV, avc->m.DataVersion); } #endif + + /* + * Locks held: + * avc->lock(R) + */ while (totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->m.Length) break; /* all done */ if (noLock) { - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + } tdc = afs_FindDCache(avc, filePos); if (tdc) { + ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->f.chunkBytes - offset; } @@ -123,15 +132,16 @@ The server could update the file as soon as the fetch responsible for the setting of the DFFetching flag completes. 
- However, the presence of the DFFetching flag (visible under a - read lock since it is set and cleared only under a write lock) - means that we're fetching as good a version as was known to this - client at the time of the last call to afs_VerifyVCache, since - the latter updates the stat cache's m.DataVersion field under a - write lock, and from the time that the DFFetching flag goes on - (before the fetch starts), to the time it goes off (after the - fetch completes), afs_GetDCache keeps at least a read lock - (actually it keeps an S lock) on the cache entry. + However, the presence of the DFFetching flag (visible under + a dcache read lock since it is set and cleared only under a + dcache write lock) means that we're fetching as good a version + as was known to this client at the time of the last call to + afs_VerifyVCache, since the latter updates the stat cache's + m.DataVersion field under a vcache write lock, and from the + time that the DFFetching flag goes on in afs_GetDCache (before + the fetch starts), to the time it goes off (after the fetch + completes), afs_GetDCache keeps at least a read lock on the + vcache entry. This means that if the DFFetching flag is set, we can use that data for any reads that must come from the current version of @@ -147,13 +157,17 @@ m.DataVersion > f.versionNo (the latter is not updated until after the fetch completes). */ - if (tdc) afs_PutDCache(tdc); /* before reusing tdc */ + if (tdc) { + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); /* before reusing tdc */ + } tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2); + ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. 
*/ - if (!(tdc->flags & DFFetching) + if (!(tdc->dflags & DFFetching) && !hsame(avc->m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, * and we'll need new data */ @@ -161,43 +175,54 @@ if (trybusy && !afs_BBusy()) { struct brequest *bp; /* daemon is not busy */ - if (!(tdc->flags & DFFetchReq)) { + ObtainSharedLock(&tdc->mflock, 665); + if (!(tdc->mflags & DFFetchReq)) { /* start the daemon (may already be running, however) */ - tdc->flags |= DFFetchReq; + UpgradeSToWLock(&tdc->mflock, 666); + tdc->mflags |= DFFetchReq; bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, (afs_size_t)filePos, (afs_size_t) 0, tdc); if (!bp) { - tdc->flags &= ~DFFetchReq; + tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ + ReleaseWriteLock(&tdc->mflock); goto tagain; } + ConvertWToSLock(&tdc->mflock); /* don't use bp pointer! */ } - while (tdc->flags & DFFetchReq) { + ConvertSToRLock(&tdc->mflock); + while (tdc->mflags & DFFetchReq) { /* don't need waiting flag on this one */ + ReleaseReadLock(&tdc->mflock); + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); + ObtainReadLock(&tdc->mflock); } + ReleaseReadLock(&tdc->mflock); } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. 
*/ - while ((tdc->flags & DFFetching) && tdc->validPos <= filePos) { + while ((tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, - ICL_TYPE_INT32, tdc->flags); - tdc->flags |= DFWaiting; + ICL_TYPE_INT32, tdc->dflags); + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); } /* fetching flag gone, data is here, or we never tried (BBusy for instance) */ - if (tdc->flags & DFFetching) { + if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; @@ -210,6 +235,7 @@ } else { /* don't have current data, so get it below */ + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = (struct dcache *) 0; } @@ -219,6 +245,7 @@ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); } } @@ -226,6 +253,13 @@ error = EIO; break; } + + /* + * Locks held: + * avc->lock(R) + * tdc->lock(R) + */ + if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ /* read past the end of a chunk, may not be at next chunk yet, and yet @@ -271,14 +305,21 @@ if (len <= 0) break; /* surprise eof */ } /* the whole while loop */ + /* + * Locks held: + * avc->lock(R) + * tdc->lock(R) if tdc + */ + /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch. 
*/ if (tdc) { + ReleaseReadLock(&tdc->lock); #ifndef AFS_VM_RDWR_ENV /* try to queue prefetch, if needed */ - if (!(tdc->flags & DFNextStarted) && !noLock) { + if (!noLock) { afs_PrefetchChunk(avc, tdc, acred, &treq); } #endif @@ -297,8 +338,8 @@ * flag in the prefetched block, so that the next call to read knows to wait * for the daemon to start doing things. * - * This function must be called with the vnode at least read-locked - * because it plays around with dcache entries. + * This function must be called with the vnode at least read-locked, and + * no locks on the dcache, because it plays around with dcache entries. */ void afs_PrefetchChunk(struct vcache *avc, struct dcache *adc, struct AFS_UCRED *acred, struct vrequest *areq) @@ -309,30 +350,55 @@ offset = adc->f.chunk+1; /* next chunk we'll need */ offset = AFS_CHUNKTOBASE(offset); /* base of next chunk */ - if (offset < avc->m.Length && !afs_BBusy()) { + ObtainReadLock(&adc->lock); + ObtainSharedLock(&adc->mflock, 662); + if (offset < avc->m.Length && !(adc->mflags & DFNextStarted) && !afs_BBusy()) { struct brequest *bp; - adc->flags |= DFNextStarted; /* we've tried to prefetch for this guy */ + + UpgradeSToWLock(&adc->mflock, 663); + adc->mflags |= DFNextStarted; /* we've tried to prefetch for this guy */ + ReleaseWriteLock(&adc->mflock); + ReleaseReadLock(&adc->lock); + tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2); /* type 2 never returns 0 */ - if (!(tdc->flags & DFFetchReq)) { + ObtainSharedLock(&tdc->mflock, 651); + if (!(tdc->mflags & DFFetchReq)) { /* ask the daemon to do the work */ - tdc->flags |= DFFetchReq; /* guaranteed to be cleared by BKG or GetDCache */ + UpgradeSToWLock(&tdc->mflock, 652); + tdc->mflags |= DFFetchReq; /* guaranteed to be cleared by BKG or GetDCache */ /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done, * since we don't want to wait for it to finish before doing so ourselves. 
*/ -#ifdef AFS_SUN5_ENVX - mutex_exit(&tdc->lock); -#endif - bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, - (afs_size_t) offset, (afs_size_t) 1, tdc); + bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, + (afs_size_t) offset, (afs_size_t) 1, tdc); if (!bp) { /* Bkg table full; just abort non-important prefetching to avoid deadlocks */ - tdc->flags &= ~(DFNextStarted | DFFetchReq); + tdc->mflags &= ~DFFetchReq; + ReleaseWriteLock(&tdc->mflock); afs_PutDCache(tdc); - return; + + /* + * DCLOCKXXX: This is a little sketchy, since someone else + * could have already started a prefetch.. In practice, + * this probably doesn't matter; at most it would cause an + * extra slot in the BKG table to be used up when someone + * prefetches this for the second time. + */ + ObtainReadLock(&adc->lock); + ObtainWriteLock(&adc->mflock, 664); + adc->mflags &= ~DFNextStarted; + ReleaseWriteLock(&adc->mflock); + ReleaseReadLock(&adc->lock); + } else { + ReleaseWriteLock(&tdc->mflock); } - } - else + } else { + ReleaseSharedLock(&tdc->mflock); afs_PutDCache(tdc); + } + } else { + ReleaseSharedLock(&adc->mflock); + ReleaseReadLock(&adc->lock); } } @@ -381,15 +447,21 @@ if ((avc->states & CStatd) /* up to date */ && (tdc = avc->quick.dc) && (tdc->index != NULLIDX) - && !(afs_indexFlags[tdc->index] & IFFree)) { + && !(afs_indexFlags[tdc->index] & (IFFree | IFDiscarded))) { + + int readLocked = 0; - tdc->refCount++; + afs_RefDCache(tdc); ReleaseReadLock(&afs_xdcache); + if (tdc->stamp == avc->quick.stamp) { + readLocked = 1; + ObtainReadLock(&tdc->lock); + } if ((tdc->stamp == avc->quick.stamp) /* hint matches */ && ((offDiff = (auio->afsio_offset - avc->quick.minLoc)) >= 0) && (tdc->f.chunkBytes >= auio->afsio_resid + offDiff) - && !(tdc->flags & DFFetching)) { /* fits in chunk */ + && !(tdc->dflags & DFFetching)) { /* fits in chunk */ auio->afsio_offset -= avc->quick.minLoc; @@ -465,19 +537,23 @@ hadd32(afs_indexCounter, 1); if (!noLock) { + ReleaseReadLock(&avc->lock); 
#ifndef AFS_VM_RDWR_ENV - if (!(code = afs_InitReq(&treq, acred))&& (!(tdc->flags & DFNextStarted))) - afs_PrefetchChunk(avc, tdc, acred, &treq); + if (!(code = afs_InitReq(&treq, acred))) { + if (!(tdc->mflags & DFNextStarted)) + afs_PrefetchChunk(avc, tdc, acred, &treq); + } #endif - ReleaseReadLock(&avc->lock); } - tdc->refCount--; + if (readLocked) ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); return (code); } if (!tdc->f.chunkBytes) { /* debugging f.chunkBytes == 0 problem */ savedc = tdc; } - tdc->refCount--; + if (readLocked) ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); } else { ReleaseReadLock(&afs_xdcache); } @@ -507,7 +583,7 @@ struct iovec *tvec; struct osi_file *tfile; afs_int32 code; - int munlocked, trybusy=1; + int trybusy=1; struct vnode *vp; struct vrequest treq; @@ -559,9 +635,13 @@ /* read all of the cached info */ if (filePos >= avc->m.Length) break; /* all done */ if (noLock) { - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); + } tdc = afs_FindDCache(avc, filePos); if (tdc) { + ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->f.chunkBytes - offset; } @@ -571,15 +651,16 @@ The server could update the file as soon as the fetch responsible for the setting of the DFFetching flag completes. - However, the presence of the DFFetching flag (visible under a - read lock since it is set and cleared only under a write lock) - means that we're fetching as good a version as was known to this - client at the time of the last call to afs_VerifyVCache, since - the latter updates the stat cache's m.DataVersion field under a - write lock, and from the time that the DFFetching flag goes on - (before the fetch starts), to the time it goes off (after the - fetch completes), afs_GetDCache keeps at least a read lock - (actually it keeps an S lock) on the cache entry. 
+ However, the presence of the DFFetching flag (visible under + a dcache read lock since it is set and cleared only under a + dcache write lock) means that we're fetching as good a version + as was known to this client at the time of the last call to + afs_VerifyVCache, since the latter updates the stat cache's + m.DataVersion field under a vcache write lock, and from the + time that the DFFetching flag goes on in afs_GetDCache (before + the fetch starts), to the time it goes off (after the fetch + completes), afs_GetDCache keeps at least a read lock on the + vcache entry. This means that if the DFFetching flag is set, we can use that data for any reads that must come from the current version of @@ -595,62 +676,72 @@ m.DataVersion > f.versionNo (the latter is not updated until after the fetch completes). */ - if (tdc) afs_PutDCache(tdc); /* before reusing tdc */ - munlocked = 0; + if (tdc) { + ReleaseReadLock(&tdc->lock); + afs_PutDCache(tdc); /* before reusing tdc */ + } tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2); + ObtainReadLock(&tdc->lock); if (tdc == savedc) { savedc = 0; } /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. 
*/ - if (!(tdc->flags & DFFetching) + if (!(tdc->dflags & DFFetching) && !hsame(avc->m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, and we'll need new data */ tagain: if (trybusy && !afs_BBusy()) { struct brequest *bp; /* daemon is not busy */ - if (!(tdc->flags & DFFetchReq)) { - tdc->flags |= DFFetchReq; -#ifdef AFS_SUN5_ENVX - mutex_exit(&tdc->lock); - munlocked = 1; -#endif + ObtainSharedLock(&tdc->mflock, 667); + if (!(tdc->mflags & DFFetchReq)) { + UpgradeSToWLock(&tdc->mflock, 668); + tdc->mflags |= DFFetchReq; bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, - (afs_size_t)filePos, (afs_size_t) 0, + (afs_size_t) filePos, (afs_size_t) 0, tdc); if (!bp) { /* Bkg table full; retry deadlocks */ - tdc->flags &= ~DFFetchReq; + tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ + ReleaseWriteLock(&tdc->mflock); goto tagain; } + ConvertWToSLock(&tdc->mflock); } - while (tdc->flags & DFFetchReq) { + ConvertSToRLock(&tdc->mflock); + while (tdc->mflags & DFFetchReq) { /* don't need waiting flag on this one */ + ReleaseReadLock(&tdc->mflock); + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); + ObtainReadLock(&tdc->mflock); } + ReleaseReadLock(&tdc->mflock); } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. 
*/ - while ((tdc->flags & DFFetching) && tdc->validPos <= filePos) { + while ((tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, - ICL_TYPE_INT32, tdc->flags); - tdc->flags |= DFWaiting; + ICL_TYPE_INT32, tdc->dflags); + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); } /* fetching flag gone, data is here, or we never tried (BBusy for instance) */ - if (tdc->flags & DFFetching) { + if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; @@ -663,6 +754,7 @@ } else { /* don't have current data, so get it below */ + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = (struct dcache *) 0; } @@ -672,6 +764,7 @@ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1); ObtainReadLock(&avc->lock); + if (tdc) ObtainReadLock(&tdc->lock); } } @@ -843,10 +936,12 @@ * to it in case we need to do a prefetch, obviously. */ if (tdc) { + ReleaseReadLock(&tdc->lock); #ifndef AFS_VM_RDWR_ENV /* try to queue prefetch, if needed */ - if (!(tdc->flags & DFNextStarted) && !noLock) { - afs_PrefetchChunk(avc, tdc, acred, &treq); + if (!noLock) { + if (!(tdc->mflags & DFNextStarted)) + afs_PrefetchChunk(avc, tdc, acred, &treq); } #endif afs_PutDCache(tdc); --- src/afs/VNOPS/afs_vnop_readdir.c 2001/11/13 14:47:15 1.10 +++ src/afs/VNOPS/afs_vnop_readdir.c 2001/11/16 05:13:54 @@ -508,6 +508,7 @@ goto done; } ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); /* * Make sure that the data in the cache is current. There are two @@ -516,20 +517,22 @@ * 2.
The cache data is no longer valid */ while ((avc->states & CStatd) - && (tdc->flags & DFFetching) + && (tdc->dflags & DFFetching) && hsame(avc->m.DataVersion, tdc->f.versionNo)) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, - ICL_TYPE_INT32, tdc->flags); - tdc->flags |= DFWaiting; + ICL_TYPE_INT32, tdc->dflags); + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); } if (!(avc->states & CStatd) || !hsame(avc->m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_PutDCache(tdc); goto tagain; @@ -730,6 +733,7 @@ if (ode) DRelease(ode, 0); dirend: + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); ReleaseReadLock(&avc->lock); @@ -789,6 +793,7 @@ goto done; } ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); /* * Make sure that the data in the cache is current. There are two @@ -797,20 +802,22 @@ * 2. 
The cache data is no longer valid */ while ((avc->states & CStatd) - && (tdc->flags & DFFetching) + && (tdc->dflags & DFFetching) && hsame(avc->m.DataVersion, tdc->f.versionNo)) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, - ICL_TYPE_INT32, tdc->flags); - tdc->flags |= DFWaiting; + ICL_TYPE_INT32, tdc->dflags); + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_osi_Sleep(&tdc->validPos); ObtainReadLock(&avc->lock); + ObtainReadLock(&tdc->lock); } if (!(avc->states & CStatd) || !hsame(avc->m.DataVersion, tdc->f.versionNo)) { + ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); afs_PutDCache(tdc); goto tagain; @@ -961,6 +968,7 @@ if (ode) DRelease(ode, 0); dirend: + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); ReleaseReadLock(&avc->lock); --- src/afs/VNOPS/afs_vnop_remove.c 2001/11/01 05:24:36 1.8 +++ src/afs/VNOPS/afs_vnop_remove.c 2001/11/16 05:13:54 @@ -130,7 +130,10 @@ if (tvc) afs_symhint_inval(tvc); /* XXX: don't really need to be so extreme */ if (code) { - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseSharedLock(&tdc->lock); + afs_PutDCache(tdc); + } if (tvc) afs_PutVCache(tvc, WRITE_LOCK); if (code < 0) { @@ -144,6 +147,7 @@ code = afs_CheckCode(code, treqp, 21); return code; } + if (tdc) UpgradeSToWLock(&tdc->lock, 637); if (afs_LocalHero(adp, tdc, &OutDirStatus, 1)) { /* we can do it locally */ code = afs_dir_Delete(&tdc->f.inode, aname); @@ -153,6 +157,7 @@ } } if (tdc) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); /* drop ref count */ } ReleaseWriteLock(&adp->lock); @@ -265,6 +270,7 @@ tdc = afs_GetDCache(adp, (afs_size_t) 0, &treq, &offset, &len, 1); /* test for error below */ ObtainWriteLock(&adp->lock,142); + if (tdc) ObtainSharedLock(&tdc->lock, 638); /* * Make sure that the data in the cache is current.
We may have @@ -273,8 +279,10 @@ if (!(adp->states & CStatd) || (tdc && !hsame(adp->m.DataVersion, tdc->f.versionNo))) { ReleaseWriteLock(&adp->lock); - if (tdc) + if (tdc) { + ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); + } goto tagain; } @@ -330,6 +338,7 @@ char *unlname = newname(); ReleaseWriteLock(&adp->lock); + if (tdc) ReleaseSharedLock(&tdc->lock); code = afsrename(adp, aname, adp, unlname, acred); Tnam1 = unlname; if (!code) { @@ -408,8 +417,9 @@ if (adp) { tdc = afs_FindDCache(adp, 0); ObtainWriteLock(&adp->lock, 159); + if (tdc) ObtainSharedLock(&tdc->lock, 639); - /* afsremove releases the adp lock, and does vn_rele(avc) */ + /* afsremove releases the adp & tdc locks, and does vn_rele(avc) */ code = afsremove(adp, tdc, avc, unlname, cred, &treq); afs_PutVCache(adp, WRITE_LOCK); } else { --- src/afs/VNOPS/afs_vnop_rename.c 2001/11/01 04:40:02 1.6 +++ src/afs/VNOPS/afs_vnop_rename.c 2001/11/16 05:13:54 @@ -68,6 +68,7 @@ goto done; } oneDir = 0; + code = 0; if (andp->fid.Fid.Vnode == aodp->fid.Fid.Vnode) { if (!strcmp(aname1, aname2)) { /* Same directory and same name; this is a noop and just return success @@ -77,6 +78,13 @@ goto done; } ObtainWriteLock(&andp->lock,147); + tdc1 = afs_GetDCache(aodp, (afs_size_t) 0, &treq, &offset, &len, 0); + if (!tdc1) { + code = ENOENT; + } else { + ObtainWriteLock(&tdc1->lock, 643); + } + tdc2 = tdc1; oneDir = 1; /* only one dude locked */ } else if ((andp->states & CRO) || (aodp->states & CRO)) { @@ -86,10 +94,24 @@ else if (andp->fid.Fid.Vnode < aodp->fid.Fid.Vnode) { ObtainWriteLock(&andp->lock,148); /* lock smaller one first */ ObtainWriteLock(&aodp->lock,149); + tdc2 = afs_FindDCache(andp, 0); + if (tdc2) ObtainWriteLock(&tdc2->lock, 644); + tdc1 = afs_GetDCache(aodp, (afs_size_t) 0, &treq, &offset, &len, 0); + if (tdc1) + ObtainWriteLock(&tdc1->lock, 645); + else + code = ENOENT; } else { ObtainWriteLock(&aodp->lock,150); /* lock smaller one first */ ObtainWriteLock(&andp->lock,557); + tdc1 = afs_GetDCache(aodp,
(afs_size_t) 0, &treq, &offset, &len, 0); + if (tdc1) + ObtainWriteLock(&tdc1->lock, 646); + else + code = ENOENT; + tdc2 = afs_FindDCache(andp, 0); + if (tdc2) ObtainWriteLock(&tdc2->lock, 647); } osi_dnlc_remove (aodp, aname1, 0); @@ -97,36 +119,45 @@ afs_symhint_inval(aodp); afs_symhint_inval(andp); - /* before doing the rename, lookup the fileFid, just in case we - * don't make it down the path below that looks it up. We always need - * fileFid in order to handle ".." invalidation at the very end. - */ - code = 0; - tdc1 = afs_GetDCache(aodp, (afs_size_t) 0, &treq, &offset, &len, 0); - if (!tdc1) { - code = ENOENT; - } /* * Make sure that the data in the cache is current. We may have * received a callback while we were waiting for the write lock. */ - else if (!(aodp->states & CStatd) - || !hsame(aodp->m.DataVersion, tdc1->f.versionNo)) { - ReleaseWriteLock(&aodp->lock); - if (!oneDir) ReleaseWriteLock(&andp->lock); - afs_PutDCache(tdc1); - goto tagain; + if (tdc1) { + if (!(aodp->states & CStatd) + || !hsame(aodp->m.DataVersion, tdc1->f.versionNo)) { + + ReleaseWriteLock(&aodp->lock); + if (!oneDir) { + if (tdc2) { + ReleaseWriteLock(&tdc2->lock); + afs_PutDCache(tdc2); + } + ReleaseWriteLock(&andp->lock); + } + ReleaseWriteLock(&tdc1->lock); + afs_PutDCache(tdc1); + goto tagain; + } } if (code == 0) code = afs_dir_Lookup(&tdc1->f.inode, aname1, &fileFid.Fid); if (code) { - if (tdc1) afs_PutDCache(tdc1); + if (tdc1) { + ReleaseWriteLock(&tdc1->lock); + afs_PutDCache(tdc1); + } ReleaseWriteLock(&aodp->lock); - if (!oneDir) ReleaseWriteLock(&andp->lock); + if (!oneDir) { + if (tdc2) { + ReleaseWriteLock(&tdc2->lock); + afs_PutDCache(tdc2); + } + ReleaseWriteLock(&andp->lock); + } goto done; } - afs_PutDCache(tdc1); /* locks are now set, proceed to do the real work */ do { @@ -154,16 +185,8 @@ /* Now we try to do things locally. This is really loathsome code. 
*/ unlinkFid.Fid.Vnode = 0; if (code == 0) { - tdc1 = tdc2 = 0; - /* don't use GetDCache because we don't want to worry about what happens if - we have to stat the file (updating the stat block) before finishing - local hero stuff (which may put old (from rename) data version number - back in the cache entry). - In any event, we don't really care if the data is not + /* In any event, we don't really care if the data (tdc2) is not in the cache; if it isn't, we won't do the update locally. */ - tdc1 = afs_FindDCache(aodp, 0); - if (!oneDir) tdc2 = afs_FindDCache(andp, 0); - else tdc2 = tdc1; /* see if version numbers increased properly */ doLocally = 1; if (oneDir) { @@ -215,8 +238,6 @@ } } } - if (tdc1) afs_PutDCache(tdc1); - if ((!oneDir) && tdc2) afs_PutDCache(tdc2); /* update dir link counts */ aodp->m.LinkCount = OutOldDirStatus.LinkCount; @@ -240,6 +261,16 @@ } /* release locks */ + if (tdc1) { + ReleaseWriteLock(&tdc1->lock); + afs_PutDCache(tdc1); + } + + if ((!oneDir) && tdc2) { + ReleaseWriteLock(&tdc2->lock); + afs_PutDCache(tdc2); + } + ReleaseWriteLock(&aodp->lock); if (!oneDir) ReleaseWriteLock(&andp->lock); @@ -306,8 +337,10 @@ ObtainWriteLock(&tvc->lock,152); tdc1 = afs_FindDCache(tvc, 0); if (tdc1) { + ObtainWriteLock(&tdc1->lock, 648); ZapDCE(tdc1); /* mark as unknown */ DZap(&tdc1->f.inode); + ReleaseWriteLock(&tdc1->lock); afs_PutDCache(tdc1); /* put it back */ } osi_dnlc_remove(tvc, "..", 0); --- src/afs/VNOPS/afs_vnop_symlink.c 2001/11/01 05:24:36 1.6 +++ src/afs/VNOPS/afs_vnop_symlink.c 2001/11/16 05:13:54 @@ -112,6 +112,7 @@ tdc = afs_GetDCache(adp, (afs_size_t) 0, &treq, &offset, &len, 1); volp = afs_FindVolume(&adp->fid, READ_LOCK); /*parent is also in same vol*/ ObtainWriteLock(&adp->lock,156); + if (tdc) ObtainWriteLock(&tdc->lock, 636); ObtainSharedLock(&afs_xvcache,17); /* prevent others from creating this entry */ /* XXX Pay attention to afs_xvcache around the whole thing!!
XXX */ do { @@ -159,7 +160,10 @@ } ReleaseWriteLock(&adp->lock); ReleaseWriteLock(&afs_xvcache); - if (tdc) afs_PutDCache(tdc); + if (tdc) { + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); + } goto done; } /* otherwise, we should see if we can make the change to the dir locally */ @@ -172,6 +176,7 @@ } } if (tdc) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } newFid.Cell = adp->fid.Cell; @@ -248,9 +253,12 @@ if (avc->m.Mode & 0111) alen = len+1; /* regular link */ else alen = len; /* mt point */ tp = afs_osi_Alloc(alen); /* make room for terminating null */ + ObtainReadLock(&tdc->lock); addr = afs_MemCacheOpen(tdc->f.inode); tlen = len; code = afs_MemReadBlk(addr, 0, tp, tlen); + afs_MemCacheClose(addr); + ReleaseReadLock(&tdc->lock); tp[alen-1] = 0; afs_PutDCache(tdc); if (code != len) { @@ -290,6 +298,7 @@ afs_PutDCache(tdc); return EFAULT; } + ObtainReadLock(&tdc->lock); tfile = osi_UFSOpen (tdc->f.inode); if (avc->m.Mode & 0111) alen = len+1; /* regular link */ else alen = len; /* mt point */ @@ -298,6 +307,7 @@ code = afs_osi_Read(tfile, -1, tp, tlen); tp[alen-1] = 0; osi_UFSClose(tfile); + ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); if (code != tlen) { afs_osi_Free(tp, alen); --- src/afs/VNOPS/afs_vnop_write.c 2001/11/10 23:01:03 1.14 +++ src/afs/VNOPS/afs_vnop_write.c 2001/11/16 05:13:54 @@ -197,11 +197,14 @@ */ if (noLock) { tdc = afs_FindDCache(avc, filePos); + if (tdc) ObtainWriteLock(&tdc->lock, 653); } else if (afs_blocksUsed > (CM_WAITFORDRAINPCT*afs_cacheBlocks)/100) { tdc = afs_FindDCache(avc, filePos); if (tdc) { + ObtainWriteLock(&tdc->lock, 654); if (!hsame(tdc->f.versionNo, avc->m.DataVersion) || - (tdc->flags & DFFetching)) { + (tdc->dflags & DFFetching)) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } @@ -222,9 +225,11 @@ } avc->states |= CDirty; tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 4); + if (tdc) ObtainWriteLock(&tdc->lock, 655); } } else { tdc = afs_GetDCache(avc, filePos, &treq, 
&offset, &len, 4); + if (tdc) ObtainWriteLock(&tdc->lock, 656); } if (!tdc) { error = EIO; @@ -237,7 +242,7 @@ if (!(tdc->f.states & DWriting)) { /* don't mark entry as mod if we don't have to */ tdc->f.states |= DWriting; - tdc->flags |= DFEntryMod; + tdc->dflags |= DFEntryMod; } len = totalLength; /* write this amount by default */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); @@ -260,6 +265,7 @@ afs_MemCacheTruncate(tdc->f.inode, 0); afs_stats_cmperf.cacheCurrDirtyChunks--; afs_indexFlags[tdc->index] &= ~IFDataMod; /* so it does disappear */ + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); break; } @@ -297,11 +303,13 @@ code = afs_DoPartialWrite(avc, &treq); if (code) { error = code; + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); break; } } #endif + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } #ifndef AFS_VM_RDWR_ENV @@ -429,11 +437,14 @@ /* read the cached info */ if (noLock) { tdc = afs_FindDCache(avc, filePos); + if (tdc) ObtainWriteLock(&tdc->lock, 657); } else if (afs_blocksUsed > (CM_WAITFORDRAINPCT*afs_cacheBlocks)/100) { tdc = afs_FindDCache(avc, filePos); if (tdc) { + ObtainWriteLock(&tdc->lock, 658); if (!hsame(tdc->f.versionNo, avc->m.DataVersion) || - (tdc->flags & DFFetching)) { + (tdc->dflags & DFFetching)) { + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } @@ -454,9 +465,11 @@ } avc->states |= CDirty; tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 4); + if (tdc) ObtainWriteLock(&tdc->lock, 659); } } else { tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 4); + if (tdc) ObtainWriteLock(&tdc->lock, 660); } if (!tdc) { error = EIO; @@ -469,7 +482,7 @@ if (!(tdc->f.states & DWriting)) { /* don't mark entry as mod if we don't have to */ tdc->f.states |= DWriting; - tdc->flags |= DFEntryMod; + tdc->dflags |= DFEntryMod; } tfile = (struct osi_file *)osi_UFSOpen(tdc->f.inode); len = totalLength; /* write this amount by default */ @@ -563,8 +576,9 @@ afs_AdjustSize(tdc, 0); /* sets f.chunkSize to 0 */ 
afs_stats_cmperf.cacheCurrDirtyChunks--; afs_indexFlags[tdc->index] &= ~IFDataMod; /* so it does disappear */ - afs_PutDCache(tdc); afs_CFileClose(tfile); + ReleaseWriteLock(&tdc->lock); + afs_PutDCache(tdc); break; } /* otherwise we've written some, fixup length, etc and continue with next seg */ @@ -602,11 +616,13 @@ code = afs_DoPartialWrite(avc, &treq); if (code) { error = code; + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); break; } } #endif + ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } #ifndef AFS_VM_RDWR_ENV --- src/venus/kdump.c 2001/11/08 08:12:59 1.21 +++ src/venus/kdump.c 2001/11/16 05:13:56 @@ -2643,8 +2643,8 @@ if (!pnt) return; printf("%lx: ", dp); print_venusfid(" fid", &dcp->f.fid); - printf("refcnt=%d, flags=%x, validPos=%d\n", - dcp->refCount, dcp->flags, dcp->validPos); + printf("refcnt=%d, dflags=%x, mflags=%x, validPos=%d\n", + dcp->refCount, dcp->dflags, dcp->mflags, dcp->validPos); #ifdef AFS33 printf("\tf.modtime=%d, f.versNo=%d.%d\n", @@ -3334,7 +3334,7 @@ kread(kmem, (off_t) sep, (char *)sentry, sizeof *sentry); printf("\t%lx: next=0x%lx, peer=0x%lx, epoch=0x%x, cid=0x%x, ackRate=%d\n", sep, se.next, se.peer, se.epoch, se.cid, se.ackRate); - printf("\t\tcall[%x=%d, %x=%d, %x=%d, %x=%d]\n", + printf("\t\tcall[%lx=%d, %lx=%d, %lx=%d, %lx=%d]\n", se.call[0], se.callNumber[0], se.call[1], se.callNumber[1], se.call[2], se.callNumber[2],