/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Mutex-free cache for key1+key2=value.
*/
#ifndef _DALVIK_ATOMICCACHE
#define _DALVIK_ATOMICCACHE
/*
* If set to "1", gather some stats on our caching success rate.
*/
#define CALC_CACHE_STATS 0
/*
* One entry in the cache. We store two keys (e.g. the classes that are
* arguments to "instanceof") and one result (e.g. a boolean value).
*
* Must be exactly 16 bytes.
*/
typedef struct AtomicCacheEntry {
u4 key1;
u4 key2;
u4 value;
volatile u4 version; /* version and lock flag */
} AtomicCacheEntry;
/*
* One cache.
*
* Thought: we might be able to save a few cycles by storing the cache
* struct and "entries" separately, avoiding an indirection. (We already
* handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.)
*/
typedef struct AtomicCache {
AtomicCacheEntry* entries; /* array of entries */
int numEntries; /* #of entries, must be power of 2 */
void* entryAlloc; /* memory allocated for entries */
/* cache stats; note we don't guarantee atomic increments for these */
int trivial; /* cache access not required */
int fail; /* contention failure */
int hits; /* found entry in cache */
int misses; /* entry was for other keys */
int fills; /* entry was empty */
} AtomicCache;
/*
* Do a cache lookup. We need to be able to read and write entries
* atomically. There are a couple of ways to do this:
* (1) Have a global lock. A mutex is too heavy, so instead we would use
* an atomic flag. If the flag is set, we could sit and spin,
* but if we're a high-priority thread that may cause a lockup.
* Better to just ignore the cache and do the full computation.
* (2) Have a "version" that gets incremented atomically when a write
* begins and again when it completes. Compare the version before
* and after doing reads. So long as we have memory barriers in the
* right place the compiler and CPU will do the right thing, allowing
* us to skip atomic ops in the common read case. The table updates
* are expensive, requiring two writes with barriers. We also need
* some sort of lock to ensure that nobody else tries to start an
* update while we're in the middle of one.
*
* We expect a 95+% hit rate for the things we use this for, so #2 is
* much better than #1.
*
* _cache is an AtomicCache*
* _cacheSize is _cache->cacheSize (can save a cycle avoiding the lookup)
* _key1, _key2 are the keys
*
* Define a function ATOMIC_CACHE_CALC that returns a 32-bit value. This
* will be invoked when we need to compute the value.
*
* Returns the value.
*/
#if CALC_CACHE_STATS > 0
# define CACHE_XARG(_value) ,_value
#else
# define CACHE_XARG(_value)
#endif
#define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({ \
AtomicCacheEntry* pEntry; \
int hash; \
u4 firstVersion, secondVersion; \
u4 value; \
\
/* simple hash function */ \
hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1); \
pEntry = (_cache)->entries + hash; \
\
firstVersion = android_atomic_acquire_load((int32_t*)&pEntry->version); \
\
if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) { \
/* \
* The fields match. Get the value, then read the version a \
* second time to verify that we didn't catch a partial update. \
* We're also hosed if "firstVersion" was odd, indicating that \
* an update was in progress before we got here (unlikely). \
*/ \
value = android_atomic_acquire_load((int32_t*) &pEntry->value); \
secondVersion = pEntry->version; \
\
if ((firstVersion & 0x01) != 0 || firstVersion != secondVersion) { \
/* \
* We clashed with another thread. Instead of sitting and \
* spinning, which might not complete if we're a high priority \
* thread, just do the regular computation. \
*/ \
if (CALC_CACHE_STATS) \
(_cache)->fail++; \
value = (u4) ATOMIC_CACHE_CALC; \
} else { \
/* all good */ \
if (CALC_CACHE_STATS) \
(_cache)->hits++; \
} \
} else { \
/* \
* Compute the result and update the cache. We really want this \
* to happen in a different method -- it makes the ARM frame \
* setup for this method simpler, which gives us a ~10% speed \
* boost. \
*/ \
value = (u4) ATOMIC_CACHE_CALC; \
dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \
firstVersion CACHE_XARG(_cache) ); \
} \
value; \
})
/*
* Allocate a cache.
*/
AtomicCache* dvmAllocAtomicCache(int numEntries);
/*
* Free a cache.
*/
void dvmFreeAtomicCache(AtomicCache* cache);
/*
* Update a cache entry.
*
* Making the last argument optional, instead of merely unused, saves us
* a few percent in the ATOMIC_CACHE_LOOKUP time.
*/
void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry,
u4 firstVersion
#if CALC_CACHE_STATS > 0
, AtomicCache* pCache
#endif
);
/*
* Debugging.
*/
void dvmDumpAtomicCacheStats(const AtomicCache* pCache);
#endif /*_DALVIK_ATOMICCACHE*/