//===- StringHash.h -------------------------------------------------------===//
//
// The MCLinker Project
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef MCLD_STRING_HASH_FUNCTION_H
#define MCLD_STRING_HASH_FUNCTION_H
#ifdef ENABLE_UNITTEST
#include <gtest.h>
#endif
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/DataTypes.h>
#include <llvm/Support/ErrorHandling.h>
#include <cctype>
#include <functional>
namespace mcld {
namespace hash {
enum Type {
RS,
JS,
PJW,
ELF,
BKDR,
SDBM,
DJB,
DEK,
BP,
FNV,
AP,
ES
};
/** \class template<uint32_t TYPE> StringHash
* \brief the template StringHash class, for specification
*/
template<uint32_t TYPE>
struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
llvm::report_fatal_error("Undefined StringHash function.\n");
}
};
/** \class StringHash<RSHash>
* \brief RS StringHash funciton
*/
template<>
struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
const unsigned int b = 378551;
uint32_t a = 63689;
uint32_t hash_val = 0;
for(unsigned int i = 0; i < pKey.size(); ++i) {
hash_val = hash_val * a + pKey[i];
a = a * b;
}
return hash_val;
}
};
/** \class StringHash<JSHash>
* \brief JS hash funciton
*/
template<>
struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
uint32_t hash_val = 1315423911;
for(unsigned int i = 0; i < pKey.size(); ++i) {
hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
}
return hash_val;
}
};
/** \class StringHash<PJW>
* \brief P.J. Weinberger hash function
*/
template<>
struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
const unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4);
const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
uint32_t hash_val = 0;
uint32_t test = 0;
for(unsigned int i = 0; i < pKey.size(); ++i) {
hash_val = (hash_val << OneEighth) + pKey[i];
if((test = hash_val & HighBits) != 0) {
hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
}
}
return hash_val;
}
};
/** \class StringHash<ELF>
* \brief ELF hash function.
*/
template<>
struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
uint32_t hash_val = 0;
uint32_t x = 0;
for (unsigned int i = 0; i < pKey.size(); ++i) {
hash_val = (hash_val << 4) + pKey[i];
if((x = hash_val & 0xF0000000L) != 0)
hash_val ^= (x >> 24);
hash_val &= ~x;
}
return hash_val;
}
};
/** \class StringHash<BKDR>
* \brief BKDR hash function
*/
template<>
struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
const uint32_t seed = 131;
uint32_t hash_val = 0;
for(uint32_t i = 0; i < pKey.size(); ++i)
hash_val = (hash_val * seed) + pKey[i];
return hash_val;
}
};
/** \class StringHash<SDBM>
* \brief SDBM hash function
* 0.049s in 100000 test
*/
template<>
struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
uint32_t hash_val = 0;
for(uint32_t i = 0; i < pKey.size(); ++i)
hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
return hash_val;
}
};
/** \class StringHash<DJB>
* \brief DJB hash function
* 0.057s in 100000 test
*/
template<>
struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
uint32_t hash_val = 5381;
for(uint32_t i = 0; i < pKey.size(); ++i)
hash_val = ((hash_val << 5) + hash_val) + pKey[i];
return hash_val;
}
};
/** \class StringHash<DEK>
* \brief DEK hash function
* 0.60s
*/
template<>
struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
uint32_t hash_val = pKey.size();
for(uint32_t i = 0; i < pKey.size(); ++i)
hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
return hash_val;
}
};
/** \class StringHash<BP>
* \brief BP hash function
* 0.057s
*/
template<>
struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
uint32_t hash_val = 0;
for(uint32_t i = 0; i < pKey.size(); ++i)
hash_val = hash_val << 7 ^ pKey[i];
return hash_val;
}
};
/** \class StringHash<FNV>
* \brief FNV hash function
* 0.058s
*/
template<>
struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
const uint32_t fnv_prime = 0x811C9DC5;
uint32_t hash_val = 0;
for(uint32_t i = 0; i < pKey.size(); ++i) {
hash_val *= fnv_prime;
hash_val ^= pKey[i];
}
return hash_val;
}
};
/** \class StringHash<AP>
* \brief AP hash function
* 0.060s
*/
template<>
struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pKey) const
{
unsigned int hash_val = 0xAAAAAAAA;
for(uint32_t i = 0; i < pKey.size(); ++i) {
hash_val ^= ((i & 1) == 0)?
((hash_val << 7) ^ pKey[i] * (hash_val >> 3)):
(~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
}
return hash_val;
}
};
/** \class StringHash<ES>
* \brief This is a revision of Edward Sayers' string characteristic function.
*
* 31-28 27 26 25 - 0
* +----+---+---+------------+
* | . | N | - | a/A ~ z/Z |
* +----+---+---+------------+
*
* . (bit 31~28) - The number of '.' characters
* N (bit 27) - Are there any numbers in the string
* - (bit 26) - Does the string have '-' character
* bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and
* Bit 24 is set only if ... 'b' or 'B', ...
*/
template<>
struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
uint32_t operator()(const llvm::StringRef& pString) const
{
uint32_t result = 0x0;
unsigned int dot = 0;
std::string::size_type idx;
for (idx = 0; idx < pString.size(); ++idx) {
int cur_char = pString[idx];
if ('.' == cur_char) {
++dot;
continue;
}
if (isdigit(cur_char)) {
result |= (1 << 27);
continue;
}
if ('_' == cur_char) {
result |= (1 << 26);
continue;
}
if (isupper(cur_char)) {
result |= (1 << (cur_char - 'A'));
continue;
}
if (islower(cur_char)) {
result |= (1 << (cur_char - 'a'));
continue;
}
}
result |= (dot << 28);
return result;
}
/** \func may_include
* \brief is it possible that pRule is a sub-string of pInString
*/
static bool may_include(uint32_t pRule, uint32_t pInString)
{
uint32_t in_c = pInString << 4;
uint32_t r_c = pRule << 4;
uint32_t res = (in_c ^ r_c) & r_c;
if (0 != res)
return false;
uint32_t in_dot = pInString >> 28;
uint32_t r_dot = pRule >> 28;
if (r_dot > in_dot)
return false;
return true;
}
};
/** \class template<uint32_t TYPE> StringCompare
* \brief the template StringCompare class, for specification
*/
template<typename STRING_TYPE>
struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
{
bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
{ return X == Y; }
};
template<>
struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
{
bool operator()(const char* X, const char* Y) const
{ return (0 == std::strcmp(X, Y)); }
};
template<>
struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
{
bool operator()(const char* X, const char* Y) const
{ return (0 == std::strcmp(X, Y)); }
};
} // namespace of hash
} // namespace of mcld
#endif