// C++ source  |  368 lines  |  8.82 KB

//===- StringHash.h -------------------------------------------------------===//
//
//                     The MCLinker Project
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef MCLD_STRING_HASH_FUNCTION_H
#define MCLD_STRING_HASH_FUNCTION_H
#ifdef ENABLE_UNITTEST
#include <gtest.h>
#endif
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/DataTypes.h>
#include <llvm/Support/ErrorHandling.h>
#include <cctype>
#include <cstring>
#include <functional>
#include <string>

namespace mcld {
namespace hash {

/** \enum Type
 *  \brief Identifiers of the available string hash algorithms.
 *
 *  Each enumerator is used as the template argument that selects the
 *  matching StringHash<> specialization. The numeric values are spelled
 *  out so that they stay stable if the list is ever reordered.
 */
enum Type {
  RS   = 0,
  JS   = 1,
  PJW  = 2,
  ELF  = 3,
  BKDR = 4,
  SDBM = 5,
  DJB  = 6,
  DEK  = 7,
  BP   = 8,
  FNV  = 9,
  AP   = 10,
  ES   = 11
};

/** \class template<uint32_t TYPE> StringHash
 *  \brief the template StringHash class, for specification
 */
template<uint32_t TYPE>
struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    llvm::report_fatal_error("Undefined StringHash function.\n");
  }
};

/** \class StringHash<RS>
 *  \brief RS string hash function
 *
 *  Multiplies the running hash by a factor that itself changes after
 *  every character, then adds the character.
 */
template<>
struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (0 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    const unsigned int step = 378551;
    uint32_t multiplier = 63689;
    uint32_t result = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      result = result * multiplier + *cur;
      // the multiplier evolves per character; wrap-around is intended
      multiplier *= step;
    }
    return result;
  }
};

/** \class StringHash<JS>
 *  \brief JS hash function
 *
 *  XORs the running hash with a mix of its own shifted copies and the
 *  current character.
 */
template<>
struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (the seed value for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    uint32_t result = 1315423911;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      const uint32_t mixed = (result << 5) + *cur + (result >> 2);
      result ^= mixed;
    }
    return result;
  }
};

/** \class StringHash<PJW>
 *  \brief P.J. Weinberger hash function
 *
 *  Shifts the hash left by one eighth of the word width per character and
 *  folds any bits that reach the top eighth back into the lower part.
 */
template<>
struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the hash of pKey; the bits selected by the top mask are always 0
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    // All shift amounts are derived from the width of unsigned int.
    const unsigned int word_bits  = static_cast<unsigned int>(sizeof(unsigned int) * 8);
    const unsigned int fold_shift = static_cast<unsigned int>((word_bits * 3) / 4);
    const unsigned int step_shift = static_cast<unsigned int>(word_bits / 8);
    const unsigned int top_mask   = static_cast<unsigned int>(0xFFFFFFFF) << (word_bits - step_shift);
    uint32_t result = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      result = (result << step_shift) + *cur;

      const uint32_t spilled = result & top_mask;
      if (spilled == 0)
        continue;

      // fold the spilled top bits back down and clear them
      result = (result ^ (spilled >> fold_shift)) & ~top_mask;
    }
    return result;
  }
};

/** \class StringHash<ELF>
 *  \brief ELF hash function.
 *
 *  Every byte of the key is read as an unsigned char, as in the System V
 *  ABI reference implementation. Reading it as plain char would
 *  sign-extend bytes >= 0x80 on platforms where char is signed, so the
 *  result would depend on the host and disagree with other ELF tools for
 *  non-ASCII names.
 */
template<>
struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the hash of pKey; the top four bits are always 0
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    uint32_t hash_val = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      hash_val = (hash_val << 4) + static_cast<unsigned char>(*cur);

      // fold the top nibble into bits 4..7, then clear it
      const uint32_t high = hash_val & 0xF0000000U;
      if (high != 0)
        hash_val ^= (high >> 24);
      hash_val &= ~high;
    }
    return hash_val;
  }
};

/** \class StringHash<BKDR>
 *  \brief BKDR hash function
 *
 *  Polynomial hash: multiply the running value by the seed 131, then add
 *  the current character.
 */
template<>
struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (0 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    const uint32_t multiplier = 131;
    uint32_t result = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      result *= multiplier;
      result += *cur;
    }
    return result;
  }
};


/** \class StringHash<SDBM>
 *  \brief SDBM hash function
 *
 *  Author's timing note: 0.049s in 100000 test
 */
template<>
struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (0 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    uint32_t result = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      // (h << 6) + (h << 16) - h, i.e. h * 65599 modulo 2^32
      const uint32_t by_64    = result << 6;
      const uint32_t by_65536 = result << 16;
      result = *cur + by_64 + by_65536 - result;
    }
    return result;
  }
};

/** \class StringHash<DJB>
 *  \brief DJB hash function
 *
 *  Author's timing note: 0.057s in 100000 test
 */
template<>
struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (5381 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    uint32_t result = 5381;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      // (h << 5) + h, i.e. h * 33 modulo 2^32
      const uint32_t times_33 = (result << 5) + result;
      result = times_33 + *cur;
    }
    return result;
  }
};

/** \class StringHash<DEK>
 *  \brief DEK hash function
 *
 *  Starts from the key length, then for each character combines the
 *  value shifted left by 5 with the value shifted right by 27 and XORs
 *  the character in.
 *
 *  Author's timing note: 0.60s
 */
template<>
struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (0 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    uint32_t result = pKey.size();

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      const uint32_t rotated = (result << 5) ^ (result >> 27);
      result = rotated ^ *cur;
    }
    return result;
  }
};

/** \class StringHash<BP>
 *  \brief BP hash function
 *
 *  Shifts the running value left by 7 bits and XORs in each character.
 *
 *  Author's timing note: 0.057s
 */
template<>
struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (0 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    uint32_t result = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      const uint32_t shifted = result << 7;
      result = shifted ^ *cur;
    }
    return result;
  }
};

/** \class StringHash<FNV>
 *  \brief FNV hash function
 *
 *  Multiplies the running value by a fixed constant, then XORs in each
 *  character.
 *
 *  NOTE(review): the multiplier 0x811C9DC5 is the value commonly cited as
 *  the 32-bit FNV offset basis rather than the FNV prime, and the hash
 *  starts at 0. Confirm whether this variant is intended; it is kept
 *  unchanged so existing hash values stay the same.
 *
 *  Author's timing note: 0.058s
 */
template<>
struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the 32-bit hash of pKey (0 for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    const uint32_t multiplier = 0x811C9DC5;
    uint32_t result = 0;

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur)
      result = (result * multiplier) ^ *cur;

    return result;
  }
};

/** \class StringHash<AP>
 *  \brief AP hash function
 *
 *  Alternates between two mixing formulas depending on whether the
 *  character sits at an even or an odd index of the key.
 *
 *  Author's timing note: 0.060s
 */
template<>
struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pKey the string to hash
  /// \return the hash of pKey (0xAAAAAAAA for an empty string)
  uint32_t operator()(const llvm::StringRef& pKey) const
  {
    unsigned int result = 0xAAAAAAAA;
    bool even_index = true;  // index 0 is even

    const char* const last = pKey.end();
    for (const char* cur = pKey.begin(); cur != last; ++cur) {
      const char ch = *cur;
      if (even_index)
        result ^= (result << 7) ^ (ch * (result >> 3));
      else
        result ^= ~((result << 11) + (ch ^ (result >> 5)));
      even_index = !even_index;
    }

    return result;
  }
};

/** \class StringHash<ES>
 *  \brief This is a revision of Edward Sayers' string characteristic function.
 *
 *  31-28  27  26  25   -   0
 *  +----+---+---+------------+
 *  | .  | N | _ | z/Z  ~ a/A |
 *  +----+---+---+------------+
 *
 *  . (bit 31~28) - The number of '.' characters, saturated at 15
 *  N (bit 27)    - Set if the string contains any decimal digit
 *  _ (bit 26)    - Set if the string contains a '_' character
 *  bit 25~0      - Bit 0 is set only if the string contains 'a' or 'A',
 *                  bit 1 only if it contains 'b' or 'B', ..., and
 *                  bit 25 only if it contains 'z' or 'Z'.
 */
template<>
struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
{
  /// \param pString the string to characterize
  /// \return the characteristic word laid out as described on the class
  uint32_t operator()(const llvm::StringRef& pString) const
  {
    // The dot counter occupies a 4-bit field. Letting it grow past 15 would
    // shift the extra bits out of the word (16 dots would encode as 0) and
    // make may_include() reject strings that really do match.
    const uint32_t max_dots = 0xF;
    const uint32_t one = 1;

    uint32_t result = 0x0;
    uint32_t dot = 0;

    const char* const last = pString.end();
    for (const char* cur = pString.begin(); cur != last; ++cur) {
      // Work on the byte value. Explicit ASCII range checks are used instead
      // of the <cctype> classifiers: those have undefined behavior for
      // negative char values and follow the current locale, which could
      // produce an out-of-range shift amount below.
      const unsigned char cur_char = static_cast<unsigned char>(*cur);

      if ('.' == cur_char) {
        if (dot < max_dots)
          ++dot;
        continue;
      }

      if (cur_char >= '0' && cur_char <= '9') {
        result |= (one << 27);
        continue;
      }

      if ('_' == cur_char) {
        result |= (one << 26);
        continue;
      }

      if (cur_char >= 'A' && cur_char <= 'Z') {
        result |= (one << (cur_char - 'A'));
        continue;
      }

      if (cur_char >= 'a' && cur_char <= 'z') {
        result |= (one << (cur_char - 'a'));
        continue;
      }
    }
    result |= (dot << 28);
    return result;
  }


  /** \func may_include
   *  \brief is it possible that pRule is a sub-string of pInString
   *
   *  Both arguments are characteristic words produced by operator().
   *  This is a conservative pre-filter: it returns false only when pRule
   *  has a flag bit that pInString lacks, or records more dots than
   *  pInString does.
   *
   *  \param pRule     characteristic of the candidate sub-string
   *  \param pInString characteristic of the string being searched
   *  \return false if pRule certainly is not contained; true otherwise
   */
  static bool may_include(uint32_t pRule, uint32_t pInString)
  {
    // Drop the 4-bit dot counter so only the flag bits are compared.
    uint32_t in_c = pInString << 4;
    uint32_t r_c  = pRule << 4;

    // Bits set in the rule but missing from the input string.
    uint32_t res = (in_c ^ r_c) & r_c;
    if (0 != res)
      return false;

    uint32_t in_dot = pInString >> 28;
    uint32_t r_dot  = pRule >> 28;
    if (r_dot > in_dot)
      return false;

    return true;
  }
};

/** \class template<typename STRING_TYPE> StringCompare
 *  \brief Equality predicate for string keys, meant to be specialized.
 *
 *  The generic version compares with STRING_TYPE's operator==.
 */
template<typename STRING_TYPE>
struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
{
  /// \return true if X and Y compare equal with operator==
  bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
  {
    return X == Y;
  }
};

/** \class StringCompare<const char*>
 *  \brief Compares C strings by content rather than by pointer value.
 */
template<>
struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
{
  /// \return true if X and Y hold the same characters (std::strcmp gives 0)
  bool operator()(const char* X, const char* Y) const
  {
    return std::strcmp(X, Y) == 0;
  }
};

/** \class StringCompare<char*>
 *  \brief Same content comparison, selected for mutable C-string keys.
 */
template<>
struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
{
  /// \return true if X and Y hold the same characters (std::strcmp gives 0)
  bool operator()(const char* X, const char* Y) const
  {
    return std::strcmp(X, Y) == 0;
  }
};

} // namespace of hash
} // namespace of mcld

#endif