// C++ source  |  467 lines  |  17.07 KB

// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   08/10/2001  aliu        Creation.
**********************************************************************
*/
#ifndef _TRANSREG_H
#define _TRANSREG_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/translit.h"
#include "hash.h"
#include "uvector.h"

U_NAMESPACE_BEGIN

// Forward declarations of types referenced by the declarations below.
// TransliteratorEntry and TransliteratorSpec appear only as pointer or
// reference types in this header.
class TransliteratorEntry;
class TransliteratorSpec;
class UnicodeString;

//------------------------------------------------------------------
// TransliteratorAlias
//------------------------------------------------------------------

/**
 * A TransliteratorAlias object is returned by
 * TransliteratorRegistry::get() if the given ID actually translates
 * into something else.  The caller then invokes the create() method
 * on the alias to create the actual transliterator, and deletes the
 * alias.
 *
 * Why all the shenanigans?  To prevent circular calls between the
 * registry code and the transliterator code, which would deadlock.
 *
 * Copying is forbidden: the copy constructor and assignment operator
 * are declared private.
 */
class TransliteratorAlias : public UMemory {
 public:
    /**
     * Construct a simple alias (type == SIMPLE)
     * @param aliasID        the ID that this alias stands for.
     * @param compoundFilter filter to associate with the alias.  The
     *                       corresponding member is marked as a
     *                       non-owning alias, so the caller keeps
     *                       ownership.
     */
    TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);

    /**
     * Construct a compound RBT alias (type == COMPOUND)
     * @param ID                     the ID of the compound transliterator.
     * @param idBlocks               the ID blocks string.
     * @param adoptedTransliterators vector of contained transliterators;
     *                               adopted (the transes member is
     *                               marked as owned).
     * @param compoundFilter         the compound filter; not owned.
     */
    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
                        UVector* adoptedTransliterators,
                        const UnicodeSet* compoundFilter);

    /**
     * Construct a rules alias (type == RULES)
     * @param theID the ID of the rule-based transliterator.
     * @param rules the rules string.
     * @param dir   the direction associated with the rules.
     */
    TransliteratorAlias(const UnicodeString& theID,
                        const UnicodeString& rules,
                        UTransDirection dir);

    ~TransliteratorAlias();

    /**
     * The whole point of create() is that the caller must invoke
     * it when the registry mutex is NOT held, to prevent deadlock.
     * It may only be called once.
     *
     * Note: Only call create() if isRuleBased() returns FALSE.
     *
     * This method must be called *outside* of the TransliteratorRegistry
     * mutex.
     *
     * The two (unnamed) parameters are a UParseError and a UErrorCode.
     * NOTE(review): presumably they receive parse-error position
     * information and the success/failure code respectively -- confirm
     * against the definition.
     *
     * @return the created transliterator.
     *         NOTE(review): ownership presumably passes to the caller,
     *         and the failure return value is not visible here --
     *         confirm both against the definition.
     */
    Transliterator* create(UParseError&, UErrorCode&);

    /**
     * Return TRUE if this alias is rule-based.  If so, the caller
     * must call parse() on it, then call TransliteratorRegistry::reget().
     */
    UBool isRuleBased() const;

    /**
     * If isRuleBased() returns TRUE, then the caller must call this
     * method, followed by TransliteratorRegistry::reget().  The latter
     * method must be called inside the TransliteratorRegistry mutex.
     *
     * Note: Only call parse() if isRuleBased() returns TRUE.
     *
     * This method must be called *outside* of the TransliteratorRegistry
     * mutex, because it can instantiate Transliterators embedded in
     * the rules via the "&Latin-Arabic()" syntax.
     *
     * @param parser the parser to run; the same parser is later handed
     *               to TransliteratorRegistry::reget().
     * @param pe     parse error structure passed through to the parse.
     * @param ec     error code passed through to the parse.
     */
    void parse(TransliteratorParser& parser,
               UParseError& pe, UErrorCode& ec) const;

 private:
    // We actually come in three flavors, distinguished by 'type':
    // 1. Simple alias (SIMPLE)
    //    Here aliasesOrRules is the alias string.  Everything else is
    //    null, zero, empty (apart from compoundFilter, which the
    //    SIMPLE constructor also accepts).
    // 2. CompoundRBT (COMPOUND)
    //    Here ID is the ID, aliasesOrRules is the idBlocks string, and
    //    transes holds the contained RBT transliterators (owned).
    //    compoundFilter is the compound filter, and it is _not_ owned.
    // 3. Rules (RULES)
    //    Here ID is the ID, aliasesOrRules is the rules string, and
    //    direction is the UTransDirection.
    //
    // NOTE(review): an earlier version of this comment used the names
    // aliasID / trans / idSplitPoint, which do not match the members
    // declared below.  The mapping above is inferred from the member
    // names and constructor parameters -- confirm against the
    // constructor definitions.
    UnicodeString ID;
    UnicodeString aliasesOrRules;
    UVector* transes; // owned
    const UnicodeSet* compoundFilter; // alias
    UTransDirection direction;
    enum { SIMPLE, COMPOUND, RULES } type;

    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
    TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
};


/**
 * A registry of system transliterators.  This is the data structure
 * that implements the mapping between transliterator IDs and the data
 * or function pointers used to create the corresponding
 * transliterators.  There is one instance of the registry that is
 * created statically.
 *
 * The registry consists of a dynamic component -- a hashtable -- and
 * a static component -- locale resource bundles.  The dynamic store
 * is semantically overlaid on the static store, so the static mapping
 * can be dynamically overridden.
 *
 * This is an internal class that is only used by Transliterator.
 * Transliterator maintains one static instance of this class and
 * delegates all registry-related operations to it.
 *
 * All public methods of this class must be called from within the
 * TransliteratorRegistry mutex (see reget()).
 *
 * @author Alan Liu
 */
class TransliteratorRegistry : public UMemory {

 public:

    /**
     * Constructor
     * @param status Output param set to success/failure code.
     */
    TransliteratorRegistry(UErrorCode& status);

    /**
     * Nonvirtual destructor -- this class is not subclassable.
     */
    ~TransliteratorRegistry();

    //------------------------------------------------------------------
    // Basic public API
    //------------------------------------------------------------------

    /**
     * Given a simple ID (forward direction, no inline filter, not
     * compound) attempt to instantiate it from the registry.  Return
     * 0 on failure.
     *
     * Return a non-NULL aliasReturn value if the ID points to an alias.
     * We cannot instantiate it ourselves because the alias may contain
     * filters or compounds, which we do not understand.  Caller should
     * make aliasReturn NULL before calling.
     * @param ID          the given ID
     * @param aliasReturn output param to receive TransliteratorAlias;
     *                    should be NULL on entry
     * @param status      Output param set to success/failure code.
     * @return the instantiated transliterator, or 0 on failure.
     */
    Transliterator* get(const UnicodeString& ID,
                        TransliteratorAlias*& aliasReturn,
                        UErrorCode& status);

    /**
     * The caller must call this after calling get(), if [a] calling get()
     * returns an alias, and [b] the alias is rule based.  In that
     * situation the caller must call alias->parse() to do the parsing
     * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
     * instantiating the transliterator.
     *
     * Note: Another alias might be returned by this method.
     *
     * This method (like all public methods of this class) must be called
     * from within the TransliteratorRegistry mutex.
     *
     * @param ID          the ID that was originally passed to get()
     * @param parser      the parser that alias->parse() was run on
     * @param aliasReturn output param to receive TransliteratorAlias;
     *                    should be NULL on entry
     * @param status      Output param set to success/failure code.
     */
    Transliterator* reget(const UnicodeString& ID,
                          TransliteratorParser& parser,
                          TransliteratorAlias*& aliasReturn,
                          UErrorCode& status);

    /**
     * Register a prototype (adopted).  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     * @param adoptedProto the prototype transliterator; adopted.
     * @param visible      NOTE(review): presumably controls whether the
     *                     entry is listed among the available IDs and
     *                     specs (see the 'registry' member comment) --
     *                     confirm against the definition.
     * @param ec           Output param set to success/failure code.
     */
    void put(Transliterator* adoptedProto,
             UBool visible,
             UErrorCode& ec);

    /**
     * Register an ID and a factory function pointer.  This adds an
     * entry to the dynamic store, or replaces an existing entry.  Any
     * entry in the underlying static locale resource store is masked.
     * @param ID      the ID to register.
     * @param factory the factory function pointer.
     * @param context token stored with the factory.
     *                NOTE(review): presumably handed back to the factory
     *                when it is invoked -- confirm.
     * @param visible see put(Transliterator*, UBool, UErrorCode&).
     * @param ec      Output param set to success/failure code.
     */
    void put(const UnicodeString& ID,
             Transliterator::Factory factory,
             Transliterator::Token context,
             UBool visible,
             UErrorCode& ec);

    /**
     * Register an ID and a resource name.  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     * @param ID                    the ID to register.
     * @param resourceName          the resource name.
     * @param dir                   the direction associated with the
     *                              resource.
     * @param readonlyResourceAlias NOTE(review): presumably TRUE if
     *                              resourceName may be aliased read-only
     *                              rather than copied -- confirm.
     * @param visible               see put(Transliterator*, UBool,
     *                              UErrorCode&).
     * @param ec                    Output param set to success/failure
     *                              code.
     */
    void put(const UnicodeString& ID,
             const UnicodeString& resourceName,
             UTransDirection dir,
             UBool readonlyResourceAlias,
             UBool visible,
             UErrorCode& ec);

    /**
     * Register an ID and an alias ID.  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     * @param ID                 the ID to register.
     * @param alias              the alias ID.
     * @param readonlyAliasAlias NOTE(review): presumably TRUE if the
     *                           alias string may be aliased read-only
     *                           rather than copied -- confirm.
     * @param visible            see put(Transliterator*, UBool,
     *                           UErrorCode&).
     * @param ec                 Output param set to success/failure code.
     */
    void put(const UnicodeString& ID,
             const UnicodeString& alias,
             UBool readonlyAliasAlias,
             UBool visible,
             UErrorCode& ec);

    /**
     * Unregister an ID.  This removes an entry from the dynamic store
     * if there is one.  The static locale resource store is
     * unaffected.
     * @param ID    the given ID.
     */
    void remove(const UnicodeString& ID);

    //------------------------------------------------------------------
    // Public ID and spec management
    //------------------------------------------------------------------

    /**
     * Return a StringEnumeration over the IDs currently registered
     * with the system.
     * @internal
     */
    StringEnumeration* getAvailableIDs() const;

    /**
     * == OBSOLETE - remove in ICU 3.4 ==
     * Return the number of IDs currently registered with the system.
     * To retrieve the actual IDs, call getAvailableID(i) with
     * i from 0 to countAvailableIDs() - 1.
     * @return the number of IDs currently registered with the system.
     * @internal
     */
    int32_t countAvailableIDs(void) const;

    /**
     * == OBSOLETE - remove in ICU 3.4 ==
     * Return the index-th available ID.  index must be between 0
     * and countAvailableIDs() - 1, inclusive.  If index is out of
     * range, the result of getAvailableID(0) is returned.
     * @param index the given index.
     * @return the index-th available ID.  index must be between 0
     *         and countAvailableIDs() - 1, inclusive.  If index is out of
     *         range, the result of getAvailableID(0) is returned.
     * @internal
     */
    const UnicodeString& getAvailableID(int32_t index) const;

    /**
     * Return the number of registered source specifiers.
     * @return the number of registered source specifiers.
     */
    int32_t countAvailableSources(void) const;

    /**
     * Return a registered source specifier.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableSources()
     * @param result fill-in parameter to receive the source specifier.
     * If index is out of range, result will be empty.
     * @return reference to result
     */
    UnicodeString& getAvailableSource(int32_t index,
                                      UnicodeString& result) const;

    /**
     * Return the number of registered target specifiers for a given
     * source specifier.
     * @param source the given source specifier.
     * @return the number of registered target specifiers for a given
     *         source specifier.
     */
    int32_t countAvailableTargets(const UnicodeString& source) const;

    /**
     * Return a registered target specifier for a given source.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableTargets(source)
     * @param source the source specifier
     * @param result fill-in parameter to receive the target specifier.
     * If source is invalid or if index is out of range, result will
     * be empty.
     * @return reference to result
     */
    UnicodeString& getAvailableTarget(int32_t index,
                                      const UnicodeString& source,
                                      UnicodeString& result) const;

    /**
     * Return the number of registered variant specifiers for a given
     * source-target pair.  There is always at least one variant: If
     * just source-target is registered, then the single variant
     * NO_VARIANT is returned.  If source-target/variant is registered
     * then that variant is returned.
     * @param source the source specifiers
     * @param target the target specifiers
     * @return the number of registered variant specifiers for a given
     *         source-target pair.
     */
    int32_t countAvailableVariants(const UnicodeString& source,
                                   const UnicodeString& target) const;

    /**
     * Return a registered variant specifier for a given source-target
     * pair.  If NO_VARIANT is one of the variants, then it will be
     * at index 0.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableVariants(source, target)
     * @param source the source specifier
     * @param target the target specifier
     * @param result fill-in parameter to receive the variant
     * specifier.  If source is invalid or if target is invalid or if
     * index is out of range, result will be empty.
     * @return reference to result
     */
    UnicodeString& getAvailableVariant(int32_t index,
                                       const UnicodeString& source,
                                       const UnicodeString& target,
                                       UnicodeString& result) const;

 private:

    //----------------------------------------------------------------
    // Private implementation
    //
    // The definitions of these helpers are not in this header; the
    // notes below are based on the signatures and the class-level
    // description, and anything beyond that is marked NOTE(review).
    //----------------------------------------------------------------

    // Look up the entry for a full ID.
    TransliteratorEntry* find(const UnicodeString& ID);

    // Look up the entry for a source/target/variant triple.  The
    // arguments are non-const references.
    // NOTE(review): presumably they may be modified by the lookup --
    // confirm against the definition.
    TransliteratorEntry* find(UnicodeString& source,
                UnicodeString& target,
                UnicodeString& variant);

    // Look up src/trg/variant in the dynamic store (const: does not
    // modify the registry).
    TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
                              const TransliteratorSpec& trg,
                              const UnicodeString& variant) const;

    // Look up src/trg/variant in the static (resource bundle) store.
    // Unlike findInDynamicStore() this is non-const.
    // NOTE(review): presumably because a hit gets registered into the
    // dynamic store -- confirm against the definition.
    TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
                             const TransliteratorSpec& trg,
                             const UnicodeString& variant);

    // Static helper: search the bundle belonging to specToOpen for an
    // entry matching specToFind/variant in the given direction.
    static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
                               const TransliteratorSpec& specToFind,
                               const UnicodeString& variant,
                               UTransDirection direction);

    // Register an adopted entry, identified by source/target/variant.
    void registerEntry(const UnicodeString& source,
                       const UnicodeString& target,
                       const UnicodeString& variant,
                       TransliteratorEntry* adopted,
                       UBool visible);

    // Register an adopted entry, identified by its full ID.
    void registerEntry(const UnicodeString& ID,
                       TransliteratorEntry* adopted,
                       UBool visible);

    // Register an adopted entry, given both the full ID and its
    // source/target/variant components.
    void registerEntry(const UnicodeString& ID,
                       const UnicodeString& source,
                       const UnicodeString& target,
                       const UnicodeString& variant,
                       TransliteratorEntry* adopted,
                       UBool visible);

    // Record a source/target/variant triple.
    // NOTE(review): presumably in specDAG -- confirm.
    void registerSTV(const UnicodeString& source,
                     const UnicodeString& target,
                     const UnicodeString& variant);

    // Remove a source/target/variant triple.
    // NOTE(review): presumably from specDAG -- confirm.
    void removeSTV(const UnicodeString& source,
                   const UnicodeString& target,
                   const UnicodeString& variant);

    // Create a transliterator from a registry entry.  Same
    // aliasReturn/status out-parameters as get() and reget().
    Transliterator* instantiateEntry(const UnicodeString& ID,
                                     TransliteratorEntry *entry,
                                     TransliteratorAlias*& aliasReturn,
                                     UErrorCode& status);

    /**
     * A StringEnumeration over the registered IDs in this object.
     */
    class Enumeration : public StringEnumeration {
    public:
        // Bind the enumeration to a registry.  The registry is held by
        // reference, so it must outlive the enumeration.
        Enumeration(const TransliteratorRegistry& reg);
        virtual ~Enumeration();
        virtual int32_t count(UErrorCode& status) const;
        virtual const UnicodeString* snext(UErrorCode& status);
        virtual void reset(UErrorCode& status);
        static UClassID U_EXPORT2 getStaticClassID();
        virtual UClassID getDynamicClassID() const;
    private:
        // NOTE(review): presumably the current iteration position -- confirm.
        int32_t index;
        // The registry being enumerated (not owned).
        const TransliteratorRegistry& reg;
    };
    // Grants the nested Enumeration access to this class's private members.
    friend class Enumeration;

 private:

    /**
     * Dynamic registry mapping full IDs to Entry objects.  This
     * contains both public and internal entities.  The visibility is
     * controlled by whether an entry is listed in availableIDs and
     * specDAG or not.
     */
    Hashtable registry;

    /**
     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
     * target => (UVector: variant)) The UVector of variants is never
     * empty.  For a source-target with no variant, the special
     * variant NO_VARIANT (the empty string) is stored in slot zero of
     * the UVector.
     */
    Hashtable specDAG;

    /**
     * Vector of public full IDs.
     */
    UVector availableIDs;

    TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
    TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
};

U_NAMESPACE_END

/**
 * Cleanup entry point for the transliterator service, declared via the
 * U_CFUNC macro and defined outside this header.
 * NOTE(review): presumably invoked by ICU's library cleanup to release the
 * static registry -- confirm against the definition.
 * @return a UBool whose meaning is determined by the definition.
 */
U_CFUNC UBool utrans_transliterator_cleanup(void);

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif
//eof