// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
#define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
#include <map>
#include <stdio.h>
#include <string>
#include <vector>
// Forward declaration only: this header uses base::FilePath solely as a
// const-reference parameter, so the full definition is not required here.
namespace base { class FilePath; }  // namespace base
namespace convert_dict {
// Reads a .aff (affix) file and exposes the data collected from it: the
// introductory comment block, the encoding, the affix groups ("AF" lines),
// the affix rules (SFX/PFX lines), the replacements, and all other commands.
//
// Usage: construct with the path of the file, call Read(), then query the
// getters. Objects of this class cannot be copied (see the private section).
class AffReader {
 public:
  explicit AffReader(const base::FilePath& path);
  ~AffReader();

  // Reads the file and fills in the data returned by the getters below.
  // NOTE(review): presumably returns true on success; confirm against the
  // implementation.
  bool Read();

  // Returns true when this file uses indexed affixes. When it returns false,
  // the rule string is specified literally in the .dic file instead. This
  // must be called after Read().
  bool has_indexed_affixes() const { return has_indexed_affixes_; }

  // Returns a string representing the encoding of the dictionary. This will
  // default to ISO-8859-1 if the .aff file does not specify it. The returned
  // pointer refers to storage owned by this object.
  const char* encoding() const { return encoding_.c_str(); }

  // Converts the given string from the file encoding to UTF-8, writing the
  // result to |utf8| and returning true on success.
  bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;

  // Adds a new affix string, returning the index. If it already exists,
  // returns the index of the existing one. This is used to convert .dic files
  // which list the rule string literally rather than referring to an affix
  // group by index (NOTE(review): the original comment was truncated at this
  // point; confirm the intended wording).
  //
  // You must not call this until after Read().
  int GetAFIndexForAFString(const std::string& af_string);

  // Getters for the computed data.
  const std::string& comments() const { return intro_comment_; }
  const std::vector<std::string>& affix_rules() const { return affix_rules_; }
  const std::vector< std::pair<std::string, std::string> >&
      replacements() const {
    return replacements_;
  }
  const std::vector<std::string>& other_commands() const {
    return other_commands_;
  }

  // Returns the affix groups ("AF" lines) for this file. Affix group IDs are
  // 1-based, but the returned vector has no slot for an unused 0th item, so
  // lookups have to subtract one from the ID to get the vector index. This is
  // how hunspell stores this data.
  // NOTE(review): wording reconstructed from the original comment; confirm
  // against the implementation.
  std::vector<std::string> GetAffixGroups() const;

 private:
  // Copying is disallowed. This object holds a raw FILE* handle, and a
  // member-wise copy would leave two objects sharing the same handle with no
  // way to tell which one is responsible for it. These are declared but
  // intentionally never defined.
  AffReader(const AffReader&);
  AffReader& operator=(const AffReader&);

  // Command-specific handlers. These are given the string following the
  // command. The input rule may be modified arbitrarily by the function.
  int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
  void AddAffix(std::string* rule);  // SFX/PFX
  void AddReplacement(std::string* rule);
  // void HandleFlag(std::string* rule);

  // Used to handle "other" commands. The "raw" just saves the line as-is.
  // The "encoded" version converts the line to UTF-8 and saves it.
  void HandleRawCommand(const std::string& line);
  void HandleEncodedCommand(const std::string& line);

  // Handle of the .aff file being read.
  // NOTE(review): presumably opened by the constructor and released by the
  // destructor; confirm against the implementation.
  FILE* file_;

  // Comments from the beginning of the file. This is everything before the
  // first command. We want to store this since it often contains the
  // copyright information.
  std::string intro_comment_;

  // Encoding of the source words.
  std::string encoding_;

  // Affix groups. These are populated by "AF" commands. The .dic file can
  // refer to these by index. They are keyed by their string value (the list
  // of characters representing rules), and map to the numeric affix IDs.
  //
  // These can also be added using GetAFIndexForAFString.
  std::map<std::string, int> affix_groups_;

  // True when the affixes were specified in the .aff file using indices. The
  // dictionary reader uses this to see how it should treat the stuff after
  // the word on each line.
  bool has_indexed_affixes_;

  // SFX and PFX commands. This is a list of each of those lines in the order
  // they appear in the file. They have been re-encoded.
  std::vector<std::string> affix_rules_;

  // Replacement commands. The first string is a possible input, and the
  // second is the replacement.
  std::vector< std::pair<std::string, std::string> > replacements_;

  // All other commands.
  std::vector<std::string> other_commands_;
};
} // namespace convert_dict
#endif // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__