/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "common/fml-parser.h" #include <ctype.h> #include <string> #include "util/base/logging.h" #include "util/strings/numbers.h" namespace libtextclassifier { namespace nlp_core { namespace { inline bool IsValidCharAtStartOfIdentifier(char c) { return isalpha(c) || (c == '_') || (c == '/'); } // Returns true iff character c can appear inside an identifier. inline bool IsValidCharInsideIdentifier(char c) { return isalnum(c) || (c == '_') || (c == '-') || (c == '/'); } // Returns true iff character c can appear at the beginning of a number. inline bool IsValidCharAtStartOfNumber(char c) { return isdigit(c) || (c == '+') || (c == '-'); } // Returns true iff character c can appear inside a number. inline bool IsValidCharInsideNumber(char c) { return isdigit(c) || (c == '.'); } } // namespace bool FMLParser::Initialize(const std::string &source) { // Initialize parser state. source_ = source; current_ = source_.begin(); item_start_ = line_start_ = current_; line_number_ = item_line_number_ = 1; // Read first input item. return NextItem(); } void FMLParser::ReportError(const std::string &error_message) { const int position = item_start_ - line_start_ + 1; const std::string line(line_start_, current_); TC_LOG(ERROR) << "Error in feature model, line " << item_line_number_ << ", position " << position << ": " << error_message << "\n " << line << " <--HERE"; } void FMLParser::Next() { // Move to the next input character. If we are at a line break update line // number and line start position. if (CurrentChar() == '\n') { ++line_number_; ++current_; line_start_ = current_; } else { ++current_; } } bool FMLParser::NextItem() { // Skip white space and comments. while (!eos()) { if (CurrentChar() == '#') { // Skip comment. while (!eos() && CurrentChar() != '\n') Next(); } else if (isspace(CurrentChar())) { // Skip whitespace. while (!eos() && isspace(CurrentChar())) Next(); } else { break; } } // Record start position for next item. item_start_ = current_; item_line_number_ = line_number_; // Check for end of input. if (eos()) { item_type_ = END; return true; } // Parse number. if (IsValidCharAtStartOfNumber(CurrentChar())) { std::string::iterator start = current_; Next(); while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next(); item_text_.assign(start, current_); item_type_ = NUMBER; return true; } // Parse std::string. if (CurrentChar() == '"') { Next(); std::string::iterator start = current_; while (CurrentChar() != '"') { if (eos()) { ReportError("Unterminated string"); return false; } Next(); } item_text_.assign(start, current_); item_type_ = STRING; Next(); return true; } // Parse identifier name. if (IsValidCharAtStartOfIdentifier(CurrentChar())) { std::string::iterator start = current_; while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) { Next(); } item_text_.assign(start, current_); item_type_ = NAME; return true; } // Single character item. item_type_ = CurrentChar(); Next(); return true; } bool FMLParser::Parse(const std::string &source, FeatureExtractorDescriptor *result) { // Initialize parser. if (!Initialize(source)) { return false; } while (item_type_ != END) { // Current item should be a feature name. if (item_type_ != NAME) { ReportError("Feature type name expected"); return false; } std::string name = item_text_; if (!NextItem()) { return false; } // Parse feature. FeatureFunctionDescriptor *descriptor = result->add_feature(); descriptor->set_type(name); if (!ParseFeature(descriptor)) { return false; } } return true; } bool FMLParser::ParseFeature(FeatureFunctionDescriptor *result) { // Parse argument and parameters. if (item_type_ == '(') { if (!NextItem()) return false; if (!ParseParameter(result)) return false; while (item_type_ == ',') { if (!NextItem()) return false; if (!ParseParameter(result)) return false; } if (item_type_ != ')') { ReportError(") expected"); return false; } if (!NextItem()) return false; } // Parse feature name. if (item_type_ == ':') { if (!NextItem()) return false; if (item_type_ != NAME && item_type_ != STRING) { ReportError("Feature name expected"); return false; } std::string name = item_text_; if (!NextItem()) return false; // Set feature name. result->set_name(name); } // Parse sub-features. if (item_type_ == '.') { // Parse dotted sub-feature. if (!NextItem()) return false; if (item_type_ != NAME) { ReportError("Feature type name expected"); return false; } std::string type = item_text_; if (!NextItem()) return false; // Parse sub-feature. FeatureFunctionDescriptor *subfeature = result->add_feature(); subfeature->set_type(type); if (!ParseFeature(subfeature)) return false; } else if (item_type_ == '{') { // Parse sub-feature block. if (!NextItem()) return false; while (item_type_ != '}') { if (item_type_ != NAME) { ReportError("Feature type name expected"); return false; } std::string type = item_text_; if (!NextItem()) return false; // Parse sub-feature. FeatureFunctionDescriptor *subfeature = result->add_feature(); subfeature->set_type(type); if (!ParseFeature(subfeature)) return false; } if (!NextItem()) return false; } return true; } bool FMLParser::ParseParameter(FeatureFunctionDescriptor *result) { if (item_type_ == NUMBER) { int32 argument; if (!ParseInt32(item_text_.c_str(), &argument)) { ReportError("Unable to parse number"); return false; } if (!NextItem()) return false; // Set default argument for feature. result->set_argument(argument); } else if (item_type_ == NAME) { std::string name = item_text_; if (!NextItem()) return false; if (item_type_ != '=') { ReportError("= expected"); return false; } if (!NextItem()) return false; if (item_type_ >= END) { ReportError("Parameter value expected"); return false; } std::string value = item_text_; if (!NextItem()) return false; // Add parameter to feature. Parameter *parameter; parameter = result->add_parameter(); parameter->set_name(name); parameter->set_value(value); } else { ReportError("Syntax error in parameter list"); return false; } return true; } void ToFMLFunction(const FeatureFunctionDescriptor &function, std::string *output) { output->append(function.type()); if (function.argument() != 0 || function.parameter_size() > 0) { output->append("("); bool first = true; if (function.argument() != 0) { output->append(IntToString(function.argument())); first = false; } for (int i = 0; i < function.parameter_size(); ++i) { if (!first) output->append(","); output->append(function.parameter(i).name()); output->append("="); output->append("\""); output->append(function.parameter(i).value()); output->append("\""); first = false; } output->append(")"); } } void ToFML(const FeatureFunctionDescriptor &function, std::string *output) { ToFMLFunction(function, output); if (function.feature_size() == 1) { output->append("."); ToFML(function.feature(0), output); } else if (function.feature_size() > 1) { output->append(" { "); for (int i = 0; i < function.feature_size(); ++i) { if (i > 0) output->append(" "); ToFML(function.feature(i), output); } output->append(" } "); } } void ToFML(const FeatureExtractorDescriptor &extractor, std::string *output) { for (int i = 0; i < extractor.feature_size(); ++i) { ToFML(extractor.feature(i), output); output->append("\n"); } } } // namespace nlp_core } // namespace libtextclassifier