// CharMatcher.java - Android Community - https://www.androidos.net.cn/

/*
 * Copyright (C) 2008 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.base;

import com.google.common.annotations.GwtCompatible;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Determines a true or false value for any Java {@code char} value, just as
 * {@link Predicate} does for any {@link Object}. Also offers basic text
 * processing methods based on this function. Implementations are strongly
 * encouraged to be side-effect-free and immutable.
 *
 * Throughout the documentation of this class, the phrase "matching
 * character" is used to mean "any character {@code c} for which {@code
 * this.matches(c)} returns {@code true}".
 *
 * Note: This class deals only with {@code char} values; it does not
 * understand supplementary Unicode code points in the range {@code 0x10000} to
 * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String}
 * using surrogate pairs, and a {@code CharMatcher} treats these just as two
 * separate characters.
 *
 * @author Kevin Bourrillion
 * @since 2009.09.15 tentative
 */
@GwtCompatible
public abstract class CharMatcher implements Predicate<Character> {

// Constants

// Breaking whitespace characters that are matched one by one. Excludes
// U+2000 through U+200A, which WHITESPACE and BREAKING_WHITESPACE handle
// with range matchers instead.
  private static final String BREAKING_WHITESPACE_CHARS =
      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";

// Non-breaking whitespace characters that are matched one by one. Excludes
// U+2007, which is handled as a gap in the pair of ranges used by
// BREAKING_WHITESPACE (and is covered by the single range in WHITESPACE).
  private static final String NON_BREAKING_WHITESPACE_CHARS =
      "\u00a0\u180e\u202f";

/**
 * Determines whether a character is whitespace according to the latest
 * Unicode standard, as illustrated
 * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
 * This is not the same definition used by other Java APIs; for the
 * definition used by {@link Character#isWhitespace(char)} see
 * {@link #JAVA_WHITESPACE}.
 *
 * The matcher is the union of the individually listed breaking and
 * non-breaking whitespace characters and the range U+2000 through U+200A.
 *
 * Note: as the Unicode definition evolves, we will modify this
 * constant to keep it up to date.
 */
 public static final CharMatcher WHITESPACE =
 anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
 .or(inRange('\u2000', '\u200a'));

/**
 * Determines whether a character is a breaking whitespace (that is,
 * a whitespace which can be interpreted as a break between words
 * for formatting purposes). See {@link #WHITESPACE} for a discussion
 * of that term.
 *
 * The two ranges below cover U+2000 through U+200A with a gap at U+2007,
 * which is therefore not matched by this constant.
 *
 * @since 2010.01.04 tentative
 */
 public static final CharMatcher BREAKING_WHITESPACE =
 anyOf(BREAKING_WHITESPACE_CHARS)
 .or(inRange('\u2000', '\u2006'))
 .or(inRange('\u2008', '\u200a'));

/**
   * Determines whether a character is ASCII, meaning that its code point is
   * less than 128. Implemented as the inclusive range U+0000 through U+007F.
   */
  public static final CharMatcher ASCII = inRange('\0', '\u007f');

/**
   * Determines whether a character is a digit according to
   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
   *
   * The matcher is assembled in the static initializer below as the union of
   * the ASCII digits and a hard-coded list of ten-character digit blocks.
   */
  public static final CharMatcher DIGIT;

  static {
    // Every char in this string is taken as the first of ten consecutive
    // digit characters (the block's "zero").
    final String blockStarts =
        "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
            + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
            + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";

    CharMatcher union = inRange('0', '9');
    for (int i = 0; i < blockStarts.length(); i++) {
      char zero = blockStarts.charAt(i);
      char nine = (char) (zero + 9);
      union = union.or(inRange(zero, nine));
    }
    DIGIT = union;
  }

/**
 * Determines whether a character is whitespace according to {@link
 * Character#isWhitespace(char) Java's definition}; it is usually preferable
 * to use {@link #WHITESPACE}. See a comparison of several definitions of
 * "whitespace" at <a href="http://go/white+space">go/white+space</a>
 * (NOTE(review): this short link does not look publicly resolvable --
 * confirm and replace with a public reference if one exists).
 *
 * NOTE(review): the definition is a hard-coded table of ranges rather than a
 * call to {@code Character.isWhitespace}; verify that it still agrees with
 * the target JDK, in particular for U+180E and U+200B.
 */
 public static final CharMatcher JAVA_WHITESPACE
 = inRange('\u0009', (char) 13) // \\u000d doesn't work as a char literal
 .or(inRange('\u001c', '\u0020'))
 .or(is('\u1680'))
 .or(is('\u180e'))
 .or(inRange('\u2000', '\u2006'))
 .or(inRange('\u2008', '\u200b'))
 .or(inRange('\u2028', '\u2029'))
 .or(is('\u205f'))
 .or(is('\u3000'));

/**
   * Determines whether a character is a digit according to {@link
   * Character#isDigit(char) Java's definition}. If you only care to match
   * ASCII digits, you can use {@code inRange('0', '9')}.
   *
   * Every query is forwarded to {@code Character.isDigit(char)}; nothing is
   * cached by this matcher itself.
   */
  public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isDigit(c);
    }
  };

/**
   * Determines whether a character is a letter according to {@link
   * Character#isLetter(char) Java's definition}. If you only care to match
   * letters of the Latin alphabet, you can use {@code
   * inRange('a', 'z').or(inRange('A', 'Z'))}.
   *
   * Every query is forwarded to {@code Character.isLetter(char)}.
   */
  public static final CharMatcher JAVA_LETTER = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isLetter(c);
    }
  };

/**
   * Determines whether a character is a letter or digit according to {@link
   * Character#isLetterOrDigit(char) Java's definition}.
   *
   * Every query is forwarded to {@code Character.isLetterOrDigit(char)}.
   */
  public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isLetterOrDigit(c);
    }
  };

/**
   * Determines whether a character is upper case according to {@link
   * Character#isUpperCase(char) Java's definition}.
   *
   * Every query is forwarded to {@code Character.isUpperCase(char)}.
   */
  public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isUpperCase(c);
    }
  };

/**
   * Determines whether a character is lower case according to {@link
   * Character#isLowerCase(char) Java's definition}.
   *
   * Every query is forwarded to {@code Character.isLowerCase(char)}.
   */
  public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isLowerCase(c);
    }
  };

/**
   * Determines whether a character is an ISO control character according to
   * {@link Character#isISOControl(char)}.
   *
   * Implemented as two hard-coded inclusive ranges, U+0000 through U+001F and
   * U+007F through U+009F, rather than by calling that method.
   */
  public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f')
      .or(inRange('\u007f', '\u009f'));

/**
   * Determines whether a character is invisible; that is, if its Unicode
   * category is any of SPACE_SEPARATOR, LINE_SEPARATOR,
   * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according
   * to ICU4J.
   *
   * The set is a hard-coded snapshot of ranges and single characters; it is
   * not computed from ICU4J at run time.
   */
  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
      .or(inRange('\u007f', '\u00a0'))
      .or(is('\u00ad'))
      .or(inRange('\u0600', '\u0603'))
      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
      .or(inRange('\u2000', '\u200f'))
      .or(inRange('\u2028', '\u202f'))
      .or(inRange('\u205f', '\u2064'))
      .or(inRange('\u206a', '\u206f'))
      .or(is('\u3000'))
      // One contiguous range spanning U+D800 through U+F8FF (presumably the
      // surrogate and private-use code units named in the Javadoc above).
      .or(inRange('\ud800', '\uf8ff'))
      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"));

/**
 * Determines whether a character is single-width (not double-width). When
 * in doubt, this matcher errs on the side of returning {@code false} (that
 * is, it tends to assume a character is double-width).
 *
 * The set is a hard-coded union of inclusive ranges and single characters.
 *
 * Note: as the reference file evolves, we will modify this constant
 * to keep it up to date. (NOTE(review): the reference file is not named
 * anywhere in this source -- identify it here if known.)
 */
 public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
 .or(is('\u05be'))
 .or(inRange('\u05d0', '\u05ea'))
 .or(is('\u05f3'))
 .or(is('\u05f4'))
 .or(inRange('\u0600', '\u06ff'))
 .or(inRange('\u0750', '\u077f'))
 .or(inRange('\u0e00', '\u0e7f'))
 .or(inRange('\u1e00', '\u20af'))
 .or(inRange('\u2100', '\u213a'))
 .or(inRange('\ufb50', '\ufdff'))
 .or(inRange('\ufe70', '\ufeff'))
 .or(inRange('\uff61', '\uffdc'));

/**
   * Matches any character. Every text-processing routine is overridden with a
   * constant-time or single-pass shortcut, since the answer never depends on
   * the individual characters of the input.
   */
  public static final CharMatcher ANY = new CharMatcher() {
    @Override public boolean matches(char c) {
      return true;
    }

    // --- searching and counting ---

    @Override public int indexIn(CharSequence sequence) {
      if (sequence.length() == 0) {
        return -1;
      }
      return 0;
    }

    @Override public int indexIn(CharSequence sequence, int start) {
      int end = sequence.length();
      Preconditions.checkPositionIndex(start, end);
      if (start == end) {
        return -1;
      }
      return start;
    }

    @Override public int lastIndexIn(CharSequence sequence) {
      // Yields -1 for an empty sequence, exactly as the contract requires.
      return sequence.length() - 1;
    }

    @Override public boolean matchesAllOf(CharSequence sequence) {
      checkNotNull(sequence);
      return true;
    }

    @Override public boolean matchesNoneOf(CharSequence sequence) {
      return sequence.length() == 0;
    }

    @Override public int countIn(CharSequence sequence) {
      return sequence.length();
    }

    // --- rewriting ---

    @Override public String removeFrom(CharSequence sequence) {
      checkNotNull(sequence);
      return "";
    }

    @Override public String replaceFrom(
        CharSequence sequence, char replacement) {
      char[] filled = new char[sequence.length()];
      Arrays.fill(filled, replacement);
      return new String(filled);
    }

    @Override public String replaceFrom(
        CharSequence sequence, CharSequence replacement) {
      int capacity = sequence.length() * replacement.length();
      StringBuilder out = new StringBuilder(capacity);
      // One copy of the replacement per input character.
      for (int i = 0; i < sequence.length(); i++) {
        out.append(replacement);
      }
      return out.toString();
    }

    @Override public String collapseFrom(
        CharSequence sequence, char replacement) {
      if (sequence.length() == 0) {
        return "";
      }
      return String.valueOf(replacement);
    }

    @Override public String trimFrom(CharSequence sequence) {
      checkNotNull(sequence);
      return "";
    }

    // --- combinators ---

    @Override public CharMatcher and(CharMatcher other) {
      // ANY is the identity of "and": the result is simply the other operand.
      return checkNotNull(other);
    }

    @Override public CharMatcher or(CharMatcher other) {
      checkNotNull(other);
      return this;
    }

    @Override public CharMatcher negate() {
      return NONE;
    }

    @Override public CharMatcher precomputed() {
      return this;
    }
  };

/**
   * Matches no characters. Every text-processing routine is overridden with a
   * shortcut that returns the input unchanged (or the "nothing found" value)
   * without inspecting individual characters.
   */
  public static final CharMatcher NONE = new CharMatcher() {
    @Override public boolean matches(char c) {
      return false;
    }

    // --- searching and counting: nothing is ever found ---

    @Override public int indexIn(CharSequence sequence) {
      checkNotNull(sequence);
      return -1;
    }

    @Override public int indexIn(CharSequence sequence, int start) {
      int end = sequence.length();
      Preconditions.checkPositionIndex(start, end);
      return -1;
    }

    @Override public int lastIndexIn(CharSequence sequence) {
      checkNotNull(sequence);
      return -1;
    }

    @Override public int countIn(CharSequence sequence) {
      checkNotNull(sequence);
      return 0;
    }

    @Override public boolean matchesAllOf(CharSequence sequence) {
      // Vacuously true only for the empty sequence.
      return sequence.length() == 0;
    }

    @Override public boolean matchesNoneOf(CharSequence sequence) {
      checkNotNull(sequence);
      return true;
    }

    // --- rewriting: the input is returned as-is ---

    @Override public String removeFrom(CharSequence sequence) {
      return sequence.toString();
    }

    @Override public String replaceFrom(
        CharSequence sequence, char replacement) {
      return sequence.toString();
    }

    @Override public String replaceFrom(
        CharSequence sequence, CharSequence replacement) {
      checkNotNull(replacement);
      return sequence.toString();
    }

    @Override public String collapseFrom(
        CharSequence sequence, char replacement) {
      return sequence.toString();
    }

    @Override public String trimFrom(CharSequence sequence) {
      return sequence.toString();
    }

    // --- combinators ---

    @Override public CharMatcher and(CharMatcher other) {
      checkNotNull(other);
      return this;
    }

    @Override public CharMatcher or(CharMatcher other) {
      // NONE is the identity of "or": the result is simply the other operand.
      return checkNotNull(other);
    }

    @Override public CharMatcher negate() {
      return ANY;
    }

    @Override protected void setBits(LookupTable table) {
      // Intentionally empty: there are no matching characters to record.
    }

    @Override public CharMatcher precomputed() {
      return this;
    }
  };

// Static factories

/**
   * Returns a {@code char} matcher that matches exactly one character.
   *
   * @param match the only character the returned matcher accepts
   * @return a matcher for which {@code matches(c)} is true iff
   *     {@code c == match}
   */
  public static CharMatcher is(final char match) {
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return match == c;
      }

      @Override public String replaceFrom(
          CharSequence sequence, char replacement) {
        String text = sequence.toString();
        return text.replace(match, replacement);
      }

      @Override public CharMatcher and(CharMatcher other) {
        // The intersection is either this single character or nothing.
        if (other.matches(match)) {
          return this;
        }
        return NONE;
      }

      @Override public CharMatcher or(CharMatcher other) {
        // If the other matcher already covers our character it is the union.
        if (other.matches(match)) {
          return other;
        }
        return super.or(other);
      }

      @Override public CharMatcher negate() {
        return isNot(match);
      }

      @Override protected void setBits(LookupTable table) {
        table.set(match);
      }

      @Override public CharMatcher precomputed() {
        return this;
      }
    };
  }

/**
   * Returns a {@code char} matcher that matches every character other than
   * the one given.
   *
   * To negate another {@code CharMatcher}, use {@link #negate()}.
   *
   * @param match the only character the returned matcher rejects
   */
  public static CharMatcher isNot(final char match) {
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return match != c;
      }

      @Override public CharMatcher and(CharMatcher other) {
        // If the other matcher already rejects our character, it is the
        // intersection; otherwise a real conjunction is required.
        if (!other.matches(match)) {
          return other;
        }
        return super.and(other);
      }

      @Override public CharMatcher or(CharMatcher other) {
        // Adding the single excluded character back yields everything.
        if (other.matches(match)) {
          return ANY;
        }
        return this;
      }

      @Override public CharMatcher negate() {
        return is(match);
      }
    };
  }

/**
   * Returns a {@code char} matcher that matches any character contained in
   * the given character sequence.
   *
   * Sequences of length zero, one and two get specialised matchers; longer
   * sequences are copied into a sorted array that is binary-searched.
   *
   * @param sequence the characters to match
   */
  public static CharMatcher anyOf(final CharSequence sequence) {
    final int count = sequence.length();
    if (count == 0) {
      return NONE;
    }
    if (count == 1) {
      return is(sequence.charAt(0));
    }
    if (count == 2) {
      final char first = sequence.charAt(0);
      final char second = sequence.charAt(1);
      return new CharMatcher() {
        @Override public boolean matches(char c) {
          return first == c || second == c;
        }

        @Override protected void setBits(LookupTable table) {
          table.set(first);
          table.set(second);
        }

        @Override public CharMatcher precomputed() {
          return this;
        }
      };
    }

    // General case. Duplicates are kept: collapsing them is not worth it.
    final char[] sorted = sequence.toString().toCharArray();
    Arrays.sort(sorted);

    return new CharMatcher() {
      @Override public boolean matches(char c) {
        int found = Arrays.binarySearch(sorted, c);
        return found >= 0;
      }

      @Override protected void setBits(LookupTable table) {
        for (int i = 0; i < sorted.length; i++) {
          table.set(sorted[i]);
        }
      }
    };
  }

/**
   * Returns a {@code char} matcher that matches any character absent from
   * the given character sequence; it is the negation of
   * {@link #anyOf(CharSequence)} for the same argument.
   */
  public static CharMatcher noneOf(CharSequence sequence) {
    CharMatcher listed = anyOf(sequence);
    return listed.negate();
  }

/**
   * Returns a {@code char} matcher that matches any character in a given range
   * (both endpoints are inclusive). For example, to match any lowercase letter
   * of the English alphabet, use {@code CharMatcher.inRange('a', 'z')}.
   *
   * @param startInclusive the smallest matching character
   * @param endInclusive the largest matching character
   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
   */
  public static CharMatcher inRange(
      final char startInclusive, final char endInclusive) {
    checkArgument(startInclusive <= endInclusive);
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return c >= startInclusive && c <= endInclusive;
      }

      @Override protected void setBits(LookupTable table) {
        // Count in int so that an upper bound of Character.MAX_VALUE cannot
        // wrap around and loop forever.
        for (int code = startInclusive; code <= endInclusive; code++) {
          table.set((char) code);
        }
      }

      @Override public CharMatcher precomputed() {
        return this;
      }
    };
  }

/**
   * Returns a matcher that behaves like the given {@link Character}-based
   * predicate but operates on primitive {@code char} values. A predicate that
   * already is a {@code CharMatcher} is returned as-is.
   *
   * @param predicate the predicate to adapt; must not be null
   */
  public static CharMatcher forPredicate(
      final Predicate<? super Character> predicate) {
    checkNotNull(predicate);
    if (!(predicate instanceof CharMatcher)) {
      return new CharMatcher() {
        @Override public boolean matches(char c) {
          return predicate.apply(c);
        }

        @Override public boolean apply(Character character) {
          Character nonNull = checkNotNull(character);
          return predicate.apply(nonNull);
        }
      };
    }
    return (CharMatcher) predicate;
  }

// Abstract methods

/**
   * Determines a true or false value for the given character. This is the
   * single abstract method of the class; the text-processing routines in this
   * class are built on top of it.
   *
   * @param c the character to test
   * @return {@code true} if this matcher matches {@code c}
   */
  public abstract boolean matches(char c);

// Non-static factories

/**
   * Returns a matcher that matches exactly the characters this matcher does
   * not match. Negating the result gives back this very instance.
   */
  public CharMatcher negate() {
    final CharMatcher positive = this;
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        boolean accepted = positive.matches(c);
        return !accepted;
      }

      // "All" and "none" swap roles under negation.
      @Override public boolean matchesAllOf(CharSequence sequence) {
        return positive.matchesNoneOf(sequence);
      }

      @Override public boolean matchesNoneOf(CharSequence sequence) {
        return positive.matchesAllOf(sequence);
      }

      @Override public int countIn(CharSequence sequence) {
        int total = sequence.length();
        return total - positive.countIn(sequence);
      }

      @Override public CharMatcher negate() {
        return positive;
      }
    };
  }

/**
   * Returns a matcher that matches a character only if both this matcher and
   * {@code other} match it.
   *
   * @param other the second operand; must not be null
   */
  public CharMatcher and(CharMatcher other) {
    CharMatcher second = checkNotNull(other);
    List<CharMatcher> operands = Arrays.asList(this, second);
    return new And(operands);
  }

private static class And extends CharMatcher {
 List<CharMatcher> components;

And(List<CharMatcher> components) {
 this.components = components; // Skip defensive copy (private)
 }

@Override public boolean matches(char c) {
      for (CharMatcher matcher : components) {
        if (!matcher.matches(c)) {
          return false;
        }
      }
      return true;
    }

@Override public CharMatcher and(CharMatcher other) {
 List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
 newComponents.add(checkNotNull(other));
 return new And(newComponents);
 }
 }

/**
   * Returns a matcher that matches a character if this matcher or
   * {@code other} (or both) match it.
   *
   * @param other the second operand; must not be null
   */
  public CharMatcher or(CharMatcher other) {
    CharMatcher second = checkNotNull(other);
    List<CharMatcher> operands = Arrays.asList(this, second);
    return new Or(operands);
  }

private static class Or extends CharMatcher {
 List<CharMatcher> components;

Or(List<CharMatcher> components) {
 this.components = components; // Skip defensive copy (private)
 }

@Override public boolean matches(char c) {
      for (CharMatcher matcher : components) {
        if (matcher.matches(c)) {
          return true;
        }
      }
      return false;
    }

@Override public CharMatcher or(CharMatcher other) {
 List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
 newComponents.add(checkNotNull(other));
 return new Or(newComponents);
 }

@Override protected void setBits(LookupTable table) {
      for (CharMatcher matcher : components) {
        matcher.setBits(table);
      }
    }
  }

/**
   * Returns a {@code char} matcher that is functionally equivalent to this
   * one but may be faster to query; your mileage may vary. Precomputation
   * itself takes time and is likely to pay off only when the resulting
   * matcher is queried many thousands of times.
   *
   * This method has no effect (returns {@code this}) when called in GWT:
   * it's unclear whether a precomputed matcher is faster, but it certainly
   * consumes more memory, which doesn't seem like a worthwhile tradeoff in a
   * browser.
   */
  public CharMatcher precomputed() {
    // The platform hook decides whether precomputation happens at all.
    CharMatcher result = Platform.precomputeCharMatcher(this);
    return result;
  }

/**
   * The actual implementation of {@link #precomputed}; calls are bounced
   * through a method on {@link Platform} so that GWT can behave differently.
   *
   * The default precomputation caches the configuration of the original
   * matcher in an eight-kilobyte bit array. In some situations this produces a
   * matcher which is faster to query than the original.
   *
   * The default implementation creates a new bit array, passes it to
   * {@link #setBits(LookupTable)}, and returns a matcher that answers every
   * query from that array.
   */
  CharMatcher precomputedInternal() {
    final LookupTable bits = new LookupTable();
    setBits(bits);

    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return bits.get(c);
      }

      // TODO: make methods like negate() smart

      @Override public CharMatcher precomputed() {
        // Already table-backed; nothing further to gain.
        return this;
      }
    };
  }

/**
   * For use by implementors; sets the bit corresponding to each character
   * (U+0000 to U+FFFF) that matches this matcher in the given bit array,
   * leaving all other bits untouched.
   *
   * The default implementation visits every possible character value in
   * ascending order and invokes {@link #matches} on each one.
   *
   * @param table the bit array to record matching characters in
   */
  protected void setBits(LookupTable table) {
    // Count in int: a char counter could never exceed Character.MAX_VALUE and
    // would wrap around instead of terminating.
    for (int code = Character.MIN_VALUE; code <= Character.MAX_VALUE; code++) {
      char c = (char) code;
      if (matches(c)) {
        table.set(c);
      }
    }
  }

/**
   * A bit array with one bit per {@code char} value, used by {@link
   * CharMatcher#precomputed}. The 65536 bits are packed into 2048
   * {@code int} words of 32 bits each.
   *
   * TODO: possibly share a common BitArray class with BloomFilter
   * and others... a simpler java.util.BitSet.
   */
  protected static class LookupTable {
    int[] data = new int[2048];

    /** Turns on the bit belonging to {@code index}. */
    void set(char index) {
      int word = index >> 5;          // which 32-bit word holds the bit
      int mask = 1 << (index & 31);   // position of the bit inside that word
      data[word] |= mask;
    }

    /** Reports whether the bit belonging to {@code index} is on. */
    boolean get(char index) {
      int word = index >> 5;
      int mask = 1 << (index & 31);
      return (data[word] & mask) != 0;
    }
  }

// Text processing routines

/**
   * Returns {@code true} if a character sequence contains only matching
   * characters.
   *
   * The default implementation walks the sequence from its last character
   * to its first, invoking {@link #matches} on each one, and stops as soon as
   * a character fails to match.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches every character in the
   *     sequence, including when the sequence is empty
   */
  public boolean matchesAllOf(CharSequence sequence) {
    int i = sequence.length();
    while (--i >= 0) {
      char current = sequence.charAt(i);
      if (!matches(current)) {
        return false;
      }
    }
    return true;
  }

/**
   * Returns {@code true} if a character sequence contains no matching
   * characters.
   *
   * The default implementation delegates to {@link #indexIn(CharSequence)}
   * and reports whether that search came back empty-handed.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches no character in the
   *     sequence, including when the sequence is empty
   */
  public boolean matchesNoneOf(CharSequence sequence) {
    int firstMatch = indexIn(sequence);
    return firstMatch == -1;
  }

// TODO: perhaps add matchesAnyOf()

/**
   * Returns the index of the first matching character in a character sequence,
   * or {@code -1} if no matching character is present.
   *
   * The default implementation scans the sequence front to back, calling
   * {@link #matches} on each character until one matches.
   *
   * @param sequence the character sequence to examine from the beginning
   * @return an index, or {@code -1} if no character matches
   */
  public int indexIn(CharSequence sequence) {
    final int end = sequence.length();
    int i = 0;
    while (i < end) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
      i++;
    }
    return -1;
  }

/**
   * Returns the index of the first matching character in a character sequence
   * at or after a given position, or {@code -1} if no character matches from
   * that position on.
   *
   * The default implementation scans forward from {@code start}, calling
   * {@link #matches} on each character until one matches.
   *
   * @param sequence the character sequence to examine
   * @param start the first index to examine; must be nonnegative and no
   *     greater than {@code sequence.length()}
   * @return the index of the first matching character, guaranteed to be no
   *     less than {@code start}, or {@code -1} if no character matches
   * @throws IndexOutOfBoundsException if start is negative or greater than
   *     {@code sequence.length()}
   */
  public int indexIn(CharSequence sequence, int start) {
    final int end = sequence.length();
    Preconditions.checkPositionIndex(start, end);
    int i = start;
    while (i < end) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
      i++;
    }
    return -1;
  }

/**
   * Returns the index of the last matching character in a character sequence,
   * or {@code -1} if no matching character is present.
   *
   * The default implementation scans the sequence back to front, calling
   * {@link #matches} on each character until one matches.
   *
   * @param sequence the character sequence to examine from the end
   * @return an index, or {@code -1} if no character matches
   */
  public int lastIndexIn(CharSequence sequence) {
    int i = sequence.length();
    while (--i >= 0) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
    }
    return -1;
  }

/**
   * Returns the number of matching characters found in a character sequence.
   *
   * @param sequence the character sequence to examine
   * @return how many characters of {@code sequence} this matcher matches
   */
  public int countIn(CharSequence sequence) {
    int matched = 0;
    for (int i = 0; i < sequence.length(); i++) {
      matched += matches(sequence.charAt(i)) ? 1 : 0;
    }
    return matched;
  }

/**
 * Returns a string containing all non-matching characters of a character
 * sequence, in order. For example: <pre> {@code
 *
 * CharMatcher.is('a').removeFrom("bazaar")}</pre>
 *
 * ... returns {@code "bzr"}.
 *
 * If nothing matches, the string form of the input is returned without
 * copying. Otherwise the characters are compacted in place inside a single
 * {@code char[]} copy of the input.
 *
 * @param sequence the character sequence to remove matching characters from
 * @return the remaining characters, in their original order
 */
 public String removeFrom(CharSequence sequence) {
 String string = sequence.toString();
 int pos = indexIn(string);
 if (pos == -1) {
 return string;
 }

// pos is the read cursor, currently sitting on the first matching character.
// spread counts the matching characters seen so far, so pos - spread is the
// slot the next surviving character must be written to.
char[] chars = string.toCharArray();
    int spread = 1;

// This unusual loop comes from extensive benchmarking
    OUT:
    while (true) {
      // Step over the matching character the cursor is currently on.
      pos++;
      while (true) {
        if (pos == chars.length) {
          break OUT;
        }
        if (matches(chars[pos])) {
          // Another character to drop: leave the copy loop to widen the gap.
          break;
        }
        // Surviving character: shift it left over the removed ones.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // Here pos == chars.length, so pos - spread is the number of survivors.
    return new String(chars, 0, pos - spread);
  }

/**
   * Returns a string containing all matching characters of a character
   * sequence, in order. For example: <pre> {@code
   *
   * CharMatcher.is('a').retainFrom("bazaar")}</pre>
   *
   * ... returns {@code "aaa"}.
   *
   * Implemented as removal of everything the negated matcher matches.
   */
  public String retainFrom(CharSequence sequence) {
    CharMatcher inverse = negate();
    return inverse.removeFrom(sequence);
  }

/**
   * Returns a string copy of the input character sequence, with each character
   * that matches this matcher replaced by a given replacement character. For
   * example: <pre> {@code
   *
   * CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
   *
   * ... returns {@code "rodor"}.
   *
   * The default implementation uses {@link #indexIn(CharSequence)} to find
   * the first matching character, then walks the rest of the sequence calling
   * {@link #matches(char)} on each character. When nothing matches, the string
   * form of the input is returned without copying.
   *
   * @param sequence the character sequence to replace matching characters in
   * @param replacement the character to put in place of each matching
   *     character of {@code sequence}
   * @return the new string
   */
  public String replaceFrom(CharSequence sequence, char replacement) {
    final String original = sequence.toString();
    final int firstMatch = indexIn(original);
    if (firstMatch == -1) {
      return original;
    }

    char[] result = original.toCharArray();
    result[firstMatch] = replacement;
    int i = firstMatch + 1;
    while (i < result.length) {
      if (matches(result[i])) {
        result[i] = replacement;
      }
      i++;
    }
    return new String(result);
  }

/**
   * Returns a string copy of the input character sequence, with each character
   * that matches this matcher replaced by a given replacement sequence. For
   * example: <pre> {@code
   *
   * CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
   *
   * ... returns {@code "yoohoo"}.
   *
   * An empty replacement is handled by {@link #removeFrom} and a
   * one-character replacement by {@link #replaceFrom(CharSequence, char)}.
   *
   * Note: If the replacement is a fixed string with only one character,
   * you are better off calling {@link #replaceFrom(CharSequence, char)}
   * directly.
   *
   * @param sequence the character sequence to replace matching characters in
   * @param replacement the characters to put in place of each matching
   *     character of {@code sequence}
   * @return the new string
   */
  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
    switch (replacement.length()) {
      case 0:
        return removeFrom(sequence);
      case 1:
        return replaceFrom(sequence, replacement.charAt(0));
      default:
        break;
    }

    final String text = sequence.toString();
    int matchAt = indexIn(text);
    if (matchAt == -1) {
      return text;
    }

    final int textLength = text.length();
    StringBuilder out = new StringBuilder((int) (textLength * 1.5) + 16);

    // Alternate between copying an untouched stretch and emitting the
    // replacement for the matching character that ends it.
    int copyFrom = 0;
    while (matchAt != -1) {
      out.append(text, copyFrom, matchAt);
      out.append(replacement);
      copyFrom = matchAt + 1;
      matchAt = indexIn(text, copyFrom);
    }

    out.append(text, copyFrom, textLength);
    return out.toString();
  }

/**
   * Returns a substring of the input character sequence that omits all
   * characters this matcher matches from the beginning and from the end of the
   * string. For example: <pre> {@code
   *
   * CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
   *
   * ... returns {@code "cat"}.
   *
   * Note that<pre> {@code
   *
   * CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
   *
   * ... is equivalent to {@link String#trim()}.
   */
  public String trimFrom(CharSequence sequence) {
    final int length = sequence.length();

    // Advance past the leading run of matching characters.
    int first = 0;
    while (first < length && matches(sequence.charAt(first))) {
      first++;
    }

    // Retreat past the trailing run, never crossing the leading boundary.
    int last = length - 1;
    while (last > first && matches(sequence.charAt(last))) {
      last--;
    }

    return sequence.subSequence(first, last + 1).toString();
  }

/**
   * Returns a substring of the input character sequence that omits all
   * characters this matcher matches from the beginning of the
   * string. For example: <pre> {@code
   *
   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
   *
   * ... returns {@code "catbab"}.
   */
  public String trimLeadingFrom(CharSequence sequence) {
    final int length = sequence.length();

    // Advance past the leading run of matching characters.
    int first = 0;
    while (first < length && matches(sequence.charAt(first))) {
      first++;
    }

    return sequence.subSequence(first, length).toString();
  }

/**
   * Returns a substring of the input character sequence that omits all
   * characters this matcher matches from the end of the
   * string. For example: <pre> {@code
   *
   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
   *
   * ... returns {@code "abacat"}.
   */
  public String trimTrailingFrom(CharSequence sequence) {
    // Retreat past the trailing run of matching characters; ends at -1 when
    // every character matches.
    int last = sequence.length() - 1;
    while (last >= 0 && matches(sequence.charAt(last))) {
      last--;
    }

    return sequence.subSequence(0, last + 1).toString();
  }

/**
   * Returns a string copy of the input character sequence, with each group of
   * consecutive characters that match this matcher replaced by a single
   * replacement character. For example: <pre> {@code
   *
   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
   *
   * ... returns {@code "b-p-r"}.
   *
   * The default implementation uses {@link #indexIn(CharSequence)} to find
   * the first matching character, then iterates the remainder of the sequence
   * calling {@link #matches(char)} for each character.
   *
   * @param sequence the character sequence to replace matching groups of
   *     characters in
   * @param replacement the character to append to the result string in place
   *     of each group of matching characters in {@code sequence}
   * @return the new string
   */
  public String collapseFrom(CharSequence sequence, char replacement) {
    int first = indexIn(sequence);
    if (first == -1) {
      return sequence.toString();
    }

    // TODO: this implementation can probably be made faster.

    // Copy the untouched prefix, then emit the replacement for the group that
    // starts at the first match.
    StringBuilder builder = new StringBuilder(sequence.length())
        .append(sequence, 0, first)
        .append(replacement);
    boolean in = true; // true while inside a group of matching characters
    for (int i = first + 1; i < sequence.length(); i++) {
      char c = sequence.charAt(i);
      // Query the primitive matches(char) directly; going through
      // apply(Character) would box every character of the input.
      if (matches(c)) {
        if (!in) {
          builder.append(replacement);
          in = true;
        }
      } else {
        builder.append(c);
        in = false;
      }
    }
    return builder.toString();
  }

/**
 * Collapses groups of matching characters exactly as {@link #collapseFrom}
 * does, except that groups of matching characters at the start or end of the
 * sequence are removed without replacement. For example: <pre> {@code
 *
 * CharMatcher.anyOf("ab").trimAndCollapseFrom("abacatbab", '-')}</pre>
 *
 * ... returns {@code "c-t"}.
 *
 * @param sequence the character sequence to trim and collapse
 * @param replacement the character to append to the result string in place of
 *     each interior group of matching characters
 * @return the new string; empty if every character of {@code sequence}
 *     matches
 */
 public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
   // Starting at the first non-matching character drops the leading group.
   int first = negate().indexIn(sequence);
   if (first == -1) {
     return ""; // everything matches. nothing's left.
   }
   StringBuilder builder = new StringBuilder(sequence.length());
   boolean inMatchingGroup = false;
   for (int i = first; i < sequence.length(); i++) {
     char c = sequence.charAt(i);
     // Query matches(char) directly; going through apply(Character) would
     // box every character of the input.
     if (matches(c)) {
       inMatchingGroup = true;
     } else {
       // The replacement is emitted only once a non-matching character
       // follows the group, so a trailing group never produces output.
       if (inMatchingGroup) {
         builder.append(replacement);
         inMatchingGroup = false;
       }
       builder.append(c);
     }
   }
   return builder.toString();
 }

// Predicate interface

/**
   * Returns {@code true} if this matcher matches the given character. This is
   * the {@link Predicate} view of the matcher: the boxed argument is unboxed
   * and handed to {@link #matches(char)}.
   *
   * @param character the character to test
   * @return the result of {@code matches} for the unboxed character
   * @throws NullPointerException if {@code character} is null
   */
  /*@Override*/ public boolean apply(Character character) {
    return matches(character);
  }
}

Java程序 | 1113行 | 34.44 KB

/*
 * Copyright (C) 2008 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.base;

import com.google.common.annotations.GwtCompatible;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Determines a true or false value for any Java {@code char} value, just as
 * {@link Predicate} does for any {@link Object}. Also offers basic text
 * processing methods based on this function. Implementations are strongly
 * encouraged to be side-effect-free and immutable.
 *
 * <p>Throughout the documentation of this class, the phrase "matching
 * character" is used to mean "any character {@code c} for which {@code
 * this.matches(c)} returns {@code true}".
 *
 * <p><b>Note:</b> This class deals only with {@code char} values; it does not
 * understand supplementary Unicode code points in the range {@code 0x10000} to
 * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String}
 * using surrogate pairs, and a {@code CharMatcher} treats these just as two
 * separate characters.
 *
 * @author Kevin Bourrillion
 * @since 2009.09.15 <b>tentative</b>
 */
@GwtCompatible
public abstract class CharMatcher implements Predicate<Character> {

  // Constants

  // Individually listed breaking whitespace characters. Excludes U+2000
  // through U+200A, which the matchers built from this string add as ranges.
  private static final String BREAKING_WHITESPACE_CHARS =
      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";

  // Individually listed non-breaking whitespace characters. Excludes U+2007,
  // which is handled as a gap in a pair of ranges (U+2000 to U+2006 and
  // U+2008 to U+200A) where breaking whitespace is defined.
  private static final String NON_BREAKING_WHITESPACE_CHARS =
      "\u00a0\u180e\u202f";

  /**
   * Determines whether a character is whitespace according to the latest
   * Unicode standard, as illustrated
   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
   * This is not the same definition used by other Java APIs. See a comparison
   * of several definitions of "whitespace" at
   * <a href="TODO">(TODO)</a>.
   *
   * <p>The matcher is the union of the breaking and non-breaking character
   * lists with the contiguous range U+2000 through U+200A.
   *
   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this
   * constant to keep it up to date.
   */
  public static final CharMatcher WHITESPACE =
      anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
          .or(inRange('\u2000', '\u200a'));

  /**
   * Determines whether a character is a breaking whitespace (that is,
   * a whitespace which can be interpreted as a break between words
   * for formatting purposes).  See {@link #WHITESPACE} for a discussion
   * of that term.
   *
   * <p>The matcher is the union of the breaking character list with the
   * ranges U+2000 through U+2006 and U+2008 through U+200A; U+2007 falls in
   * the gap between the two ranges and is therefore not matched.
   *
   * @since 2010.01.04 <b>tentative</b>
   */
  public static final CharMatcher BREAKING_WHITESPACE =
      anyOf(BREAKING_WHITESPACE_CHARS)
          .or(inRange('\u2000', '\u2006'))
          .or(inRange('\u2008', '\u200a'));

  /**
   * Determines whether a character is ASCII, meaning that its code point is
   * less than 128 (the inclusive range U+0000 through U+007F).
   */
  public static final CharMatcher ASCII = inRange('\0', '\u007f');

  /**
   * Determines whether a character is a digit according to
   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
   *
   * <p>The matcher is assembled in the static initializer that follows, as
   * the union of {@code '0'} through {@code '9'} and one ten-character range
   * for each entry of a hard-coded list of starting characters.
   */
  public static final CharMatcher DIGIT;

  static {
    CharMatcher digit = inRange('0', '9');
    // NOTE(review): presumably the zero digit of each non-ASCII decimal digit
    // block, as the variable name suggests -- verify against the Unicode list.
    String zeroes =
        "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
            + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
            + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
    for (char base : zeroes.toCharArray()) {
      // Each entry contributes the ten consecutive characters base..base+9.
      digit = digit.or(inRange(base, (char) (base + 9)));
    }
    DIGIT = digit;
  }

  /**
   * Determines whether a character is whitespace according to {@link
   * Character#isWhitespace(char) Java's definition}; it is usually preferable
   * to use {@link #WHITESPACE}. See a comparison of several definitions of
   * "whitespace" at <a href="http://go/white+space">go/white+space</a>.
   *
   * <p>This matcher is a hard-coded table of ranges and single characters; it
   * does not call {@code Character.isWhitespace} at query time.
   * NOTE(review): confirm the table still agrees with the target JDK.
   */
  public static final CharMatcher JAVA_WHITESPACE
      = inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
      .or(inRange('\u001c', '\u0020'))
      .or(is('\u1680'))
      .or(is('\u180e'))
      .or(inRange('\u2000', '\u2006'))
      .or(inRange('\u2008', '\u200b'))
      .or(inRange('\u2028', '\u2029'))
      .or(is('\u205f'))
      .or(is('\u3000'));

  /**
   * Determines whether a character is a digit according to {@link
   * Character#isDigit(char) Java's definition}. If you only care to match
   * ASCII digits, you can use {@code inRange('0', '9')}.
   *
   * <p>Every query delegates directly to {@code Character.isDigit(char)}.
   */
  public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isDigit(c);
    }
  };

  /**
   * Determines whether a character is a letter according to {@link
   * Character#isLetter(char) Java's definition}. If you only care to match
   * letters of the Latin alphabet, you can use {@code
   * inRange('a', 'z').or(inRange('A', 'Z'))}.
   *
   * <p>Every query delegates directly to {@code Character.isLetter(char)}.
   */
  public static final CharMatcher JAVA_LETTER = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isLetter(c);
    }
  };

  /**
   * Determines whether a character is a letter or digit according to {@link
   * Character#isLetterOrDigit(char) Java's definition}.
   *
   * <p>Every query delegates directly to
   * {@code Character.isLetterOrDigit(char)}.
   */
  public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isLetterOrDigit(c);
    }
  };

  /**
   * Determines whether a character is upper case according to {@link
   * Character#isUpperCase(char) Java's definition}.
   *
   * <p>Every query delegates directly to {@code Character.isUpperCase(char)}.
   */
  public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isUpperCase(c);
    }
  };

  /**
   * Determines whether a character is lower case according to {@link
   * Character#isLowerCase(char) Java's definition}.
   *
   * <p>Every query delegates directly to {@code Character.isLowerCase(char)}.
   */
  public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
    @Override public boolean matches(char c) {
      return Character.isLowerCase(c);
    }
  };

  /**
   * Determines whether a character is an ISO control character according to
   * {@link Character#isISOControl(char)}.
   *
   * <p>Implemented as the union of the two inclusive ranges U+0000 through
   * U+001F and U+007F through U+009F, without calling that method.
   */
  public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f')
      .or(inRange('\u007f', '\u009f'));

  /**
   * Determines whether a character is invisible; that is, if its Unicode
   * category is any of SPACE_SEPARATOR, LINE_SEPARATOR,
   * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according
   * to ICU4J.
   *
   * <p>Implemented as a hard-coded union of ranges and individual characters
   * rather than a category lookup at query time.
   */
  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
      .or(inRange('\u007f', '\u00a0'))
      .or(is('\u00ad'))
      .or(inRange('\u0600', '\u0603'))
      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
      .or(inRange('\u2000', '\u200f'))
      .or(inRange('\u2028', '\u202f'))
      .or(inRange('\u205f', '\u2064'))
      .or(inRange('\u206a', '\u206f'))
      .or(is('\u3000'))
      .or(inRange('\ud800', '\uf8ff'))
      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"));

  /**
   * Determines whether a character is single-width (not double-width).  When
   * in doubt, this matcher errs on the side of returning {@code false} (that
   * is, it tends to assume a character is double-width).
   *
   * <p>Implemented as a hard-coded union of ranges and individual characters.
   *
   * <p><b>Note:</b> as the reference file evolves, we will modify this constant
   * to keep it up to date.
   */
  public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
      .or(is('\u05be'))
      .or(inRange('\u05d0', '\u05ea'))
      .or(is('\u05f3'))
      .or(is('\u05f4'))
      .or(inRange('\u0600', '\u06ff'))
      .or(inRange('\u0750', '\u077f'))
      .or(inRange('\u0e00', '\u0e7f'))
      .or(inRange('\u1e00', '\u20af'))
      .or(inRange('\u2100', '\u213a'))
      .or(inRange('\ufb50', '\ufdff'))
      .or(inRange('\ufe70', '\ufeff'))
      .or(inRange('\uff61', '\uffdc'));

  /**
   * Matches any character. Because the answer for every character is known in
   * advance, the text-processing methods are overridden with shortcuts that
   * never test individual characters.
   */
  public static final CharMatcher ANY = new CharMatcher() {
    @Override public boolean matches(char c) {
      return true;
    }

    @Override public int indexIn(CharSequence sequence) {
      // The first character, if there is one, always matches.
      if (sequence.length() == 0) {
        return -1;
      }
      return 0;
    }
    @Override public int indexIn(CharSequence sequence, int start) {
      int end = sequence.length();
      Preconditions.checkPositionIndex(start, end);
      if (start == end) {
        return -1;
      }
      return start;
    }
    @Override public int lastIndexIn(CharSequence sequence) {
      // An empty sequence yields -1, which doubles as the "no match" value.
      int size = sequence.length();
      return size - 1;
    }
    @Override public boolean matchesAllOf(CharSequence sequence) {
      checkNotNull(sequence);
      return true;
    }
    @Override public boolean matchesNoneOf(CharSequence sequence) {
      int size = sequence.length();
      return size == 0;
    }
    @Override public String removeFrom(CharSequence sequence) {
      checkNotNull(sequence);
      return "";
    }
    @Override public String replaceFrom(
        CharSequence sequence, char replacement) {
      // Every position is replaced, so only the length of the input matters.
      char[] filled = new char[sequence.length()];
      Arrays.fill(filled, replacement);
      return new String(filled);
    }
    @Override public String replaceFrom(
        CharSequence sequence, CharSequence replacement) {
      int capacity = sequence.length() * replacement.length();
      StringBuilder result = new StringBuilder(capacity);
      // One copy of the replacement per input character.
      int i = 0;
      while (i < sequence.length()) {
        result.append(replacement);
        i++;
      }
      return result.toString();
    }
    @Override public String collapseFrom(CharSequence sequence, char replacement) {
      // A non-empty input is one single matching group.
      if (sequence.length() == 0) {
        return "";
      }
      return String.valueOf(replacement);
    }
    @Override public String trimFrom(CharSequence sequence) {
      checkNotNull(sequence);
      return "";
    }
    @Override public int countIn(CharSequence sequence) {
      return sequence.length();
    }
    @Override public CharMatcher and(CharMatcher other) {
      // ANY and X is X.
      return checkNotNull(other);
    }
    @Override public CharMatcher or(CharMatcher other) {
      // ANY or X is ANY.
      checkNotNull(other);
      return this;
    }
    @Override public CharMatcher negate() {
      return NONE;
    }
    @Override public CharMatcher precomputed() {
      return this;
    }
  };

  /**
   * Matches no characters. Because the answer for every character is known in
   * advance, the text-processing methods are overridden with shortcuts that
   * never test individual characters.
   */
  public static final CharMatcher NONE = new CharMatcher() {
    @Override public boolean matches(char c) {
      return false;
    }

    @Override public int indexIn(CharSequence sequence) {
      checkNotNull(sequence);
      return -1;
    }
    @Override public int indexIn(CharSequence sequence, int start) {
      // The start position is still validated even though nothing can match.
      int end = sequence.length();
      Preconditions.checkPositionIndex(start, end);
      return -1;
    }
    @Override public int lastIndexIn(CharSequence sequence) {
      checkNotNull(sequence);
      return -1;
    }
    @Override public boolean matchesAllOf(CharSequence sequence) {
      // Only the empty sequence has "all" of its characters matched.
      int size = sequence.length();
      return size == 0;
    }
    @Override public boolean matchesNoneOf(CharSequence sequence) {
      checkNotNull(sequence);
      return true;
    }
    @Override public String removeFrom(CharSequence sequence) {
      String unchanged = sequence.toString();
      return unchanged;
    }
    @Override public String replaceFrom(
        CharSequence sequence, char replacement) {
      String unchanged = sequence.toString();
      return unchanged;
    }
    @Override public String replaceFrom(
        CharSequence sequence, CharSequence replacement) {
      checkNotNull(replacement);
      String unchanged = sequence.toString();
      return unchanged;
    }
    @Override public String collapseFrom(
        CharSequence sequence, char replacement) {
      String unchanged = sequence.toString();
      return unchanged;
    }
    @Override public String trimFrom(CharSequence sequence) {
      String unchanged = sequence.toString();
      return unchanged;
    }
    @Override public int countIn(CharSequence sequence) {
      checkNotNull(sequence);
      return 0;
    }
    @Override public CharMatcher and(CharMatcher other) {
      // NONE and X is NONE.
      checkNotNull(other);
      return this;
    }
    @Override public CharMatcher or(CharMatcher other) {
      // NONE or X is X.
      return checkNotNull(other);
    }
    @Override public CharMatcher negate() {
      return ANY;
    }
    @Override protected void setBits(LookupTable table) {
      // Nothing matches, so there are no bits to set.
    }
    @Override public CharMatcher precomputed() {
      return this;
    }
  };

  // Static factories

  /**
   * Returns a {@code char} matcher that matches exactly one character.
   *
   * @param match the only character the returned matcher accepts
   * @return a matcher for which {@code matches(c)} is {@code c == match}
   */
  public static CharMatcher is(final char match) {
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return c == match;
      }

      @Override public String replaceFrom(
          CharSequence sequence, char replacement) {
        String text = sequence.toString();
        return text.replace(match, replacement);
      }
      @Override public CharMatcher and(CharMatcher other) {
        // The intersection is either this single character or nothing.
        if (other.matches(match)) {
          return this;
        }
        return NONE;
      }
      @Override public CharMatcher or(CharMatcher other) {
        // If the other matcher already covers the character, it is the union.
        if (other.matches(match)) {
          return other;
        }
        return super.or(other);
      }
      @Override public CharMatcher negate() {
        return isNot(match);
      }
      @Override protected void setBits(LookupTable table) {
        table.set(match);
      }
      @Override public CharMatcher precomputed() {
        return this;
      }
    };
  }

  /**
   * Returns a {@code char} matcher that matches every character other than
   * the one given.
   *
   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
   *
   * @param match the only character the returned matcher rejects
   * @return a matcher for which {@code matches(c)} is {@code c != match}
   */
  public static CharMatcher isNot(final char match) {
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return c != match;
      }

      @Override public CharMatcher and(CharMatcher other) {
        // If the other matcher already rejects the character, it is the
        // intersection as it stands.
        if (!other.matches(match)) {
          return other;
        }
        return super.and(other);
      }
      @Override public CharMatcher or(CharMatcher other) {
        // The other matcher can only add the one excluded character.
        if (other.matches(match)) {
          return ANY;
        }
        return this;
      }
      @Override public CharMatcher negate() {
        return is(match);
      }
    };
  }

  /**
   * Returns a {@code char} matcher that matches any character contained in
   * the given character sequence. Sequences of length zero, one and two get
   * dedicated matchers; longer ones are matched by binary search over a
   * sorted copy of the characters.
   *
   * @param sequence the characters to match
   * @return a matcher accepting exactly the characters of {@code sequence}
   */
  public static CharMatcher anyOf(final CharSequence sequence) {
    final int size = sequence.length();
    if (size == 0) {
      return NONE;
    }
    if (size == 1) {
      return is(sequence.charAt(0));
    }
    if (size == 2) {
      final char first = sequence.charAt(0);
      final char second = sequence.charAt(1);
      return new CharMatcher() {
        @Override public boolean matches(char c) {
          return c == first || c == second;
        }
        @Override protected void setBits(LookupTable table) {
          table.set(first);
          table.set(second);
        }
        @Override public CharMatcher precomputed() {
          return this;
        }
      };
    }

    // General case: sort once so that each query is a binary search.
    final char[] sorted = sequence.toString().toCharArray();
    Arrays.sort(sorted); // not worth collapsing duplicates

    return new CharMatcher() {
      @Override public boolean matches(char c) {
        int position = Arrays.binarySearch(sorted, c);
        return position >= 0;
      }
      @Override protected void setBits(LookupTable table) {
        for (int i = 0; i < sorted.length; i++) {
          table.set(sorted[i]);
        }
      }
    };
  }

  /**
   * Returns a {@code char} matcher that matches any character absent from the
   * given character sequence; it is the negation of {@link #anyOf}.
   *
   * @param sequence the characters to reject
   * @return a matcher accepting every character not in {@code sequence}
   */
  public static CharMatcher noneOf(CharSequence sequence) {
    CharMatcher listed = anyOf(sequence);
    return listed.negate();
  }

  /**
   * Returns a {@code char} matcher that matches any character in a given
   * range, where both endpoints are inclusive. For example, to match any
   * lowercase letter of the English alphabet, use {@code
   * CharMatcher.inRange('a', 'z')}.
   *
   * @param startInclusive the lowest character matched
   * @param endInclusive the highest character matched
   * @return a matcher for the closed range
   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
   */
  public static CharMatcher inRange(
      final char startInclusive, final char endInclusive) {
    checkArgument(endInclusive >= startInclusive);
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return c >= startInclusive && c <= endInclusive;
      }
      @Override protected void setBits(LookupTable table) {
        // Count with an int so that an upper bound of Character.MAX_VALUE
        // cannot wrap the loop variable back to zero.
        for (int code = startInclusive; code <= endInclusive; code++) {
          table.set((char) code);
        }
      }
      @Override public CharMatcher precomputed() {
        return this;
      }
    };
  }

  /**
   * Adapts a {@link Character}-based predicate to a matcher that behaves
   * identically but operates on primitive {@code char} values. A predicate
   * that already is a {@code CharMatcher} is returned as-is.
   *
   * @param predicate the predicate to adapt
   * @return a matcher backed by {@code predicate}
   * @throws NullPointerException if {@code predicate} is null
   */
  public static CharMatcher forPredicate(
      final Predicate<? super Character> predicate) {
    checkNotNull(predicate);
    if (!(predicate instanceof CharMatcher)) {
      return new CharMatcher() {
        @Override public boolean matches(char c) {
          return predicate.apply(c);
        }
        @Override public boolean apply(Character character) {
          // Reject null explicitly, then hand the boxed value straight to the
          // wrapped predicate.
          Character checked = checkNotNull(character);
          return predicate.apply(checked);
        }
      };
    }
    return (CharMatcher) predicate;
  }

  // Abstract methods

  /**
   * Determines a true or false value for the given character. This is the
   * single abstract method of the class; the default implementations of the
   * text-processing methods are built on it.
   *
   * @param c the character to test
   * @return {@code true} if this matcher matches {@code c}
   */
  public abstract boolean matches(char c);

  // Non-static factories

  /**
   * Returns a matcher that matches exactly the characters this matcher does
   * not match. Negating the result gives back this matcher.
   *
   * @return the complement of this matcher
   */
  public CharMatcher negate() {
    final CharMatcher positive = this;
    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return !positive.matches(c);
      }

      // The bulk queries are answered by the mirror-image query on the
      // matcher being negated.
      @Override public boolean matchesAllOf(CharSequence sequence) {
        return positive.matchesNoneOf(sequence);
      }
      @Override public boolean matchesNoneOf(CharSequence sequence) {
        return positive.matchesAllOf(sequence);
      }
      @Override public int countIn(CharSequence sequence) {
        int total = sequence.length();
        return total - positive.countIn(sequence);
      }
      @Override public CharMatcher negate() {
        return positive;
      }
    };
  }

  /**
   * Returns a matcher that matches a character only when both this matcher
   * and {@code other} match it.
   *
   * @param other the matcher to intersect with
   * @return the conjunction of the two matchers
   * @throws NullPointerException if {@code other} is null
   */
  public CharMatcher and(CharMatcher other) {
    CharMatcher checked = checkNotNull(other);
    List<CharMatcher> pair = Arrays.asList(this, checked);
    return new And(pair);
  }

  /**
   * Matcher that accepts a character only when every component matcher
   * accepts it. Chained {@code and} calls produce one flat list of
   * components rather than nested conjunctions.
   */
  private static class And extends CharMatcher {
    // Set once in the constructor and never reassigned or mutated.
    private final List<CharMatcher> components;

    And(List<CharMatcher> components) {
      this.components = components; // Skip defensive copy (private)
    }

    @Override public boolean matches(char c) {
      for (CharMatcher matcher : components) {
        if (!matcher.matches(c)) {
          return false;
        }
      }
      return true;
    }

    @Override public CharMatcher and(CharMatcher other) {
      // Copy before appending so that this instance's list stays untouched.
      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
      newComponents.add(checkNotNull(other));
      return new And(newComponents);
    }
  }

  /**
   * Returns a matcher that matches a character when this matcher,
   * {@code other}, or both match it.
   *
   * @param other the matcher to unite with
   * @return the disjunction of the two matchers
   * @throws NullPointerException if {@code other} is null
   */
  public CharMatcher or(CharMatcher other) {
    CharMatcher checked = checkNotNull(other);
    List<CharMatcher> pair = Arrays.asList(this, checked);
    return new Or(pair);
  }

  /**
   * Matcher that accepts a character when at least one component matcher
   * accepts it. Chained {@code or} calls produce one flat list of components
   * rather than nested disjunctions.
   */
  private static class Or extends CharMatcher {
    // Set once in the constructor and never reassigned or mutated.
    private final List<CharMatcher> components;

    Or(List<CharMatcher> components) {
      this.components = components; // Skip defensive copy (private)
    }

    @Override public boolean matches(char c) {
      for (CharMatcher matcher : components) {
        if (matcher.matches(c)) {
          return true;
        }
      }
      return false;
    }

    @Override public CharMatcher or(CharMatcher other) {
      // Copy before appending so that this instance's list stays untouched.
      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
      newComponents.add(checkNotNull(other));
      return new Or(newComponents);
    }

    @Override protected void setBits(LookupTable table) {
      // The union's bits are the bits of each component; setBits only ever
      // turns bits on, so the components can simply write in turn.
      for (CharMatcher matcher : components) {
        matcher.setBits(table);
      }
    }
  }

  /**
   * Returns a {@code char} matcher functionally equivalent to this one, but
   * which may be faster to query than the original; your mileage may vary.
   * Precomputation takes time and is likely to be worthwhile only if the
   * precomputed matcher is queried many thousands of times.
   *
   * <p>This method has no effect (returns {@code this}) when called in GWT:
   * it's unclear whether a precomputed matcher is faster, but it certainly
   * consumes more memory, which doesn't seem like a worthwhile tradeoff in a
   * browser.
   *
   * @return whatever matcher {@code Platform.precomputeCharMatcher} produces
   *     for this one
   */
  public CharMatcher precomputed() {
    return Platform.precomputeCharMatcher(this);
  }

  /**
   * The real work behind {@link #precomputed}. Calls are routed through a
   * method on {@link Platform} so that GWT can behave differently.
   *
   * <p>The default precomputation caches the configuration of the original
   * matcher in an eight-kilobyte bit array. In some situations this produces a
   * matcher which is faster to query than the original.
   *
   * <p>The default implementation creates a new bit array, fills it through
   * {@link #setBits(LookupTable)}, and returns a matcher that answers every
   * query from that array.
   */
  CharMatcher precomputedInternal() {
    final LookupTable bits = new LookupTable();
    setBits(bits);

    return new CharMatcher() {
      @Override public boolean matches(char c) {
        return bits.get(c);
      }

      // TODO: make methods like negate() smart

      @Override public CharMatcher precomputed() {
        // Already table-backed; precomputing again would gain nothing.
        return this;
      }
    };
  }

  /**
   * For use by implementors; sets the bit corresponding to each character
   * (from {@code Character.MIN_VALUE} to {@code Character.MAX_VALUE}) that
   * matches this matcher in the given bit array, leaving all other bits
   * untouched.
   *
   * <p>The default implementation loops over every possible character value,
   * invoking {@link #matches} for each one.
   *
   * @param table the bit array to record matching characters in
   */
  protected void setBits(LookupTable table) {
    // Count with an int: a char loop variable would wrap around after
    // Character.MAX_VALUE and never fail the bound check.
    for (int code = Character.MIN_VALUE; code <= Character.MAX_VALUE; code++) {
      char c = (char) code;
      if (matches(c)) {
        table.set(c);
      }
    }
  }

  /**
   * A bit array holding one bit per {@code char} value, used by {@link
   * CharMatcher#precomputed}. The 2048 ints of 32 bits each give 65536 bits.
   *
   * <p>TODO: possibly share a common BitArray class with BloomFilter
   * and others... a simpler java.util.BitSet.
   */
  protected static class LookupTable {
    int[] data = new int[2048];

    void set(char index) {
      // The upper 11 bits of the char pick the word. The lower 5 bits pick the
      // bit within it: an int shift uses only the low 5 bits of its distance,
      // so shifting by the whole index is enough.
      int word = index >> 5;
      int mask = 1 << index;
      data[word] |= mask;
    }
    boolean get(char index) {
      int word = data[index >> 5];
      int mask = 1 << index;
      return (word & mask) != 0;
    }
  }

  // Text processing routines

  /**
   * Returns {@code true} if every character of a character sequence is a
   * matching character.
   *
   * <p>The default implementation walks the sequence from its last character
   * to its first, invoking {@link #matches} on each, and stops at the first
   * character that does not match.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches every character in the
   *     sequence, including when the sequence is empty
   */
  public boolean matchesAllOf(CharSequence sequence) {
    int i = sequence.length();
    while (--i >= 0) {
      if (!matches(sequence.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns {@code true} if a character sequence contains no matching
   * characters.
   *
   * <p>The default implementation delegates to {@link #indexIn(CharSequence)}
   * and reports whether that search found no matching character.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches no character in the
   *     sequence, including when the sequence is empty
   */
  public boolean matchesNoneOf(CharSequence sequence) {
    return indexIn(sequence) == -1;
  }

  // TODO: perhaps add matchesAnyOf()

  /**
   * Finds the first matching character in a character sequence.
   *
   * <p>The default implementation scans the sequence from the front, calling
   * {@link #matches} on each character until one matches.
   *
   * @param sequence the character sequence to examine from the beginning
   * @return the index of the first matching character, or {@code -1} if no
   *     character matches
   */
  public int indexIn(CharSequence sequence) {
    final int end = sequence.length();
    int i = 0;
    while (i < end) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
      i++;
    }
    return -1;
  }

  /**
   * Finds the first matching character in a character sequence at or after a
   * given position.
   *
   * <p>The default implementation validates {@code start}, then scans forward
   * from it, calling {@link #matches} on each character until one matches.
   *
   * @param sequence the character sequence to examine
   * @param start the first index to examine; must be nonnegative and no
   *     greater than {@code sequence.length()}
   * @return the index of the first matching character, guaranteed to be no less
   *     than {@code start}, or {@code -1} if no character matches
   * @throws IndexOutOfBoundsException if start is negative or greater than
   *     {@code sequence.length()}
   */
  public int indexIn(CharSequence sequence, int start) {
    final int end = sequence.length();
    Preconditions.checkPositionIndex(start, end);
    int i = start;
    while (i < end) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
      i++;
    }
    return -1;
  }

  /**
   * Finds the last matching character in a character sequence.
   *
   * <p>The default implementation scans the sequence from the back, calling
   * {@link #matches} on each character until one matches.
   *
   * @param sequence the character sequence to examine from the end
   * @return the index of the last matching character, or {@code -1} if no
   *     character matches
   */
  public int lastIndexIn(CharSequence sequence) {
    int i = sequence.length();
    while (--i >= 0) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Counts the matching characters in a character sequence.
   *
   * @param sequence the character sequence to examine
   * @return how many characters of {@code sequence} this matcher matches
   */
  public int countIn(CharSequence sequence) {
    int matched = 0;
    int i = 0;
    while (i < sequence.length()) {
      if (matches(sequence.charAt(i))) {
        matched++;
      }
      i++;
    }
    return matched;
  }

  /**
   * Returns a string containing all non-matching characters of a character
   * sequence, in order. For example: <pre>   {@code
   *
   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
   *
   * ... returns {@code "bzr"}.
   *
   * <p>The default implementation returns {@code sequence.toString()} itself
   * when nothing matches; otherwise it compacts a copy of the characters in
   * place, shifting each kept character left over the removed ones.
   *
   * @param sequence the character sequence to remove matching characters from
   * @return the string of non-matching characters
   */
  public String removeFrom(CharSequence sequence) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      return string;
    }

    char[] chars = string.toCharArray();
    // Number of matching characters dropped so far, and therefore how far
    // left each kept character has to move. Starts at 1 for the match at pos.
    int spread = 1;

    // This unusual loop comes from extensive benchmarking
    OUT:
    while (true) {
      // Step over the matching character that was just counted.
      pos++;
      while (true) {
        if (pos == chars.length) {
          break OUT;
        }
        if (matches(chars[pos])) {
          // Another character to drop; leave the inner loop to count it.
          break;
        }
        // Kept character: move it left over the dropped ones.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // pos equals chars.length here, so pos - spread is the kept length.
    return new String(chars, 0, pos - spread);
  }

  /**
   * Returns a string containing only the matching characters of a character
   * sequence, in order. For example: <pre>   {@code
   *
   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
   *
   * ... returns {@code "aaa"}.
   *
   * <p>Implemented by removing everything the negated matcher matches.
   *
   * @param sequence the character sequence to keep matching characters from
   * @return the string of matching characters
   */
  public String retainFrom(CharSequence sequence) {
    CharMatcher complement = negate();
    return complement.removeFrom(sequence);
  }

  /**
   * Returns a string copy of the input character sequence in which every
   * matching character has been swapped for a given replacement character.
   * For example: <pre>   {@code
   *
   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
   *
   * ... returns {@code "rodor"}.
   *
   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
   * the first matching character, then iterates the remainder of the sequence
   * calling {@link #matches(char)} for each character.
   *
   * @param sequence the character sequence to replace matching characters in
   * @param replacement the character to append to the result string in place of
   *     each matching character in {@code sequence}
   * @return the new string
   */
  public String replaceFrom(CharSequence sequence, char replacement) {
    String text = sequence.toString();
    int firstMatch = indexIn(text);
    if (firstMatch == -1) {
      // Nothing to replace; skip the array copy.
      return text;
    }

    char[] result = text.toCharArray();
    result[firstMatch] = replacement;
    int i = firstMatch + 1;
    while (i < result.length) {
      if (matches(result[i])) {
        result[i] = replacement;
      }
      i++;
    }
    return new String(result);
  }

  /**
   * Returns a string copy of the input character sequence in which every
   * matching character has been swapped for a given replacement sequence.
   * For example: <pre>   {@code
   *
   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
   *
   * ... returns {@code "yoohoo"}.
   *
   * <p><b>Note:</b> If the replacement is a fixed string with only one character,
   * you are better off calling {@link #replaceFrom(CharSequence, char)} directly.
   *
   * @param sequence the character sequence to replace matching characters in
   * @param replacement the characters to append to the result string in place
   *     of each matching character in {@code sequence}
   * @return the new string
   */
  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
    // Replacements of length zero and one have cheaper dedicated paths.
    switch (replacement.length()) {
      case 0:
        return removeFrom(sequence);
      case 1:
        return replaceFrom(sequence, replacement.charAt(0));
      default:
        break;
    }

    String text = sequence.toString();
    int firstMatch = indexIn(text);
    if (firstMatch == -1) {
      return text;
    }

    int length = text.length();
    StringBuilder out = new StringBuilder((int) (length * 1.5) + 16);

    // Alternate between copying the unmatched stretch before each match and
    // appending the replacement in place of the match itself.
    int copiedUpTo = 0;
    for (int match = firstMatch; match != -1; match = indexIn(text, copiedUpTo)) {
      out.append(text, copiedUpTo, match);
      out.append(replacement);
      copiedUpTo = match + 1;
    }

    // Whatever follows the last match is copied unchanged.
    out.append(text, copiedUpTo, length);
    return out.toString();
  }

  /**
   * Strips every leading and every trailing matching character from the input
   * character sequence and returns what remains as a string. Matching
   * characters in the interior are left alone. For example: <pre> {@code
   *
   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
   *
   * ... returns {@code "cat"}.
   *
   * <p>Note that<pre>   {@code
   *
   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
   *
   * ... is equivalent to {@link String#trim()}.
   */
  public String trimFrom(CharSequence sequence) {
    int length = sequence.length();

    // Advance past the leading run of matching characters.
    int start = 0;
    while (start < length && matches(sequence.charAt(start))) {
      start++;
    }

    // Retreat past the trailing run; 'end' is exclusive. The scan never
    // re-examines the character at 'start', which is either a known
    // non-match or one past the end of the input.
    int end = length;
    while (end - 1 > start && matches(sequence.charAt(end - 1))) {
      end--;
    }

    return sequence.subSequence(start, end).toString();
  }

  /**
   * Strips every leading matching character from the input character sequence
   * and returns what remains as a string. For example: <pre> {@code
   *
   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
   *
   * ... returns {@code "catbab"}.
   */
  public String trimLeadingFrom(CharSequence sequence) {
    int length = sequence.length();

    // Find the first character that does not match (or run off the end).
    int start = 0;
    while (start < length && matches(sequence.charAt(start))) {
      start++;
    }

    return sequence.subSequence(start, length).toString();
  }

  /**
   * Strips every trailing matching character from the input character
   * sequence and returns what remains as a string. For example: <pre> {@code
   *
   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
   *
   * ... returns {@code "abacat"}.
   */
  public String trimTrailingFrom(CharSequence sequence) {
    // 'end' is the exclusive upper bound of the portion that is kept; walk it
    // backwards over the trailing run of matching characters.
    int end = sequence.length();
    while (end > 0 && matches(sequence.charAt(end - 1))) {
      end--;
    }

    return sequence.subSequence(0, end).toString();
  }

  /**
   * Returns a string copy of the input character sequence, with each group of
   * consecutive characters that match this matcher replaced by a single
   * replacement character. For example: <pre>   {@code
   *
   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
   *
   * ... returns {@code "b-p-r"}.
   *
   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
   * the first matching character, then iterates the remainder of the sequence
   * calling {@link #matches(char)} for each character.
   *
   * @param sequence the character sequence to replace matching groups of
   *     characters in
   * @param replacement the character to append to the result string in place of
   *     each group of matching characters in {@code sequence}
   * @return the new string; if nothing matches, this is simply
   *     {@code sequence.toString()}
   */
  public String collapseFrom(CharSequence sequence, char replacement) {
    int first = indexIn(sequence);
    if (first == -1) {
      return sequence.toString();
    }

    // TODO: this implementation can probably be made faster.

    int len = sequence.length();
    // Copy the unmatched prefix as-is, then emit one replacement for the group
    // that begins at 'first'.
    StringBuilder builder = new StringBuilder(len)
        .append(sequence, 0, first)
        .append(replacement);
    // True while the most recently examined character belonged to a matching
    // group, i.e. while a replacement for the current group is already emitted.
    boolean in = true;
    for (int i = first + 1; i < len; i++) {
      char c = sequence.charAt(i);
      // Call matches(char) directly: going through apply(Character) would box
      // and immediately unbox every character.
      if (matches(c)) {
        if (!in) {
          builder.append(replacement);
          in = true;
        }
      } else {
        builder.append(c);
        in = false;
      }
    }
    return builder.toString();
  }

  /**
   * Collapses groups of matching characters exactly as {@link #collapseFrom}
   * does, except that groups of matching characters at the start or end of the
   * sequence are removed without replacement. For example: <pre>   {@code
   *
   *   CharMatcher.anyOf("ab").trimAndCollapseFrom("abacatbab", '_')}</pre>
   *
   * ... returns {@code "c_t"}.
   *
   * @param sequence the character sequence to trim and collapse
   * @param replacement the character to append to the result string in place of
   *     each interior group of matching characters in {@code sequence}
   * @return the new string; empty if every character of {@code sequence}
   *     matches
   */
  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
    // Locate the first character that does NOT match; everything before it is
    // a leading group and is dropped.
    int first = negate().indexIn(sequence);
    if (first == -1) {
      return ""; // everything matches. nothing's left.
    }
    int len = sequence.length();
    StringBuilder builder = new StringBuilder(len);
    boolean inMatchingGroup = false;
    for (int i = first; i < len; i++) {
      char c = sequence.charAt(i);
      // Call matches(char) directly: going through apply(Character) would box
      // and immediately unbox every character.
      if (matches(c)) {
        inMatchingGroup = true;
      } else {
        // The replacement is emitted lazily, only once a non-matching
        // character follows the group, so a trailing group produces nothing.
        if (inMatchingGroup) {
          builder.append(replacement);
          inMatchingGroup = false;
        }
        builder.append(c);
      }
    }
    return builder.toString();
  }

  // Predicate interface

  /**
   * Predicate-style entry point: unboxes the given {@link Character} and
   * reports whether this matcher matches it, exactly as
   * {@link #matches(char)} would.
   *
   * @param character the boxed character to test
   * @return {@code true} if this matcher matches the given character
   * @throws NullPointerException if {@code character} is null
   */
  /*@Override*/ public boolean apply(Character character) {
    // Explicit unboxing; a null argument fails here with NullPointerException.
    return matches(character.charValue());
  }
}

// (Hosting-site footer text, "Log in to enjoy more benefits"; not part of the CharMatcher source.)