普通文本  |  224行  |  5.58 KB

// Copyright 2006 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Test character class manipulations.

#include "util/test.h"
#include "re2/regexp.h"

namespace re2 {

struct CCTest {
  struct {
    Rune lo;
    Rune hi;
  } add[10];
  int remove;
  struct {
    Rune lo;
    Rune hi;
  } final[10];
};

static CCTest tests[] = {
  { { { 10, 20 }, {-1} }, -1,
    { { 10, 20 }, {-1} } },

  { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
    { { 10, 30 }, {-1} } },

  { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
    { { 10, 40 }, {-1} } },

  { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
    { { 0, 50 }, {-1} } },

  { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },

  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },

  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },

  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
    { { 5, 25 }, {-1} } },

  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
    { { 10, 23 }, {-1} } },

  // These check boundary cases during negation.
  { { { 0, Runemax }, {-1} }, -1,
    { { 0, Runemax }, {-1} } },

  { { { 0, 50 }, {-1} }, -1,
    { { 0, 50 }, {-1} } },

  { { { 50, Runemax }, {-1} }, -1,
    { { 50, Runemax }, {-1} } },

  // Check RemoveAbove.
  { { { 50, Runemax }, {-1} }, 255,
    { { 50, 255 }, {-1} } },

  { { { 50, Runemax }, {-1} }, 65535,
    { { 50, 65535 }, {-1} } },

  { { { 50, Runemax }, {-1} }, Runemax,
    { { 50, Runemax }, {-1} } },

  { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
    { { 50, 60 }, { 250, 255 }, {-1} } },

  { { { 50, 60 }, {-1} }, 255,
    { { 50, 60 }, {-1} } },

  { { { 350, 360 }, {-1} }, 255,
    { {-1} } },

  { { {-1} }, 255,
    { {-1} } },
};

template<class CharClass>
static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
  if (t == NULL) {
    printf("\t%s:", desc);
  } else {
    printf("\n");
    printf("CharClass added: [%s]", desc);
    for (int k = 0; t->add[k].lo >= 0; k++)
      printf(" %d-%d", t->add[k].lo, t->add[k].hi);
    printf("\n");
    if (t->remove >= 0)
      printf("Removed > %d\n", t->remove);
    printf("\twant:");
    for (int k = 0; t->final[k].lo >= 0; k++)
      printf(" %d-%d", t->final[k].lo, t->final[k].hi);
    printf("\n");
    printf("\thave:");
  }

  for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
    printf(" %d-%d", it->lo, it->hi);
  printf("\n");
}

bool ShouldContain(CCTest *t, int x) {
  for (int j = 0; t->final[j].lo >= 0; j++)
    if (t->final[j].lo <= x && x <= t->final[j].hi)
      return true;
  return false;
}

// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.

CharClass* Negate(CharClass *cc) {
  return cc->Negate();
}

void Delete(CharClass* cc) {
  cc->Delete();
}

CharClassBuilder* Negate(CharClassBuilder* cc) {
  CharClassBuilder* ncc = cc->Copy();
  ncc->Negate();
  return ncc;
}

void Delete(CharClassBuilder* cc) {
  delete cc;
}

template<class CharClass>
bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
  typename CharClass::iterator it = cc->begin();
  int size = 0;
  for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
    if (it == cc->end() ||
        it->lo != t->final[j].lo ||
        it->hi != t->final[j].hi) {
      Broke(desc, t, cc);
      return false;
    }
    size += it->hi - it->lo + 1;
  }
  if (it != cc->end()) {
    Broke(desc, t, cc);
    return false;
  }
  if (cc->size() != size) {
    Broke(desc, t, cc);
    printf("wrong size: want %d have %d\n", size, cc->size());
    return false;
  }

  for (int j = 0; j < 101; j++) {
    if (j == 100)
      j = Runemax;
    if (ShouldContain(t, j) != cc->Contains(j)) {
      Broke(desc, t, cc);
      printf("want contains(%d)=%d, got %d\n",
             j, ShouldContain(t, j), cc->Contains(j));
      return false;
    }
  }

  CharClass* ncc = Negate(cc);
  for (int j = 0; j < 101; j++) {
    if (j == 100)
      j = Runemax;
    if (ShouldContain(t, j) == ncc->Contains(j)) {
      Broke(desc, t, cc);
      Broke("ncc", NULL, ncc);
      printf("want ncc contains(%d)!=%d, got %d\n",
             j, ShouldContain(t, j), ncc->Contains(j));
      Delete(ncc);
      return false;
    }
    if (ncc->size() != Runemax+1 - cc->size()) {
      Broke(desc, t, cc);
      Broke("ncc", NULL, ncc);
      printf("ncc size should be %d is %d\n",
             Runemax+1 - cc->size(), ncc->size());
      Delete(ncc);
      return false;
    }
  }
  Delete(ncc);
  return true;
}

TEST(TestCharClassBuilder, Adds) {
  int nfail = 0;
  for (int i = 0; i < arraysize(tests); i++) {
    CharClassBuilder ccb;
    CCTest* t = &tests[i];
    for (int j = 0; t->add[j].lo >= 0; j++)
      ccb.AddRange(t->add[j].lo, t->add[j].hi);
    if (t->remove >= 0)
      ccb.RemoveAbove(t->remove);
    if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
      nfail++;
    CharClass* cc = ccb.GetCharClass();
    if (!CorrectCC(cc, t, "before copy (CharClass)"))
      nfail++;
    cc->Delete();

    CharClassBuilder *ccb1 = ccb.Copy();
    if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
      nfail++;
    cc = ccb.GetCharClass();
    if (!CorrectCC(cc, t, "after copy (CharClass)"))
      nfail++;
    cc->Delete();
    delete ccb1;
  }
  EXPECT_EQ(nfail, 0);
}

}  // namespace re2