/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 322 by ph10, Wed Mar 5 17:14:08 2008 UTC | revision 356 by ph10, Tue Jul 8 14:14:34 2008 UTC
# Line 59  Arg RE::no_arg((void*)NULL); Line 59  Arg RE::no_arg((void*)NULL);
59    
60  // This is for ABI compatibility with old versions of pcre (pre-7.6),  // This is for ABI compatibility with old versions of pcre (pre-7.6),
61  // which defined a global no_arg variable instead of putting it in the  // which defined a global no_arg variable instead of putting it in the
62  // RE class.  This works on GCC >= 3, at least.  We could probably have  // RE class.  This works on GCC >= 3, at least.  It definitely works
63  // a more inclusive test if we ever needed it.  // for ELF, but may not for other object formats (Mach-O, for
64  #if defined(__GNUC__) && __GNUC__ >= 3  // instance, does not support aliases.)  We could probably have a more
65  extern Arg no_arg __attribute__((alias("_ZN7pcrecpp2RE6no_argE")));  // inclusive test if we ever needed it.  (Note that not only the
66    // __attribute__ syntax, but also __USER_LABEL_PREFIX__, are
67    // gnu-specific.)
68    #if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__)
69    # define ULP_AS_STRING(x)            ULP_AS_STRING_INTERNAL(x)
70    # define ULP_AS_STRING_INTERNAL(x)   #x
71    # define USER_LABEL_PREFIX_STR       ULP_AS_STRING(__USER_LABEL_PREFIX__)
72    extern Arg no_arg
73      __attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE")));
74  #endif  #endif
75    
76  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
# Line 364  static int NewlineMode(int pcre_options) Line 372  static int NewlineMode(int pcre_options)
372      else if (newline == -2)      else if (newline == -2)
373        newline_mode = PCRE_NEWLINE_ANYCRLF;        newline_mode = PCRE_NEWLINE_ANYCRLF;
374      else      else
375        assert("" == "Unexpected return value from pcre_config(NEWLINE)");        assert(NULL == "Unexpected return value from pcre_config(NEWLINE)");
376    }    }
377    return newline_mode;    return newline_mode;
378  }  }
# Line 449  bool RE::Extract(const StringPiece& rewr Line 457  bool RE::Extract(const StringPiece& rewr
457    // Note that it's legal to escape a character even if it has no    // Note that it's legal to escape a character even if it has no
458    // special meaning in a regular expression -- so this function does    // special meaning in a regular expression -- so this function does
459    // that.  (This also makes it identical to the perl function of the    // that.  (This also makes it identical to the perl function of the
460    // same name; see `perldoc -f quotemeta`.)    // same name; see `perldoc -f quotemeta`.)  The one exception is
461      // escaping NUL: rather than doing backslash + NUL, like perl does,
462      // we do '\0', because pcre itself doesn't take embedded NUL chars.
463    for (int ii = 0; ii < unquoted.size(); ++ii) {    for (int ii = 0; ii < unquoted.size(); ++ii) {
464      // Note that using 'isalnum' here raises the benchmark time from      // Note that using 'isalnum' here raises the benchmark time from
465      // 32ns to 58ns:      // 32ns to 58ns:
466      if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&      if (unquoted[ii] == '\0') {
467          (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&        result += "\\0";
468          (unquoted[ii] < '0' || unquoted[ii] > '9') &&      } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
469          unquoted[ii] != '_' &&                 (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
470          // If this is the part of a UTF8 or Latin1 character, we need                 (unquoted[ii] < '0' || unquoted[ii] > '9') &&
471          // to copy this byte without escaping.  Experimentally this is                 unquoted[ii] != '_' &&
472          // what works correctly with the regexp library.                 // If this is the part of a UTF8 or Latin1 character, we need
473          !(unquoted[ii] & 128)) {                 // to copy this byte without escaping.  Experimentally this is
474                   // what works correctly with the regexp library.
475                   !(unquoted[ii] & 128)) {
476        result += '\\';        result += '\\';
477          result += unquoted[ii];
478        } else {
479          result += unquoted[ii];
480      }      }
     result += unquoted[ii];  
481    }    }
482    
483    return result;    return result;
# Line 591  bool RE::Rewrite(string *out, const Stri Line 605  bool RE::Rewrite(string *out, const Stri
605          if (start >= 0)          if (start >= 0)
606            out->append(text.data() + start, vec[2 * n + 1] - start);            out->append(text.data() + start, vec[2 * n + 1] - start);
607        } else if (c == '\\') {        } else if (c == '\\') {
608          out->push_back('\\');          *out += '\\';
609        } else {        } else {
610          //fprintf(stderr, "invalid rewrite pattern: %.*s\n",          //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
611          //        rewrite.size(), rewrite.data());          //        rewrite.size(), rewrite.data());
612          return false;          return false;
613        }        }
614      } else {      } else {
615        out->push_back(c);        *out += c;
616      }      }
617    }    }
618    return true;    return true;

Legend:
Removed from v.322  
changed lines
  Added in v.356

  ViewVC Help
Powered by ViewVC 1.1.5