/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 202 by ph10, Fri Aug 3 09:44:26 2007 UTC | revision 211 by ph10, Thu Aug 9 09:52:43 2007 UTC
# Line 140  static const short int escapes[] = { Line 140  static const short int escapes[] = {
140  #endif  #endif
141    
142    
143    /* Table of special "verbs" like (*PRUNE). Each entry pairs a verb name and its length with the opcode that is emitted when the name is found after "(*" in a pattern. */
144    
145    typedef struct verbitem {
146      const char *name;   /* verb name, compared with strncmp() against the letters following "(*" */
147      int   len;          /* length of name; must equal the scanned name length for a match */
148      int   op;           /* opcode written to the compiled code when this verb matches */
149    } verbitem;
150    
151    static verbitem verbs[] = {
152      { "ACCEPT", 6, OP_ACCEPT },   /* the compile code also sets cd->had_accept for this opcode */
153      { "COMMIT", 6, OP_COMMIT },
154      { "F",      1, OP_FAIL },     /* "F" and "FAIL" both map to OP_FAIL */
155      { "FAIL",   4, OP_FAIL },
156      { "PRUNE",  5, OP_PRUNE },
157      { "SKIP",   4, OP_SKIP  },
158      { "THEN",   4, OP_THEN  }
159    };
160    
161    static int verbcount = sizeof(verbs)/sizeof(verbitem);   /* number of entries in verbs[]; upper bound of the lookup loop */
162    
163    
164  /* Tables of names of POSIX character classes and their lengths. The list is  /* Tables of names of POSIX character classes and their lengths. The list is
165  terminated by a zero length entry. The first three must be alpha, lower, upper,  terminated by a zero length entry. The first three must be alpha, lower, upper,
166  as this is assumed for handling case independence. */  as this is assumed for handling case independence. */
# Line 258  static const char *error_texts[] = { Line 279  static const char *error_texts[] = {
279    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
280    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
281    "\\g is not followed by a braced name or an optionally braced non-zero number",    "\\g is not followed by a braced name or an optionally braced non-zero number",
282    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number",
283      "(*VERB) with an argument is not supported",
284      /* 60 */
285      "(*VERB) not recognized"
286  };  };
287    
288    
# Line 941  for (; *ptr != 0; ptr++) Line 965  for (; *ptr != 0; ptr++)
965    /* An opening parens must now be a real metacharacter */    /* An opening parens must now be a real metacharacter */
966    
967    if (*ptr != '(') continue;    if (*ptr != '(') continue;
968    if (ptr[1] != '?')    if (ptr[1] != '?' && ptr[1] != '*')
969      {      {
970      count++;      count++;
971      if (name == NULL && count == lorn) return count;      if (name == NULL && count == lorn) return count;
# Line 1409  for (code = first_significant_code(code Line 1433  for (code = first_significant_code(code
1433    
1434    /* For other groups, scan the branches. */    /* For other groups, scan the branches. */
1435    
1436    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
1437      {      {
1438      BOOL empty_branch;      BOOL empty_branch;
1439      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2453  for (;; ptr++) Line 2477  for (;; ptr++)
2477        goto FAILED;        goto FAILED;
2478        }        }
2479    
2480      /* If the first character is '^', set the negation flag and skip it. */      /* If the first character is '^', set the negation flag and skip it. Also,
2481        if the first few characters (either before or after ^) are \Q\E or \E we
2482        skip them too. This makes for compatibility with Perl. */
2483    
2484      if ((c = *(++ptr)) == '^')      negate_class = FALSE;
2485        for (;;)
2486        {        {
       negate_class = TRUE;  
2487        c = *(++ptr);        c = *(++ptr);
2488        }        if (c == '\\')
2489      else          {
2490        {          if (ptr[1] == 'E') ptr++;
2491        negate_class = FALSE;            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
2492                else break;
2493            }
2494          else if (!negate_class && c == '^')
2495            negate_class = TRUE;
2496          else break;
2497        }        }
2498    
2499      /* Keep a count of chars with values < 256 so that we can optimize the case      /* Keep a count of chars with values < 256 so that we can optimize the case
# Line 2603  for (;; ptr++) Line 2634  for (;; ptr++)
2634        of the specials, which just set a flag. The sequence \b is a special        of the specials, which just set a flag. The sequence \b is a special
2635        case. Inside a class (and only there) it is treated as backspace.        case. Inside a class (and only there) it is treated as backspace.
2636        Elsewhere it marks a word boundary. Other escapes have preset maps ready        Elsewhere it marks a word boundary. Other escapes have preset maps ready
2637        to or into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
2638        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
2639    
2640        if (c == '\\')        if (c == '\\')
# Line 3853  for (;; ptr++) Line 3884  for (;; ptr++)
3884      /* ===================================================================*/      /* ===================================================================*/
3885      /* Start of nested parenthesized sub-expression, or comment or lookahead or      /* Start of nested parenthesized sub-expression, or comment or lookahead or
3886      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
3887      parenthesis forms. First deal with the specials; all are introduced by ?,      parenthesis forms.  */
     and the appearance of any of them means that this is not a capturing  
     group. */  
3888    
3889      case '(':      case '(':
3890      newoptions = options;      newoptions = options;
# Line 3864  for (;; ptr++) Line 3893  for (;; ptr++)
3893      save_hwm = cd->hwm;      save_hwm = cd->hwm;
3894      reset_bracount = FALSE;      reset_bracount = FALSE;
3895    
3896      if (*(++ptr) == '?')      /* First deal with various "verbs" that can be introduced by '*'. */
3897    
3898        if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
3899          {
3900          int i, namelen;
3901          const uschar *name = ++ptr;
3902          previous = NULL;
3903          while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
3904          if (*ptr == ':')
3905            {
3906            *errorcodeptr = ERR59;   /* Not supported */
3907            goto FAILED;
3908            }
3909          if (*ptr != ')')
3910            {
3911            *errorcodeptr = ERR60;
3912            goto FAILED;
3913            }
3914          namelen = ptr - name;
3915          for (i = 0; i < verbcount; i++)
3916            {
3917            if (namelen == verbs[i].len &&
3918                strncmp((char *)name, verbs[i].name, namelen) == 0)
3919              {
3920              *code = verbs[i].op;
3921              if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
3922              break;
3923              }
3924            }
3925          if (i < verbcount) continue;
3926          *errorcodeptr = ERR60;
3927          goto FAILED;
3928          }
3929    
3930        /* Deal with the extended parentheses; all are introduced by '?', and the
3931        appearance of any of them means that this is not a capturing group. */
3932    
3933        else if (*ptr == '?')
3934        {        {
3935        int i, set, unset, namelen;        int i, set, unset, namelen;
3936        int *optset;        int *optset;
# Line 4106  for (;; ptr++) Line 4172  for (;; ptr++)
4172    
4173          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4174          case '!':                 /* Negative lookahead */          case '!':                 /* Negative lookahead */
         bravalue = OP_ASSERT_NOT;  
4175          ptr++;          ptr++;
4176            if (*ptr == ')')          /* Optimize (?!) */
4177              {
4178              *code++ = OP_FAIL;
4179              previous = NULL;
4180              continue;
4181              }
4182            bravalue = OP_ASSERT_NOT;
4183          break;          break;
4184    
4185    
# Line 5699  cd->start_code = codestart; Line 5771  cd->start_code = codestart;
5771  cd->hwm = cworkspace;  cd->hwm = cworkspace;
5772  cd->req_varyopt = 0;  cd->req_varyopt = 0;
5773  cd->nopartial = FALSE;  cd->nopartial = FALSE;
5774    cd->had_accept = FALSE;
5775    
5776  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
5777  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
# Line 5713  re->top_bracket = cd->bracount; Line 5786  re->top_bracket = cd->bracount;
5786  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
5787    
5788  if (cd->nopartial) re->options |= PCRE_NOPARTIAL;  if (cd->nopartial) re->options |= PCRE_NOPARTIAL;
5789    if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */
5790    
5791  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
5792    

Legend:
Removed from v.202  
changed lines
  Added in v.211

  ViewVC Help
Powered by ViewVC 1.1.5