/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 269 by ph10, Fri Nov 16 16:22:24 2007 UTC revision 286 by ph10, Mon Dec 17 14:46:11 2007 UTC
# Line 301  static const char error_texts[] = Line 301  static const char error_texts[] =
301    /* 60 */    /* 60 */
302    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
303    "number is too big\0"    "number is too big\0"
304    "subpattern name expected after (?&\0"    "subpattern name expected\0"
305    "digit expected after (?+";    "digit expected after (?+";
306    
307    
# Line 498  ptr--;                            /* Set Line 498  ptr--;                            /* Set
498    
499  if (c == 0) *errorcodeptr = ERR1;  if (c == 0) *errorcodeptr = ERR1;
500    
501  /* Non-alphamerics are literals. For digits or letters, do an initial lookup in  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
502  a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
503  Otherwise further processing may be required. */  Otherwise further processing may be required. */
504    
505  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
506  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
507  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
508    
509  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
510  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
511  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
512  #endif  #endif
513    
# Line 724  else Line 724  else
724      break;      break;
725    
726      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
727      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphanumeric following \ is an error if PCRE_EXTRA was set;
728      for Perl compatibility, it is a literal. This code looks a bit odd, but      otherwise, for Perl compatibility, it is a literal. This code looks a bit
729      there used to be some cases other than the default, and there may be again      odd, but there used to be some cases other than the default, and there may
730      in future, so I haven't "optimized" it. */      be again in future, so I haven't "optimized" it. */
731    
732      default:      default:
733      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 1508  for (;;) Line 1508  for (;;)
1508  can match the empty string or not. It is called from could_be_empty()  can match the empty string or not. It is called from could_be_empty()
1509  below and from compile_branch() when checking for an unlimited repeat of a  below and from compile_branch() when checking for an unlimited repeat of a
1510  group that can match nothing. Note that first_significant_code() skips over  group that can match nothing. Note that first_significant_code() skips over
1511  assertions. If we hit an unclosed bracket, we return "empty" - this means we've  backward and negative forward assertions when its final argument is TRUE. If we
1512  struck an inner bracket whose current branch will already have been scanned.  hit an unclosed bracket, we return "empty" - this means we've struck an inner
1513    bracket whose current branch will already have been scanned.
1514    
1515  Arguments:  Arguments:
1516    code        points to start of search    code        points to start of search
# Line 1531  for (code = first_significant_code(code Line 1532  for (code = first_significant_code(code
1532    
1533    c = *code;    c = *code;
1534    
1535      /* Skip over forward assertions; the other assertions are skipped by
1536      first_significant_code() with a TRUE final argument. */
1537    
1538      if (c == OP_ASSERT)
1539        {
1540        do code += GET(code, 1); while (*code == OP_ALT);
1541        c = *code;
1542        continue;
1543        }
1544    
1545    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1546    
1547    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO)
# Line 2385  req_caseopt = ((options & PCRE_CASELESS) Line 2396  req_caseopt = ((options & PCRE_CASELESS)
2396  for (;; ptr++)  for (;; ptr++)
2397    {    {
2398    BOOL negate_class;    BOOL negate_class;
2399    BOOL should_flip_negation;    BOOL should_flip_negation;
2400    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2401    BOOL is_quantifier;    BOOL is_quantifier;
2402    BOOL is_recurse;    BOOL is_recurse;
# Line 2634  for (;; ptr++) Line 2645  for (;; ptr++)
2645        else break;        else break;
2646        }        }
2647    
2648      /* If a class contains a negative special such as \S, we need to flip the      /* If a class contains a negative special such as \S, we need to flip the
2649      negation flag at the end, so that support for characters > 255 works      negation flag at the end, so that support for characters > 255 works
2650      correctly (they are all included in the class). */      correctly (they are all included in the class). */
2651    
2652      should_flip_negation = FALSE;      should_flip_negation = FALSE;
# Line 2712  for (;; ptr++) Line 2723  for (;; ptr++)
2723          if (*ptr == '^')          if (*ptr == '^')
2724            {            {
2725            local_negate = TRUE;            local_negate = TRUE;
2726            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
2727            ptr++;            ptr++;
2728            }            }
2729    
# Line 2787  for (;; ptr++) Line 2798  for (;; ptr++)
2798          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
2799          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
2800    
2801          if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
2802          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
2803          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
2804          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
# Line 2815  for (;; ptr++) Line 2826  for (;; ptr++)
2826              continue;              continue;
2827    
2828              case ESC_D:              case ESC_D:
2829              should_flip_negation = TRUE;              should_flip_negation = TRUE;
2830              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
2831              continue;              continue;
2832    
# Line 2824  for (;; ptr++) Line 2835  for (;; ptr++)
2835              continue;              continue;
2836    
2837              case ESC_W:              case ESC_W:
2838              should_flip_negation = TRUE;              should_flip_negation = TRUE;
2839              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
2840              continue;              continue;
2841    
# Line 2834  for (;; ptr++) Line 2845  for (;; ptr++)
2845              continue;              continue;
2846    
2847              case ESC_S:              case ESC_S:
2848              should_flip_negation = TRUE;              should_flip_negation = TRUE;
2849              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
2850              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
2851              continue;              continue;
2852    
             case ESC_E: /* Perl ignores an orphan \E */  
             continue;  
   
2853              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
2854              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
2855              }              }
# Line 3076  for (;; ptr++) Line 3084  for (;; ptr++)
3084            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3085            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3086    
3087            /* \b is backslash; \X is literal X; \R is literal R; any other            /* \b is backspace; \X is literal X; \R is literal R; any other
3088            special means the '-' was literal */            special means the '-' was literal */
3089    
3090            if (d < 0)            if (d < 0)
# Line 3340  we set the flag only if there is a liter Line 3348  we set the flag only if there is a liter
3348      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
3349    
3350      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
3351      extended class, with its own opcode, unless there was a negated special      extended class, with its own opcode, unless there was a negated special
3352      such as \S in the class, because in that case all characters > 255 are in      such as \S in the class, because in that case all characters > 255 are in
3353      the class, so any that were explicitly given as well can be ignored. If      the class, so any that were explicitly given as well can be ignored. If
3354      (when there are explicit characters > 255 that must be listed) there are no      (when there are explicit characters > 255 that must be listed) there are no
3355      characters < 256, we can omit the bitmap in the actual compiled code. */      characters < 256, we can omit the bitmap in the actual compiled code. */
3356    
# Line 3373  we set the flag only if there is a liter Line 3381  we set the flag only if there is a liter
3381        }        }
3382  #endif  #endif
3383    
3384      /* If there are no characters > 255, set the opcode to OP_CLASS or      /* If there are no characters > 255, set the opcode to OP_CLASS or
3385      OP_NCLASS, depending on whether the whole class was negated and whether      OP_NCLASS, depending on whether the whole class was negated and whether
3386      there were negative specials such as \S in the class. Then copy the 32-byte      there were negative specials such as \S in the class. Then copy the 32-byte
3387      map into the code vector, negating it if necessary. */      map into the code vector, negating it if necessary. */
3388    
3389      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
3390      if (negate_class)      if (negate_class)
3391        {        {
# Line 4021  we set the flag only if there is a liter Line 4029  we set the flag only if there is a liter
4029        int len;        int len;
4030        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
4031            *tempcode == OP_NOTEXACT)            *tempcode == OP_NOTEXACT)
4032          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode] +
4033              ((*tempcode == OP_TYPEEXACT &&
4034                 (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
4035        len = code - tempcode;        len = code - tempcode;
4036        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4037          {          {
# Line 4248  we set the flag only if there is a liter Line 4258  we set the flag only if there is a liter
4258              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4259              goto FAILED;              goto FAILED;
4260              }              }
4261            if (refsign == '-')            recno = (refsign == '-')?
4262                cd->bracount - recno + 1 : recno +cd->bracount;
4263              if (recno <= 0 || recno > cd->final_bracount)
4264              {              {
4265              recno = cd->bracount - recno + 1;              *errorcodeptr = ERR15;
4266              if (recno <= 0)              goto FAILED;
               {  
               *errorcodeptr = ERR15;  
               goto FAILED;  
               }  
4267              }              }
           else recno += cd->bracount;  
4268            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4269            break;            break;
4270            }            }
# Line 4329  we set the flag only if there is a liter Line 4336  we set the flag only if there is a liter
4336            skipbytes = 1;            skipbytes = 1;
4337            }            }
4338    
4339          /* Check for the "name" actually being a subpattern number. */          /* Check for the "name" actually being a subpattern number. We are
4340            in the second pass here, so final_bracount is set. */
4341    
4342          else if (recno > 0)          else if (recno > 0 && recno <= cd->final_bracount)
4343            {            {
4344            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4345            }            }
# Line 4525  we set the flag only if there is a liter Line 4533  we set the flag only if there is a liter
4533    
4534          /* We come here from the Python syntax above that handles both          /* We come here from the Python syntax above that handles both
4535          references (?P=name) and recursion (?P>name), as well as falling          references (?P=name) and recursion (?P>name), as well as falling
4536          through from the Perl recursion syntax (?&name). */          through from the Perl recursion syntax (?&name). We also come here from
4537            the Perl \k<name> or \k'name' back reference syntax and the \k{name}
4538            .NET syntax. */
4539    
4540          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
4541          name = ++ptr;          name = ++ptr;
# Line 4541  we set the flag only if there is a liter Line 4551  we set the flag only if there is a liter
4551              {              {
4552              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
4553              goto FAILED;              goto FAILED;
4554              }              }
4555            if (*ptr != terminator)            if (*ptr != terminator)
4556              {              {
4557              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
# Line 4555  we set the flag only if there is a liter Line 4565  we set the flag only if there is a liter
4565            recno = 0;            recno = 0;
4566            }            }
4567    
4568          /* In the real compile, seek the name in the table. We check the name          /* In the real compile, seek the name in the table. We check the name
4569          first, and then check that we have reached the end of the name in the          first, and then check that we have reached the end of the name in the
4570          table. That way, if the name is longer than any in the table,          table. That way, if the name is longer than any in the table,
4571          the comparison will fail without reading beyond the table entry. */          the comparison will fail without reading beyond the table entry. */
4572    
# Line 4566  we set the flag only if there is a liter Line 4576  we set the flag only if there is a liter
4576            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
4577              {              {
4578              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
4579                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
4580                break;                break;
4581              slot += cd->name_entry_size;              slot += cd->name_entry_size;
4582              }              }
# Line 4604  we set the flag only if there is a liter Line 4614  we set the flag only if there is a liter
4614            {            {
4615            const uschar *called;            const uschar *called;
4616    
4617            if ((refsign = *ptr) == '+')            if ((refsign = *ptr) == '+')
4618              {              {
4619              ptr++;              ptr++;
4620              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
4621                {                {
4622                *errorcodeptr = ERR63;                *errorcodeptr = ERR63;
4623                goto FAILED;                goto FAILED;
4624                }                }
4625              }              }
4626            else if (refsign == '-')            else if (refsign == '-')
4627              {              {
4628              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
# Line 5939  to compile parts of the pattern into; th Line 5949  to compile parts of the pattern into; th
5949  no longer needed, so hopefully this workspace will never overflow, though there  no longer needed, so hopefully this workspace will never overflow, though there
5950  is a test for its doing so. */  is a test for its doing so. */
5951    
5952  cd->bracount = 0;  cd->bracount = cd->final_bracount = 0;
5953  cd->names_found = 0;  cd->names_found = 0;
5954  cd->name_entry_size = 0;  cd->name_entry_size = 0;
5955  cd->name_table = NULL;  cd->name_table = NULL;
# Line 6016  field. Reset the bracket count and the n Line 6026  field. Reset the bracket count and the n
6026  field; this time it's used for remembering forward references to subpatterns.  field; this time it's used for remembering forward references to subpatterns.
6027  */  */
6028    
6029    cd->final_bracount = cd->bracount;  /* Save for checking forward references */
6030  cd->bracount = 0;  cd->bracount = 0;
6031  cd->names_found = 0;  cd->names_found = 0;
6032  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;

Legend:
Removed from v.269  
Changed lines
  Added in v.286

  ViewVC Help
Powered by ViewVC 1.1.5