/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 269 by ph10, Fri Nov 16 16:22:24 2007 UTC | revision 285 by ph10, Wed Dec 12 17:03:50 2007 UTC
# Line 301  static const char error_texts[] = Line 301  static const char error_texts[] =
301    /* 60 */    /* 60 */
302    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
303    "number is too big\0"    "number is too big\0"
304    "subpattern name expected after (?&\0"    "subpattern name expected\0"
305    "digit expected after (?+";    "digit expected after (?+";
306    
307    
# Line 498  ptr--;                            /* Set Line 498  ptr--;                            /* Set
498    
499  if (c == 0) *errorcodeptr = ERR1;  if (c == 0) *errorcodeptr = ERR1;
500    
501  /* Non-alphamerics are literals. For digits or letters, do an initial lookup in  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
502  a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
503  Otherwise further processing may be required. */  Otherwise further processing may be required. */
504    
505  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
506  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
507  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
508    
509  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
510  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
511  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
512  #endif  #endif
513    
# Line 724  else Line 724  else
724      break;      break;
725    
726      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
727      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphanumeric following \ is an error if PCRE_EXTRA was set;
728      for Perl compatibility, it is a literal. This code looks a bit odd, but      otherwise, for Perl compatibility, it is a literal. This code looks a bit
729      there used to be some cases other than the default, and there may be again      odd, but there used to be some cases other than the default, and there may
730      in future, so I haven't "optimized" it. */      be again in future, so I haven't "optimized" it. */
731    
732      default:      default:
733      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 1508  for (;;) Line 1508  for (;;)
1508  can match the empty string or not. It is called from could_be_empty()  can match the empty string or not. It is called from could_be_empty()
1509  below and from compile_branch() when checking for an unlimited repeat of a  below and from compile_branch() when checking for an unlimited repeat of a
1510  group that can match nothing. Note that first_significant_code() skips over  group that can match nothing. Note that first_significant_code() skips over
1511  assertions. If we hit an unclosed bracket, we return "empty" - this means we've  backward and negative forward assertions when its final argument is TRUE. If we
1512  struck an inner bracket whose current branch will already have been scanned.  hit an unclosed bracket, we return "empty" - this means we've struck an inner
1513    bracket whose current branch will already have been scanned.
1514    
1515  Arguments:  Arguments:
1516    code        points to start of search    code        points to start of search
# Line 1530  for (code = first_significant_code(code Line 1531  for (code = first_significant_code(code
1531    const uschar *ccode;    const uschar *ccode;
1532    
1533    c = *code;    c = *code;
1534    
1535      /* Skip over forward assertions; the other assertions are skipped by
1536      first_significant_code() with a TRUE final argument. */
1537    
1538      if (c == OP_ASSERT)
1539        {
1540        do code += GET(code, 1); while (*code == OP_ALT);
1541        c = *code;
1542        continue;
1543        }
1544    
1545    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1546    
# Line 2787  for (;; ptr++) Line 2798  for (;; ptr++)
2798          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
2799          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
2800    
2801          if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
2802          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
2803          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
2804          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
# Line 2839  for (;; ptr++) Line 2850  for (;; ptr++)
2850              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
2851              continue;              continue;
2852    
             case ESC_E: /* Perl ignores an orphan \E */  
             continue;  
   
2853              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
2854              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
2855              }              }
# Line 3076  for (;; ptr++) Line 3084  for (;; ptr++)
3084            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3085            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3086    
3087            /* \b is backslash; \X is literal X; \R is literal R; any other            /* \b is backspace; \X is literal X; \R is literal R; any other
3088            special means the '-' was literal */            special means the '-' was literal */
3089    
3090            if (d < 0)            if (d < 0)
# Line 4021  we set the flag only if there is a liter Line 4029  we set the flag only if there is a liter
4029        int len;        int len;
4030        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
4031            *tempcode == OP_NOTEXACT)            *tempcode == OP_NOTEXACT)
4032          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode] +
4033              ((*tempcode == OP_TYPEEXACT &&
4034                 (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
4035        len = code - tempcode;        len = code - tempcode;
4036        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4037          {          {
# Line 4248  we set the flag only if there is a liter Line 4258  we set the flag only if there is a liter
4258              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4259              goto FAILED;              goto FAILED;
4260              }              }
4261            if (refsign == '-')            recno = (refsign == '-')?
4262                cd->bracount - recno + 1 : recno +cd->bracount;
4263              if (recno <= 0 || recno > cd->final_bracount)
4264              {              {
4265              recno = cd->bracount - recno + 1;              *errorcodeptr = ERR15;
4266              if (recno <= 0)              goto FAILED;
               {  
               *errorcodeptr = ERR15;  
               goto FAILED;  
               }  
4267              }              }
           else recno += cd->bracount;  
4268            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4269            break;            break;
4270            }            }
# Line 4329  we set the flag only if there is a liter Line 4336  we set the flag only if there is a liter
4336            skipbytes = 1;            skipbytes = 1;
4337            }            }
4338    
4339          /* Check for the "name" actually being a subpattern number. */          /* Check for the "name" actually being a subpattern number. We are
4340            in the second pass here, so final_bracount is set. */
4341    
4342          else if (recno > 0)          else if (recno > 0 && recno <= cd->final_bracount)
4343            {            {
4344            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4345            }            }
# Line 4525  we set the flag only if there is a liter Line 4533  we set the flag only if there is a liter
4533    
4534          /* We come here from the Python syntax above that handles both          /* We come here from the Python syntax above that handles both
4535          references (?P=name) and recursion (?P>name), as well as falling          references (?P=name) and recursion (?P>name), as well as falling
4536          through from the Perl recursion syntax (?&name). */          through from the Perl recursion syntax (?&name). We also come here from
4537            the Perl \k<name> or \k'name' back reference syntax and the \k{name}
4538            .NET syntax. */
4539    
4540          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
4541          name = ++ptr;          name = ++ptr;
# Line 5939  to compile parts of the pattern into; th Line 5949  to compile parts of the pattern into; th
5949  no longer needed, so hopefully this workspace will never overflow, though there  no longer needed, so hopefully this workspace will never overflow, though there
5950  is a test for its doing so. */  is a test for its doing so. */
5951    
5952  cd->bracount = 0;  cd->bracount = cd->final_bracount = 0;
5953  cd->names_found = 0;  cd->names_found = 0;
5954  cd->name_entry_size = 0;  cd->name_entry_size = 0;
5955  cd->name_table = NULL;  cd->name_table = NULL;
# Line 6016  field. Reset the bracket count and the n Line 6026  field. Reset the bracket count and the n
6026  field; this time it's used for remembering forward references to subpatterns.  field; this time it's used for remembering forward references to subpatterns.
6027  */  */
6028    
6029    cd->final_bracount = cd->bracount;  /* Save for checking forward references */
6030  cd->bracount = 0;  cd->bracount = 0;
6031  cd->names_found = 0;  cd->names_found = 0;
6032  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;

Legend:
Removed from v.269  
changed lines
  Added in v.285

  ViewVC Help
Powered by ViewVC 1.1.5