/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 265 by ph10, Wed Nov 14 11:35:48 2007 UTC revision 276 by ph10, Wed Nov 21 15:39:20 2007 UTC
# Line 241  static const char error_texts[] = Line 241  static const char error_texts[] =
241    /* 10 */    /* 10 */
242    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
243    "internal error: unexpected repeat\0"    "internal error: unexpected repeat\0"
244    "unrecognized character after (?\0"    "unrecognized character after (? or (?-\0"
245    "POSIX named classes are supported only within a class\0"    "POSIX named classes are supported only within a class\0"
246    "missing )\0"    "missing )\0"
247    /* 15 */    /* 15 */
# Line 300  static const char error_texts[] = Line 300  static const char error_texts[] =
300    "(*VERB) with an argument is not supported\0"    "(*VERB) with an argument is not supported\0"
301    /* 60 */    /* 60 */
302    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
303    "number is too big";    "number is too big\0"
304      "subpattern name expected\0"
305      "digit expected after (?+";
306    
307    
308  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 496  ptr--;                            /* Set Line 498  ptr--;                            /* Set
498    
499  if (c == 0) *errorcodeptr = ERR1;  if (c == 0) *errorcodeptr = ERR1;
500    
501  /* Non-alphamerics are literals. For digits or letters, do an initial lookup in  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
502  a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
503  Otherwise further processing may be required. */  Otherwise further processing may be required. */
504    
505  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
506  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
507  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
508    
509  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
510  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
511  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
512  #endif  #endif
513    
# Line 722  else Line 724  else
724      break;      break;
725    
726      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
727      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphanumeric following \ is an error if PCRE_EXTRA was set;
728      for Perl compatibility, it is a literal. This code looks a bit odd, but      otherwise, for Perl compatibility, it is a literal. This code looks a bit
729      there used to be some cases other than the default, and there may be again      odd, but there used to be some cases other than the default, and there may
730      in future, so I haven't "optimized" it. */      be again in future, so I haven't "optimized" it. */
731    
732      default:      default:
733      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 2785  for (;; ptr++) Line 2787  for (;; ptr++)
2787          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
2788          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
2789    
2790          if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
2791          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
2792          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
2793          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
# Line 2837  for (;; ptr++) Line 2839  for (;; ptr++)
2839              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
2840              continue;              continue;
2841    
             case ESC_E: /* Perl ignores an orphan \E */  
             continue;  
   
2842              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
2843              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
2844              }              }
# Line 3074  for (;; ptr++) Line 3073  for (;; ptr++)
3073            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3074            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3075    
3076            /* \b is backslash; \X is literal X; \R is literal R; any other            /* \b is backspace; \X is literal X; \R is literal R; any other
3077            special means the '-' was literal */            special means the '-' was literal */
3078    
3079            if (d < 0)            if (d < 0)
# Line 4246  we set the flag only if there is a liter Line 4245  we set the flag only if there is a liter
4245              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4246              goto FAILED;              goto FAILED;
4247              }              }
4248            if (refsign == '-')            recno = (refsign == '-')?
4249                cd->bracount - recno + 1 : recno +cd->bracount;
4250              if (recno <= 0 || recno > cd->final_bracount)
4251              {              {
4252              recno = cd->bracount - recno + 1;              *errorcodeptr = ERR15;
4253              if (recno <= 0)              goto FAILED;
               {  
               *errorcodeptr = ERR15;  
               goto FAILED;  
               }  
4254              }              }
           else recno += cd->bracount;  
4255            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4256            break;            break;
4257            }            }
# Line 4327  we set the flag only if there is a liter Line 4323  we set the flag only if there is a liter
4323            skipbytes = 1;            skipbytes = 1;
4324            }            }
4325    
4326          /* Check for the "name" actually being a subpattern number. */          /* Check for the "name" actually being a subpattern number. We are
4327            in the second pass here, so final_bracount is set. */
4328    
4329          else if (recno > 0)          else if (recno > 0 && recno <= cd->final_bracount)
4330            {            {
4331            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4332            }            }
# Line 4523  we set the flag only if there is a liter Line 4520  we set the flag only if there is a liter
4520    
4521          /* We come here from the Python syntax above that handles both          /* We come here from the Python syntax above that handles both
4522          references (?P=name) and recursion (?P>name), as well as falling          references (?P=name) and recursion (?P>name), as well as falling
4523          through from the Perl recursion syntax (?&name). */          through from the Perl recursion syntax (?&name). We also come here from
4524            the Perl \k<name> or \k'name' back reference syntax and the \k{name}
4525            .NET syntax. */
4526    
4527          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
4528          name = ++ptr;          name = ++ptr;
# Line 4535  we set the flag only if there is a liter Line 4534  we set the flag only if there is a liter
4534    
4535          if (lengthptr != NULL)          if (lengthptr != NULL)
4536            {            {
4537              if (namelen == 0)
4538                {
4539                *errorcodeptr = ERR62;
4540                goto FAILED;
4541                }
4542            if (*ptr != terminator)            if (*ptr != terminator)
4543              {              {
4544              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
# Line 4548  we set the flag only if there is a liter Line 4552  we set the flag only if there is a liter
4552            recno = 0;            recno = 0;
4553            }            }
4554    
4555          /* In the real compile, seek the name in the table */          /* In the real compile, seek the name in the table. We check the name
4556            first, and then check that we have reached the end of the name in the
4557            table. That way, if the name is longer than any in the table,
4558            the comparison will fail without reading beyond the table entry. */
4559    
4560          else          else
4561            {            {
4562            slot = cd->name_table;            slot = cd->name_table;
4563            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
4564              {              {
4565              if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
4566                    slot[2+namelen] == 0)
4567                  break;
4568              slot += cd->name_entry_size;              slot += cd->name_entry_size;
4569              }              }
4570    
# Line 4592  we set the flag only if there is a liter Line 4601  we set the flag only if there is a liter
4601            {            {
4602            const uschar *called;            const uschar *called;
4603    
4604            if ((refsign = *ptr) == '+') ptr++;            if ((refsign = *ptr) == '+')
4605                {
4606                ptr++;
4607                if ((digitab[*ptr] & ctype_digit) == 0)
4608                  {
4609                  *errorcodeptr = ERR63;
4610                  goto FAILED;
4611                  }
4612                }
4613            else if (refsign == '-')            else if (refsign == '-')
4614              {              {
4615              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
# Line 5919  to compile parts of the pattern into; th Line 5936  to compile parts of the pattern into; th
5936  no longer needed, so hopefully this workspace will never overflow, though there  no longer needed, so hopefully this workspace will never overflow, though there
5937  is a test for its doing so. */  is a test for its doing so. */
5938    
5939  cd->bracount = 0;  cd->bracount = cd->final_bracount = 0;
5940  cd->names_found = 0;  cd->names_found = 0;
5941  cd->name_entry_size = 0;  cd->name_entry_size = 0;
5942  cd->name_table = NULL;  cd->name_table = NULL;
# Line 5996  field. Reset the bracket count and the n Line 6013  field. Reset the bracket count and the n
6013  field; this time it's used for remembering forward references to subpatterns.  field; this time it's used for remembering forward references to subpatterns.
6014  */  */
6015    
6016    cd->final_bracount = cd->bracount;  /* Save for checking forward references */
6017  cd->bracount = 0;  cd->bracount = 0;
6018  cd->names_found = 0;  cd->names_found = 0;
6019  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;

Legend:
Removed from v.265  
changed lines
  Added in v.276

  ViewVC Help
Powered by ViewVC 1.1.5