/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 145 by ph10, Wed Apr 4 14:06:52 2007 UTC | revision 171 by ph10, Mon Jun 4 14:28:58 2007 UTC
# Line 87  static const short int escapes[] = { Line 87  static const short int escapes[] = {
87       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
90       0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */       0,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
91  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
92  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
93     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
# Line 208  static const char *error_texts[] = { Line 208  static const char *error_texts[] = {
208    "malformed number or name after (?(",    "malformed number or name after (?(",
209    "conditional group contains more than two branches",    "conditional group contains more than two branches",
210    "assertion expected after (?(",    "assertion expected after (?(",
211    "(?R or (?digits must be followed by )",    "(?R or (?[+-]digits must be followed by )",
212    /* 30 */    /* 30 */
213    "unknown POSIX class name",    "unknown POSIX class name",
214    "POSIX collating elements are not supported",    "POSIX collating elements are not supported",
# Line 242  static const char *error_texts[] = { Line 242  static const char *error_texts[] = {
242    /* 55 */    /* 55 */
243    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
244    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
245    "\\g is not followed by an (optionally braced) non-zero number"    "\\g is not followed by a braced name or an optionally braced non-zero number",
246      "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
247  };  };
248    
249    
# Line 452  else Line 453  else
453    
454      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by a number, either plain or braced. If positive, it
455      is an absolute backreference. If negative, it is a relative backreference.      is an absolute backreference. If negative, it is a relative backreference.
456      This is a Perl 5.10 feature. */      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
457        reference to a named group. This is part of Perl's movement towards a
458        unified syntax for back references. As this is synonymous with \k{name}, we
459        fudge it up by pretending it really was \k. */
460    
461      case 'g':      case 'g':
462      if (ptr[1] == '{')      if (ptr[1] == '{')
463        {        {
464          const uschar *p;
465          for (p = ptr+2; *p != 0 && *p != '}'; p++)
466            if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
467          if (*p != 0 && *p != '}')
468            {
469            c = -ESC_k;
470            break;
471            }
472        braced = TRUE;        braced = TRUE;
473        ptr++;        ptr++;
474        }        }
# Line 1369  for (code = first_significant_code(code Line 1381  for (code = first_significant_code(code
1381    const uschar *ccode;    const uschar *ccode;
1382    
1383    c = *code;    c = *code;
1384    
1385      /* Groups with zero repeats can of course be empty; skip them. */
1386    
1387      if (c == OP_BRAZERO || c == OP_BRAMINZERO)
1388        {
1389        do code += GET(code, 1); while (*code == OP_ALT);
1390        c = *code;
1391        continue;
1392        }
1393    
1394      /* For other groups, scan the branches. */
1395    
1396    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
1397      {      {
1398      BOOL empty_branch;      BOOL empty_branch;
# Line 1386  for (code = first_significant_code(code Line 1409  for (code = first_significant_code(code
1409        }        }
1410      while (*code == OP_ALT);      while (*code == OP_ALT);
1411      if (!empty_branch) return FALSE;   /* All branches are non-empty */      if (!empty_branch) return FALSE;   /* All branches are non-empty */
1412        c = *code;
     /* Move past the KET and fudge things so that the increment in the "for"  
     above has no effect. */  
   
     c = OP_END;  
     code += 1 + LINK_SIZE - _pcre_OP_lengths[c];  
1413      continue;      continue;
1414      }      }
1415    
# Line 2095  for (;; ptr++) Line 2113  for (;; ptr++)
2113    int class_lastchar;    int class_lastchar;
2114    int newoptions;    int newoptions;
2115    int recno;    int recno;
2116      int refsign;
2117    int skipbytes;    int skipbytes;
2118    int subreqbyte;    int subreqbyte;
2119    int subfirstbyte;    int subfirstbyte;
# Line 3621  for (;; ptr++) Line 3640  for (;; ptr++)
3640    
3641          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3642          skipbytes = 3;          skipbytes = 3;
3643            refsign = -1;
3644    
3645          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3646    
# Line 3644  for (;; ptr++) Line 3664  for (;; ptr++)
3664            terminator = '\'';            terminator = '\'';
3665            ptr++;            ptr++;
3666            }            }
3667          else terminator = 0;          else
3668              {
3669              terminator = 0;
3670              if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3671              }
3672    
3673          /* We now expect to read a name; any thing else is an error */          /* We now expect to read a name; any thing else is an error */
3674    
# Line 3680  for (;; ptr++) Line 3704  for (;; ptr++)
3704          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
3705    
3706          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3707          reference. */          reference. If the string started with "+" or "-" we require the rest to
3708            be digits, in which case recno will be set. */
3709    
3710            if (refsign > 0)
3711              {
3712              if (recno <= 0)
3713                {
3714                *errorcodeptr = ERR58;
3715                goto FAILED;
3716                }
3717              if (refsign == '-')
3718                {
3719                recno = cd->bracount - recno + 1;
3720                if (recno <= 0)
3721                  {
3722                  *errorcodeptr = ERR15;
3723                  goto FAILED;
3724                  }
3725                }
3726              else recno += cd->bracount;
3727              PUT2(code, 2+LINK_SIZE, recno);
3728              break;
3729              }
3730    
3731            /* Otherwise (did not start with "+" or "-"), start by looking for the
3732            name. */
3733    
3734          slot = cd->name_table;          slot = cd->name_table;
3735          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
3736            {            {
# Line 3999  for (;; ptr++) Line 4048  for (;; ptr++)
4048    
4049    
4050          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4051            case '-': case '+':
4052          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
4053          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case '5': case '6': case '7': case '8': case '9':   /* subroutine */
4054            {            {
4055            const uschar *called;            const uschar *called;
4056    
4057              if ((refsign = *ptr) == '+') ptr++;
4058              else if (refsign == '-')
4059                {
4060                if ((digitab[ptr[1]] & ctype_digit) == 0)
4061                  goto OTHER_CHAR_AFTER_QUERY;
4062                ptr++;
4063                }
4064    
4065            recno = 0;            recno = 0;
4066            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4067              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
4068    
4069            if (*ptr != ')')            if (*ptr != ')')
4070              {              {
4071              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4072              goto FAILED;              goto FAILED;
4073              }              }
4074    
4075              if (refsign == '-')
4076                {
4077                if (recno == 0)
4078                  {
4079                  *errorcodeptr = ERR58;
4080                  goto FAILED;
4081                  }
4082                recno = cd->bracount - recno + 1;
4083                if (recno <= 0)
4084                  {
4085                  *errorcodeptr = ERR15;
4086                  goto FAILED;
4087                  }
4088                }
4089              else if (refsign == '+')
4090                {
4091                if (recno == 0)
4092                  {
4093                  *errorcodeptr = ERR58;
4094                  goto FAILED;
4095                  }
4096                recno += cd->bracount;
4097                }
4098    
4099            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4100    
# Line 4084  for (;; ptr++) Line 4168  for (;; ptr++)
4168    
4169          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4170          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4171            OTHER_CHAR_AFTER_QUERY:
4172          set = unset = 0;          set = unset = 0;
4173          optset = &set;          optset = &set;
4174    
# Line 4396  for (;; ptr++) Line 4481  for (;; ptr++)
4481        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
4482        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
4483    
4484        /* \k<name> or \k'name' is a back reference by name (Perl syntax) */        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
4485          We also support \k{name} (.NET syntax) */
4486    
4487        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
4488          {          {
4489          is_recurse = FALSE;          is_recurse = FALSE;
4490          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
4491          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
4492          }          }
4493    
# Line 5138  cd->cbits = tables + cbits_offset; Line 5224  cd->cbits = tables + cbits_offset;
5224  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5225    
5226  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5227  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5228    "anycrlf". */
5229    
5230  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5231    {    {
# Line 5148  switch (options & (PCRE_NEWLINE_CRLF | P Line 5235  switch (options & (PCRE_NEWLINE_CRLF | P
5235    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5236         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5237    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5238      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5239    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5240    }    }
5241    
5242  if (newline < 0)  if (newline == -2)
5243      {
5244      cd->nltype = NLTYPE_ANYCRLF;
5245      }
5246    else if (newline < 0)
5247    {    {
5248    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5249    }    }

Legend:
Removed from v.145  
changed lines
  Added in v.171

  ViewVC Help
Powered by ViewVC 1.1.5