/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Left column: revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC | Right column: revision 172 by ph10, Tue Jun 5 10:40:13 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 82  are simple data values; negative values Line 82  are simple data values; negative values
82  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
83  is invalid. */  is invalid. */
84    
85  #if !EBCDIC   /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
86  static const short int escapes[] = {  static const short int escapes[] = {
87       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
90       0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */       0,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
91  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
92  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
93     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
# Line 96  static const short int escapes[] = { Line 96  static const short int escapes[] = {
96       0,      0, -ESC_z                                            /* x - z */       0,      0, -ESC_z                                            /* x - z */
97  };  };
98    
99  #else         /* This is the "abnormal" table for EBCDIC systems */  #else           /* This is the "abnormal" table for EBCDIC systems */
100  static const short int escapes[] = {  static const short int escapes[] = {
101  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
102  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 208  static const char *error_texts[] = { Line 208  static const char *error_texts[] = {
208    "malformed number or name after (?(",    "malformed number or name after (?(",
209    "conditional group contains more than two branches",    "conditional group contains more than two branches",
210    "assertion expected after (?(",    "assertion expected after (?(",
211    "(?R or (?digits must be followed by )",    "(?R or (?[+-]digits must be followed by )",
212    /* 30 */    /* 30 */
213    "unknown POSIX class name",    "unknown POSIX class name",
214    "POSIX collating elements are not supported",    "POSIX collating elements are not supported",
# Line 242  static const char *error_texts[] = { Line 242  static const char *error_texts[] = {
242    /* 55 */    /* 55 */
243    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
244    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
245    "\\g is not followed by an (optionally braced) non-zero number"    "\\g is not followed by a braced name or an optionally braced non-zero number",
246      "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
247  };  };
248    
249    
# Line 262  For convenience, we use the same bit def Line 263  For convenience, we use the same bit def
263    
264  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
265    
266  #if !EBCDIC    /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */
267  static const unsigned char digitab[] =  static const unsigned char digitab[] =
268    {    {
269    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 298  static const unsigned char digitab[] = Line 299  static const unsigned char digitab[] =
299    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
300    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
301    
302  #else          /* This is the "abnormal" case, for EBCDIC systems */  #else           /* This is the "abnormal" case, for EBCDIC systems */
303  static const unsigned char digitab[] =  static const unsigned char digitab[] =
304    {    {
305    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 312  static const unsigned char digitab[] = Line 313  static const unsigned char digitab[] =
313    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
314    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
315    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
316    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88-     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
317    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
318    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
319    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
# Line 346  static const unsigned char ebcdic_charta Line 347  static const unsigned char ebcdic_charta
347    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
348    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
349    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
350    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88-  */    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
351    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
352    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
353    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
# Line 421  if (c == 0) *errorcodeptr = ERR1; Line 422  if (c == 0) *errorcodeptr = ERR1;
422  a table. A non-zero result is something that can be returned immediately.  a table. A non-zero result is something that can be returned immediately.
423  Otherwise further processing may be required. */  Otherwise further processing may be required. */
424    
425  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
426  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphameric */
427  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
428    
429  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
430  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
431  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
432  #endif  #endif
# Line 452  else Line 453  else
453    
454      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by a number, either plain or braced. If positive, it
455      is an absolute backreference. If negative, it is a relative backreference.      is an absolute backreference. If negative, it is a relative backreference.
456      This is a Perl 5.10 feature. */      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
457        reference to a named group. This is part of Perl's movement towards a
458        unified syntax for back references. As this is synonymous with \k{name}, we
459        fudge it up by pretending it really was \k. */
460    
461      case 'g':      case 'g':
462      if (ptr[1] == '{')      if (ptr[1] == '{')
463        {        {
464          const uschar *p;
465          for (p = ptr+2; *p != 0 && *p != '}'; p++)
466            if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
467          if (*p != 0 && *p != '}')
468            {
469            c = -ESC_k;
470            break;
471            }
472        braced = TRUE;        braced = TRUE;
473        ptr++;        ptr++;
474        }        }
# Line 562  else Line 574  else
574          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == '0') continue;     /* Leading zeroes */
575          count++;          count++;
576    
577  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
578          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= 'a') cc -= 32;               /* Convert to upper case */
579          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
580  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
581          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
582          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
583  #endif  #endif
# Line 589  else Line 601  else
601        {        {
602        int cc;                               /* Some compilers don't like ++ */        int cc;                               /* Some compilers don't like ++ */
603        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                        /* in initializers */
604  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
605        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= 'a') cc -= 32;              /* Convert to upper case */
606        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
607  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
608        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= 'z') cc += 64;              /* Convert to upper case */
609        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
610  #endif  #endif
# Line 611  else Line 623  else
623        return 0;        return 0;
624        }        }
625    
626  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
627      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= 'a' && c <= 'z') c -= 32;
628      c ^= 0x40;      c ^= 0x40;
629  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
630      if (c >= 'a' && c <= 'z') c += 64;      if (c >= 'a' && c <= 'z') c += 64;
631      c ^= 0xC0;      c ^= 0xC0;
632  #endif  #endif
# Line 1246  for (;;) Line 1258  for (;;)
1258    else    else
1259      {      {
1260      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1261    #ifdef SUPPORT_UTF8
1262      if (utf8) switch(c)      if (utf8) switch(c)
1263        {        {
1264        case OP_CHAR:        case OP_CHAR:
# Line 1266  for (;;) Line 1279  for (;;)
1279        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1280        break;        break;
1281        }        }
1282    #endif
1283      }      }
1284    }    }
1285  }  }
# Line 1309  for (;;) Line 1323  for (;;)
1323    else    else
1324      {      {
1325      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1326    #ifdef SUPPORT_UTF8
1327      if (utf8) switch(c)      if (utf8) switch(c)
1328        {        {
1329        case OP_CHAR:        case OP_CHAR:
# Line 1329  for (;;) Line 1344  for (;;)
1344        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1345        break;        break;
1346        }        }
1347    #endif
1348      }      }
1349    }    }
1350  }  }
# Line 1366  for (code = first_significant_code(code Line 1382  for (code = first_significant_code(code
1382    
1383    c = *code;    c = *code;
1384    
1385      /* Groups with zero repeats can of course be empty; skip them. */
1386    
1387      if (c == OP_BRAZERO || c == OP_BRAMINZERO)
1388        {
1389        code += _pcre_OP_lengths[c];
1390        do code += GET(code, 1); while (*code == OP_ALT);
1391        c = *code;
1392        continue;
1393        }
1394    
1395      /* For other groups, scan the branches. */
1396    
1397    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
1398      {      {
1399      BOOL empty_branch;      BOOL empty_branch;
# Line 1382  for (code = first_significant_code(code Line 1410  for (code = first_significant_code(code
1410        }        }
1411      while (*code == OP_ALT);      while (*code == OP_ALT);
1412      if (!empty_branch) return FALSE;   /* All branches are non-empty */      if (!empty_branch) return FALSE;   /* All branches are non-empty */
1413        c = *code;
     /* Move past the KET and fudge things so that the increment in the "for"  
     above has no effect. */  
   
     c = OP_END;  
     code += 1 + LINK_SIZE - _pcre_OP_lengths[c];  
1414      continue;      continue;
1415      }      }
1416    
# Line 2091  for (;; ptr++) Line 2114  for (;; ptr++)
2114    int class_lastchar;    int class_lastchar;
2115    int newoptions;    int newoptions;
2116    int recno;    int recno;
2117      int refsign;
2118    int skipbytes;    int skipbytes;
2119    int subreqbyte;    int subreqbyte;
2120    int subfirstbyte;    int subfirstbyte;
# Line 3617  for (;; ptr++) Line 3641  for (;; ptr++)
3641    
3642          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3643          skipbytes = 3;          skipbytes = 3;
3644            refsign = -1;
3645    
3646          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3647    
# Line 3640  for (;; ptr++) Line 3665  for (;; ptr++)
3665            terminator = '\'';            terminator = '\'';
3666            ptr++;            ptr++;
3667            }            }
3668          else terminator = 0;          else
3669              {
3670              terminator = 0;
3671              if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3672              }
3673    
3674          /* We now expect to read a name; any thing else is an error */          /* We now expect to read a name; any thing else is an error */
3675    
# Line 3676  for (;; ptr++) Line 3705  for (;; ptr++)
3705          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
3706    
3707          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3708          reference. */          reference. If the string started with "+" or "-" we require the rest to
3709            be digits, in which case recno will be set. */
3710    
3711            if (refsign > 0)
3712              {
3713              if (recno <= 0)
3714                {
3715                *errorcodeptr = ERR58;
3716                goto FAILED;
3717                }
3718              if (refsign == '-')
3719                {
3720                recno = cd->bracount - recno + 1;
3721                if (recno <= 0)
3722                  {
3723                  *errorcodeptr = ERR15;
3724                  goto FAILED;
3725                  }
3726                }
3727              else recno += cd->bracount;
3728              PUT2(code, 2+LINK_SIZE, recno);
3729              break;
3730              }
3731    
3732            /* Otherwise (did not start with "+" or "-"), start by looking for the
3733            name. */
3734    
3735          slot = cd->name_table;          slot = cd->name_table;
3736          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
# Line 3995  for (;; ptr++) Line 4049  for (;; ptr++)
4049    
4050    
4051          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4052            case '-': case '+':
4053          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
4054          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case '5': case '6': case '7': case '8': case '9':   /* subroutine */
4055            {            {
4056            const uschar *called;            const uschar *called;
4057    
4058              if ((refsign = *ptr) == '+') ptr++;
4059              else if (refsign == '-')
4060                {
4061                if ((digitab[ptr[1]] & ctype_digit) == 0)
4062                  goto OTHER_CHAR_AFTER_QUERY;
4063                ptr++;
4064                }
4065    
4066            recno = 0;            recno = 0;
4067            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4068              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
4069    
4070            if (*ptr != ')')            if (*ptr != ')')
4071              {              {
4072              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4073              goto FAILED;              goto FAILED;
4074              }              }
4075    
4076              if (refsign == '-')
4077                {
4078                if (recno == 0)
4079                  {
4080                  *errorcodeptr = ERR58;
4081                  goto FAILED;
4082                  }
4083                recno = cd->bracount - recno + 1;
4084                if (recno <= 0)
4085                  {
4086                  *errorcodeptr = ERR15;
4087                  goto FAILED;
4088                  }
4089                }
4090              else if (refsign == '+')
4091                {
4092                if (recno == 0)
4093                  {
4094                  *errorcodeptr = ERR58;
4095                  goto FAILED;
4096                  }
4097                recno += cd->bracount;
4098                }
4099    
4100            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4101    
4102            HANDLE_RECURSION:            HANDLE_RECURSION:
# Line 4080  for (;; ptr++) Line 4169  for (;; ptr++)
4169    
4170          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4171          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4172            OTHER_CHAR_AFTER_QUERY:
4173          set = unset = 0;          set = unset = 0;
4174          optset = &set;          optset = &set;
4175    
# Line 4230  for (;; ptr++) Line 4320  for (;; ptr++)
4320      is on the bracket. */      is on the bracket. */
4321    
4322      /* If this is a conditional bracket, check that there are no more than      /* If this is a conditional bracket, check that there are no more than
4323      two branches in the group, or just one if it's a DEFINE group. */      two branches in the group, or just one if it's a DEFINE group. We do this
4324        in the real compile phase, not in the pre-pass, where the whole group may
4325        not be available. */
4326    
4327      if (bravalue == OP_COND)      if (bravalue == OP_COND && lengthptr == NULL)
4328        {        {
4329        uschar *tc = code;        uschar *tc = code;
4330        int condcount = 0;        int condcount = 0;
# Line 4392  for (;; ptr++) Line 4484  for (;; ptr++)
4484        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
4485        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
4486    
4487        /* \k<name> or \k'name' is a back reference by name (Perl syntax) */        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
4488          We also support \k{name} (.NET syntax) */
4489    
4490        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
4491          {          {
4492          is_recurse = FALSE;          is_recurse = FALSE;
4493          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
4494          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
4495          }          }
4496    
# Line 4563  This function is used during the pre-com Line 4656  This function is used during the pre-com
4656  out the amount of memory needed, as well as during the real compile phase. The  out the amount of memory needed, as well as during the real compile phase. The
4657  value of lengthptr distinguishes the two phases.  value of lengthptr distinguishes the two phases.
4658    
4659  Argument:  Arguments:
4660    options        option bits, including any changes for this subpattern    options        option bits, including any changes for this subpattern
4661    oldims         previous settings of ims option bits    oldims         previous settings of ims option bits
4662    codeptr        -> the address of the current code pointer    codeptr        -> the address of the current code pointer
# Line 4716  for (;;) Line 4809  for (;;)
4809        }        }
4810      }      }
4811    
4812    /* Reached end of expression, either ')' or end of pattern. Go back through    /* Reached end of expression, either ')' or end of pattern. In the real
4813    the alternative branches and reverse the chain of offsets, with the field in    compile phase, go back through the alternative branches and reverse the chain
4814    the BRA item now becoming an offset to the first alternative. If there are    of offsets, with the field in the BRA item now becoming an offset to the
4815    no alternatives, it points to the end of the group. The length in the    first alternative. If there are no alternatives, it points to the end of the
4816    terminating ket is always the length of the whole bracketed item. If any of    group. The length in the terminating ket is always the length of the whole
4817    the ims options were changed inside the group, compile a resetting op-code    bracketed item. If any of the ims options were changed inside the group,
4818    following, except at the very end of the pattern. Return leaving the pointer    compile a resetting op-code following, except at the very end of the pattern.
4819    at the terminating char. */    Return leaving the pointer at the terminating char. */
4820    
4821    if (*ptr != '|')    if (*ptr != '|')
4822      {      {
4823      int branch_length = code - last_branch;      if (lengthptr == NULL)
     do  
4824        {        {
4825        int prev_length = GET(last_branch, 1);        int branch_length = code - last_branch;
4826        PUT(last_branch, 1, branch_length);        do
4827        branch_length = prev_length;          {
4828        last_branch -= branch_length;          int prev_length = GET(last_branch, 1);
4829            PUT(last_branch, 1, branch_length);
4830            branch_length = prev_length;
4831            last_branch -= branch_length;
4832            }
4833          while (branch_length > 0);
4834        }        }
     while (branch_length > 0);  
4835    
4836      /* Fill in the ket */      /* Fill in the ket */
4837    
# Line 4762  for (;;) Line 4858  for (;;)
4858      return TRUE;      return TRUE;
4859      }      }
4860    
4861    /* Another branch follows; insert an "or" node. Its length field points back    /* Another branch follows. In the pre-compile phase, we can move the code
4862      pointer back to where it was for the start of the first branch. (That is,
4863      pretend that each branch is the only one.)
4864    
4865      In the real compile phase, insert an ALT node. Its length field points back
4866    to the previous branch while the bracket remains open. At the end the chain    to the previous branch while the bracket remains open. At the end the chain
4867    is reversed. It's done like this so that the start of the bracket has a    is reversed. It's done like this so that the start of the bracket has a
4868    zero offset until it is closed, making it possible to detect recursion. */    zero offset until it is closed, making it possible to detect recursion. */
4869    
4870    *code = OP_ALT;    if (lengthptr != NULL)
4871    PUT(code, 1, code - last_branch);      {
4872    bc.current = last_branch = code;      code = *codeptr + 1 + LINK_SIZE + skipbytes;
4873    code += 1 + LINK_SIZE;      length += 1 + LINK_SIZE;
4874        }
4875      else
4876        {
4877        *code = OP_ALT;
4878        PUT(code, 1, code - last_branch);
4879        bc.current = last_branch = code;
4880        code += 1 + LINK_SIZE;
4881        }
4882    
4883    ptr++;    ptr++;
   length += 1 + LINK_SIZE;  
4884    }    }
4885  /* Control never reaches here */  /* Control never reaches here */
4886  }  }
# Line 5039  Returns:        pointer to compiled data Line 5147  Returns:        pointer to compiled data
5147                  with errorptr and erroroffset set                  with errorptr and erroroffset set
5148  */  */
5149    
5150  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5151  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
5152    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
5153  {  {
# Line 5047  return pcre_compile2(pattern, options, N Line 5155  return pcre_compile2(pattern, options, N
5155  }  }
5156    
5157    
5158  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5159  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
5160    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
5161  {  {
# Line 5096  if (errorcodeptr != NULL) *errorcodeptr Line 5204  if (errorcodeptr != NULL) *errorcodeptr
5204  if (erroroffset == NULL)  if (erroroffset == NULL)
5205    {    {
5206    errorcode = ERR16;    errorcode = ERR16;
5207    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5208    }    }
5209    
5210  *erroroffset = 0;  *erroroffset = 0;
# Line 5109  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5217  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5217       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5218    {    {
5219    errorcode = ERR44;    errorcode = ERR44;
5220    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5221    }    }
5222  #else  #else
5223  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5134  cd->cbits = tables + cbits_offset; Line 5242  cd->cbits = tables + cbits_offset;
5242  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5243    
5244  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5245  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5246    "anycrlf". */
5247    
5248  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5249    {    {
# Line 5144  switch (options & (PCRE_NEWLINE_CRLF | P Line 5253  switch (options & (PCRE_NEWLINE_CRLF | P
5253    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5254         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5255    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5256      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5257    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5258    }    }
5259    
5260  if (newline < 0)  if (newline == -2)
5261      {
5262      cd->nltype = NLTYPE_ANYCRLF;
5263      }
5264    else if (newline < 0)
5265    {    {
5266    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5267    }    }
# Line 5321  if (errorcode != 0) Line 5435  if (errorcode != 0)
5435    (pcre_free)(re);    (pcre_free)(re);
5436    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5437    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5438  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5439    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5440    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5441    return NULL;    return NULL;
# Line 5413  if ((re->options & PCRE_REQCHSET) != 0) Line 5525  if ((re->options & PCRE_REQCHSET) != 0)
5525      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5526    }    }
5527    
5528  pcre_printint(re, stdout);  pcre_printint(re, stdout, TRUE);
5529    
5530  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
5531  was compiled can be seen. */  was compiled can be seen. */

Legend:
Removed from v.93
Changed lines
Added in v.172

  ViewVC Help
Powered by ViewVC 1.1.5