/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1998 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Public Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 107  static const short int escapes[] = { Line 111  static const short int escapes[] = {
111    
112  static BOOL  static BOOL
113    compile_regex(int, int, int *, uschar **, const uschar **, const char **,    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114      BOOL, int);      BOOL, int, compile_data *);
   
 /* Structure for passing "static" information around between the functions  
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   int    offset_max;            /* The maximum usable for return data */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   endonly;               /* Dollar not before final \n */  
   const uschar *start_subject;  /* Start of the subject string */  
   const uschar *end_subject;    /* End of the subject string */  
   const uschar *end_match_ptr;  /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
115    
116    
117    
# Line 145  void  (*pcre_free)(void *) = free; Line 131  void  (*pcre_free)(void *) = free;
131    
132    
133  /*************************************************  /*************************************************
134    *             Default character tables           *
135    *************************************************/
136    
137    /* A default set of character tables is included in the PCRE binary. Its source
138    is built by the maketables auxiliary program, which uses the default C ctypes
139    functions, and put in the file chartables.c. These tables are used by PCRE
140    whenever the caller of pcre_compile() does not provide an alternate set of
141    tables. */
142    
143    #include "chartables.c"
144    
145    
146    
147    /*************************************************
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
# Line 237  Arguments: Line 237  Arguments:
237    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
238    options    the options bits    options    the options bits
239    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
240      cd         pointer to char tables block
241    
242  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
243               negative => a special escape sequence               negative => a special escape sequence
# Line 245  Returns:     zero or positive => a data Line 246  Returns:     zero or positive => a data
246    
247  static int  static int
248  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
249    int options, BOOL isclass)    int options, BOOL isclass, compile_data *cd)
250  {  {
251  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
252  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
# Line 288  else Line 289  else
289        {        {
290        oldptr = ptr;        oldptr = ptr;
291        c -= '0';        c -= '0';
292        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
293          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
294        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
295          {          {
# Line 314  else Line 315  else
315    
316      case '0':      case '0':
317      c -= '0';      c -= '0';
318      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
319        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
320          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
321      break;      break;
# Line 323  else Line 324  else
324    
325      case 'x':      case 'x':
326      c = 0;      c = 0;
327      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
328        {        {
329        ptr++;        ptr++;
330        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
331          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
332        }        }
333      break;      break;
334    
# Line 341  else Line 342  else
342    
343      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
344    
345      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
346      c ^= 0x40;      c ^= 0x40;
347      break;      break;
348    
349      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
350      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
351      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
352        there used to be some cases other than the default, and there may be again
353        in future, so I haven't "optimized" it. */
354    
355      default:      default:
356      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 377  where the ddds are digits. Line 380  where the ddds are digits.
380    
381  Arguments:  Arguments:
382    p         pointer to the first char after '{'    p         pointer to the first char after '{'
383      cd        pointer to char tables block
384    
385  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
386  */  */
387    
388  static BOOL  static BOOL
389  is_counted_repeat(const uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
390  {  {
391  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
392  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
393  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
394    
395  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
396  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
397    
398  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
399  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
400  return (*p == '}');  return (*p == '}');
401  }  }
402    
# Line 412  Arguments: Line 416  Arguments:
416    maxp       pointer to int for max    maxp       pointer to int for max
417               returned as -1 if no max               returned as -1 if no max
418    errorptr   points to pointer to error message    errorptr   points to pointer to error message
419      cd         pointer to character tables block
420    
421  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
422               current ptr on error, with errorptr set               current ptr on error, with errorptr set
423  */  */
424    
425  static const uschar *  static const uschar *
426  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
427      const char **errorptr, compile_data *cd)
428  {  {
429  int min = 0;  int min = 0;
430  int max = -1;  int max = -1;
431    
432  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
433    
434  if (*p == '}') max = min; else  if (*p == '}') max = min; else
435    {    {
436    if (*(++p) != '}')    if (*(++p) != '}')
437      {      {
438      max = 0;      max = 0;
439      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
440      if (max < min)      if (max < min)
441        {        {
442        *errorptr = ERR4;        *errorptr = ERR4;
# Line 615  for (;;) Line 621  for (;;)
621  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
622    
623  Arguments:  Arguments:
624    options     the option bits    options      the option bits
625    brackets    points to number of brackets used    brackets     points to number of brackets used
626    code        points to the pointer to the current code point    code         points to the pointer to the current code point
627    ptrptr      points to the current pattern pointer    ptrptr       points to the current pattern pointer
628    errorptr    points to pointer to error message    errorptr     points to pointer to error message
629    optchanged  set to the value of the last OP_OPT item compiled    optchanged   set to the value of the last OP_OPT item compiled
630      cd           contains pointers to tables
631    
632  Returns:      TRUE on success  Returns:       TRUE on success
633                FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
634  */  */
635    
636  static BOOL  static BOOL
637  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
638    const uschar **ptrptr, const char **errorptr, int *optchanged)    const uschar **ptrptr, const char **errorptr, int *optchanged,
639      compile_data *cd)
640  {  {
641  int repeat_type, op_type;  int repeat_type, op_type;
642  int repeat_min, repeat_max;  int repeat_min, repeat_max;
# Line 660  for (;; ptr++) Line 668  for (;; ptr++)
668    c = *ptr;    c = *ptr;
669    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
670      {      {
671      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
672      if (c == '#')      if (c == '#')
673        {        {
674        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 748  for (;; ptr++) Line 756  for (;; ptr++)
756    
757        if (c == '\\')        if (c == '\\')
758          {          {
759          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
760          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
761          else if (c < 0)          else if (c < 0)
762            {            {
763              register const uschar *cbits = cd->cbits;
764            class_charcount = 10;            class_charcount = 10;
765            switch (-c)            switch (-c)
766              {              {
767              case ESC_d:              case ESC_d:
768              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
769              continue;              continue;
770    
771              case ESC_D:              case ESC_D:
772              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
773              continue;              continue;
774    
775              case ESC_w:              case ESC_w:
776              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
777                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
778              continue;              continue;
779    
780              case ESC_W:              case ESC_W:
781              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
782                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
783              continue;              continue;
784    
785              case ESC_s:              case ESC_s:
786              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
787              continue;              continue;
788    
789              case ESC_S:              case ESC_S:
790              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
791              continue;              continue;
792    
793              default:              default:
# Line 810  for (;; ptr++) Line 819  for (;; ptr++)
819    
820          if (d == '\\')          if (d == '\\')
821            {            {
822            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
823            if (d < 0)            if (d < 0)
824              {              {
825              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 832  for (;; ptr++) Line 841  for (;; ptr++)
841            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
842            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
843              {              {
844              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
845              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
846              }              }
847            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 847  for (;; ptr++) Line 856  for (;; ptr++)
856        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
857        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
858          {          {
859          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
860          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
861          }          }
862        class_charcount++;        class_charcount++;
# Line 894  for (;; ptr++) Line 903  for (;; ptr++)
903      /* Various kinds of repeat */      /* Various kinds of repeat */
904    
905      case '{':      case '{':
906      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
907      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
908      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
909      goto REPEAT;      goto REPEAT;
910    
# Line 1082  for (;; ptr++) Line 1091  for (;; ptr++)
1091      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1092               (int)*previous == OP_COND)               (int)*previous == OP_COND)
1093        {        {
1094        int i, ketoffset = 0;        register int i;
1095          int ketoffset = 0;
1096        int len = code - previous;        int len = code - previous;
1097          uschar *bralink = NULL;
1098    
1099        /* If the maximum repeat count is unlimited, find the end of the bracket        /* If the maximum repeat count is unlimited, find the end of the bracket
1100        by scanning through from the start, and compute the offset back to it        by scanning through from the start, and compute the offset back to it
# Line 1098  for (;; ptr++) Line 1109  for (;; ptr++)
1109          ketoffset = code - ket;          ketoffset = code - ket;
1110          }          }
1111    
1112          /* The case of a zero minimum is special because of the need to stick
1113          OP_BRAZERO in front of it, and because the group appears once in the
1114          data, whereas in other cases it appears the minimum number of times. For
1115          this reason, it is simplest to treat this case separately, as otherwise
1116          the code gets far too messy. There are several special subcases when the
1117          minimum is zero. */
1118    
1119          if (repeat_min == 0)
1120            {
1121            /* If the maximum is also zero, we just omit the group from the output
1122            altogether. */
1123    
1124            if (repeat_max == 0)
1125              {
1126              code = previous;
1127              previous = NULL;
1128              break;
1129              }
1130    
1131            /* If the maximum is 1 or unlimited, we just have to stick in the
1132            BRAZERO and do no more at this point. */
1133    
1134            if (repeat_max <= 1)
1135              {
1136              memmove(previous+1, previous, len);
1137              code++;
1138              *previous++ = OP_BRAZERO + repeat_type;
1139              }
1140    
1141            /* If the maximum is greater than 1 and limited, we have to replicate
1142            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1143            The first one has to be handled carefully because it's the original
1144            copy, which has to be moved up. The remainder can be handled by code
1145            that is common with the non-zero minimum case below. We just have to
1146            adjust the value of repeat_max, since one less copy is required. */
1147    
1148            else
1149              {
1150              int offset;
1151              memmove(previous+4, previous, len);
1152              code += 4;
1153              *previous++ = OP_BRAZERO + repeat_type;
1154              *previous++ = OP_BRA;
1155    
1156              /* We chain together the bracket offset fields that have to be
1157              filled in later when the ends of the brackets are reached. */
1158    
1159              offset = (bralink == NULL)? 0 : previous - bralink;
1160              bralink = previous;
1161              *previous++ = offset >> 8;
1162              *previous++ = offset & 255;
1163              }
1164    
1165            repeat_max--;
1166            }
1167    
1168          /* If the minimum is greater than zero, replicate the group as many
1169          times as necessary, and adjust the maximum to the number of subsequent
1170          copies that we need. */
1171    
1172          else
1173            {
1174            for (i = 1; i < repeat_min; i++)
1175              {
1176              memcpy(code, previous, len);
1177              code += len;
1178              }
1179            if (repeat_max > 0) repeat_max -= repeat_min;
1180            }
1181    
1182          /* This code is common to both the zero and non-zero minimum cases. If
1183          the maximum is limited, it replicates the group in a nested fashion,
1184          remembering the bracket starts on a stack. In the case of a zero minimum,
1185          the first one was set up above. In all cases the repeat_max now specifies
1186          the number of additional copies needed. */
1187    
1188          if (repeat_max >= 0)
1189            {
1190            for (i = repeat_max - 1; i >= 0; i--)
1191              {
1192              *code++ = OP_BRAZERO + repeat_type;
1193    
1194              /* All but the final copy start a new nesting, maintaining the
1195              chain of brackets outstanding. */
1196    
1197              if (i != 0)
1198                {
1199                int offset;
1200                *code++ = OP_BRA;
1201                offset = (bralink == NULL)? 0 : code - bralink;
1202                bralink = code;
1203                *code++ = offset >> 8;
1204                *code++ = offset & 255;
1205                }
1206    
1207              memcpy(code, previous, len);
1208              code += len;
1209              }
1210    
1211            /* Now chain through the pending brackets, and fill in their length
1212            fields (which are holding the chain links pro tem). */
1213    
1214            while (bralink != NULL)
1215              {
1216              int oldlinkoffset;
1217              int offset = code - bralink + 1;
1218              uschar *bra = code - offset;
1219              oldlinkoffset = (bra[1] << 8) + bra[2];
1220              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1221              *code++ = OP_KET;
1222              *code++ = bra[1] = offset >> 8;
1223              *code++ = bra[2] = (offset & 255);
1224              }
1225            }
1226    
1227          /* If the maximum is unlimited, set a repeater in the final copy. We
1228          can't just offset backwards from the current code point, because we
1229          don't know if there's been an options resetting after the ket. The
1230          correct offset was computed above. */
1231    
1232          else code[-ketoffset] = OP_KETRMAX + repeat_type;
1233    
1234    
1235    #ifdef NEVER
1236        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
1237        equal to the minimum, the first copy remains where it is, and is        equal to the minimum, the first copy remains where it is, and is
1238        replicated up to the minimum number of times. This case includes the +        replicated up to the minimum number of times. This case includes the +
# Line 1145  for (;; ptr++) Line 1280  for (;; ptr++)
1280        correct offset was computed above. */        correct offset was computed above. */
1281    
1282        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1283    #endif
1284    
1285    
1286        }        }
1287    
1288      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1191  for (;; ptr++) Line 1329  for (;; ptr++)
1329    
1330          case '(':          case '(':
1331          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
1332          if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0)          if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1333            {            {
1334            condref = *ptr - '0';            condref = *ptr - '0';
1335            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
# Line 1324  for (;; ptr++) Line 1462  for (;; ptr++)
1462           errorptr,                     /* Where to put an error message */           errorptr,                     /* Where to put an error message */
1463           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
1464            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1465           condref))                     /* Condition reference number */           condref,                      /* Condition reference number */
1466             cd))                          /* Tables block */
1467        goto FAILED;        goto FAILED;
1468    
1469      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
# Line 1372  for (;; ptr++) Line 1511  for (;; ptr++)
1511    
1512      case '\\':      case '\\':
1513      tempptr = ptr;      tempptr = ptr;
1514      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1515    
1516      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1517      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1417  for (;; ptr++) Line 1556  for (;; ptr++)
1556        {        {
1557        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1558          {          {
1559          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1560          if (c == '#')          if (c == '#')
1561            {            {
1562            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1433  for (;; ptr++) Line 1572  for (;; ptr++)
1572        if (c == '\\')        if (c == '\\')
1573          {          {
1574          tempptr = ptr;          tempptr = ptr;
1575          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1576          if (c < 0) { ptr = tempptr; break; }          if (c < 0) { ptr = tempptr; break; }
1577          }          }
1578    
# Line 1445  for (;; ptr++) Line 1584  for (;; ptr++)
1584    
1585      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1586    
1587      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1588    
1589      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1590      the next state. */      the next state. */
# Line 1490  Argument: Line 1629  Argument:
1629    errorptr    -> pointer to error message    errorptr    -> pointer to error message
1630    lookbehind  TRUE if this is a lookbehind assertion    lookbehind  TRUE if this is a lookbehind assertion
1631    condref     > 0 for OPT_CREF setting at start of conditional group    condref     > 0 for OPT_CREF setting at start of conditional group
1632      cd          points to the data block with tables pointers
1633    
1634  Returns:      TRUE on success  Returns:      TRUE on success
1635  */  */
1636    
1637  static BOOL  static BOOL
1638  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1639    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1640      compile_data *cd)
1641  {  {
1642  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1643  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 1543  for (;;) Line 1684  for (;;)
1684    
1685    /* Now compile the branch */    /* Now compile the branch */
1686    
1687    if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged))    if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd))
1688      {      {
1689      *ptrptr = ptr;      *ptrptr = ptr;
1690      return FALSE;      return FALSE;
# Line 1649  for (;;) Line 1790  for (;;)
1790      code += 2;      code += 2;
1791      break;      break;
1792    
1793        case OP_WORD_BOUNDARY:
1794        case OP_NOT_WORD_BOUNDARY:
1795        code++;
1796        break;
1797    
1798      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1799      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1800      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1676  all of whose alternatives start with OP_ Line 1822  all of whose alternatives start with OP_
1822  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1823  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1824    
1825  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1826  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1827  trying them again.  so there is no point trying them again.
1828    
1829  Arguments:  Arguments:
1830    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
# Line 1696  do { Line 1842  do {
1842     register int op = *scode;     register int op = *scode;
1843     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1844       { if (!is_anchored(scode, options)) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1845     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1846                (*options & PCRE_DOTALL) != 0)
1847       { if (scode[1] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
1848     else if (op != OP_SOD &&     else if (op != OP_SOD &&
1849             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
# Line 1710  return TRUE; Line 1857  return TRUE;
1857    
1858    
1859  /*************************************************  /*************************************************
1860  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1861  *************************************************/  *************************************************/
1862    
1863  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1864  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1865    matching and for non-DOTALL patterns that start with .* (which must start at
1866    the beginning or after \n).
1867    
1868  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1869  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1728  do { Line 1877  do {
1877     register int op = *scode;     register int op = *scode;
1878     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1879       { if (!is_startline(scode)) return FALSE; }       { if (!is_startline(scode)) return FALSE; }
1880       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1881         { if (scode[1] != OP_ANY) return FALSE; }
1882     else if (op != OP_CIRC) return FALSE;     else if (op != OP_CIRC) return FALSE;
1883     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1884     }     }
# Line 1813  Arguments: Line 1964  Arguments:
1964    options      various option bits    options      various option bits
1965    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
1966    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
1967      tables       pointer to character tables or NULL
1968    
1969  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
1970                 with errorptr and erroroffset set                 with errorptr and erroroffset set
# Line 1820  Returns:       pointer to compiled data Line 1972  Returns:       pointer to compiled data
1972    
1973  pcre *  pcre *
1974  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1975    int *erroroffset)    int *erroroffset, const unsigned char *tables)
1976  {  {
1977  real_pcre *re;  real_pcre *re;
1978  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
# Line 1833  int branch_newextra; Line 1985  int branch_newextra;
1985  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
1986  uschar *code;  uschar *code;
1987  const uschar *ptr;  const uschar *ptr;
1988    compile_data compile_block;
1989  int brastack[BRASTACK_SIZE];  int brastack[BRASTACK_SIZE];
1990  uschar bralenstack[BRASTACK_SIZE];  uschar bralenstack[BRASTACK_SIZE];
1991    
# Line 1861  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2014  if ((options & ~PUBLIC_OPTIONS) != 0)
2014    return NULL;    return NULL;
2015    }    }
2016    
2017    /* Set up pointers to the individual character tables */
2018    
2019    if (tables == NULL) tables = pcre_default_tables;
2020    compile_block.lcc = tables + lcc_offset;
2021    compile_block.fcc = tables + fcc_offset;
2022    compile_block.cbits = tables + cbits_offset;
2023    compile_block.ctypes = tables + ctypes_offset;
2024    
2025    /* Reflect pattern for debugging output */
2026    
2027  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
2028  DPRINTF(("%s\n", pattern));  DPRINTF(("%s\n", pattern));
2029    
# Line 1879  while ((c = *(++ptr)) != 0) Line 2042  while ((c = *(++ptr)) != 0)
2042    
2043    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2044      {      {
2045      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2046      if (c == '#')      if (c == '#')
2047        {        {
2048        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1897  while ((c = *(++ptr)) != 0) Line 2060  while ((c = *(++ptr)) != 0)
2060      case '\\':      case '\\':
2061        {        {
2062        const uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2063        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2064        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2065        if (c >= 0)        if (c >= 0)
2066          {          {
# Line 1917  while ((c = *(++ptr)) != 0) Line 2080  while ((c = *(++ptr)) != 0)
2080        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2081        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2082        length++;   /* For single back reference */        length++;   /* For single back reference */
2083        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2084          {          {
2085          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2086          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2087          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2088            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1943  while ((c = *(++ptr)) != 0) Line 2106  while ((c = *(++ptr)) != 0)
2106      or back reference. */      or back reference. */
2107    
2108      case '{':      case '{':
2109      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2110      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2111      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2112      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2113        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1979  while ((c = *(++ptr)) != 0) Line 2142  while ((c = *(++ptr)) != 0)
2142        {        {
2143        if (*ptr == '\\')        if (*ptr == '\\')
2144          {          {
2145          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2146              &compile_block);
2147          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2148          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2149          }          }
# Line 1996  while ((c = *(++ptr)) != 0) Line 2160  while ((c = *(++ptr)) != 0)
2160    
2161        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2162    
2163        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2164          {          {
2165          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2166          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2167          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2168            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 2064  while ((c = *(++ptr)) != 0) Line 2228  while ((c = *(++ptr)) != 0)
2228          group. */          group. */
2229    
2230          case '(':          case '(':
2231          if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0)          if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2232            {            {
2233            ptr += 4;            ptr += 4;
2234            length += 2;            length += 2;
2235            while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++;            while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2236            if (*ptr != ')')            if (*ptr != ')')
2237              {              {
2238              *errorptr = ERR26;              *errorptr = ERR26;
# Line 2237  while ((c = *(++ptr)) != 0) Line 2401  while ((c = *(++ptr)) != 0)
2401        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2402        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2403    
2404        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2405          {          {
2406          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2407              &compile_block);
2408          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2409          }          }
2410        else if (c == '*') { minval = 0; maxval = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2411        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2412        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2413    
2414        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2415        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2416        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2417        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2418    
2419        if (minval == 0) length++;        if (minval == 0)
2420          else if (minval > 1) length += (minval - 1) * duplength;          {
2421        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2422            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2423            }
2424    
2425          /* When the minimum is greater than zero, we have to replicate up to
2426          minval-1 times, with no additions required in the copies. Then, if
2427          there is a limited maximum we have to replicate up to maxval-1 times
2428          allowing for a BRAZERO item before each optional copy and nesting
2429          brackets for all but one of the optional copies. */
2430    
2431          else
2432            {
2433            length += (minval - 1) * duplength;
2434            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2435              length += (maxval - minval) * (duplength + 7) - 6;
2436            }
2437        }        }
2438      continue;      continue;
2439    
# Line 2270  while ((c = *(++ptr)) != 0) Line 2450  while ((c = *(++ptr)) != 0)
2450        {        {
2451        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
2452          {          {
2453          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2454          if (c == '#')          if (c == '#')
2455            {            {
2456            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 2284  while ((c = *(++ptr)) != 0) Line 2464  while ((c = *(++ptr)) != 0)
2464        if (c == '\\')        if (c == '\\')
2465          {          {
2466          const uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2467          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2468              &compile_block);
2469          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2470          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2471          }          }
# Line 2296  while ((c = *(++ptr)) != 0) Line 2477  while ((c = *(++ptr)) != 0)
2477    
2478      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2479    
2480      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2481          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2482    
2483      ptr--;      ptr--;
2484      length += runlength;      length += runlength;
# Line 2331  if (re == NULL) Line 2513  if (re == NULL)
2513    
2514  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2515  re->options = options;  re->options = options;
2516    re->tables = tables;
2517    
2518  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2519  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
# Line 2340  ptr = (const uschar *)pattern; Line 2523  ptr = (const uschar *)pattern;
2523  code = re->code;  code = re->code;
2524  *code = OP_BRA;  *code = OP_BRA;
2525  bracount = 0;  bracount = 0;
2526  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2527      &compile_block);
2528  re->top_bracket = bracount;  re->top_bracket = bracount;
2529  re->top_backref = top_backref;  re->top_backref = top_backref;
2530    
# Line 2372  if (*errorptr != NULL) Line 2556  if (*errorptr != NULL)
2556    return NULL;    return NULL;
2557    }    }
2558    
2559  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2560  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2561  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2562  unanchored matches no end. In the case of multiline matches, an alternative is  
2563  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2564    that speeds up unanchored matches no end. If not, see if we can set the
2565    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2566    start with ^, and also when all branches start with .* for non-DOTALL matches.
2567    */
2568    
2569  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2570    {    {
# Line 2637  return (pcre *)re; Line 2825  return (pcre *)re;
2825    
2826    
2827  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2828  *          Match a back-reference                *  *          Match a back-reference                *
2829  *************************************************/  *************************************************/
2830    
# Line 2719  if (length > md->end_subject - eptr) ret Line 2867  if (length > md->end_subject - eptr) ret
2867  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2868    
2869  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
2870    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2871      while (length-- > 0)
2872        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2873      }
2874  else  else
2875    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2876    
# Line 2782  for (;;) Line 2933  for (;;)
2933      int number = op - OP_BRA;      int number = op - OP_BRA;
2934      int offset = number << 1;      int offset = number << 1;
2935    
2936      DPRINTF(("start bracket %d\n", number));  #ifdef DEBUG
2937        printf("start bracket %d subject=", number);
2938        pchars(eptr, 16, TRUE, md);
2939        printf("\n");
2940    #endif
2941    
2942      if (offset < md->offset_max)      if (offset < md->offset_max)
2943        {        {
# Line 3172  for (;;) Line 3327  for (;;)
3327      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3328        {        {
3329        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3330          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3331        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3332          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3333        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3334             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3335          return FALSE;          return FALSE;
# Line 3191  for (;;) Line 3346  for (;;)
3346      break;      break;
3347    
3348      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3349      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3350           (md->ctypes[*eptr++] & ctype_digit) != 0)
3351        return FALSE;        return FALSE;
3352      ecode++;      ecode++;
3353      break;      break;
3354    
3355      case OP_DIGIT:      case OP_DIGIT:
3356      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3357           (md->ctypes[*eptr++] & ctype_digit) == 0)
3358        return FALSE;        return FALSE;
3359      ecode++;      ecode++;
3360      break;      break;
3361    
3362      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3363      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3364           (md->ctypes[*eptr++] & ctype_space) != 0)
3365        return FALSE;        return FALSE;
3366      ecode++;      ecode++;
3367      break;      break;
3368    
3369      case OP_WHITESPACE:      case OP_WHITESPACE:
3370      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3371           (md->ctypes[*eptr++] & ctype_space) == 0)
3372        return FALSE;        return FALSE;
3373      ecode++;      ecode++;
3374      break;      break;
3375    
3376      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3377      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3378           (md->ctypes[*eptr++] & ctype_word) != 0)
3379        return FALSE;        return FALSE;
3380      ecode++;      ecode++;
3381      break;      break;
3382    
3383      case OP_WORDCHAR:      case OP_WORDCHAR:
3384      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3385           (md->ctypes[*eptr++] & ctype_word) == 0)
3386        return FALSE;        return FALSE;
3387      ecode++;      ecode++;
3388      break;      break;
# Line 3453  for (;;) Line 3614  for (;;)
3614        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3615        if ((ims & PCRE_CASELESS) != 0)        if ((ims & PCRE_CASELESS) != 0)
3616          {          {
3617          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3618              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3619                return FALSE;
3620          }          }
3621        else        else
3622          {          {
# Line 3510  for (;;) Line 3673  for (;;)
3673    
3674      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
3675        {        {
3676        c = pcre_lcc[c];        c = md->lcc[c];
3677        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3678            if (c != md->lcc[*eptr++]) return FALSE;
3679        if (min == max) continue;        if (min == max) continue;
3680        if (minimize)        if (minimize)
3681          {          {
# Line 3519  for (;;) Line 3683  for (;;)
3683            {            {
3684            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3685              return TRUE;              return TRUE;
3686            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject ||
3687                  c != md->lcc[*eptr++])
3688              return FALSE;              return FALSE;
3689            }            }
3690          /* Control never gets here */          /* Control never gets here */
# Line 3529  for (;;) Line 3694  for (;;)
3694          const uschar *pp = eptr;          const uschar *pp = eptr;
3695          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3696            {            {
3697            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3698            eptr++;            eptr++;
3699            }            }
3700          while (eptr >= pp)          while (eptr >= pp)
# Line 3579  for (;;) Line 3744  for (;;)
3744      ecode++;      ecode++;
3745      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
3746        {        {
3747        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3748        }        }
3749      else      else
3750        {        {
# Line 3639  for (;;) Line 3804  for (;;)
3804    
3805      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
3806        {        {
3807        c = pcre_lcc[c];        c = md->lcc[c];
3808        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3809            if (c == md->lcc[*eptr++]) return FALSE;
3810        if (min == max) continue;        if (min == max) continue;
3811        if (minimize)        if (minimize)
3812          {          {
# Line 3648  for (;;) Line 3814  for (;;)
3814            {            {
3815            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3816              return TRUE;              return TRUE;
3817            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject ||
3818                  c == md->lcc[*eptr++])
3819              return FALSE;              return FALSE;
3820            }            }
3821          /* Control never gets here */          /* Control never gets here */
# Line 3658  for (;;) Line 3825  for (;;)
3825          const uschar *pp = eptr;          const uschar *pp = eptr;
3826          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3827            {            {
3828            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3829            eptr++;            eptr++;
3830            }            }
3831          while (eptr >= pp)          while (eptr >= pp)
# Line 3752  for (;;) Line 3919  for (;;)
3919    
3920        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3921        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3922          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3923        break;        break;
3924    
3925        case OP_DIGIT:        case OP_DIGIT:
3926        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3927          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3928        break;        break;
3929    
3930        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3931        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3932          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3933        break;        break;
3934    
3935        case OP_WHITESPACE:        case OP_WHITESPACE:
3936        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3937          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3938        break;        break;
3939    
3940        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3941        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
3942          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
3943              return FALSE;
3944        break;        break;
3945    
3946        case OP_WORDCHAR:        case OP_WORDCHAR:
3947        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
3948          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
3949              return FALSE;
3950        break;        break;
3951        }        }
3952    
# Line 3786  for (;;) Line 3955  for (;;)
3955      if (min == max) continue;      if (min == max) continue;
3956    
3957      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
3958      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
3959    
3960      if (minimize)      if (minimize)
3961        {        {
3962        for (i = min;; i++)        for (i = min;; i++)
3963          {          {
3964          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
3965          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
3966            !match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0))  
3967              return FALSE;          c = *eptr++;
3968            switch(ctype)
3969              {
3970              case OP_ANY:
3971              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
3972              break;
3973    
3974              case OP_NOT_DIGIT:
3975              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
3976              break;
3977    
3978              case OP_DIGIT:
3979              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
3980              break;
3981    
3982              case OP_NOT_WHITESPACE:
3983              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
3984              break;
3985    
3986              case OP_WHITESPACE:
3987              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
3988              break;
3989    
3990              case OP_NOT_WORDCHAR:
3991              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
3992              break;
3993    
3994              case OP_WORDCHAR:
3995              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
3996              break;
3997              }
3998          }          }
3999        /* Control never gets here */        /* Control never gets here */
4000        }        }
# Line 3828  for (;;) Line 4027  for (;;)
4027          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4028          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4029            {            {
4030            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4031              break;              break;
4032            eptr++;            eptr++;
4033            }            }
# Line 3837  for (;;) Line 4036  for (;;)
4036          case OP_DIGIT:          case OP_DIGIT:
4037          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4038            {            {
4039            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4040              break;              break;
4041            eptr++;            eptr++;
4042            }            }
# Line 3846  for (;;) Line 4045  for (;;)
4045          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4046          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4047            {            {
4048            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4049              break;              break;
4050            eptr++;            eptr++;
4051            }            }
# Line 3855  for (;;) Line 4054  for (;;)
4054          case OP_WHITESPACE:          case OP_WHITESPACE:
4055          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4056            {            {
4057            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4058              break;              break;
4059            eptr++;            eptr++;
4060            }            }
# Line 3864  for (;;) Line 4063  for (;;)
4063          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4064          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4065            {            {
4066            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4067              break;              break;
4068            eptr++;            eptr++;
4069            }            }
# Line 3873  for (;;) Line 4072  for (;;)
4072          case OP_WORDCHAR:          case OP_WORDCHAR:
4073          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4074            {            {
4075            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4076              break;              break;
4077            eptr++;            eptr++;
4078            }            }
# Line 3919  Arguments: Line 4118  Arguments:
4118    external_extra  points to "hints" from pcre_study() or is NULL    external_extra  points to "hints" from pcre_study() or is NULL
4119    subject         points to the subject string    subject         points to the subject string
4120    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
4121      start_offset    byte offset in the subject string at which matching starts
4122    options         option bits    options         option bits
4123    offsets         points to a vector of ints to be filled in with offsets    offsets         points to a vector of ints to be filled in with offsets
4124    offsetcount     the number of elements in the vector    offsetcount     the number of elements in the vector
# Line 3931  Returns:          > 0 => success; value Line 4131  Returns:          > 0 => success; value
4131    
4132  int  int
4133  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4134    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int start_offset, int options, int *offsets,
4135      int offsetcount)
4136  {  {
4137  int resetcount, ocount;  int resetcount, ocount;
4138  int first_char = -1;  int first_char = -1;
4139  int ims = 0;  int ims = 0;
4140  match_data match_block;  match_data match_block;
4141  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4142  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject + start_offset;
4143  const uschar *end_subject;  const uschar *end_subject;
4144  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4145  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
# Line 3963  match_block.noteol = (options & PCRE_NOT Line 4164  match_block.noteol = (options & PCRE_NOT
4164    
4165  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4166    
4167    match_block.lcc = re->tables + lcc_offset;
4168    match_block.ctypes = re->tables + ctypes_offset;
4169    
4170  /* The ims options can vary during the matching as a result of the presence  /* The ims options can vary during the matching as a result of the presence
4171  of (?ims) items in the pattern. They are kept in a local variable so that  of (?ims) items in the pattern. They are kept in a local variable so that
4172  restoring at the exit of a group is easy. */  restoring at the exit of a group is easy. */
# Line 3997  in the pattern. */ Line 4201  in the pattern. */
4201  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
4202  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
4203    
4204    /* Reset the working variables (one per extraction). These should
4205    never be used unless previously set, but they get saved and restored, and so we
4206    initialize them to avoid reading uninitialized locations. */
4207    
4208    if (match_block.offset_vector != NULL)
4209      {
4210      register int *iptr = match_block.offset_vector + ocount;
4211      register int *iend = iptr - resetcount/2 + 1;
4212      while (--iptr >= iend) *iptr = -1;
4213      }
4214    
4215  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4216  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4217  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
# Line 4008  if (!anchored) Line 4223  if (!anchored)
4223    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4224      {      {
4225      first_char = re->first_char;      first_char = re->first_char;
4226      if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4227      }      }
4228    else    else
4229      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
# Line 4016  if (!anchored) Line 4231  if (!anchored)
4231          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4232    }    }
4233    
4234  /* Loop for unanchored matches; for anchored regexps the loop runs just once. */  /* Loop for unanchored matches; for anchored regexs the loop runs just once. */
4235    
4236  do  do
4237    {    {
# Line 4033  do Line 4248  do
4248    if (first_char >= 0)    if (first_char >= 0)
4249      {      {
4250      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
4251        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject &&
4252                 match_block.lcc[*start_match] != first_char)
4253          start_match++;          start_match++;
4254      else      else
4255        while (start_match < end_subject && *start_match != first_char)        while (start_match < end_subject && *start_match != first_char)
# Line 4106  do Line 4322  do
4322    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4323    return rc;    return rc;
4324    }    }
4325    
4326    /* This "while" is the end of the "do" above */
4327    
4328  while (!anchored &&  while (!anchored &&
4329         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
4330         start_match++ < end_subject);         start_match++ < end_subject);

Legend:
Removed from v.23  
changed lines
  Added in v.35

  ViewVC Help
Powered by ViewVC 1.1.5