/[pcre]/code/tags/pcre-1.09/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-1.09/pcre.c

Parent Directory | Revision Log | View Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 21 by nigel, Sat Feb 24 21:38:37 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
68      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70    "not",    "not",
71    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 66  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 81  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 116  typedef struct match_data {
116    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
117    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
118    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
119    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
120    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
121    jmp_buf fail_env;             /* Environment for longjump() break out */    jmp_buf fail_env;             /* Environment for longjump() break out */
122    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
123    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
124  } match_data;  } match_data;
125    
# Line 126  void  (*pcre_free)(void *) = free; Line 144  void  (*pcre_free)(void *) = free;
144  *          Return version string                 *  *          Return version string                 *
145  *************************************************/  *************************************************/
146    
147  char *  const char *
148  pcre_version(void)  pcre_version(void)
149  {  {
150  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 174  Returns:        number of identifying ex
174  int  int
175  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
176  {  {
177  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
178  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
179  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 204  Arguments:
204  Returns:     nothing  Returns:     nothing
205  */  */
206    
207  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
208    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209  {  {
210  int c;  int c;
211  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 228  do { Line 247  do {
247      /* Test an embedded subpattern; if it could not be empty, break the      /* Test an embedded subpattern; if it could not be empty, break the
248      loop. Otherwise carry on in the branch. */      loop. Otherwise carry on in the branch. */
249    
250      if ((int)(*cc) >= OP_BRA)      if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
251        {        {
252        if (!could_be_empty(cc)) break;        if (!could_be_empty(cc)) break;
253        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
# Line 245  do { Line 264  do {
264        case OP_KETRMIN:        case OP_KETRMIN:
265        return TRUE;        return TRUE;
266    
267          /* Skip over entire bracket groups with zero lower bound */
268    
269          case OP_BRAZERO:
270          case OP_BRAMINZERO:
271          cc++;
272          /* Fall through */
273    
274        /* Skip over assertive subpatterns */        /* Skip over assertive subpatterns */
275    
276        case OP_ASSERT:        case OP_ASSERT:
# Line 259  do { Line 285  do {
285        case OP_EOD:        case OP_EOD:
286        case OP_CIRC:        case OP_CIRC:
287        case OP_DOLL:        case OP_DOLL:
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
288        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
289        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
290        cc++;        cc++;
# Line 272  do { Line 296  do {
296        case OP_MINSTAR:        case OP_MINSTAR:
297        case OP_QUERY:        case OP_QUERY:
298        case OP_MINQUERY:        case OP_MINQUERY:
299          case OP_NOTSTAR:
300          case OP_NOTMINSTAR:
301          case OP_NOTQUERY:
302          case OP_NOTMINQUERY:
303        case OP_TYPESTAR:        case OP_TYPESTAR:
304        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
305        case OP_TYPEQUERY:        case OP_TYPEQUERY:
# Line 291  do { Line 319  do {
319        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
320    
321        case OP_CLASS:        case OP_CLASS:
322          case OP_NEGCLASS:
323        case OP_REF:        case OP_REF:
324        cc += (*cc == OP_REF)? 2 : 4 + 2 * cc[2] + cc[3];        cc += (*cc == OP_REF)? 2 : 33;
325    
326        switch (*cc)        switch (*cc)
327          {          {
# Line 356  Returns:     zero or positive => a data Line 385  Returns:     zero or positive => a data
385  */  */
386    
387  static int  static int
388  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
389    BOOL isclass)    int options, BOOL isclass)
390  {  {
391  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
392  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
393  int i;  int i;
394    
# Line 378  else if ((i = escapes[c - '0']) != 0) c Line 407  else if ((i = escapes[c - '0']) != 0) c
407    
408  else  else
409    {    {
410    uschar *oldptr;    const uschar *oldptr;
411    switch (c)    switch (c)
412      {      {
413      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 498  Returns:    TRUE or FALSE Line 527  Returns:    TRUE or FALSE
527  */  */
528    
529  static BOOL  static BOOL
530  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
531  {  {
532  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
533  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 533  Returns:     pointer to '}' on success; Line 562  Returns:     pointer to '}' on success;
562               current ptr on error, with errorptr set               current ptr on error, with errorptr set
563  */  */
564    
565  static uschar *  static const uschar *
566  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
567  {  {
568  int min = 0;  int min = 0;
569  int max = -1;  int max = -1;
# Line 588  Returns:     TRUE on success Line 617  Returns:     TRUE on success
617  */  */
618    
619  static BOOL  static BOOL
620  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
621    char **errorptr)    const uschar **ptrptr, const char **errorptr)
622  {  {
623  int repeat_type, op_type;  int repeat_type, op_type;
624  int repeat_min, repeat_max;  int repeat_min, repeat_max;
625  int bravalue, length;  int bravalue, length;
626    int greedy_default, greedy_non_default;
627  register int c;  register int c;
628  register uschar *code = *codeptr;  register uschar *code = *codeptr;
629  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
630    const uschar *oldptr;
631  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
632  uschar class[32];  uschar class[32];
633    
634    /* Set up the default and non-default settings for greediness */
635    
636    greedy_default = ((options & PCRE_UNGREEDY) != 0);
637    greedy_non_default = greedy_default ^ 1;
638    
639  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
640    
641  for (;; ptr++)  for (;; ptr++)
# Line 655  for (;; ptr++) Line 690  for (;; ptr++)
690    
691      case '[':      case '[':
692      previous = code;      previous = code;
     *code++ = OP_CLASS;  
693    
694      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
695        different opcode. This only matters if caseless matching is specified at
696        runtime. */
697    
698      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
699        {        {
700        negate_class = TRUE;        negate_class = TRUE;
701          *code++ = OP_NEGCLASS;
702        c = *(++ptr);        c = *(++ptr);
703        }        }
704      else negate_class = FALSE;      else
705          {
706          negate_class = FALSE;
707          *code++ = OP_CLASS;
708          }
709    
710      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
711      character. */      character. */
# Line 693  for (;; ptr++) Line 734  for (;; ptr++)
734        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
735        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
736        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
737        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
738        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
739        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
740        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 872  for (;; ptr++) Line 913  for (;; ptr++)
913        goto FAILED;        goto FAILED;
914        }        }
915    
916      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
917        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
918      next character. */      next character. */
919    
920      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
921          { repeat_type = greedy_non_default; ptr++; }
922        else repeat_type = greedy_default;
923    
924      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
925      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 964  for (;; ptr++) Line 1008  for (;; ptr++)
1008          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1009          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1010          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1011          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1012          back naturally. */          put back naturally. Note that the final character is always going to
1013            get added below. */
1014    
1015          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1016            {            {
1017            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1018            }            }
1019    
1020          /* Insert an UPTO if the max is greater than the min. */          /*  For a single negated character we also have to put back the
1021            item that got cancelled. */
1022    
1023            else if (*previous == OP_NOT) code++;
1024    
1025            /* If the maximum is unlimited, insert an OP_STAR. */
1026    
1027            if (repeat_max < 0)
1028              {
1029              *code++ = c;
1030              *code++ = OP_STAR + repeat_type;
1031              }
1032    
1033            /* Else insert an UPTO if the max is greater than the min. */
1034    
1035          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1036            {            {
1037            *code++ = c;            *code++ = c;
1038            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 992  for (;; ptr++) Line 1050  for (;; ptr++)
1050      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1051      stuff after it. */      stuff after it. */
1052    
1053      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1054                 *previous == OP_REF)
1055        {        {
1056        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1057          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1018  for (;; ptr++) Line 1077  for (;; ptr++)
1077      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1078        {        {
1079        int i;        int i;
1080        int length = code - previous;        int len = code - previous;
1081    
1082        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1083          {          {
# Line 1035  for (;; ptr++) Line 1094  for (;; ptr++)
1094          {          {
1095          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1096            {            {
1097            memcpy(code, previous, length);            memcpy(code, previous, len);
1098            code += length;            code += len;
1099            }            }
1100          }          }
1101    
# Line 1048  for (;; ptr++) Line 1107  for (;; ptr++)
1107          {          {
1108          if (repeat_min == 0)          if (repeat_min == 0)
1109            {            {
1110            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1111            code++;            code++;
1112            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1113            }            }
1114    
1115          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1116            {            {
1117            memcpy(code, previous, length);            memcpy(code, previous, len);
1118            code += length;            code += len;
1119            }            }
1120    
1121          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1122            {            {
1123            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1124            memcpy(code, previous, length);            memcpy(code, previous, len);
1125            code += length;            code += len;
1126            }            }
1127          }          }
1128    
# Line 1105  for (;; ptr++) Line 1164  for (;; ptr++)
1164          case 'm':          case 'm':
1165          case 's':          case 's':
1166          case 'x':          case 'x':
1167            case 'U':
1168            case 'X':
1169          ptr++;          ptr++;
1170          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
1171          previous = NULL;          previous = NULL;
# Line 1210  for (;; ptr++) Line 1271  for (;; ptr++)
1271        continue;        continue;
1272        }        }
1273    
1274      /* Reset and fall through */      /* Data character: reset and fall through */
1275    
1276      ptr = oldptr;      ptr = oldptr;
1277      c = '\\';      c = '\\';
# Line 1264  for (;; ptr++) Line 1325  for (;; ptr++)
1325      the next state. */      the next state. */
1326    
1327      previous[1] = length;      previous[1] = length;
1328      ptr--;      if (length < 255) ptr--;
1329      break;      break;
1330      }      }
1331    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1301  Returns:    TRUE on success Line 1362  Returns:    TRUE on success
1362  */  */
1363    
1364  static BOOL  static BOOL
1365  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1366    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1367  {  {
1368  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1369  uschar *code = *codeptr;  uschar *code = *codeptr;
1370  uschar *start_bracket = code;  uschar *start_bracket = code;
1371    
# Line 1370  Returns:   TRUE or FALSE Line 1431  Returns:   TRUE or FALSE
1431  */  */
1432    
1433  static BOOL  static BOOL
1434  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1435  {  {
1436  do {  do {
1437     int op = (int)code[3];     int op = (int)code[3];
# Line 1399  Returns:   TRUE or FALSE Line 1460  Returns:   TRUE or FALSE
1460  */  */
1461    
1462  static BOOL  static BOOL
1463  is_startline(uschar *code)  is_startline(const uschar *code)
1464  {  {
1465  do {  do {
1466     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1484  Returns:       pointer to compiled data Line 1545  Returns:       pointer to compiled data
1545  */  */
1546    
1547  pcre *  pcre *
1548  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1549    int *erroroffset)    int *erroroffset)
1550  {  {
1551  real_pcre *re;  real_pcre *re;
# Line 1494  int runlength; Line 1555  int runlength;
1555  int c, size;  int c, size;
1556  int bracount = 0;  int bracount = 0;
1557  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1558  int top_backref = 0;  int top_backref = 0;
1559  uschar *code, *ptr;  unsigned int brastackptr = 0;
1560    uschar *code;
1561    const uschar *ptr;
1562    
1563  #ifdef DEBUG  #ifdef DEBUG
1564  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1523  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1585  if ((options & ~PUBLIC_OPTIONS) != 0)
1585    return NULL;    return NULL;
1586    }    }
1587    
1588  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1589  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1590    
1591  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1592  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1535  internal flag settings. Make an attempt Line 1595  internal flag settings. Make an attempt
1595  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1596  clever for #-comments. */  clever for #-comments. */
1597    
1598  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1599  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1600    {    {
1601    int min, max;    int min, max;
# Line 1562  while ((c = *(++ptr)) != 0) Line 1622  while ((c = *(++ptr)) != 0)
1622    
1623      case '\\':      case '\\':
1624        {        {
1625        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1626        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1627        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1628        if (c >= 0)        if (c >= 0)
# Line 1641  while ((c = *(++ptr)) != 0) Line 1701  while ((c = *(++ptr)) != 0)
1701        {        {
1702        if (*ptr == '\\')        if (*ptr == '\\')
1703          {          {
1704          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1705          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1706          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1707          }          }
1708        else class_charcount++;        else class_charcount++;
1709        ptr++;        ptr++;
# Line 1658  while ((c = *(++ptr)) != 0) Line 1718  while ((c = *(++ptr)) != 0)
1718    
1719        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1720    
1721        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1722          {          {
1723          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1724          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1709  while ((c = *(++ptr)) != 0) Line 1769  while ((c = *(++ptr)) != 0)
1769          ptr += 2;          ptr += 2;
1770          break;          break;
1771          }          }
1772        /* Else fall thourh */        /* Else fall through */
1773    
1774        /* Else loop setting valid options until ) is met. Anything else is an        /* Else loop setting valid options until ) is met. Anything else is an
1775        error. */        error. */
# Line 1739  while ((c = *(++ptr)) != 0) Line 1799  while ((c = *(++ptr)) != 0)
1799            length -= spaces;          /* Already counted spaces */            length -= spaces;          /* Already counted spaces */
1800            continue;            continue;
1801            }            }
1802            else if (c == 'X')
1803              {
1804              options |= PCRE_EXTRA;
1805              continue;
1806              }
1807            else if (c == 'U')
1808              {
1809              options |= PCRE_UNGREEDY;
1810              continue;
1811              }
1812          else if (c == ')') break;          else if (c == ')') break;
1813    
1814          *errorptr = ERR12;          *errorptr = ERR12;
# Line 1766  while ((c = *(++ptr)) != 0) Line 1836  while ((c = *(++ptr)) != 0)
1836      continue;      continue;
1837    
1838      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1839      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1840        0 this is an unmatched bracket which will generate an error, but take care
1841        not to try to access brastack[-1]. */
1842    
1843      case ')':      case ')':
1844      length += 3;      length += 3;
1845        {        {
1846        int min = 1;        int minval = 1;
1847        int max = 1;        int maxval = 1;
1848        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1849    
1850        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1851        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1852    
1853        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1854          {          {
1855          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1856          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1857          }          }
1858        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1859        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1860        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1861    
1862        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1863        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1864        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1865        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1866    
1867        if (min == 0) length++;        if (minval == 0) length++;
1868          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1869        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1870        }        }
   
1871      continue;      continue;
1872    
1873      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1827  while ((c = *(++ptr)) != 0) Line 1898  while ((c = *(++ptr)) != 0)
1898    
1899        if (c == '\\')        if (c == '\\')
1900          {          {
1901          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1902          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1903          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1904          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1857  if (length > 65539) Line 1928  if (length > 65539)
1928    }    }
1929    
1930  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1931  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1932    rather than just "code", because it has been reported that one broken compiler
1933    fails on "code" because it is also an independent variable. It should make no
1934    difference to the value of the offsetof(). */
1935    
1936  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1937  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1938    
1939  if (re == NULL)  if (re == NULL)
# Line 1868  if (re == NULL) Line 1942  if (re == NULL)
1942    return NULL;    return NULL;
1943    }    }
1944    
1945    /* Put in the magic number and the options. */
1946    
1947  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1948  re->options = options;  re->options = options;
1949    
# Line 1875  re->options = options; Line 1951  re->options = options;
1951  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1952  of the function here. */  of the function here. */
1953    
1954  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1955  code = re->code;  code = re->code;
1956  *code = OP_BRA;  *code = OP_BRA;
1957  bracount = 0;  bracount = 0;
# Line 1902  if (*errorptr != NULL) Line 1978  if (*errorptr != NULL)
1978    {    {
1979    (pcre_free)(re);    (pcre_free)(re);
1980    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1981    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1982    return NULL;    return NULL;
1983    }    }
1984    
# Line 1918  if ((options & PCRE_ANCHORED) == 0) Line 1994  if ((options & PCRE_ANCHORED) == 0)
1994      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1995    else    else
1996      {      {
1997      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1998      if (c >= 0)      if (ch >= 0)
1999        {        {
2000        re->first_char = c;        re->first_char = ch;
2001        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
2002        }        }
2003      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 1938  printf("Length = %d top_bracket = %d top Line 2014  printf("Length = %d top_bracket = %d top
2014    
2015  if (re->options != 0)  if (re->options != 0)
2016    {    {
2017    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2018      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2019      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2020      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2021      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2022      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2023      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2024      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2025        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2026    }    }
2027    
2028  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 2013  while (code < code_end) Line 2090  while (code < code_end)
2090      case OP_MINUPTO:      case OP_MINUPTO:
2091      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2092        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2093      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2094      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2095      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2096      code += 3;      code += 3;
# Line 2058  while (code < code_end) Line 2135  while (code < code_end)
2135    
2136      case OP_REF:      case OP_REF:
2137      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2138      break;      code ++;
2139        goto CLASS_REF_REPEAT;
2140    
2141      case OP_CLASS:      case OP_CLASS:
2142        case OP_NEGCLASS:
2143        {        {
2144        int i, min, max;        int i, min, max;
2145    
2146        code++;        if (*code++ == OP_CLASS) printf("    [");
2147        printf("    [");          else printf("   ^[");
2148    
2149        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2150          {          {
# Line 2088  while (code < code_end) Line 2167  while (code < code_end)
2167        printf("]");        printf("]");
2168        code += 32;        code += 32;
2169    
2170          CLASS_REF_REPEAT:
2171    
2172        switch(*code)        switch(*code)
2173          {          {
2174          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2200  Returns:      TRUE if matched Line 2281  Returns:      TRUE if matched
2281  */  */
2282    
2283  static BOOL  static BOOL
2284  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2285  {  {
2286  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2287    
2288  #ifdef DEBUG  #ifdef DEBUG
2289  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2249  Returns:       TRUE if matched Line 2330  Returns:       TRUE if matched
2330  */  */
2331    
2332  static BOOL  static BOOL
2333  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2334    match_data *md)    match_data *md)
2335  {  {
2336  for (;;)  for (;;)
# Line 2257  for (;;) Line 2338  for (;;)
2338    int min, max, ctype;    int min, max, ctype;
2339    register int i;    register int i;
2340    register int c;    register int c;
2341    BOOL minimize;    BOOL minimize = FALSE;
2342    
2343    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2344    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2270  for (;;) Line 2351  for (;;)
2351    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2352      {      {
2353      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2354      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2355    
2356      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2357    
2358      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2359        {        {
# Line 2282  for (;;) Line 2361  for (;;)
2361        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2362        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2363    
2364        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2365        }        }
2366    
2367      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2296  for (;;) Line 2373  for (;;)
2373        }        }
2374      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2375    
2376      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2377    
2378      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2379        {        {
# Line 2360  for (;;) Line 2435  for (;;)
2435    
2436      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
2437      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
2438      a back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
2439      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
2440      matches, we carry on, leaving the subject pointer. */      matches, we carry on, leaving the subject pointer. */
2441    
# Line 2397  for (;;) Line 2472  for (;;)
2472    
2473      case OP_BRAZERO:      case OP_BRAZERO:
2474        {        {
2475        uschar *next = ecode+1;        const uschar *next = ecode+1;
2476        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2477        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2478        ecode = next + 3;        ecode = next + 3;
# Line 2406  for (;;) Line 2481  for (;;)
2481    
2482      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2483        {        {
2484        uschar *next = ecode+1;        const uschar *next = ecode+1;
2485        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2486        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2487        ecode++;        ecode++;
# Line 2422  for (;;) Line 2497  for (;;)
2497      case OP_KETRMAX:      case OP_KETRMAX:
2498        {        {
2499        int number;        int number;
2500        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2501    
2502        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2503          {          {
# Line 2437  for (;;) Line 2512  for (;;)
2512    
2513        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2514    
2515        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2516    
2517        if (number > 0)        if (number > 0)
2518          {          {
# Line 2671  for (;;) Line 2744  for (;;)
2744    
2745        else        else
2746          {          {
2747          uschar *pp = eptr;          const uschar *pp = eptr;
2748          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2749            {            {
2750            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2691  for (;;) Line 2764  for (;;)
2764      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2765      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2766      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2767      versions of a character. */      versions of a character, and we have to behave differently for positive and
2768        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2769        treated differently. */
2770    
2771      case OP_CLASS:      case OP_CLASS:
2772        case OP_NEGCLASS:
2773        {        {
2774        uschar *data = ecode + 1;  /* Save for matching */        BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2775        ecode += 33;               /* Advance past the item */        const uschar *data = ecode + 1;  /* Save for matching */
2776          ecode += 33;                     /* Advance past the item */
2777    
2778        switch (*ecode)        switch (*ecode)
2779          {          {
# Line 2723  for (;;) Line 2800  for (;;)
2800          break;          break;
2801    
2802          default:               /* No repeat follows */          default:               /* No repeat follows */
2803          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2804          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2805          }          }
2806    
2807        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2740  for (;;) Line 2810  for (;;)
2810          {          {
2811          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2812          c = *eptr++;          c = *eptr++;
2813          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2814          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2815            runtime caseless, continue if either case is in the map. */
2816    
2817            if (!nasty_case)
2818              {
2819              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2820              if (md->runtime_caseless)
2821                {
2822                c = pcre_fcc[c];
2823                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2824                }
2825              }
2826    
2827            /* Runtime caseless and it was a negative class. Continue only if
2828            both cases are in the map. */
2829    
2830            else
2831            {            {
2832              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2833            c = pcre_fcc[c];            c = pcre_fcc[c];
2834            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2835            }            }
2836    
2837          return FALSE;          return FALSE;
2838          }          }
2839    
# Line 2764  for (;;) Line 2852  for (;;)
2852            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2853            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2854            c = *eptr++;            c = *eptr++;
2855            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2856            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2857              runtime caseless, continue if either case is in the map. */
2858    
2859              if (!nasty_case)
2860                {
2861                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2862                if (md->runtime_caseless)
2863                  {
2864                  c = pcre_fcc[c];
2865                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2866                  }
2867                }
2868    
2869              /* Runtime caseless and it was a negative class. Continue only if
2870              both cases are in the map. */
2871    
2872              else
2873              {              {
2874                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2875              c = pcre_fcc[c];              c = pcre_fcc[c];
2876              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2877              }              }
2878    
2879            return FALSE;            return FALSE;
2880            }            }
2881          /* Control never gets here */          /* Control never gets here */
# Line 2779  for (;;) Line 2885  for (;;)
2885    
2886        else        else
2887          {          {
2888          uschar *pp = eptr;          const uschar *pp = eptr;
2889          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2890            {            {
2891            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2892            c = *eptr;            c = *eptr;
2893            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2894            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2895              runtime caseless, continue if either case is in the map. */
2896    
2897              if (!nasty_case)
2898                {
2899                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2900                if (md->runtime_caseless)
2901                  {
2902                  c = pcre_fcc[c];
2903                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2904                  }
2905                }
2906    
2907              /* Runtime caseless and it was a negative class. Continue only if
2908              both cases are in the map. */
2909    
2910              else
2911              {              {
2912                if ((data[c/8] & (1 << (c&7))) == 0) break;
2913              c = pcre_fcc[c];              c = pcre_fcc[c];
2914              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2915              }              }
2916    
2917            break;            break;
2918            }            }
2919    
# Line 2807  for (;;) Line 2931  for (;;)
2931        register int length = ecode[1];        register int length = ecode[1];
2932        ecode += 2;        ecode += 2;
2933    
2934        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2935        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2936          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2937        else        else
# Line 2818  for (;;) Line 2942  for (;;)
2942          }          }
2943        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2944        printf("\n");        printf("\n");
2945        #endif  #endif
2946    
2947        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2948        if (md->caseless)        if (md->caseless)
# Line 2875  for (;;) Line 2999  for (;;)
2999      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3000      characters and work backwards. */      characters and work backwards. */
3001    
3002      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3003      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3004    
3005      if (md->caseless)      if (md->caseless)
3006        {        {
# Line 2897  for (;;) Line 3019  for (;;)
3019          }          }
3020        else        else
3021          {          {
3022          uschar *pp = eptr;          const uschar *pp = eptr;
3023          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3024            {            {
3025            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2927  for (;;) Line 3049  for (;;)
3049          }          }
3050        else        else
3051          {          {
3052          uschar *pp = eptr;          const uschar *pp = eptr;
3053          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3054            {            {
3055            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2943  for (;;) Line 3065  for (;;)
3065      /* Match a negated single character */      /* Match a negated single character */
3066    
3067      case OP_NOT:      case OP_NOT:
3068      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3069      ecode++;      ecode++;
3070      if (md->caseless)      if (md->caseless)
3071        {        {
# Line 3002  for (;;) Line 3124  for (;;)
3124      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3125      characters and work backwards. */      characters and work backwards. */
3126    
3127      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3128      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3129    
3130      if (md->caseless)      if (md->caseless)
3131        {        {
# Line 3024  for (;;) Line 3144  for (;;)
3144          }          }
3145        else        else
3146          {          {
3147          uschar *pp = eptr;          const uschar *pp = eptr;
3148          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3149            {            {
3150            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3054  for (;;) Line 3174  for (;;)
3174          }          }
3175        else        else
3176          {          {
3177          uschar *pp = eptr;          const uschar *pp = eptr;
3178          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3179            {            {
3180            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3171  for (;;) Line 3291  for (;;)
3291    
3292      else      else
3293        {        {
3294        uschar *pp = eptr;        const uschar *pp = eptr;
3295        switch(ctype)        switch(ctype)
3296          {          {
3297          case OP_ANY:          case OP_ANY:
# Line 3255  for (;;) Line 3375  for (;;)
3375      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3376    
3377      default:      default:
3378      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3379      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3380      return FALSE;      return FALSE;
3381      }      }
# Line 3273  for (;;) Line 3391  for (;;)
3391    
3392    
3393  /*************************************************  /*************************************************
3394    *         Segregate setjmp()                     *
3395    *************************************************/
3396    
3397    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3398    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3399    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3400    since it's needed only for the extension \X option, and with any luck, a good
3401    compiler will spot the tail recursion and compile it efficiently.
3402    
3403    Arguments:
3404       eptr        pointer in subject
3405       ecode       position in code
3406       offset_top  current top pointer
3407       md          pointer to "static" info for the match
3408    
3409    Returns:       TRUE if matched
3410    */
3411    
3412    static BOOL
3413    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3414      match_data *match_block)
3415    {
3416    return setjmp(match_block->fail_env) == 0 &&
3417          match(eptr, ecode, offset_top, match_block);
3418    }
3419    
3420    
3421    
3422    /*************************************************
3423  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3424  *************************************************/  *************************************************/
3425    
# Line 3299  int Line 3446  int
3446  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3447    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3448  {  {
3449  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3450  int first_char = -1;  int first_char = -1;
3451  match_data match_block;  match_data match_block;
3452  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3453  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3454  uschar *end_subject;  const uschar *end_subject;
3455  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3456  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3457    BOOL using_temporary_offsets = FALSE;
3458  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3459  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3460    
# Line 3317  if (re == NULL || subject == NULL || Line 3464  if (re == NULL || subject == NULL ||
3464     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3465  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3466    
3467  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3468  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3469  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3470    
# Line 3336  match_block.errorcode = PCRE_ERROR_NOMAT Line 3483  match_block.errorcode = PCRE_ERROR_NOMAT
3483    
3484  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3485  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3486  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3487  multiple of 2. */  of 2. */
3488    
3489  ocount &= (-2);  ocount = offsetcount & (-2);
3490  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3491    {    {
3492    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3493    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3494    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3495    #ifdef DEBUG    using_temporary_offsets = TRUE;
3496    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3497    }    }
3498  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3499    
# Line 3400  if (!anchored) Line 3546  if (!anchored)
3546    
3547  do  do
3548    {    {
3549      int rc;
3550    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3551    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3552    
# Line 3441  do Line 3588  do
3588        }        }
3589      }      }
3590    
3591    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3592    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3593    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3594    printf("\n");    printf("\n");
3595    #endif  #endif
3596    
3597    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3598    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3455  do Line 3602  do
3602    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3603    
3604    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3605    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3606      is done in a separate function to avoid compiler warnings. We need not do
3607      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3608      enabled. */
3609    
3610    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3611      {      {
3612      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3613          continue;
3614      if (ocount != offsetcount)      }
3615        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3616    
3617        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3618    
3619        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3620        {
3621        if (offsetcount >= 4)
3622          {
3623          memcpy(offsets + 2, match_block.offset_vector + 2,
3624            (offsetcount - 2) * sizeof(int));
3625          DPRINTF(("Copied offsets from temporary memory\n"));
3626        }        }
3627        if (match_block.end_offset_top > offsetcount)
3628          match_block.offset_overflow = TRUE;
3629    
3630      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3631        (pcre_free)(match_block.offset_vector);
3632        }
3633    
3634      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3635    
3636      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3637      printf(">>>> returning %d\n", rc);      {
3638      #endif      offsets[0] = start_match - match_block.start_subject;
3639      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3640      }      }
3641    
3642      DPRINTF((">>>> returning %d\n", rc));
3643      return rc;
3644    }    }
3645  while (!anchored &&  while (!anchored &&
3646         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3647         start_match++ < end_subject);         start_match++ < end_subject);
3648    
3649  #ifdef DEBUG  if (using_temporary_offsets)
3650  printf(">>>> returning %d\n", match_block.errorcode);    {
3651  #endif    DPRINTF(("Freeing temporary memory\n"));
3652      (pcre_free)(match_block.offset_vector);
3653      }
3654    
3655    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3656    
3657  return match_block.errorcode;  return match_block.errorcode;
3658  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.21

  ViewVC Help
Powered by ViewVC 1.1.5