/[pcre]/code/tags/pcre-2.00/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-2.00/pcre.c

Parent Directory | Revision Log | View Patch

revision 5 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 19 by nigel, Sat Feb 24 21:38:33 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
68      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70    "not",    "not",
71    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 66  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 81  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 116  typedef struct match_data {
116    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
117    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
118    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
119    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
120    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
121    jmp_buf fail_env;             /* Environment for longjmp() break out */    jmp_buf fail_env;             /* Environment for longjmp() break out */
122    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
123    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
124  } match_data;  } match_data;
125    
# Line 126  void  (*pcre_free)(void *) = free; Line 144  void  (*pcre_free)(void *) = free;
144  *          Return version string                 *  *          Return version string                 *
145  *************************************************/  *************************************************/
146    
147  char *  const char *
148  pcre_version(void)  pcre_version(void)
149  {  {
150  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 174  Returns:        number of identifying ex
174  int  int
175  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
176  {  {
177  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
178  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
179  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 204  Arguments:
204  Returns:     nothing  Returns:     nothing
205  */  */
206    
207  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
208    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209  {  {
210  int c;  int c;
211  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 245  do { Line 264  do {
264        case OP_KETRMIN:        case OP_KETRMIN:
265        return TRUE;        return TRUE;
266    
267          /* Skip over entire bracket groups with zero lower bound */
268    
269          case OP_BRAZERO:
270          case OP_BRAMINZERO:
271          cc++;
272          /* Fall through */
273    
274        /* Skip over assertive subpatterns */        /* Skip over assertive subpatterns */
275    
276        case OP_ASSERT:        case OP_ASSERT:
# Line 259  do { Line 285  do {
285        case OP_EOD:        case OP_EOD:
286        case OP_CIRC:        case OP_CIRC:
287        case OP_DOLL:        case OP_DOLL:
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
288        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
289        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
290        cc++;        cc++;
# Line 295  do { Line 319  do {
319        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
320    
321        case OP_CLASS:        case OP_CLASS:
322          case OP_NEGCLASS:
323        case OP_REF:        case OP_REF:
324        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
325    
# Line 360  Returns:     zero or positive => a data Line 385  Returns:     zero or positive => a data
385  */  */
386    
387  static int  static int
388  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
389    BOOL isclass)    int options, BOOL isclass)
390  {  {
391  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
392  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
393  int i;  int i;
394    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 407  else if ((i = escapes[c - '0']) != 0) c
407    
408  else  else
409    {    {
410    uschar *oldptr;    const uschar *oldptr;
411    switch (c)    switch (c)
412      {      {
413      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 502  Returns:    TRUE or FALSE Line 527  Returns:    TRUE or FALSE
527  */  */
528    
529  static BOOL  static BOOL
530  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
531  {  {
532  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
533  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 537  Returns:     pointer to '}' on success; Line 562  Returns:     pointer to '}' on success;
562               current ptr on error, with errorptr set               current ptr on error, with errorptr set
563  */  */
564    
565  static uschar *  static const uschar *
566  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
567  {  {
568  int min = 0;  int min = 0;
569  int max = -1;  int max = -1;
# Line 592  Returns:     TRUE on success Line 617  Returns:     TRUE on success
617  */  */
618    
619  static BOOL  static BOOL
620  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
621    char **errorptr)    const uschar **ptrptr, const char **errorptr)
622  {  {
623  int repeat_type, op_type;  int repeat_type, op_type;
624  int repeat_min, repeat_max;  int repeat_min, repeat_max;
625  int bravalue, length;  int bravalue, length;
626    int greedy_default, greedy_non_default;
627  register int c;  register int c;
628  register uschar *code = *codeptr;  register uschar *code = *codeptr;
629  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
630    const uschar *oldptr;
631  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
632  uschar class[32];  uschar class[32];
633    
634    /* Set up the default and non-default settings for greediness */
635    
636    greedy_default = ((options & PCRE_UNGREEDY) != 0);
637    greedy_non_default = greedy_default ^ 1;
638    
639  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
640    
641  for (;; ptr++)  for (;; ptr++)
# Line 659  for (;; ptr++) Line 690  for (;; ptr++)
690    
691      case '[':      case '[':
692      previous = code;      previous = code;
     *code++ = OP_CLASS;  
693    
694      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
695        different opcode. This only matters if caseless matching is specified at
696        runtime. */
697    
698      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
699        {        {
700        negate_class = TRUE;        negate_class = TRUE;
701          *code++ = OP_NEGCLASS;
702        c = *(++ptr);        c = *(++ptr);
703        }        }
704      else negate_class = FALSE;      else
705          {
706          negate_class = FALSE;
707          *code++ = OP_CLASS;
708          }
709    
710      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
711      character. */      character. */
# Line 697  for (;; ptr++) Line 734  for (;; ptr++)
734        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
735        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
736        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
737        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
738        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
739        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
740        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 876  for (;; ptr++) Line 913  for (;; ptr++)
913        goto FAILED;        goto FAILED;
914        }        }
915    
916      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
917        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
918      next character. */      next character. */
919    
920      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
921          { repeat_type = greedy_non_default; ptr++; }
922        else repeat_type = greedy_default;
923    
924      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
925      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 976  for (;; ptr++) Line 1016  for (;; ptr++)
1016            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1017            }            }
1018    
1019          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
1020    
1021            if (repeat_max < 0)
1022              {
1023              *code++ = c;
1024              *code++ = OP_STAR + repeat_type;
1025              }
1026    
1027            /* Else insert an UPTO if the max is greater than the min. */
1028    
1029          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1030            {            {
1031            *code++ = c;            *code++ = c;
1032            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 996  for (;; ptr++) Line 1044  for (;; ptr++)
1044      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1045      stuff after it. */      stuff after it. */
1046    
1047      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1048                 *previous == OP_REF)
1049        {        {
1050        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1051          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1022  for (;; ptr++) Line 1071  for (;; ptr++)
1071      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1072        {        {
1073        int i;        int i;
1074        int length = code - previous;        int len = code - previous;
1075    
1076        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1077          {          {
# Line 1039  for (;; ptr++) Line 1088  for (;; ptr++)
1088          {          {
1089          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1090            {            {
1091            memcpy(code, previous, length);            memcpy(code, previous, len);
1092            code += length;            code += len;
1093            }            }
1094          }          }
1095    
# Line 1052  for (;; ptr++) Line 1101  for (;; ptr++)
1101          {          {
1102          if (repeat_min == 0)          if (repeat_min == 0)
1103            {            {
1104            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1105            code++;            code++;
1106            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1107            }            }
1108    
1109          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1110            {            {
1111            memcpy(code, previous, length);            memcpy(code, previous, len);
1112            code += length;            code += len;
1113            }            }
1114    
1115          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1116            {            {
1117            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1118            memcpy(code, previous, length);            memcpy(code, previous, len);
1119            code += length;            code += len;
1120            }            }
1121          }          }
1122    
# Line 1109  for (;; ptr++) Line 1158  for (;; ptr++)
1158          case 'm':          case 'm':
1159          case 's':          case 's':
1160          case 'x':          case 'x':
1161            case 'U':
1162            case 'X':
1163          ptr++;          ptr++;
1164          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
1165          previous = NULL;          previous = NULL;
# Line 1214  for (;; ptr++) Line 1265  for (;; ptr++)
1265        continue;        continue;
1266        }        }
1267    
1268      /* Reset and fall through */      /* Data character: reset and fall through */
1269    
1270      ptr = oldptr;      ptr = oldptr;
1271      c = '\\';      c = '\\';
# Line 1268  for (;; ptr++) Line 1319  for (;; ptr++)
1319      the next state. */      the next state. */
1320    
1321      previous[1] = length;      previous[1] = length;
1322      ptr--;      if (length < 255) ptr--;
1323      break;      break;
1324      }      }
1325    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1305  Returns:    TRUE on success Line 1356  Returns:    TRUE on success
1356  */  */
1357    
1358  static BOOL  static BOOL
1359  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1360    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1361  {  {
1362  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1363  uschar *code = *codeptr;  uschar *code = *codeptr;
1364  uschar *start_bracket = code;  uschar *start_bracket = code;
1365    
# Line 1374  Returns:   TRUE or FALSE Line 1425  Returns:   TRUE or FALSE
1425  */  */
1426    
1427  static BOOL  static BOOL
1428  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1429  {  {
1430  do {  do {
1431     int op = (int)code[3];     int op = (int)code[3];
# Line 1403  Returns:   TRUE or FALSE Line 1454  Returns:   TRUE or FALSE
1454  */  */
1455    
1456  static BOOL  static BOOL
1457  is_startline(uschar *code)  is_startline(const uschar *code)
1458  {  {
1459  do {  do {
1460     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1488  Returns:       pointer to compiled data Line 1539  Returns:       pointer to compiled data
1539  */  */
1540    
1541  pcre *  pcre *
1542  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1543    int *erroroffset)    int *erroroffset)
1544  {  {
1545  real_pcre *re;  real_pcre *re;
# Line 1498  int runlength; Line 1549  int runlength;
1549  int c, size;  int c, size;
1550  int bracount = 0;  int bracount = 0;
1551  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1552  int top_backref = 0;  int top_backref = 0;
1553  uschar *code, *ptr;  unsigned int brastackptr = 0;
1554    uschar *code;
1555    const uschar *ptr;
1556    
1557  #ifdef DEBUG  #ifdef DEBUG
1558  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1527  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1579  if ((options & ~PUBLIC_OPTIONS) != 0)
1579    return NULL;    return NULL;
1580    }    }
1581    
1582  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1583  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1584    
1585  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1586  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1539  internal flag settings. Make an attempt Line 1589  internal flag settings. Make an attempt
1589  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1590  clever for #-comments. */  clever for #-comments. */
1591    
1592  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1593  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1594    {    {
1595    int min, max;    int min, max;
# Line 1566  while ((c = *(++ptr)) != 0) Line 1616  while ((c = *(++ptr)) != 0)
1616    
1617      case '\\':      case '\\':
1618        {        {
1619        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1620        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1621        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1622        if (c >= 0)        if (c >= 0)
# Line 1645  while ((c = *(++ptr)) != 0) Line 1695  while ((c = *(++ptr)) != 0)
1695        {        {
1696        if (*ptr == '\\')        if (*ptr == '\\')
1697          {          {
1698          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1699          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1700          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1701          }          }
1702        else class_charcount++;        else class_charcount++;
1703        ptr++;        ptr++;
# Line 1662  while ((c = *(++ptr)) != 0) Line 1712  while ((c = *(++ptr)) != 0)
1712    
1713        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1714    
1715        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1716          {          {
1717          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1718          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1713  while ((c = *(++ptr)) != 0) Line 1763  while ((c = *(++ptr)) != 0)
1763          ptr += 2;          ptr += 2;
1764          break;          break;
1765          }          }
1766        /* Else fall thourh */        /* Else fall through */
1767    
1768        /* Else loop setting valid options until ) is met. Anything else is an        /* Else loop setting valid options until ) is met. Anything else is an
1769        error. */        error. */
# Line 1743  while ((c = *(++ptr)) != 0) Line 1793  while ((c = *(++ptr)) != 0)
1793            length -= spaces;          /* Already counted spaces */            length -= spaces;          /* Already counted spaces */
1794            continue;            continue;
1795            }            }
1796            else if (c == 'X')
1797              {
1798              options |= PCRE_EXTRA;
1799              continue;
1800              }
1801            else if (c == 'U')
1802              {
1803              options |= PCRE_UNGREEDY;
1804              continue;
1805              }
1806          else if (c == ')') break;          else if (c == ')') break;
1807    
1808          *errorptr = ERR12;          *errorptr = ERR12;
# Line 1770  while ((c = *(++ptr)) != 0) Line 1830  while ((c = *(++ptr)) != 0)
1830      continue;      continue;
1831    
1832      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1833      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1834        0 this is an unmatched bracket which will generate an error, but take care
1835        not to try to access brastack[-1]. */
1836    
1837      case ')':      case ')':
1838      length += 3;      length += 3;
1839        {        {
1840        int min = 1;        int minval = 1;
1841        int max = 1;        int maxval = 1;
1842        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1843    
1844        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1845        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1846    
1847        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1848          {          {
1849          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1850          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1851          }          }
1852        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1853        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1854        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1855    
1856        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1857        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1858        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1859        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1860    
1861        if (min == 0) length++;        if (minval == 0) length++;
1862          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1863        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1864        }        }
   
1865      continue;      continue;
1866    
1867      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1831  while ((c = *(++ptr)) != 0) Line 1892  while ((c = *(++ptr)) != 0)
1892    
1893        if (c == '\\')        if (c == '\\')
1894          {          {
1895          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1896          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1897          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1898          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1861  if (length > 65539) Line 1922  if (length > 65539)
1922    }    }
1923    
1924  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1925  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1926    rather than just "code", because it has been reported that one broken compiler
1927    fails on "code" because it is also an independent variable. It should make no
1928    difference to the value of the offsetof(). */
1929    
1930  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1931  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1932    
1933  if (re == NULL)  if (re == NULL)
# Line 1872  if (re == NULL) Line 1936  if (re == NULL)
1936    return NULL;    return NULL;
1937    }    }
1938    
1939    /* Put in the magic number and the options. */
1940    
1941  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1942  re->options = options;  re->options = options;
1943    
# Line 1879  re->options = options; Line 1945  re->options = options;
1945  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1946  of the function here. */  of the function here. */
1947    
1948  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1949  code = re->code;  code = re->code;
1950  *code = OP_BRA;  *code = OP_BRA;
1951  bracount = 0;  bracount = 0;
# Line 1906  if (*errorptr != NULL) Line 1972  if (*errorptr != NULL)
1972    {    {
1973    (pcre_free)(re);    (pcre_free)(re);
1974    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1975    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1976    return NULL;    return NULL;
1977    }    }
1978    
# Line 1922  if ((options & PCRE_ANCHORED) == 0) Line 1988  if ((options & PCRE_ANCHORED) == 0)
1988      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1989    else    else
1990      {      {
1991      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1992      if (c >= 0)      if (ch >= 0)
1993        {        {
1994        re->first_char = c;        re->first_char = ch;
1995        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1996        }        }
1997      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 1942  printf("Length = %d top_bracket = %d top Line 2008  printf("Length = %d top_bracket = %d top
2008    
2009  if (re->options != 0)  if (re->options != 0)
2010    {    {
2011    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2012      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2013      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2014      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2015      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2016      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2017      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2018      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2019        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2020    }    }
2021    
2022  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 2017  while (code < code_end) Line 2084  while (code < code_end)
2084      case OP_MINUPTO:      case OP_MINUPTO:
2085      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2086        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2087      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2088      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2089      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2090      code += 3;      code += 3;
# Line 2062  while (code < code_end) Line 2129  while (code < code_end)
2129    
2130      case OP_REF:      case OP_REF:
2131      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2132      break;      code ++;
2133        goto CLASS_REF_REPEAT;
2134    
2135      case OP_CLASS:      case OP_CLASS:
2136        case OP_NEGCLASS:
2137        {        {
2138        int i, min, max;        int i, min, max;
2139    
2140        code++;        if (*code++ == OP_CLASS) printf("    [");
2141        printf("    [");          else printf("   ^[");
2142    
2143        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2144          {          {
# Line 2092  while (code < code_end) Line 2161  while (code < code_end)
2161        printf("]");        printf("]");
2162        code += 32;        code += 32;
2163    
2164          CLASS_REF_REPEAT:
2165    
2166        switch(*code)        switch(*code)
2167          {          {
2168          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2204  Returns:      TRUE if matched Line 2275  Returns:      TRUE if matched
2275  */  */
2276    
2277  static BOOL  static BOOL
2278  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2279  {  {
2280  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2281    
2282  #ifdef DEBUG  #ifdef DEBUG
2283  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2253  Returns:       TRUE if matched Line 2324  Returns:       TRUE if matched
2324  */  */
2325    
2326  static BOOL  static BOOL
2327  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2328    match_data *md)    match_data *md)
2329  {  {
2330  for (;;)  for (;;)
# Line 2261  for (;;) Line 2332  for (;;)
2332    int min, max, ctype;    int min, max, ctype;
2333    register int i;    register int i;
2334    register int c;    register int c;
2335    BOOL minimize;    BOOL minimize = FALSE;
2336    
2337    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2338    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2274  for (;;) Line 2345  for (;;)
2345    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2346      {      {
2347      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2348      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2349    
2350      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2351    
2352      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2353        {        {
# Line 2286  for (;;) Line 2355  for (;;)
2355        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2356        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2357    
2358        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2359        }        }
2360    
2361      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2300  for (;;) Line 2367  for (;;)
2367        }        }
2368      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2369    
2370      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2371    
2372      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2373        {        {
# Line 2401  for (;;) Line 2466  for (;;)
2466    
2467      case OP_BRAZERO:      case OP_BRAZERO:
2468        {        {
2469        uschar *next = ecode+1;        const uschar *next = ecode+1;
2470        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2471        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2472        ecode = next + 3;        ecode = next + 3;
# Line 2410  for (;;) Line 2475  for (;;)
2475    
2476      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2477        {        {
2478        uschar *next = ecode+1;        const uschar *next = ecode+1;
2479        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2480        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2481        ecode++;        ecode++;
# Line 2426  for (;;) Line 2491  for (;;)
2491      case OP_KETRMAX:      case OP_KETRMAX:
2492        {        {
2493        int number;        int number;
2494        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2495    
2496        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2497          {          {
# Line 2441  for (;;) Line 2506  for (;;)
2506    
2507        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2508    
2509        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2510    
2511        if (number > 0)        if (number > 0)
2512          {          {
# Line 2675  for (;;) Line 2738  for (;;)
2738    
2739        else        else
2740          {          {
2741          uschar *pp = eptr;          const uschar *pp = eptr;
2742          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2743            {            {
2744            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2695  for (;;) Line 2758  for (;;)
2758      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2759      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2760      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2761      versions of a character. */      versions of a character, and we have to behave differently for positive and
2762        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2763        treated differently. */
2764    
2765      case OP_CLASS:      case OP_CLASS:
2766        case OP_NEGCLASS:
2767        {        {
2768        uschar *data = ecode + 1;  /* Save for matching */        BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2769        ecode += 33;               /* Advance past the item */        const uschar *data = ecode + 1;  /* Save for matching */
2770          ecode += 33;                     /* Advance past the item */
2771    
2772        switch (*ecode)        switch (*ecode)
2773          {          {
# Line 2727  for (;;) Line 2794  for (;;)
2794          break;          break;
2795    
2796          default:               /* No repeat follows */          default:               /* No repeat follows */
2797          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2798          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2799          }          }
2800    
2801        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2744  for (;;) Line 2804  for (;;)
2804          {          {
2805          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2806          c = *eptr++;          c = *eptr++;
2807          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2808          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2809            runtime caseless, continue if either case is in the map. */
2810    
2811            if (!nasty_case)
2812            {            {
2813              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2814              if (md->runtime_caseless)
2815                {
2816                c = pcre_fcc[c];
2817                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2818                }
2819              }
2820    
2821            /* Runtime caseless and it was a negative class. Continue only if
2822            both cases are in the map. */
2823    
2824            else
2825              {
2826              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2827            c = pcre_fcc[c];            c = pcre_fcc[c];
2828            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2829            }            }
2830    
2831          return FALSE;          return FALSE;
2832          }          }
2833    
# Line 2768  for (;;) Line 2846  for (;;)
2846            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2847            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2848            c = *eptr++;            c = *eptr++;
2849            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2850            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2851              runtime caseless, continue if either case is in the map. */
2852    
2853              if (!nasty_case)
2854                {
2855                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2856                if (md->runtime_caseless)
2857                  {
2858                  c = pcre_fcc[c];
2859                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2860                  }
2861                }
2862    
2863              /* Runtime caseless and it was a negative class. Continue only if
2864              both cases are in the map. */
2865    
2866              else
2867              {              {
2868                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2869              c = pcre_fcc[c];              c = pcre_fcc[c];
2870              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2871              }              }
2872    
2873            return FALSE;            return FALSE;
2874            }            }
2875          /* Control never gets here */          /* Control never gets here */
# Line 2783  for (;;) Line 2879  for (;;)
2879    
2880        else        else
2881          {          {
2882          uschar *pp = eptr;          const uschar *pp = eptr;
2883          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2884            {            {
2885            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2886            c = *eptr;            c = *eptr;
2887            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2888            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2889              runtime caseless, continue if either case is in the map. */
2890    
2891              if (!nasty_case)
2892              {              {
2893                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2894                if (md->runtime_caseless)
2895                  {
2896                  c = pcre_fcc[c];
2897                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2898                  }
2899                }
2900    
2901              /* Runtime caseless and it was a negative class. Continue only if
2902              both cases are in the map. */
2903    
2904              else
2905                {
2906                if ((data[c/8] & (1 << (c&7))) == 0) break;
2907              c = pcre_fcc[c];              c = pcre_fcc[c];
2908              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2909              }              }
2910    
2911            break;            break;
2912            }            }
2913    
# Line 2811  for (;;) Line 2925  for (;;)
2925        register int length = ecode[1];        register int length = ecode[1];
2926        ecode += 2;        ecode += 2;
2927    
2928        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2929        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2930          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2931        else        else
# Line 2822  for (;;) Line 2936  for (;;)
2936          }          }
2937        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2938        printf("\n");        printf("\n");
2939        #endif  #endif
2940    
2941        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2942        if (md->caseless)        if (md->caseless)
# Line 2879  for (;;) Line 2993  for (;;)
2993      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2994      characters and work backwards. */      characters and work backwards. */
2995    
2996      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2997      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2998    
2999      if (md->caseless)      if (md->caseless)
3000        {        {
# Line 2901  for (;;) Line 3013  for (;;)
3013          }          }
3014        else        else
3015          {          {
3016          uschar *pp = eptr;          const uschar *pp = eptr;
3017          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3018            {            {
3019            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2931  for (;;) Line 3043  for (;;)
3043          }          }
3044        else        else
3045          {          {
3046          uschar *pp = eptr;          const uschar *pp = eptr;
3047          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3048            {            {
3049            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2947  for (;;) Line 3059  for (;;)
3059      /* Match a negated single character */      /* Match a negated single character */
3060    
3061      case OP_NOT:      case OP_NOT:
3062      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3063      ecode++;      ecode++;
3064      if (md->caseless)      if (md->caseless)
3065        {        {
# Line 3006  for (;;) Line 3118  for (;;)
3118      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3119      characters and work backwards. */      characters and work backwards. */
3120    
3121      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3122      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3123    
3124      if (md->caseless)      if (md->caseless)
3125        {        {
# Line 3028  for (;;) Line 3138  for (;;)
3138          }          }
3139        else        else
3140          {          {
3141          uschar *pp = eptr;          const uschar *pp = eptr;
3142          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3143            {            {
3144            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3058  for (;;) Line 3168  for (;;)
3168          }          }
3169        else        else
3170          {          {
3171          uschar *pp = eptr;          const uschar *pp = eptr;
3172          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3173            {            {
3174            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3175  for (;;) Line 3285  for (;;)
3285    
3286      else      else
3287        {        {
3288        uschar *pp = eptr;        const uschar *pp = eptr;
3289        switch(ctype)        switch(ctype)
3290          {          {
3291          case OP_ANY:          case OP_ANY:
# Line 3259  for (;;) Line 3369  for (;;)
3369      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3370    
3371      default:      default:
3372      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3373      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3374      return FALSE;      return FALSE;
3375      }      }
# Line 3277  for (;;) Line 3385  for (;;)
3385    
3386    
3387  /*************************************************  /*************************************************
3388    *         Segregate setjmp()                     *
3389    *************************************************/
3390    
3391    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3392    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3393    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3394    since it's needed only for the extension \X option, and with any luck, a good
3395    compiler will spot the tail recursion and compile it efficiently.
3396    
3397    Arguments:
3398       eptr        pointer in subject
3399       ecode       position in code
3400       offset_top  current top pointer
3401       md          pointer to "static" info for the match
3402    
3403    Returns:       TRUE if matched
3404    */
3405    
3406    static BOOL
3407    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3408      match_data *match_block)
3409    {
3410    return setjmp(match_block->fail_env) == 0 &&
3411          match(eptr, ecode, offset_top, match_block);
3412    }
3413    
3414    
3415    
3416    /*************************************************
3417  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3418  *************************************************/  *************************************************/
3419    
# Line 3303  int Line 3440  int
3440  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3441    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3442  {  {
3443  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3444  int first_char = -1;  int first_char = -1;
3445  match_data match_block;  match_data match_block;
3446  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3447  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3448  uschar *end_subject;  const uschar *end_subject;
3449  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3450  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3451    BOOL using_temporary_offsets = FALSE;
3452  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3453  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3454    
# Line 3321  if (re == NULL || subject == NULL || Line 3458  if (re == NULL || subject == NULL ||
3458     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3459  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3460    
3461  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3462  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3463  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3464    
# Line 3340  match_block.errorcode = PCRE_ERROR_NOMAT Line 3477  match_block.errorcode = PCRE_ERROR_NOMAT
3477    
3478  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3479  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3480  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3481  multiple of 2. */  of 2. */
3482    
3483  ocount &= (-2);  ocount = offsetcount & (-2);
3484  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3485    {    {
3486    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3487    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3488    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3489    #ifdef DEBUG    using_temporary_offsets = TRUE;
3490    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3491    }    }
3492  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3493    
# Line 3404  if (!anchored) Line 3540  if (!anchored)
3540    
3541  do  do
3542    {    {
3543      int rc;
3544    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3545    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3546    
# Line 3445  do Line 3582  do
3582        }        }
3583      }      }
3584    
3585    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3586    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3587    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3588    printf("\n");    printf("\n");
3589    #endif  #endif
3590    
3591    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3592    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3459  do Line 3596  do
3596    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3597    
3598    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3599    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3600      is done in a separate function to avoid compiler warnings. We need not do
3601      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3602      enabled. */
3603    
3604    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3605      {      {
3606      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3607          continue;
3608      if (ocount != offsetcount)      }
3609        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3610    
3611        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3612    
3613        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3614        {
3615        if (offsetcount >= 4)
3616          {
3617          memcpy(offsets + 2, match_block.offset_vector + 2,
3618            (offsetcount - 2) * sizeof(int));
3619          DPRINTF(("Copied offsets from temporary memory\n"));
3620        }        }
3621        if (match_block.end_offset_top > offsetcount)
3622          match_block.offset_overflow = TRUE;
3623    
3624      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3625        (pcre_free)(match_block.offset_vector);
3626        }
3627    
3628      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3629    
3630      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3631      printf(">>>> returning %d\n", rc);      {
3632      #endif      offsets[0] = start_match - match_block.start_subject;
3633      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3634      }      }
3635    
3636      DPRINTF((">>>> returning %d\n", rc));
3637      return rc;
3638    }    }
3639  while (!anchored &&  while (!anchored &&
3640         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3641         start_match++ < end_subject);         start_match++ < end_subject);
3642    
3643  #ifdef DEBUG  if (using_temporary_offsets)
3644  printf(">>>> returning %d\n", match_block.errorcode);    {
3645  #endif    DPRINTF(("Freeing temporary memory\n"));
3646      (pcre_free)(match_block.offset_vector);
3647      }
3648    
3649    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3650    
3651  return match_block.errorcode;  return match_block.errorcode;
3652  }  }

Legend:
Removed from v.5  
changed lines
  Added in v.19

  ViewVC Help
Powered by ViewVC 1.1.5