/[pcre]/code/tags/pcre-2.00/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-2.00/pcre.c

Parent Directory | Revision Log | View Patch

revision 5 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 17 by nigel, Sat Feb 24 21:38:29 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
68      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70    "not",    "not",
71    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 66  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 81  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 116  typedef struct match_data {
116    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
117    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
118    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
119    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
120    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
121    jmp_buf fail_env;             /* Environment for longjmp() break out */    jmp_buf fail_env;             /* Environment for longjmp() break out */
122    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
123    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
124  } match_data;  } match_data;
125    
# Line 126  void  (*pcre_free)(void *) = free; Line 144  void  (*pcre_free)(void *) = free;
144  *          Return version string                 *  *          Return version string                 *
145  *************************************************/  *************************************************/
146    
147  char *  const char *
148  pcre_version(void)  pcre_version(void)
149  {  {
150  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 174  Returns:        number of identifying ex
174  int  int
175  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
176  {  {
177  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
178  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
179  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 204  Arguments:
204  Returns:     nothing  Returns:     nothing
205  */  */
206    
207  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
208    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209  {  {
210  int c;  int c;
211  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 245  do { Line 264  do {
264        case OP_KETRMIN:        case OP_KETRMIN:
265        return TRUE;        return TRUE;
266    
267          /* Skip over entire bracket groups with zero lower bound */
268    
269          case OP_BRAZERO:
270          case OP_BRAMINZERO:
271          cc++;
272          /* Fall through */
273    
274        /* Skip over assertive subpatterns */        /* Skip over assertive subpatterns */
275    
276        case OP_ASSERT:        case OP_ASSERT:
# Line 259  do { Line 285  do {
285        case OP_EOD:        case OP_EOD:
286        case OP_CIRC:        case OP_CIRC:
287        case OP_DOLL:        case OP_DOLL:
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
288        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
289        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
290        cc++;        cc++;
# Line 295  do { Line 319  do {
319        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
320    
321        case OP_CLASS:        case OP_CLASS:
322          case OP_NEGCLASS:
323        case OP_REF:        case OP_REF:
324        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
325    
# Line 360  Returns:     zero or positive => a data Line 385  Returns:     zero or positive => a data
385  */  */
386    
387  static int  static int
388  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
389    BOOL isclass)    int options, BOOL isclass)
390  {  {
391  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
392  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
393  int i;  int i;
394    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 407  else if ((i = escapes[c - '0']) != 0) c
407    
408  else  else
409    {    {
410    uschar *oldptr;    const uschar *oldptr;
411    switch (c)    switch (c)
412      {      {
413      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 502  Returns:    TRUE or FALSE Line 527  Returns:    TRUE or FALSE
527  */  */
528    
529  static BOOL  static BOOL
530  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
531  {  {
532  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
533  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 537  Returns:     pointer to '}' on success; Line 562  Returns:     pointer to '}' on success;
562               current ptr on error, with errorptr set               current ptr on error, with errorptr set
563  */  */
564    
565  static uschar *  static const uschar *
566  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
567  {  {
568  int min = 0;  int min = 0;
569  int max = -1;  int max = -1;
# Line 592  Returns:     TRUE on success Line 617  Returns:     TRUE on success
617  */  */
618    
619  static BOOL  static BOOL
620  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
621    char **errorptr)    const uschar **ptrptr, const char **errorptr)
622  {  {
623  int repeat_type, op_type;  int repeat_type, op_type;
624  int repeat_min, repeat_max;  int repeat_min, repeat_max;
625  int bravalue, length;  int bravalue, length;
626  register int c;  register int c;
627  register uschar *code = *codeptr;  register uschar *code = *codeptr;
628  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
629    const uschar *oldptr;
630  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
631  uschar class[32];  uschar class[32];
632    
633  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
# Line 659  for (;; ptr++) Line 684  for (;; ptr++)
684    
685      case '[':      case '[':
686      previous = code;      previous = code;
     *code++ = OP_CLASS;  
687    
688      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
689        different opcode. This only matters if caseless matching is specified at
690        runtime. */
691    
692      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
693        {        {
694        negate_class = TRUE;        negate_class = TRUE;
695          *code++ = OP_NEGCLASS;
696        c = *(++ptr);        c = *(++ptr);
697        }        }
698      else negate_class = FALSE;      else
699          {
700          negate_class = FALSE;
701          *code++ = OP_CLASS;
702          }
703    
704      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
705      character. */      character. */
# Line 697  for (;; ptr++) Line 728  for (;; ptr++)
728        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
729        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
730        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
731        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
732        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
733        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
734        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 976  for (;; ptr++) Line 1007  for (;; ptr++)
1007            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1008            }            }
1009    
1010          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
1011    
1012            if (repeat_max < 0)
1013              {
1014              *code++ = c;
1015              *code++ = OP_STAR + repeat_type;
1016              }
1017    
1018            /* Else insert an UPTO if the max is greater than the min. */
1019    
1020          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1021            {            {
1022            *code++ = c;            *code++ = c;
1023            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 996  for (;; ptr++) Line 1035  for (;; ptr++)
1035      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1036      stuff after it. */      stuff after it. */
1037    
1038      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1039                 *previous == OP_REF)
1040        {        {
1041        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1042          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1022  for (;; ptr++) Line 1062  for (;; ptr++)
1062      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1063        {        {
1064        int i;        int i;
1065        int length = code - previous;        int len = code - previous;
1066    
1067        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1068          {          {
# Line 1039  for (;; ptr++) Line 1079  for (;; ptr++)
1079          {          {
1080          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1081            {            {
1082            memcpy(code, previous, length);            memcpy(code, previous, len);
1083            code += length;            code += len;
1084            }            }
1085          }          }
1086    
# Line 1052  for (;; ptr++) Line 1092  for (;; ptr++)
1092          {          {
1093          if (repeat_min == 0)          if (repeat_min == 0)
1094            {            {
1095            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1096            code++;            code++;
1097            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1098            }            }
1099    
1100          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1101            {            {
1102            memcpy(code, previous, length);            memcpy(code, previous, len);
1103            code += length;            code += len;
1104            }            }
1105    
1106          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1107            {            {
1108            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1109            memcpy(code, previous, length);            memcpy(code, previous, len);
1110            code += length;            code += len;
1111            }            }
1112          }          }
1113    
# Line 1214  for (;; ptr++) Line 1254  for (;; ptr++)
1254        continue;        continue;
1255        }        }
1256    
1257      /* Reset and fall through */      /* Data character: reset and fall through */
1258    
1259      ptr = oldptr;      ptr = oldptr;
1260      c = '\\';      c = '\\';
# Line 1268  for (;; ptr++) Line 1308  for (;; ptr++)
1308      the next state. */      the next state. */
1309    
1310      previous[1] = length;      previous[1] = length;
1311      ptr--;      if (length < 255) ptr--;
1312      break;      break;
1313      }      }
1314    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1305  Returns:    TRUE on success Line 1345  Returns:    TRUE on success
1345  */  */
1346    
1347  static BOOL  static BOOL
1348  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1349    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1350  {  {
1351  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1352  uschar *code = *codeptr;  uschar *code = *codeptr;
1353  uschar *start_bracket = code;  uschar *start_bracket = code;
1354    
# Line 1374  Returns:   TRUE or FALSE Line 1414  Returns:   TRUE or FALSE
1414  */  */
1415    
1416  static BOOL  static BOOL
1417  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1418  {  {
1419  do {  do {
1420     int op = (int)code[3];     int op = (int)code[3];
# Line 1403  Returns:   TRUE or FALSE Line 1443  Returns:   TRUE or FALSE
1443  */  */
1444    
1445  static BOOL  static BOOL
1446  is_startline(uschar *code)  is_startline(const uschar *code)
1447  {  {
1448  do {  do {
1449     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1488  Returns:       pointer to compiled data Line 1528  Returns:       pointer to compiled data
1528  */  */
1529    
1530  pcre *  pcre *
1531  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1532    int *erroroffset)    int *erroroffset)
1533  {  {
1534  real_pcre *re;  real_pcre *re;
# Line 1498  int runlength; Line 1538  int runlength;
1538  int c, size;  int c, size;
1539  int bracount = 0;  int bracount = 0;
1540  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1541  int top_backref = 0;  int top_backref = 0;
1542  uschar *code, *ptr;  unsigned int brastackptr = 0;
1543    uschar *code;
1544    const uschar *ptr;
1545    
1546  #ifdef DEBUG  #ifdef DEBUG
1547  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1527  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1568  if ((options & ~PUBLIC_OPTIONS) != 0)
1568    return NULL;    return NULL;
1569    }    }
1570    
1571  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1572  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1573    
1574  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1575  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1539  internal flag settings. Make an attempt Line 1578  internal flag settings. Make an attempt
1578  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1579  clever for #-comments. */  clever for #-comments. */
1580    
1581  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1582  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1583    {    {
1584    int min, max;    int min, max;
# Line 1566  while ((c = *(++ptr)) != 0) Line 1605  while ((c = *(++ptr)) != 0)
1605    
1606      case '\\':      case '\\':
1607        {        {
1608        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1609        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1610        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1611        if (c >= 0)        if (c >= 0)
# Line 1645  while ((c = *(++ptr)) != 0) Line 1684  while ((c = *(++ptr)) != 0)
1684        {        {
1685        if (*ptr == '\\')        if (*ptr == '\\')
1686          {          {
1687          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1688          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1689          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1690          }          }
1691        else class_charcount++;        else class_charcount++;
1692        ptr++;        ptr++;
# Line 1662  while ((c = *(++ptr)) != 0) Line 1701  while ((c = *(++ptr)) != 0)
1701    
1702        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1703    
1704        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1705          {          {
1706          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1707          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1770  while ((c = *(++ptr)) != 0) Line 1809  while ((c = *(++ptr)) != 0)
1809      continue;      continue;
1810    
1811      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1812      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1813        0 this is an unmatched bracket which will generate an error, but take care
1814        not to try to access brastack[-1]. */
1815    
1816      case ')':      case ')':
1817      length += 3;      length += 3;
1818        {        {
1819        int min = 1;        int minval = 1;
1820        int max = 1;        int maxval = 1;
1821        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1822    
1823        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1824        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1825    
1826        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1827          {          {
1828          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1829          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1830          }          }
1831        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1832        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1833        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1834    
1835        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1836        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1837        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1838        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1839    
1840        if (min == 0) length++;        if (minval == 0) length++;
1841          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1842        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1843        }        }
   
1844      continue;      continue;
1845    
1846      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1831  while ((c = *(++ptr)) != 0) Line 1871  while ((c = *(++ptr)) != 0)
1871    
1872        if (c == '\\')        if (c == '\\')
1873          {          {
1874          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1875          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1876          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1877          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1861  if (length > 65539) Line 1901  if (length > 65539)
1901    }    }
1902    
1903  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1904  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1905    rather than just "code", because it has been reported that one broken compiler
1906    fails on "code" because it is also an independent variable. It should make no
1907    difference to the value of the offsetof(). */
1908    
1909  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1910  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1911    
1912  if (re == NULL)  if (re == NULL)
# Line 1872  if (re == NULL) Line 1915  if (re == NULL)
1915    return NULL;    return NULL;
1916    }    }
1917    
1918    /* Put in the magic number and the options. */
1919    
1920  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1921  re->options = options;  re->options = options;
1922    
# Line 1879  re->options = options; Line 1924  re->options = options;
1924  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1925  of the function here. */  of the function here. */
1926    
1927  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1928  code = re->code;  code = re->code;
1929  *code = OP_BRA;  *code = OP_BRA;
1930  bracount = 0;  bracount = 0;
# Line 1906  if (*errorptr != NULL) Line 1951  if (*errorptr != NULL)
1951    {    {
1952    (pcre_free)(re);    (pcre_free)(re);
1953    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1954    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1955    return NULL;    return NULL;
1956    }    }
1957    
# Line 1922  if ((options & PCRE_ANCHORED) == 0) Line 1967  if ((options & PCRE_ANCHORED) == 0)
1967      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1968    else    else
1969      {      {
1970      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1971      if (c >= 0)      if (ch >= 0)
1972        {        {
1973        re->first_char = c;        re->first_char = ch;
1974        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1975        }        }
1976      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2017  while (code < code_end) Line 2062  while (code < code_end)
2062      case OP_MINUPTO:      case OP_MINUPTO:
2063      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2064        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2065      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2066      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2067      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2068      code += 3;      code += 3;
# Line 2062  while (code < code_end) Line 2107  while (code < code_end)
2107    
2108      case OP_REF:      case OP_REF:
2109      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2110      break;      code ++;
2111        goto CLASS_REF_REPEAT;
2112    
2113      case OP_CLASS:      case OP_CLASS:
2114        case OP_NEGCLASS:
2115        {        {
2116        int i, min, max;        int i, min, max;
2117    
2118        code++;        if (*code++ == OP_CLASS) printf("    [");
2119        printf("    [");          else printf("   ^[");
2120    
2121        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2122          {          {
# Line 2092  while (code < code_end) Line 2139  while (code < code_end)
2139        printf("]");        printf("]");
2140        code += 32;        code += 32;
2141    
2142          CLASS_REF_REPEAT:
2143    
2144        switch(*code)        switch(*code)
2145          {          {
2146          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2204  Returns:      TRUE if matched Line 2253  Returns:      TRUE if matched
2253  */  */
2254    
2255  static BOOL  static BOOL
2256  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2257  {  {
2258  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2259    
2260  #ifdef DEBUG  #ifdef DEBUG
2261  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2253  Returns:       TRUE if matched Line 2302  Returns:       TRUE if matched
2302  */  */
2303    
2304  static BOOL  static BOOL
2305  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2306    match_data *md)    match_data *md)
2307  {  {
2308  for (;;)  for (;;)
# Line 2261  for (;;) Line 2310  for (;;)
2310    int min, max, ctype;    int min, max, ctype;
2311    register int i;    register int i;
2312    register int c;    register int c;
2313    BOOL minimize;    BOOL minimize = FALSE;
2314    
2315    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2316    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2274  for (;;) Line 2323  for (;;)
2323    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2324      {      {
2325      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2326      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2327    
2328      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2329    
2330      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2331        {        {
# Line 2286  for (;;) Line 2333  for (;;)
2333        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2334        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2335    
2336        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2337        }        }
2338    
2339      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2300  for (;;) Line 2345  for (;;)
2345        }        }
2346      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2347    
2348      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2349    
2350      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2351        {        {
# Line 2401  for (;;) Line 2444  for (;;)
2444    
2445      case OP_BRAZERO:      case OP_BRAZERO:
2446        {        {
2447        uschar *next = ecode+1;        const uschar *next = ecode+1;
2448        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2449        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2450        ecode = next + 3;        ecode = next + 3;
# Line 2410  for (;;) Line 2453  for (;;)
2453    
2454      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2455        {        {
2456        uschar *next = ecode+1;        const uschar *next = ecode+1;
2457        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2458        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2459        ecode++;        ecode++;
# Line 2426  for (;;) Line 2469  for (;;)
2469      case OP_KETRMAX:      case OP_KETRMAX:
2470        {        {
2471        int number;        int number;
2472        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2473    
2474        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2475          {          {
# Line 2441  for (;;) Line 2484  for (;;)
2484    
2485        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2486    
2487        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2488    
2489        if (number > 0)        if (number > 0)
2490          {          {
# Line 2675  for (;;) Line 2716  for (;;)
2716    
2717        else        else
2718          {          {
2719          uschar *pp = eptr;          const uschar *pp = eptr;
2720          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2721            {            {
2722            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2695  for (;;) Line 2736  for (;;)
2736      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2737      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2738      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2739      versions of a character. */      versions of a character, and we have to behave differently for positive and
2740        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2741        treated differently. */
2742    
2743      case OP_CLASS:      case OP_CLASS:
2744        case OP_NEGCLASS:
2745        {        {
2746        uschar *data = ecode + 1;  /* Save for matching */        BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2747        ecode += 33;               /* Advance past the item */        const uschar *data = ecode + 1;  /* Save for matching */
2748          ecode += 33;                     /* Advance past the item */
2749    
2750        switch (*ecode)        switch (*ecode)
2751          {          {
# Line 2727  for (;;) Line 2772  for (;;)
2772          break;          break;
2773    
2774          default:               /* No repeat follows */          default:               /* No repeat follows */
2775          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2776          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2777          }          }
2778    
2779        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2744  for (;;) Line 2782  for (;;)
2782          {          {
2783          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2784          c = *eptr++;          c = *eptr++;
2785          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2786          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2787            runtime caseless, continue if either case is in the map. */
2788    
2789            if (!nasty_case)
2790            {            {
2791              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2792              if (md->runtime_caseless)
2793                {
2794                c = pcre_fcc[c];
2795                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2796                }
2797              }
2798    
2799            /* Runtime caseless and it was a negative class. Continue only if
2800            both cases are in the map. */
2801    
2802            else
2803              {
2804              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2805            c = pcre_fcc[c];            c = pcre_fcc[c];
2806            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2807            }            }
2808    
2809          return FALSE;          return FALSE;
2810          }          }
2811    
# Line 2768  for (;;) Line 2824  for (;;)
2824            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2825            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2826            c = *eptr++;            c = *eptr++;
2827            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2828            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2829              runtime caseless, continue if either case is in the map. */
2830    
2831              if (!nasty_case)
2832              {              {
2833                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2834                if (md->runtime_caseless)
2835                  {
2836                  c = pcre_fcc[c];
2837                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2838                  }
2839                }
2840    
2841              /* Runtime caseless and it was a negative class. Continue only if
2842              both cases are in the map. */
2843    
2844              else
2845                {
2846                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2847              c = pcre_fcc[c];              c = pcre_fcc[c];
2848              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2849              }              }
2850    
2851            return FALSE;            return FALSE;
2852            }            }
2853          /* Control never gets here */          /* Control never gets here */
# Line 2783  for (;;) Line 2857  for (;;)
2857    
2858        else        else
2859          {          {
2860          uschar *pp = eptr;          const uschar *pp = eptr;
2861          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2862            {            {
2863            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2864            c = *eptr;            c = *eptr;
2865            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2866            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2867              runtime caseless, continue if either case is in the map. */
2868    
2869              if (!nasty_case)
2870                {
2871                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2872                if (md->runtime_caseless)
2873                  {
2874                  c = pcre_fcc[c];
2875                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2876                  }
2877                }
2878    
2879              /* Runtime caseless and it was a negative class. Continue only if
2880              both cases are in the map. */
2881    
2882              else
2883              {              {
2884                if ((data[c/8] & (1 << (c&7))) == 0) break;
2885              c = pcre_fcc[c];              c = pcre_fcc[c];
2886              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2887              }              }
2888    
2889            break;            break;
2890            }            }
2891    
# Line 2811  for (;;) Line 2903  for (;;)
2903        register int length = ecode[1];        register int length = ecode[1];
2904        ecode += 2;        ecode += 2;
2905    
2906        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2907        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2908          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2909        else        else
# Line 2822  for (;;) Line 2914  for (;;)
2914          }          }
2915        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2916        printf("\n");        printf("\n");
2917        #endif  #endif
2918    
2919        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2920        if (md->caseless)        if (md->caseless)
# Line 2879  for (;;) Line 2971  for (;;)
2971      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2972      characters and work backwards. */      characters and work backwards. */
2973    
2974      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2975      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2976    
2977      if (md->caseless)      if (md->caseless)
2978        {        {
# Line 2901  for (;;) Line 2991  for (;;)
2991          }          }
2992        else        else
2993          {          {
2994          uschar *pp = eptr;          const uschar *pp = eptr;
2995          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2996            {            {
2997            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2931  for (;;) Line 3021  for (;;)
3021          }          }
3022        else        else
3023          {          {
3024          uschar *pp = eptr;          const uschar *pp = eptr;
3025          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3026            {            {
3027            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2947  for (;;) Line 3037  for (;;)
3037      /* Match a negated single character */      /* Match a negated single character */
3038    
3039      case OP_NOT:      case OP_NOT:
3040      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3041      ecode++;      ecode++;
3042      if (md->caseless)      if (md->caseless)
3043        {        {
# Line 3006  for (;;) Line 3096  for (;;)
3096      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3097      characters and work backwards. */      characters and work backwards. */
3098    
3099      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3100      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3101    
3102      if (md->caseless)      if (md->caseless)
3103        {        {
# Line 3028  for (;;) Line 3116  for (;;)
3116          }          }
3117        else        else
3118          {          {
3119          uschar *pp = eptr;          const uschar *pp = eptr;
3120          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3121            {            {
3122            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3058  for (;;) Line 3146  for (;;)
3146          }          }
3147        else        else
3148          {          {
3149          uschar *pp = eptr;          const uschar *pp = eptr;
3150          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3151            {            {
3152            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3175  for (;;) Line 3263  for (;;)
3263    
3264      else      else
3265        {        {
3266        uschar *pp = eptr;        const uschar *pp = eptr;
3267        switch(ctype)        switch(ctype)
3268          {          {
3269          case OP_ANY:          case OP_ANY:
# Line 3259  for (;;) Line 3347  for (;;)
3347      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3348    
3349      default:      default:
3350      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3351      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3352      return FALSE;      return FALSE;
3353      }      }
# Line 3277  for (;;) Line 3363  for (;;)
3363    
3364    
3365  /*************************************************  /*************************************************
3366    *         Segregate setjmp()                     *
3367    *************************************************/
3368    
3369    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3370    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3371    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3372    since it's needed only for the extension \X option, and with any luck, a good
3373    compiler will spot the tail recursion and compile it efficiently.
3374    
3375    Arguments:
3376       eptr        pointer in subject
3377       ecode       position in code
3378       offset_top  current top pointer
3379       match_block pointer to "static" info for the match
3380    
3381    Returns:       TRUE if matched
3382    */
3383    
3384    static BOOL
3385    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3386      match_data *match_block)
3387    {
3388    return setjmp(match_block->fail_env) == 0 &&
3389          match(eptr, ecode, offset_top, match_block);
3390    }
3391    
3392    
3393    
3394    /*************************************************
3395  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3396  *************************************************/  *************************************************/
3397    
# Line 3303  int Line 3418  int
3418  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3419    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3420  {  {
3421  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3422  int first_char = -1;  int first_char = -1;
3423  match_data match_block;  match_data match_block;
3424  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3425  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3426  uschar *end_subject;  const uschar *end_subject;
3427  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3428  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3429    BOOL using_temporary_offsets = FALSE;
3430  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3431  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3432    
# Line 3321  if (re == NULL || subject == NULL || Line 3436  if (re == NULL || subject == NULL ||
3436     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3437  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3438    
3439  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3440  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3441  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3442    
# Line 3340  match_block.errorcode = PCRE_ERROR_NOMAT Line 3455  match_block.errorcode = PCRE_ERROR_NOMAT
3455    
3456  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3457  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3458  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3459  multiple of 2. */  of 2. */
3460    
3461  ocount &= (-2);  ocount = offsetcount & (-2);
3462  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3463    {    {
3464    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3465    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3466    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3467    #ifdef DEBUG    using_temporary_offsets = TRUE;
3468    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3469    }    }
3470  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3471    
# Line 3404  if (!anchored) Line 3518  if (!anchored)
3518    
3519  do  do
3520    {    {
3521      int rc;
3522    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3523    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3524    
# Line 3445  do Line 3560  do
3560        }        }
3561      }      }
3562    
3563    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3564    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3565    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3566    printf("\n");    printf("\n");
3567    #endif  #endif
3568    
3569    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3570    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3459  do Line 3574  do
3574    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3575    
3576    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3577    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3578      is done in a separate function to avoid compiler warnings. We need not do
3579      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3580      enabled. */
3581    
3582    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3583      {      {
3584      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3585          continue;
3586      if (ocount != offsetcount)      }
3587        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3588    
3589        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3590    
3591        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3592        {
3593        if (offsetcount >= 4)
3594          {
3595          memcpy(offsets + 2, match_block.offset_vector + 2,
3596            (offsetcount - 2) * sizeof(int));
3597          DPRINTF(("Copied offsets from temporary memory\n"));
3598        }        }
3599        if (match_block.end_offset_top > offsetcount)
3600          match_block.offset_overflow = TRUE;
3601    
3602      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3603        (pcre_free)(match_block.offset_vector);
3604        }
3605    
3606      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3607    
3608      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3609      printf(">>>> returning %d\n", rc);      {
3610      #endif      offsets[0] = start_match - match_block.start_subject;
3611      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3612      }      }
3613    
3614      DPRINTF((">>>> returning %d\n", rc));
3615      return rc;
3616    }    }
3617  while (!anchored &&  while (!anchored &&
3618         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3619         start_match++ < end_subject);         start_match++ < end_subject);
3620    
3621  #ifdef DEBUG  if (using_temporary_offsets)
3622  printf(">>>> returning %d\n", match_block.errorcode);    {
3623  #endif    DPRINTF(("Freeing temporary memory\n"));
3624      (pcre_free)(match_block.offset_vector);
3625      }
3626    
3627    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3628    
3629  return match_block.errorcode;  return match_block.errorcode;
3630  }  }

Legend:
Removed from v.5  
changed lines
  Added in v.17

  ViewVC Help
Powered by ViewVC 1.1.5